Skip to content

Commit

Permalink
Refactor: use logging as a notification mechanism
Browse files Browse the repository at this point in the history
  • Loading branch information
drmalex07 committed Dec 8, 2015
1 parent 8cb9896 commit 60ab8ba
Show file tree
Hide file tree
Showing 19 changed files with 415 additions and 364 deletions.
9 changes: 5 additions & 4 deletions alerts/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,7 @@
template_dirs = []
template_loader = genshi.template.TemplateLoader()

from .lib.class_loader import load_class
from .lib.checkers import named_checker
from .class_loader import load_class

def config_from_file(config_file):
global config
Expand All @@ -20,10 +19,10 @@ def config_from_file(config_file):

here = os.path.abspath('.')

confp = ConfigParser(defaults = {'here': here})
confp = ConfigParser(defaults={'here': here})
confp.read(config_file)

for sec in ('stats', 'checkers', 'alerts', 'notifier', 'mailer'):
for sec in ('stats', 'checkers', 'alerts', 'mailer'):
config[sec] = dict(confp.items(sec))

# Setup template loader
Expand All @@ -42,5 +41,7 @@ def config_from_file(config_file):
return

def _load_checker(name, cls_name):
    """Load a checker class and register it under the given name.

    :param name: name the checker is registered under.
    :param cls_name: class specifier, resolved by ``load_class``.
    :returns: whatever the ``named_checker(name)`` decorator returns
        for the loaded class.
    """
    # Imported inside the function rather than at module level.
    # NOTE(review): presumably this avoids a circular import, since
    # alerts.lib.checkers imports from the `alerts` package -- confirm.
    from alerts.lib.checkers import named_checker

    checker_cls = load_class(cls_name)
    register = named_checker(name)
    return register(checker_cls)
36 changes: 9 additions & 27 deletions alerts/bin/check.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,8 @@
from ConfigParser import ConfigParser
from paste.deploy.converters import asbool, asint, aslist

from alerts import config, template_loader, config_from_file
from alerts.lib.interfaces import IChecker, INotifier
from alerts import config, config_from_file
from alerts.lib.checkers import checker_for
from alerts.lib.notifiers import Message, Notifier, MailNotifier
from alerts.lib.mailer import Mailer, make_mailer

if __name__ == '__main__':

Expand All @@ -28,41 +25,26 @@

args = argp.parse_args()

logging.config.fileConfig(args.config_file)
log1 = logging.getLogger(__name__)

config_from_file(args.config_file)

# Setup notifier

notifier = None
notifier_name = config['notifier']['notifier']
if notifier_name == 'mailer':
notifier = MailNotifier(
name = 'check',
recipients = aslist(config['notifier']['recipients']),
mailer = make_mailer(config['mailer'])
)
else:
notifier = Notifier('check')
# Setup notifiers (loggers)

logging.config.fileConfig(args.config_file)

# Perform checks

log1 = logging.getLogger(__name__)
hosts = args.hosts
if not hosts and args.check_all:
# List all hosts known to collectd daemon
data_dir = config['stats']['collectd_data_dir']
data_dir = config['stats']['collection_dir']
hosts = os.listdir(data_dir)
log1.info('Found hosts: %s', ', '.join(hosts))

for name in aslist(config['alerts']['check']):
c = checker_for(name, notifier)
c = checker_for(name)
if c:
for h in hosts:
c.check(h)
for host in hosts:
c.check(host)
else:
log1.info('Cannot find a `%s` checker', name)

# Send notifications (if any)

notifier.notify()
File renamed without changes.
29 changes: 15 additions & 14 deletions alerts/config-example.ini
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[stats]

collectd_data_dir = /var/lib/collectd/rrd
collection_dir = /var/lib/collectd/rrd

[checkers]

Expand Down Expand Up @@ -33,25 +33,19 @@ foo.usage_level = 0.9

[mailer]

smtp_host = mail.example.com
smtp_port = 465
smtp_user = [email protected]
smtp_pass = secret
from = [email protected]

[notifier]

notifier = mailer

recipients = [email protected]
smtp_host = mail.localdomain
smtp_port = 25
smtp_user = monitor
smtp_pass = secret
from = monitor@localdomain

## Logging configuration

[loggers]
keys = root, checker

[handlers]
keys = console_short, console_checker, file_generic
keys = console_short, console_checker, file_generic, mail

[formatters]
keys = generic, short, checker
Expand All @@ -62,7 +56,7 @@ handlers = console_short

[logger_checker]
level = INFO
handlers = console_checker
handlers = console_checker, mail
qualname = checker
propagate = 0

Expand All @@ -78,6 +72,13 @@ args = (sys.stderr,)
level = INFO
formatter = checker

[handler_mail]
class = alerts.lib.loggers.MailHandler
args = ('%(recipients)s',)
level = WARNING
formatter = checker
recipients = malex@localdomain

[handler_file_generic]
class = logging.handlers.RotatingFileHandler
args = ('alerts.log', 'a', 131072, 14)
Expand Down
20 changes: 20 additions & 0 deletions alerts/lib/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import zope.interface
from collections import namedtuple

from .interfaces import ICheckContext

@zope.interface.implementer(ICheckContext)
class CheckContext(namedtuple('_Context', ['name', 'hostname'])):
    """Immutable (name, hostname) pair declared to implement ICheckContext."""

class Message(namedtuple('_Message', ['title', 'summary', 'body'])):
    """A (title, summary, body) message whose text form is its summary.

    ``str(message)`` and ``unicode(message)`` both render only the
    ``summary`` field; ``title`` and ``body`` are carried along as plain
    tuple fields.
    """

    def __str__(self):
        """Return the summary as a native string.

        On Python 2 a ``unicode`` summary containing non-ASCII characters
        cannot be converted by ``str()``; in that case it is encoded as
        UTF-8, the same codec ``__unicode__`` assumes for byte strings.
        """
        summary = self.summary
        try:
            return str(summary)
        except UnicodeEncodeError:
            # Only reachable on Python 2, where str() implies an ASCII encode.
            return unicode(summary).encode('utf-8')

    def __unicode__(self):
        """Return the summary as text, decoding byte strings as UTF-8."""
        summary = self.summary
        if isinstance(summary, str):
            return summary.decode('utf-8')
        return unicode(summary)
72 changes: 52 additions & 20 deletions alerts/lib/checkers/__init__.py
Original file line number Diff line number Diff line change
@@ -1,42 +1,74 @@
import os
import hashlib
import logging
import zope.interface
import zope.schema
from zope.interface.verify import verifyObject
from zope.schema import getValidationErrors

from alerts import config, template_loader
from alerts.lib import CheckContext
from alerts.lib.loggers import LoggingContext
from alerts.lib.interfaces import adapter_registry
from alerts.lib.interfaces import INotifier, IChecker
from alerts.lib.interfaces import IChecker

## Provide adapters ##

def named_checker(name):
def decorate(cls):
adapter_registry.register([INotifier], IChecker, name, cls)
adapter_registry.register([], IChecker, name, cls)
cls.__checker_name__ = name
return cls
return decorate

def checker_for(name, notifier):
verifyObject(INotifier, notifier)
checker = adapter_registry.queryAdapter(notifier, IChecker, name)
if checker:
data_dir = config['stats']['collectd_data_dir']
pfx = name + '.'
opts = {k[len(pfx):]: v
for k, v in config['alerts'].iteritems() if k.startswith(pfx)}
checker.configure(data_dir, opts)
def checker_for(name):
checker = adapter_registry.queryMultiAdapter([], IChecker, name)
if not checker:
return None

# Collect relevant configuration options
collection_dir = config['stats']['collection_dir']
config_items = config['alerts'].iteritems()
pfx = name + '.'
opts = {key[len(pfx):]: val
for key, val in config_items if key.startswith(pfx)}

# Setup this instance
logger = logging.getLogger('checker.' + name)
checker.setup(collection_dir, logger, opts)

return checker

# Provide context for loggers
## Bases ##

class LoggingContext(logging.Filter):
@zope.interface.implementer(IChecker)
class BaseChecker(object):

def __init__(self):
self.collection_dir = None
self.logger = None
return

def setup(self, collection_dir, logger, opts):
self.collection_dir = collection_dir
self.logger = logger
return

def __init__(self, host):
super(LoggingContext, self).__init__()
self.host = host
def get_logger(self, hostname):
h = hashlib.md5(hostname).hexdigest()
logger = self.logger.getChild(h)
ctx = CheckContext(
hostname=hostname, name=getattr(self, '__checker_name__', ''))
logger.addFilter(LoggingContext(ctx))
return logger

def data_dir(self, hostname):
return os.path.join(self.collection_dir, hostname)

def filter(self, record):
record.check_host = self.host
return True
def check(self, hostname):
raise NotImplementedError('This is an abstract method')

# Import basic checkers
## Import basic checkers ##

from . import cpu
from . import memory
Expand Down
68 changes: 31 additions & 37 deletions alerts/lib/checkers/cpu.py
Original file line number Diff line number Diff line change
@@ -1,77 +1,71 @@
import os
import re
import datetime
import logging
import zope.interface
from thrush import rrd

from alerts import template_loader
from alerts.lib.notifiers import Message
from alerts.lib import Message
from alerts.lib.collected_stats import Stats as BaseStats
from alerts.lib.checkers import IChecker, INotifier, named_checker
from alerts.lib.checkers import LoggingContext
from alerts.lib.checkers import BaseChecker, named_checker

class Stats(BaseStats):

class RRD(rrd.RRD):
value = rrd.Gauge(heartbeat=20)

@named_checker('cpu')
class Checker(object):

zope.interface.implements(IChecker)
class Checker(BaseChecker):

def __init__(self, notifier):
self._notifier = notifier
self._data_dir = None
self._max_level = None
self._start = None
self._resolution = None
def __init__(self):
BaseChecker.__init__(self)
self.max_level = None
self.start = None
self.resolution = None
return

def configure(self, data_dir, opts):
self._data_dir = data_dir
self._max_level = int(opts.get('usage_level', 85)) # jiffies
self._start = '-%ds' % (int(opts.get('interval', 1800)))
self._resolution = '%d' % (int(opts.get('resolution', 60)))
def setup(self, collection_dir, logger, opts):
BaseChecker.setup(self, collection_dir, logger, opts)
self.max_level = int(opts.get('usage_level', 85)) # jiffies
self.start = '-%ds' % (int(opts.get('interval', 1800)))
self.resolution = '%d' % (int(opts.get('resolution', 60)))
return

def check(self, hostname):
log1 = logging.getLogger('checker.cpu')
log1.addFilter(LoggingContext(hostname))

data_dir = os.path.join(self._data_dir, hostname)
log1 = self.get_logger(hostname)
data_dir = self.data_dir(hostname)

max_u = self._max_level
notifier = self._notifier
max_u = self.max_level

n = self._find_number_of_cpus(data_dir)
for i in range(0, n):
u = self.get_usage(data_dir, i, 'user')
log1.debug('Computed usage for CPU #%d: %.2f', i, u)
if u > max_u:
tpl = template_loader.load('cpu.excessive-usage.html')
msg_body = tpl.generate(
hostname = hostname,
cpu_number = i,
max_usage = '%.1f' %(max_u),
avg_usage = '%.1f' %(u),
generated_at = datetime.datetime.now())
msg = Message(
title = u'Processor overload at %s' %(hostname),
summary = u'CPU #%d exceeded usage limit of %d jiffies' %(i, max_u),
message = tpl.generate(
hostname = hostname,
cpu_number = i,
max_usage = '%.1f' %(max_u),
avg_usage = '%.1f' %(u),
generated_at = datetime.datetime.now()
).render('html'),
)
notifier.add_message(msg, 0)
log1.info('Check CPU #%d: FAILED (%.1f > %.1f)' %(i, u, max_u))
summary = u'Check CPU #%d: FAILED (%.1f > %.1f)' %(i, u, max_u),
body = msg_body.render('html'))
log1.warn(msg)
else:
log1.info('Check CPU #%d: OK (%.1f < %.1f)' %(i, u, max_u))
msg = Message(
title = u'Processor usage at %s' %(hostname),
summary = u'Check CPU #%d: OK (%.1f < %.1f)' % (i, u, max_u),
body = None)
log1.info(msg)
return

def get_usage(self, data_dir, cpu_number, state='user'):
rrd_file = os.path.join(data_dir, 'cpu-%d/cpu-%s.rrd' % (cpu_number, state))
stats = Stats(rrd_file)
return stats.avg('value', self._start, self._resolution)
return stats.avg('value', self.start, self.resolution)

@staticmethod
def _find_number_of_cpus(data_dir):
Expand Down
Loading

0 comments on commit 60ab8ba

Please sign in to comment.