From 60ab8ba400901996ac2a45797b603eada6b54e49 Mon Sep 17 00:00:00 2001 From: Michail Alexakis Date: Tue, 8 Dec 2015 02:25:40 +0200 Subject: [PATCH] Refactor: use logging as a notification mechanism --- alerts/__init__.py | 9 +-- alerts/bin/check.py | 36 +++-------- alerts/{lib => }/class_loader.py | 0 alerts/config-example.ini | 29 +++++---- alerts/lib/__init__.py | 20 ++++++ alerts/lib/checkers/__init__.py | 72 +++++++++++++++------ alerts/lib/checkers/cpu.py | 68 +++++++++----------- alerts/lib/checkers/df.py | 107 +++++++++++++++---------------- alerts/lib/checkers/foo.py | 30 ++++----- alerts/lib/checkers/load.py | 42 ++++++------ alerts/lib/checkers/memory.py | 74 ++++++++++----------- alerts/lib/checkers/nginx.py | 37 +++++------ alerts/lib/interfaces.py | 25 +++++++- alerts/lib/loggers.py | 41 ++++++++++++ alerts/lib/mailer.py | 37 +++++++++-- alerts/lib/notifiers.py | 67 ------------------- alerts/tests/test-logger.py | 26 ++++++++ alerts/tests/test-mailer.py | 2 +- setup.py | 57 ++++++++-------- 19 files changed, 415 insertions(+), 364 deletions(-) rename alerts/{lib => }/class_loader.py (100%) create mode 100644 alerts/lib/loggers.py delete mode 100644 alerts/lib/notifiers.py create mode 100755 alerts/tests/test-logger.py diff --git a/alerts/__init__.py b/alerts/__init__.py index a127c13..6716b62 100644 --- a/alerts/__init__.py +++ b/alerts/__init__.py @@ -8,8 +8,7 @@ template_dirs = [] template_loader = genshi.template.TemplateLoader() -from .lib.class_loader import load_class -from .lib.checkers import named_checker +from .class_loader import load_class def config_from_file(config_file): global config @@ -20,10 +19,10 @@ def config_from_file(config_file): here = os.path.abspath('.') - confp = ConfigParser(defaults = {'here': here}) + confp = ConfigParser(defaults={'here': here}) confp.read(config_file) - for sec in ('stats', 'checkers', 'alerts', 'notifier', 'mailer'): + for sec in ('stats', 'checkers', 'alerts', 'mailer'): config[sec] = dict(confp.items(sec)) # Setup template loader @@ -42,5 +41,7 @@ def config_from_file(config_file): return def _load_checker(name, cls_name): + from alerts.lib.checkers import named_checker + cls = load_class(cls_name) return named_checker(name)(cls) diff --git a/alerts/bin/check.py b/alerts/bin/check.py index b072f2b..bac1c2d 100755 --- a/alerts/bin/check.py +++ b/alerts/bin/check.py @@ -9,11 +9,8 @@ from ConfigParser import ConfigParser from paste.deploy.converters import asbool, asint, aslist -from alerts import config, template_loader, config_from_file -from alerts.lib.interfaces import IChecker, INotifier +from alerts import config, config_from_file from alerts.lib.checkers import checker_for -from alerts.lib.notifiers import Message, Notifier, MailNotifier -from alerts.lib.mailer import Mailer, make_mailer if __name__ == '__main__': @@ -28,41 +25,26 @@ args = argp.parse_args() - logging.config.fileConfig(args.config_file) - log1 = logging.getLogger(__name__) - config_from_file(args.config_file) - - # Setup notifier - notifier = None - notifier_name = config['notifier']['notifier'] - if notifier_name == 'mailer': - notifier = MailNotifier( - name = 'check', - recipients = aslist(config['notifier']['recipients']), - mailer = make_mailer(config['mailer']) - ) - else: - notifier = Notifier('check') + # Setup notifiers (loggers) + + logging.config.fileConfig(args.config_file) # Perform checks + log1 = logging.getLogger(__name__) hosts = args.hosts if not hosts and args.check_all: # List all hosts known to collectd daemon - data_dir = config['stats']['collectd_data_dir'] + data_dir = config['stats']['collection_dir'] hosts = os.listdir(data_dir) log1.info('Found hosts: %s', ', '.join(hosts)) for name in aslist(config['alerts']['check']): - c = checker_for(name, notifier) + c = checker_for(name) if c: - for h in hosts: - c.check(h) + for host in hosts: + c.check(host) else: log1.info('Cannot find a `%s` checker', name) - - # Send notifications (if any) - - notifier.notify() diff --git a/alerts/lib/class_loader.py b/alerts/class_loader.py similarity index 100% rename from alerts/lib/class_loader.py rename to alerts/class_loader.py diff --git a/alerts/config-example.ini b/alerts/config-example.ini index 8bfc6a1..98ade2f 100644 --- a/alerts/config-example.ini +++ b/alerts/config-example.ini @@ -1,6 +1,6 @@ [stats] -collectd_data_dir = /var/lib/collectd/rrd +collection_dir = /var/lib/collectd/rrd [checkers] @@ -33,17 +33,11 @@ foo.usage_level = 0.9 [mailer] -smtp_host = mail.example.com -smtp_port = 465 -smtp_user = submit@example.com -smtp_pass = secret -from = monitor@example.com - -[notifier] - -notifier = mailer - -recipients = admin@example.com +smtp_host = mail.localdomain +smtp_port = 25 +smtp_user = monitor +smtp_pass = secret +from = monitor@localdomain ## Logging configuration @@ -51,7 +45,7 @@ recipients = admin@example.com keys = root, checker [handlers] -keys = console_short, console_checker, file_generic +keys = console_short, console_checker, file_generic, mail [formatters] keys = generic, short, checker @@ -62,7 +56,7 @@ handlers = console_short [logger_checker] level = INFO -handlers = console_checker +handlers = console_checker, mail qualname = checker propagate = 0 @@ -78,6 +72,13 @@ args = (sys.stderr,) level = INFO formatter = checker +[handler_mail] +class = alerts.lib.loggers.MailHandler +args = ('%(recipients)s',) +level = WARNING +formatter = checker +recipients = malex@localdomain + [handler_file_generic] class = logging.handlers.RotatingFileHandler args = ('alerts.log', 'a', 131072, 14) diff --git a/alerts/lib/__init__.py b/alerts/lib/__init__.py index e69de29..c4fcc9a 100644 --- a/alerts/lib/__init__.py +++ b/alerts/lib/__init__.py @@ -0,0 +1,20 @@ +import zope.interface +from collections import namedtuple + +from .interfaces import ICheckContext + +@zope.interface.implementer(ICheckContext) +class CheckContext(namedtuple('_Context', ['name', 'hostname'])): + + pass + +class Message(namedtuple('_Message', ['title', 'summary', 'body'])): + + def __str__(self): + return str(self.summary) + + def __unicode__(self): + if isinstance(self.summary, str): + return self.summary.decode('utf-8') + else: + return unicode(self.summary) diff --git a/alerts/lib/checkers/__init__.py b/alerts/lib/checkers/__init__.py index a12a72c..5d1632f 100644 --- a/alerts/lib/checkers/__init__.py +++ b/alerts/lib/checkers/__init__.py @@ -1,42 +1,74 @@ import os +import hashlib import logging import zope.interface +import zope.schema from zope.interface.verify import verifyObject +from zope.schema import getValidationErrors from alerts import config, template_loader +from alerts.lib import CheckContext +from alerts.lib.loggers import LoggingContext from alerts.lib.interfaces import adapter_registry -from alerts.lib.interfaces import INotifier, IChecker +from alerts.lib.interfaces import IChecker + +## Provide adapters ## def named_checker(name): def decorate(cls): - adapter_registry.register([INotifier], IChecker, name, cls) + adapter_registry.register([], IChecker, name, cls) + cls.__checker_name__ = name return cls return decorate -def checker_for(name, notifier): - verifyObject(INotifier, notifier) - checker = adapter_registry.queryAdapter(notifier, IChecker, name) - if checker: - data_dir = config['stats']['collectd_data_dir'] - pfx = name + '.' - opts = {k[len(pfx):]: v - for k, v in config['alerts'].iteritems() if k.startswith(pfx)} - checker.configure(data_dir, opts) +def checker_for(name): + checker = adapter_registry.queryMultiAdapter([], IChecker, name) + if not checker: + return None + + # Collect relevant configuration options + collection_dir = config['stats']['collection_dir'] + config_items = config['alerts'].iteritems() + pfx = name + '.' + opts = {key[len(pfx):]: val + for key, val in config_items if key.startswith(pfx)} + + # Setup this instance + logger = logging.getLogger('checker.' + name) + checker.setup(collection_dir, logger, opts) + return checker -# Provide context for loggers +## Bases ## -class LoggingContext(logging.Filter): +@zope.interface.implementer(IChecker) +class BaseChecker(object): + + def __init__(self): + self.collection_dir = None + self.logger = None + return + + def setup(self, collection_dir, logger, opts): + self.collection_dir = collection_dir + self.logger = logger + return - def __init__(self, host): - super(LoggingContext, self).__init__() - self.host = host + def get_logger(self, hostname): + h = hashlib.md5(hostname).hexdigest() + logger = self.logger.getChild(h) + ctx = CheckContext( + hostname=hostname, name=getattr(self, '__checker_name__', '')) + logger.addFilter(LoggingContext(ctx)) + return logger + + def data_dir(self, hostname): + return os.path.join(self.collection_dir, hostname) - def filter(self, record): - record.check_host = self.host - return True + def check(self, hostname): + raise NotImplementedError('This is an abstract method') -# Import basic checkers +## Import basic checkers ## from . import cpu from . import memory diff --git a/alerts/lib/checkers/cpu.py b/alerts/lib/checkers/cpu.py index 6d6b7c0..39b67a3 100644 --- a/alerts/lib/checkers/cpu.py +++ b/alerts/lib/checkers/cpu.py @@ -1,15 +1,13 @@ import os import re import datetime -import logging import zope.interface from thrush import rrd from alerts import template_loader -from alerts.lib.notifiers import Message +from alerts.lib import Message from alerts.lib.collected_stats import Stats as BaseStats -from alerts.lib.checkers import IChecker, INotifier, named_checker -from alerts.lib.checkers import LoggingContext +from alerts.lib.checkers import BaseChecker, named_checker class Stats(BaseStats): @@ -17,33 +15,27 @@ class RRD(rrd.RRD): value = rrd.Gauge(heartbeat=20) @named_checker('cpu') -class Checker(object): - - zope.interface.implements(IChecker) +class Checker(BaseChecker): - def __init__(self, notifier): - self._notifier = notifier - self._data_dir = None - self._max_level = None - self._start = None - self._resolution = None + def __init__(self): + BaseChecker.__init__(self) + self.max_level = None + self.start = None + self.resolution = None return - def configure(self, data_dir, opts): - self._data_dir = data_dir - self._max_level = int(opts.get('usage_level', 85)) # jiffies - self._start = '-%ds' % (int(opts.get('interval', 1800))) - self._resolution = '%d' % (int(opts.get('resolution', 60))) + def setup(self, collection_dir, logger, opts): + BaseChecker.setup(self, collection_dir, logger, opts) + self.max_level = int(opts.get('usage_level', 85)) # jiffies + self.start = '-%ds' % (int(opts.get('interval', 1800))) + self.resolution = '%d' % (int(opts.get('resolution', 60))) return def check(self, hostname): - log1 = logging.getLogger('checker.cpu') - log1.addFilter(LoggingContext(hostname)) - - data_dir = os.path.join(self._data_dir, hostname) + log1 = self.get_logger(hostname) + data_dir = self.data_dir(hostname) - max_u = self._max_level - notifier = self._notifier + max_u = self.max_level n = self._find_number_of_cpus(data_dir) for i in range(0, n): @@ -51,27 +43,29 @@ def check(self, hostname): log1.debug('Computed usage for CPU #%d: %.2f', i, u) if u > max_u: tpl = template_loader.load('cpu.excessive-usage.html') + msg_body = tpl.generate( + hostname = hostname, + cpu_number = i, + max_usage = '%.1f' %(max_u), + avg_usage = '%.1f' %(u), + generated_at = datetime.datetime.now()) msg = Message( title = u'Processor overload at %s' %(hostname), - summary = u'CPU #%d exceeded usage limit of %d jiffies' %(i, max_u), - message = tpl.generate( - hostname = hostname, - cpu_number = i, - max_usage = '%.1f' %(max_u), - avg_usage = '%.1f' %(u), - generated_at = datetime.datetime.now() - ).render('html'), - ) - notifier.add_message(msg, 0) - log1.info('Check CPU #%d: FAILED (%.1f > %.1f)' %(i, u, max_u)) + summary = u'Check CPU #%d: FAILED (%.1f > %.1f)' %(i, u, max_u), + body = msg_body.render('html')) + log1.warn(msg) else: - log1.info('Check CPU #%d: OK (%.1f < %.1f)' %(i, u, max_u)) + msg = Message( + title = u'Processor usage at %s' %(hostname), + summary = u'Check CPU #%d: OK (%.1f < %.1f)' % (i, u, max_u), + body = None) + log1.info(msg) return def get_usage(self, data_dir, cpu_number, state='user'): rrd_file = os.path.join(data_dir, 'cpu-%d/cpu-%s.rrd' % (cpu_number, state)) stats = Stats(rrd_file) - return stats.avg('value', self._start, self._resolution) + return stats.avg('value', self.start, self.resolution) @staticmethod def _find_number_of_cpus(data_dir): diff --git a/alerts/lib/checkers/df.py b/alerts/lib/checkers/df.py index 0510bdf..c19745a 100644 --- a/alerts/lib/checkers/df.py +++ b/alerts/lib/checkers/df.py @@ -1,16 +1,14 @@ import os import re import datetime -import logging import zope.interface from thrush import rrd from collections import namedtuple from alerts import template_loader -from alerts.lib.notifiers import Message +from alerts.lib import Message from alerts.lib.collected_stats import Stats as BaseStats -from alerts.lib.checkers import IChecker, INotifier, named_checker -from alerts.lib.checkers import LoggingContext +from alerts.lib.checkers import BaseChecker, named_checker class Stats(BaseStats): @@ -26,80 +24,75 @@ def as_percentage(self): return .0 @named_checker('df') -class Checker(object): - - zope.interface.implements(IChecker) +class Checker(BaseChecker): - def __init__(self, notifier): - self._log = logging.getLogger(__name__) - self._notifier = notifier - self._data_dir = None - self._max_level = None - self._start = None - self._resolution = None + def __init__(self): + BaseChecker.__init__(self) + self.max_level = None + self.start = None + self.resolution = None return - def configure(self, data_dir, opts): - self._data_dir = data_dir - self._max_level = int(opts.get('usage_level', 90)) # - self._start = '-%ds' % (int(opts.get('interval', 1200))) - self._resolution = '%d' % (int(opts.get('resolution', 600))) + def setup(self, collection_dir, logger, opts): + BaseChecker.setup(self, collection_dir, logger, opts) + self.max_level = int(opts.get('usage_level', 90)) # + self.start = '-%ds' % (int(opts.get('interval', 1200))) + self.resolution = '%d' % (int(opts.get('resolution', 600))) return def check(self, hostname): - log1 = logging.getLogger('checker.df') - log1.addFilter(LoggingContext(hostname)) - - data_dir = os.path.join(self._data_dir, hostname) + log1 = self.get_logger(hostname) + data_dir = self.data_dir(hostname) - max_u = self._max_level - notifier = self._notifier + max_u = self.max_level fs_names = self._find_fs_names(data_dir) for name in fs_names: # Check space uv = self.get_usage_of_space(data_dir, name) u = uv.as_percentage() - log1.debug('Computed usage for filesystem `%s`: %.1f%% of space', name, u) if u > max_u: tpl = template_loader.load('df.excessive-usage.html') + msg_body = tpl.generate( + hostname = hostname, + fs_name = name, + max_usage = '%.1f' %(max_u), + avg_usage = '%.1f' %(u), + generated_at = datetime.datetime.now()) msg = Message( title = u'Running out of space at %s' %(hostname), - summary = u'Used disk space exceeded %d%% at `%s`' %(max_u, name), - message = tpl.generate( - hostname = hostname, - fs_name = name, - max_usage = '%.1f' %(max_u), - avg_usage = '%.1f' %(u), - generated_at = datetime.datetime.now() - ).render('html') - ) - notifier.add_message(msg, 0) - log1.info('Check df space at `%s`: FAILED (%.1f > %.1f)' %(name, u, max_u)) + summary = u'Check df space at `%s`: FAILED (%.1f > %.1f)' %(name, u, max_u), + body = msg_body.render('html')) + log1.warn(msg) else: - log1.info('Check df space at `%s`: OK (%.1f < %.1f)' %(name, u, max_u)) + msg = Message( + title = u'Checking df space at %s' % (hostname), + summary = u'Check df space at `%s`: OK (%.1f < %.1f)' % (name, u, max_u), + body = None) + log1.info(msg) # Check inodes uv = self.get_usage_of_inodes(data_dir, name) u = uv.as_percentage() - log1.debug('Computed usage for filesystem `%s`: %.1f%% of inodes', name, u) if u > max_u: tpl = template_loader.load('df.excessive-usage-of-inodes.html') + msg_body = tpl.generate( + hostname = hostname, + fs_name = name, + max_usage = '%.1f' %(max_u), + avg_usage = '%.1f' %(u), + generated_at = datetime.datetime.now()) msg = Message( title = u'Running out of inodes at %s' %(hostname), - summary = u'Used inodes exceeded %d%% at `%s`' %(max_u, name), - message = tpl.generate( - hostname = hostname, - fs_name = name, - max_usage = '%.1f' %(max_u), - avg_usage = '%.1f' %(u), - generated_at = datetime.datetime.now() - ).render('html') - ) - notifier.add_message(msg, 0) - log1.info('Check df inodes at `%s`: FAILED (%.1f > %.1f)' %(name, u, max_u)) + summary = u'Check df inodes at `%s`: FAILED (%.1f > %.1f)' %(name, u, max_u), + message = msg_body.render('html')) + log1.warn(msg) else: - log1.info('Check df inodes at `%s`: OK (%.1f < %.1f)' %(name, u, max_u)) + msg = Message( + title = u'Checking df inodes at %s' % (hostname), + summary = u'Check df inodes at `%s`: OK (%.1f < %.1f)' % (name, u, max_u), + body = None) + log1.info(msg) return @@ -107,15 +100,15 @@ def get_usage_of_space(self, data_dir, fs_name): rrd_file = os.path.join(data_dir, 'df-%s/df_complex-free.rrd' % (fs_name)) stats = Stats(rrd_file) - free_bytes = stats.avg('value', self._start, self._resolution) + free_bytes = stats.avg('value', self.start, self.resolution) rrd_file = os.path.join(data_dir, 'df-%s/df_complex-reserved.rrd' % (fs_name)) stats = Stats(rrd_file) - reserved_bytes = stats.avg('value', self._start, self._resolution) + reserved_bytes = stats.avg('value', self.start, self.resolution) rrd_file = os.path.join(data_dir, 'df-%s/df_complex-used.rrd' % (fs_name)) stats = Stats(rrd_file) - used_bytes = stats.avg('value', self._start, self._resolution) + used_bytes = stats.avg('value', self.start, self.resolution) return Usage( free = free_bytes, @@ -126,15 +119,15 @@ def get_usage_of_inodes(self, data_dir, fs_name): rrd_file = os.path.join(data_dir, 'df-%s/df_inodes-free.rrd' % (fs_name)) stats = Stats(rrd_file) - free_inodes = stats.avg('value', self._start, self._resolution) + free_inodes = stats.avg('value', self.start, self.resolution) rrd_file = os.path.join(data_dir, 'df-%s/df_inodes-reserved.rrd' % (fs_name)) stats = Stats(rrd_file) - reserved_inodes = stats.avg('value', self._start, self._resolution) + reserved_inodes = stats.avg('value', self.start, self.resolution) rrd_file = os.path.join(data_dir, 'df-%s/df_inodes-used.rrd' % (fs_name)) stats = Stats(rrd_file) - used_inodes = stats.avg('value', self._start, self._resolution) + used_inodes = stats.avg('value', self.start, self.resolution) return Usage( free = free_inodes, diff --git a/alerts/lib/checkers/foo.py b/alerts/lib/checkers/foo.py index 1984b74..789898d 100644 --- a/alerts/lib/checkers/foo.py +++ b/alerts/lib/checkers/foo.py @@ -1,28 +1,20 @@ import logging import zope.interface -from alerts.lib.interfaces import IChecker, INotifier +from alerts.lib.checkers import IChecker, BaseChecker -class Checker(object): - - zope.interface.implements(IChecker) +class Checker(BaseChecker): - def __init__(self, notifier): - self._log = logging.getLogger(__name__) - self._notifier = notifier - self._data_dir = None - self._opts = None - self._log.info('Created instance') - pass + def __init__(self): + BaseChecker.__init__(self) + self.opts = None - def configure(self, data_dir, opts): - self._data_dir = data_dir - self._opts = opts.copy() - self._log.info( - 'Configured instance with: data_dir=%s opts=%r' % (data_dir, opts)) - pass + def setup(self, collection_dir, logger, opts): + BaseChecker.setup(self, collection_dir, logger, opts) + self.opts = opts.copy() def check(self, hostname): - self._log.info('Checking host: %s', hostname) - pass + log1 = self.get_logger(hostname) + data_dir = self.data_dir(hostname) + log1.info('Checking foo (data_dir is %s)', data_dir) diff --git a/alerts/lib/checkers/load.py b/alerts/lib/checkers/load.py index 02c7a55..e8f4067 100644 --- a/alerts/lib/checkers/load.py +++ b/alerts/lib/checkers/load.py @@ -1,14 +1,13 @@ import os import re import datetime -import logging import zope.interface from thrush import rrd from alerts import template_loader -from alerts.lib.notifiers import Message +from alerts.lib import Message from alerts.lib.collected_stats import Stats as BaseStats -from alerts.lib.checkers import IChecker, INotifier, named_checker +from alerts.lib.checkers import BaseChecker, named_checker class Stats(BaseStats): @@ -18,30 +17,27 @@ class RRD(rrd.RRD): longterm = rrd.Gauge(heartbeat=20) @named_checker('load') -class Checker(object): - - zope.interface.implements(IChecker) - - def __init__(self, notifier): - self._notifier = notifier - self._data_dir = None - self._max_level = None - self._start = None - self._resolution = None +class Checker(BaseChecker): + + def __init__(self): + BaseChecker.__init__(self) + self.max_level = None + self.start = None + self.resolution = None return - def configure(self, data_dir, opts): - self._data_dir = data_dir - self._max_level = int(opts.get('usage_level')) # units?? - self._start = '-%ds' % (int(opts.get('interval', 1200))) - self._resolution = '%d' % (int(opts.get('resolution', 120))) + def setup(self, collection_dir, logger, opts): + BaseChecker.setup(self, collection_dir, logger, opts) + self.max_level = int(opts.get('usage_level')) # units?? + self.start = '-%ds' % (int(opts.get('interval', 1200))) + self.resolution = '%d' % (int(opts.get('resolution', 120))) return def check(self, hostname): - data_dir = os.path.join(self._data_dir, hostname) - - max_u = self._max_level - notifier = self._notifier + log1 = self.get_logger(hostname) + data_dir = self.data_dir(hostname) + + max_u = self.max_level # Todo @@ -50,6 +46,6 @@ def check(self, hostname): def get_usage(self, data_dir): rrd_file = os.path.join(data_dir, 'load/load.rrd') stats = Stats(rrd_file) - return stats.avg('midterm', self._start, self._resolution) + return stats.avg('midterm', self.start, self.resolution) diff --git a/alerts/lib/checkers/memory.py b/alerts/lib/checkers/memory.py index a453e53..34f3178 100644 --- a/alerts/lib/checkers/memory.py +++ b/alerts/lib/checkers/memory.py @@ -1,16 +1,14 @@ import os import re import datetime -import logging import zope.interface from thrush import rrd from collections import namedtuple from alerts import template_loader -from alerts.lib.notifiers import Message +from alerts.lib import Message from alerts.lib.collected_stats import Stats as BaseStats -from alerts.lib.checkers import IChecker, INotifier, named_checker -from alerts.lib.checkers import LoggingContext +from alerts.lib.checkers import BaseChecker, named_checker class Stats(BaseStats): @@ -26,34 +24,27 @@ def as_percentage(self): return .0 @named_checker('memory') -class Checker(object): +class Checker(BaseChecker): - zope.interface.implements(IChecker) - - def __init__(self, notifier): - self._log = logging.getLogger(__name__) - self._notifier = notifier - self._data_dir = None - self._max_level = None - self._start = None - self._resolution = None + def __init__(self): + BaseChecker.__init__(self) + self.max_level = None + self.start = None + self.resolution = None return - def configure(self, data_dir, opts): - self._data_dir = data_dir - self._max_level = int(opts.get('usage_level', 90)) # percentage - self._start = '-%ds' % (int(opts.get('interval', 600))) - self._resolution = '%d' % (int(opts.get('resolution', 120))) + def setup(self, collection_dir, logger, opts): + BaseChecker.setup(self, collection_dir, logger, opts) + self.max_level = int(opts.get('usage_level', 90)) # percentage + self.start = '-%ds' % (int(opts.get('interval', 600))) + self.resolution = '%d' % (int(opts.get('resolution', 120))) return def check(self, hostname): - log1 = logging.getLogger('checker.memory') - log1.addFilter(LoggingContext(hostname)) - - data_dir = os.path.join(self._data_dir, hostname) + log1 = self.get_logger(hostname) + data_dir = self.data_dir(hostname) - max_u = self._max_level - notifier = self._notifier + max_u = self.max_level uv = self.get_usage(data_dir) u = uv.as_percentage() @@ -62,36 +53,37 @@ def check(self, hostname): u, (uv.used/(1<<20)), (uv.free/(1<<20)), (uv.cached/(1<<20))) if u > max_u: tpl = template_loader.load('memory.excessive-usage.html') + msg_body = tpl.generate( + hostname = hostname, + max_usage = '%.1f' %(max_u), + avg_usage = '%.1f' %(u), + generated_at = datetime.datetime.now()) msg = Message( title = u'Running out of memory at %s' %(hostname), - summary = u'Memory exceeded usage limit of %d%%' %(max_u), - message = tpl.generate( - hostname = hostname, - max_usage = '%.1f' %(max_u), - avg_usage = '%.1f' %(u), - generated_at = datetime.datetime.now() - ).render('html'), - ) - notifier.add_message(msg, -5) - log1.info('Check memory: FAILED (%.1f > %.1f)' %(u, max_u)) + summary = u'Check memory usage: FAILED (%.1f > %.1f)' %(u, max_u), + body = msg_body.render('html')) + log1.warn(msg) else: - log1.info('Check memory: OK (%.1f < %.1f)' %(u, max_u)) - + msg = Message( + title = u'Checking memory at %s' % (hostname), + summary = u'Check memory: OK (%.1f < %.1f)' % (u, max_u), + body = None) + log1.info(msg) return def get_usage(self, data_dir): stats = Stats(os.path.join(data_dir, 'memory/memory-free.rrd')) - free_bytes = stats.avg('value', self._start, self._resolution) + free_bytes = stats.avg('value', self.start, self.resolution) stats = Stats(os.path.join(data_dir, 'memory/memory-buffered.rrd')) - buffered_bytes = stats.avg('value', self._start, self._resolution) + buffered_bytes = stats.avg('value', self.start, self.resolution) stats = Stats(os.path.join(data_dir, 'memory/memory-cached.rrd')) - cached_bytes = stats.avg('value', self._start, self._resolution) + cached_bytes = stats.avg('value', self.start, self.resolution) stats = Stats(os.path.join(data_dir, 'memory/memory-used.rrd')) - used_bytes = stats.avg('value', self._start, self._resolution) + used_bytes = stats.avg('value', self.start, self.resolution) return Usage( free = free_bytes, diff --git a/alerts/lib/checkers/nginx.py b/alerts/lib/checkers/nginx.py index 07f035d..8ee14bd 100644 --- a/alerts/lib/checkers/nginx.py +++ b/alerts/lib/checkers/nginx.py @@ -6,9 +6,9 @@ from thrush import rrd from alerts import template_loader -from alerts.lib.notifiers import Message +from alerts.lib import Message from alerts.lib.collected_stats import Stats as BaseStats -from alerts.lib.checkers import IChecker, INotifier, named_checker +from alerts.lib.checkers import BaseChecker, named_checker class Stats(BaseStats): @@ -16,30 +16,27 @@ class RRD(rrd.RRD): value = rrd.Gauge(heartbeat=20) @named_checker('nginx') -class Checker(object): - - zope.interface.implements(IChecker) +class Checker(BaseChecker): - def __init__(self, notifier): - self._notifier = notifier - self._data_dir = None - self._max_level = None - self._start = None - self._resolution = None + def __init__(self): + BaseChecker.__init__(self) + self.max_level = None + self.start = None + self._esolution = None return - def configure(self, data_dir, opts): - self._data_dir = data_dir - self._max_level = int(opts.get('usage_level', 1000)) # number of connections - self._start = '-%ds' % (int(opts.get('interval', 600))) - self._resolution = '%d' % (int(opts.get('resolution', 120))) + def setup(self, collection_dir, logger, opts): + BaseChecker.setup(self, collection_dir, logger, opts) + self.max_level = int(opts.get('usage_level', 1000)) # number of connections + self.start = '-%ds' % (int(opts.get('interval', 600))) + self.resolution = '%d' % (int(opts.get('resolution', 120))) return def check(self, hostname): - data_dir = os.path.join(self._data_dir, hostname) + log1 = self.get_logger(hostname) + data_dir = self.data_dir(hostname) - max_u = self._max_level - notifier = self._notifier + max_u = self.max_level # Todo @@ -48,5 +45,5 @@ def check(self, hostname): def get_usage(self, data_dir): rrd_file = os.path.join(data_dir, 'nginx/nginx_connections-active.rrd') stats = Stats(rrd_file) - return stats.avg('value', self._start, self._resolution) + return stats.avg('value', self.start, self.resolution) diff --git a/alerts/lib/interfaces.py b/alerts/lib/interfaces.py index e5efb31..ffbb97c 100644 --- a/alerts/lib/interfaces.py +++ b/alerts/lib/interfaces.py @@ -1,3 +1,6 @@ +import re +import zope.interface +import zope.schema from zope.interface import Interface from zope.interface.adapter import AdapterRegistry @@ -13,12 +16,30 @@ def notify(): '''Pop and send notifications for queued messages.''' pass +re_hostname = re.compile( + '^' + '(([a-zA-Z0-9]|[a-zA-Z0-9][a-zA-Z0-9\-]*[a-zA-Z0-9])\.)*' + + '([A-Za-z0-9]|[A-Za-z0-9][A-Za-z0-9\-]*[A-Za-z0-9])' + '$') + +class ICheckContext(Interface): + + name = zope.schema.DottedName(required=False) + + hostname = zope.schema.NativeString(required=True, constraint=re_hostname.match) + class IChecker(Interface): - def configure(data_dir, opts): - '''Configure this checker from the running environment.''' + def setup(collection_dir, logger, opts): + '''Setup this checker from given configuration.''' pass + def data_dir(hostname): + '''Get the (local) absolute path for collectd stats of a given host.''' + pass + + def get_logger(hostname): + '''Get a context-aware logger to handle messages for a given host''' + pass + def check(hostname): '''Perform checks on the specified host, generate alerts if needed.''' pass diff --git a/alerts/lib/loggers.py b/alerts/lib/loggers.py new file mode 100644 index 0000000..b968132 --- /dev/null +++ b/alerts/lib/loggers.py @@ -0,0 +1,41 @@ +import logging +import zope.interface +import zope.schema +from zope.schema import getValidationErrors + +from alerts import config +from .interfaces import ICheckContext +from .mailer import Mailer, make_mailer + +## Filters ## + +class LoggingContext(logging.Filter): + + def __init__(self, context): + logging.Filter.__init__(self) + + errs = getValidationErrors(ICheckContext, context) + if errs: + raise ValueError('Not a proper context: %r' %(errs)) + self.context = context + + def filter(self, record): + record.check_host = self.context.hostname + record.checker_name = self.context.name + return True + +## Handlers ## + +class MailHandler(logging.Handler): + + def __init__(self, recipients, mailer=None): + logging.Handler.__init__(self) + self.mailer = mailer or make_mailer(config['mailer']) + self.recipients = recipients + + def emit(self, record): + msg = record.msg + headers = { + 'Subject': unicode(msg.title), + } + self.mailer.send(self.recipients, headers, msg.body) diff --git a/alerts/lib/mailer.py b/alerts/lib/mailer.py index 4b5fd37..8a8b1a4 100644 --- a/alerts/lib/mailer.py +++ b/alerts/lib/mailer.py @@ -4,23 +4,43 @@ class Mailer(object): - def __init__(self, host, port, username, password='', from_addr='', verbose=False): + SSL_PORTS = [465] + + verbose = False + + def __init__(self, host, port, username, password, from_addr='', verbose=None): self.smtp = None self.host = host self.port = int(port) self.username = username self.password = password self.from_addr = from_addr or username - self.verbose = verbose + + if not (verbose is None): + self.verbose = bool(verbose) def connect(self): if self.smtp: logging.warn( - "Allready connected to SMTP endpoint (%s,%d)" %(self.host, self.port)) + "Allready connected to SMTP endpoint (%s,%d)" % ( + self.host, self.port)) return - self.smtp = smtplib.SMTP_SSL() + + # Follow port conventions to guess SMTP connection protocol + use_ssl = False + if self.port in self.SSL_PORTS: + self.smtp = smtplib.SMTP_SSL() + use_ssl = True + else: + self.smtp = smtplib.SMTP(); + self.smtp.set_debuglevel(self.verbose) self.smtp.connect(self.host, self.port) + + if not use_ssl: + # Complete the STARTTLS handshake + self.smtp.starttls() + self.smtp.login(self.username, self.password) return @@ -33,13 +53,18 @@ def send(self, to_addr, headers, body): pass msg = MIMEText(body, 'html', 'utf-8') + msg['Content-Type'] = 'text/html; charset=utf-8' + + # Update message with custom headers for h,v in headers.items(): msg[h] = v + if self.smtp is None: # Try to connect first self.connect() from_addr = self.from_addr + # Send on the wire self.smtp.sendmail(from_addr, to_addr, msg.as_string()) return @@ -50,8 +75,8 @@ def __del__(self): def make_mailer(config): smtp_host = config.get('smtp_host', '127.0.0.1') smtp_port = config.get('smtp_port', '465') # SMTP over SSL - smtp_user = config.get('smtp_user') - smtp_pass = config.get('smtp_pass') + smtp_user = config.get('smtp_user', '') + smtp_pass = config.get('smtp_pass', '') from_addr = config.get('from') mailer = Mailer( host = smtp_host, diff --git a/alerts/lib/notifiers.py b/alerts/lib/notifiers.py deleted file mode 100644 index 4686f66..0000000 --- a/alerts/lib/notifiers.py +++ /dev/null @@ -1,67 +0,0 @@ -import os -import sys -import logging -import zope.interface -from Queue import PriorityQueue -from collections import namedtuple - -from .interfaces import INotifier -from .mailer import Mailer - -Message = namedtuple('Message', ['title', 'summary', 'message'], verbose=False) - -class Notifier(object): - - zope.interface.implements(INotifier) - - def __init__(self, name): - self.name = name - self.messages = PriorityQueue() - self.log1 = logging.getLogger(__name__) - return - - def add_message(self, msg, priority=0): - assert isinstance(msg, Message), 'Expected an argument of type %s' % (Message) - self.messages.put((priority, msg)) - return - - def notify(self): - n = self.messages.qsize() - if not n: - return - self.log1.info('Notifying on %d events' %(n)) - while not self.messages.empty(): - priority, msg = self.messages.get() - self.log1.warn("%s: %s" %(msg.title, msg.summary)) - -class MailNotifier(Notifier): - - MAX_NUM_EMAILS = 5 - - def __init__(self, name, recipients, mailer): - assert isinstance(mailer, Mailer), 'Expected a 2nd argument of type %s' % (Mailer) - self.recipients = recipients - self.mailer = mailer - Notifier.__init__(self, name) - return - - def notify(self): - n = self.messages.qsize() - if not n: - return - self.log1.info('Sending email notifications on %d events' %(n)) - i = 0 - while not self.messages.empty(): - priority, msg = self.messages.get() - self.log1.warn("%s: %s" %(msg.title, msg.summary)) - if i < self.MAX_NUM_EMAILS: - headers = { - 'Subject': unicode(msg.title), - } - self.mailer.send(self.recipients, headers, msg.message) - i += 1 - if n > self.MAX_NUM_EMAILS: - self.log1.warn( - 'Encountered too many email notifications (silenced %d of them)' % (n - self.MAX_NUM_EMAILS)) - return - diff --git a/alerts/tests/test-logger.py b/alerts/tests/test-logger.py new file mode 100755 index 0000000..a8220df --- /dev/null +++ b/alerts/tests/test-logger.py @@ -0,0 +1,26 @@ +#!/usr/bin/env python + +import logging +import logging.config + +from alerts import config, config_from_file +from alerts.lib import Message, CheckContext, ICheckContext +from alerts.lib.loggers import LoggingContext + +if __name__ == '__main__': + config_from_file('config.ini') + logging.config.fileConfig('config.ini') + + log1 = logging.getLogger('checker.cpu') + + ctx = CheckContext(name='cpu', hostname='foo.localdomain') + filt1 = LoggingContext(ctx) + log1.addFilter(filt1) + + log1.info('Hello World (1)') + + msg = Message(summary='Hello World (2)', title='Greeting', body='I say hello to the whole world!') + + log1.info(msg) + + del log1 diff --git a/alerts/tests/test-mailer.py b/alerts/tests/test-mailer.py index 6449df0..81346e1 100755 --- a/alerts/tests/test-mailer.py +++ b/alerts/tests/test-mailer.py @@ -12,7 +12,7 @@ m = make_mailer(config['mailer']) headers = { - 'Subject': 'Yet Another Test!', + 'Subject': 'Testing alerts.lib.mailer!', } t1 = template_loader.load('hello.html') diff --git a/setup.py b/setup.py index 66ac335..a85b694 100644 --- a/setup.py +++ b/setup.py @@ -1,30 +1,35 @@ from setuptools import setup, find_packages import sys, os -version = '0.2' +version = '0.3' -setup(name='alerts', - version=version, - description="Alerts on excessive use of system resources", - long_description="""\ -""", - classifiers=[], # Get strings from http://pypi.python.org/pypi?%3Aaction=list_classifiers - keywords='collectd alerts rrd monitoring', - author='Michail Alexakis', - author_email='drmalex07@gmail.com', - url='https://github.com/drmalex07/alerts', - license='GPLv3', - packages=find_packages(exclude=['ez_setup', 'examples', 'tests']), - include_package_data=True, - zip_safe=False, - install_requires=[ - # -*- Extra requirements: -*- - "PasteDeploy", - "thrush", - "genshi", - "zope.interface", - ], - entry_points=""" - # -*- Entry points: -*- - """, - ) +setup( + name='alerts', + version=version, + description="Generate alerts based on collectd stats", + long_description="""Generate alerts based on collectd stats""", + classifiers=[ + # Get strings from http://pypi.python.org/pypi?%3Aaction=list_classifiers + 'Topic :: System :: Monitoring', + 'Programming Language :: Python', + ], + keywords='collectd alerts rrd monitoring', + author='Michail Alexakis', + author_email='drmalex07@gmail.com', + url='https://github.com/drmalex07/alerts', + license='GPLv3', + packages=find_packages(exclude=['ez_setup', 'examples', 'tests']), + include_package_data=True, + zip_safe=False, + install_requires=[ + # -*- Extra requirements: -*- + "PasteDeploy", + "thrush", + "genshi", + "zope.interface", + "zope.schema", + ], + entry_points=""" + # -*- Entry points: -*- + """, +)