Skip to content

Commit

Permalink
Merge pull request #6 from rnt/parallel-check
Browse files Browse the repository at this point in the history
Parallel check
  • Loading branch information
elacheche authored Jan 6, 2018
2 parents fa654c8 + 2f171fe commit 06051c9
Showing 1 changed file with 170 additions and 82 deletions.
252 changes: 170 additions & 82 deletions docker_check.py
Original file line number Diff line number Diff line change
@@ -1,121 +1,209 @@
#!/usr/bin/env python3

"docker_check.py is a nagios compatible plugin to check docker containers."

import os
import re
import sys
import argparse

import logging
import queue
import threading

# Configure the root logger once at import time. Each record shows the
# timestamp, level, thread name and logger name ahead of the message. The
# level is INFO, so the logging.debug() calls in this file are not emitted.
logging.basicConfig(
format='%(asctime)s level=%(levelname)-7s '
'threadName=%(threadName)s name=%(name)s %(message)s',
level=logging.INFO
)

# The docker SDK is a third-party dependency. Without it the plugin cannot
# check anything, so report the problem and stop immediately.
try:
    import docker
except ImportError as error:
    # Adjacent string literals are joined by the parser. A backslash
    # continuation *inside* a literal would embed the stray quotes and the
    # next line's leading whitespace in the printed message.
    print("{}: Please install the docker module, you can use "
          "'pip install docker' to do that".format(error))
    # Exit with 3 (UNKNOWN), matching the status the entry-point handler
    # uses for unexpected failures: the check did not run, which is not the
    # same as a WARNING (1) about a container.
    sys.exit(3)

# Module metadata: author, license and plugin version.
__author__ = 'El Acheche Anis'
__license__ = 'GPL'
__version__ = '0.1'


def get_mem_pct(ct, stats):
def get_ct_stats(container):
    '''Return one stats snapshot for *container*.

    The container's ``stats`` method is called with ``stream=False`` and its
    result is handed back unchanged.
    '''
    snapshot = container.stats(stream=False)
    return snapshot


def get_mem_pct(stats):
    '''Get a container memory usage in %.

    :param stats: stats mapping for one container; ``memory_stats.usage``
        and ``memory_stats.limit`` are read from it.
    :return: usage as a percentage of the limit, rounded to two decimals,
        or ``0.0`` when either figure is missing or the limit is zero.
    '''
    memory = stats.get('memory_stats') or {}
    usage = memory.get('usage')
    limit = memory.get('limit')
    # Without both figures (or with a zero limit) no percentage can be
    # computed; report 0.0 instead of raising KeyError/ZeroDivisionError.
    if usage is None or not limit:
        return 0.0
    return round(usage * 100 / limit, 2)


def get_cpu_pct(stats):
    '''Get a container cpu usage in %.

    The percentage is the container's CPU time delta divided by the system
    CPU time delta between the previous (``precpu_stats``) and the current
    (``cpu_stats``) sample, scaled by the number of CPUs.

    :param stats: stats mapping for one container.
    :return: CPU usage in percent as a float; ``0.0`` when a counter needed
        for the deltas is missing or when either delta is not positive.
    '''
    try:
        cpu_stats = stats['cpu_stats']
        precpu_stats = stats['precpu_stats']
        cpu_delta = (cpu_stats['cpu_usage']['total_usage']
                     - precpu_stats['cpu_usage']['total_usage'])
        system_delta = (cpu_stats['system_cpu_usage']
                        - precpu_stats['system_cpu_usage'])
    except (KeyError, TypeError):
        # No usable previous/current sample pair: nothing to compare.
        return 0.0

    if cpu_delta <= 0 or system_delta <= 0:
        return 0.0

    online_cpus = cpu_stats.get('online_cpus')
    if not online_cpus:
        # Fall back to counting the CPUs that have accumulated any usage
        # when the daemon does not report (or reports 0 for) online_cpus.
        per_cpu = cpu_stats['cpu_usage'].get('percpu_usage') or ()
        online_cpus = sum(1 for item in per_cpu if item > 0)
    return (cpu_delta / system_delta) * online_cpus * 100


def get_net_io(stats, interface='eth0'):
    '''Get a container Net In / Out usage since its launch.

    :param stats: stats mapping for one container.
    :param interface: name of the network interface whose counters are
        read; defaults to ``'eth0'``.
    :return: ``(rx_bytes, tx_bytes)`` for that interface, or ``(0, 0)``
        when the stats carry no ``networks`` section or no such interface
        (presumably the case for containers without their own network
        namespace -- verify against the Docker API).
    '''
    counters = (stats.get('networks') or {}).get(interface) or {}
    return counters.get('rx_bytes', 0), counters.get('tx_bytes', 0)


def get_disk_io(stats):
    '''Get a container Disk In / Out usage since its launch.

    Every entry of ``blkio_stats.io_service_bytes_recursive`` is inspected
    and its ``value`` is added to the read or write total according to its
    ``op`` field (compared case-insensitively). Summing by operation avoids
    depending on the order of the entries and covers more than one device.

    :param stats: stats mapping for one container.
    :return: ``(bytes_read, bytes_written)``; ``(0, 0)`` when the list is
        missing, null or empty.
    '''
    entries = ((stats.get('blkio_stats') or {})
               .get('io_service_bytes_recursive') or [])
    disk_in = 0
    disk_out = 0
    for entry in entries:
        operation = str(entry.get('op', '')).lower()
        if operation == 'read':
            disk_in += entry.get('value', 0)
        elif operation == 'write':
            disk_out += entry.get('value', 0)
    return disk_in, disk_out


def get_ct_stats(ct, client):
    '''Fetch container *ct* through *client* and return one stats snapshot.'''
    container = client.containers.get(ct)
    return container.stats(stream=False)
def get_ct_metrics(container_queue, containers_stats):
    '''Get container metrics from docker API.

    Worker body: drains *container_queue* and, for every container taken
    from it, stores six entries in *containers_stats* keyed
    ``<name>_mem_pct``, ``<name>_cpu_pct``, ``<name>_net_in``,
    ``<name>_net_out``, ``<name>_disk_in`` and ``<name>_disk_out``.

    :param container_queue: ``queue.Queue`` of container objects; each must
        expose ``name`` and be accepted by ``get_ct_stats``.
    :param containers_stats: dict shared by all workers, updated in place.
    :return: ``None``.

    A container whose metrics cannot be collected is logged and skipped (its
    keys are simply absent); ``task_done()`` is still called for it so that
    a ``container_queue.join()`` in the caller cannot block forever.
    '''
    logging.debug("Running get_ct_metrics()")
    while True:
        # get_nowait() instead of an empty()/get() pair: between the two
        # calls another worker could take the last item, which would leave
        # this worker blocked in get() indefinitely.
        try:
            container = container_queue.get_nowait()
        except queue.Empty:
            break

        try:
            logging.debug("Get container %s stats", container.name)
            stats = get_ct_stats(container)

            mem_pct = get_mem_pct(stats)
            cpu_pct = get_cpu_pct(stats)
            net_in, net_out = get_net_io(stats)
            disk_in, disk_out = get_disk_io(stats)

            containers_stats['%s_mem_pct' % container.name] = mem_pct
            containers_stats['%s_cpu_pct' % container.name] = cpu_pct
            containers_stats['%s_net_in' % container.name] = net_in
            containers_stats['%s_net_out' % container.name] = net_out
            containers_stats['%s_disk_in' % container.name] = disk_in
            containers_stats['%s_disk_out' % container.name] = disk_out
            logging.debug("Done with container %s stats", container.name)
        except Exception:  # one bad container must not stall the others
            logging.exception("Could not collect stats for container %s",
                              getattr(container, 'name', container))
        finally:
            # Always acknowledge the item, even on failure.
            container_queue.task_done()
    logging.debug("End get_ct_metrics()")


def get_ct_stats_message(containers_stats):
    '''Build the human-readable part of the check output.

    Every ``key: value`` pair becomes ``"<key> have <value>%"`` with the
    value printed to two decimals; the pieces are joined with ``", "``.
    '''
    fragments = []
    for metric, value in containers_stats.items():
        fragments.append("%s have %.2f%%" % (metric, value))
    return ', '.join(fragments)


def get_ct_perfdata_message(containers_stats):
    '''Build the perfdata part of the check output.

    Every pair is rendered as ``<key>=<value>`` and the pieces are joined
    with single spaces.
    '''
    pairs = []
    for label, value in containers_stats.items():
        pairs.append("%s=%s" % (label, value))
    return ' '.join(pairs)


def main():
    '''Scripts main function.

    Parses ``-w/--warning`` and ``-c/--critical`` (integer percentages,
    defaults 50 and 80), lists the containers returned by the docker
    client, collects their metrics with one worker thread per container and
    prints a single status line followed by perfdata.

    Always leaves through ``sys.exit``: 2 when any ``*_mem_pct`` or
    ``*_cpu_pct`` value is above the critical threshold, 1 when any is
    above the warning threshold, 0 otherwise.
    '''
    parser = argparse.ArgumentParser(description='Check docker processes.')
    parser.add_argument('-w', '--warning', type=int,
                        help='warning percentage (default 50)', default=50)
    parser.add_argument('-c', '--critical', type=int,
                        help='critcal percentage (default 80)', default=80)
    args = parser.parse_args()

    # Try to use the latest API version, otherwise fall back to the version
    # the server says it speaks.
    # Get list of running containers
    try:
        containers_list = docker.from_env().containers.list()
    except docker.errors.APIError as error:
        server_part = str(error).partition('server API version:')[2]
        if not server_part:
            # Not a client/server version mismatch: let the real error
            # surface instead of masking it with an IndexError.
            raise
        version = re.sub('[^0-9.]+', '', server_part)
        containers_list = docker.from_env(version=version).containers.list()

    logging.debug("containers_list = %s", containers_list)

    containers_queue = queue.Queue()
    for container in containers_list:
        containers_queue.put(container)

    containers_stats = {}

    # One worker per container; daemon threads so that a stuck worker can
    # never keep the process alive after the main thread exits.
    for th_id in range(len(containers_list)):
        worker = threading.Thread(
            target=get_ct_metrics,
            args=(containers_queue, containers_stats,),
            name='worker-{}'.format(th_id),
            daemon=True,
        )
        worker.start()

    # Wait until every queued container has been acknowledged.
    containers_queue.join()

    logging.debug("containers_stats = %s", containers_stats)
    # Only the percentage metrics are compared against the thresholds.
    stats = {
        k: v
        for k, v
        in containers_stats.items()
        if k.endswith(('_mem_pct', '_cpu_pct'))
    }
    logging.debug("stats = %s", stats)

    # Check stats values and output perfdata
    critical_ct = {k: v for k, v in stats.items() if v > args.critical}
    if critical_ct:
        print("CRITICAL: %s | %s" % (
            get_ct_stats_message(critical_ct),
            get_ct_perfdata_message(containers_stats)))
        sys.exit(2)

    warning_ct = {k: v for k, v in stats.items() if v > args.warning}
    if warning_ct:
        print("WARNING: %s | %s" % (
            get_ct_stats_message(warning_ct),
            get_ct_perfdata_message(containers_stats)))
        sys.exit(1)

    print("OK | %s" % get_ct_perfdata_message(containers_stats))
    sys.exit(0)


if __name__ == '__main__':
    try:
        main()
    except SystemExit:
        # main() reports the check state through sys.exit(); re-raise so the
        # status (0/1/2) reaches the caller. Swallowing it here would make
        # the process always exit 0.
        raise
    except BaseException as exc:
        EXC_TYPE, _, EXC_TRACEBACK = sys.exc_info()
        # Walk to the innermost frame so the reported file and line are
        # where the exception was raised, not the main() call above.
        while EXC_TRACEBACK.tb_next is not None:
            EXC_TRACEBACK = EXC_TRACEBACK.tb_next
        FNAME = os.path.split(EXC_TRACEBACK.tb_frame.f_code.co_filename)[1]
        print("UNKNOWN: %s Exception \"%s\" in %s line %s" % (
            EXC_TYPE.__name__, exc, FNAME, EXC_TRACEBACK.tb_lineno))
        # 3 is the UNKNOWN state for the monitoring system.
        sys.exit(3)

0 comments on commit 06051c9

Please sign in to comment.