Skip to content
This repository has been archived by the owner on Nov 14, 2022. It is now read-only.

Commit

Permalink
statistics - new latency metrics and counters
Browse files Browse the repository at this point in the history
  • Loading branch information
dmachard committed Dec 15, 2020
1 parent 61d1beb commit 433d857
Show file tree
Hide file tree
Showing 4 changed files with 86 additions and 6 deletions.
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -415,6 +415,12 @@ Some statistics are computed [on the fly](/dnstap_receiver/statistics.py) and st
- **query/[rrtype]**: number of queries per resource record type = a, aaaa, cname,...
- **query/bytes**: total number of bytes with queries
- **response/bytes**: total number of bytes with answers
- **response/latency0_1**: number of queries answered in less than 1ms
- **response/latency1_10**: number of queries answered in 1-10 ms
- **response/latency10_50**: number of queries answered in 10-50 ms
- **response/latency50_100**: number of queries answered in 50-100 ms
- **response/latency100_1000**: number of queries answered in 100-1000 ms
- **response/latency_slow**: number of queries answered in more than 1 second

### Tables

Expand Down
53 changes: 52 additions & 1 deletion dnstap_receiver/api_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,9 @@ async def handle_metrics(self, request):
"query", "query/udp", "query/tcp", "query/inet", "query/inet6",
"query/a", "query/aaaa", "query/svr",
"response", "response/udp", "response/tcp", "response/inet", "response/inet6",
"response/nxdomain", "response/noerror", "response/serverfail" ]
"response/nxdomain", "response/noerror", "response/serverfail",
"response/latency0_1", "response/latency1_10", "response/latency10_50",
"response/latency50_100", "response/latency100_1000", "response/latency_slow"]

# global counters
counters = self.stats.get_counters(stream=None, filters=filters)
Expand Down Expand Up @@ -145,7 +147,30 @@ async def handle_metrics(self, request):
p.append( "# HELP dnstap_responses_serverfail Number of SERVFAIL answers" )
p.append( "# TYPE dnstap_responses_serverfail counter" )
p.append( "dnstap_responses_serverfail %s" % counters["response/serverfail"] )

p.append( "# HELP dnstap_latency0_1 Number of queries answered in less than 1ms" )
p.append( "# TYPE dnstap_latency0_1 counter" )
p.append( "dnstap_latency0_1 %s" % counters["response/latency0_1"] )

p.append( "# HELP dnstap_latency1_10 Number of queries answered in 1-10 ms" )
p.append( "# TYPE dnstap_latency1_10 counter" )
p.append( "dnstap_latency1_10 %s" % counters["response/latency1_10"] )

p.append( "# HELP dnstap_latency10_50 Number of queries answered in 10-50 ms" )
p.append( "# TYPE dnstap_latency10_50 counter" )
p.append( "dnstap_latency10_50 %s" % counters["response/latency10_50"] )

p.append( "# HELP dnstap_latency50_100 Number of queries answered in 50-100 ms" )
p.append( "# TYPE dnstap_latency50_100 counter" )
p.append( "dnstap_latency50_100 %s" % counters["response/latency50_100"] )

p.append( "# HELP dnstap_latency100_1000 Number of queries answered in 100-1000 ms" )
p.append( "# TYPE dnstap_latency100_1000 counter" )
p.append( "dnstap_latency100_1000 %s" % counters["response/latency100_1000"] )

p.append( "# HELP dnstap_latency_slow Number of queries answered in more than 1 second" )
p.append( "# TYPE dnstap_latency_slow counter" )
p.append( "dnstap_latency_slow %s" % counters["response/latency_slow"] )

for s in self.stats.get_nameslist():
sub_cntrs = self.stats.get_counters(stream=s, filters=filters)
Expand Down Expand Up @@ -214,6 +239,25 @@ async def handle_metrics(self, request):
p.append( "# TYPE dnstap_responses_serverfail counter" )


p.append( "# HELP dnstap_latency1_10 Number of queries answered in 1-10 ms for this dnstap identity" )
p.append( "# TYPE dnstap_latency1_10 counter" )

p.append( "# HELP dnstap_latency1_10 Number of queries answered in 1-10 ms for this dnstap identity" )
p.append( "# TYPE dnstap_latency1_10 counter" )

p.append( "# HELP dnstap_latency10_50 Number of queries answered in 10-50 ms for this dnstap identity" )
p.append( "# TYPE dnstap_latency10_50 counter" )

p.append( "# HELP dnstap_latency10_50 Number of queries answered in 10-50 ms for this dnstap identity" )
p.append( "# TYPE dnstap_latency10_50 counter" )

p.append( "# HELP dnstap_latency100_1000 Number of queries answered in 100-1000 ms for this dnstap identity" )
p.append( "# TYPE dnstap_latency100_1000 counter" )

p.append( "# HELP dnstap_latency_slow Number of queries answered in more than 1 second for this dnstap identity" )
p.append( "# TYPE dnstap_latency_slow counter" )


p.append( "dnstap_queries_bytes_total{identity=\"%s\"} %s" % (s,sub_cntrs["query/bytes"]) )
p.append( "dnstap_responses_bytes_total{identity=\"%s\"} %s" % (s,sub_cntrs["response/bytes"]) )

Expand All @@ -239,6 +283,13 @@ async def handle_metrics(self, request):
p.append( "dnstap_responses_nxdomain{identity=\"%s\"} %s" % (s,sub_cntrs["response/nxdomain"]) )
p.append( "dnstap_responses_serverfail{identity=\"%s\"} %s" % (s,sub_cntrs["response/serverfail"]) )

p.append( "dnstap_latency0_1{identity=\"%s\"} %s" % (s,sub_cntrs["response/latency0_1"]) )
p.append( "dnstap_latency1_10{identity=\"%s\"} %s" % (s,sub_cntrs["response/latency1_10"]) )
p.append( "dnstap_latency10_50{identity=\"%s\"} %s" % (s,sub_cntrs["response/latency10_50"]) )
p.append( "dnstap_latency50_100{identity=\"%s\"} %s" % (s,sub_cntrs["response/latency50_100"]) )
p.append( "dnstap_latency100_1000{identity=\"%s\"} %s" % (s,sub_cntrs["response/latency100_1000"]) )
p.append( "dnstap_latency_slow{identity=\"%s\"} %s" % (s,sub_cntrs["response/latency_slow"]) )

return web.Response(text="\n".join(p), content_type='text/plan')

async def handle_counters(self, request):
Expand Down
7 changes: 3 additions & 4 deletions dnstap_receiver/receiver.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ async def cb_ondnstap(dnstap_decoder, payload, cfg, queues_list, stats, geoip_re
d1 = dm.query_time_sec + (round(dm.query_time_nsec ) / 1000000000)
tap["timestamp"] = datetime.fromtimestamp(d1, tz=timezone.utc).isoformat()
tap["type"] = "query"
latency = UnknownValue.name
latency = 0.0

# handle response message
if (dm.type % 2 ) == 0 :
Expand All @@ -123,10 +123,9 @@ async def cb_ondnstap(dnstap_decoder, payload, cfg, queues_list, stats, geoip_re
tap["timestamp"] = datetime.fromtimestamp(d2, tz=timezone.utc).isoformat()
tap["type"] = "response"

#compute latency
# compute latency
d1 = dm.query_time_sec + (round(dm.query_time_nsec ) / 1000000000)
latency_float = d2-d1
latency = "%.3f" % latency_float
latency = round(d2-d1,3)

tap["latency"] = latency

Expand Down
26 changes: 25 additions & 1 deletion dnstap_receiver/statistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ def __init__(self, name, stats):

self.cnts_tlds = Counter()

self.cnts_latency = Counter()

def record(self, tap):
"""record only response dnstap message"""
qname = tap["qname"]; srcip = tap["source-ip"];
Expand Down Expand Up @@ -81,6 +83,20 @@ def record(self, tap):
tld_matched = list( filter(lambda x: x in tld_set, qname.rsplit(".", 2)) )
if len(tld_matched):
self.cnts_tlds["%s/%s" % (qr,tld_matched[-1])] += 1

# latency
if tap["latency"] <= 0.001:
self.cnts_latency["%s/latency0_1" % qr ] += 1
if 0.001 < tap["latency"] <= 0.010 :
self.cnts_latency["%s/latency1_10" % qr ] += 1
if 0.010 < tap["latency"] <= 0.050 :
self.cnts_latency["%s/latency10_50" % qr ] += 1
if 0.050 < tap["latency"] <= 0.100 :
self.cnts_latency["%s/latency50_100" % qr ] += 1
if 0.100 < tap["latency"] <= 1.000 :
self.cnts_latency["%s/latency100_1000" % qr ] += 1
if tap["latency"] > 1.000 :
self.cnts_latency["%s/latency_slow" % qr ] += 1

def reset(self):
"""reset the stream"""
Expand All @@ -94,6 +110,7 @@ def reset(self):
self.cnts_rrtype.clear()

self.cnts_tlds.clear()
self.cnts_latency.clear()

self.prev_qr = 0

Expand Down Expand Up @@ -123,6 +140,8 @@ def __init__(self, cfg):

# Counter({'query|response/<tlds>': <int>})
self.cnts_tlds = Counter()
# Counter({'response/latency0_10': <int>})
self.cnts_latency = Counter()

self.global_qps = Counter()

Expand All @@ -140,6 +159,7 @@ def update_counters(self):
self.cnts_rcode.clear()
self.cnts_rrtype.clear()
self.cnts_tlds.clear()
self.cnts_latency.clear()

qnames = set()
ips = set()
Expand All @@ -154,6 +174,7 @@ def update_counters(self):
self.cnts_rrtype.update(self.streams[s].cnts_rrtype)

self.cnts_tlds.update(self.streams[s].cnts_tlds)
self.cnts_latency.update(self.streams[s].cnts_latency)

self.cnts["clients"] = len(ips)
self.cnts["domains"] = len(qnames)
Expand All @@ -171,6 +192,7 @@ def reset(self):
self.global_qps.clear()

self.cnts_tlds.clear()
self.cnts_latency.clear()

def get_streams(self, stream=None):
"""return list of stream object"""
Expand Down Expand Up @@ -208,12 +230,14 @@ def get_counters(self, stream=None, filters=[]):
_cnt.update(self.cnts_rcode)
_cnt.update(self.cnts_rrtype)
_cnt.update(self.cnts_tlds)
_cnt.update(self.cnts_latency)
else:
_cnt.update(s.cnts)
_cnt.update(s.cnts_rcode)
_cnt.update(s.cnts_rrtype)
_cnt.update(s.cnts_tlds)

_cnt.update(s.cnts_latency)

# set counters
c = {}
for f in filters:
Expand Down

0 comments on commit 433d857

Please sign in to comment.