-
Notifications
You must be signed in to change notification settings - Fork 16
/
cymruwhois.py
308 lines (256 loc) · 8.97 KB
/
cymruwhois.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
#!/usr/bin/env python
# cymruwhois.py
# Copyright (C) 2009 Justin Azoff [email protected]
#
# This module is released under the MIT License:
# http://www.opensource.org/licenses/mit-license.php
import socket
import errno
try :
import memcache
HAVE_MEMCACHE = True
except ImportError:
HAVE_MEMCACHE = False
def iterwindow(l, slice=50):
    """Split an iterable into consecutive lists of at most ``slice`` items.

    Every yielded list except possibly the last one holds exactly ``slice``
    items; an empty input yields nothing.  A full list is only handed out
    once the next item has been read from ``l``.

    >>> list(iterwindow(iter(range(10)),11))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]]
    >>> list(iterwindow(iter(range(10)),9))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8], [9]]
    >>> list(iterwindow(iter(range(10)),5))
    [[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]]
    >>> list(iterwindow(iter(range(10)),3))
    [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]]
    >>> list(iterwindow(iter(range(10)),1))
    [[0], [1], [2], [3], [4], [5], [6], [7], [8], [9]]
    """
    assert slice > 0
    window = []
    for item in l:
        if len(window) < slice:
            window.append(item)
            continue
        # The window is already full: hand it out and start the next one
        # with the item that did not fit.
        yield window
        window = [item]
    # Whatever is left over forms the final, possibly shorter, window.
    if window:
        yield window
class record:
    """Result of looking up a single IP address.

    Stores the cleaned ``asn``, ``ip``, ``prefix``, ``cc`` and ``owner``
    fields, plus ``key`` (equal to ``ip``) which identifies the record.
    """

    def __init__(self, asn, ip, prefix, cc, owner):
        def clean(value):
            # Trim surrounding whitespace.  Values that offer ``decode``
            # (byte strings) are also reduced to plain ASCII text.
            trimmed = value.strip()
            try:
                return str(trimmed.decode('ascii','ignore'))
            except AttributeError:
                return trimmed  # for Python 3: str has no decode()

        self.asn = clean(asn)
        self.ip = clean(ip)
        self.prefix = clean(prefix)
        self.cc = clean(cc)
        self.owner = clean(owner)
        self.key = self.ip

    def __str__(self):
        columns = (self.asn, self.ip, self.prefix, self.cc, self.owner)
        return "%-10s %-16s %-16s %s '%s'" % columns

    def __repr__(self):
        columns = (self.asn, self.ip, self.prefix, self.cc, self.owner)
        return "<%s instance: %s|%s|%s|%s|%s>" % ((self.__class__,) + columns)
class asrecord:
    """Result of looking up an AS number.

    Stores the cleaned ``asn``, ``cc`` and ``owner`` fields; a field whose
    value is the placeholder ``"NA"`` is stored as ``None``.  ``key`` is
    ``"AS"`` followed by the AS number and identifies the record.
    """

    def __init__(self, asn, cc, owner):
        def fix(x):
            x = x.strip()
            try:
                x = str(x.decode('ascii','ignore'))
            except AttributeError:
                pass # for Python 3
            # Compare after decoding so that byte strings are recognised as
            # the "NA" placeholder too.
            if x == "NA":
                return None
            return x

        self.asn = fix(asn)
        self.cc = fix(cc)
        self.owner = fix(owner)
        # fix() turns "NA" into None, and "AS" + None would raise TypeError.
        # Fall back to the literal placeholder so the key is always a string.
        self.key = "AS" + (self.asn if self.asn is not None else "NA")

    def __str__(self):
        return "%-10s %s '%s'" % (self.asn, self.cc, self.owner)

    def __repr__(self):
        return "<%s instance: %s|%s|%s>" % (self.__class__, self.asn, self.cc, self.owner)
class Client:
    """Python interface to whois.cymru.com

    Results are cached in memcached when the ``memcache`` module is
    available and a ``memcache_host`` is given.

    **Usage**

    >>> import socket
    >>> ip = socket.gethostbyname("www.google.com")
    >>> from cymruwhois import Client
    >>> c=Client()
    >>> r=c.lookup(ip)
    >>> print(r.asn)
    15169
    >>> print(r.owner)
    GOOGLE - Google Inc., US
    >>>
    >>> for r in c.lookupmany([ip, "8.8.8.8"]):
    ...     print(r.owner)
    GOOGLE - Google Inc., US
    GOOGLE - Google Inc., US
    """

    def make_key(self, arg):
        """Return the cache key for ``arg`` (an address or an "AS..." string)."""
        if arg.startswith("AS"):
            return "cymruwhois:as:" + arg
        return "cymruwhois:ip:" + arg

    def __init__(self, host="whois.cymru.com", port=43, memcache_host='localhost:11211'):
        """Remember the server address; no connection is opened yet.

        ``memcache_host`` may be falsy to disable caching.
        """
        self.host = host
        self.port = port
        self._connected = False
        self.c = None  # memcache client, or None when caching is off
        if HAVE_MEMCACHE and memcache_host:
            self.c = memcache.Client([memcache_host])

    def _connect(self):
        """Open the TCP connection and wrap it in a read/write file object."""
        self.socket = socket.socket()
        try:
            self.socket.settimeout(5.0)   # limit for establishing the connection
            self.socket.connect((self.host, self.port))
            self.socket.settimeout(10.0)  # limit for each later read/write
            self.file = self.socket.makefile("rw")
        except Exception:
            # Do not leak the descriptor when the connection cannot be set up.
            self.socket.close()
            raise

    def _sendline(self, line):
        """Write ``line`` followed by CRLF and flush it to the server."""
        self.file.write(line + "\r\n")
        self.file.flush()

    def _readline(self):
        """Read one line from the server ('' means the connection was closed)."""
        return self.file.readline()

    def _disconnect(self):
        """Close the file wrapper and the socket."""
        try:
            self.file.close()
        finally:
            # Closing the wrapper may fail while flushing; still free the socket.
            self.socket.close()

    def read_and_discard(self):
        """Throw away pending input (one read of up to 1024) without blocking."""
        self.socket.setblocking(0)
        try:
            try:
                self.file.read(1024)
            except socket.error as e:
                #10035 is WSAEWOULDBLOCK for windows systems on older python versions
                if e.args[0] not in (errno.EAGAIN, errno.EWOULDBLOCK, 10035):
                    raise
        finally:
            self.socket.setblocking(1)

    def _begin(self):
        """Explicitly connect and send BEGIN to start the lookup process"""
        self._connect()
        self._sendline("BEGIN")
        self._readline() #discard the message "Bulk mode; one IP per line. [2005-08-02 18:54:55 GMT]"
        # Output options for the bulk session -- presumably these select the
        # prefix / AS number / country code columns and untruncated names;
        # confirm against the whois.cymru.com documentation.
        self._sendline("PREFIX\nASNUMBER\nCOUNTRYCODE\nNOTRUNC")
        self._connected = True

    def disconnect(self):
        """Explicitly send END to stop the lookup process and disconnect"""
        if not self._connected:
            return
        try:
            self._sendline("END")
        finally:
            # Even if END cannot be sent, release the socket and allow a
            # later lookup to reconnect.
            self._connected = False
            self._disconnect()

    def get_cached(self, ips):
        """Return a dict of query -> cached record for the ``ips`` found in the cache.

        Returns an empty dict when caching is disabled.
        """
        if not self.c:
            return {}
        keys = [self.make_key(ip) for ip in ips]
        vals = self.c.get_multi(keys)
        #convert cymruwhois:ip:1.2.3.4 into just 1.2.3.4
        # Only the two prefix components are split off: IPv6 addresses
        # contain ':' themselves and must survive intact.
        return dict((k.split(":", 2)[-1], v) for k, v in vals.items())

    def cache(self, r):
        """Store record ``r`` in the cache (expiry argument: 60*60*6 seconds)."""
        if not self.c:
            return
        self.c.set(self.make_key(r.key), r, 60*60*6)

    def lookup(self, ip):
        """Look up a single address.

        .. warning::
            Do not call this function inside of a loop, the performance
            will be terrible. Instead, call lookupmany or lookupmany_dict
        """
        return list(self.lookupmany([ip]))[0]

    def lookupmany(self, ips):
        """Look up many ip addresses

        Yields records in input order, in batches of 100; queries for which
        no record was obtained are skipped.
        """
        ips = [str(ip).strip() for ip in ips]
        for batch in iterwindow(ips, 100):
            cached = self.get_cached(batch)
            not_cached = [ip for ip in batch if not cached.get(ip)]
            if not_cached:
                for rec in self._lookupmany_raw(not_cached):
                    cached[rec.key] = rec
            for ip in batch:
                if ip in cached:
                    yield cached[ip]

    def lookupmany_dict(self, ips):
        """Look up many ip addresses, returning a dictionary of ip -> record"""
        ips = set(ips)
        return dict((r.key, r) for r in self.lookupmany(ips))

    def _lookupmany_raw(self, ips):
        """Do a look up for some ips

        Sends every distinct query, then yields one record per answered
        query, caching each.  Raises ``socket.error`` if the server closes
        the connection before all answers have arrived.
        """
        if not self._connected:
            self._begin()
        ips = set(ips)
        for ip in ips:
            self._sendline(ip)

        need = len(ips)
        last = None
        while need:
            result = self._readline()
            if not result:
                # EOF: the server went away.  Drop the dead connection so the
                # next lookup reconnects, and report the real cause instead of
                # failing later while parsing an empty line.
                self._connected = False
                try:
                    self._disconnect()
                except socket.error:
                    pass  # best effort; the error raised below is the useful one
                raise socket.error(
                    "connection to %s closed with %d result(s) outstanding"
                    % (self.host, need))
            if 'Error: no ASN or IP match on line' in result:
                need -= 1
                continue
            parts = result.split("|")
            if len(parts) == 5:
                r = record(*parts)
            else:
                r = asrecord(*parts)

            #check for multiple records being returned for a single IP
            #in this case, just skip any extra records
            if last is not None and r.key == last.key:
                continue

            self.cache(r)
            yield r
            last = r
            need -= 1

        #skip any trailing records that might have been caused by multiple records for the last ip
        self.read_and_discard()
# Backwards-compatibility alias: keeps ``lookerupper`` usable as another
# name for the Client class.
lookerupper = Client
def lookup_stdin():
    """Command-line entry point: look up queries read from files or stdin.

    Reads one query per line (blank lines are ignored), looks them all up
    and prints one line per result, either column-justified or joined with
    the ``--delim`` string.  ``--fields`` selects the columns.  When the
    ``memcache`` module is available, ``--cache`` / ``--no-cache`` control
    result caching.
    """
    from optparse import OptionParser
    import fileinput

    parser = OptionParser(usage = "usage: %prog [options] [files]")
    parser.add_option("-d", "--delim", dest="delim", action="store", default=None,
        help="delimiter to use instead of justified")
    parser.add_option("-f", "--fields", dest="fields", action="append",
        help="comma separated fields to include (asn,ip,prefix,cc,owner)")

    if HAVE_MEMCACHE:
        parser.add_option("-c", "--cache", dest="cache", action="store", default="localhost:11211",
            help="memcache server (default localhost)")
        parser.add_option("-n", "--no-cache", dest="cache", action="store_false",
            help="don't use memcached")

    (options, args) = parser.parse_args()

    # Column widths for justified output; 0 means "no padding".
    fieldwidths = {
        'asn': 8,
        'ip': 15,
        'prefix': 18,
        'cc': 2,
        'owner': 0,
    }

    #fix the fields: convert ['a,b','c'] into ['a','b','c'] if needed
    if options.fields:
        fields = []
        for spec in options.fields:
            fields.extend(spec.split(","))
    else:
        fields = 'asn ip prefix cc owner'.split()

    # Reject misspelled field names up front instead of failing with a
    # KeyError traceback.  'key' is also a record attribute, so it stays
    # accepted.
    unknown = [name for name in fields if name not in fieldwidths and name != 'key']
    if unknown:
        parser.error("unknown field(s): %s" % ", ".join(unknown))

    #generate the format string
    if options.delim:
        line_format = options.delim.join("%%(%s)s" % f for f in fields)
    else:
        line_format = ' '.join("%%(%s)-%ds" % (f, fieldwidths.get(f, 0)) for f in fields)

    #setup the memcache option
    memcache_host = None
    if HAVE_MEMCACHE:
        memcache_host = options.cache
        if memcache_host and ':' not in memcache_host:
            memcache_host += ":11211"

    c = Client(memcache_host=memcache_host)

    # Skip blank lines: an empty query would be sent to the server as-is.
    ips = []
    for line in fileinput.input(args):
        ip = line.strip()
        if ip:
            ips.append(ip)

    try:
        for r in c.lookupmany(ips):
            # AS records have no ip/prefix attributes; show those columns
            # as empty instead of raising KeyError.
            row = dict.fromkeys(fields, "")
            row.update(r.__dict__)
            print(line_format % row)
    finally:
        # Always end the session and close the socket.
        c.disconnect()
# Run the command-line lookup only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    lookup_stdin()