From 285e38320c299339695be7cf0240607e8c3140d9 Mon Sep 17 00:00:00 2001 From: firejq Date: Thu, 14 Dec 2017 19:47:24 +0800 Subject: [PATCH 1/4] Editing for py3 support --- README.md | 4 +-- test/article.py | 19 ++++++++------ url2io.py | 69 ++++++++++++++++++++++++++----------------------- 3 files changed, 50 insertions(+), 42 deletions(-) diff --git a/README.md b/README.md index 7efbc43..7d73caa 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # URL2io Python SDK -This is the URL2io python SDK suite. Note that python2.7 is required. +This is the URL2io python SDK suite. Note that python3 is required. [API Docs](http://www.url2io.com/docs) @@ -18,7 +18,7 @@ How to use? example: >>> api = url2io.API(token) >>> # get content and next page link >>> ret = api.article(url='http://www.url2io.com/products', fields=['next',]) ->>> print ret +>>> print(ret) { u'content': u'

\u63d0\u4f9b\u7b80\u5355\uff0c', u'date': None, diff --git a/test/article.py b/test/article.py index c947f85..a86924f 100644 --- a/test/article.py +++ b/test/article.py @@ -1,15 +1,18 @@ -import sys; sys.path.append('..') +import sys +sys.path.append('..') import url2io -api = url2io.API('demo') +# your user token +token = 'xxx' +api = url2io.API(token) -print "get article" +print("get article") ret = api.article(url='http://www.url2io.com/docs') -print ret.keys() +print(ret.keys()) -print "get article & next" +print("get article & next") ret = {'next': 'http://tech.sina.com.cn/i/2010-08-18/19554560539.shtml'} -print 'get: ', ret.get('next') +print('get: ', ret.get('next')) while ret.get('next'): - ret = api.article(url=ret.get('next'), fields=['next','text']) - print 'next: ', ret.get('next') + ret = api.article(url=ret.get('next'), fields=['next', 'text']) + print('next: ', ret.get('next')) diff --git a/url2io.py b/url2io.py index bccfac4..5257ca5 100644 --- a/url2io.py +++ b/url2io.py @@ -1,4 +1,4 @@ -#coding: utf-8 +# coding: utf-8 # # This program is free software. It comes without any warranty, to # the extent permitted by applicable law. You can redistribute it @@ -6,19 +6,19 @@ # To Public License, Version 2, as published by Sam Hocevar. See # http://sam.zoy.org/wtfpl/COPYING (copied as below) for more details. # -# DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE -# Version 2, December 2004 +# DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE +# Version 2, December 2004 # -# Copyright (C) 2004 Sam Hocevar +# Copyright (C) 2004 Sam Hocevar # -# Everyone is permitted to copy and distribute verbatim or modified -# copies of this license document, and changing it is allowed as long -# as the name is changed. +# Everyone is permitted to copy and distribute verbatim or modified +# copies of this license document, and changing it is allowed as long +# as the name is changed. # -# DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE -# TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION +# DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE +# TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION # -# 0. You just DO WHAT THE FUCK YOU WANT TO. +# 0. You just DO WHAT THE FUCK YOU WANT TO. """a simple url2io sdk example: @@ -28,17 +28,19 @@ __all__ = ['APIError', 'API'] - DEBUG_LEVEL = 1 import sys import socket import json import urllib -import urllib2 +import urllib.request +# import urllib2 import time +from urllib.parse import urlparse from collections import Iterable + class APIError(Exception): code = None """HTTP status code""" @@ -55,7 +57,7 @@ def __init__(self, code, url, body): self.body = body def __str__(self): - return 'code={s.code}\nurl={s.url}\n{s.body}'.format(s = self) + return 'code={s.code}\nurl={s.url}\n{s.body}'.format(s=self) __repr__ = __str__ @@ -69,9 +71,9 @@ class API(object): max_retries = None retry_delay = None - def __init__(self, token, srv = None, - decode_result = True, timeout = 30, max_retries = 5, - retry_delay = 3): + def __init__(self, token, srv=None, + decode_result=True, timeout=30, max_retries=5, + retry_delay=3): """:param srv: The API server address :param decode_result: whether to json_decode the result :param timeout: HTTP request timeout in seconds @@ -120,19 +122,19 @@ class _APIProxy(object): def __init__(self, apiobj, path): _setup_apiobj(self, apiobj, path) - def __call__(self, post = False, *args, **kwargs): + def __call__(self, post=False, *args, **kwargs): # /article # url = 'http://xxxx.xxx', # fields = ['next',], # if len(args): raise TypeError('only keyword arguments are allowed') - if type(post) is not bool: + if not isinstance(post, bool): raise TypeError('post argument can only be True or False') url = self.geturl(**kwargs) - request = urllib2.Request(url) + request = urllib.request.urlopen(url, timeout=self._api.timeout) self._api.update_request(request) @@ -140,11 +142,11 @@ def __call__(self, post = False, *args, **kwargs): while True: retry -= 1 try: - ret = urllib2.urlopen(request, timeout = self._api.timeout).read() + ret = request.read() break - except urllib2.HTTPError as e: + except urllib.error.HTTPError as e: raise APIError(e.code, url, e.read()) - except (socket.error, urllib2.URLError) as e: + except (socket.error, urllib.error.HTTPError) as e: if retry < 0: raise e _print_debug('caught error: {}; retrying'.format(e)) @@ -153,24 +155,27 @@ def __call__(self, post = False, *args, **kwargs): if self._api.decode_result: try: ret = json.loads(ret) - except: - raise APIError(-1, url, 'json decode error, value={0!r}'.format(ret)) + except BaseException: + raise APIError(-1, + url, + 'json decode error, value={0!r}'.format(ret)) return ret def _mkarg(self, kargs): """change the argument list (encode value, add api key/secret) :return: the new argument list""" + def enc(x): - #if isinstance(x, unicode): + # if isinstance(x, unicode): # return x.encode('utf-8') - #return str(x) - return x.encode('utf-8') if isinstance(x, unicode) else str(x) + # return str(x) + return x.encode('utf-8') if isinstance(x, str) else str(x) kargs = kargs.copy() kargs['token'] = self._api.token for (k, v) in kargs.items(): - if isinstance(v, Iterable) and not isinstance(v, basestring): - kargs[k] = ','.join([enc(i) for i in v]) + if isinstance(v, Iterable) and not isinstance(v, str): + kargs[k] = ','.join('%s' % id for id in [enc(i) for i in v]) else: kargs[k] = enc(v) @@ -178,17 +183,17 @@ def enc(x): def geturl(self, **kargs): """return the request url""" - return self._urlbase + '?' + urllib.urlencode(self._mkarg(kargs)) + return self._urlbase + '?' + urllib.parse.urlencode(self._mkarg(kargs)) def _print_debug(msg): if DEBUG_LEVEL: sys.stderr.write(str(msg) + '\n') + _APIS = [ '/article', - #'/images', + # '/images', ] _APIS = [i.split('/')[1:] for i in _APIS] - From 4b9469a49dd20684c6c702717f73ae9e550974c3 Mon Sep 17 00:00:00 2001 From: firejq Date: Thu, 14 Dec 2017 19:50:58 +0800 Subject: [PATCH 2/4] Editing for py3 support --- example show how to use SDK.ipynb | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/example show how to use SDK.ipynb b/example show how to use SDK.ipynb index de66489..35ea5c7 100644 --- a/example show how to use SDK.ipynb +++ b/example show how to use SDK.ipynb @@ -1,12 +1,14 @@ { "cells": [ { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ "# URL2io Python SDK\n", "\n", - "This is the URL2io python SDK suite. Note that python2.7 is required.\n", + "This is the URL2io python SDK suite. Note that python3 is required.\n", "\n", "[API Docs](http://www.url2io.com/docs), [URL2io Python SDK下载地址](https://github.com/url2io/url2io-python-sdk)\n", "\n", From 1f2f3f06dfdb325d07d8678ea180134f95428cda Mon Sep 17 00:00:00 2001 From: firejq Date: Thu, 14 Dec 2017 20:03:52 +0800 Subject: [PATCH 3/4] Add py3 support --- README.md | 18 ++- example show how to use SDK.ipynb | 2 +- test/article-py3.py | 18 +++ test/article.py | 19 ++- url2io.py | 50 ++++---- url2io3.py | 199 ++++++++++++++++++++++++++++++ 6 files changed, 265 insertions(+), 41 deletions(-) create mode 100644 test/article-py3.py create mode 100644 url2io3.py diff --git a/README.md b/README.md index 7d73caa..6d9854d 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # URL2io Python SDK -This is the URL2io python SDK suite. Note that python3 is required. +This is the URL2io python SDK suite. [API Docs](http://www.url2io.com/docs) @@ -13,6 +13,7 @@ How to use? example: 1. [register](http://www.url2io.com/accounts/register) and get **`token`** 2. coding +python2.7 version ```python >>> import url2io >>> api = url2io.API(token) @@ -27,4 +28,19 @@ How to use? example: } ``` +python3 version +```python +>>> import url2io3 +>>> api = url2io3.API(token) +>>> # get content and next page link +>>> ret = api.article(url='http://www.url2io.com/products', fields=['next',]) +>>> print(ret) +{ + u'content': u'

\u63d0\u4f9b\u7b80\u5355\uff0c', + u'date': None, + u'title': u'URL2io \u4ecb\u7ecd', + u'url': u'http://www.url2io.com/products' +} +``` + [more detial](http://blog.url2io.com/url2io-python-sdk/example%20show%20how%20to%20use%20SDK/) diff --git a/example show how to use SDK.ipynb b/example show how to use SDK.ipynb index 35ea5c7..530da5c 100644 --- a/example show how to use SDK.ipynb +++ b/example show how to use SDK.ipynb @@ -8,7 +8,7 @@ "source": [ "# URL2io Python SDK\n", "\n", - "This is the URL2io python SDK suite. Note that python3 is required.\n", + "This is the URL2io python SDK suite. Note that python2.7 is required.\n", "\n", "[API Docs](http://www.url2io.com/docs), [URL2io Python SDK下载地址](https://github.com/url2io/url2io-python-sdk)\n", "\n", diff --git a/test/article-py3.py b/test/article-py3.py new file mode 100644 index 0000000..6eba7e6 --- /dev/null +++ b/test/article-py3.py @@ -0,0 +1,18 @@ +import sys +sys.path.append('..') +import url2io3 + +# your user token +token = 'xxx' +api = url2io3.API(token) + +print("get article") +ret = api.article(url='http://www.url2io.com/docs') +print(ret.keys()) + +print("get article & next") +ret = {'next': 'http://tech.sina.com.cn/i/2010-08-18/19554560539.shtml'} +print('get: ', ret.get('next')) +while ret.get('next'): + ret = api.article(url=ret.get('next'), fields=['next', 'text']) + print('next: ', ret.get('next')) diff --git a/test/article.py b/test/article.py index a86924f..4b2b7d6 100644 --- a/test/article.py +++ b/test/article.py @@ -1,18 +1,15 @@ -import sys -sys.path.append('..') +import sys; sys.path.append('..') import url2io -# your user token -token = 'xxx' -api = url2io.API(token) +api = url2io.API('demo') -print("get article") +print "get article" ret = api.article(url='http://www.url2io.com/docs') -print(ret.keys()) +print ret.keys() -print("get article & next") +print "get article & next" ret = {'next': 'http://tech.sina.com.cn/i/2010-08-18/19554560539.shtml'} -print('get: ', ret.get('next')) +print 'get: ', ret.get('next') while ret.get('next'): - ret = api.article(url=ret.get('next'), fields=['next', 'text']) - print('next: ', ret.get('next')) + ret = api.article(url=ret.get('next'), fields=['next','text']) + print 'next: ', ret.get('next') \ No newline at end of file diff --git a/url2io.py b/url2io.py index 5257ca5..2b5e723 100644 --- a/url2io.py +++ b/url2io.py @@ -1,4 +1,4 @@ -# coding: utf-8 +#coding: utf-8 # # This program is free software. It comes without any warranty, to # the extent permitted by applicable law. You can redistribute it @@ -28,19 +28,17 @@ __all__ = ['APIError', 'API'] + DEBUG_LEVEL = 1 import sys import socket import json import urllib -import urllib.request -# import urllib2 +import urllib2 import time -from urllib.parse import urlparse from collections import Iterable - class APIError(Exception): code = None """HTTP status code""" @@ -57,7 +55,7 @@ def __init__(self, code, url, body): self.body = body def __str__(self): - return 'code={s.code}\nurl={s.url}\n{s.body}'.format(s=self) + return 'code={s.code}\nurl={s.url}\n{s.body}'.format(s = self) __repr__ = __str__ @@ -71,9 +69,9 @@ class API(object): max_retries = None retry_delay = None - def __init__(self, token, srv=None, - decode_result=True, timeout=30, max_retries=5, - retry_delay=3): + def __init__(self, token, srv = None, + decode_result = True, timeout = 30, max_retries = 5, + retry_delay = 3): """:param srv: The API server address :param decode_result: whether to json_decode the result :param timeout: HTTP request timeout in seconds @@ -122,19 +120,19 @@ class _APIProxy(object): def __init__(self, apiobj, path): _setup_apiobj(self, apiobj, path) - def __call__(self, post=False, *args, **kwargs): + def __call__(self, post = False, *args, **kwargs): # /article # url = 'http://xxxx.xxx', # fields = ['next',], # if len(args): raise TypeError('only keyword arguments are allowed') - if not isinstance(post, bool): + if type(post) is not bool: raise TypeError('post argument can only be True or False') url = self.geturl(**kwargs) - request = urllib.request.urlopen(url, timeout=self._api.timeout) + request = urllib2.Request(url) self._api.update_request(request) @@ -142,11 +140,11 @@ def __call__(self, post=False, *args, **kwargs): while True: retry -= 1 try: - ret = request.read() + ret = urllib2.urlopen(request, timeout = self._api.timeout).read() break - except urllib.error.HTTPError as e: + except urllib2.HTTPError as e: raise APIError(e.code, url, e.read()) - except (socket.error, urllib.error.HTTPError) as e: + except (socket.error, urllib2.URLError) as e: if retry < 0: raise e _print_debug('caught error: {}; retrying'.format(e)) @@ -155,27 +153,24 @@ def __call__(self, post=False, *args, **kwargs): if self._api.decode_result: try: ret = json.loads(ret) - except BaseException: - raise APIError(-1, - url, - 'json decode error, value={0!r}'.format(ret)) + except: + raise APIError(-1, url, 'json decode error, value={0!r}'.format(ret)) return ret def _mkarg(self, kargs): """change the argument list (encode value, add api key/secret) :return: the new argument list""" - def enc(x): - # if isinstance(x, unicode): + #if isinstance(x, unicode): # return x.encode('utf-8') - # return str(x) - return x.encode('utf-8') if isinstance(x, str) else str(x) + #return str(x) + return x.encode('utf-8') if isinstance(x, unicode) else str(x) kargs = kargs.copy() kargs['token'] = self._api.token for (k, v) in kargs.items(): - if isinstance(v, Iterable) and not isinstance(v, str): - kargs[k] = ','.join('%s' % id for id in [enc(i) for i in v]) + if isinstance(v, Iterable) and not isinstance(v, basestring): + kargs[k] = ','.join([enc(i) for i in v]) else: kargs[k] = enc(v) @@ -183,17 +178,16 @@ def enc(x): def geturl(self, **kargs): """return the request url""" - return self._urlbase + '?' + urllib.parse.urlencode(self._mkarg(kargs)) + return self._urlbase + '?' + urllib.urlencode(self._mkarg(kargs)) def _print_debug(msg): if DEBUG_LEVEL: sys.stderr.write(str(msg) + '\n') - _APIS = [ '/article', - # '/images', + #'/images', ] _APIS = [i.split('/')[1:] for i in _APIS] diff --git a/url2io3.py b/url2io3.py new file mode 100644 index 0000000..5257ca5 --- /dev/null +++ b/url2io3.py @@ -0,0 +1,199 @@ +# coding: utf-8 +# +# This program is free software. It comes without any warranty, to +# the extent permitted by applicable law. You can redistribute it +# and/or modify it under the terms of the Do What The Fuck You Want +# To Public License, Version 2, as published by Sam Hocevar. See +# http://sam.zoy.org/wtfpl/COPYING (copied as below) for more details. +# +# DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE +# Version 2, December 2004 +# +# Copyright (C) 2004 Sam Hocevar +# +# Everyone is permitted to copy and distribute verbatim or modified +# copies of this license document, and changing it is allowed as long +# as the name is changed. +# +# DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE +# TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION +# +# 0. You just DO WHAT THE FUCK YOU WANT TO. + +"""a simple url2io sdk +example: +api = API(token) +api.article(url='http://www.url2io.com/products', fields=['next', 'text']) +""" + +__all__ = ['APIError', 'API'] + +DEBUG_LEVEL = 1 + +import sys +import socket +import json +import urllib +import urllib.request +# import urllib2 +import time +from urllib.parse import urlparse +from collections import Iterable + + +class APIError(Exception): + code = None + """HTTP status code""" + + url = None + """request URL""" + + body = None + """server response body; or detailed error information""" + + def __init__(self, code, url, body): + self.code = code + self.url = url + self.body = body + + def __str__(self): + return 'code={s.code}\nurl={s.url}\n{s.body}'.format(s=self) + + __repr__ = __str__ + + +class API(object): + token = None + server = 'http://api.url2io.com/' + + decode_result = True + timeout = None + max_retries = None + retry_delay = None + + def __init__(self, token, srv=None, + decode_result=True, timeout=30, max_retries=5, + retry_delay=3): + """:param srv: The API server address + :param decode_result: whether to json_decode the result + :param timeout: HTTP request timeout in seconds + :param max_retries: maximal number of retries after catching URL error + or socket error + :param retry_delay: time to sleep before retrying""" + self.token = token + if srv: + self.server = srv + self.decode_result = decode_result + assert timeout >= 0 or timeout is None + assert max_retries >= 0 + self.timeout = timeout + self.max_retries = max_retries + self.retry_delay = retry_delay + + _setup_apiobj(self, self, []) + + def update_request(self, request): + """overwrite this function to update the request before sending it to + server""" + pass + + +def _setup_apiobj(self, apiobj, path): + if self is not apiobj: + self._api = apiobj + self._urlbase = apiobj.server + '/'.join(path) + + lvl = len(path) + done = set() + for i in _APIS: + if len(i) <= lvl: + continue + cur = i[lvl] + if i[:lvl] == path and cur not in done: + done.add(cur) + setattr(self, cur, _APIProxy(apiobj, i[:lvl + 1])) + + +class _APIProxy(object): + _api = None + + _urlbase = None + + def __init__(self, apiobj, path): + _setup_apiobj(self, apiobj, path) + + def __call__(self, post=False, *args, **kwargs): + # /article + # url = 'http://xxxx.xxx', + # fields = ['next',], + # + if len(args): + raise TypeError('only keyword arguments are allowed') + if not isinstance(post, bool): + raise TypeError('post argument can only be True or False') + + url = self.geturl(**kwargs) + + request = urllib.request.urlopen(url, timeout=self._api.timeout) + + self._api.update_request(request) + + retry = self._api.max_retries + while True: + retry -= 1 + try: + ret = request.read() + break + except urllib.error.HTTPError as e: + raise APIError(e.code, url, e.read()) + except (socket.error, urllib.error.HTTPError) as e: + if retry < 0: + raise e + _print_debug('caught error: {}; retrying'.format(e)) + time.sleep(self._api.retry_delay) + + if self._api.decode_result: + try: + ret = json.loads(ret) + except BaseException: + raise APIError(-1, + url, + 'json decode error, value={0!r}'.format(ret)) + return ret + + def _mkarg(self, kargs): + """change the argument list (encode value, add api key/secret) + :return: the new argument list""" + + def enc(x): + # if isinstance(x, unicode): + # return x.encode('utf-8') + # return str(x) + return x.encode('utf-8') if isinstance(x, str) else str(x) + + kargs = kargs.copy() + kargs['token'] = self._api.token + for (k, v) in kargs.items(): + if isinstance(v, Iterable) and not isinstance(v, str): + kargs[k] = ','.join('%s' % id for id in [enc(i) for i in v]) + else: + kargs[k] = enc(v) + + return kargs + + def geturl(self, **kargs): + """return the request url""" + return self._urlbase + '?' + urllib.parse.urlencode(self._mkarg(kargs)) + + +def _print_debug(msg): + if DEBUG_LEVEL: + sys.stderr.write(str(msg) + '\n') + + +_APIS = [ + '/article', + # '/images', +] + +_APIS = [i.split('/')[1:] for i in _APIS] From 2484367a9d0374faafc8fbd6f916fdec198eb0c9 Mon Sep 17 00:00:00 2001 From: firejq Date: Thu, 14 Dec 2017 23:06:46 +0800 Subject: [PATCH 4/4] ADD HTTPError handler --- url2io3.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/url2io3.py b/url2io3.py index 5257ca5..eb813d3 100644 --- a/url2io3.py +++ b/url2io3.py @@ -37,7 +37,7 @@ import urllib.request # import urllib2 import time -from urllib.parse import urlparse +# from urllib.parse import urlparse from collections import Iterable @@ -133,8 +133,9 @@ def __call__(self, post=False, *args, **kwargs): raise TypeError('post argument can only be True or False') url = self.geturl(**kwargs) + # print(url) - request = urllib.request.urlopen(url, timeout=self._api.timeout) + request = urllib.request.Request(url) self._api.update_request(request) @@ -142,16 +143,20 @@ def __call__(self, post=False, *args, **kwargs): while True: retry -= 1 try: - ret = request.read() + # ret = request.read() + ret = urllib.request.urlopen(url=request, + timeout=self._api.timeout) break - except urllib.error.HTTPError as e: - raise APIError(e.code, url, e.read()) + # except urllib.error.HTTPError as e: + # raise APIError(e.code, url, e.read()) except (socket.error, urllib.error.HTTPError) as e: if retry < 0: - raise e + # raise e + return json.loads(e.read()) _print_debug('caught error: {}; retrying'.format(e)) time.sleep(self._api.retry_delay) + ret = ret.read() if self._api.decode_result: try: ret = json.loads(ret)