Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add python3 support #1

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 18 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# URL2io Python SDK

This is the URL2io python SDK suite. Note that python2.7 is required.
This is the URL2io Python SDK suite. It supports Python 2.7 (`url2io`) and Python 3 (`url2io3`).

[API Docs](http://www.url2io.com/docs)

Expand All @@ -13,12 +13,28 @@ How to use? example:
1. [Register](http://www.url2io.com/accounts/register) to get your **`token`**.
2. Write your code:

Python 2.7 version:
```python
>>> import url2io
>>> api = url2io.API(token)
>>> # get content and next page link
>>> ret = api.article(url='http://www.url2io.com/products', fields=['next',])
>>> print ret
>>> print(ret)
{
u'content': u'<div><p>\u63d0\u4f9b\u7b80\u5355\uff0c',
u'date': None,
u'title': u'URL2io \u4ecb\u7ecd',
u'url': u'http://www.url2io.com/products'
}
```

Python 3 version:
```python
>>> import url2io3
>>> api = url2io3.API(token)
>>> # get content and next page link
>>> ret = api.article(url='http://www.url2io.com/products', fields=['next',])
>>> print(ret)
{
u'content': u'<div><p>\u63d0\u4f9b\u7b80\u5355\uff0c',
u'date': None,
Expand Down
4 changes: 3 additions & 1 deletion example show how to use SDK.ipynb
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
{
"cells": [
{
"cell_type": "markdown",
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# URL2io Python SDK\n",
"\n",
Expand Down
18 changes: 18 additions & 0 deletions test/article-py3.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
import sys
sys.path.append('..')
import url2io3

# your user token
token = 'xxx'
api = url2io3.API(token)

# Fetch a single article and show which fields came back.
print("get article")
ret = api.article(url='http://www.url2io.com/docs')
print(ret.keys())

# Follow the "next" links page by page until the result has none.
print("get article & next")
next_url = 'http://tech.sina.com.cn/i/2010-08-18/19554560539.shtml'
print('get: ', next_url)
while next_url:
    ret = api.article(url=next_url, fields=['next', 'text'])
    next_url = ret.get('next')
    print('next: ', next_url)
2 changes: 1 addition & 1 deletion test/article.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,4 @@
print 'get: ', ret.get('next')
while ret.get('next'):
ret = api.article(url=ret.get('next'), fields=['next','text'])
print 'next: ', ret.get('next')
print 'next: ', ret.get('next')
21 changes: 10 additions & 11 deletions url2io.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,19 +6,19 @@
# To Public License, Version 2, as published by Sam Hocevar. See
# http://sam.zoy.org/wtfpl/COPYING (copied as below) for more details.
#
# DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
# Version 2, December 2004
# DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
# Version 2, December 2004
#
# Copyright (C) 2004 Sam Hocevar <[email protected]>
# Copyright (C) 2004 Sam Hocevar <[email protected]>
#
# Everyone is permitted to copy and distribute verbatim or modified
# copies of this license document, and changing it is allowed as long
# as the name is changed.
# Everyone is permitted to copy and distribute verbatim or modified
# copies of this license document, and changing it is allowed as long
# as the name is changed.
#
# DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
# TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
# DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
# TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
#
# 0. You just DO WHAT THE FUCK YOU WANT TO.
# 0. You just DO WHAT THE FUCK YOU WANT TO.

"""a simple url2io sdk
example:
Expand Down Expand Up @@ -178,7 +178,7 @@ def enc(x):

def geturl(self, **kargs):
"""return the request url"""
return self._urlbase + '?' + urllib.urlencode(self._mkarg(kargs))
return self._urlbase + '?' + urllib.urlencode(self._mkarg(kargs))


def _print_debug(msg):
Expand All @@ -191,4 +191,3 @@ def _print_debug(msg):
]

_APIS = [i.split('/')[1:] for i in _APIS]

204 changes: 204 additions & 0 deletions url2io3.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,204 @@
# coding: utf-8
#
# This program is free software. It comes without any warranty, to
# the extent permitted by applicable law. You can redistribute it
# and/or modify it under the terms of the Do What The Fuck You Want
# To Public License, Version 2, as published by Sam Hocevar. See
# http://sam.zoy.org/wtfpl/COPYING (copied as below) for more details.
#
# DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
# Version 2, December 2004
#
# Copyright (C) 2004 Sam Hocevar <[email protected]>
#
# Everyone is permitted to copy and distribute verbatim or modified
# copies of this license document, and changing it is allowed as long
# as the name is changed.
#
# DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
# TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
#
# 0. You just DO WHAT THE FUCK YOU WANT TO.

"""a simple url2io sdk
example:
api = API(token)
api.article(url='http://www.url2io.com/products', fields=['next', 'text'])
"""

# Names exported by ``from url2io3 import *``.
__all__ = ['APIError', 'API']

# When truthy, _print_debug() writes its messages to stderr.
DEBUG_LEVEL = 1

import json
import socket
import sys
import time
import urllib
import urllib.error
import urllib.parse
import urllib.request
# import urllib2
# from urllib.parse import urlparse

try:
    from collections.abc import Iterable
except ImportError:  # interpreters that predate collections.abc
    from collections import Iterable


class APIError(Exception):
    """Error raised when an API request fails or its result is unusable.

    :param code: HTTP status code
    :param url: request URL
    :param body: server response body; or detailed error information
    """

    # HTTP status code
    code = None

    # request URL
    url = None

    # server response body; or detailed error information
    body = None

    def __init__(self, code, url, body):
        # Hand the values to Exception as well so that ``args`` is populated;
        # pickling and copying an exception rebuild it from ``args``.
        super(APIError, self).__init__(code, url, body)
        self.code = code
        self.url = url
        self.body = body

    def __str__(self):
        return 'code={s.code}\nurl={s.url}\n{s.body}'.format(s=self)

    __repr__ = __str__


class API(object):
    """Entry point of the SDK; holds the token and connection settings.

    Endpoint proxies are attached to the instance by ``_setup_apiobj`` at
    the end of ``__init__``.
    """

    token = None
    server = 'http://api.url2io.com/'

    decode_result = True
    timeout = None
    max_retries = None
    retry_delay = None

    def __init__(self, token, srv=None,
                 decode_result=True, timeout=30, max_retries=5,
                 retry_delay=3):
        """:param token: the user token sent with every request
        :param srv: The API server address
        :param decode_result: whether to json_decode the result
        :param timeout: HTTP request timeout in seconds, or None
        :param max_retries: maximal number of retries after catching URL error
            or socket error
        :param retry_delay: time to sleep before retrying
        :raises ValueError: if timeout or max_retries is negative, or
            max_retries is None"""
        # Check for None before comparing: ``None >= 0`` raises TypeError on
        # Python 3, which made ``timeout=None`` unusable.
        if timeout is not None and timeout < 0:
            raise ValueError('timeout must be None or a non-negative number')
        if max_retries is None or max_retries < 0:
            raise ValueError('max_retries must be a non-negative number')

        self.token = token
        if srv:
            self.server = srv
        self.decode_result = decode_result
        self.timeout = timeout
        self.max_retries = max_retries
        self.retry_delay = retry_delay

        _setup_apiobj(self, self, [])

    def update_request(self, request):
        """overwrite this function to update the request before sending it to
        server"""
        pass


def _setup_apiobj(self, apiobj, path):
    """Attach one ``_APIProxy`` attribute per child endpoint of *path*.

    :param self: the object receiving the attributes (an API or a proxy)
    :param apiobj: the API object the proxies are bound to
    :param path: list of path components identifying *self* in ``_APIS``
    """
    if self is not apiobj:
        self._api = apiobj
        self._urlbase = apiobj.server + '/'.join(path)

    depth = len(path)
    seen = set()
    for parts in _APIS:
        # Only entries strictly below *path* contribute a child name.
        if len(parts) <= depth or parts[:depth] != path:
            continue
        child = parts[depth]
        if child in seen:
            continue
        seen.add(child)
        setattr(self, child, _APIProxy(apiobj, parts[:depth + 1]))


class _APIProxy(object):
    """Callable proxy bound to one endpoint path of the API server.

    Instances are created by ``_setup_apiobj`` and attached as attributes of
    the API object (or of a parent proxy); calling one sends the request.
    """

    # The API object this proxy reads token, timeout and retry settings from.
    _api = None

    # Endpoint URL without the query string.
    _urlbase = None

    def __init__(self, apiobj, path):
        _setup_apiobj(self, apiobj, path)

    def __call__(self, post=False, *args, **kwargs):
        """Send the request and return the server's answer.

        Example for ``/article``::

            api.article(url='http://xxxx.xxx', fields=['next'])

        :param post: must be a bool. NOTE: it is only validated here; the
            request is always built from the URL alone, without a body.
        :param kwargs: the API parameters, passed as keyword arguments only
        :return: the decoded JSON result when ``decode_result`` is set on the
            API object, otherwise the raw response bytes. If the server still
            answers with an HTTP error after the last retry, the decoded JSON
            error body is returned instead.
        :raises TypeError: on positional arguments or a non-bool ``post``
        :raises APIError: when the request keeps failing without an HTTP
            response, or when a response body is not valid JSON
        """
        if args:
            raise TypeError('only keyword arguments are allowed')
        if not isinstance(post, bool):
            raise TypeError('post argument can only be True or False')

        url = self.geturl(**kwargs)
        request = urllib.request.Request(url)
        self._api.update_request(request)

        # One initial attempt plus up to ``max_retries`` retries.
        retry = self._api.max_retries
        while True:
            retry -= 1
            try:
                response = urllib.request.urlopen(url=request,
                                                  timeout=self._api.timeout)
                break
            except (socket.error, urllib.error.HTTPError) as e:
                if retry < 0:
                    return self._give_up(url, e)
                _print_debug('caught error: {}; retrying'.format(e))
                time.sleep(self._api.retry_delay)

        # Close the connection as soon as the body has been read.
        with response:
            ret = response.read()

        if self._api.decode_result:
            try:
                ret = json.loads(ret)
            except ValueError as err:
                raise APIError(
                    -1, url,
                    'json decode error, value={0!r}'.format(ret)) from err
        return ret

    def _give_up(self, url, error):
        """Turn the error of the final attempt into a result or an APIError."""
        if not isinstance(error, urllib.error.HTTPError):
            # Plain socket/URL errors carry no response body to read.
            raise APIError(-1, url,
                           'request failed: {}'.format(error)) from error
        body = error.read()
        try:
            return json.loads(body)
        except ValueError as err:
            raise APIError(error.code, url, body) from err

    def _mkarg(self, kargs):
        """change the argument list (encode value, add api key/secret)
        :return: the new argument list"""

        def enc(x):
            # Keep text as text: formatting bytes with '%s' would leak the
            # b'...' repr into the query string. urlencode() does the UTF-8
            # encoding itself.
            if isinstance(x, bytes):
                return x.decode('utf-8')
            return str(x)

        merged = dict(kargs)
        merged['token'] = self._api.token

        encoded = {}
        for key, value in merged.items():
            if isinstance(value, Iterable) and \
                    not isinstance(value, (str, bytes)):
                # Multi-valued parameters are sent as a comma separated list.
                encoded[key] = ','.join(enc(item) for item in value)
            else:
                encoded[key] = enc(value)
        return encoded

    def geturl(self, **kargs):
        """return the request url"""
        query = urllib.parse.urlencode(self._mkarg(kargs))
        return self._urlbase + '?' + query


def _print_debug(msg):
    """Write *msg* followed by a newline to stderr if DEBUG_LEVEL is truthy."""
    if not DEBUG_LEVEL:
        return
    sys.stderr.write('{}\n'.format(str(msg)))


# Endpoint table: each entry is the list of path components of one endpoint,
# i.e. the endpoint string split on '/' with the leading empty piece dropped.
_APIS = [
    endpoint.split('/')[1:]
    for endpoint in (
        '/article',
        # '/images',
    )
]