-
-
Notifications
You must be signed in to change notification settings - Fork 297
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add .mobi and .azw support with KindleUnpack
- Loading branch information
1 parent
e952045
commit c2d2353
Showing
24 changed files
with
7,321 additions
and
9 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,5 @@ | ||
_build/ | ||
build/ | ||
.flatpak-builder/ | ||
.mo | ||
.mo | ||
__pycache__/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
#!/usr/bin/env python | ||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,278 @@ | ||
#!/usr/bin/env python | ||
# -*- coding: utf-8 -*- | ||
# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab | ||
|
||
# Copyright (c) 2014 Kevin B. Hendricks, John Schember, and Doug Massay | ||
# All rights reserved. | ||
# | ||
# Redistribution and use in source and binary forms, with or without modification, | ||
# are permitted provided that the following conditions are met: | ||
# | ||
# 1. Redistributions of source code must retain the above copyright notice, this list of | ||
# conditions and the following disclaimer. | ||
# | ||
# 2. Redistributions in binary form must reproduce the above copyright notice, this list | ||
# of conditions and the following disclaimer in the documentation and/or other materials | ||
# provided with the distribution. | ||
# | ||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY | ||
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES | ||
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT | ||
# SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, | ||
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED | ||
# TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; | ||
# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | ||
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY | ||
# WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
|
||
from __future__ import unicode_literals, division, absolute_import, print_function | ||
|
||
import sys | ||
import codecs | ||
|
||
PY2 = sys.version_info[0] == 2 | ||
PY3 = sys.version_info[0] == 3 | ||
|
||
iswindows = sys.platform.startswith('win') | ||
|
||
try: | ||
from urllib.parse import unquote | ||
except ImportError: | ||
from urllib import unquote | ||
|
||
if PY2: | ||
from HTMLParser import HTMLParser | ||
_h = HTMLParser() | ||
elif sys.version_info[1] < 4: | ||
import html.parser | ||
_h = html.parser.HTMLParser() | ||
else: | ||
import html as _h | ||
|
||
if PY3: | ||
text_type = str | ||
binary_type = bytes | ||
# if will be printing arbitraty binary data to stdout on python 3 | ||
# sys.stdin = sys.stdin.detach() | ||
# sys.stdout = sys.stdout.detach() | ||
# sys.stdout = codecs.getwriter("utf-8")(sys.stdout.detach()) | ||
else: | ||
range = xrange | ||
text_type = unicode | ||
binary_type = str | ||
# if will be printing unicode under python 2 need to protect | ||
# against sys.stdout.encoding being None stupidly forcing forcing ascii encoding of unicode | ||
# sys.stdout = codecs.getwriter("utf-8")(sys.stdout) | ||
# alternatively set environment variable as follows **before** launching python: export PYTHONIOENCODING=UTF-8 | ||
|
||
# NOTE: Python 3 is completely broken when accessing single bytes in bytes strings | ||
# (and they amazingly claim by design and no bug!) | ||
|
||
# To illustrate: this works for unicode in Python 3 and for all Python 2.X for both bytestrings and unicode | ||
# >>> o = '123456789' | ||
# >>> o[-3] | ||
# '7' | ||
# >>> type(o[-3]) | ||
# <class 'str'> | ||
# >>> type(o) | ||
# <class 'str'> | ||
|
||
# Unfortunately, this is what Python 3 does for no sane reason and only for bytestrings | ||
# >>> o = b'123456789' | ||
# >>> o[-3] | ||
# 55 | ||
# >>> type(o[-3]) | ||
# <class 'int'> | ||
# >>> type(o) | ||
# <class 'bytes'> | ||
|
||
# This mind boggling behaviour also happens when indexing a bytestring and/or | ||
# iteratoring over a bytestring. In other words it will return an int but not | ||
# the byte itself!!!!!!! | ||
|
||
# The only way to access a single byte as a byte in bytestring and get the byte in both | ||
# Python 2 and Python 3 is to use a slice | ||
|
||
# This problem is so common there are horrible hacks floating around the net to **try** | ||
# to work around it, so that code that works on both Python 2 and Python 3 is possible. | ||
|
||
# So in order to write code that works on both Python 2 and Python 3 | ||
# if you index or access a single byte and want its ord() then use the bord() function. | ||
# If instead you want it as a single character byte use the bchar() function | ||
# both of which are defined below. | ||
|
||
if PY3: | ||
# Also Note: if decode a bytestring using 'latin-1' (or any other full range 0-255 encoding) | ||
# in place of ascii you will get a byte value to half-word or integer value | ||
# one-to-one mapping (in the 0 - 255 range) | ||
|
||
def bchr(s): | ||
return bytes([s]) | ||
|
||
def bstr(s): | ||
if isinstance(s, str): | ||
return bytes(s, 'latin-1') | ||
else: | ||
return bytes(s) | ||
|
||
def bord(s): | ||
return s | ||
|
||
def bchar(s): | ||
return bytes([s]) | ||
|
||
else: | ||
def bchr(s): | ||
return chr(s) | ||
|
||
def bstr(s): | ||
return str(s) | ||
|
||
def bord(s): | ||
return ord(s) | ||
|
||
def bchar(s): | ||
return s | ||
|
||
if PY3: | ||
# list-producing versions of the major Python iterating functions | ||
def lrange(*args, **kwargs): | ||
return list(range(*args, **kwargs)) | ||
|
||
def lzip(*args, **kwargs): | ||
return list(zip(*args, **kwargs)) | ||
|
||
def lmap(*args, **kwargs): | ||
return list(map(*args, **kwargs)) | ||
|
||
def lfilter(*args, **kwargs): | ||
return list(filter(*args, **kwargs)) | ||
else: | ||
import __builtin__ | ||
# Python 2-builtin ranges produce lists | ||
lrange = __builtin__.range | ||
lzip = __builtin__.zip | ||
lmap = __builtin__.map | ||
lfilter = __builtin__.filter | ||
|
||
# In Python 3 you can no longer use .encode('hex') on a bytestring | ||
# instead use the following on both platforms | ||
import binascii | ||
def hexlify(bdata): | ||
return (binascii.hexlify(bdata)).decode('ascii') | ||
|
||
# If you: import struct | ||
# Note: struct pack, unpack, unpack_from all *require* bytestring format | ||
# data all the way up to at least Python 2.7.5, Python 3 is okay with either | ||
|
||
# If you: import re | ||
# note: Python 3 "re" requires the pattern to be the exact same type as the data to be | ||
# searched ... but u"" is not allowed for the pattern itself only b"" | ||
# Python 2.X allows the pattern to be any type and converts it to match the data | ||
# and returns the same type as the data | ||
|
||
# convert string to be utf-8 encoded | ||
def utf8_str(p, enc='utf-8'): | ||
if p is None: | ||
return None | ||
if isinstance(p, text_type): | ||
return p.encode('utf-8') | ||
if enc != 'utf-8': | ||
return p.decode(enc).encode('utf-8') | ||
return p | ||
|
||
# convert string to be unicode encoded | ||
def unicode_str(p, enc='utf-8'): | ||
if p is None: | ||
return None | ||
if isinstance(p, text_type): | ||
return p | ||
return p.decode(enc) | ||
|
||
ASCII_CHARS = set(chr(x) for x in range(128)) | ||
URL_SAFE = set('ABCDEFGHIJKLMNOPQRSTUVWXYZ' | ||
'abcdefghijklmnopqrstuvwxyz' | ||
'0123456789' '#' '_.-/~') | ||
IRI_UNSAFE = ASCII_CHARS - URL_SAFE | ||
|
||
# returns a quoted IRI (not a URI) | ||
def quoteurl(href): | ||
if isinstance(href,binary_type): | ||
href = href.decode('utf-8') | ||
result = [] | ||
for char in href: | ||
if char in IRI_UNSAFE: | ||
char = "%%%02x" % ord(char) | ||
result.append(char) | ||
return ''.join(result) | ||
|
||
# unquotes url/iri | ||
def unquoteurl(href): | ||
if isinstance(href,binary_type): | ||
href = href.decode('utf-8') | ||
href = unquote(href) | ||
return href | ||
|
||
# unescape html | ||
def unescapeit(sval): | ||
return _h.unescape(sval) | ||
|
||
# Python 2.X commandline parsing under Windows has been horribly broken for years! | ||
# Use the following code to emulate full unicode commandline parsing on Python 2 | ||
# ie. To get sys.argv arguments and properly encode them as unicode | ||
|
||
def unicode_argv(): | ||
global iswindows | ||
global PY3 | ||
if PY3: | ||
return sys.argv | ||
if iswindows: | ||
# Versions 2.x of Python don't support Unicode in sys.argv on | ||
# Windows, with the underlying Windows API instead replacing multi-byte | ||
# characters with '?'. So use shell32.GetCommandLineArgvW to get sys.argv | ||
# as a list of Unicode strings | ||
from ctypes import POINTER, byref, cdll, c_int, windll | ||
from ctypes.wintypes import LPCWSTR, LPWSTR | ||
|
||
GetCommandLineW = cdll.kernel32.GetCommandLineW | ||
GetCommandLineW.argtypes = [] | ||
GetCommandLineW.restype = LPCWSTR | ||
|
||
CommandLineToArgvW = windll.shell32.CommandLineToArgvW | ||
CommandLineToArgvW.argtypes = [LPCWSTR, POINTER(c_int)] | ||
CommandLineToArgvW.restype = POINTER(LPWSTR) | ||
|
||
cmd = GetCommandLineW() | ||
argc = c_int(0) | ||
argv = CommandLineToArgvW(cmd, byref(argc)) | ||
if argc.value > 0: | ||
# Remove Python executable and commands if present | ||
start = argc.value - len(sys.argv) | ||
return [argv[i] for i in | ||
range(start, argc.value)] | ||
# this should never happen | ||
return None | ||
else: | ||
argv = [] | ||
argvencoding = sys.stdin.encoding | ||
if argvencoding is None: | ||
argvencoding = sys.getfilesystemencoding() | ||
if argvencoding is None: | ||
argvencoding = 'utf-8' | ||
for arg in sys.argv: | ||
if isinstance(arg, text_type): | ||
argv.append(arg) | ||
else: | ||
argv.append(arg.decode(argvencoding)) | ||
return argv | ||
|
||
|
||
# Python 2.X is broken in that it does not recognize CP65001 as UTF-8 | ||
def add_cp65001_codec(): | ||
if PY2: | ||
try: | ||
codecs.lookup('cp65001') | ||
except LookupError: | ||
codecs.register( | ||
lambda name: name == 'cp65001' and codecs.lookup('utf-8') or None) | ||
return |
Oops, something went wrong.