Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove logic for python2 #312

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 15 additions & 5 deletions pypandoc/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
# -*- coding: utf-8 -*-

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

these headers are also obsolete.

from __future__ import absolute_import, print_function, with_statement
from typing import Iterable
from typing import Union
from typing import Generator
Expand All @@ -13,10 +12,12 @@
import textwrap
import glob
from pathlib import Path
from urllib.parse import urlparse
from urllib.request import url2pathname

from .handler import _check_log_handler
from .pandoc_download import DEFAULT_TARGET_FOLDER, download_pandoc
from .py3compat import cast_bytes, cast_unicode, string_types, url2path, urlparse
from .py3compat import _DEFAULT_ENCODING

__author__ = u'Juho Vepsäläinen'
__author_email__ = "[email protected]"
Expand Down Expand Up @@ -53,6 +54,11 @@
# Set up the module level logger
logger = logging.getLogger(__name__)

def url2path(url):  # noqa: E303
    """Convert a ``file:`` URL into a local filesystem path.

    Only the path component of *url* is used (as parsed by ``urlparse``);
    it is then passed through ``url2pathname``, which undoes
    percent-encoding and applies the platform's path conventions.

    :param url: the URL to convert, e.g. ``file:///tmp/some%20file.md``
    :returns: the corresponding local path as a string
    """
    # from http://stackoverflow.com/questions/11687478/convert-a-filename-to-a-file-url
    return url2pathname(urlparse(url).path)


def convert_text(source:str, to:str, format:str, extra_args:Iterable=(), encoding:str='utf-8',
outputfile:Union[None, str, Path]=None, filters:Union[Iterable, None]=None, verify_format:bool=True,
sandbox:bool=True, cworkdir:Union[str, None]=None) -> str:
Expand Down Expand Up @@ -238,7 +244,10 @@ def _as_unicode(source:any, encoding:str) -> any:
# if a source and a different encoding is given, try to decode the source into a
# unicode string
try:
source = cast_unicode(source, encoding=encoding)
if isinstance(source, bytes):
encoding = encoding or _DEFAULT_ENCODING
source = source.decode(encoding)

except (UnicodeDecodeError, UnicodeEncodeError):
pass
return source
Expand Down Expand Up @@ -356,7 +365,7 @@ def _convert_input(source, format, input_type, to, extra_args=(),

# adds the proper filter syntax for each item in the filters list
if filters is not None:
if isinstance(filters, string_types):
if isinstance(filters, str):
filters = filters.split()
f = ['--lua-filter=' + x if x.endswith(".lua") else '--filter=' + x for x in filters]
args.extend(f)
Expand Down Expand Up @@ -392,7 +401,8 @@ def _convert_input(source, format, input_type, to, extra_args=(),

if string_input:
try:
source = cast_bytes(source, encoding='utf-8')
if not isinstance(source, bytes):
source = source.encode('utf-8')
except (UnicodeDecodeError, UnicodeEncodeError):
# assume that it is already a utf-8 encoded string
pass
Expand Down
8 changes: 2 additions & 6 deletions pypandoc/pandoc_download.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,9 @@
import subprocess
import sys
import tempfile
from typing import Union

import urllib
try:
from urllib.request import urlopen
except ImportError:
from urllib import urlopen
from typing import Union
from urllib.request import urlopen

from .handler import _check_log_handler

Expand Down
61 changes: 0 additions & 61 deletions pypandoc/py3compat.py

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could the remaining contents of this module be moved into another one?

Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
# -*- coding: utf-8 -*-

from __future__ import with_statement

import locale
import sys

Expand All @@ -16,62 +14,3 @@
pass

_DEFAULT_ENCODING = _DEFAULT_ENCODING or sys.getdefaultencoding()


def _decode(s, encoding=None):
    """Decode *s* to text.

    :param s: an object with a ``decode`` method (typically ``bytes``)
    :param encoding: codec name; when falsy, ``_DEFAULT_ENCODING`` is used
    :returns: the result of ``s.decode(encoding)``
    """
    encoding = encoding or _DEFAULT_ENCODING
    return s.decode(encoding)


def _encode(u, encoding=None):
    """Encode *u* to bytes.

    :param u: an object with an ``encode`` method (typically ``str``)
    :param encoding: codec name; when falsy, ``_DEFAULT_ENCODING`` is used
    :returns: the result of ``u.encode(encoding)``
    """
    encoding = encoding or _DEFAULT_ENCODING
    return u.encode(encoding)


def cast_unicode(s, encoding=None):
    """Return *s* as text, decoding it only if it is ``bytes``.

    :param s: value to convert; anything that is not ``bytes`` is returned
        unchanged
    :param encoding: codec name forwarded to ``_decode`` for ``bytes`` input
    :returns: the decoded text for ``bytes`` input, otherwise *s* itself
    """
    if isinstance(s, bytes):
        return _decode(s, encoding)
    return s


def cast_bytes(s, encoding=None):
    """Return *s* as ``bytes``, encoding it only if it is not already ``bytes``.

    :param s: value to convert; ``bytes`` input is returned unchanged
    :param encoding: codec name forwarded to ``_encode`` for non-``bytes``
        input
    :returns: *s* itself for ``bytes`` input, otherwise the encoded value
    """
    # bytes == str on py2.7 -> always encode on py2
    if not isinstance(s, bytes):
        return _encode(s, encoding)
    return s


# Version-dependent compatibility names: the string type aliases and the
# file-URL <-> path helpers are bound differently on Python 3 and Python 2.
if sys.version_info[0] >= 3:
    PY3 = True

    string_types = (str,)
    unicode_type = str

    # from http://stackoverflow.com/questions/11687478/convert-a-filename-to-a-file-url
    from urllib.parse import urljoin, urlparse
    from urllib.request import pathname2url, url2pathname

    def path2url(path):  # noqa: E303
        """Return a ``file:`` URL built from the local path *path*."""
        return urljoin('file:', pathname2url(path))

    def url2path(url):  # noqa: E303
        """Return the local path for the path component of *url*."""
        return url2pathname(urlparse(url).path)

else:
    PY3 = False

    string_types = (str, unicode)  # noqa: F821
    unicode_type = unicode  # noqa: F821

    from urlparse import urljoin, urlparse
    import urllib

    def path2url(path):  # noqa: E303
        """Return a ``file:`` URL built from the local path *path*."""
        return urljoin('file:', urllib.pathname2url(path))

    def url2path(url):  # noqa: E303
        """Return the local path for the path component of *url*."""
        return urllib.url2pathname(urlparse(url).path)
17 changes: 10 additions & 7 deletions tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,10 @@
import unittest
import warnings
from pathlib import Path
from urllib.parse import urljoin
from urllib.request import pathname2url

import pypandoc
from pypandoc.py3compat import path2url, string_types, unicode_type


@contextlib.contextmanager
Expand Down Expand Up @@ -53,7 +54,7 @@ def closed_tempfile(suffix, text=None, dir_name=None):
# Stolen from pandas
def is_list_like(arg):
return (hasattr(arg, '__iter__') and
not isinstance(arg, string_types))
not isinstance(arg, str))


@contextlib.contextmanager
Expand Down Expand Up @@ -155,7 +156,7 @@ def test_get_pandoc_formats(self):
def test_get_pandoc_version(self):
assert "HOME" in os.environ, "No HOME set, this will error..."
version = pypandoc.get_pandoc_version()
self.assertTrue(isinstance(version, pypandoc.string_types))
self.assertTrue(isinstance(version, str))
major = int(version.split(".")[0])
# according to http://pandoc.org/releases.html there were only two versions 0.x ...
self.assertTrue(major in [0, 1, 2])
Expand Down Expand Up @@ -221,7 +222,9 @@ def test_basic_conversion_from_file_url(self):
expected = u'some title{0}=========={0}{0}'.format(os.linesep)
# this keeps the : (which should be '|' on windows but pandoc
# doesn't like it)
file_url = path2url(file_name)

# from http://stackoverflow.com/questions/11687478/convert-a-filename-to-a-file-url
file_url = urljoin('file:', pathname2url(file_name))
assert pypandoc._identify_path(file_url)

received = pypandoc.convert_file(file_url, 'rst')
Expand Down Expand Up @@ -490,12 +493,12 @@ def test_unicode_input(self):
# make sure that pandoc always returns unicode and does not mishandle it
expected = u'üäöîôû{0}'.format(os.linesep)
written = pypandoc.convert_text(u'<p>üäöîôû</p>', 'md', format='html')
self.assertTrue(isinstance(written, unicode_type))
self.assertTrue(isinstance(written, str))
self.assertEqualExceptForNewlineEnd(expected, written)
bytes = u'<p>üäöîôû</p>'.encode("utf-8")
written = pypandoc.convert_text(bytes, 'md', format='html')
self.assertTrue(expected == written)
self.assertTrue(isinstance(written, unicode_type))
self.assertTrue(isinstance(written, str))

# Only use german umlauts in the next test, as iso-8859-15 covers that
expected = u'äüäö{0}'.format(os.linesep)
Expand All @@ -516,7 +519,7 @@ def f():
# with the right encoding it should work...
written = pypandoc.convert_text(bytes, 'md', format='html', encoding="iso-8859-15")
self.assertEqualExceptForNewlineEnd(expected, written)
self.assertTrue(isinstance(written, unicode_type))
self.assertTrue(isinstance(written, str))

def test_conversion_from_non_plain_text_file(self):
with closed_tempfile('.docx') as file_name:
Expand Down