JessicaTegner · AbdealiLoKo · Oct 28, 2022 · Oct 28, 2022 · Oct 28, 2022 · Oct 28, 2022
diff --git a/pypandoc/__init__.py b/pypandoc/__init__.py
@@ -1,5 +1,4 @@
 # -*- coding: utf-8 -*-
-from __future__ import absolute_import, print_function, with_statement
 from typing import Iterable
 from typing import Union
 from typing import Generator
@@ -13,10 +12,12 @@
 import textwrap
 import glob
 from pathlib import Path
+from urllib.parse import urlparse
+from urllib.request import url2pathname
 
 from .handler import _check_log_handler
 from .pandoc_download import DEFAULT_TARGET_FOLDER, download_pandoc
-from .py3compat import cast_bytes, cast_unicode, string_types, url2path, urlparse
+from .py3compat import _DEFAULT_ENCODING
 
 __author__ = u'Juho Vepsäläinen'
 __author_email__ = "[email protected]"
@@ -53,6 +54,11 @@
 # Set up the module level logger
 logger = logging.getLogger(__name__)
 
+def url2path(url):  # noqa: E303
+    # Convert a file URL to a local filesystem path (approach from http://stackoverflow.com/questions/11687478/convert-a-filename-to-a-file-url)
+    return url2pathname(urlparse(url).path)
+
+
 def convert_text(source:str, to:str, format:str, extra_args:Iterable=(), encoding:str='utf-8',
                  outputfile:Union[None, str, Path]=None, filters:Union[Iterable, None]=None, verify_format:bool=True,
                  sandbox:bool=True, cworkdir:Union[str, None]=None) -> str:
@@ -238,7 +244,10 @@ def _as_unicode(source:any, encoding:str) -> any:
         # if a source and a different encoding is given, try to decode the the source into a
         # unicode string
         try:
-            source = cast_unicode(source, encoding=encoding)
+            if isinstance(source, bytes):
+                encoding = encoding or _DEFAULT_ENCODING
+                source = source.decode(encoding)
+
         except (UnicodeDecodeError, UnicodeEncodeError):
             pass
     return source
@@ -356,7 +365,7 @@ def _convert_input(source, format, input_type, to, extra_args=(),
 
     # adds the proper filter syntax for each item in the filters list
     if filters is not None:
-        if isinstance(filters, string_types):
+        if isinstance(filters, str):
             filters = filters.split()
         f = ['--lua-filter=' + x if x.endswith(".lua") else '--filter=' + x for x in filters]
         args.extend(f)
@@ -392,7 +401,8 @@ def _convert_input(source, format, input_type, to, extra_args=(),
 
     if string_input:
         try:
-            source = cast_bytes(source, encoding='utf-8')
+            if not isinstance(source, bytes):
+                source = source.encode('utf-8')
         except (UnicodeDecodeError, UnicodeEncodeError):
             # assume that it is already a utf-8 encoded string
             pass

diff --git a/pypandoc/pandoc_download.py b/pypandoc/pandoc_download.py
@@ -9,13 +9,9 @@
 import subprocess
 import sys
 import tempfile
-from typing import Union
-
 import urllib
-try:
-    from urllib.request import urlopen
-except ImportError:
-    from urllib import urlopen
+from typing import Union
+from urllib.request import urlopen
 
 from .handler import _check_log_handler
 

diff --git a/pypandoc/py3compat.py b/pypandoc/py3compat.py
@@ -1,7 +1,5 @@
 # -*- coding: utf-8 -*-
 
-from __future__ import with_statement
-
 import locale
 import sys
 
@@ -16,62 +14,3 @@
     pass
 
 _DEFAULT_ENCODING = _DEFAULT_ENCODING or sys.getdefaultencoding()
-
-
-def _decode(s, encoding=None):
-    encoding = encoding or _DEFAULT_ENCODING
-    return s.decode(encoding)
-
-
-def _encode(u, encoding=None):
-    encoding = encoding or _DEFAULT_ENCODING
-    return u.encode(encoding)
-
-
-def cast_unicode(s, encoding=None):
-    if isinstance(s, bytes):
-        return _decode(s, encoding)
-    return s
-
-
-def cast_bytes(s, encoding=None):
-    # bytes == str on py2.7 -> always encode on py2
-    if not isinstance(s, bytes):
-        return _encode(s, encoding)
-    return s
-
-
-if sys.version_info[0] >= 3:
-    PY3 = True
-
-    string_types = (str,)
-    unicode_type = str
-
-    # from http://stackoverflow.com/questions/11687478/convert-a-filename-to-a-file-url
-    from urllib.parse import urljoin, urlparse
-    from urllib.request import pathname2url, url2pathname
-
-
-    def path2url(path):  # noqa: E303
-        return urljoin('file:', pathname2url(path))
-
-
-    def url2path(url):  # noqa: E303
-        return url2pathname(urlparse(url).path)
-
-else:
-    PY3 = False
-
-    string_types = (str, unicode)  # noqa: F821
-    unicode_type = unicode  # noqa: F821
-
-    from urlparse import urljoin, urlparse
-    import urllib
-
-
-    def path2url(path):  # noqa: E303
-        return urljoin('file:', urllib.pathname2url(path))
-
-
-    def url2path(url):  # noqa: E303
-        return urllib.url2pathname(urlparse(url).path)
diff --git a/tests.py b/tests.py
@@ -14,9 +14,10 @@
 import unittest
 import warnings
 from pathlib import Path
+from urllib.parse import urljoin
+from urllib.request import pathname2url
 
 import pypandoc
-from pypandoc.py3compat import path2url, string_types, unicode_type
 
 
 @contextlib.contextmanager
@@ -53,7 +54,7 @@ def closed_tempfile(suffix, text=None, dir_name=None):
 # Stolen from pandas
 def is_list_like(arg):
     return (hasattr(arg, '__iter__') and
-            not isinstance(arg, string_types))
+            not isinstance(arg, str))
 
 
 @contextlib.contextmanager
@@ -155,7 +156,7 @@ def test_get_pandoc_formats(self):
     def test_get_pandoc_version(self):
         assert "HOME" in os.environ, "No HOME set, this will error..."
         version = pypandoc.get_pandoc_version()
-        self.assertTrue(isinstance(version, pypandoc.string_types))
+        self.assertTrue(isinstance(version, str))
         major = int(version.split(".")[0])
         # according to http://pandoc.org/releases.html there were only two versions 0.x ...
         self.assertTrue(major in [0, 1, 2])
@@ -221,7 +222,9 @@ def test_basic_conversion_from_file_url(self):
             expected = u'some title{0}=========={0}{0}'.format(os.linesep)
             # this keeps the : (which should be '|' on windows but pandoc
             # doesn't like it
-            file_url = path2url(file_name)
+
+            # from http://stackoverflow.com/questions/11687478/convert-a-filename-to-a-file-url
+            file_url = urljoin('file:', pathname2url(file_name))
             assert pypandoc._identify_path(file_url)
 
             received = pypandoc.convert_file(file_url, 'rst')
@@ -490,12 +493,12 @@ def test_unicode_input(self):
         # make sure that pandoc always returns unicode and does not mishandle it
         expected = u'üäöîôû{0}'.format(os.linesep)
         written = pypandoc.convert_text(u'<p>üäöîôû</p>', 'md', format='html')
-        self.assertTrue(isinstance(written, unicode_type))
+        self.assertTrue(isinstance(written, str))
         self.assertEqualExceptForNewlineEnd(expected, written)
         bytes = u'<p>üäöîôû</p>'.encode("utf-8")
         written = pypandoc.convert_text(bytes, 'md', format='html')
         self.assertTrue(expected == written)
-        self.assertTrue(isinstance(written, unicode_type))
+        self.assertTrue(isinstance(written, str))
 
         # Only use german umlauts in the next test, as iso-8859-15 covers that
         expected = u'äüäö{0}'.format(os.linesep)
@@ -516,7 +519,7 @@ def f():
         # with the right encoding it should work...
         written = pypandoc.convert_text(bytes, 'md', format='html', encoding="iso-8859-15")
         self.assertEqualExceptForNewlineEnd(expected, written)
-        self.assertTrue(isinstance(written, unicode_type))
+        self.assertTrue(isinstance(written, str))
 
     def test_conversion_from_non_plain_text_file(self):
         with closed_tempfile('.docx') as file_name: