# Git patch (unified diff) touching pex/http.py and tests/test_http.py.
#
# pex/http.py -- two hunks, each inside a `content(self, link)` method:
#   the error-handler argument of `.decode(...)` changes from the keyword form
#   `errors='replace'` to the positional form `'replace'`. The encoding
#   argument is untouched in both hunks.
#   NOTE(review): presumably done for interpreters whose `decode` rejects
#   keyword arguments (e.g. Python 2.6) -- confirm against the commit message.
#
# tests/test_http.py -- two hunks:
#   1. `UrllibContext` is added to the `from pex.http import ...` line.
#   2. `test_urllib_context_utf8_encoding` is appended. It writes a bytes
#      payload to a temporary file, subclasses UrllibContext so that `open`
#      ignores the requested link and opens that local file instead, then
#      asserts that `content()` for an http:// link equals the payload decoded
#      with `UrllibContext.DEFAULT_ENCODING`.
#   NOTE(review): the payload is ASCII-only, so the test does not exercise
#   non-ASCII decoding or the 'replace' error handler -- consider a follow-up.
#
# NOTE(review): the entire patch sits on one physical line here; the original
# line breaks appear to have been lost, so it will not apply as-is.
# TODO: restore the patch from the originating commit before using it.
diff --git a/pex/http.py b/pex/http.py index a6c0f1605..ddd49e4f0 100644 --- a/pex/http.py +++ b/pex/http.py @@ -119,7 +119,7 @@ def content(self, link): with contextlib.closing(self.open(link)) as fp: encoding = message_from_string(str(fp.headers)).get_content_charset(self.DEFAULT_ENCODING) - return fp.read().decode(encoding, errors='replace') + return fp.read().decode(encoding, 'replace') Context.register(UrllibContext) @@ -222,7 +222,7 @@ def content(self, link): raise self.Error('Context.content only works with remote URLs.') with contextlib.closing(self.open(link)) as request: - return request.read().decode(request.encoding or self.DEFAULT_ENCODING, errors='replace') + return request.read().decode(request.encoding or self.DEFAULT_ENCODING, 'replace') if requests: diff --git a/tests/test_http.py b/tests/test_http.py index 772acebd3..1b7c6dfbb 100644 --- a/tests/test_http.py +++ b/tests/test_http.py @@ -5,7 +5,7 @@ import pytest from twitter.common.contextutil import temporary_file -from pex.http import Context, RequestsContext, StreamFilelike +from pex.http import Context, RequestsContext, StreamFilelike, UrllibContext from pex.link import Link try: @@ -203,3 +203,21 @@ def test_requests_context_retries_read_timeout_retries_exhausted(): with pytest.raises(Context.Error): context.read(Link.wrap(url)) + + +def test_urllib_context_utf8_encoding(): + BYTES = b'this is a decoded utf8 string' + + with temporary_file() as tf: + tf.write(BYTES) + tf.flush() + local_link = Link.wrap(tf.name) + + # Trick UrllibContext into thinking this is a remote link + class MockUrllibContext(UrllibContext): + def open(self, link): + return super(MockUrllibContext, self).open(local_link) + + context = MockUrllibContext() + assert context.content(Link.wrap('http://www.google.com')) == BYTES.decode( + UrllibContext.DEFAULT_ENCODING)