diff --git a/AUTHORS.rst b/AUTHORS.rst index 896e959..5a8e9d4 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -53,3 +53,5 @@ Patches and Suggestions - Ryan Ashley - Sam Bull (@greatestape) + +- Morten Lied Johansen diff --git a/docs/dumputils.rst b/docs/dumputils.rst index 052ec61..d3bad89 100644 --- a/docs/dumputils.rst +++ b/docs/dumputils.rst @@ -10,8 +10,35 @@ you may need to look to find it. In :mod:`requests_toolbelt.utils.dump` there are two functions that will return a :class:`bytearray` with the information retrieved from a response object. +Sanitizing information before dumping +------------------------------------- + +When debugging, it is quite often useful to dump the request or response to a +debugging log, where it can be inspected. The problem is that often the request +or response can contain sensitive data that should not be stored in a logfile +on disk. + +To solve this, it is possible to supply a :class:`Sanitizer` which can +manipulate the body or the headers before they are dumped. The default is to +not do anything. For convenience, :class:`HeaderSanitizer` is provided, which +will redact the value of headers that are commonly considered sensitive (see +:attr:`HeaderSanitizer.SENSITIVE_HEADERS`). + +You can implement any sanitizing you need by subclassing :class:`Sanitizer` and +passing in an instance to :py:func:`dump_all` or :func:`dump_response`. + +Public members +-------------- + .. autofunction:: requests_toolbelt.utils.dump.dump_all .. autofunction:: requests_toolbelt.utils.dump.dump_response + +.. autoclass:: + requests_toolbelt.utils.dump.Sanitizer + :members: + +.. 
autoclass:: + requests_toolbelt.utils.dump.HeaderSanitizer diff --git a/requests_toolbelt/utils/dump.py b/requests_toolbelt/utils/dump.py index 23b35e7..fd3d928 100644 --- a/requests_toolbelt/utils/dump.py +++ b/requests_toolbelt/utils/dump.py @@ -4,7 +4,7 @@ from requests import compat -__all__ = ('dump_response', 'dump_all') +__all__ = ['dump_response', 'dump_all', 'NoopSanitizer', 'HeaderSanitizer'] HTTP_VERSIONS = { 9: b'0.9', @@ -16,6 +16,123 @@ ['request', 'response']) +class Sanitizer(object): + CLEANSED_SUBSTITUTE = "********************" + + def _sanitize_headers(self, headers): + sanitized_headers = headers.copy() + for name in headers: + if self.should_sanitize_header(name): + sanitized_headers[name] = self.CLEANSED_SUBSTITUTE + if self.should_strip_header(name): + del sanitized_headers[name] + return sanitized_headers + + def request_headers(self, headers): + """Sanitize the request headers + + :param headers: The request headers + :type headers: :class:`requests.structures.CaseInsensitiveDict` + :return: A new headers object + :rtype: :class:`requests.structures.CaseInsensitiveDict` + """ + return self._sanitize_headers(headers) + + def request_body(self, body): + """Sanitize a request body + + :param body: The body of the request + :type body: `bytes` + :return: The value to dump for the body + :rtype: `bytes` + """ + raise NotImplementedError + + def response_headers(self, headers): + """Sanitize the response headers + + Return a sanitized copy of the headers, removing or redacting values + + :param headers: The response headers + :type headers: :class:`requests.structures.CaseInsensitiveDict` + """ + return self._sanitize_headers(headers) + + def response_body(self, body): + """Sanitize a response body + + :param body: The body of the response + :type body: `bytes` + :return: The value to dump for the body + :rtype: `bytes` + """ + raise NotImplementedError + + def should_sanitize_header(self, name): + raise NotImplementedError + + def should_strip_header(self, 
name): + raise NotImplementedError + + +class NoopSanitizer(Sanitizer): + """Performs no sanitation""" + + def should_sanitize_header(self, name): + return False + + def should_strip_header(self, name): + return False + + def request_body(self, body): + return body + + def response_body(self, body): + return body + + +class HeaderSanitizer(NoopSanitizer): + """Redact the values of headers considered sensitive + + This will check all headers in both request and response against a set of + sensitive headers (see :attr:`HeaderSanitizer.SENSITIVE_HEADERS`), and + redact the values to protect sensitive data. + + """ + + # List of sensitive headers copied from: + # https://github.com/google/har-sanitizer + SENSITIVE_HEADERS = { + "state", + "shdf", + "usg", + "password", + "email", + "code", + "code-verifier", + "client-secret", + "client-id", + "token", + "access-token", + "authenticity-token", + "id-token", + "appid", + "challenge", + "facetid", + "assertion", + "fcparams", + "serverdata", + "authorization", + "auth", + "x-client-data", + "samlrequest", + "samlresponse" + } + + def should_sanitize_header(self, name): + return name.lower().replace('_', '-') in self.SENSITIVE_HEADERS + + class PrefixSettings(_PrefixSettings): def __new__(cls, request, response): request = _coerce_to_bytes(request) @@ -54,9 +171,12 @@ def _build_request_path(url, proxy_info): return request_path, uri -def _dump_request_data(request, prefixes, bytearr, proxy_info=None): +def _dump_request_data(request, prefixes, bytearr, proxy_info=None, + sanitizer=None): if proxy_info is None: proxy_info = {} + if sanitizer is None: + sanitizer = NoopSanitizer() prefix = prefixes.request method = _coerce_to_bytes(proxy_info.pop('method', request.method)) @@ -70,13 +190,16 @@ def _dump_request_data(request, prefixes, bytearr, proxy_info=None): host_header = _coerce_to_bytes(headers.pop('Host', uri.netloc)) bytearr.extend(prefix + b'Host: ' + host_header + b'\r\n') - for name, value in headers.items(): + 
sanitized_headers = sanitizer.request_headers(headers) + for name, value in sanitized_headers.items(): bytearr.extend(prefix + _format_header(name, value)) bytearr.extend(prefix + b'\r\n') if request.body: if isinstance(request.body, compat.basestring): - bytearr.extend(prefix + _coerce_to_bytes(request.body)) + body = _coerce_to_bytes(request.body) + body = sanitizer.request_body(body) + bytearr.extend(prefix + body) else: # In the event that the body is a file-like object, let's not try # to read everything into memory. @@ -84,7 +207,10 @@ def _dump_request_data(request, prefixes, bytearr, proxy_info=None): bytearr.extend(b'\r\n') -def _dump_response_data(response, prefixes, bytearr): +def _dump_response_data(response, prefixes, bytearr, sanitizer=None): + if sanitizer is None: + sanitizer = NoopSanitizer() + prefix = prefixes.response # Let's interact almost entirely with urllib3's response raw = response.raw @@ -97,14 +223,15 @@ def _dump_response_data(response, prefixes, bytearr): str(raw.status).encode('ascii') + b' ' + _coerce_to_bytes(response.reason) + b'\r\n') - headers = raw.headers - for name in headers.keys(): - for value in headers.getlist(name): + sanitized_headers = sanitizer.response_headers(raw.headers) + for name in sanitized_headers.keys(): + for value in sanitized_headers.getlist(name): bytearr.extend(prefix + _format_header(name, value)) bytearr.extend(prefix + b'\r\n') - bytearr.extend(response.content) + body = sanitizer.response_body(response.content) + bytearr.extend(body) def _coerce_to_bytes(data): @@ -115,12 +242,17 @@ def _coerce_to_bytes(data): def dump_response(response, request_prefix=b'< ', response_prefix=b'> ', - data_array=None): + data_array=None, sanitizer=None): """Dump a single request-response cycle's information. This will take a response object and dump only the data that requests can see for that single request-response cycle. 
+ If the optional ``sanitizer`` parameter is used, it should be an object that + implements the same interface as :class:`Sanitizer`. One possible + implementation is :class:`HeaderSanitizer`, which will redact sensitive + headers. + Example:: import requests @@ -142,29 +274,40 @@ def dump_response(response, request_prefix=b'< ', response_prefix=b'> ', :param data_array: (*optional*) Bytearray to which we append the request-response cycle data :type data_array: :class:`bytearray` + :param sanitizer: (*optional*) + How to sanitize the dump. + :type sanitizer: :class:`Sanitizer` :returns: Formatted bytes of request and response information. :rtype: :class:`bytearray` """ data = data_array if data_array is not None else bytearray() prefixes = PrefixSettings(request_prefix, response_prefix) + if sanitizer is None: + sanitizer = NoopSanitizer() if not hasattr(response, 'request'): raise ValueError('Response has no associated request') proxy_info = _get_proxy_information(response) _dump_request_data(response.request, prefixes, data, - proxy_info=proxy_info) - _dump_response_data(response, prefixes, data) + proxy_info=proxy_info, sanitizer=sanitizer) + _dump_response_data(response, prefixes, data, sanitizer) return data -def dump_all(response, request_prefix=b'< ', response_prefix=b'> '): +def dump_all(response, request_prefix=b'< ', response_prefix=b'> ', + sanitizer=None): """Dump all requests and responses including redirects. This takes the response returned by requests and will dump all request-response pairs in the redirect history in order followed by the final request-response. + If the optional ``sanitizer`` parameter is used, it should be an object that + implements the same interface as :class:`Sanitizer`. One possible + implementation is :class:`HeaderSanitizer`, which will redact sensitive + headers. 
+ Example:: import requests @@ -183,15 +326,22 @@ def dump_all(response, request_prefix=b'< ', response_prefix=b'> '): :param response_prefix: (*optional*) Bytes to prefix each line of the response data :type response_prefix: :class:`bytes` + :param sanitizer: (*optional*) + How to sanitize the dump. + :type sanitizer: :class:`Sanitizer` :returns: Formatted bytes of request and response information. :rtype: :class:`bytearray` """ + if sanitizer is None: + sanitizer = NoopSanitizer() + data = bytearray() history = list(response.history[:]) history.append(response) for response in history: - dump_response(response, request_prefix, response_prefix, data) + dump_response(response, request_prefix, response_prefix, data, + sanitizer) return data diff --git a/tests/test_dump.py b/tests/test_dump.py index da51f45..f328696 100644 --- a/tests/test_dump.py +++ b/tests/test_dump.py @@ -18,6 +18,20 @@ from . import get_betamax +# A semi-random selection of "normal" headers not touched by sanitizing +NORMAL_HEADERS = [ + "Accept", "Accept-Encoding", "Host", "User-Agent", "Accept-Ranges", + "Cache-Control", "Content-Encoding", "Content-Length", "Content-Type", + "Date", "Etag", "Expires", "Last-Modified", "Server", "Vary", "X-Cache" +] +SENSITIVE_HEADERS = [ + "state", "shdf", "usg", "password", "email", "code", "code_verifier", + "client_secret", "client_id", "token", "access_token", + "authenticity_token", "id_token", "appID", "challenge", "facetID", + "assertion", "fcParams", "serverData", "Authorization", "auth", + "x-client-data", "SAMLRequest", "SAMLResponse" +] + HTTP_1_1 = 11 HTTP_1_0 = 10 HTTP_0_9 = 9 @@ -77,6 +91,26 @@ def test_build_request_path_with_proxy_info(self): assert path == b'https://example.com/test' +class TestHeaderSanitizer(object): + @pytest.fixture + def sanitizer(self): + return dump.HeaderSanitizer() + + def test_redacts_sensitive_headers(self, sanitizer): + headers = requests.utils.CaseInsensitiveDict() + for name in SENSITIVE_HEADERS: + 
headers[name] = "sensitive" + for name in NORMAL_HEADERS: + headers[name] = "normal" + + sanitized_headers = sanitizer.request_headers(headers) + + for name in SENSITIVE_HEADERS: + assert sanitized_headers[name] == "********************" + for name in NORMAL_HEADERS: + assert sanitized_headers[name] == "normal" + + class RequestResponseMixin(object): """Mix-in for test classes needing mocked requests and responses.""" @@ -145,6 +179,14 @@ def configure_httpresponse(self, headers=None, reason=b'', status=200, self.httpresponse.version = version +class SensitiveBodySanitizer(dump.NoopSanitizer): + def request_body(self, body): + return b"#REDACTED REQUEST#" + + def response_body(self, body): + return b"#REDACTED RESPONSE#" + + class TestResponsePrivateFunctions(RequestResponseMixin): """Excercise private functions using responses.""" @@ -253,6 +295,41 @@ def test_dump_request_data_with_proxy_info(self): assert b'request:CONNECT fake-request-path HTTP/1.1\r\n' in array assert b'request:Host: example.com\r\n' in array + def test_dump_request_with_sensitive_headers(self): + self.configure_request( + url='http://example.com/', + method='GET', + headers={"password": "foobar"} + ) + array = bytearray() + prefixes = dump.PrefixSettings('request:', 'response:') + dump._dump_request_data( + request=self.request, + prefixes=prefixes, + bytearr=array, + sanitizer=dump.HeaderSanitizer(), + ) + + assert b'request:password: ********************\r\n' in array + assert b'foobar' not in array + + def test_dump_request_with_sensitive_body(self): + self.configure_request( + url='http://example.com/', + method='GET', + body=b'my request body', + ) + array = bytearray() + prefixes = dump.PrefixSettings('request:', 'response:') + dump._dump_request_data( + request=self.request, + prefixes=prefixes, + bytearr=array, + sanitizer=SensitiveBodySanitizer(), + ) + + assert b'request:#REDACTED REQUEST#\r\n' in array + def test_dump_response_data(self): """Build up the response data into a 
bytearray.""" self.configure_response( @@ -327,6 +404,55 @@ def test_dump_response_data_with_unknown_http_version(self): assert b'response:HTTP/? 201 OK\r\n' in array assert b'response:Content-Type: application/json\r\n' in array + def test_dump_response_with_sensitive_headers(self): + self.configure_response( + url='https://example.com/redirected', + content=b'foobarbogus', + reason=b'OK', + ) + self.configure_httpresponse( + headers={'SamlResponse': 'fancy_footwork'}, + reason=b'OK', + status=201, + version=HTTP_UNKNOWN, + ) + + array = bytearray() + prefixes = dump.PrefixSettings('request:', 'response:') + dump._dump_response_data( + response=self.response, + prefixes=prefixes, + bytearr=array, + sanitizer=dump.HeaderSanitizer(), + ) + + assert b'response:SamlResponse: ********************\r\n' in array + assert b'fancy_footwork' not in array + + def test_dump_response_with_sensitive_body(self): + self.configure_response( + url='https://example.com/redirected', + content=b'foobarbogus', + reason=b'OK', + ) + self.configure_httpresponse( + headers={'SamlResponse': 'fancy_footwork'}, + reason=b'OK', + status=201, + version=HTTP_UNKNOWN, + ) + + array = bytearray() + prefixes = dump.PrefixSettings('request:', 'response:') + dump._dump_response_data( + response=self.response, + prefixes=prefixes, + bytearr=array, + sanitizer=SensitiveBodySanitizer(), + ) + + assert b'response:\r\n#REDACTED RESPONSE#' in array + class TestResponsePublicFunctions(RequestResponseMixin):