From c13f54db87a8e1ae80bea4ae4f5f3a0e3a02096f Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Tue, 7 Feb 2023 21:04:51 +0400 Subject: [PATCH 01/28] Add saving/loading of HttpClient responses. --- tests/test_testing.py | 47 +++++++++++++++++++++++++++- web_poet/page_inputs/client.py | 19 +++++++++++- web_poet/page_inputs/http.py | 5 +++ web_poet/serialization/functions.py | 48 ++++++++++++++++++++++++++--- 4 files changed, 113 insertions(+), 6 deletions(-) diff --git a/tests/test_testing.py b/tests/test_testing.py index ef9d397a..d7baa8a7 100644 --- a/tests/test_testing.py +++ b/tests/test_testing.py @@ -11,7 +11,7 @@ from itemadapter import ItemAdapter from zyte_common_items import Item, Metadata, Product -from web_poet import HttpResponse, WebPage +from web_poet import HttpClient, HttpRequest, HttpResponse, HttpResponseBody, WebPage from web_poet.testing import Fixture from web_poet.testing.fixture import INPUT_DIR_NAME, META_FILE_NAME, OUTPUT_FILE_NAME from web_poet.utils import get_fq_class_name @@ -158,3 +158,48 @@ def test_pytest_frozen_time_tz_windows_pass(pytester, book_list_html_response) - 2022, 3, 4, 20, 21, 22, tzinfo=dateutil.tz.tzlocal() ) _assert_frozen_item(frozen_time, pytester, book_list_html_response) + + +@attrs.define +class ClientPage(WebPage): + client: HttpClient + + async def to_item(self) -> dict: # noqa: D102 + resp1 = await self.client.get("http://books.toscrape.com/1.html") + resp2 = await self.client.get("http://books.toscrape.com/2.html") + return {"foo": "bar", "additional": [resp1.body.decode(), resp2.body.decode()]} + + +def _get_fp_for_url(url: str) -> str: + req = HttpRequest(url=url) + return req.fingerprint() + + +def test_httpclient_serialize(pytester, book_list_html_response) -> None: + client = HttpClient() + body1 = HttpResponseBody(b"body1") + url1 = "http://books.toscrape.com/1.html" + response1 = HttpResponse(url=url1, body=body1, encoding="utf-8") + fp1 = _get_fp_for_url(url1) + client.saved_responses[fp1] = 
response1 + body2 = HttpResponseBody(b"body2") + url2 = "http://books.toscrape.com/2.html" + response2 = HttpResponse(url=url2, body=body2, encoding="utf-8") + fp2 = _get_fp_for_url(url2) + client.saved_responses[fp2] = response2 + + base_dir = pytester.path / "fixtures" / get_fq_class_name(ClientPage) + item = { + "foo": "bar", + "additional": ["body1", "body2"], + } + Fixture.save(base_dir, inputs=[book_list_html_response, client], item=item) + input_dir = base_dir / "test-1" / INPUT_DIR_NAME + assert (input_dir / "HttpResponse-body.html").read_bytes() == bytes( + book_list_html_response.body + ) + assert (input_dir / f"HttpClient-{fp1}-other.json").exists() + assert (input_dir / f"HttpClient-{fp1}-body.html").read_bytes() == bytes(body1) + assert (input_dir / f"HttpClient-{fp2}-body.html").read_bytes() == bytes(body2) + result = pytester.runpytest() + result.assert_outcomes(passed=1) diff --git a/web_poet/page_inputs/client.py b/web_poet/page_inputs/client.py index 3df1f58d..7302df69 100644 --- a/web_poet/page_inputs/client.py +++ b/web_poet/page_inputs/client.py @@ -39,8 +39,17 @@ class HttpClient: :ref:`advanced-downloader-impl` documentation. """ - def __init__(self, request_downloader: Callable = None): + def __init__( + self, + request_downloader: Callable = None, + *, + save_responses: bool = False, + return_only_saved_responses: bool = False, + ): self._request_downloader = request_downloader or _perform_request + self.save_responses = save_responses + self.return_only_saved_responses = return_only_saved_responses + self.saved_responses: Dict[str, HttpResponse] = {} @staticmethod def _handle_status( @@ -165,8 +174,16 @@ async def execute( There is no need to include ``100-3xx`` status codes in ``allow_status``, because :class:`~.HttpResponseError` is not raised for them. 
""" + response_key = request.fingerprint() + if self.return_only_saved_responses: + saved_response = self.saved_responses.get(response_key) + if saved_response: + return saved_response + raise ValueError(f"No saved response for {request}") response = await self._request_downloader(request) self._handle_status(response, request, allow_status=allow_status) + if self.save_responses: + self.saved_responses[response_key] = response # TODO: copy()? return response async def batch_execute( diff --git a/web_poet/page_inputs/http.py b/web_poet/page_inputs/http.py index 2aa1b16f..8d435825 100644 --- a/web_poet/page_inputs/http.py +++ b/web_poet/page_inputs/http.py @@ -1,4 +1,5 @@ import json +from hashlib import sha1 from typing import Any, AnyStr, Dict, List, Optional, Tuple, Type, TypeVar, Union from urllib.parse import urljoin @@ -181,6 +182,10 @@ def urljoin(self, url: Union[str, _RequestUrl, _ResponseUrl]) -> _RequestUrl: If *url* is relative, it is made absolute relative to :attr:`url`.""" return _RequestUrl(urljoin(str(self.url), str(url))) + def fingerprint(self) -> str: + """Return the fingerprint of this request.""" + return sha1(str(self.url).encode()).hexdigest() # TODO + @attrs.define(auto_attribs=False, slots=False, eq=False) class HttpResponse(SelectableMixin): diff --git a/web_poet/serialization/functions.py b/web_poet/serialization/functions.py index 4fa3947d..9bcefeac 100644 --- a/web_poet/serialization/functions.py +++ b/web_poet/serialization/functions.py @@ -1,9 +1,20 @@ import json -from typing import Type - -from .. import HttpResponse, HttpResponseBody, HttpResponseHeaders, ResponseUrl +from typing import Dict, Type + +from .. 
import ( + HttpClient, + HttpResponse, + HttpResponseBody, + HttpResponseHeaders, + ResponseUrl, +) from ..page_inputs.url import _Url -from .api import SerializedLeafData, register_serialization +from .api import ( + SerializedLeafData, + deserialize_leaf, + register_serialization, + serialize_leaf, +) def _serialize_HttpResponse(o: HttpResponse) -> SerializedLeafData: @@ -60,3 +71,32 @@ def _deserialize__Url(cls: Type[_Url], data: SerializedLeafData) -> _Url: register_serialization(_serialize__Url, _deserialize__Url) + + +def _serialize_HttpClient(o: HttpClient) -> SerializedLeafData: + serialized_data: SerializedLeafData = {} + for response_key, response in o.saved_responses.items(): + serialized_response = serialize_leaf(response) + key_prefix = response_key + "-" + for k, v in serialized_response.items(): + serialized_data[key_prefix + k] = v + return serialized_data + + +def _deserialize_HttpClient( + cls: Type[HttpClient], data: SerializedLeafData +) -> HttpClient: + serialized_responses: Dict[str, SerializedLeafData] = {} + for k, v in data.items(): + response_key, subkey = k.rsplit("-", 1) + serialized_responses.setdefault(response_key, {})[subkey] = v + + result = cls(return_only_saved_responses=True) + for response_key, serialized_response in serialized_responses.items(): + result.saved_responses[response_key] = deserialize_leaf( + HttpResponse, serialized_response + ) + return result + + +register_serialization(_serialize_HttpClient, _deserialize_HttpClient) From 1b666d2b97f40e5929a81a9413d9fd68b8a99cf0 Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Wed, 8 Feb 2023 14:58:17 +0400 Subject: [PATCH 02/28] Improve the fingerprinting and move to a separate function. 
--- tests/test_testing.py | 5 +++-- web_poet/page_inputs/client.py | 3 ++- web_poet/page_inputs/http.py | 16 ++++++++++++---- 3 files changed, 17 insertions(+), 7 deletions(-) diff --git a/tests/test_testing.py b/tests/test_testing.py index d7baa8a7..7d4a364e 100644 --- a/tests/test_testing.py +++ b/tests/test_testing.py @@ -12,6 +12,7 @@ from zyte_common_items import Item, Metadata, Product from web_poet import HttpClient, HttpRequest, HttpResponse, HttpResponseBody, WebPage +from web_poet.page_inputs.http import request_fingerprint from web_poet.testing import Fixture from web_poet.testing.fixture import INPUT_DIR_NAME, META_FILE_NAME, OUTPUT_FILE_NAME from web_poet.utils import get_fq_class_name @@ -172,10 +173,10 @@ async def to_item(self) -> dict: # noqa: D102 def _get_fp_for_url(url: str) -> str: req = HttpRequest(url=url) - return req.fingerprint() + return request_fingerprint(req) -def test_httpclient_serialize(pytester, book_list_html_response) -> None: +def test_httpclient(pytester, book_list_html_response) -> None: client = HttpClient() body1 = HttpResponseBody(b"body1") url1 = "http://books.toscrape.com/1.html" diff --git a/web_poet/page_inputs/client.py b/web_poet/page_inputs/client.py index 7302df69..b3eff44d 100644 --- a/web_poet/page_inputs/client.py +++ b/web_poet/page_inputs/client.py @@ -9,6 +9,7 @@ HttpRequestBody, HttpRequestHeaders, HttpResponse, + request_fingerprint, ) from web_poet.page_inputs.url import _Url from web_poet.requests import _perform_request @@ -174,7 +175,7 @@ async def execute( There is no need to include ``100-3xx`` status codes in ``allow_status``, because :class:`~.HttpResponseError` is not raised for them. 
""" - response_key = request.fingerprint() + response_key = request_fingerprint(request) if self.return_only_saved_responses: saved_response = self.saved_responses.get(response_key) if saved_response: diff --git a/web_poet/page_inputs/http.py b/web_poet/page_inputs/http.py index 8d435825..99ce6d0e 100644 --- a/web_poet/page_inputs/http.py +++ b/web_poet/page_inputs/http.py @@ -12,6 +12,7 @@ resolve_encoding, ) from w3lib.html import get_base_url +from w3lib.url import canonicalize_url from web_poet._base import _HttpHeaders from web_poet.mixins import SelectableMixin @@ -182,10 +183,6 @@ def urljoin(self, url: Union[str, _RequestUrl, _ResponseUrl]) -> _RequestUrl: If *url* is relative, it is made absolute relative to :attr:`url`.""" return _RequestUrl(urljoin(str(self.url), str(url))) - def fingerprint(self) -> str: - """Return the fingerprint of this request.""" - return sha1(str(self.url).encode()).hexdigest() # TODO - @attrs.define(auto_attribs=False, slots=False, eq=False) class HttpResponse(SelectableMixin): @@ -305,3 +302,14 @@ def _auto_detect_fun(self, body: bytes) -> Optional[str]: except UnicodeError: continue return resolve_encoding(enc) + + +def request_fingerprint(req: HttpRequest) -> str: + """Return the fingerprint of the request.""" + fp = sha1() + fp.update(req.method.encode() + b"\n") + fp.update(canonicalize_url(str(req.url)).encode()) + fp.update(str(req.url).encode() + b"\n") + for name, value in sorted(req.headers.items()): + fp.update(f"{name}:{value}\n".encode()) + return fp.hexdigest() From 059dbaa3871476c3b6f1e2336c1540a0f0a0133b Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Wed, 8 Feb 2023 15:49:26 +0400 Subject: [PATCH 03/28] Don't calculate the fingerprint when not needed. 
--- web_poet/page_inputs/client.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/web_poet/page_inputs/client.py b/web_poet/page_inputs/client.py index b3eff44d..de076ddd 100644 --- a/web_poet/page_inputs/client.py +++ b/web_poet/page_inputs/client.py @@ -175,16 +175,17 @@ async def execute( There is no need to include ``100-3xx`` status codes in ``allow_status``, because :class:`~.HttpResponseError` is not raised for them. """ - response_key = request_fingerprint(request) if self.return_only_saved_responses: - saved_response = self.saved_responses.get(response_key) + saved_response = self.saved_responses.get(request_fingerprint(request)) if saved_response: return saved_response raise ValueError(f"No saved response for {request}") + response = await self._request_downloader(request) self._handle_status(response, request, allow_status=allow_status) if self.save_responses: - self.saved_responses[response_key] = response # TODO: copy()? + # TODO: copy()? + self.saved_responses[request_fingerprint(request)] = response return response async def batch_execute( From d181c84722331164bd5534af011d3b4f7da781c0 Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Wed, 8 Feb 2023 16:31:15 +0400 Subject: [PATCH 04/28] Set saved_responses via an __init__ arg. 
--- tests/test_testing.py | 8 +++++--- web_poet/page_inputs/client.py | 3 ++- web_poet/serialization/functions.py | 9 ++++----- 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/tests/test_testing.py b/tests/test_testing.py index 7d4a364e..5f837b78 100644 --- a/tests/test_testing.py +++ b/tests/test_testing.py @@ -177,17 +177,19 @@ def _get_fp_for_url(url: str) -> str: def test_httpclient(pytester, book_list_html_response) -> None: - client = HttpClient() body1 = HttpResponseBody(b"body1") url1 = "http://books.toscrape.com/1.html" response1 = HttpResponse(url=url1, body=body1, encoding="utf-8") fp1 = _get_fp_for_url(url1) - client.saved_responses[fp1] = response1 body2 = HttpResponseBody(b"body2") url2 = "http://books.toscrape.com/2.html" response2 = HttpResponse(url=url2, body=body2, encoding="utf-8") fp2 = _get_fp_for_url(url2) - client.saved_responses[fp2] = response2 + responses = { + fp1: response1, + fp2: response2, + } + client = HttpClient(responses=responses) base_dir = pytester.path / "fixtures" / get_fq_class_name(ClientPage) item = { diff --git a/web_poet/page_inputs/client.py b/web_poet/page_inputs/client.py index de076ddd..f2907c81 100644 --- a/web_poet/page_inputs/client.py +++ b/web_poet/page_inputs/client.py @@ -46,11 +46,12 @@ def __init__( *, save_responses: bool = False, return_only_saved_responses: bool = False, + responses: Optional[Dict[str, HttpResponse]] = None, ): self._request_downloader = request_downloader or _perform_request self.save_responses = save_responses self.return_only_saved_responses = return_only_saved_responses - self.saved_responses: Dict[str, HttpResponse] = {} + self.saved_responses: Dict[str, HttpResponse] = responses or {} @staticmethod def _handle_status( diff --git a/web_poet/serialization/functions.py b/web_poet/serialization/functions.py index 9bcefeac..2deb6f1a 100644 --- a/web_poet/serialization/functions.py +++ b/web_poet/serialization/functions.py @@ -91,12 +91,11 @@ def _deserialize_HttpClient( 
response_key, subkey = k.rsplit("-", 1) serialized_responses.setdefault(response_key, {})[subkey] = v - result = cls(return_only_saved_responses=True) + responses: Dict[str, HttpResponse] = {} for response_key, serialized_response in serialized_responses.items(): - result.saved_responses[response_key] = deserialize_leaf( - HttpResponse, serialized_response - ) - return result + responses[response_key] = deserialize_leaf(HttpResponse, serialized_response) + + return cls(return_only_saved_responses=True, responses=responses) register_serialization(_serialize_HttpClient, _deserialize_HttpClient) From 02290cef4fadd9076a6c4986248f175107320d41 Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Wed, 8 Feb 2023 17:49:11 +0400 Subject: [PATCH 05/28] Fix request_fingerprint. --- web_poet/page_inputs/http.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/web_poet/page_inputs/http.py b/web_poet/page_inputs/http.py index 99ce6d0e..4fea0112 100644 --- a/web_poet/page_inputs/http.py +++ b/web_poet/page_inputs/http.py @@ -309,7 +309,7 @@ def request_fingerprint(req: HttpRequest) -> str: fp = sha1() fp.update(req.method.encode() + b"\n") fp.update(canonicalize_url(str(req.url)).encode()) - fp.update(str(req.url).encode() + b"\n") for name, value in sorted(req.headers.items()): fp.update(f"{name}:{value}\n".encode()) + fp.update(req.body) return fp.hexdigest() From 50036ad3767ce780e3f54987ff4d89d8d6f11b70 Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Wed, 8 Feb 2023 19:11:33 +0400 Subject: [PATCH 06/28] Add NoSavedHttpResponse. 
--- web_poet/exceptions/http.py | 17 +++++++++++++++++ web_poet/page_inputs/client.py | 4 ++-- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/web_poet/exceptions/http.py b/web_poet/exceptions/http.py index 44ccab28..ad304535 100644 --- a/web_poet/exceptions/http.py +++ b/web_poet/exceptions/http.py @@ -73,3 +73,20 @@ def __init__( if msg is None: msg = f"Unexpected HTTP Response received: {self.response}" super().__init__(msg, request=request) + + +class NoSavedHttpResponse(HttpError): + """Indicates that there is no saved response for this request. + + Can only be raised when a :class:`~.HttpClient` instance is used to + get saved responses. + + :param request: The :class:`~.HttpRequest` instance that was used. + :type request: HttpRequest + """ + + def __init__(self, msg: str = None, request: HttpRequest = None): + self.request = request + if msg is None: + msg = f"There is no saved reponse available for this HTTP Request: {self.request}" + super().__init__(msg) diff --git a/web_poet/page_inputs/client.py b/web_poet/page_inputs/client.py index f2907c81..868a1c5e 100644 --- a/web_poet/page_inputs/client.py +++ b/web_poet/page_inputs/client.py @@ -3,7 +3,7 @@ from http import HTTPStatus from typing import Callable, Dict, List, Optional, Union -from web_poet.exceptions import HttpResponseError +from web_poet.exceptions import HttpResponseError, NoSavedHttpResponse from web_poet.page_inputs.http import ( HttpRequest, HttpRequestBody, @@ -180,7 +180,7 @@ async def execute( saved_response = self.saved_responses.get(request_fingerprint(request)) if saved_response: return saved_response - raise ValueError(f"No saved response for {request}") + raise NoSavedHttpResponse(request=request) response = await self._request_downloader(request) self._handle_status(response, request, allow_status=allow_status) From 70155d3c2bd49ced4617373faf199696cc8c94f1 Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Mon, 13 Feb 2023 14:21:35 +0400 Subject: [PATCH 07/28] 
Save requests together with responses in HttpClient. --- tests/test_testing.py | 29 ++++++------ web_poet/page_inputs/client.py | 30 ++++++++---- web_poet/serialization/functions.py | 71 ++++++++++++++++++++++++----- 3 files changed, 96 insertions(+), 34 deletions(-) diff --git a/tests/test_testing.py b/tests/test_testing.py index 5f837b78..a63f13c2 100644 --- a/tests/test_testing.py +++ b/tests/test_testing.py @@ -12,7 +12,6 @@ from zyte_common_items import Item, Metadata, Product from web_poet import HttpClient, HttpRequest, HttpResponse, HttpResponseBody, WebPage -from web_poet.page_inputs.http import request_fingerprint from web_poet.testing import Fixture from web_poet.testing.fixture import INPUT_DIR_NAME, META_FILE_NAME, OUTPUT_FILE_NAME from web_poet.utils import get_fq_class_name @@ -171,24 +170,19 @@ async def to_item(self) -> dict: # noqa: D102 return {"foo": "bar", "additional": [resp1.body.decode(), resp2.body.decode()]} -def _get_fp_for_url(url: str) -> str: - req = HttpRequest(url=url) - return request_fingerprint(req) - - def test_httpclient(pytester, book_list_html_response) -> None: body1 = HttpResponseBody(b"body1") url1 = "http://books.toscrape.com/1.html" + request1 = HttpRequest(url1) response1 = HttpResponse(url=url1, body=body1, encoding="utf-8") - fp1 = _get_fp_for_url(url1) body2 = HttpResponseBody(b"body2") url2 = "http://books.toscrape.com/2.html" + request2 = HttpRequest(url2) response2 = HttpResponse(url=url2, body=body2, encoding="utf-8") - fp2 = _get_fp_for_url(url2) - responses = { - fp1: response1, - fp2: response2, - } + responses = [ + (request1, response1), + (request2, response2), + ] client = HttpClient(responses=responses) base_dir = pytester.path / "fixtures" / get_fq_class_name(ClientPage) @@ -201,8 +195,13 @@ def test_httpclient(pytester, book_list_html_response) -> None: assert (input_dir / "HttpResponse-body.html").read_bytes() == bytes( book_list_html_response.body ) - assert (input_dir / 
f"HttpClient-{fp1}-other.json").exists() - assert (input_dir / f"HttpClient-{fp1}-body.html").read_bytes() == bytes(body1) - assert (input_dir / f"HttpClient-{fp2}-body.html").read_bytes() == bytes(body2) + assert (input_dir / "HttpClient-0-HttpRequest.other.json").exists() + assert (input_dir / "HttpClient-0-HttpResponse.other.json").exists() + assert (input_dir / "HttpClient-0-HttpResponse.body.html").read_bytes() == bytes( + body1 + ) + assert (input_dir / "HttpClient-1-HttpResponse.body.html").read_bytes() == bytes( + body2 + ) result = pytester.runpytest() result.assert_outcomes(passed=1) diff --git a/web_poet/page_inputs/client.py b/web_poet/page_inputs/client.py index 868a1c5e..fbf4d1ea 100644 --- a/web_poet/page_inputs/client.py +++ b/web_poet/page_inputs/client.py @@ -1,7 +1,7 @@ import asyncio import logging from http import HTTPStatus -from typing import Callable, Dict, List, Optional, Union +from typing import Callable, Dict, Iterable, List, Optional, Tuple, Union from web_poet.exceptions import HttpResponseError, NoSavedHttpResponse from web_poet.page_inputs.http import ( @@ -9,7 +9,6 @@ HttpRequestBody, HttpRequestHeaders, HttpResponse, - request_fingerprint, ) from web_poet.page_inputs.url import _Url from web_poet.requests import _perform_request @@ -46,12 +45,14 @@ def __init__( *, save_responses: bool = False, return_only_saved_responses: bool = False, - responses: Optional[Dict[str, HttpResponse]] = None, + responses: Optional[Iterable[Tuple[HttpRequest, HttpResponse]]] = None, ): self._request_downloader = request_downloader or _perform_request self.save_responses = save_responses self.return_only_saved_responses = return_only_saved_responses - self.saved_responses: Dict[str, HttpResponse] = responses or {} + self.saved_responses: List[Tuple[HttpRequest, HttpResponse]] = ( + list(responses) if responses else [] + ) @staticmethod def _handle_status( @@ -177,16 +178,16 @@ async def execute( because :class:`~.HttpResponseError` is not raised for 
them. """ if self.return_only_saved_responses: - saved_response = self.saved_responses.get(request_fingerprint(request)) - if saved_response: - return saved_response + for saved_request, saved_response in self.saved_responses: + if self.compare_requests(saved_request, request): + return saved_response raise NoSavedHttpResponse(request=request) response = await self._request_downloader(request) self._handle_status(response, request, allow_status=allow_status) if self.save_responses: # TODO: copy()? - self.saved_responses[request_fingerprint(request)] = response + self.saved_responses.append((request, response)) return response async def batch_execute( @@ -227,3 +228,16 @@ async def batch_execute( *coroutines, return_exceptions=return_exceptions ) return responses + + @staticmethod + def compare_requests( # noqa: D102 + request1: HttpRequest, request2: HttpRequest + ) -> bool: + return all( + [ + request1.method == request2.method, + str(request1.url) == str(request2.url), + request1.headers == request2.headers, + request1.body == request2.body, + ] + ) diff --git a/web_poet/serialization/functions.py b/web_poet/serialization/functions.py index 2deb6f1a..9d33c1e4 100644 --- a/web_poet/serialization/functions.py +++ b/web_poet/serialization/functions.py @@ -1,14 +1,17 @@ import json -from typing import Dict, Type +from typing import Dict, List, Tuple, Type from .. 
import ( HttpClient, + HttpRequest, + HttpRequestBody, + HttpRequestHeaders, HttpResponse, HttpResponseBody, HttpResponseHeaders, ResponseUrl, ) -from ..page_inputs.url import _Url +from ..page_inputs.url import RequestUrl, _Url from .api import ( SerializedLeafData, deserialize_leaf, @@ -17,6 +20,38 @@ ) +def _serialize_HttpRequest(o: HttpRequest) -> SerializedLeafData: + other_data = { + "url": str(o.url), + "method": o.method, + "headers": list(o.headers.items()), + } + result: SerializedLeafData = { + "other.json": json.dumps( + other_data, ensure_ascii=False, sort_keys=True, indent=2 + ).encode(), + } + if o.body: + result["body.txt"] = bytes(o.body) + return result + + +def _deserialize_HttpRequest( + cls: Type[HttpRequest], data: SerializedLeafData +) -> HttpRequest: + body = HttpRequestBody(data.get("body.txt", b"")) + other_data = json.loads(data["other.json"]) + return cls( + body=body, + url=RequestUrl(other_data["url"]), + method=other_data["method"], + headers=HttpRequestHeaders(other_data["headers"]), + ) + + +register_serialization(_serialize_HttpRequest, _deserialize_HttpRequest) + + def _serialize_HttpResponse(o: HttpResponse) -> SerializedLeafData: other_data = { "url": str(o.url), @@ -75,25 +110,39 @@ def _deserialize__Url(cls: Type[_Url], data: SerializedLeafData) -> _Url: def _serialize_HttpClient(o: HttpClient) -> SerializedLeafData: serialized_data: SerializedLeafData = {} - for response_key, response in o.saved_responses.items(): + for i, (request, response) in enumerate(o.saved_responses): + serialized_request = serialize_leaf(request) + for k, v in serialized_request.items(): + serialized_data[f"{i}-HttpRequest.{k}"] = v serialized_response = serialize_leaf(response) - key_prefix = response_key + "-" for k, v in serialized_response.items(): - serialized_data[key_prefix + k] = v + serialized_data[f"{i}-HttpResponse.{k}"] = v return serialized_data def _deserialize_HttpClient( cls: Type[HttpClient], data: SerializedLeafData ) -> HttpClient: 
+ responses: List[Tuple[HttpRequest, HttpResponse]] = [] + + serialized_requests: Dict[str, SerializedLeafData] = {} serialized_responses: Dict[str, SerializedLeafData] = {} for k, v in data.items(): - response_key, subkey = k.rsplit("-", 1) - serialized_responses.setdefault(response_key, {})[subkey] = v - - responses: Dict[str, HttpResponse] = {} - for response_key, serialized_response in serialized_responses.items(): - responses[response_key] = deserialize_leaf(HttpResponse, serialized_response) + # k is number-("HttpRequest"|"HttpResponse").("body"|"other").ext + key, type_suffix = k.split("-", 1) + type_name, suffix = type_suffix.split(".", 1) + if type_name == "HttpRequest": + serialized_requests.setdefault(key, {})[suffix] = v + elif type_name == "HttpResponse": + serialized_responses.setdefault(key, {})[suffix] = v + + for key, serialized_request in serialized_requests.items(): + serialized_response = serialized_responses.get(key) + if not serialized_response: + continue + request = deserialize_leaf(HttpRequest, serialized_request) + response = deserialize_leaf(HttpResponse, serialized_response) + responses.append((request, response)) return cls(return_only_saved_responses=True, responses=responses) From 9a1e3ebaf185af8775fe0431d79257b3a5c92475 Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Mon, 13 Feb 2023 14:34:45 +0400 Subject: [PATCH 08/28] Calculate fingerprints for saved requests. 
--- web_poet/page_inputs/client.py | 28 ++++++++++------------------ web_poet/serialization/functions.py | 2 +- 2 files changed, 11 insertions(+), 19 deletions(-) diff --git a/web_poet/page_inputs/client.py b/web_poet/page_inputs/client.py index fbf4d1ea..420c1a74 100644 --- a/web_poet/page_inputs/client.py +++ b/web_poet/page_inputs/client.py @@ -9,6 +9,7 @@ HttpRequestBody, HttpRequestHeaders, HttpResponse, + request_fingerprint, ) from web_poet.page_inputs.url import _Url from web_poet.requests import _perform_request @@ -50,9 +51,9 @@ def __init__( self._request_downloader = request_downloader or _perform_request self.save_responses = save_responses self.return_only_saved_responses = return_only_saved_responses - self.saved_responses: List[Tuple[HttpRequest, HttpResponse]] = ( - list(responses) if responses else [] - ) + self._saved_responses: Dict[str, Tuple[HttpRequest, HttpResponse]] = { + request_fingerprint(req): (req, resp) for req, resp in responses or {} + } @staticmethod def _handle_status( @@ -178,8 +179,8 @@ async def execute( because :class:`~.HttpResponseError` is not raised for them. """ if self.return_only_saved_responses: - for saved_request, saved_response in self.saved_responses: - if self.compare_requests(saved_request, request): + for fp, (_, saved_response) in self._saved_responses.items(): + if request_fingerprint(request) == fp: return saved_response raise NoSavedHttpResponse(request=request) @@ -187,7 +188,7 @@ async def execute( self._handle_status(response, request, allow_status=allow_status) if self.save_responses: # TODO: copy()? 
- self.saved_responses.append((request, response)) + self._saved_responses[request_fingerprint(request)] = (request, response) return response async def batch_execute( @@ -229,15 +230,6 @@ async def batch_execute( ) return responses - @staticmethod - def compare_requests( # noqa: D102 - request1: HttpRequest, request2: HttpRequest - ) -> bool: - return all( - [ - request1.method == request2.method, - str(request1.url) == str(request2.url), - request1.headers == request2.headers, - request1.body == request2.body, - ] - ) + def get_saved_responses(self) -> Iterable[Tuple[HttpRequest, HttpResponse]]: + """Return saved requests and responses.""" + return self._saved_responses.values() diff --git a/web_poet/serialization/functions.py b/web_poet/serialization/functions.py index 9d33c1e4..9cf34a20 100644 --- a/web_poet/serialization/functions.py +++ b/web_poet/serialization/functions.py @@ -110,7 +110,7 @@ def _deserialize__Url(cls: Type[_Url], data: SerializedLeafData) -> _Url: def _serialize_HttpClient(o: HttpClient) -> SerializedLeafData: serialized_data: SerializedLeafData = {} - for i, (request, response) in enumerate(o.saved_responses): + for i, (request, response) in enumerate(o.get_saved_responses()): serialized_request = serialize_leaf(request) for k, v in serialized_request.items(): serialized_data[f"{i}-HttpRequest.{k}"] = v From f931137f70c233e4b212a53c57b9952aa86775bc Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Mon, 13 Feb 2023 17:08:17 +0400 Subject: [PATCH 09/28] Add a test for NoSavedHttpResponse. 
--- tests/test_testing.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/tests/test_testing.py b/tests/test_testing.py index a63f13c2..7cd0d65c 100644 --- a/tests/test_testing.py +++ b/tests/test_testing.py @@ -205,3 +205,23 @@ def test_httpclient(pytester, book_list_html_response) -> None: ) result = pytester.runpytest() result.assert_outcomes(passed=1) + + +def test_httpclient_no_response(pytester, book_list_html_response) -> None: + body1 = HttpResponseBody(b"body1") + url1 = "http://books.toscrape.com/1.html" + request1 = HttpRequest(url1) + response1 = HttpResponse(url=url1, body=body1, encoding="utf-8") + responses = [ + (request1, response1), + ] + client = HttpClient(responses=responses) + + base_dir = pytester.path / "fixtures" / get_fq_class_name(ClientPage) + item = { + "foo": "bar", + "additional": ["body1", "body2"], + } + Fixture.save(base_dir, inputs=[book_list_html_response, client], item=item) + result = pytester.runpytest() + result.assert_outcomes(failed=1) From 364b22908a1d86f45e683b0b367f232da0140c95 Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Mon, 13 Feb 2023 17:08:34 +0400 Subject: [PATCH 10/28] Fix a typo. 
--- web_poet/exceptions/http.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/web_poet/exceptions/http.py b/web_poet/exceptions/http.py index ad304535..a0f2e3f9 100644 --- a/web_poet/exceptions/http.py +++ b/web_poet/exceptions/http.py @@ -88,5 +88,5 @@ class NoSavedHttpResponse(HttpError): def __init__(self, msg: str = None, request: HttpRequest = None): self.request = request if msg is None: - msg = f"There is no saved reponse available for this HTTP Request: {self.request}" + msg = f"There is no saved response available for this HTTP Request: {self.request}" super().__init__(msg) From dc7b8218304057c1dfd5f2367727a535e8080ae0 Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Mon, 13 Feb 2023 19:47:59 +0400 Subject: [PATCH 11/28] Add tests for request_fingerprint, normalize header name case. --- tests/test_page_inputs.py | 27 +++++++++++++++++++++++++++ web_poet/page_inputs/http.py | 2 +- 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/tests/test_page_inputs.py b/tests/test_page_inputs.py index 22713a6f..4a9dc283 100644 --- a/tests/test_page_inputs.py +++ b/tests/test_page_inputs.py @@ -16,6 +16,7 @@ HttpResponseBody, HttpResponseHeaders, ) +from web_poet.page_inputs.http import request_fingerprint @pytest.mark.parametrize("body_cls", [HttpRequestBody, HttpResponseBody]) @@ -580,3 +581,29 @@ def test_responseurl_move() -> None: ), ): ResponseUrl("https://example.com") + + +def test_request_fingerprint() -> None: + req1 = HttpRequest(url="http://toscrape.com/1") + req2 = HttpRequest(url="http://toscrape.com/1") + assert request_fingerprint(req1) == request_fingerprint(req2) + req3 = HttpRequest(url="http://toscrape.com/2") + assert request_fingerprint(req1) != request_fingerprint(req3) + + req4 = HttpRequest(url="http://toscrape.com/1", method="POST") + assert request_fingerprint(req1) != request_fingerprint(req4) + + req5 = HttpRequest(url="http://toscrape.com/1", body=b"") + assert request_fingerprint(req1) == 
request_fingerprint(req5) + req6 = HttpRequest(url="http://toscrape.com/1", body=b"foo") + assert request_fingerprint(req1) != request_fingerprint(req6) + + req7 = HttpRequest(url="http://toscrape.com/1", headers={}) + assert request_fingerprint(req1) == request_fingerprint(req7) + req8 = HttpRequest(url="http://toscrape.com/1", headers={"a": "b"}) + assert request_fingerprint(req1) != request_fingerprint(req8) + req9 = HttpRequest(url="http://toscrape.com/1", headers={"A": "b"}) + assert request_fingerprint(req8) == request_fingerprint(req9) + req10 = HttpRequest(url="http://toscrape.com/1", headers=[("a", "b"), ("a", "c")]) + assert request_fingerprint(req1) != request_fingerprint(req10) + assert request_fingerprint(req8) != request_fingerprint(req10) diff --git a/web_poet/page_inputs/http.py b/web_poet/page_inputs/http.py index 4fea0112..992615a0 100644 --- a/web_poet/page_inputs/http.py +++ b/web_poet/page_inputs/http.py @@ -310,6 +310,6 @@ def request_fingerprint(req: HttpRequest) -> str: fp.update(req.method.encode() + b"\n") fp.update(canonicalize_url(str(req.url)).encode()) for name, value in sorted(req.headers.items()): - fp.update(f"{name}:{value}\n".encode()) + fp.update(f"{name.title()}:{value}\n".encode()) fp.update(req.body) return fp.hexdigest() From 1eb7d720afa424230adf48b82857567ce23a197b Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Tue, 14 Feb 2023 17:55:06 +0400 Subject: [PATCH 12/28] Remove some unnecessary explicit conversions. --- web_poet/serialization/functions.py | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/web_poet/serialization/functions.py b/web_poet/serialization/functions.py index 9cf34a20..ecf3539f 100644 --- a/web_poet/serialization/functions.py +++ b/web_poet/serialization/functions.py @@ -1,17 +1,8 @@ import json from typing import Dict, List, Tuple, Type -from .. 
import ( - HttpClient, - HttpRequest, - HttpRequestBody, - HttpRequestHeaders, - HttpResponse, - HttpResponseBody, - HttpResponseHeaders, - ResponseUrl, -) -from ..page_inputs.url import RequestUrl, _Url +from .. import HttpClient, HttpRequest, HttpRequestBody, HttpResponse, HttpResponseBody +from ..page_inputs.url import _Url from .api import ( SerializedLeafData, deserialize_leaf, @@ -43,9 +34,9 @@ def _deserialize_HttpRequest( other_data = json.loads(data["other.json"]) return cls( body=body, - url=RequestUrl(other_data["url"]), + url=other_data["url"], method=other_data["method"], - headers=HttpRequestHeaders(other_data["headers"]), + headers=other_data["headers"], ) @@ -74,9 +65,9 @@ def _deserialize_HttpResponse( other_data = json.loads(data["other.json"]) return cls( body=body, - url=ResponseUrl(other_data["url"]), + url=other_data["url"], status=other_data["status"], - headers=HttpResponseHeaders(other_data["headers"]), + headers=other_data["headers"], encoding=other_data["_encoding"], ) From 3250f11683ddcdba783a9372861d270fb5527dda Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Tue, 14 Feb 2023 18:01:52 +0400 Subject: [PATCH 13/28] Add a missing newline after the URL in request_fingerprint. 
--- web_poet/page_inputs/http.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/web_poet/page_inputs/http.py b/web_poet/page_inputs/http.py index 992615a0..e57549a8 100644 --- a/web_poet/page_inputs/http.py +++ b/web_poet/page_inputs/http.py @@ -308,7 +308,7 @@ def request_fingerprint(req: HttpRequest) -> str: """Return the fingerprint of the request.""" fp = sha1() fp.update(req.method.encode() + b"\n") - fp.update(canonicalize_url(str(req.url)).encode()) + fp.update(canonicalize_url(str(req.url)).encode() + b"\n") for name, value in sorted(req.headers.items()): fp.update(f"{name.title()}:{value}\n".encode()) fp.update(req.body) From 2068d9dca3b749ee10bdd6efe97ba1f7dc497912 Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Tue, 14 Feb 2023 18:07:53 +0400 Subject: [PATCH 14/28] Replace "other" with "info" in the request/response serialization. --- tests/test_serialization.py | 6 +++--- tests/test_testing.py | 6 +++--- web_poet/serialization/functions.py | 32 ++++++++++++++--------------- 3 files changed, 22 insertions(+), 22 deletions(-) diff --git a/tests/test_serialization.py b/tests/test_serialization.py index 18878029..062a9962 100644 --- a/tests/test_serialization.py +++ b/tests/test_serialization.py @@ -66,7 +66,7 @@ class MyWebPage(WebPage): url = ResponseUrl(url_str) serialized_deps = serialize([book_list_html_response, url]) - other_json = f"""{{ + info_json = f"""{{ "_encoding": "utf-8", "headers": [], "status": null, @@ -75,7 +75,7 @@ class MyWebPage(WebPage): assert serialized_deps == { "HttpResponse": { "body.html": bytes(book_list_html_response.body), - "other.json": other_json, + "info.json": info_json, }, "ResponseUrl": { "txt": url_str.encode(), @@ -147,7 +147,7 @@ class MyWebPage(WebPage): assert (directory / "HttpResponse-body.html").read_bytes() == bytes( book_list_html_response.body ) - assert (directory / "HttpResponse-other.json").exists() + assert (directory / "HttpResponse-info.json").exists() assert (directory / 
"ResponseUrl.txt").exists() assert (directory / "ResponseUrl.txt").read_text( encoding="utf-8" diff --git a/tests/test_testing.py b/tests/test_testing.py index 7cd0d65c..e6080f5f 100644 --- a/tests/test_testing.py +++ b/tests/test_testing.py @@ -30,7 +30,7 @@ def _assert_fixture_files( assert (input_dir / "HttpResponse-body.html").read_bytes() == bytes( book_list_html_response.body ) - assert (input_dir / "HttpResponse-other.json").exists() + assert (input_dir / "HttpResponse-info.json").exists() assert (directory / OUTPUT_FILE_NAME).exists() assert json.loads((directory / OUTPUT_FILE_NAME).read_bytes()) == item if expected_meta: @@ -195,8 +195,8 @@ def test_httpclient(pytester, book_list_html_response) -> None: assert (input_dir / "HttpResponse-body.html").read_bytes() == bytes( book_list_html_response.body ) - assert (input_dir / "HttpClient-0-HttpRequest.other.json").exists() - assert (input_dir / "HttpClient-0-HttpResponse.other.json").exists() + assert (input_dir / "HttpClient-0-HttpRequest.info.json").exists() + assert (input_dir / "HttpClient-0-HttpResponse.info.json").exists() assert (input_dir / "HttpClient-0-HttpResponse.body.html").read_bytes() == bytes( body1 ) diff --git a/web_poet/serialization/functions.py b/web_poet/serialization/functions.py index ecf3539f..29ca2295 100644 --- a/web_poet/serialization/functions.py +++ b/web_poet/serialization/functions.py @@ -12,14 +12,14 @@ def _serialize_HttpRequest(o: HttpRequest) -> SerializedLeafData: - other_data = { + info = { "url": str(o.url), "method": o.method, "headers": list(o.headers.items()), } result: SerializedLeafData = { - "other.json": json.dumps( - other_data, ensure_ascii=False, sort_keys=True, indent=2 + "info.json": json.dumps( + info, ensure_ascii=False, sort_keys=True, indent=2 ).encode(), } if o.body: @@ -31,12 +31,12 @@ def _deserialize_HttpRequest( cls: Type[HttpRequest], data: SerializedLeafData ) -> HttpRequest: body = HttpRequestBody(data.get("body.txt", b"")) - other_data = 
json.loads(data["other.json"]) + info = json.loads(data["info.json"]) return cls( body=body, - url=other_data["url"], - method=other_data["method"], - headers=other_data["headers"], + url=info["url"], + method=info["method"], + headers=info["headers"], ) @@ -44,7 +44,7 @@ def _deserialize_HttpRequest( def _serialize_HttpResponse(o: HttpResponse) -> SerializedLeafData: - other_data = { + info = { "url": str(o.url), "status": o.status, "headers": list(o.headers.items()), @@ -52,8 +52,8 @@ def _serialize_HttpResponse(o: HttpResponse) -> SerializedLeafData: } return { "body.html": bytes(o.body), - "other.json": json.dumps( - other_data, ensure_ascii=False, sort_keys=True, indent=2 + "info.json": json.dumps( + info, ensure_ascii=False, sort_keys=True, indent=2 ).encode(), } @@ -62,13 +62,13 @@ def _deserialize_HttpResponse( cls: Type[HttpResponse], data: SerializedLeafData ) -> HttpResponse: body = HttpResponseBody(data["body.html"]) - other_data = json.loads(data["other.json"]) + info = json.loads(data["info.json"]) return cls( body=body, - url=other_data["url"], - status=other_data["status"], - headers=other_data["headers"], - encoding=other_data["_encoding"], + url=info["url"], + status=info["status"], + headers=info["headers"], + encoding=info["_encoding"], ) @@ -119,7 +119,7 @@ def _deserialize_HttpClient( serialized_requests: Dict[str, SerializedLeafData] = {} serialized_responses: Dict[str, SerializedLeafData] = {} for k, v in data.items(): - # k is number-("HttpRequest"|"HttpResponse").("body"|"other").ext + # k is number-("HttpRequest"|"HttpResponse").("body"|"info").ext key, type_suffix = k.split("-", 1) type_name, suffix = type_suffix.split(".", 1) if type_name == "HttpRequest": From 59386dc8cc3fcb9d76106d2c06ca2894dec49751 Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Tue, 14 Feb 2023 20:01:58 +0400 Subject: [PATCH 15/28] Update file names in the doc. 
--- docs/page-objects/testing.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/page-objects/testing.rst b/docs/page-objects/testing.rst index f58665a2..344b6898 100644 --- a/docs/page-objects/testing.rst +++ b/docs/page-objects/testing.rst @@ -57,14 +57,14 @@ it, that contains data for Page Object inputs and output:: ├── test-1 │ ├── inputs │ │ ├── HttpResponse-body.html - │ │ ├── HttpResponse-other.json + │ │ ├── HttpResponse-info.json │ │ └── ResponseUrl.txt │ ├── meta.json │ └── output.json └─── test-2 ├── inputs │ ├── HttpResponse-body.html - │ ├── HttpResponse-other.json + │ ├── HttpResponse-info.json │ └── ResponseUrl.txt ├── meta.json └── output.json From f5fdcf3e1f867fd998f4466c223d921227d246c3 Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Tue, 14 Feb 2023 21:16:08 +0400 Subject: [PATCH 16/28] Doc improvements. --- docs/page-objects/testing.rst | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/docs/page-objects/testing.rst b/docs/page-objects/testing.rst index 344b6898..371b0ca9 100644 --- a/docs/page-objects/testing.rst +++ b/docs/page-objects/testing.rst @@ -202,3 +202,24 @@ Please also check the official Git LFS documentation for more information. .. _Git LFS: https://git-lfs.com/ .. _implementations: https://github.com/git-lfs/git-lfs/wiki/Implementations + +Additional requests support +=========================== + +If the page object uses the :class:`~.HttpClient` dependency to make +:ref:`additional requests <additional-requests>`, the generated fixtures will +contain these requests and their responses. When the test runs, +:class:`~.HttpClient` will return the saved responses without doing actual +requests. + +Currently requests are compared by their URL, method, headers and body, so if a +page object makes requests that differ between runs, the test won't be able to +find a saved response and will fail.
+ +Test coverage +============= + +The coverage for page object code is reported correctly if tools such as +`coverage`_ are used when running web-poet tests. + +.. _coverage: https://coverage.readthedocs.io/ From a026997b3d35f42ef6a0e4d9e95dcc61f4d8d23f Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Wed, 15 Feb 2023 13:59:30 +0400 Subject: [PATCH 17/28] Simplify response body code. --- tests/test_testing.py | 25 +++++++++---------------- 1 file changed, 9 insertions(+), 16 deletions(-) diff --git a/tests/test_testing.py b/tests/test_testing.py index e6080f5f..00b31520 100644 --- a/tests/test_testing.py +++ b/tests/test_testing.py @@ -11,7 +11,7 @@ from itemadapter import ItemAdapter from zyte_common_items import Item, Metadata, Product -from web_poet import HttpClient, HttpRequest, HttpResponse, HttpResponseBody, WebPage +from web_poet import HttpClient, HttpRequest, HttpResponse, WebPage from web_poet.testing import Fixture from web_poet.testing.fixture import INPUT_DIR_NAME, META_FILE_NAME, OUTPUT_FILE_NAME from web_poet.utils import get_fq_class_name @@ -171,14 +171,12 @@ async def to_item(self) -> dict: # noqa: D102 def test_httpclient(pytester, book_list_html_response) -> None: - body1 = HttpResponseBody(b"body1") url1 = "http://books.toscrape.com/1.html" request1 = HttpRequest(url1) - response1 = HttpResponse(url=url1, body=body1, encoding="utf-8") - body2 = HttpResponseBody(b"body2") + response1 = HttpResponse(url=url1, body=b"body1", encoding="utf-8") url2 = "http://books.toscrape.com/2.html" request2 = HttpRequest(url2) - response2 = HttpResponse(url=url2, body=body2, encoding="utf-8") + response2 = HttpResponse(url=url2, body=b"body2", encoding="utf-8") responses = [ (request1, response1), (request2, response2), @@ -197,23 +195,18 @@ def test_httpclient(pytester, book_list_html_response) -> None: ) assert (input_dir / "HttpClient-0-HttpRequest.info.json").exists() assert (input_dir / "HttpClient-0-HttpResponse.info.json").exists() - assert 
(input_dir / "HttpClient-0-HttpResponse.body.html").read_bytes() == bytes( - body1 - ) - assert (input_dir / "HttpClient-1-HttpResponse.body.html").read_bytes() == bytes( - body2 - ) + assert (input_dir / "HttpClient-0-HttpResponse.body.html").read_bytes() == b"body1" + assert (input_dir / "HttpClient-1-HttpResponse.body.html").read_bytes() == b"body2" result = pytester.runpytest() result.assert_outcomes(passed=1) def test_httpclient_no_response(pytester, book_list_html_response) -> None: - body1 = HttpResponseBody(b"body1") - url1 = "http://books.toscrape.com/1.html" - request1 = HttpRequest(url1) - response1 = HttpResponse(url=url1, body=body1, encoding="utf-8") + url = "http://books.toscrape.com/1.html" + request = HttpRequest(url) + response = HttpResponse(url=url, body=b"body1", encoding="utf-8") responses = [ - (request1, response1), + (request, response), ] client = HttpClient(responses=responses) From d645bafb8fc9228e35e7a9a0b30478a16abbcecb Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Wed, 15 Feb 2023 14:00:56 +0400 Subject: [PATCH 18/28] Improve the body separator in request_fingerprint. --- web_poet/page_inputs/http.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/web_poet/page_inputs/http.py b/web_poet/page_inputs/http.py index e57549a8..9325a36d 100644 --- a/web_poet/page_inputs/http.py +++ b/web_poet/page_inputs/http.py @@ -311,5 +311,5 @@ def request_fingerprint(req: HttpRequest) -> str: fp.update(canonicalize_url(str(req.url)).encode() + b"\n") for name, value in sorted(req.headers.items()): fp.update(f"{name.title()}:{value}\n".encode()) - fp.update(req.body) + fp.update(b"\n" + req.body) return fp.hexdigest() From 263cc597d4cbd362c9f412f71b0afdc582c47f2c Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Wed, 15 Feb 2023 14:16:39 +0400 Subject: [PATCH 19/28] Remove a TODO about copying requests/responses. 
--- web_poet/page_inputs/client.py | 1 - 1 file changed, 1 deletion(-) diff --git a/web_poet/page_inputs/client.py b/web_poet/page_inputs/client.py index 420c1a74..cc800efa 100644 --- a/web_poet/page_inputs/client.py +++ b/web_poet/page_inputs/client.py @@ -187,7 +187,6 @@ async def execute( response = await self._request_downloader(request) self._handle_status(response, request, allow_status=allow_status) if self.save_responses: - # TODO: copy()? self._saved_responses[request_fingerprint(request)] = (request, response) return response From ca919b752bea9a55afeacec70097cf280cb15d37 Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Wed, 15 Feb 2023 14:24:30 +0400 Subject: [PATCH 20/28] Update expected result numbers. --- tests/test_testing.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_testing.py b/tests/test_testing.py index 2023f121..94f71f59 100644 --- a/tests/test_testing.py +++ b/tests/test_testing.py @@ -284,7 +284,7 @@ def test_httpclient(pytester, book_list_html_response) -> None: assert (input_dir / "HttpClient-0-HttpResponse.body.html").read_bytes() == b"body1" assert (input_dir / "HttpClient-1-HttpResponse.body.html").read_bytes() == b"body2" result = pytester.runpytest() - result.assert_outcomes(passed=1) + result.assert_outcomes(passed=3) def test_httpclient_no_response(pytester, book_list_html_response) -> None: @@ -303,4 +303,4 @@ def test_httpclient_no_response(pytester, book_list_html_response) -> None: } Fixture.save(base_dir, inputs=[book_list_html_response, client], item=item) result = pytester.runpytest() - result.assert_outcomes(failed=1) + result.assert_outcomes(failed=3) From f75a83043efdadd1a89ab6ee876e0a775acaba17 Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Wed, 15 Feb 2023 14:27:50 +0400 Subject: [PATCH 21/28] Use _save_fixture where possible. 
--- tests/test_testing.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/test_testing.py b/tests/test_testing.py index 94f71f59..e7960b89 100644 --- a/tests/test_testing.py +++ b/tests/test_testing.py @@ -296,11 +296,15 @@ def test_httpclient_no_response(pytester, book_list_html_response) -> None: ] client = HttpClient(responses=responses) - base_dir = pytester.path / "fixtures" / get_fq_class_name(ClientPage) item = { "foo": "bar", "additional": ["body1", "body2"], } - Fixture.save(base_dir, inputs=[book_list_html_response, client], item=item) + _save_fixture( + pytester, + page_cls=ClientPage, + page_inputs=[book_list_html_response, client], + expected=item, + ) result = pytester.runpytest() result.assert_outcomes(failed=3) From 608ef533e83abfa0164c83da7b6d63075048f269 Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Wed, 15 Feb 2023 14:33:55 +0400 Subject: [PATCH 22/28] Add a test for the request body. --- tests/test_testing.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_testing.py b/tests/test_testing.py index e7960b89..d5195194 100644 --- a/tests/test_testing.py +++ b/tests/test_testing.py @@ -252,7 +252,7 @@ class ClientPage(WebPage): async def to_item(self) -> dict: # noqa: D102 resp1 = await self.client.get("http://books.toscrape.com/1.html") - resp2 = await self.client.get("http://books.toscrape.com/2.html") + resp2 = await self.client.post("http://books.toscrape.com/2.html", body=b"post") return {"foo": "bar", "additional": [resp1.body.decode(), resp2.body.decode()]} @@ -261,7 +261,7 @@ def test_httpclient(pytester, book_list_html_response) -> None: request1 = HttpRequest(url1) response1 = HttpResponse(url=url1, body=b"body1", encoding="utf-8") url2 = "http://books.toscrape.com/2.html" - request2 = HttpRequest(url2) + request2 = HttpRequest(url2, method="POST", body=b"post") response2 = HttpResponse(url=url2, body=b"body2", encoding="utf-8") responses = [ (request1, response1), 
From 445eb78b480bb027a833c4d4f33bfd7747772541 Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Wed, 15 Feb 2023 15:37:42 +0500 Subject: [PATCH 23/28] Update web_poet/page_inputs/http.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Adrián Chaves --- web_poet/page_inputs/http.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/web_poet/page_inputs/http.py b/web_poet/page_inputs/http.py index 9325a36d..dc907a2a 100644 --- a/web_poet/page_inputs/http.py +++ b/web_poet/page_inputs/http.py @@ -311,5 +311,6 @@ def request_fingerprint(req: HttpRequest) -> str: fp.update(canonicalize_url(str(req.url)).encode() + b"\n") for name, value in sorted(req.headers.items()): fp.update(f"{name.title()}:{value}\n".encode()) - fp.update(b"\n" + req.body) + fp.update(b"\n") + fp.update(req.body) return fp.hexdigest() From 7a5c2af9f0c32aef7f949ea95bc4e9188c074fe3 Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Wed, 15 Feb 2023 19:09:28 +0400 Subject: [PATCH 24/28] Add HttpClient files to the serialization docs. 
--- docs/page-objects/testing.rst | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/docs/page-objects/testing.rst b/docs/page-objects/testing.rst index 4ebabb74..84ac456c 100644 --- a/docs/page-objects/testing.rst +++ b/docs/page-objects/testing.rst @@ -63,6 +63,13 @@ it, that contains data for Page Object inputs and output:: │ └── output.json └─── test-2 ├── inputs + │ ├── HttpClient-0-HttpRequest.info.json + │ ├── HttpClient-0-HttpResponse.body.html + │ ├── HttpClient-0-HttpResponse.info.json + │ ├── HttpClient-1-HttpRequest.body.txt + │ ├── HttpClient-1-HttpRequest.info.json + │ ├── HttpClient-1-HttpResponse.body.html + │ ├── HttpClient-1-HttpResponse.info.json │ ├── HttpResponse-body.html │ ├── HttpResponse-info.json │ └── ResponseUrl.txt From fd4016bb28342d9b3a8cc4418baa668d0c3ebec6 Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Wed, 15 Feb 2023 19:24:04 +0400 Subject: [PATCH 25/28] Introduce the SavedResponseData type. --- tests/test_testing.py | 7 ++++--- web_poet/page_inputs/client.py | 31 +++++++++++++++++++++-------- web_poet/serialization/functions.py | 13 ++++++------ 3 files changed, 34 insertions(+), 17 deletions(-) diff --git a/tests/test_testing.py b/tests/test_testing.py index d5195194..08968049 100644 --- a/tests/test_testing.py +++ b/tests/test_testing.py @@ -12,6 +12,7 @@ from zyte_common_items import Item, Metadata, Product from web_poet import HttpClient, HttpRequest, HttpResponse, WebPage +from web_poet.page_inputs.client import SavedResponseData from web_poet.testing import Fixture from web_poet.testing.fixture import INPUT_DIR_NAME, META_FILE_NAME, OUTPUT_FILE_NAME from web_poet.utils import get_fq_class_name @@ -264,8 +265,8 @@ def test_httpclient(pytester, book_list_html_response) -> None: request2 = HttpRequest(url2, method="POST", body=b"post") response2 = HttpResponse(url=url2, body=b"body2", encoding="utf-8") responses = [ - (request1, response1), - (request2, response2), + SavedResponseData(request1, response1), + 
SavedResponseData(request2, response2), ] client = HttpClient(responses=responses) @@ -292,7 +293,7 @@ def test_httpclient_no_response(pytester, book_list_html_response) -> None: request = HttpRequest(url) response = HttpResponse(url=url, body=b"body1", encoding="utf-8") responses = [ - (request, response), + SavedResponseData(request, response), ] client = HttpClient(responses=responses) diff --git a/web_poet/page_inputs/client.py b/web_poet/page_inputs/client.py index cc800efa..01d6a22a 100644 --- a/web_poet/page_inputs/client.py +++ b/web_poet/page_inputs/client.py @@ -1,7 +1,8 @@ import asyncio import logging +from dataclasses import dataclass from http import HTTPStatus -from typing import Callable, Dict, Iterable, List, Optional, Tuple, Union +from typing import Callable, Dict, Iterable, List, Optional, Union from web_poet.exceptions import HttpResponseError, NoSavedHttpResponse from web_poet.page_inputs.http import ( @@ -23,6 +24,18 @@ _StatusList = Union[str, int, List[Union[str, int]]] +@dataclass +class SavedResponseData: + """Class for storing a request and its result.""" + + request: HttpRequest + response: HttpResponse + + def fingerprint(self) -> str: + """Return the request fingerprint.""" + return request_fingerprint(self.request) + + class HttpClient: """Async HTTP client to be used in Page Objects.
@@ -46,13 +59,13 @@ def __init__( *, save_responses: bool = False, return_only_saved_responses: bool = False, - responses: Optional[Iterable[Tuple[HttpRequest, HttpResponse]]] = None, + responses: Optional[Iterable[SavedResponseData]] = None, ): self._request_downloader = request_downloader or _perform_request self.save_responses = save_responses self.return_only_saved_responses = return_only_saved_responses - self._saved_responses: Dict[str, Tuple[HttpRequest, HttpResponse]] = { - request_fingerprint(req): (req, resp) for req, resp in responses or {} + self._saved_responses: Dict[str, SavedResponseData] = { + data.fingerprint(): data for data in responses or [] } @staticmethod @@ -179,15 +192,17 @@ async def execute( because :class:`~.HttpResponseError` is not raised for them. """ if self.return_only_saved_responses: - for fp, (_, saved_response) in self._saved_responses.items(): + for fp, saved_data in self._saved_responses.items(): if request_fingerprint(request) == fp: - return saved_response + return saved_data.response raise NoSavedHttpResponse(request=request) response = await self._request_downloader(request) self._handle_status(response, request, allow_status=allow_status) if self.save_responses: - self._saved_responses[request_fingerprint(request)] = (request, response) + self._saved_responses[request_fingerprint(request)] = SavedResponseData( + request, response + ) return response async def batch_execute( @@ -229,6 +244,6 @@ async def batch_execute( ) return responses - def get_saved_responses(self) -> Iterable[Tuple[HttpRequest, HttpResponse]]: + def get_saved_responses(self) -> Iterable[SavedResponseData]: """Return saved requests and responses.""" return self._saved_responses.values() diff --git a/web_poet/serialization/functions.py b/web_poet/serialization/functions.py index 29ca2295..10ce3938 100644 --- a/web_poet/serialization/functions.py +++ b/web_poet/serialization/functions.py @@ -1,7 +1,8 @@ import json -from typing import Dict, List, Tuple, 
Type +from typing import Dict, List, Type from .. import HttpClient, HttpRequest, HttpRequestBody, HttpResponse, HttpResponseBody +from ..page_inputs.client import SavedResponseData from ..page_inputs.url import _Url from .api import ( SerializedLeafData, @@ -101,11 +102,11 @@ def _deserialize__Url(cls: Type[_Url], data: SerializedLeafData) -> _Url: def _serialize_HttpClient(o: HttpClient) -> SerializedLeafData: serialized_data: SerializedLeafData = {} - for i, (request, response) in enumerate(o.get_saved_responses()): - serialized_request = serialize_leaf(request) + for i, data in enumerate(o.get_saved_responses()): + serialized_request = serialize_leaf(data.request) for k, v in serialized_request.items(): serialized_data[f"{i}-HttpRequest.{k}"] = v - serialized_response = serialize_leaf(response) + serialized_response = serialize_leaf(data.response) for k, v in serialized_response.items(): serialized_data[f"{i}-HttpResponse.{k}"] = v return serialized_data @@ -114,7 +115,7 @@ def _serialize_HttpClient(o: HttpClient) -> SerializedLeafData: def _deserialize_HttpClient( cls: Type[HttpClient], data: SerializedLeafData ) -> HttpClient: - responses: List[Tuple[HttpRequest, HttpResponse]] = [] + responses: List[SavedResponseData] = [] serialized_requests: Dict[str, SerializedLeafData] = {} serialized_responses: Dict[str, SerializedLeafData] = {} @@ -133,7 +134,7 @@ def _deserialize_HttpClient( continue request = deserialize_leaf(HttpRequest, serialized_request) response = deserialize_leaf(HttpResponse, serialized_response) - responses.append((request, response)) + responses.append(SavedResponseData(request, response)) return cls(return_only_saved_responses=True, responses=responses) From 5fbc56d29072ab196fa1e3966bc9a9a79032d345 Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Wed, 15 Feb 2023 21:44:57 +0400 Subject: [PATCH 26/28] Process non-200 responses in tests correctly. 
--- tests/test_testing.py | 37 ++++++++++++++++++++++++++++++++++ web_poet/page_inputs/client.py | 7 ++++++- 2 files changed, 43 insertions(+), 1 deletion(-) diff --git a/tests/test_testing.py b/tests/test_testing.py index 08968049..a8684713 100644 --- a/tests/test_testing.py +++ b/tests/test_testing.py @@ -12,6 +12,7 @@ from zyte_common_items import Item, Metadata, Product from web_poet import HttpClient, HttpRequest, HttpResponse, WebPage +from web_poet.exceptions import HttpResponseError from web_poet.page_inputs.client import SavedResponseData from web_poet.testing import Fixture from web_poet.testing.fixture import INPUT_DIR_NAME, META_FILE_NAME, OUTPUT_FILE_NAME @@ -309,3 +310,39 @@ def test_httpclient_no_response(pytester, book_list_html_response) -> None: ) result = pytester.runpytest() result.assert_outcomes(failed=3) + + +@attrs.define +class ClientExceptionPage(WebPage): + client: HttpClient + + async def to_item(self) -> dict: # noqa: D102 + msg = "" + try: + await self.client.get("http://books.toscrape.com/1.html") + except HttpResponseError as ex: + msg = ex.args[0] + return {"foo": "bar", "exception": msg} + + +def test_httpclient_exception(pytester, book_list_html_response) -> None: + url = "http://books.toscrape.com/1.html" + request = HttpRequest(url) + response = HttpResponse(url=url, body=b"body1", status=404, encoding="utf-8") + responses = [ + SavedResponseData(request, response), + ] + client = HttpClient(responses=responses) + + item = { + "foo": "bar", + "exception": "404 NOT_FOUND response for http://books.toscrape.com/1.html", + } + _save_fixture( + pytester, + page_cls=ClientExceptionPage, + page_inputs=[book_list_html_response, client], + expected=item, + ) + result = pytester.runpytest() + result.assert_outcomes(passed=3) diff --git a/web_poet/page_inputs/client.py b/web_poet/page_inputs/client.py index 01d6a22a..a908c93f 100644 --- a/web_poet/page_inputs/client.py +++ b/web_poet/page_inputs/client.py @@ -194,15 +194,20 @@ async def 
execute( if self.return_only_saved_responses: for fp, saved_data in self._saved_responses.items(): if request_fingerprint(request) == fp: + self._handle_status( + saved_data.response, + saved_data.request, + allow_status=allow_status, + ) return saved_data.response raise NoSavedHttpResponse(request=request) response = await self._request_downloader(request) - self._handle_status(response, request, allow_status=allow_status) if self.save_responses: self._saved_responses[request_fingerprint(request)] = SavedResponseData( request, response ) + self._handle_status(response, request, allow_status=allow_status) return response async def batch_execute( From 95d506953c97ba62bfb0a7a74b79a69b54ce3486 Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Wed, 15 Feb 2023 22:04:42 +0400 Subject: [PATCH 27/28] Move NoSavedHttpResponse to exceptions.core, change its parent class. --- web_poet/exceptions/core.py | 28 ++++++++++++++++++++++++++++ web_poet/exceptions/http.py | 17 ----------------- web_poet/page_inputs/client.py | 3 ++- 3 files changed, 30 insertions(+), 18 deletions(-) diff --git a/web_poet/exceptions/core.py b/web_poet/exceptions/core.py index 89d7ffa8..9f3691a6 100644 --- a/web_poet/exceptions/core.py +++ b/web_poet/exceptions/core.py @@ -5,6 +5,17 @@ These exceptions are tied to how **web-poet** operates. """ +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from web_poet import HttpRequest + + +__all__ = [ + "RequestDownloaderVarError", + "Retry", +] + class RequestDownloaderVarError(Exception): """The ``web_poet.request_downloader_var`` had its contents accessed but there @@ -25,3 +36,20 @@ class Retry(ValueError): """ pass + + +class NoSavedHttpResponse(AssertionError): + """Indicates that there is no saved response for this request. + + Can only be raised when a :class:`~.HttpClient` instance is used to + get saved responses. + + :param request: The :class:`~.HttpRequest` instance that was used. 
+ :type request: HttpRequest + """ + + def __init__(self, msg: str = None, request: "HttpRequest" = None): + self.request = request + if msg is None: + msg = f"There is no saved response available for this HTTP Request: {self.request}" + super().__init__(msg) diff --git a/web_poet/exceptions/http.py b/web_poet/exceptions/http.py index a0f2e3f9..44ccab28 100644 --- a/web_poet/exceptions/http.py +++ b/web_poet/exceptions/http.py @@ -73,20 +73,3 @@ def __init__( if msg is None: msg = f"Unexpected HTTP Response received: {self.response}" super().__init__(msg, request=request) - - -class NoSavedHttpResponse(HttpError): - """Indicates that there is no saved response for this request. - - Can only be raised when a :class:`~.HttpClient` instance is used to - get saved responses. - - :param request: The :class:`~.HttpRequest` instance that was used. - :type request: HttpRequest - """ - - def __init__(self, msg: str = None, request: HttpRequest = None): - self.request = request - if msg is None: - msg = f"There is no saved response available for this HTTP Request: {self.request}" - super().__init__(msg) diff --git a/web_poet/page_inputs/client.py b/web_poet/page_inputs/client.py index a908c93f..428558eb 100644 --- a/web_poet/page_inputs/client.py +++ b/web_poet/page_inputs/client.py @@ -4,7 +4,8 @@ from http import HTTPStatus from typing import Callable, Dict, Iterable, List, Optional, Union -from web_poet.exceptions import HttpResponseError, NoSavedHttpResponse +from web_poet.exceptions import HttpResponseError +from web_poet.exceptions.core import NoSavedHttpResponse from web_poet.page_inputs.http import ( HttpRequest, HttpRequestBody, From 89d8da6ef91f7793a486ec02baedede1a49e3500 Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Wed, 15 Feb 2023 22:23:17 +0400 Subject: [PATCH 28/28] Rename SavedResponseData to _SavedResponseData. 
--- tests/test_testing.py | 10 +++++----- web_poet/page_inputs/client.py | 10 +++++----- web_poet/serialization/functions.py | 6 +++--- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/tests/test_testing.py b/tests/test_testing.py index a8684713..0960f329 100644 --- a/tests/test_testing.py +++ b/tests/test_testing.py @@ -13,7 +13,7 @@ from web_poet import HttpClient, HttpRequest, HttpResponse, WebPage from web_poet.exceptions import HttpResponseError -from web_poet.page_inputs.client import SavedResponseData +from web_poet.page_inputs.client import _SavedResponseData from web_poet.testing import Fixture from web_poet.testing.fixture import INPUT_DIR_NAME, META_FILE_NAME, OUTPUT_FILE_NAME from web_poet.utils import get_fq_class_name @@ -266,8 +266,8 @@ def test_httpclient(pytester, book_list_html_response) -> None: request2 = HttpRequest(url2, method="POST", body=b"post") response2 = HttpResponse(url=url2, body=b"body2", encoding="utf-8") responses = [ - SavedResponseData(request1, response1), - SavedResponseData(request2, response2), + _SavedResponseData(request1, response1), + _SavedResponseData(request2, response2), ] client = HttpClient(responses=responses) @@ -294,7 +294,7 @@ def test_httpclient_no_response(pytester, book_list_html_response) -> None: request = HttpRequest(url) response = HttpResponse(url=url, body=b"body1", encoding="utf-8") responses = [ - SavedResponseData(request, response), + _SavedResponseData(request, response), ] client = HttpClient(responses=responses) @@ -330,7 +330,7 @@ def test_httpclient_exception(pytester, book_list_html_response) -> None: request = HttpRequest(url) response = HttpResponse(url=url, body=b"body1", status=404, encoding="utf-8") responses = [ - SavedResponseData(request, response), + _SavedResponseData(request, response), ] client = HttpClient(responses=responses) diff --git a/web_poet/page_inputs/client.py b/web_poet/page_inputs/client.py index 428558eb..601ff24d 100644 --- 
a/web_poet/page_inputs/client.py +++ b/web_poet/page_inputs/client.py @@ -26,7 +26,7 @@ @dataclass -class SavedResponseData: +class _SavedResponseData: """Class for storing a request and its result.""" request: HttpRequest @@ -60,12 +60,12 @@ def __init__( *, save_responses: bool = False, return_only_saved_responses: bool = False, - responses: Optional[Iterable[SavedResponseData]] = None, + responses: Optional[Iterable[_SavedResponseData]] = None, ): self._request_downloader = request_downloader or _perform_request self.save_responses = save_responses self.return_only_saved_responses = return_only_saved_responses - self._saved_responses: Dict[str, SavedResponseData] = { + self._saved_responses: Dict[str, _SavedResponseData] = { data.fingerprint(): data for data in responses or [] } @@ -205,7 +205,7 @@ async def execute( response = await self._request_downloader(request) if self.save_responses: - self._saved_responses[request_fingerprint(request)] = SavedResponseData( + self._saved_responses[request_fingerprint(request)] = _SavedResponseData( request, response ) self._handle_status(response, request, allow_status=allow_status) @@ -250,6 +250,6 @@ async def batch_execute( ) return responses - def get_saved_responses(self) -> Iterable[SavedResponseData]: + def get_saved_responses(self) -> Iterable[_SavedResponseData]: """Return saved requests and responses.""" return self._saved_responses.values() diff --git a/web_poet/serialization/functions.py b/web_poet/serialization/functions.py index 10ce3938..dcabc46a 100644 --- a/web_poet/serialization/functions.py +++ b/web_poet/serialization/functions.py @@ -2,7 +2,7 @@ from typing import Dict, List, Type from .. 
import HttpClient, HttpRequest, HttpRequestBody, HttpResponse, HttpResponseBody -from ..page_inputs.client import SavedResponseData +from ..page_inputs.client import _SavedResponseData from ..page_inputs.url import _Url from .api import ( SerializedLeafData, @@ -115,7 +115,7 @@ def _serialize_HttpClient(o: HttpClient) -> SerializedLeafData: def _deserialize_HttpClient( cls: Type[HttpClient], data: SerializedLeafData ) -> HttpClient: - responses: List[SavedResponseData] = [] + responses: List[_SavedResponseData] = [] serialized_requests: Dict[str, SerializedLeafData] = {} serialized_responses: Dict[str, SerializedLeafData] = {} @@ -134,7 +134,7 @@ def _deserialize_HttpClient( continue request = deserialize_leaf(HttpRequest, serialized_request) response = deserialize_leaf(HttpResponse, serialized_response) - responses.append(SavedResponseData(request, response)) + responses.append(_SavedResponseData(request, response)) return cls(return_only_saved_responses=True, responses=responses)