From c78d304656929bfc5e9561082733235d5d4875ba Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Mon, 9 Dec 2024 14:31:44 +0400 Subject: [PATCH] Add a test for multiple data types extraction. (#233) --- tests/mockserver.py | 7 ++++++ tests/test_providers.py | 55 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 62 insertions(+) diff --git a/tests/mockserver.py b/tests/mockserver.py index 7567a3f1..583323a7 100644 --- a/tests/mockserver.py +++ b/tests/mockserver.py @@ -241,6 +241,13 @@ def render_POST(self, request): }, } + if request_data.get("productNavigation") is True: + response_data["productNavigation"] = { + "url": response_data["url"], + "name": "Product navigation", + "pageNumber": 0, + } + return json.dumps(response_data).encode() diff --git a/tests/test_providers.py b/tests/test_providers.py index 3781943f..e9ac9ab4 100644 --- a/tests/test_providers.py +++ b/tests/test_providers.py @@ -31,6 +31,7 @@ CustomAttributes, CustomAttributesValues, Product, + ProductNavigation, ) from zyte_common_items.fields import auto_field @@ -68,6 +69,13 @@ class ProductPage(BasePage): product: Product +@attrs.define +class ProductNavigationPage(BasePage): + html: BrowserHtml + response: BrowserResponse + product_nav: ProductNavigation + + class ZyteAPISpider(Spider): url: str @@ -1723,3 +1731,50 @@ def parse(self, response: DummyResponse, product: Product): # Reset rules default_registry.__init__() # type: ignore[misc] + + +class ZyteAPIMultipleSpider(Spider): + url: str + + def start_requests(self): + yield Request(self.url, callback=self.parse_) + + def parse_( + self, + response: DummyResponse, + page: ProductPage, + nav_page: ProductNavigationPage, + ): + yield { + "html": page.html, + "response_html": page.response.html, + "product": page.product, + "productNavigation": nav_page.product_nav, + } + + +@ensureDeferred +async def test_multiple_types(mockserver): + settings = create_scrapy_settings() + settings["ZYTE_API_URL"] = mockserver.urljoin("/") + 
settings["SCRAPY_POET_PROVIDERS"] = {ZyteApiProvider: 0} + item, url, _ = await crawl_single_item( + ZyteAPIMultipleSpider, HtmlResource, settings + ) + assert item["html"] == "<html><body>Hello<h1>World!</h1></body></html>" + assert item["response_html"] == "<html><body>Hello<h1>World!</h1></body></html>" + assert item["product"] == Product.from_dict( + dict( + url=url, + name="Product name", + price="10", + currency="USD", + ) + ) + assert item["productNavigation"] == ProductNavigation.from_dict( + dict( + url=url, + name="Product navigation", + pageNumber=0, + ) + )