Skip to content

Commit

Permalink
Add a test for multiple data types extraction. (#233)
Browse files Browse the repository at this point in the history
  • Loading branch information
wRAR authored Dec 9, 2024
1 parent 5fbf0bc commit c78d304
Show file tree
Hide file tree
Showing 2 changed files with 62 additions and 0 deletions.
7 changes: 7 additions & 0 deletions tests/mockserver.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,13 @@ def render_POST(self, request):
},
}

if request_data.get("productNavigation") is True:
response_data["productNavigation"] = {
"url": response_data["url"],
"name": "Product navigation",
"pageNumber": 0,
}

return json.dumps(response_data).encode()


Expand Down
55 changes: 55 additions & 0 deletions tests/test_providers.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
CustomAttributes,
CustomAttributesValues,
Product,
ProductNavigation,
)
from zyte_common_items.fields import auto_field

Expand Down Expand Up @@ -68,6 +69,13 @@ class ProductPage(BasePage):
product: Product


@attrs.define
class ProductNavigationPage(BasePage):
    """Page object that declares browser data plus a ``ProductNavigation`` item.

    The class only declares its three dependencies as attrs fields; it adds
    no behavior of its own on top of ``BasePage``.
    """

    # Browser-rendered HTML of the page.
    html: BrowserHtml
    # Browser response object; callers in this file read its ``html`` attribute
    # on the sibling ``ProductPage``.
    response: BrowserResponse
    # Product navigation item associated with the page.
    product_nav: ProductNavigation


class ZyteAPISpider(Spider):
url: str

Expand Down Expand Up @@ -1723,3 +1731,50 @@ def parse(self, response: DummyResponse, product: Product):

# Reset rules
default_registry.__init__() # type: ignore[misc]


class ZyteAPIMultipleSpider(Spider):
    """Spider whose callback requests two different page objects at once.

    The callback takes both a ``ProductPage`` and a ``ProductNavigationPage``
    and merges selected fields from each into one yielded item.
    """

    url: str

    def start_requests(self):
        """Yield the single request for ``self.url``, handled by ``parse_``."""
        request = Request(self.url, callback=self.parse_)
        yield request

    def parse_(
        self,
        response: DummyResponse,
        page: ProductPage,
        nav_page: ProductNavigationPage,
    ):
        """Yield one dict combining data from both page objects.

        ``html``, ``response_html`` and ``product`` are read from ``page``;
        ``productNavigation`` is read from ``nav_page``.
        """
        combined = {
            "html": page.html,
            "response_html": page.response.html,
            "product": page.product,
            "productNavigation": nav_page.product_nav,
        }
        yield combined


@ensureDeferred
async def test_multiple_types(mockserver):
    """Check that one callback can receive several extracted data types.

    Crawls a single page with ``ZyteAPIMultipleSpider`` against the mock
    server and verifies the browser HTML, the product and the product
    navigation found in the yielded item.
    """
    expected_html = "<html><body>Hello<h1>World!</h1></body></html>"

    settings = create_scrapy_settings()
    settings["ZYTE_API_URL"] = mockserver.urljoin("/")
    settings["SCRAPY_POET_PROVIDERS"] = {ZyteApiProvider: 0}

    item, url, _ = await crawl_single_item(
        ZyteAPIMultipleSpider, HtmlResource, settings
    )

    # Both HTML fields are expected to carry the same document.
    assert item["html"] == expected_html
    assert item["response_html"] == expected_html

    product_fields = {
        "url": url,
        "name": "Product name",
        "price": "10",
        "currency": "USD",
    }
    assert item["product"] == Product.from_dict(product_fields)

    navigation_fields = {
        "url": url,
        "name": "Product navigation",
        "pageNumber": 0,
    }
    assert item["productNavigation"] == ProductNavigation.from_dict(
        navigation_fields
    )

0 comments on commit c78d304

Please sign in to comment.