From cd8e1c9a37e27ef893ce0dcf4cc7862e7af8bb8b Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Wed, 26 Apr 2023 18:14:53 +0400 Subject: [PATCH 1/7] Support custom item adapters in fixtures. --- tests/test_testing.py | 34 ++++++++++++++++++++++++++++++++- web_poet/testing/fixture.py | 24 ++++++++++++++--------- web_poet/testing/itemadapter.py | 27 ++++++++++++++++++++++++++ 3 files changed, 75 insertions(+), 10 deletions(-) create mode 100644 web_poet/testing/itemadapter.py diff --git a/tests/test_testing.py b/tests/test_testing.py index 50ad4f0b..41da92a3 100644 --- a/tests/test_testing.py +++ b/tests/test_testing.py @@ -1,14 +1,16 @@ import datetime import json import sys +from collections import deque from pathlib import Path -from typing import Optional +from typing import Any, Optional import attrs import dateutil.tz import pytest import time_machine from itemadapter import ItemAdapter +from itemadapter.adapter import DictAdapter from zyte_common_items import Item, Metadata, Product from web_poet import HttpClient, HttpRequest, HttpResponse, WebPage, field @@ -65,6 +67,36 @@ async def to_item(self) -> dict: return {"foo": None} +class CapitalizingDictAdapter(DictAdapter): + def __getitem__(self, field_name: str) -> Any: + item = super().__getitem__(field_name) + if isinstance(item, str): + return item.capitalize() + return item + + +class CustomItemAdapter(ItemAdapter): + ADAPTER_CLASSES = deque([CapitalizingDictAdapter]) + + +def test_fixture_adapter(book_list_html_response, tmp_path) -> None: + item = {"foo": "bar"} + meta = {"adapter_type_name": get_fq_class_name(CustomItemAdapter)} + base_dir = tmp_path / "fixtures" / get_fq_class_name(MyItemPage) + + fixture = Fixture.save( + base_dir, inputs=[book_list_html_response], item=item, meta=meta + ) + saved_output = json.loads(fixture.output_path.read_bytes()) + assert saved_output["foo"] == "Bar" + + loaded_fixture = Fixture(base_dir / "test-1") + loaded_output = loaded_fixture.get_output() + assert loaded_output["foo"] == "Bar" + actual_output = loaded_fixture.get_expected_output() + assert actual_output["foo"] == "Bar" + + def _save_fixture( pytester, page_cls, page_inputs, *, expected_output=None, expected_exception=None ): diff --git a/web_poet/testing/fixture.py b/web_poet/testing/fixture.py index 0066fdea..4605f7fe 100644 --- a/web_poet/testing/fixture.py +++ b/web_poet/testing/fixture.py @@ -5,7 +5,7 @@ import os import sys from pathlib import Path -from typing import Any, Iterable, Optional, Type, TypeVar, Union +from typing import Any, Iterable, Optional, Type, TypeVar, Union, cast import dateutil.parser import dateutil.tz @@ -30,6 +30,7 @@ ItemValueIncorrect, WrongExceptionRaised, ) +from .itemadapter import WebPoetTestItemAdapter logger = logging.getLogger(__name__) @@ -114,10 +115,16 @@ def get_meta(self) -> dict: return {} return json.loads(self.meta_path.read_bytes()) + def _get_adapter_cls(self) -> Type[ItemAdapter]: + type_name = self.get_meta().get("adapter_type_name") + if not type_name: + return WebPoetTestItemAdapter + return cast(Type[ItemAdapter], load_class(type_name)) + def _get_output(self) -> dict: page = self.get_page() item = asyncio.run(ensure_awaitable(page.to_item())) - return ItemAdapter(item).asdict() + return self._get_adapter_cls()(item).asdict() @memoizemethod_noargs def get_output(self) -> dict: @@ -138,10 +145,9 @@ def get_output(self) -> dict: self._output_error = e raise - @classmethod - def item_to_json(cls, item: Any) -> str: + def item_to_json(self, item: Any) -> str: """Convert an item to a JSON string.""" - return _format_json(ItemAdapter(item).asdict()) + return _format_json(self._get_adapter_cls()(item).asdict()) @memoizemethod_noargs def get_expected_output(self) -> dict: @@ -262,13 +268,13 @@ def save( storage = SerializedDataFileStorage(fixture.input_path) storage.write(serialized_inputs) - if item is not None: - with fixture.output_path.open("w") as f: - f.write(cls.item_to_json(item)) - if meta: fixture.meta_path.write_text(_format_json(meta)) + if item is not None: + with fixture.output_path.open("w") as f: + f.write(fixture.item_to_json(item)) + if exception: exc_data = _exception_to_dict(exception) fixture.exception_path.write_text(_format_json(exc_data)) diff --git a/web_poet/testing/itemadapter.py b/web_poet/testing/itemadapter.py new file mode 100644 index 00000000..193ed7ff --- /dev/null +++ b/web_poet/testing/itemadapter.py @@ -0,0 +1,27 @@ +from collections import deque +from typing import Deque, Type + +from itemadapter import ItemAdapter +from itemadapter.adapter import ( + AdapterInterface, + AttrsAdapter, + DataclassAdapter, + DictAdapter, + PydanticAdapter, + ScrapyItemAdapter, +) + + +class WebPoetTestItemAdapter(ItemAdapter): + """A default adapter implementation""" + + # In case the user changes ItemAdapter.ADAPTER_CLASSES it's copied here. + ADAPTER_CLASSES: Deque[Type[AdapterInterface]] = deque( + [ + ScrapyItemAdapter, + DictAdapter, + DataclassAdapter, + AttrsAdapter, + PydanticAdapter, + ] + ) From da6cfbc62a5a51d05dbd075406234c0a29d4893c Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Wed, 26 Apr 2023 18:30:18 +0400 Subject: [PATCH 2/7] Bump the minimum itemadapter version. --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index eebcfcb3..dec73c04 100644 --- a/setup.py +++ b/setup.py @@ -25,7 +25,7 @@ "multidict", "w3lib >= 1.22.0", "async-lru >= 1.0.3", - "itemadapter >= 0.7.0", + "itemadapter >= 0.8.0", "andi", "python-dateutil", "time-machine", From a60a3097a615aaeb56e4d0386dc9e6c2a37a6cfc Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Mon, 1 May 2023 18:29:32 +0400 Subject: [PATCH 3/7] Add docs for adapter_type_name. --- docs/page-objects/testing.rst | 39 +++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/docs/page-objects/testing.rst b/docs/page-objects/testing.rst index 9e401f69..8121996b 100644 --- a/docs/page-objects/testing.rst +++ b/docs/page-objects/testing.rst @@ -322,3 +322,42 @@ The coverage for page object code is reported correctly if tools such as `coverage`_ are used when running web-poet tests. .. _coverage: https://coverage.readthedocs.io/ + +Item adapters +============= + +The testing framework uses the itemadapter_ library to convert items to dicts +when storing them in fixtures and when comparing the expected and the actual +output. As adapters may influence the resulting dicts, it's important to use +the same adapter in both cases and it may be useful to use a different adapter +in tests and in production (for example, you may want to omit empty fields in +production but be able to distinguish between empty and absent fields in +tests). For this you can set the ``adapter_type_name`` field in the metadata +dictionary to the type name of a class that inherits from +:class:`itemadapter.ItemAdapter` and has the adapter(s) you want to use in +tests in its ``ADAPTER_CLASSES`` attribute (see `the relevant itemadapter +docs`_ for more information). An example:: + + from collections import deque + + from itemadapter import ItemAdapter + from itemadapter.adapter import DictAdapter + + + class MyAdapter(DictAdapter): + # any needed customization + ... + + class MyItemAdapter(ItemAdapter): + ADAPTER_CLASSES = deque([MyAdapter]) + +You can then put the fully qualified name of ``MyItemAdapter`` into +``adapter_type_name`` and it will be used by the testing framework. + +If ``adapter_type_name`` is not set, +:class:`~web_poet.testing.itemadapter.WebPoetTestItemAdapter` will be used. +It works like :class:`itemadapter.ItemAdapter` but doesn't change behavior when +:attr:`itemadapter.ItemAdapter.ADAPTER_CLASSES` is modified. + +.. _itemadapter: https://github.com/scrapy/itemadapter +.. _the relevant itemadapter docs: https://github.com/scrapy/itemadapter/#multiple-adapter-classes From 4d65aa54280b262be3334247ac2600266ddf0a01 Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Tue, 2 May 2023 15:30:36 +0400 Subject: [PATCH 4/7] Implement suggested docs changes. --- docs/page-objects/testing.rst | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/docs/page-objects/testing.rst b/docs/page-objects/testing.rst index 8121996b..b76f3241 100644 --- a/docs/page-objects/testing.rst +++ b/docs/page-objects/testing.rst @@ -78,14 +78,16 @@ it, that contains data for Page Object inputs and output:: ├── meta.json └── output.json +.. _fixture-save: + :func:`web_poet.testing.Fixture.save` can be used to create a fixture inside a Page Object directory from an iterable of dependencies, an output item and an optional metadata dictionary. It can optionally take a name for the fixture directory. By default it uses incrementing names "test-1", "test-2" etc. .. note:: - ``output.json`` contains a result of - ``ItemAdapter(page_object.to_item()).asdict()`` saved as JSON. + ``output.json`` contains a result of ``page_object.to_item()`` converted to + a dict using the itemadapter_ library and saved as JSON. After generating a fixture you can edit ``output.json`` to modify expected field values and add new fields, which is useful when creating tests for code @@ -194,9 +196,10 @@ Handling time fields Sometimes output of a page object might depend on the current time. For example, the item may contain the scraping datetime, or a current timestamp may be used to build some URLs. When a test runs at a different time it will break. -To avoid this the metadata dictionary can contain a ``frozen_time`` field set -to the time value used when generating the test. This will instruct the test -runner to use the same time value so that field comparisons are still correct. +To avoid this :ref:`the metadata dictionary ` can contain a +``frozen_time`` field set to the time value used when generating the test. This +will instruct the test runner to use the same time value so that field +comparisons are still correct. The value can be any string understood by `dateutil`_. If it doesn't include timezone information, the local time of the machine will be assumed. If it @@ -329,11 +332,14 @@ Item adapters The testing framework uses the itemadapter_ library to convert items to dicts when storing them in fixtures and when comparing the expected and the actual output. As adapters may influence the resulting dicts, it's important to use -the same adapter in both cases and it may be useful to use a different adapter -in tests and in production (for example, you may want to omit empty fields in -production but be able to distinguish between empty and absent fields in -tests). For this you can set the ``adapter_type_name`` field in the metadata -dictionary to the type name of a class that inherits from +the same adapter when generating and running the tests. + +It may also be useful to use different adapters in tests and in production. For +example, you may want to omit empty fields in production, but be able to +distinguish between empty and absent fields in tests. + +For this you can set the ``adapter_type_name`` field in :ref:`the metadata +dictionary ` to the type name of a class that inherits from :class:`itemadapter.ItemAdapter` and has the adapter(s) you want to use in tests in its ``ADAPTER_CLASSES`` attribute (see `the relevant itemadapter docs`_ for more information). An example:: From 7f401c0767a86c14b4ff2647e9c224644a218c5d Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Tue, 2 May 2023 15:33:36 +0400 Subject: [PATCH 5/7] Rename the exception type key. --- web_poet/serialization/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/web_poet/serialization/utils.py b/web_poet/serialization/utils.py index 9a3c73b8..d2b1d9b3 100644 --- a/web_poet/serialization/utils.py +++ b/web_poet/serialization/utils.py @@ -10,7 +10,7 @@ def _exception_to_dict(ex: Exception) -> Dict[str, Any]: Only the exception type and the first argument are saved. """ return { - "type_name": _get_name_for_class(type(ex)), + "import_path": _get_name_for_class(type(ex)), "msg": ex.args[0] if ex.args else None, } @@ -20,7 +20,7 @@ def _exception_from_dict(data: Dict[str, Any]) -> Exception: Only the exception type and the first argument are restored. """ - exc_cls = load_class(data["type_name"]) + exc_cls = load_class(data["import_path"]) return exc_cls(data["msg"]) From 36efbc941ea4761946ac68e4b19da1b092859c06 Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Tue, 2 May 2023 15:49:36 +0400 Subject: [PATCH 6/7] Change `adapter_type_name` to `adapter`. --- docs/page-objects/testing.rst | 10 +++++----- tests/test_testing.py | 2 +- web_poet/testing/fixture.py | 15 ++++++++++----- 3 files changed, 16 insertions(+), 11 deletions(-) diff --git a/docs/page-objects/testing.rst b/docs/page-objects/testing.rst index b76f3241..49c5e832 100644 --- a/docs/page-objects/testing.rst +++ b/docs/page-objects/testing.rst @@ -338,8 +338,8 @@ It may also be useful to use different adapters in tests and in production. For example, you may want to omit empty fields in production, but be able to distinguish between empty and absent fields in tests. -For this you can set the ``adapter_type_name`` field in :ref:`the metadata -dictionary ` to the type name of a class that inherits from +For this you can set the ``adapter`` field in :ref:`the metadata dictionary +` to the class that inherits from :class:`itemadapter.ItemAdapter` and has the adapter(s) you want to use in tests in its ``ADAPTER_CLASSES`` attribute (see `the relevant itemadapter docs`_ for more information). An example:: @@ -357,10 +357,10 @@ docs`_ for more information). An example:: class MyItemAdapter(ItemAdapter): ADAPTER_CLASSES = deque([MyAdapter]) -You can then put the fully qualified name of ``MyItemAdapter`` into -``adapter_type_name`` and it will be used by the testing framework. +You can then put the ``MyItemAdapter`` class object into ``adapter`` and it +will be used by the testing framework. -If ``adapter_type_name`` is not set, +If ``adapter`` is not set, :class:`~web_poet.testing.itemadapter.WebPoetTestItemAdapter` will be used. It works like :class:`itemadapter.ItemAdapter` but doesn't change behavior when :attr:`itemadapter.ItemAdapter.ADAPTER_CLASSES` is modified. diff --git a/tests/test_testing.py b/tests/test_testing.py index 41da92a3..c26abe1f 100644 --- a/tests/test_testing.py +++ b/tests/test_testing.py @@ -81,7 +81,7 @@ class CustomItemAdapter(ItemAdapter): def test_fixture_adapter(book_list_html_response, tmp_path) -> None: item = {"foo": "bar"} - meta = {"adapter_type_name": get_fq_class_name(CustomItemAdapter)} + meta = {"adapter": CustomItemAdapter} base_dir = tmp_path / "fixtures" / get_fq_class_name(MyItemPage) fixture = Fixture.save( diff --git a/web_poet/testing/fixture.py b/web_poet/testing/fixture.py index 4605f7fe..cc19fa0b 100644 --- a/web_poet/testing/fixture.py +++ b/web_poet/testing/fixture.py @@ -19,7 +19,7 @@ load_class, serialize, ) -from web_poet.utils import ensure_awaitable, memoizemethod_noargs +from web_poet.utils import ensure_awaitable, get_fq_class_name, memoizemethod_noargs from ..serialization.utils import _exception_from_dict, _exception_to_dict, _format_json from .exceptions import ( @@ -113,13 +113,16 @@ def get_meta(self) -> dict: """Return the test metadata.""" if not self.meta_path.exists(): return {} - return json.loads(self.meta_path.read_bytes()) + meta_dict = json.loads(self.meta_path.read_bytes()) + if meta_dict.get("adapter"): + meta_dict["adapter"] = load_class(meta_dict["adapter"]) + return meta_dict def _get_adapter_cls(self) -> Type[ItemAdapter]: - type_name = self.get_meta().get("adapter_type_name") - if not type_name: + cls = self.get_meta().get("adapter") + if not cls: return WebPoetTestItemAdapter - return cast(Type[ItemAdapter], load_class(type_name)) + return cast(Type[ItemAdapter], cls) def _get_output(self) -> dict: page = self.get_page() @@ -269,6 +272,8 @@ def save( storage.write(serialized_inputs) if meta: + if meta.get("adapter"): + meta["adapter"] = get_fq_class_name(meta["adapter"]) fixture.meta_path.write_text(_format_json(meta)) if item is not None: From 74946a2bdbf5bb7a6dbfb2bf8da1345da1edb247 Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Tue, 2 May 2023 19:14:52 +0400 Subject: [PATCH 7/7] Add a section marker for adapter docs. --- docs/page-objects/testing.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/page-objects/testing.rst b/docs/page-objects/testing.rst index 49c5e832..3632cdc3 100644 --- a/docs/page-objects/testing.rst +++ b/docs/page-objects/testing.rst @@ -326,6 +326,8 @@ The coverage for page object code is reported correctly if tools such as .. _coverage: https://coverage.readthedocs.io/ +.. _web-poet-testing-adapters: + Item adapters =============