diff --git a/docs/page-objects/testing.rst b/docs/page-objects/testing.rst index 9e401f69..3632cdc3 100644 --- a/docs/page-objects/testing.rst +++ b/docs/page-objects/testing.rst @@ -78,14 +78,16 @@ it, that contains data for Page Object inputs and output:: ├── meta.json └── output.json +.. _fixture-save: + :func:`web_poet.testing.Fixture.save` can be used to create a fixture inside a Page Object directory from an iterable of dependencies, an output item and an optional metadata dictionary. It can optionally take a name for the fixture directory. By default it uses incrementing names "test-1", "test-2" etc. .. note:: - ``output.json`` contains a result of - ``ItemAdapter(page_object.to_item()).asdict()`` saved as JSON. + ``output.json`` contains a result of ``page_object.to_item()`` converted to + a dict using the itemadapter_ library and saved as JSON. After generating a fixture you can edit ``output.json`` to modify expected field values and add new fields, which is useful when creating tests for code @@ -194,9 +196,10 @@ Handling time fields Sometimes output of a page object might depend on the current time. For example, the item may contain the scraping datetime, or a current timestamp may be used to build some URLs. When a test runs at a different time it will break. -To avoid this the metadata dictionary can contain a ``frozen_time`` field set -to the time value used when generating the test. This will instruct the test -runner to use the same time value so that field comparisons are still correct. +To avoid this :ref:`the metadata dictionary <fixture-save>` can contain a +``frozen_time`` field set to the time value used when generating the test. This +will instruct the test runner to use the same time value so that field +comparisons are still correct. The value can be any string understood by `dateutil`_. If it doesn't include timezone information, the local time of the machine will be assumed. 
If it @@ -322,3 +325,47 @@ The coverage for page object code is reported correctly if tools such as `coverage`_ are used when running web-poet tests. .. _coverage: https://coverage.readthedocs.io/ + +.. _web-poet-testing-adapters: + +Item adapters +============= + +The testing framework uses the itemadapter_ library to convert items to dicts +when storing them in fixtures and when comparing the expected and the actual +output. As adapters may influence the resulting dicts, it's important to use +the same adapter when generating and running the tests. + +It may also be useful to use different adapters in tests and in production. For +example, you may want to omit empty fields in production, but be able to +distinguish between empty and absent fields in tests. + +For this you can set the ``adapter`` field in :ref:`the metadata dictionary +<fixture-save>` to the class that inherits from +:class:`itemadapter.ItemAdapter` and has the adapter(s) you want to use in +tests in its ``ADAPTER_CLASSES`` attribute (see `the relevant itemadapter +docs`_ for more information). An example:: + + from collections import deque + + from itemadapter import ItemAdapter + from itemadapter.adapter import DictAdapter + + + class MyAdapter(DictAdapter): + # any needed customization + ... + + class MyItemAdapter(ItemAdapter): + ADAPTER_CLASSES = deque([MyAdapter]) + +You can then put the ``MyItemAdapter`` class object into ``adapter`` and it +will be used by the testing framework. + +If ``adapter`` is not set, +:class:`~web_poet.testing.itemadapter.WebPoetTestItemAdapter` will be used. +It works like :class:`itemadapter.ItemAdapter` but doesn't change behavior when +:attr:`itemadapter.ItemAdapter.ADAPTER_CLASSES` is modified. + +.. _itemadapter: https://github.com/scrapy/itemadapter +.. 
_the relevant itemadapter docs: https://github.com/scrapy/itemadapter/#multiple-adapter-classes diff --git a/setup.py b/setup.py index eebcfcb3..dec73c04 100644 --- a/setup.py +++ b/setup.py @@ -25,7 +25,7 @@ "multidict", "w3lib >= 1.22.0", "async-lru >= 1.0.3", - "itemadapter >= 0.7.0", + "itemadapter >= 0.8.0", "andi", "python-dateutil", "time-machine", diff --git a/tests/test_testing.py b/tests/test_testing.py index 50ad4f0b..c26abe1f 100644 --- a/tests/test_testing.py +++ b/tests/test_testing.py @@ -1,14 +1,16 @@ import datetime import json import sys +from collections import deque from pathlib import Path -from typing import Optional +from typing import Any, Optional import attrs import dateutil.tz import pytest import time_machine from itemadapter import ItemAdapter +from itemadapter.adapter import DictAdapter from zyte_common_items import Item, Metadata, Product from web_poet import HttpClient, HttpRequest, HttpResponse, WebPage, field @@ -65,6 +67,36 @@ async def to_item(self) -> dict: return {"foo": None} +class CapitalizingDictAdapter(DictAdapter): + def __getitem__(self, field_name: str) -> Any: + item = super().__getitem__(field_name) + if isinstance(item, str): + return item.capitalize() + return item + + +class CustomItemAdapter(ItemAdapter): + ADAPTER_CLASSES = deque([CapitalizingDictAdapter]) + + +def test_fixture_adapter(book_list_html_response, tmp_path) -> None: + item = {"foo": "bar"} + meta = {"adapter": CustomItemAdapter} + base_dir = tmp_path / "fixtures" / get_fq_class_name(MyItemPage) + + fixture = Fixture.save( + base_dir, inputs=[book_list_html_response], item=item, meta=meta + ) + saved_output = json.loads(fixture.output_path.read_bytes()) + assert saved_output["foo"] == "Bar" + + loaded_fixture = Fixture(base_dir / "test-1") + loaded_output = loaded_fixture.get_output() + assert loaded_output["foo"] == "Bar" + actual_output = loaded_fixture.get_expected_output() + assert actual_output["foo"] == "Bar" + + def _save_fixture( pytester, 
page_cls, page_inputs, *, expected_output=None, expected_exception=None ): diff --git a/web_poet/serialization/utils.py b/web_poet/serialization/utils.py index 9a3c73b8..d2b1d9b3 100644 --- a/web_poet/serialization/utils.py +++ b/web_poet/serialization/utils.py @@ -10,7 +10,7 @@ def _exception_to_dict(ex: Exception) -> Dict[str, Any]: Only the exception type and the first argument are saved. """ return { - "type_name": _get_name_for_class(type(ex)), + "import_path": _get_name_for_class(type(ex)), "msg": ex.args[0] if ex.args else None, } @@ -20,7 +20,7 @@ def _exception_from_dict(data: Dict[str, Any]) -> Exception: Only the exception type and the first argument are restored. """ - exc_cls = load_class(data["type_name"]) + exc_cls = load_class(data["import_path"]) return exc_cls(data["msg"]) diff --git a/web_poet/testing/fixture.py b/web_poet/testing/fixture.py index 0066fdea..cc19fa0b 100644 --- a/web_poet/testing/fixture.py +++ b/web_poet/testing/fixture.py @@ -5,7 +5,7 @@ import os import sys from pathlib import Path -from typing import Any, Iterable, Optional, Type, TypeVar, Union +from typing import Any, Iterable, Optional, Type, TypeVar, Union, cast import dateutil.parser import dateutil.tz @@ -19,7 +19,7 @@ load_class, serialize, ) -from web_poet.utils import ensure_awaitable, memoizemethod_noargs +from web_poet.utils import ensure_awaitable, get_fq_class_name, memoizemethod_noargs from ..serialization.utils import _exception_from_dict, _exception_to_dict, _format_json from .exceptions import ( @@ -30,6 +30,7 @@ ItemValueIncorrect, WrongExceptionRaised, ) +from .itemadapter import WebPoetTestItemAdapter logger = logging.getLogger(__name__) @@ -112,12 +113,21 @@ def get_meta(self) -> dict: """Return the test metadata.""" if not self.meta_path.exists(): return {} - return json.loads(self.meta_path.read_bytes()) + meta_dict = json.loads(self.meta_path.read_bytes()) + if meta_dict.get("adapter"): + meta_dict["adapter"] = load_class(meta_dict["adapter"]) + return 
meta_dict + + def _get_adapter_cls(self) -> Type[ItemAdapter]: + cls = self.get_meta().get("adapter") + if not cls: + return WebPoetTestItemAdapter + return cast(Type[ItemAdapter], cls) def _get_output(self) -> dict: page = self.get_page() item = asyncio.run(ensure_awaitable(page.to_item())) - return ItemAdapter(item).asdict() + return self._get_adapter_cls()(item).asdict() @memoizemethod_noargs def get_output(self) -> dict: @@ -138,10 +148,9 @@ def get_output(self) -> dict: self._output_error = e raise - @classmethod - def item_to_json(cls, item: Any) -> str: + def item_to_json(self, item: Any) -> str: """Convert an item to a JSON string.""" - return _format_json(ItemAdapter(item).asdict()) + return _format_json(self._get_adapter_cls()(item).asdict()) @memoizemethod_noargs def get_expected_output(self) -> dict: @@ -262,13 +271,15 @@ def save( storage = SerializedDataFileStorage(fixture.input_path) storage.write(serialized_inputs) - if item is not None: - with fixture.output_path.open("w") as f: - f.write(cls.item_to_json(item)) - if meta: + if meta.get("adapter"): + meta["adapter"] = get_fq_class_name(meta["adapter"]) fixture.meta_path.write_text(_format_json(meta)) + if item is not None: + with fixture.output_path.open("w") as f: + f.write(fixture.item_to_json(item)) + if exception: exc_data = _exception_to_dict(exception) fixture.exception_path.write_text(_format_json(exc_data)) diff --git a/web_poet/testing/itemadapter.py b/web_poet/testing/itemadapter.py new file mode 100644 index 00000000..193ed7ff --- /dev/null +++ b/web_poet/testing/itemadapter.py @@ -0,0 +1,27 @@ +from collections import deque +from typing import Deque, Type + +from itemadapter import ItemAdapter +from itemadapter.adapter import ( + AdapterInterface, + AttrsAdapter, + DataclassAdapter, + DictAdapter, + PydanticAdapter, + ScrapyItemAdapter, +) + + +class WebPoetTestItemAdapter(ItemAdapter): + """A default adapter implementation""" + + # In case the user changes ItemAdapter.ADAPTER_CLASSES 
it's copied here. + ADAPTER_CLASSES: Deque[Type[AdapterInterface]] = deque( + [ + ScrapyItemAdapter, + DictAdapter, + DataclassAdapter, + AttrsAdapter, + PydanticAdapter, + ] + )