Skip to content

Commit

Permalink
Merge remote-tracking branch 'scrapinghub/master' into docs-apply-rules
Browse files Browse the repository at this point in the history
  • Loading branch information
Gallaecio committed May 3, 2023
2 parents 400e399 + bfd6683 commit dcd3fca
Show file tree
Hide file tree
Showing 6 changed files with 137 additions and 20 deletions.
57 changes: 52 additions & 5 deletions docs/page-objects/testing.rst
Original file line number Diff line number Diff line change
Expand Up @@ -78,14 +78,16 @@ it, that contains data for Page Object inputs and output::
├── meta.json
└── output.json

.. _fixture-save:

:func:`web_poet.testing.Fixture.save` can be used to create a fixture inside a
Page Object directory from an iterable of dependencies, an output item and an
optional metadata dictionary. It can optionally take a name for the fixture
directory. By default it uses incrementing names "test-1", "test-2" etc.

.. note::
``output.json`` contains a result of
``ItemAdapter(page_object.to_item()).asdict()`` saved as JSON.
``output.json`` contains a result of ``page_object.to_item()`` converted to
a dict using the itemadapter_ library and saved as JSON.

After generating a fixture you can edit ``output.json`` to modify expected
field values and add new fields, which is useful when creating tests for code
Expand Down Expand Up @@ -194,9 +196,10 @@ Handling time fields
Sometimes output of a page object might depend on the current time. For
example, the item may contain the scraping datetime, or a current timestamp may
be used to build some URLs. When a test runs at a different time it will break.
To avoid this the metadata dictionary can contain a ``frozen_time`` field set
to the time value used when generating the test. This will instruct the test
runner to use the same time value so that field comparisons are still correct.
To avoid this :ref:`the metadata dictionary <fixture-save>` can contain a
``frozen_time`` field set to the time value used when generating the test. This
will instruct the test runner to use the same time value so that field
comparisons are still correct.

The value can be any string understood by `dateutil`_. If it doesn't include
timezone information, the local time of the machine will be assumed. If it
Expand Down Expand Up @@ -322,3 +325,47 @@ The coverage for page object code is reported correctly if tools such as
`coverage`_ are used when running web-poet tests.

.. _coverage: https://coverage.readthedocs.io/

.. _web-poet-testing-adapters:

Item adapters
=============

The testing framework uses the itemadapter_ library to convert items to dicts
when storing them in fixtures and when comparing the expected and the actual
output. As adapters may influence the resulting dicts, it's important to use
the same adapter when generating and running the tests.

It may also be useful to use different adapters in tests and in production. For
example, you may want to omit empty fields in production, but be able to
distinguish between empty and absent fields in tests.

For this you can set the ``adapter`` field in :ref:`the metadata dictionary
<fixture-save>` to a class that inherits from
:class:`itemadapter.ItemAdapter` and lists the adapter(s) you want to use in
tests in its ``ADAPTER_CLASSES`` attribute (see `the relevant itemadapter
docs`_ for more information). An example::

from collections import deque

from itemadapter import ItemAdapter
from itemadapter.adapter import DictAdapter


class MyAdapter(DictAdapter):
# any needed customization
...

class MyItemAdapter(ItemAdapter):
ADAPTER_CLASSES = deque([MyAdapter])

You can then put the ``MyItemAdapter`` class object into ``adapter`` and it
will be used by the testing framework.

If ``adapter`` is not set,
:class:`~web_poet.testing.itemadapter.WebPoetTestItemAdapter` will be used.
It works like :class:`itemadapter.ItemAdapter` but doesn't change behavior when
:attr:`itemadapter.ItemAdapter.ADAPTER_CLASSES` is modified.

.. _itemadapter: https://github.com/scrapy/itemadapter
.. _the relevant itemadapter docs: https://github.com/scrapy/itemadapter/#multiple-adapter-classes
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
"multidict",
"w3lib >= 1.22.0",
"async-lru >= 1.0.3",
"itemadapter >= 0.7.0",
"itemadapter >= 0.8.0",
"andi",
"python-dateutil",
"time-machine",
Expand Down
34 changes: 33 additions & 1 deletion tests/test_testing.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,16 @@
import datetime
import json
import sys
from collections import deque
from pathlib import Path
from typing import Optional
from typing import Any, Optional

import attrs
import dateutil.tz
import pytest
import time_machine
from itemadapter import ItemAdapter
from itemadapter.adapter import DictAdapter
from zyte_common_items import Item, Metadata, Product

from web_poet import HttpClient, HttpRequest, HttpResponse, WebPage, field
Expand Down Expand Up @@ -65,6 +67,36 @@ async def to_item(self) -> dict:
return {"foo": None}


class CapitalizingDictAdapter(DictAdapter):
    """Dict adapter for tests that capitalizes string values on item access."""

    def __getitem__(self, field_name: str) -> Any:
        """Return the field value, capitalized when it is a string."""
        value = super().__getitem__(field_name)
        # Non-string values are passed through unchanged.
        return value.capitalize() if isinstance(value, str) else value


class CustomItemAdapter(ItemAdapter):
    """Item adapter for tests whose only adapter class is CapitalizingDictAdapter."""

    ADAPTER_CLASSES = deque((CapitalizingDictAdapter,))


def test_fixture_adapter(book_list_html_response, tmp_path) -> None:
    """Check that the ``adapter`` metadata entry is used when saving and loading."""
    fixture_root = tmp_path / "fixtures" / get_fq_class_name(MyItemPage)

    saved = Fixture.save(
        fixture_root,
        inputs=[book_list_html_response],
        item={"foo": "bar"},
        meta={"adapter": CustomItemAdapter},
    )
    # The stored output must already reflect the custom (capitalizing) adapter.
    on_disk = json.loads(saved.output_path.read_bytes())
    assert on_disk["foo"] == "Bar"

    # Build a new Fixture object from the saved directory and check both the
    # computed output and the expected output read through it.
    reloaded = Fixture(fixture_root / "test-1")
    computed = reloaded.get_output()
    assert computed["foo"] == "Bar"
    expected = reloaded.get_expected_output()
    assert expected["foo"] == "Bar"


def _save_fixture(
pytester, page_cls, page_inputs, *, expected_output=None, expected_exception=None
):
Expand Down
4 changes: 2 additions & 2 deletions web_poet/serialization/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ def _exception_to_dict(ex: Exception) -> Dict[str, Any]:
Only the exception type and the first argument are saved.
"""
return {
"type_name": _get_name_for_class(type(ex)),
"import_path": _get_name_for_class(type(ex)),
"msg": ex.args[0] if ex.args else None,
}

Expand All @@ -20,7 +20,7 @@ def _exception_from_dict(data: Dict[str, Any]) -> Exception:
Only the exception type and the first argument are restored.
"""
exc_cls = load_class(data["type_name"])
exc_cls = load_class(data["import_path"])
return exc_cls(data["msg"])


Expand Down
33 changes: 22 additions & 11 deletions web_poet/testing/fixture.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import os
import sys
from pathlib import Path
from typing import Any, Iterable, Optional, Type, TypeVar, Union
from typing import Any, Iterable, Optional, Type, TypeVar, Union, cast

import dateutil.parser
import dateutil.tz
Expand All @@ -19,7 +19,7 @@
load_class,
serialize,
)
from web_poet.utils import ensure_awaitable, memoizemethod_noargs
from web_poet.utils import ensure_awaitable, get_fq_class_name, memoizemethod_noargs

from ..serialization.utils import _exception_from_dict, _exception_to_dict, _format_json
from .exceptions import (
Expand All @@ -30,6 +30,7 @@
ItemValueIncorrect,
WrongExceptionRaised,
)
from .itemadapter import WebPoetTestItemAdapter

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -112,12 +113,21 @@ def get_meta(self) -> dict:
"""Return the test metadata."""
if not self.meta_path.exists():
return {}
return json.loads(self.meta_path.read_bytes())
meta_dict = json.loads(self.meta_path.read_bytes())
if meta_dict.get("adapter"):
meta_dict["adapter"] = load_class(meta_dict["adapter"])
return meta_dict

def _get_adapter_cls(self) -> Type[ItemAdapter]:
    """Return the item adapter class to use for this fixture.

    The ``adapter`` entry of the test metadata is used when it is set;
    otherwise ``WebPoetTestItemAdapter`` is returned.
    """
    configured = self.get_meta().get("adapter")
    if configured:
        # The metadata dict is untyped, so narrow the value for type checkers.
        return cast(Type[ItemAdapter], configured)
    return WebPoetTestItemAdapter

def _get_output(self) -> dict:
page = self.get_page()
item = asyncio.run(ensure_awaitable(page.to_item()))
return ItemAdapter(item).asdict()
return self._get_adapter_cls()(item).asdict()

@memoizemethod_noargs
def get_output(self) -> dict:
Expand All @@ -138,10 +148,9 @@ def get_output(self) -> dict:
self._output_error = e
raise

@classmethod
def item_to_json(cls, item: Any) -> str:
def item_to_json(self, item: Any) -> str:
"""Convert an item to a JSON string."""
return _format_json(ItemAdapter(item).asdict())
return _format_json(self._get_adapter_cls()(item).asdict())

@memoizemethod_noargs
def get_expected_output(self) -> dict:
Expand Down Expand Up @@ -262,13 +271,15 @@ def save(
storage = SerializedDataFileStorage(fixture.input_path)
storage.write(serialized_inputs)

if item is not None:
with fixture.output_path.open("w") as f:
f.write(cls.item_to_json(item))

if meta:
if meta.get("adapter"):
meta["adapter"] = get_fq_class_name(meta["adapter"])
fixture.meta_path.write_text(_format_json(meta))

if item is not None:
with fixture.output_path.open("w") as f:
f.write(fixture.item_to_json(item))

if exception:
exc_data = _exception_to_dict(exception)
fixture.exception_path.write_text(_format_json(exc_data))
Expand Down
27 changes: 27 additions & 0 deletions web_poet/testing/itemadapter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
from collections import deque
from typing import Deque, Type

from itemadapter import ItemAdapter
from itemadapter.adapter import (
AdapterInterface,
AttrsAdapter,
DataclassAdapter,
DictAdapter,
PydanticAdapter,
ScrapyItemAdapter,
)


class WebPoetTestItemAdapter(ItemAdapter):
    """Default item adapter of the testing framework.

    It is used when the test metadata does not set ``adapter``.
    """

    # The adapter list is copied here rather than inherited so that user
    # changes to ItemAdapter.ADAPTER_CLASSES do not affect this class.
    ADAPTER_CLASSES: Deque[Type[AdapterInterface]] = deque(
        (
            ScrapyItemAdapter,
            DictAdapter,
            DataclassAdapter,
            AttrsAdapter,
            PydanticAdapter,
        )
    )

0 comments on commit dcd3fca

Please sign in to comment.