From a627be9fe503d8beae1f7e77352bcee3bd419fc8 Mon Sep 17 00:00:00 2001 From: AndrewKorzh <92707967+AndrewKorzh@users.noreply.github.com> Date: Thu, 8 Aug 2024 17:02:00 +0300 Subject: [PATCH] Add har files (#33) * Har add to scrapy-puppeteer client * Update actions.py * Refactoring * tests fix * Update constants.py * Update test_actions.py * "version": "0.3.3" --- scrapypuppeteer/actions.py | 12 +++++++++++- scrapypuppeteer/middleware.py | 4 ++++ scrapypuppeteer/request.py | 3 ++- scrapypuppeteer/response.py | 13 +++++++++++++ setup.py | 2 +- tests/actions/constants.py | 1 + tests/actions/test_actions.py | 13 +++++++------ 7 files changed, 39 insertions(+), 9 deletions(-) diff --git a/scrapypuppeteer/actions.py b/scrapypuppeteer/actions.py index 141a703..369039b 100644 --- a/scrapypuppeteer/actions.py +++ b/scrapypuppeteer/actions.py @@ -58,17 +58,19 @@ class GoTo(PuppeteerServiceAction): endpoint = "goto" def __init__( - self, url: str, navigation_options: dict = None, wait_options: dict = None + self, url: str, navigation_options: dict = None, wait_options: dict = None, har_recording: bool = False ): self.url = url self.navigation_options = navigation_options self.wait_options = wait_options + self.har_recording = har_recording def payload(self): return { "url": self.url, "navigationOptions": self.navigation_options, "waitOptions": self.wait_options, + "harRecording": self.har_recording, } @@ -221,6 +223,14 @@ def __init__(self, options: dict = None, **kwargs): def payload(self): return {"options": self.options} + + +class Har(PuppeteerServiceAction): + endpoint = "har" + + def payload(self): + return {} + class RecaptchaSolver(PuppeteerServiceAction): diff --git a/scrapypuppeteer/middleware.py b/scrapypuppeteer/middleware.py index 6907ae0..f7b79a5 100644 --- a/scrapypuppeteer/middleware.py +++ b/scrapypuppeteer/middleware.py @@ -20,11 +20,13 @@ Screenshot, Scroll, CustomJsAction, + Har ) from scrapypuppeteer.response import ( PuppeteerResponse, PuppeteerHtmlResponse, PuppeteerScreenshotResponse, + PuppeteerHarResponse, PuppeteerRecaptchaSolverResponse, PuppeteerJsonResponse, ) @@ -232,6 +234,8 @@ def _get_response_class(request_action): return PuppeteerHtmlResponse if isinstance(request_action, Screenshot): return PuppeteerScreenshotResponse + if isinstance(request_action, Har): + return PuppeteerHarResponse if isinstance(request_action, RecaptchaSolver): return PuppeteerRecaptchaSolverResponse return PuppeteerJsonResponse diff --git a/scrapypuppeteer/request.py b/scrapypuppeteer/request.py index a3d55a7..8a69d7c 100644 --- a/scrapypuppeteer/request.py +++ b/scrapypuppeteer/request.py @@ -56,6 +56,7 @@ def __init__( page_id: str = None, close_page: bool = True, include_headers: Union[bool, List[str]] = None, + har_recording: bool = False, **kwargs, ): """ @@ -80,7 +81,7 @@ def __init__( navigation_options = kwargs.pop("navigation_options", None) wait_options = kwargs.pop("wait_options", None) action = GoTo( - url, navigation_options=navigation_options, wait_options=wait_options + url, navigation_options=navigation_options, wait_options=wait_options, har_recording = har_recording ) elif isinstance(action, GoTo): url = action.url diff --git a/scrapypuppeteer/response.py b/scrapypuppeteer/response.py index f1d39f6..8a1a2d8 100644 --- a/scrapypuppeteer/response.py +++ b/scrapypuppeteer/response.py @@ -107,6 +107,19 @@ def __init__(self, url, puppeteer_request, context_id, page_id, **kwargs): self.screenshot = kwargs.pop("screenshot") super().__init__(url, puppeteer_request, context_id, page_id, **kwargs) +class PuppeteerHarResponse(PuppeteerResponse): + + """ + Response for Har action. + Har is available via self.har. + """ + + attributes: Tuple[str, ...] = PuppeteerResponse.attributes + ("har",) + + def __init__(self, url, puppeteer_request, context_id, page_id, **kwargs): + self.har = kwargs.pop("har") + super().__init__(url, puppeteer_request, context_id, page_id, **kwargs) + class PuppeteerJsonResponse(PuppeteerResponse): """ diff --git a/setup.py b/setup.py index a0c57c6..9435c26 100644 --- a/setup.py +++ b/setup.py @@ -7,7 +7,7 @@ setup( name="scrapy-puppeteer-client", - version="0.3.2", + version="0.3.3", description="A library to use Puppeteer-managed browser in Scrapy spiders", long_description=long_description, long_description_content_type="text/markdown", diff --git a/tests/actions/constants.py b/tests/actions/constants.py index 36ce7f8..3b64256 100644 --- a/tests/actions/constants.py +++ b/tests/actions/constants.py @@ -6,6 +6,7 @@ WAIT_OPTS = [None] SELECTORS = ("nothing", "tr.td::attr(something)") CLICK_OPTS = [None] +HAR_RECORDING = [None] def __gen_nav_opts(): diff --git a/tests/actions/test_actions.py b/tests/actions/test_actions.py index e3a36b4..5dfa3f4 100644 --- a/tests/actions/test_actions.py +++ b/tests/actions/test_actions.py @@ -1,17 +1,18 @@ from pytest import mark from scrapypuppeteer.actions import GoTo, GoForward, GoBack, Click, Scroll from itertools import product -from constants import URLS, NAV_OPTS, WAIT_OPTS, SELECTORS, CLICK_OPTS +from constants import URLS, NAV_OPTS, WAIT_OPTS, SELECTORS, CLICK_OPTS, HAR_RECORDING def _gen_goto(): - for url, nav_opt, wait_opt in product(URLS, NAV_OPTS, WAIT_OPTS): + for url, nav_opt, wait_opt, har_recording in product(URLS, NAV_OPTS, WAIT_OPTS, HAR_RECORDING): expected = { "url": url, "navigationOptions": nav_opt, "waitOptions": wait_opt, + "harRecording": har_recording } - yield url, nav_opt, wait_opt, expected + yield url, nav_opt, wait_opt, har_recording, expected def _gen_back_forward(): @@ -42,9 +43,9 @@ def _gen_scroll(): yield selector, wait_opt, expected -@mark.parametrize("url, navigation_options, wait_options, expected", _gen_goto()) -def test_goto(url, navigation_options, wait_options, expected): - action = GoTo(url, navigation_options, wait_options) +@mark.parametrize("url, navigation_options, wait_options, har_recording, expected", _gen_goto()) +def test_goto(url, navigation_options, wait_options, har_recording, expected): + action = GoTo(url, navigation_options, wait_options, har_recording) assert action.payload() == expected