diff --git a/openeo_driver/backend.py b/openeo_driver/backend.py index 2f9787c0..58c80ae5 100644 --- a/openeo_driver/backend.py +++ b/openeo_driver/backend.py @@ -388,7 +388,7 @@ class JobListing: (e.g. including pagination) """ - # TODO: just implement as frozen dataclass, or does that conflict with later need to subclass? + # TODO #332 just implement as frozen dataclass, or does that conflict with later need to subclass? __slots__ = ["_jobs", "_next_parameters"] def __init__(self, jobs: List[BatchJobMetadata], next_parameters: Optional[dict] = None): diff --git a/openeo_driver/jobregistry.py b/openeo_driver/jobregistry.py index 2ba67be6..d1ed3333 100644 --- a/openeo_driver/jobregistry.py +++ b/openeo_driver/jobregistry.py @@ -27,7 +27,6 @@ from openeo_driver.constants import JOB_STATUS from openeo_driver.errors import InternalException, JobNotFoundException from openeo_driver.util.auth import ClientCredentials, ClientCredentialsAccessTokenHelper -from openeo_driver.util.http import UrlSafeStructCodec from openeo_driver.util.logging import ExtraLoggingFilter from openeo_driver.utils import generate_unique_id @@ -359,7 +358,7 @@ def _do_request( url = url_join(self._api_url, path) self.logger.debug(f"Doing EJR request `{method} {url}` {params=} {headers.keys()=}") if self._debug_show_curl: - # TODO: add params to curl command + # TODO #332 add params to curl command curl_command = self._as_curl(method=method, url=url, data=json, headers=headers) self.logger.debug(f"Equivalent curl command: {curl_command}") try: @@ -600,7 +599,7 @@ def _search(self, query: dict, fields: Optional[List[str]] = None) -> List[JobDi @dataclasses.dataclass(frozen=True) class PaginatedSearchResult: jobs: List[JobDict] - pagination: dict + next_page: Union[int, None] def _search_paginated( self, @@ -612,12 +611,12 @@ def _search_paginated( ) -> PaginatedSearchResult: fields = set(fields or []) # Make sure to include some basic fields by default - # TODO: avoid duplication of this default field set + # TODO #332 avoid duplication of this default field set fields.update(["job_id", "user_id", "created", "status", "updated"]) params = {} if page_size: params["size"] = page_size - if page_number: + if page_number is not None and page_number >= 0: params["page"] = page_number body = { "query": query, @@ -630,11 +629,29 @@ def _search_paginated( # "jobs": [list of job docs], # "pagination': {"previous": "size=5&page=1", "next": "size=5&page=3"} # } + next_page_number = self._parse_response_pagination( + pagination=response.get("pagination", {}).get("next"), + expected_size=page_size, + ) + self.logger.debug(f"Parsed {next_page_number=} from {response.get('pagination')=}") return self.PaginatedSearchResult( jobs=response.get("jobs", []), - pagination=response.get("pagination", {}), + next_page=next_page_number, ) + @staticmethod + def _parse_response_pagination(pagination: dict, expected_size: int) -> Union[int, None]: + """Extract page number from pagination construct""" + if isinstance(pagination, str): + # TODO #332 get rid of this legacy format translation code path + params = urllib.parse.parse_qs(pagination) + if "size" in params and "page" in params: + pagination = {"size": int(params["size"][0]), "page": int(params["page"][0])} + if "size" in pagination and "page" in pagination: + if int(pagination["size"]) != expected_size: + raise ValueError(f"Page size mismatch {expected_size=} vs {pagination=}") + return int(pagination["page"]) + def list_user_jobs( self, user_id: Optional[str], @@ -655,34 +672,20 @@ def list_user_jobs( if limit: # Do paginated search - # TODO: make this the one and only code path - url_safe_codec = UrlSafeStructCodec(signature_field="_usc") - page_number = None - page_params = (request_parameters or {}).get(self.PAGINATION_URL_PARAM) - if page_params: - # Extract page number - page_params = url_safe_codec.decode(page_params) - if isinstance(page_params, str): - # TODO: this is old code path where page params where encoded as URL query string - # parse as URL params - page_params = urllib.parse.parse_qs(page_params) - assert limit == int(page_params["size"][0]) - page_number = int(page_params["page"][0]) - elif isinstance(page_params, dict): - # TODO: this dict handling should be (is?) the only code path? - assert limit == page_params["size"] - page_number = page_params["page"] - else: - raise ValueError(page_params) - + # TODO #332 make this the one and only code path + page_number = (request_parameters or {}).get(self.PAGINATION_URL_PARAM) + if page_number: + page_number = int(page_number) + else: + page_number = None data = self._search_paginated(query=query, fields=fields, page_size=limit, page_number=page_number) return JobListing( jobs=[ejr_job_info_to_metadata(j, full=False) for j in data.jobs], - next_parameters={self.PAGINATION_URL_PARAM: url_safe_codec.encode(data.pagination.get("next"))}, + next_parameters={"limit": limit, self.PAGINATION_URL_PARAM: data.next_page}, ) else: # Deprecated non-paginated search - # TODO: eliminate this code path + # TODO #332 eliminate this code path return self._search(query=query, fields=fields) def list_active_jobs( @@ -781,11 +784,7 @@ def _parse_cli(self) -> argparse.Namespace: cli_list_user.add_argument("--backend-id", help="Backend id to filter on.") cli_list_user.add_argument("user_id", help="User id to filter on.") cli_list_user.add_argument("--limit", type=int, default=None, help="Page size of job listing") - cli_list_user.add_argument( - "--pagination", - default=None, - help=f"Pagination URL fragment from 'next' link. E.g. '/jobs?{ElasticJobRegistry.PAGINATION_URL_PARAM}=InNpemU9N...'", - ) + cli_list_user.add_argument("--page", default=0) cli_list_user.set_defaults(func=self.list_user_jobs) cli_create = subparsers.add_parser("create", help="Create a new job.") @@ -881,8 +880,8 @@ def list_user_jobs(self, args: argparse.Namespace): user_id = args.user_id ejr = self._get_job_registry(cli_args=args) request_parameters = {} - if args.pagination: - request_parameters[ElasticJobRegistry.PAGINATION_URL_PARAM] = args.pagination.split("?")[-1] + if args.page: + request_parameters[ElasticJobRegistry.PAGINATION_URL_PARAM] = str(args.page) jobs = ejr.list_user_jobs( user_id=user_id, # TODO: option to return more fields? diff --git a/tests/test_jobregistry.py b/tests/test_jobregistry.py index 40c5f6c9..83966b22 100644 --- a/tests/test_jobregistry.py +++ b/tests/test_jobregistry.py @@ -545,7 +545,7 @@ def _build_url(self, params: dict): None, {"size": ["3"]}, True, - "https://oeo.test/jobs?page=InNpemU9MyZwYWdlPTEi", + "https://oeo.test/jobs?limit=3&page=1", ["j-1", "j-2", "j-3"], ), ( @@ -553,31 +553,31 @@ def _build_url(self, params: dict): None, {"size": ["3"]}, False, - "https://oeo.test/jobs?page=eyJzaXplIjozLCJwYWdlIjoxfQ%3D%3D", + "https://oeo.test/jobs?limit=3&page=1", ["j-1", "j-2", "j-3"], ), ( 3, - {ElasticJobRegistry.PAGINATION_URL_PARAM: "InNpemU9MyZwYWdlPTEi"}, + {ElasticJobRegistry.PAGINATION_URL_PARAM: "1"}, {"size": ["3"], "page": ["1"]}, True, - "https://oeo.test/jobs?page=InNpemU9MyZwYWdlPTIi", + "https://oeo.test/jobs?limit=3&page=2", ["j-4", "j-5", "j-6"], ), ( 3, - {ElasticJobRegistry.PAGINATION_URL_PARAM: "InNpemU9MyZwYWdlPTEi"}, + {ElasticJobRegistry.PAGINATION_URL_PARAM: "1"}, {"size": ["3"], "page": ["1"]}, False, - "https://oeo.test/jobs?page=eyJzaXplIjozLCJwYWdlIjoyfQ%3D%3D", + "https://oeo.test/jobs?limit=3&page=2", ["j-4", "j-5", "j-6"], ), ( 5, - {ElasticJobRegistry.PAGINATION_URL_PARAM: "InNpemU9NSZwYWdlPTExMSI="}, + {ElasticJobRegistry.PAGINATION_URL_PARAM: "111"}, {"size": ["5"], "page": ["111"]}, False, - "https://oeo.test/jobs?page=eyJzaXplIjo1LCJwYWdlIjoxMTJ9", + "https://oeo.test/jobs?limit=5&page=112", ["j-556", "j-557", "j-558", "j-559", "j-560"], ), ],