diff --git a/CHANGES.rst b/CHANGES.rst index f9b72a837..c627afc3a 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -14,6 +14,7 @@ Changes: -------- - Drop Python 3.7 support. - Add Python 3.12 to GitHub CI experimental builds. +- Bump ``werkzeug>=3.0.1`` to resolve security vulnerability from the package. Fixes: ------ diff --git a/requirements-dev.txt b/requirements-dev.txt index abc5fb454..7209620ee 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -22,6 +22,7 @@ parameterized pluggy>=0.7 # FIXME: bad interpolation of 'setup.cfg' for pytest 'log_format' (https://github.com/pytest-dev/pytest/issues/10019) pytest<7 +pytest-httpserver>=1.0.7 # support werkzeug>=3 pytest-server-fixtures pytest-rerunfailures pydocstyle diff --git a/requirements.txt b/requirements.txt index dc5537a07..eb8d97150 100644 --- a/requirements.txt +++ b/requirements.txt @@ -89,5 +89,4 @@ simplejson urlmatch xmltodict webob -# avoid error related to query string parsing (https://github.com/pallets/werkzeug/issues/2710) -werkzeug>2,<2.3 +werkzeug>=3.0.1,<3.1 diff --git a/tests/test_utils.py b/tests/test_utils.py index 619f74752..c78070d47 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -30,9 +30,11 @@ HTTPNotFound, HTTPOk ) +from pyramid.request import Request as PyramidRequest from pywps.response.status import WPS_STATUS from requests import Response from requests.exceptions import HTTPError as RequestsHTTPError +from werkzeug import Request as WerkzeugRequest from tests.utils import ( MOCK_AWS_REGION, @@ -63,6 +65,7 @@ get_any_value, get_base_url, get_path_kvp, + get_request_args, get_request_options, get_sane_name, get_secure_directory_name, @@ -482,6 +485,45 @@ def test_bytes2str(): assert bytes2str(u"test-unicode") == u"test-unicode" +class PseudoRequest(object): + query_string = "" + + def __init__(self, *_): + ... 
+ + +class BadQueryStringTypeRequest(PseudoRequest): + @property + def args(self): + raise AttributeError + + @property + def params(self): + raise AttributeError + + +@pytest.mark.parametrize( + ["request_cls", "converter", "query_string_expect_params"], + itertools.product( + [PyramidRequest, WerkzeugRequest, PseudoRequest, BadQueryStringTypeRequest], + [str, str2bytes], + [ + ("", {}), + ("param=", {"param": ""}), + ("param=value", {"param": "value"}), + ("param=val1,val2", {"param": "val1,val2"}), + ("param1=val1,val2&param2=val3", {"param1": "val1,val2", "param2": "val3"}), + ] + ) +) +def test_get_request_args(request_cls, converter, query_string_expect_params): + query_string, expect_params = query_string_expect_params + request = request_cls({}) + request.query_string = converter(query_string) + result = get_request_args(request) + assert dict(result) == expect_params + + def test_get_ssl_verify_option(): assert get_ssl_verify_option("get", "http://test.com", {}) is True assert get_ssl_verify_option("get", "http://test.com", {"weaver.ssl_verify": False}) is False diff --git a/weaver/typedefs.py b/weaver/typedefs.py index 88130cf93..359486822 100644 --- a/weaver/typedefs.py +++ b/weaver/typedefs.py @@ -9,7 +9,7 @@ import uuid from datetime import datetime from decimal import Decimal - from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple, Type, TypeVar, Union + from typing import Any, Callable, Dict, List, MutableMapping, Optional, Sequence, Tuple, Type, TypeVar, Union from typing_extensions import Literal, NotRequired, ParamSpec, Protocol, Required, TypeAlias, TypedDict import psutil @@ -60,8 +60,10 @@ from requests.structures import CaseInsensitiveDict from webob.acceptparse import AcceptLanguageInvalidHeader, AcceptLanguageNoHeader, AcceptLanguageValidHeader from webob.headers import EnvironHeaders, ResponseHeaders + from webob.multidict import MultiDict as PyramidMultiDict from webob.response import Response as WebobResponse from
webtest.response import TestResponse + from werkzeug.datastructures.structures import MultiDict as WerkzeugMultiDict from werkzeug.wrappers import Request as WerkzeugRequest from weaver.datatype import Process, Service @@ -331,6 +333,7 @@ class CWL_SchemaName(Protocol): "head", "get", "post", "put", "patch", "delete", ] AnyRequestMethod = Union[RequestMethod, str] + AnyRequestQueryMultiDict = Union[PyramidMultiDict, WerkzeugMultiDict, MutableMapping[str, str]] HTTPValid = Union[HTTPSuccessful, HTTPRedirection] AnyAcceptLanguageHeader = Union[AcceptLanguageNoHeader, AcceptLanguageValidHeader, AcceptLanguageInvalidHeader] diff --git a/weaver/utils.py b/weaver/utils.py index e1a99b642..e9b7aec0b 100644 --- a/weaver/utils.py +++ b/weaver/utils.py @@ -21,7 +21,7 @@ from datetime import datetime from pkgutil import get_loader from typing import TYPE_CHECKING, overload -from urllib.parse import ParseResult, unquote, urlparse, urlunsplit +from urllib.parse import ParseResult, parse_qsl, unquote, urlparse, urlunsplit import boto3 import colander @@ -69,7 +69,7 @@ try: # refactor in jsonschema==4.18.0 from jsonschema.validators import _RefResolver as JsonSchemaRefResolver # pylint: disable=E0611 -except ImportError: +except ImportError: # pragma: no cover from jsonschema.validators import RefResolver as JsonSchemaRefResolver # pylint: disable=E0611 if TYPE_CHECKING: @@ -106,6 +106,8 @@ AnyKey, AnyRegistryContainer, AnyRequestMethod, + AnyRequestQueryMultiDict, + AnyRequestType, AnyResponseType, AnySettingsContainer, AnyUUID, @@ -552,6 +554,35 @@ def get_cookie_headers(header_container, cookie_header_name="Cookie"): return {} +def get_request_args(request): + # type: (AnyRequestType) -> AnyRequestQueryMultiDict + """ + Extracts the parsed query string arguments from the appropriate request object strategy. + + Depending on the request implementation, attribute ``query_string`` are expected as :class:`bytes` (:mod:`werkzeug`) + or :class:`str` (:mod:`pyramid`, :mod:`webob`). 
The ``query_string`` attribute is then used by ``args`` and + ``params`` for respective implementations, but assuming their string-like formats are respected. + + .. seealso:: + https://github.com/pallets/werkzeug/issues/2710 + """ + try: + # cannot assume/check only by object type, since they are sometimes extended with both (see 'extend_instance') + # instead, rely on the expected 'query_string' type by each implementation + if isinstance(request.query_string, bytes) and hasattr(request, "args"): + return request.args + if isinstance(request.query_string, str) and hasattr(request, "params"): + return request.params + except (AttributeError, TypeError): # pragma: no cover + LOGGER.warning( + "Could not resolve expected query string parameter parser in request of type: [%s]. Using default parsing.", + type(request) + ) + # perform essentially what both implementations do + params = parse_qsl(bytes2str(request.query_string), keep_blank_values=True) + return dict(params) + + def parse_kvp(query, # type: str key_value_sep="=", # type: str pair_sep=";", # type: str diff --git a/weaver/wps/service.py b/weaver/wps/service.py index b96921b0d..9d88937d0 100644 --- a/weaver/wps/service.py +++ b/weaver/wps/service.py @@ -24,7 +24,7 @@ from weaver.processes.types import ProcessType from weaver.processes.utils import get_process from weaver.store.base import StoreProcesses -from weaver.utils import extend_instance, get_header, get_registry, get_settings, get_weaver_url +from weaver.utils import extend_instance, get_header, get_registry, get_request_args, get_settings, get_weaver_url from weaver.visibility import Visibility from weaver.wps.storage import ReferenceStatusLocationStorage from weaver.wps.utils import ( @@ -194,7 +194,7 @@ def _submit_job(self, wps_request): Returns the status response as is if XML, or convert it to JSON, according to request ``Accept`` header. 
""" - req = wps_request.http_request + req = wps_request.http_request # type: Union[PyramidRequest, WerkzeugRequest] pid = wps_request.identifier ctx = get_wps_output_context(req) # re-validate here in case submitted via WPS endpoint instead of REST-API proc = get_process(process_id=pid, settings=self.settings) # raises if invalid or missing @@ -202,7 +202,8 @@ def _submit_job(self, wps_request): # create the JSON payload from the XML content and submit job is_workflow = proc.type == ProcessType.WORKFLOW - tags = req.args.get("tags", "").split(",") + ["xml", f"wps-{wps_request.version}"] + args = get_request_args(req) + tags = args.get("tags", "").split(",") + ["xml", f"wps-{wps_request.version}"] data = wps2json_job_payload(wps_request, wps_process) resp = submit_job_handler( data, self.settings, proc.processEndpointWPS1,