From 353f49683261b9d458896cfc22c63ec7771432df Mon Sep 17 00:00:00 2001 From: Stephen Rosen Date: Fri, 11 Dec 2020 19:42:58 +0000 Subject: [PATCH] Add `Parser.pre_load` as an extension point `Parser.pre_load` is a "do nothing" method out of the box, but lets users subclass and override with custom implementations for their needs. This resolves #234 by allowing a custom parser to strip whitespace -- or perform any other desirable action -- after location loading and before schema loading. By giving the pre_load method an interface which takes the schema and location, users can extend this in a myriad of ways. In an explicit nod towards #234, an example is provided which strips whitespace from certain locations. Furthermore, since satisfying this use case is the purpose of this PR, I've included a somewhat unusual test which copies the doc example code and ensures it does the right thing in a couple of basic scenarios. Defining this interface as a method for subclasses to override keeps the changes to `webargs` minimal (vs. supporting another hook with a decorator, like error handlers and location loaders). --- CHANGELOG.rst | 4 +++ docs/advanced.rst | 44 +++++++++++++++++++++++ src/webargs/core.py | 14 +++++++- tests/test_core.py | 85 +++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 146 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 138d5f12..26589cad 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -6,6 +6,10 @@ Changelog Features: +* Add `Parser.pre_load` as a method for allowing users to modify data before + schema loading, but without redefining location loaders. See advanced docs on + `Parser pre_load` for usage information + * ``unknown`` defaults to `None` for body locations (`json`, `form` and `json_or_form`) (:issue:`580`). 
diff --git a/docs/advanced.rst b/docs/advanced.rst index 50ad01d1..853fd649 100644 --- a/docs/advanced.rst +++ b/docs/advanced.rst @@ -435,6 +435,50 @@ To add your own parser, extend :class:`Parser <webargs.core.Parser>` and impleme structure_dict_pair(r, k, v) return r +Parser pre_load +--------------- + +Similar to ``@pre_load`` decorated hooks on marshmallow Schemas, +:class:`Parser <webargs.core.Parser>` classes define a method, +`pre_load <webargs.core.Parser.pre_load>` which can +be overridden to provide per-parser transformations of data. +The only way to make use of `pre_load <webargs.core.Parser.pre_load>` is to +subclass a :class:`Parser <webargs.core.Parser>` and provide an +implementation. + +`pre_load <webargs.core.Parser.pre_load>` is given the data fetched from a +location, the schema which will be used, the request object, and the location +name which was requested. For example, to define a ``FlaskParser`` which strips +whitespace from ``form`` and ``query`` data, one could write the following: + +.. code-block:: python + + from webargs.flaskparser import FlaskParser + import typing + + + def _strip_whitespace(value): + if isinstance(value, str): + value = value.strip() + elif isinstance(value, typing.Mapping): + return {k: _strip_whitespace(value[k]) for k in value} + elif isinstance(value, (list, tuple)): + return type(value)(map(_strip_whitespace, value)) + return value + + + class WhitspaceStrippingFlaskParser(FlaskParser): + def pre_load(self, location_data, *, schema, req, location): + if location in ("query", "form"): + return _strip_whitespace(location_data) + return location_data + +Note that `Parser.pre_load <webargs.core.Parser.pre_load>` is run after location +loading but before ``Schema.load`` is called. It can therefore be called on +multiple types of mapping objects, including +:class:`MultiDictProxy <webargs.multidictproxy.MultiDictProxy>`, depending on what the +location loader returns.
+ Returning HTTP 400 Responses ---------------------------- diff --git a/src/webargs/core.py b/src/webargs/core.py index 3752479d..e675d772 100644 --- a/src/webargs/core.py +++ b/src/webargs/core.py @@ -322,7 +322,10 @@ def parse( location_data = self._load_location_data( schema=schema, req=req, location=location ) - data = schema.load(location_data, **load_kwargs) + preprocessed_data = self.pre_load( + location_data, schema=schema, req=req, location=location + ) + data = schema.load(preprocessed_data, **load_kwargs) self._validate_arguments(data, validators) except ma.exceptions.ValidationError as error: self._on_validation_error( @@ -523,6 +526,15 @@ def handle_error(error, req, schema, *, error_status_code, error_headers): self.error_callback = func return func + def pre_load( + self, location_data: Mapping, *, schema: ma.Schema, req: Request, location: str + ) -> Mapping: + """A method of the parser which can transform data after location + loading is done. By default it does nothing, but users can subclass + parsers and override this method. 
+ """ + return location_data + def _handle_invalid_json_error( self, error: typing.Union[json.JSONDecodeError, UnicodeDecodeError], diff --git a/tests/test_core.py b/tests/test_core.py index 12196597..48b8ca6b 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -1,4 +1,5 @@ import datetime +import typing from unittest import mock import pytest @@ -37,6 +38,9 @@ class MockRequestParser(Parser): def load_querystring(self, req, schema): return self._makeproxy(req.query, schema) + def load_form(self, req, schema): + return MultiDictProxy(req.form, schema) + def load_json(self, req, schema): return req.json @@ -1224,3 +1228,84 @@ class CustomParser(Parser): p = CustomParser() ret = p.parse(argmap, web_request) assert ret == {"value": "hello world"} + + +def test_parser_pre_load(web_request): + class CustomParser(MockRequestParser): + # pre-load hook to strip whitespace from query params + def pre_load(self, data, *, schema, req, location): + if location == "query": + return {k: v.strip() for k, v in data.items()} + return data + + parser = CustomParser() + + # mock data for both query and json + web_request.query = web_request.json = {"value": " hello "} + argmap = {"value": fields.Str()} + + # data gets through for 'json' just fine + ret = parser.parse(argmap, web_request) + assert ret == {"value": " hello "} + + # but for 'query', the pre_load hook changes things + ret = parser.parse(argmap, web_request, location="query") + assert ret == {"value": "hello"} + + +# this test is meant to be a run of the WhitspaceStrippingFlaskParser we give +# in the docs/advanced.rst examples for how to use pre_load +# this helps ensure that the example code is correct +# rather than a FlaskParser, we're working with the mock parser, but it's +# otherwise the same +def test_whitespace_stripping_parser_example(web_request): + def _strip_whitespace(value): + if isinstance(value, str): + value = value.strip() + elif isinstance(value, typing.Mapping): + return {k: 
_strip_whitespace(value[k]) for k in value} + elif isinstance(value, (list, tuple)): + return type(value)(map(_strip_whitespace, value)) + return value + + class WhitspaceStrippingParser(MockRequestParser): + def pre_load(self, location_data, *, schema, req, location): + if location in ("query", "form"): + ret = _strip_whitespace(location_data) + return ret + return location_data + + parser = WhitspaceStrippingParser() + + # mock data for query, form, and json + web_request.form = web_request.query = web_request.json = {"value": " hello "} + argmap = {"value": fields.Str()} + + # data gets through for 'json' just fine + ret = parser.parse(argmap, web_request) + assert ret == {"value": " hello "} + + # but for 'query' and 'form', the pre_load hook changes things + for loc in ("query", "form"): + ret = parser.parse(argmap, web_request, location=loc) + assert ret == {"value": "hello"} + + # check that it applies in the case where the field is a list type + # applied to an argument (logic for `tuple` is effectively the same) + web_request.form = web_request.query = web_request.json = { + "ids": [" 1", "3", " 4"], + "values": [" foo ", " bar"], + } + schema = Schema.from_dict( + {"ids": fields.List(fields.Int), "values": fields.List(fields.Str)} + ) + for loc in ("query", "form"): + ret = parser.parse(schema, web_request, location=loc) + assert ret == {"ids": [1, 3, 4], "values": ["foo", "bar"]} + + # json loading should also work even though the pre_load hook above + # doesn't strip whitespace from JSON data + # - values=[" foo ", ...] will have whitespace preserved + # - ids=[" 1", ...] will still parse okay because " 1" is valid for fields.Int + ret = parser.parse(schema, web_request, location="json") + assert ret == {"ids": [1, 3, 4], "values": [" foo ", " bar"]}