From 6266424580c6c222265bddceeaed57124b516ef7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tin=20Tvrtkovi=C4=87?= Date: Sat, 6 Jan 2024 20:45:05 +0100 Subject: [PATCH 01/18] msgspec first pass --- docs/preconf.md | 28 ++++++++++++------- pdm.lock | 48 +++++++++++++++++++++++++++++++-- pyproject.toml | 3 +++ src/cattr/gen.py | 2 +- src/cattrs/converters.py | 6 ++--- src/cattrs/preconf/__init__.py | 14 ++++++++++ src/cattrs/preconf/bson.py | 3 ++- src/cattrs/preconf/cbor2.py | 2 ++ src/cattrs/preconf/json.py | 2 ++ src/cattrs/preconf/msgpack.py | 2 ++ src/cattrs/preconf/msgspec.py | 49 ++++++++++++++++++++++++++++++++++ src/cattrs/preconf/orjson.py | 2 ++ src/cattrs/preconf/pyyaml.py | 3 ++- src/cattrs/preconf/tomlkit.py | 3 ++- src/cattrs/preconf/ujson.py | 2 ++ tests/test_preconf.py | 47 +++++++++++++++++++++++++------- 16 files changed, 187 insertions(+), 29 deletions(-) create mode 100644 src/cattrs/preconf/msgspec.py diff --git a/docs/preconf.md b/docs/preconf.md index 48d75ce3..d805f7ab 100644 --- a/docs/preconf.md +++ b/docs/preconf.md @@ -56,15 +56,6 @@ Found at {mod}`cattrs.preconf.json`. Bytes are serialized as base 85 strings. Counters are serialized as dictionaries. Sets are serialized as lists, and deserialized back into sets. `datetime` s and `date` s are serialized as ISO 8601 strings. -## _ujson_ - -Found at {mod}`cattrs.preconf.ujson`. - -Bytes are serialized as base 85 strings. Sets are serialized as lists, and deserialized back into sets. `datetime` s and `date` s are serialized as ISO 8601 strings. - -`ujson` doesn't support integers less than -9223372036854775808, and greater than 9223372036854775807, nor does it support `float('inf')`. - - ## _orjson_ Found at {mod}`cattrs.preconf.orjson`. @@ -77,6 +68,25 @@ _orjson_ doesn't support integers less than -9223372036854775808, and greater th _orjson_ only supports mappings with string keys so mappings will have their keys stringified before serialization, and destringified during deserialization. 
+## _msgspec_ + +Found at {mod}`cattrs.preconf.msgspec`. +Only JSON functionality is currently available, other formats supported by msgspec to follow in the future. + +Bytes are un/structured as base 64 strings directly by msgspec. +_msgspec_ [encodes special float values](https://jcristharif.com/msgspec/supported-types.html#float) (`NaN, Inf, -Inf`) as `null`. +`datetime` s and `date` s are passed through to be unstructured into RFC 3339 by _msgspec_ itself. + + +## _ujson_ + +Found at {mod}`cattrs.preconf.ujson`. + +Bytes are serialized as base 85 strings. Sets are serialized as lists, and deserialized back into sets. `datetime` s and `date` s are serialized as ISO 8601 strings. + +`ujson` doesn't support integers less than -9223372036854775808, and greater than 9223372036854775807, nor does it support `float('inf')`. + + ## _msgpack_ Found at {mod}`cattrs.preconf.msgpack`. diff --git a/pdm.lock b/pdm.lock index a3c71a1d..faf1e3b7 100644 --- a/pdm.lock +++ b/pdm.lock @@ -2,10 +2,10 @@ # It is not intended for manual editing. [metadata] -groups = ["default", "bench", "bson", "cbor2", "docs", "lint", "msgpack", "orjson", "pyyaml", "test", "tomlkit", "ujson"] +groups = ["default", "bench", "bson", "cbor2", "docs", "lint", "msgpack", "orjson", "pyyaml", "test", "tomlkit", "ujson", "msgspec"] strategy = ["cross_platform"] lock_version = "4.4.1" -content_hash = "sha256:c48ae8c45873dfe03d3b677793be038f06b49fff96076a3f62731ed9b94b3de3" +content_hash = "sha256:7f0761ff761a474620f436f9a8f8ef5b00a94cdd2d0669d3d6f241706ab27b95" [[package]] name = "alabaster" @@ -615,6 +615,50 @@ files = [ {file = "msgpack-1.0.5.tar.gz", hash = "sha256:c075544284eadc5cddc70f4757331d99dcbc16b2bbd4849d15f8aae4cf36d31c"}, ] +[[package]] +name = "msgspec" +version = "0.18.5" +requires_python = ">=3.8" +summary = "A fast serialization and validation library, with builtin support for JSON, MessagePack, YAML, and TOML." 
+files = [ + {file = "msgspec-0.18.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:50479d88f3c4e9c73b55fbe84dc14b1cee8cec753e9170bbeafe3f9837e9f7af"}, + {file = "msgspec-0.18.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:cf885edac512e464c70a5f4f93b6f778c83ea4b91d646b6d72f6f5ac950f268e"}, + {file = "msgspec-0.18.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:773a38ead7832d171d1b9406bf42448a218245584af36e42c31f26d9f48a493a"}, + {file = "msgspec-0.18.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d5999eb65646b131f439ebb07c22446e8976b7fd8a312dca09ce6fa2c21162bb"}, + {file = "msgspec-0.18.5-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:a0ec78bd93684db61dfccf7a421b2e1a525b1a0546b4d8c4e339151be57d58a6"}, + {file = "msgspec-0.18.5-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:b547c7ad9786a79b0090a811d95d2d04063625a66fd96ed767cdfbabd8087c67"}, + {file = "msgspec-0.18.5-cp310-cp310-win_amd64.whl", hash = "sha256:e4c2fc93a98afefd1a78e957ca63363a8e5fd1b58bf70a8d66413c8f2a4723a2"}, + {file = "msgspec-0.18.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ee1f9414523d9a53744d21a6a2b6a636d9008be016963148a2646b38132e11dd"}, + {file = "msgspec-0.18.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:0017f6af35a3959002df4c82af60c1df2160701529dd89b17df971fde5945257"}, + {file = "msgspec-0.18.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:13da9df61745b7757070dae6e3476ab4e13bb9dd3e3d11b050dfcae540058bd1"}, + {file = "msgspec-0.18.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:01ed3472a0508f88a25a9d3bccafb840110f0fc5eb493b4baa43646e4e7c75c2"}, + {file = "msgspec-0.18.5-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:f55c4610cb0514aef8b35bfd0682f4cc2d7efd5e9b58acf30abd90b2a2376b5d"}, + {file = "msgspec-0.18.5-cp311-cp311-musllinux_1_1_x86_64.whl", hash = 
"sha256:8f7c0460aefdc8f01ea35f26e38c62b574bbf0b138ade860f557bbf9e9dac50c"}, + {file = "msgspec-0.18.5-cp311-cp311-win_amd64.whl", hash = "sha256:024f880df7d2f8cfdb9f9904efa0f386d3692457159bd58f850c20f11c07d16f"}, + {file = "msgspec-0.18.5-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:b3d206af4280172948d014d20b2cea7939784a99ea9a7ac943ce71100dbe8f98"}, + {file = "msgspec-0.18.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:261cc6e3a687e6f31b80056ab12f6adff3255f9b68b86d92b0b497f8b289c84c"}, + {file = "msgspec-0.18.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b6af133ba491a09ef8dcbc2d9904bcec220247e2067bb75d5d6daa12e0739d6c"}, + {file = "msgspec-0.18.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d318593e0ddc11b600552a470ec27baeb0b86a8e37903ac5ce7472ba0d6f7bf8"}, + {file = "msgspec-0.18.5-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:9a7b682cca3ba251a19cc769d38615ddd9551e086858decd950c156c2e79ecc1"}, + {file = "msgspec-0.18.5-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:b491b2549d22e11d7cfe34a231f9bd006cb6b71adefa070a070075d2f601e75c"}, + {file = "msgspec-0.18.5-cp312-cp312-win_amd64.whl", hash = "sha256:c79e7115f0143688c5d866359e7b6b76dd1581a81c9aeac7805a9d6320e9f2ca"}, + {file = "msgspec-0.18.5-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:c13e0a510bbd00cb29d193fceff55d1e17a99c9f97284cdbe61c15496c2f7803"}, + {file = "msgspec-0.18.5-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:f4eeb22921ca6cdfbf17ca874eccbe23eb010c89ffb3017b628940c37d53ce4a"}, + {file = "msgspec-0.18.5-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9420750f19c311e490db3edff9d153621c4989c582cf1be40c307c86d6cc2c1e"}, + {file = "msgspec-0.18.5-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6431305c645fb2a88a6da1fcec53dbaac61697f1219000b9589f9286532aabc0"}, + {file = "msgspec-0.18.5-cp38-cp38-musllinux_1_1_aarch64.whl", hash = 
"sha256:b7b49cba0577edc8ac166263b5fec3619fe5a267805cfc041bccaf8a0c58ef05"}, + {file = "msgspec-0.18.5-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:3f387cabddf2dc26d6fa7f1a8158deefc8db9e0626eacebbe4875f421c66d574"}, + {file = "msgspec-0.18.5-cp38-cp38-win_amd64.whl", hash = "sha256:482bdf77f3892dd603061b2b21ac6a4492bb797a552c92e833a41fe157162257"}, + {file = "msgspec-0.18.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:f290bfe7e21e8069890d101d8a060500b22a3aeb7860274644c4ec9240ddbedc"}, + {file = "msgspec-0.18.5-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:0027fba5362a3cb1bdd5503709aa2dbffad22dffd50f415086ed5f74f229ead9"}, + {file = "msgspec-0.18.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bd8a64da668b4eeef4b21dcecc640ed6950db661e2ea42ae52bbac5a2dbffb3a"}, + {file = "msgspec-0.18.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:be2440fa5699e1b3062d17fdfd8c6a459d72bb4edbce403353af6f39c8c5a6fa"}, + {file = "msgspec-0.18.5-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:eccba21248f90f332335b109e89685e79940367974812cd13975313f480f3dd8"}, + {file = "msgspec-0.18.5-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:c30fadc1a1118097920dd868e42469fed32c7078ca2feff2fc19e7c017065322"}, + {file = "msgspec-0.18.5-cp39-cp39-win_amd64.whl", hash = "sha256:fae28faef5fd61847930d8e86fd83c18f991a338efd8fbf69c1d35d42c652f41"}, + {file = "msgspec-0.18.5.tar.gz", hash = "sha256:8e545651531f2d01b983d0ac0c7f3b6d99674267ff261b5f344f5016160b5608"}, +] + [[package]] name = "mypy-extensions" version = "1.0.0" diff --git a/pyproject.toml b/pyproject.toml index 32101045..9f3530ab 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -94,6 +94,9 @@ cbor2 = [ bson = [ "pymongo>=4.4.0", ] +msgspec = [ + "msgspec>=0.18.5", +] [tool.pytest.ini_options] addopts = "-l --benchmark-sort=fullname --benchmark-warmup=true --benchmark-warmup-iterations=5 --benchmark-group-by=fullname" diff --git a/src/cattr/gen.py 
b/src/cattr/gen.py index 660d4d53..a41c2d11 100644 --- a/src/cattr/gen.py +++ b/src/cattr/gen.py @@ -1,5 +1,4 @@ from cattrs.gen import ( - AttributeOverride, make_dict_structure_fn, make_dict_unstructure_fn, make_hetero_tuple_unstructure_fn, @@ -8,6 +7,7 @@ make_mapping_unstructure_fn, override, ) +from cattrs.gen._consts import AttributeOverride __all__ = [ "AttributeOverride", diff --git a/src/cattrs/converters.py b/src/cattrs/converters.py index 172a7584..d9a18241 100644 --- a/src/cattrs/converters.py +++ b/src/cattrs/converters.py @@ -750,12 +750,10 @@ def _get_dis_func( ) -> Callable[[Any], type]: """Fetch or try creating a disambiguation function for a union.""" union_types = union.__args__ - if NoneType in union_types: # type: ignore + if NoneType in union_types: # We support unions of attrs classes and NoneType higher in the # logic. - union_types = tuple( - e for e in union_types if e is not NoneType # type: ignore - ) + union_types = tuple(e for e in union_types if e is not NoneType) # TODO: technically both disambiguators could support TypedDicts and # dataclasses... 
diff --git a/src/cattrs/preconf/__init__.py b/src/cattrs/preconf/__init__.py index 760ae115..7f747073 100644 --- a/src/cattrs/preconf/__init__.py +++ b/src/cattrs/preconf/__init__.py @@ -1,7 +1,21 @@ from datetime import datetime +from typing import Any, Callable, ParamSpec, TypeVar def validate_datetime(v, _): if not isinstance(v, datetime): raise Exception(f"Expected datetime, got {v}") return v + + +T = TypeVar("T") +P = ParamSpec("P") + + +def wrap(inner: Callable[P, Any]) -> Callable[[Callable[..., T]], Callable[P, T]]: + """Wrap a `Converter` `__init__` in a type-safe way.""" + + def impl(x: Callable[..., T]) -> Callable[P, T]: + return inner + + return impl diff --git a/src/cattrs/preconf/bson.py b/src/cattrs/preconf/bson.py index 6fc6d72a..cab125be 100644 --- a/src/cattrs/preconf/bson.py +++ b/src/cattrs/preconf/bson.py @@ -11,7 +11,7 @@ from ..converters import BaseConverter, Converter from ..dispatch import StructureHook from ..strategies import configure_union_passthrough -from . import validate_datetime +from . import validate_datetime, wrap T = TypeVar("T") @@ -93,6 +93,7 @@ def gen_structure_mapping(cl: Any) -> StructureHook: converter.register_structure_hook(date, lambda v, _: date.fromisoformat(v)) +@wrap(BsonConverter) def make_converter(*args: Any, **kwargs: Any) -> BsonConverter: kwargs["unstruct_collection_overrides"] = { AbstractSet: list, diff --git a/src/cattrs/preconf/cbor2.py b/src/cattrs/preconf/cbor2.py index 444014b4..414d19ce 100644 --- a/src/cattrs/preconf/cbor2.py +++ b/src/cattrs/preconf/cbor2.py @@ -8,6 +8,7 @@ from ..converters import BaseConverter, Converter from ..strategies import configure_union_passthrough +from . 
import wrap T = TypeVar("T") @@ -36,6 +37,7 @@ def configure_converter(converter: BaseConverter): configure_union_passthrough(Union[str, bool, int, float, None, bytes], converter) +@wrap(Cbor2Converter) def make_converter(*args: Any, **kwargs: Any) -> Cbor2Converter: kwargs["unstruct_collection_overrides"] = { AbstractSet: list, diff --git a/src/cattrs/preconf/json.py b/src/cattrs/preconf/json.py index e4d52a3c..b9b1b1cf 100644 --- a/src/cattrs/preconf/json.py +++ b/src/cattrs/preconf/json.py @@ -8,6 +8,7 @@ from ..converters import BaseConverter, Converter from ..strategies import configure_union_passthrough +from . import wrap T = TypeVar("T") @@ -40,6 +41,7 @@ def configure_converter(converter: BaseConverter): configure_union_passthrough(Union[str, bool, int, float, None, bytes], converter) +@wrap(JsonConverter) def make_converter(*args: Any, **kwargs: Any) -> JsonConverter: kwargs["unstruct_collection_overrides"] = { AbstractSet: list, diff --git a/src/cattrs/preconf/msgpack.py b/src/cattrs/preconf/msgpack.py index 2e7470b6..2a63ccd8 100644 --- a/src/cattrs/preconf/msgpack.py +++ b/src/cattrs/preconf/msgpack.py @@ -8,6 +8,7 @@ from ..converters import BaseConverter, Converter from ..strategies import configure_union_passthrough +from . 
import wrap T = TypeVar("T") @@ -40,6 +41,7 @@ def configure_converter(converter: BaseConverter): configure_union_passthrough(Union[str, bool, int, float, None, bytes], converter) +@wrap(MsgpackConverter) def make_converter(*args: Any, **kwargs: Any) -> MsgpackConverter: kwargs["unstruct_collection_overrides"] = { AbstractSet: list, diff --git a/src/cattrs/preconf/msgspec.py b/src/cattrs/preconf/msgspec.py new file mode 100644 index 00000000..7b257070 --- /dev/null +++ b/src/cattrs/preconf/msgspec.py @@ -0,0 +1,49 @@ +"""Preconfigured converters for msgspec.""" +from __future__ import annotations + +from base64 import b64decode +from datetime import date, datetime +from typing import Any, Callable, ParamSpec, TypeVar, Union + +from msgspec.json import decode, encode + +from ..converters import BaseConverter, Converter +from ..strategies import configure_union_passthrough + +T = TypeVar("T") +P = ParamSpec("P") + + +def wrap(inner: Callable[P, Any]) -> Callable[[Callable[..., T]], Callable[P, T]]: + def impl(x: Callable[..., T]) -> Callable[P, T]: + return inner + + return impl + + +class MsgspecJsonConverter(Converter): + def dumps(self, obj: Any, unstructure_as: Any = None, **kwargs: Any) -> bytes: + return encode(self.unstructure(obj, unstructure_as=unstructure_as), **kwargs) + + def loads(self, data: bytes, cl: type[T], **kwargs: Any) -> T: + return self.structure(decode(data, **kwargs), cl) + + +def configure_converter(converter: BaseConverter) -> None: + """Configure the converter for the msgspec library. 
+ + * bytes are serialized as base64 strings, directly by msgspec + * datetimes and dates are passed through to be serialized as RFC 3339 directly + * union passthrough configured for str, bool, int, float and None + """ + converter.register_structure_hook(bytes, lambda v, _: b64decode(v)) + converter.register_structure_hook(datetime, lambda v, _: datetime.fromisoformat(v)) + converter.register_structure_hook(date, lambda v, _: date.fromisoformat(v)) + configure_union_passthrough(Union[str, bool, int, float, None], converter) + + +@wrap(MsgspecJsonConverter) +def make_converter(*args: Any, **kwargs: Any) -> MsgspecJsonConverter: + res = MsgspecJsonConverter(*args, **kwargs) + configure_converter(res) + return res diff --git a/src/cattrs/preconf/orjson.py b/src/cattrs/preconf/orjson.py index fcd380b9..8df76a78 100644 --- a/src/cattrs/preconf/orjson.py +++ b/src/cattrs/preconf/orjson.py @@ -11,6 +11,7 @@ from ..converters import BaseConverter, Converter from ..fns import identity from ..strategies import configure_union_passthrough +from . import wrap T = TypeVar("T") @@ -69,6 +70,7 @@ def key_handler(v): configure_union_passthrough(Union[str, bool, int, float, None], converter) +@wrap(OrjsonConverter) def make_converter(*args: Any, **kwargs: Any) -> OrjsonConverter: kwargs["unstruct_collection_overrides"] = { AbstractSet: list, diff --git a/src/cattrs/preconf/pyyaml.py b/src/cattrs/preconf/pyyaml.py index 091c1d37..19314ee1 100644 --- a/src/cattrs/preconf/pyyaml.py +++ b/src/cattrs/preconf/pyyaml.py @@ -8,7 +8,7 @@ from ..converters import BaseConverter, Converter from ..strategies import configure_union_passthrough -from . import validate_datetime +from . 
import validate_datetime, wrap T = TypeVar("T") @@ -49,6 +49,7 @@ def configure_converter(converter: BaseConverter): ) +@wrap(PyyamlConverter) def make_converter(*args: Any, **kwargs: Any) -> PyyamlConverter: kwargs["unstruct_collection_overrides"] = { FrozenSetSubscriptable: list, diff --git a/src/cattrs/preconf/tomlkit.py b/src/cattrs/preconf/tomlkit.py index 8cdfeac7..10daf49d 100644 --- a/src/cattrs/preconf/tomlkit.py +++ b/src/cattrs/preconf/tomlkit.py @@ -12,7 +12,7 @@ from ..converters import BaseConverter, Converter from ..strategies import configure_union_passthrough -from . import validate_datetime +from . import validate_datetime, wrap T = TypeVar("T") _enum_value_getter = attrgetter("_value_") @@ -73,6 +73,7 @@ def key_handler(k: bytes): ) +@wrap(TomlkitConverter) def make_converter(*args: Any, **kwargs: Any) -> TomlkitConverter: kwargs["unstruct_collection_overrides"] = { AbstractSet: list, diff --git a/src/cattrs/preconf/ujson.py b/src/cattrs/preconf/ujson.py index b6de8e85..0644186b 100644 --- a/src/cattrs/preconf/ujson.py +++ b/src/cattrs/preconf/ujson.py @@ -9,6 +9,7 @@ from ..converters import BaseConverter, Converter from ..strategies import configure_union_passthrough +from . 
import wrap T = TypeVar("T") @@ -41,6 +42,7 @@ def configure_converter(converter: BaseConverter): configure_union_passthrough(Union[str, bool, int, float, None], converter) +@wrap(UjsonConverter) def make_converter(*args: Any, **kwargs: Any) -> UjsonConverter: kwargs["unstruct_collection_overrides"] = { AbstractSet: list, diff --git a/tests/test_preconf.py b/tests/test_preconf.py index f547e8de..306d0803 100644 --- a/tests/test_preconf.py +++ b/tests/test_preconf.py @@ -48,6 +48,7 @@ from cattrs.preconf.cbor2 import make_converter as cbor2_make_converter from cattrs.preconf.json import make_converter as json_make_converter from cattrs.preconf.msgpack import make_converter as msgpack_make_converter +from cattrs.preconf.msgspec import make_converter as msgspec_make_converter from cattrs.preconf.pyyaml import make_converter as pyyaml_make_converter from cattrs.preconf.tomlkit import make_converter as tomlkit_make_converter from cattrs.preconf.ujson import make_converter as ujson_make_converter @@ -296,7 +297,6 @@ def test_stdlib_json_converter_unstruct_collection_overrides(everything: Everyth include_bytes=False, include_datetimes=False, include_bools=sys.version_info[:2] != (3, 8), # Literal issues on 3.8 - include_literals=sys.version_info >= (3, 8), ), detailed_validation=..., ) @@ -314,7 +314,6 @@ def test_stdlib_json_unions(union_and_val: tuple, detailed_validation: bool): include_strings=False, include_bytes=False, include_bools=sys.version_info[:2] != (3, 8), # Literal issues on 3.8 - include_literals=sys.version_info >= (3, 8), ), detailed_validation=..., ) @@ -376,7 +375,6 @@ def test_ujson_converter_unstruct_collection_overrides(everything: Everything): include_bytes=False, include_datetimes=False, include_bools=sys.version_info[:2] != (3, 8), # Literal issues on 3.8 - include_literals=sys.version_info >= (3, 8), ), detailed_validation=..., ) @@ -445,7 +443,6 @@ def test_orjson_converter_unstruct_collection_overrides(everything: Everything): 
include_bytes=False, include_datetimes=False, include_bools=sys.version_info[:2] != (3, 8), # Literal issues on 3.8 - include_literals=sys.version_info >= (3, 8), ), detailed_validation=..., ) @@ -494,7 +491,6 @@ def test_msgpack_converter_unstruct_collection_overrides(everything: Everything) union_and_val=native_unions( include_datetimes=False, include_bools=sys.version_info[:2] != (3, 8), # Literal issues on 3.8 - include_literals=sys.version_info >= (3, 8), ), detailed_validation=..., ) @@ -569,7 +565,6 @@ def test_bson_converter_unstruct_collection_overrides(everything: Everything): union_and_val=native_unions( include_objectids=True, include_bools=sys.version_info[:2] != (3, 8), # Literal issues on 3.8 - include_literals=sys.version_info >= (3, 8), ), detailed_validation=..., ) @@ -609,8 +604,7 @@ def test_pyyaml_converter_unstruct_collection_overrides(everything: Everything): @given( union_and_val=native_unions( - include_bools=sys.version_info[:2] != (3, 8), # Literal issues on 3.8 - include_literals=sys.version_info >= (3, 8), + include_bools=sys.version_info[:2] != (3, 8) # Literal issues on 3.8 ), detailed_validation=..., ) @@ -698,7 +692,6 @@ def test_tomlkit_converter_unstruct_collection_overrides(everything: Everything) include_bytes=False, include_datetimes=False, include_bools=sys.version_info[:2] != (3, 8), # Literal issues on 3.8 - include_literals=sys.version_info >= (3, 8), ), detailed_validation=..., ) @@ -750,7 +743,6 @@ def test_cbor2_converter_unstruct_collection_overrides(everything: Everything): union_and_val=native_unions( include_datetimes=False, include_bools=sys.version_info[:2] != (3, 8), # Literal issues on 3.8 - include_literals=sys.version_info >= (3, 8), ), detailed_validation=..., ) @@ -760,3 +752,38 @@ def test_cbor2_unions(union_and_val: tuple, detailed_validation: bool): type, val = union_and_val assert converter.structure(val, type) == val + + +@given(everythings(allow_inf=False)) +def test_msgspec_json_converter(everything: 
Everything): + converter = msgspec_make_converter() + raw = converter.dumps(everything) + assert converter.loads(raw, Everything) == everything + + +@given(everythings(allow_inf=False)) +def test_msgspec_json_unstruct_collection_overrides(everything: Everything): + """Ensure collection overrides work.""" + converter = msgspec_make_converter( + unstruct_collection_overrides={AbstractSet: sorted} + ) + raw = converter.unstructure(everything) + assert raw["a_set"] == sorted(raw["a_set"]) + assert raw["a_mutable_set"] == sorted(raw["a_mutable_set"]) + assert raw["a_frozenset"] == sorted(raw["a_frozenset"]) + + +@given( + union_and_val=native_unions( + include_datetimes=False, + include_bytes=False, + include_bools=sys.version_info[:2] != (3, 8), # Literal issues on 3.8 + ), + detailed_validation=..., +) +def test_msgspec_json_unions(union_and_val: tuple, detailed_validation: bool): + """Native union passthrough works.""" + converter = msgspec_make_converter(detailed_validation=detailed_validation) + type, val = union_and_val + + assert converter.structure(val, type) == val From 428d952b7afe83ce257649d4c3ee9cda4fe9e26c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tin=20Tvrtkovi=C4=87?= Date: Sat, 6 Jan 2024 20:52:44 +0100 Subject: [PATCH 02/18] Fix typing import --- src/cattrs/preconf/__init__.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/cattrs/preconf/__init__.py b/src/cattrs/preconf/__init__.py index 7f747073..9b4eb359 100644 --- a/src/cattrs/preconf/__init__.py +++ b/src/cattrs/preconf/__init__.py @@ -1,5 +1,11 @@ +import sys from datetime import datetime -from typing import Any, Callable, ParamSpec, TypeVar +from typing import Any, Callable, TypeVar + +if sys.version_info[:2] < (3, 10): + from typing_extensions import ParamSpec +else: + from typing import ParamSpec def validate_datetime(v, _): From 07d40ac0a966728744762b95488938a8938cbf6a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tin=20Tvrtkovi=C4=87?= Date: Sun, 7 Jan 2024 01:53:55 
+0100 Subject: [PATCH 03/18] Test carefully for PyPy --- tests/test_preconf.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/test_preconf.py b/tests/test_preconf.py index 306d0803..adce4945 100644 --- a/tests/test_preconf.py +++ b/tests/test_preconf.py @@ -48,7 +48,6 @@ from cattrs.preconf.cbor2 import make_converter as cbor2_make_converter from cattrs.preconf.json import make_converter as json_make_converter from cattrs.preconf.msgpack import make_converter as msgpack_make_converter -from cattrs.preconf.msgspec import make_converter as msgspec_make_converter from cattrs.preconf.pyyaml import make_converter as pyyaml_make_converter from cattrs.preconf.tomlkit import make_converter as tomlkit_make_converter from cattrs.preconf.ujson import make_converter as ujson_make_converter @@ -756,6 +755,8 @@ def test_cbor2_unions(union_and_val: tuple, detailed_validation: bool): @given(everythings(allow_inf=False)) def test_msgspec_json_converter(everything: Everything): + from cattrs.preconf.msgspec import make_converter as msgspec_make_converter + converter = msgspec_make_converter() raw = converter.dumps(everything) assert converter.loads(raw, Everything) == everything @@ -764,6 +765,8 @@ def test_msgspec_json_converter(everything: Everything): @given(everythings(allow_inf=False)) def test_msgspec_json_unstruct_collection_overrides(everything: Everything): """Ensure collection overrides work.""" + from cattrs.preconf.msgspec import make_converter as msgspec_make_converter + converter = msgspec_make_converter( unstruct_collection_overrides={AbstractSet: sorted} ) @@ -783,6 +786,8 @@ def test_msgspec_json_unstruct_collection_overrides(everything: Everything): ) def test_msgspec_json_unions(union_and_val: tuple, detailed_validation: bool): """Native union passthrough works.""" + from cattrs.preconf.msgspec import make_converter as msgspec_make_converter + converter = msgspec_make_converter(detailed_validation=detailed_validation) type, val = 
union_and_val From cfe6ed4a283e97e36ffa922c9c14fecfeb2c07a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tin=20Tvrtkovi=C4=87?= Date: Sun, 7 Jan 2024 01:54:26 +0100 Subject: [PATCH 04/18] Docs --- HISTORY.md | 5 +++++ docs/cattrs.preconf.rst | 8 ++++++++ docs/preconf.md | 5 +++++ src/cattrs/preconf/msgspec.py | 12 +++++------- 4 files changed, 23 insertions(+), 7 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index e7737d7c..a592e44b 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -17,6 +17,9 @@ Our backwards-compatibility policy can be found [here](https://github.com/python ([#473](https://github.com/python-attrs/cattrs/pull/473)) - Introduce {meth}`BaseConverter.get_structure_hook` and {meth}`BaseConverter.get_unstructure_hook` methods. ([#432](https://github.com/python-attrs/cattrs/issues/432) [#472](https://github.com/python-attrs/cattrs/pull/472)) +- Introduce the [_msgspec_](https://jcristharif.com/msgspec/) {mod}`preconf converter `. + Only JSON is supported for now, with other formats supported by _msgspec_ to come later. + ([#481](https://github.com/python-attrs/cattrs/pull/481)) - The default union handler now properly takes renamed fields into account. ([#472](https://github.com/python-attrs/cattrs/pull/472)) - The default union handler now also handles dataclasses. @@ -25,6 +28,8 @@ Our backwards-compatibility policy can be found [here](https://github.com/python ([#452](https://github.com/python-attrs/cattrs/pull/452)) - The `include_subclasses` strategy now fetches the member hooks from the converter (making use of converter defaults) if overrides are not provided, instead of generating new hooks with no overrides. ([#429](https://github.com/python-attrs/cattrs/issues/429) [#472](https://github.com/python-attrs/cattrs/pull/472)) +- The preconf `make_converter` factories are now correctly typed. 
+ ([#481](https://github.com/python-attrs/cattrs/pull/481)) - The {class}`orjson preconf converter ` now passes through dates and datetimes to orjson while unstructuring, greatly improving speed. ([#463](https://github.com/python-attrs/cattrs/pull/463)) - `cattrs.gen` generators now attach metadata to the generated functions, making them introspectable. diff --git a/docs/cattrs.preconf.rst b/docs/cattrs.preconf.rst index 61a94d2c..6b8f9312 100644 --- a/docs/cattrs.preconf.rst +++ b/docs/cattrs.preconf.rst @@ -41,6 +41,14 @@ cattrs.preconf.msgpack module :undoc-members: :show-inheritance: +cattrs.preconf.msgspec module +----------------------------- + +.. automodule:: cattrs.preconf.msgspec + :members: + :undoc-members: + :show-inheritance: + cattrs.preconf.orjson module ---------------------------- diff --git a/docs/preconf.md b/docs/preconf.md index d805f7ab..257243d1 100644 --- a/docs/preconf.md +++ b/docs/preconf.md @@ -70,6 +70,10 @@ _orjson_ only supports mappings with string keys so mappings will have their key ## _msgspec_ +```{versionadded} 24.1.0 + +``` + Found at {mod}`cattrs.preconf.msgspec`. Only JSON functionality is currently available, other formats supported by msgspec to follow in the future. @@ -77,6 +81,7 @@ Bytes are un/structured as base 64 strings directly by msgspec. _msgspec_ [encodes special float values](https://jcristharif.com/msgspec/supported-types.html#float) (`NaN, Inf, -Inf`) as `null`. `datetime` s and `date` s are passed through to be unstructured into RFC 3339 by _msgspec_ itself. +_msgspec_ doesn't support PyPy. 
## _ujson_ diff --git a/src/cattrs/preconf/msgspec.py b/src/cattrs/preconf/msgspec.py index 7b257070..8a4b8f3d 100644 --- a/src/cattrs/preconf/msgspec.py +++ b/src/cattrs/preconf/msgspec.py @@ -3,29 +3,27 @@ from base64 import b64decode from datetime import date, datetime -from typing import Any, Callable, ParamSpec, TypeVar, Union +from typing import Any, ParamSpec, TypeVar, Union from msgspec.json import decode, encode from ..converters import BaseConverter, Converter from ..strategies import configure_union_passthrough +from . import wrap T = TypeVar("T") P = ParamSpec("P") - -def wrap(inner: Callable[P, Any]) -> Callable[[Callable[..., T]], Callable[P, T]]: - def impl(x: Callable[..., T]) -> Callable[P, T]: - return inner - - return impl +__all__ = ["MsgspecJsonConverter", "configure_converter", "make_converter"] class MsgspecJsonConverter(Converter): def dumps(self, obj: Any, unstructure_as: Any = None, **kwargs: Any) -> bytes: + """Unstructure and encode `obj` into JSON bytes.""" return encode(self.unstructure(obj, unstructure_as=unstructure_as), **kwargs) def loads(self, data: bytes, cl: type[T], **kwargs: Any) -> T: + """Decode and structure `cl` from the provided JSON bytes.""" return self.structure(decode(data, **kwargs), cl) From ab55a72ad643525df4810e48beea7acef6090c8b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tin=20Tvrtkovi=C4=87?= Date: Sun, 7 Jan 2024 02:09:19 +0100 Subject: [PATCH 05/18] Fix typing wrapper --- src/cattrs/preconf/__init__.py | 4 ++-- src/cattrs/preconf/json.py | 9 +++++---- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/src/cattrs/preconf/__init__.py b/src/cattrs/preconf/__init__.py index 9b4eb359..876576d1 100644 --- a/src/cattrs/preconf/__init__.py +++ b/src/cattrs/preconf/__init__.py @@ -18,10 +18,10 @@ def validate_datetime(v, _): P = ParamSpec("P") -def wrap(inner: Callable[P, Any]) -> Callable[[Callable[..., T]], Callable[P, T]]: +def wrap(_: Callable[P, Any]) -> Callable[[Callable[..., T]], Callable[P, T]]: 
"""Wrap a `Converter` `__init__` in a type-safe way.""" def impl(x: Callable[..., T]) -> Callable[P, T]: - return inner + return x return impl diff --git a/src/cattrs/preconf/json.py b/src/cattrs/preconf/json.py index b9b1b1cf..f4f5057a 100644 --- a/src/cattrs/preconf/json.py +++ b/src/cattrs/preconf/json.py @@ -4,8 +4,7 @@ from json import dumps, loads from typing import Any, Type, TypeVar, Union -from cattrs._compat import AbstractSet, Counter - +from .._compat import AbstractSet, Counter from ..converters import BaseConverter, Converter from ..strategies import configure_union_passthrough from . import wrap @@ -25,10 +24,12 @@ def configure_converter(converter: BaseConverter): """ Configure the converter for use with the stdlib json module. - * bytes are serialized as base64 strings + * bytes are serialized as base85 strings * datetimes are serialized as ISO 8601 * counters are serialized as dicts * sets are serialized as lists + * union passthrough is configured for unions of strings, bools, ints, + floats and None """ converter.register_unstructure_hook( bytes, lambda v: (b85encode(v) if v else b"").decode("utf8") @@ -38,7 +39,7 @@ def configure_converter(converter: BaseConverter): converter.register_structure_hook(datetime, lambda v, _: datetime.fromisoformat(v)) converter.register_unstructure_hook(date, lambda v: v.isoformat()) converter.register_structure_hook(date, lambda v, _: date.fromisoformat(v)) - configure_union_passthrough(Union[str, bool, int, float, None, bytes], converter) + configure_union_passthrough(Union[str, bool, int, float, None], converter) @wrap(JsonConverter) From 605efc3e788eba393f0da54a3795cd4b2a5ca9c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tin=20Tvrtkovi=C4=87?= Date: Sun, 7 Jan 2024 02:27:09 +0100 Subject: [PATCH 06/18] Fix PyPy CI some more --- tests/test_preconf.py | 3 +++ tox.ini | 17 ++++++----------- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/tests/test_preconf.py b/tests/test_preconf.py index 
adce4945..2f43873a 100644 --- a/tests/test_preconf.py +++ b/tests/test_preconf.py @@ -753,6 +753,7 @@ def test_cbor2_unions(union_and_val: tuple, detailed_validation: bool): assert converter.structure(val, type) == val +@pytest.mark.skipif(python_implementation() == "PyPy", reason="no msgspec on PyPy") @given(everythings(allow_inf=False)) def test_msgspec_json_converter(everything: Everything): from cattrs.preconf.msgspec import make_converter as msgspec_make_converter @@ -762,6 +763,7 @@ def test_msgspec_json_converter(everything: Everything): assert converter.loads(raw, Everything) == everything +@pytest.mark.skipif(python_implementation() == "PyPy", reason="no msgspec on PyPy") @given(everythings(allow_inf=False)) def test_msgspec_json_unstruct_collection_overrides(everything: Everything): """Ensure collection overrides work.""" @@ -776,6 +778,7 @@ def test_msgspec_json_unstruct_collection_overrides(everything: Everything): assert raw["a_frozenset"] == sorted(raw["a_frozenset"]) +@pytest.mark.skipif(python_implementation() == "PyPy", reason="no msgspec on PyPy") @given( union_and_val=native_unions( include_datetimes=False, diff --git a/tox.ini b/tox.ini index 6fc0b87a..58f31167 100644 --- a/tox.ini +++ b/tox.ini @@ -6,10 +6,10 @@ python = 3.10: py310 3.11: py311 3.12: py312, lint - pypy-3: pypy3 + pypy-3: pypy38 [tox] -envlist = pypy3, py38, py39, py310, py311, py312, lint +envlist = pypy38, py38, py39, py310, py311, py312, lint isolated_build = true skipsdist = true @@ -42,19 +42,14 @@ setenv = COVERAGE_PROCESS_START={toxinidir}/pyproject.toml COVERAGE_CORE=sysmon -[testenv:pypy3] +[testenv:pypy38] setenv = - PYTHONPATH = {toxinidir}:{toxinidir}/cattr FAST = 1 -allowlist_externals = pdm + PDM_IGNORE_SAVED_PYTHON="1" + COVERAGE_PROCESS_START={toxinidir}/pyproject.toml commands_pre = - pdm sync -G :all,test + pdm sync -G ujson,msgpack,pyyaml,tomlkit,cbor2,bson,test python -c 'import pathlib; pathlib.Path("{env_site_packages_dir}/cov.pth").write_text("import 
coverage; coverage.process_startup()")' -commands = - coverage run -m pytest tests {posargs:-n auto} -passenv = CI -package = wheel -wheel_build_env = .pkg [testenv:docs] basepython = python3.11 From 4c9dbd809f2954fa31bce215ca9ec0bccaa2e09c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tin=20Tvrtkovi=C4=87?= Date: Sun, 7 Jan 2024 02:37:14 +0100 Subject: [PATCH 07/18] Remove unused paramspec --- src/cattrs/preconf/msgspec.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/cattrs/preconf/msgspec.py b/src/cattrs/preconf/msgspec.py index 8a4b8f3d..73d7476b 100644 --- a/src/cattrs/preconf/msgspec.py +++ b/src/cattrs/preconf/msgspec.py @@ -3,7 +3,7 @@ from base64 import b64decode from datetime import date, datetime -from typing import Any, ParamSpec, TypeVar, Union +from typing import Any, TypeVar, Union from msgspec.json import decode, encode @@ -12,7 +12,6 @@ from . import wrap T = TypeVar("T") -P = ParamSpec("P") __all__ = ["MsgspecJsonConverter", "configure_converter", "make_converter"] From 7e4fc59873c33c479fb606b5747a94bf3fcd19de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tin=20Tvrtkovi=C4=87?= Date: Sun, 7 Jan 2024 13:26:54 +0100 Subject: [PATCH 08/18] Use msgspec's datetime structurer --- src/cattrs/preconf/msgspec.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/cattrs/preconf/msgspec.py b/src/cattrs/preconf/msgspec.py index 73d7476b..5ab852ef 100644 --- a/src/cattrs/preconf/msgspec.py +++ b/src/cattrs/preconf/msgspec.py @@ -5,6 +5,7 @@ from datetime import date, datetime from typing import Any, TypeVar, Union +from msgspec import convert from msgspec.json import decode, encode from ..converters import BaseConverter, Converter @@ -34,7 +35,7 @@ def configure_converter(converter: BaseConverter) -> None: * union passthrough configured for str, bool, int, float and None """ converter.register_structure_hook(bytes, lambda v, _: b64decode(v)) - converter.register_structure_hook(datetime, lambda v, _: 
datetime.fromisoformat(v)) + converter.register_structure_hook(datetime, lambda v, _: convert(v, datetime)) converter.register_structure_hook(date, lambda v, _: date.fromisoformat(v)) configure_union_passthrough(Union[str, bool, int, float, None], converter) From 3255c268ce7811b3b812f976351133b9071ccb92 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tin=20Tvrtkovi=C4=87?= Date: Sun, 7 Jan 2024 23:26:08 +0100 Subject: [PATCH 09/18] More msgspec --- docs/preconf.md | 8 +++- src/cattrs/preconf/msgspec.py | 85 +++++++++++++++++++++++++++++++++-- tests/preconf/__init__.py | 0 tests/preconf/test_msgspec.py | 80 +++++++++++++++++++++++++++++++++ tests/typed.py | 42 ++++++++++++----- 5 files changed, 200 insertions(+), 15 deletions(-) create mode 100644 tests/preconf/__init__.py create mode 100644 tests/preconf/test_msgspec.py diff --git a/docs/preconf.md b/docs/preconf.md index 257243d1..dc749c49 100644 --- a/docs/preconf.md +++ b/docs/preconf.md @@ -77,10 +77,16 @@ _orjson_ only supports mappings with string keys so mappings will have their key Found at {mod}`cattrs.preconf.msgspec`. Only JSON functionality is currently available, other formats supported by msgspec to follow in the future. +[_msgspec_ structs](https://jcristharif.com/msgspec/structs.html) are supported, but not composable - a struct will be handed over to _msgspec_ directly, and _msgspec_ will handle it. +_cattrs_ may get more sophisticated handling of structs in the future. + Bytes are un/structured as base 64 strings directly by msgspec. _msgspec_ [encodes special float values](https://jcristharif.com/msgspec/supported-types.html#float) (`NaN, Inf, -Inf`) as `null`. `datetime` s and `date` s are passed through to be unstructured into RFC 3339 by _msgspec_ itself. +_attrs_ classes, dataclasses and sequences are handled directly by msgspec if possible, otherwise by the normal _cattrs_ machinery. 
+This means it's possible the validation errors produced may be _msgspec_ validation errors instead of _cattrs_ validation errors. + _msgspec_ doesn't support PyPy. ## _ujson_ @@ -89,7 +95,7 @@ Found at {mod}`cattrs.preconf.ujson`. Bytes are serialized as base 85 strings. Sets are serialized as lists, and deserialized back into sets. `datetime` s and `date` s are serialized as ISO 8601 strings. -`ujson` doesn't support integers less than -9223372036854775808, and greater than 9223372036854775807, nor does it support `float('inf')`. +_ujson_ doesn't support integers less than -9223372036854775808, and greater than 9223372036854775807, nor does it support `float('inf')`. ## _msgpack_ diff --git a/src/cattrs/preconf/msgspec.py b/src/cattrs/preconf/msgspec.py index 5ab852ef..08213fa5 100644 --- a/src/cattrs/preconf/msgspec.py +++ b/src/cattrs/preconf/msgspec.py @@ -3,12 +3,18 @@ from base64 import b64decode from datetime import date, datetime -from typing import Any, TypeVar, Union +from typing import Any, Callable, TypeVar, Union -from msgspec import convert +from attrs import has as attrs_has +from attrs import resolve_types +from msgspec import Struct, convert, to_builtins from msgspec.json import decode, encode -from ..converters import BaseConverter, Converter +from cattrs._compat import fields, get_origin, has, is_bare, is_sequence +from cattrs.dispatch import HookFactory, UnstructureHook +from cattrs.fns import identity + +from ..converters import Converter from ..strategies import configure_union_passthrough from . 
import wrap @@ -22,18 +28,31 @@ def dumps(self, obj: Any, unstructure_as: Any = None, **kwargs: Any) -> bytes: """Unstructure and encode `obj` into JSON bytes.""" return encode(self.unstructure(obj, unstructure_as=unstructure_as), **kwargs) + def get_dumps_hook( + self, unstructure_as: Any, **kwargs: Any + ) -> Callable[[Any], bytes]: + unstruct_hook = self.get_unstructure_hook(unstructure_as) + if unstruct_hook in (identity, to_builtins): + return encode + return self.dumps + def loads(self, data: bytes, cl: type[T], **kwargs: Any) -> T: """Decode and structure `cl` from the provided JSON bytes.""" return self.structure(decode(data, **kwargs), cl) -def configure_converter(converter: BaseConverter) -> None: +def configure_converter(converter: Converter) -> None: """Configure the converter for the msgspec library. * bytes are serialized as base64 strings, directly by msgspec * datetimes and dates are passed through to be serialized as RFC 3339 directly * union passthrough configured for str, bool, int, float and None """ + configure_passthroughs(converter) + + converter.register_unstructure_hook(Struct, to_builtins) + + converter.register_structure_hook(Struct, convert) converter.register_structure_hook(bytes, lambda v, _: b64decode(v)) converter.register_structure_hook(datetime, lambda v, _: convert(v, datetime)) converter.register_structure_hook(date, lambda v, _: date.fromisoformat(v)) @@ -45,3 +64,61 @@ def make_converter(*args: Any, **kwargs: Any) -> MsgspecJsonConverter: res = MsgspecJsonConverter(*args, **kwargs) configure_converter(res) return res + + +def configure_passthroughs(converter: Converter) -> None: + """Configure optimizing passthroughs. + + A passthrough is when we let msgspec handle something automatically. 
+ """ + converter.register_unstructure_hook(bytes, to_builtins) + converter.register_unstructure_hook_factory( + is_sequence, make_unstructure_seq_factory(converter) + ) + converter.register_unstructure_hook_factory( + has, make_attrs_unstruct_factory(converter) + ) + + +def make_unstructure_seq_factory(converter: Converter) -> HookFactory[UnstructureHook]: + def unstructure_seq_factory(type) -> UnstructureHook: + if is_bare(type): + type_arg = Any + handler = converter.get_unstructure_hook(type_arg, cache_result=False) + elif getattr(type, "__args__", None) not in (None, ()): + type_arg = type.__args__[0] + handler = converter.get_unstructure_hook(type_arg, cache_result=False) + else: + handler = None + + if handler in (identity, to_builtins): + return handler + return converter.gen_unstructure_iterable(type) + + return unstructure_seq_factory + + +def make_attrs_unstruct_factory(converter: Converter) -> HookFactory[UnstructureHook]: + """Short-circuit attrs and dataclass handling if it matches msgspec.""" + + def attrs_factory(type: Any) -> UnstructureHook: + """Choose whether to use msgspec handling or our own.""" + origin = get_origin(type) + attribs = fields(origin or type) + if attrs_has(type) and any(isinstance(a.type, str) for a in attribs): + resolve_types(type) + attribs = fields(origin or type) + + if any( + attr.name.startswith("_") + or ( + converter.get_unstructure_hook(attr.type, cache_result=False) + not in (identity, to_builtins) + ) + for attr in attribs + ): + return converter.gen_unstructure_attrs_fromdict(type) + + return to_builtins + + return attrs_factory diff --git a/tests/preconf/__init__.py b/tests/preconf/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/preconf/test_msgspec.py b/tests/preconf/test_msgspec.py new file mode 100644 index 00000000..0e31a1e0 --- /dev/null +++ b/tests/preconf/test_msgspec.py @@ -0,0 +1,80 @@ +"""Tests for msgspec functionality.""" +from typing import Callable, List + +from attrs 
import define +from hypothesis import given +from msgspec import Struct, to_builtins +from msgspec.json import encode +from pytest import fixture + +from cattrs.fns import identity +from cattrs.preconf.json import make_converter as make_json_converter +from cattrs.preconf.msgspec import MsgspecJsonConverter as Conv +from cattrs.preconf.msgspec import make_converter + +from ..typed import simple_typed_classes + + +@define +class A: + a: int + + +@fixture +def converter() -> Conv: + return make_converter() + + +def is_passthrough(fn: Callable) -> bool: + return fn in (identity, to_builtins) + + +def test_unstructure_passthrough(converter: Conv): + """Passthrough for simple types works.""" + assert converter.get_unstructure_hook(int) == identity + assert converter.get_unstructure_hook(float) == identity + assert converter.get_unstructure_hook(str) == identity + assert is_passthrough(converter.get_unstructure_hook(bytes)) + assert converter.get_unstructure_hook(None) == identity + + # Any is special-cased, and we cannot know if it'll match + # the msgspec behavior. 
+ assert not is_passthrough(converter.get_unstructure_hook(List)) + + assert is_passthrough(converter.get_unstructure_hook(List[int])) + + +def test_unstructure_pt_attrs(converter: Conv): + """Passthrough for attrs works.""" + assert is_passthrough(converter.get_unstructure_hook(A)) + + +def test_dump_hook_attrs(converter: Conv): + """Passthrough for dump hooks works.""" + assert converter.get_dumps_hook(A) == encode + + +def test_basic_structs(converter: Conv): + """Handling msgspec structs works.""" + + class B(Struct): + b: int + + assert converter.unstructure(B(1)) == {"b": 1} + + assert converter.structure({"b": 1}, B) == B(1) + + +@given(simple_typed_classes(text_codec="ascii", allow_infinity=False, allow_nan=False)) +def test_simple_classes(cls_and_vals): + cl, posargs, kwargs = cls_and_vals + + msgspec = make_converter() + json = make_json_converter() + + inst = cl(*posargs, **kwargs) + + rebuilt_msgspec = msgspec.loads(msgspec.dumps(inst), cl) + rebuilt_json = json.loads(json.dumps(inst), cl) + + assert rebuilt_msgspec == rebuilt_json diff --git a/tests/typed.py b/tests/typed.py index 98a2ba82..e3c79f7a 100644 --- a/tests/typed.py +++ b/tests/typed.py @@ -33,6 +33,7 @@ DrawFn, SearchStrategy, booleans, + characters, composite, dictionaries, fixed_dictionaries, @@ -58,7 +59,14 @@ def simple_typed_classes( - defaults=None, min_attrs=0, frozen=False, kw_only=None, newtypes=True + defaults=None, + min_attrs=0, + frozen=False, + kw_only=None, + newtypes=True, + text_codec: str = "utf8", + allow_infinity=None, + allow_nan=None, ) -> SearchStrategy[Tuple[Type, PosArgs, KwArgs]]: """Yield tuples of (class, values).""" return lists_of_typed_attrs( @@ -67,6 +75,9 @@ def simple_typed_classes( for_frozen=frozen, kw_only=kw_only, newtypes=newtypes, + text_codec=text_codec, + allow_infinity=allow_infinity, + allow_nan=allow_nan, ).flatmap(partial(_create_hyp_class, frozen=frozen)) @@ -97,6 +108,9 @@ def lists_of_typed_attrs( allow_mutable_defaults=True, kw_only=None, 
newtypes=True, + text_codec="utf8", + allow_infinity=None, + allow_nan=None, ) -> SearchStrategy[List[Tuple[_CountingAttr, SearchStrategy[PosArg]]]]: # Python functions support up to 255 arguments. return lists( @@ -106,6 +120,9 @@ def lists_of_typed_attrs( allow_mutable_defaults=allow_mutable_defaults, kw_only=kw_only, newtypes=newtypes, + text_codec=text_codec, + allow_infinity=allow_infinity, + allow_nan=allow_nan, ), min_size=min_size, max_size=50, @@ -122,13 +139,16 @@ def simple_typed_attrs( allow_mutable_defaults=True, kw_only=None, newtypes=True, + text_codec="utf8", + allow_infinity=None, + allow_nan=None, ) -> SearchStrategy[Tuple[_CountingAttr, SearchStrategy[PosArgs]]]: if not is_39_or_later: res = ( any_typed_attrs(defaults, kw_only) | int_typed_attrs(defaults, kw_only) - | str_typed_attrs(defaults, kw_only) - | float_typed_attrs(defaults, kw_only) + | str_typed_attrs(defaults, kw_only, text_codec) + | float_typed_attrs(defaults, kw_only, allow_infinity, allow_nan) | frozenset_typed_attrs(defaults, legacy_types_only=True, kw_only=kw_only) | homo_tuple_typed_attrs(defaults, legacy_types_only=True, kw_only=kw_only) | path_typed_attrs(defaults, kw_only=kw_only) @@ -172,8 +192,8 @@ def simple_typed_attrs( res = ( any_typed_attrs(defaults, kw_only) | int_typed_attrs(defaults, kw_only) - | str_typed_attrs(defaults, kw_only) - | float_typed_attrs(defaults, kw_only) + | str_typed_attrs(defaults, kw_only, text_codec) + | float_typed_attrs(defaults, kw_only, allow_infinity, allow_nan) | frozenset_typed_attrs(defaults, kw_only=kw_only) | homo_tuple_typed_attrs(defaults, kw_only=kw_only) | path_typed_attrs(defaults, kw_only=kw_only) @@ -353,7 +373,7 @@ def int_typed_attrs(draw, defaults=None, kw_only=None): @composite -def str_typed_attrs(draw, defaults=None, kw_only=None): +def str_typed_attrs(draw, defaults=None, kw_only=None, codec: str = "utf8"): """ Generate a tuple of an attribute and a strategy that yields strs for that attribute. 
@@ -367,26 +387,28 @@ def str_typed_attrs(draw, defaults=None, kw_only=None): default=default, kw_only=draw(booleans()) if kw_only is None else kw_only, ), - text(), + text(characters(codec=codec)), ) @composite -def float_typed_attrs(draw, defaults=None, kw_only=None): +def float_typed_attrs( + draw, defaults=None, kw_only=None, allow_infinity=None, allow_nan=None +): """ Generate a tuple of an attribute and a strategy that yields floats for that attribute. """ default = NOTHING if defaults is True or (defaults is None and draw(booleans())): - default = draw(floats()) + default = draw(floats(allow_infinity=allow_infinity, allow_nan=allow_nan)) return ( field( type=float, default=default, kw_only=draw(booleans()) if kw_only is None else kw_only, ), - floats(), + floats(allow_infinity=allow_infinity, allow_nan=allow_nan), ) From 42ab02a36ff8dacb96ac1f70e84162b84d80b36a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tin=20Tvrtkovi=C4=87?= Date: Sun, 7 Jan 2024 23:34:36 +0100 Subject: [PATCH 10/18] Ignore _cpython tests on PyPy --- src/cattrs/gen/__init__.py | 3 --- tests/conftest.py | 3 +++ tests/preconf/{test_msgspec.py => test_msgspec_cpython.py} | 0 3 files changed, 3 insertions(+), 3 deletions(-) rename tests/preconf/{test_msgspec.py => test_msgspec_cpython.py} (100%) diff --git a/src/cattrs/gen/__init__.py b/src/cattrs/gen/__init__.py index 02a676d7..f60ab142 100644 --- a/src/cattrs/gen/__init__.py +++ b/src/cattrs/gen/__init__.py @@ -293,9 +293,6 @@ def make_dict_structure_fn( mapping = generate_mapping(base, mapping) break - if isinstance(cl, TypeVar): - cl = mapping.get(cl.__name__, cl) - cl_name = cl.__name__ fn_name = "structure_" + cl_name diff --git a/tests/conftest.py b/tests/conftest.py index 98b74330..d295990e 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,3 +1,4 @@ +import platform import sys from os import environ @@ -34,3 +35,5 @@ def converter_cls(request): collect_ignore_glob.append("*_604.py") if sys.version_info < (3, 12): 
collect_ignore_glob.append("*_695.py") +if platform.python_implementation() == "PyPy": + collect_ignore_glob.append("*_cpython.py") diff --git a/tests/preconf/test_msgspec.py b/tests/preconf/test_msgspec_cpython.py similarity index 100% rename from tests/preconf/test_msgspec.py rename to tests/preconf/test_msgspec_cpython.py From a8a6027aa2bbc802eedd1efdd62b22cb75413853 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tin=20Tvrtkovi=C4=87?= Date: Mon, 8 Jan 2024 00:18:15 +0100 Subject: [PATCH 11/18] More msgspec --- src/cattrs/preconf/msgspec.py | 11 ++++++++--- tests/preconf/test_msgspec_cpython.py | 2 +- tests/test_typeddicts.py | 6 ++++-- tests/typeddicts.py | 6 ++++-- 4 files changed, 17 insertions(+), 8 deletions(-) diff --git a/src/cattrs/preconf/msgspec.py b/src/cattrs/preconf/msgspec.py index 08213fa5..7edd7e1a 100644 --- a/src/cattrs/preconf/msgspec.py +++ b/src/cattrs/preconf/msgspec.py @@ -8,7 +8,7 @@ from attrs import has as attrs_has from attrs import resolve_types from msgspec import Struct, convert, to_builtins -from msgspec.json import decode, encode +from msgspec.json import Encoder, decode from cattrs._compat import fields, get_origin, has, is_bare, is_sequence from cattrs.dispatch import HookFactory, UnstructureHook @@ -24,16 +24,21 @@ class MsgspecJsonConverter(Converter): + _encoder: Encoder = Encoder() + def dumps(self, obj: Any, unstructure_as: Any = None, **kwargs: Any) -> bytes: """Unstructure and encode `obj` into JSON bytes.""" - return encode(self.unstructure(obj, unstructure_as=unstructure_as), **kwargs) + return self._encoder.encode( + self.unstructure(obj, unstructure_as=unstructure_as), **kwargs + ) def get_dumps_hook( self, unstructure_as: Any, **kwargs: Any ) -> Callable[[Any], bytes]: + """Produce a `dumps` hook for the given type.""" unstruct_hook = self.get_unstructure_hook(unstructure_as) if unstruct_hook in (identity, to_builtins): - return encode + return self._encoder.encode return self.dumps def loads(self, data: bytes, cl: 
type[T], **kwargs: Any) -> T: diff --git a/tests/preconf/test_msgspec_cpython.py b/tests/preconf/test_msgspec_cpython.py index 0e31a1e0..f4b23e2d 100644 --- a/tests/preconf/test_msgspec_cpython.py +++ b/tests/preconf/test_msgspec_cpython.py @@ -51,7 +51,7 @@ def test_unstructure_pt_attrs(converter: Conv): def test_dump_hook_attrs(converter: Conv): """Passthrough for dump hooks works.""" - assert converter.get_dumps_hook(A) == encode + assert converter.get_dumps_hook(A) == converter._encoder.encode def test_basic_structs(converter: Conv): diff --git a/tests/test_typeddicts.py b/tests/test_typeddicts.py index 1ffa455c..1ec10d91 100644 --- a/tests/test_typeddicts.py +++ b/tests/test_typeddicts.py @@ -1,5 +1,5 @@ """Tests for TypedDict un/structuring.""" -from datetime import datetime +from datetime import datetime, timezone from typing import Dict, Generic, Set, Tuple, TypedDict, TypeVar import pytest @@ -35,7 +35,9 @@ def mk_converter(detailed_validation: bool = True) -> Converter: """We can't use function-scoped fixtures with Hypothesis strats.""" c = Converter(detailed_validation=detailed_validation) c.register_unstructure_hook(datetime, lambda d: d.timestamp()) - c.register_structure_hook(datetime, lambda d, _: datetime.fromtimestamp(d)) + c.register_structure_hook( + datetime, lambda d, _: datetime.fromtimestamp(d, tz=timezone.utc) + ) return c diff --git a/tests/typeddicts.py b/tests/typeddicts.py index 18453d70..e89dd84d 100644 --- a/tests/typeddicts.py +++ b/tests/typeddicts.py @@ -1,5 +1,5 @@ """Strategies for typed dicts.""" -from datetime import datetime +from datetime import datetime, timezone from string import ascii_lowercase from typing import Any, Dict, Generic, List, Optional, Set, Tuple, TypeVar @@ -94,7 +94,9 @@ def datetime_attributes( draw: DrawFn, total: bool = True, not_required: bool = False ) -> Tuple[datetime, SearchStrategy, SearchStrategy]: success_strat = datetimes( - min_value=datetime(1970, 1, 1), max_value=datetime(2038, 1, 1) + 
min_value=datetime(1970, 1, 1), + max_value=datetime(2038, 1, 1), + timezones=just(timezone.utc), ).map(lambda dt: dt.replace(microsecond=0)) type = datetime strat = success_strat if total else success_strat | just(NOTHING) From c67a08796252e97a617a378e536a80a606d7f706 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tin=20Tvrtkovi=C4=87?= Date: Mon, 8 Jan 2024 00:53:37 +0100 Subject: [PATCH 12/18] More doc work --- docs/_static/custom.css | 4 ++-- docs/customizing.md | 2 +- docs/index.md | 20 ++++++++++++++++- docs/preconf.md | 31 ++++++++++++++++----------- src/cattrs/preconf/msgspec.py | 3 ++- tests/preconf/test_msgspec_cpython.py | 1 - 6 files changed, 42 insertions(+), 19 deletions(-) diff --git a/docs/_static/custom.css b/docs/_static/custom.css index f07517a1..de22ab4f 100644 --- a/docs/_static/custom.css +++ b/docs/_static/custom.css @@ -72,7 +72,7 @@ span:target ~ h6:first-of-type { div.article-container > article { font-size: 17px; - line-height: 31px; + line-height: 29px; } div.admonition { @@ -89,7 +89,7 @@ p.admonition-title { article > li > a { font-size: 19px; - line-height: 31px; + line-height: 29px; } div.tab-set { diff --git a/docs/customizing.md b/docs/customizing.md index 7efc229b..e10a9743 100644 --- a/docs/customizing.md +++ b/docs/customizing.md @@ -2,7 +2,7 @@ This section describes customizing the unstructuring and structuring processes in _cattrs_. -## Manual Un/structuring Hooks +## Custom Un/structuring Hooks You can write your own structuring and unstructuring functions and register them for types using {meth}`Converter.register_structure_hook() ` and {meth}`Converter.register_unstructure_hook() `. This approach is the most flexible but also requires the most amount of boilerplate. 
diff --git a/docs/index.md b/docs/index.md index 7e7eb8a3..426b60f7 100644 --- a/docs/index.md +++ b/docs/index.md @@ -2,11 +2,21 @@ --- maxdepth: 2 hidden: true +caption: Introduction --- self basics defaulthooks +``` + +```{toctree} +--- +maxdepth: 2 +hidden: true +caption: User Guide +--- + customizing strategies validation @@ -14,10 +24,18 @@ preconf unions usage indepth +``` + +```{toctree} +--- +maxdepth: 2 +hidden: true +caption: Dev Guide +--- + history benchmarking contributing -API ``` ```{include} ../README.md diff --git a/docs/preconf.md b/docs/preconf.md index dc749c49..a3b9bc67 100644 --- a/docs/preconf.md +++ b/docs/preconf.md @@ -70,25 +70,31 @@ _orjson_ only supports mappings with string keys so mappings will have their key ## _msgspec_ -```{versionadded} 24.1.0 - -``` - Found at {mod}`cattrs.preconf.msgspec`. Only JSON functionality is currently available, other formats supported by msgspec to follow in the future. -[_msgspec_ structs](https://jcristharif.com/msgspec/structs.html) are supported, but not composable - a struct will be handed over to _msgspec_ directly, and _msgspec_ will handle it. +[_msgspec_ structs](https://jcristharif.com/msgspec/structs.html) are supported, but not composable - a struct will be handed over to _msgspec_ directly, and _msgspec_ will handle it and all of its fields, recursively. _cattrs_ may get more sophisticated handling of structs in the future. -Bytes are un/structured as base 64 strings directly by msgspec. -_msgspec_ [encodes special float values](https://jcristharif.com/msgspec/supported-types.html#float) (`NaN, Inf, -Inf`) as `null`. -`datetime` s and `date` s are passed through to be unstructured into RFC 3339 by _msgspec_ itself. +[_msgspec_ strict mode](https://jcristharif.com/msgspec/usage.html#strict-vs-lax-mode) is used by default. +This can be customized by changing the {meth}`encoder <cattrs.preconf.msgspec.MsgspecJsonConverter.encoder>` attribute on the converter. 
-_attrs_ classes, dataclasses and sequences are handled directly by msgspec if possible, otherwise by the normal _cattrs_ machinery. +What _cattrs_ calls _unstructuring_ and _structuring_, _msgspec_ calls [`to_builtins` and `convert`](https://jcristharif.com/msgspec/converters.html). +What _cattrs_ refers to as _dumping_ and _loading_, _msgspec_ refers to as [`encoding` and `decoding`](https://jcristharif.com/msgspec/usage.html). + +Compatibility notes: +- Bytes are un/structured as base 64 strings directly by _msgspec_ itself. +- _msgspec_ [encodes special float values](https://jcristharif.com/msgspec/supported-types.html#float) (`NaN, Inf, -Inf`) as `null`. +- `datetime` s and `date` s are passed through to be unstructured into RFC 3339 by _msgspec_ itself. +- _attrs_ classes, dataclasses and sequences are handled directly by _msgspec_ if possible, otherwise by the normal _cattrs_ machinery. This means it's possible the validation errors produced may be _msgspec_ validation errors instead of _cattrs_ validation errors. _msgspec_ doesn't support PyPy. +```{versionadded} 24.1.0 + +``` + ## _ujson_ Found at {mod}`cattrs.preconf.ujson`. @@ -111,10 +117,6 @@ When parsing msgpack data from bytes, the library needs to be passed `strict_map ## _cbor2_ -```{versionadded} 23.1.0 - -``` - Found at {mod}`cattrs.preconf.cbor2`. _cbor2_ implements a fully featured CBOR encoder with several extensions for handling shared references, big integers, rational numbers and so on. @@ -133,6 +135,9 @@ Use keyword argument `canonical=True` for efficient encoding to the smallest bin Floats can be forced to smaller output by casting to lower-precision formats by casting to `numpy` floats (and back to Python floats). 
Example: `float(np.float32(value))` or `float(np.float16(value))` +```{versionadded} 23.1.0 + +``` ## _bson_ diff --git a/src/cattrs/preconf/msgspec.py b/src/cattrs/preconf/msgspec.py index 7edd7e1a..d85f0c31 100644 --- a/src/cattrs/preconf/msgspec.py +++ b/src/cattrs/preconf/msgspec.py @@ -24,7 +24,8 @@ class MsgspecJsonConverter(Converter): - _encoder: Encoder = Encoder() + #: The msgspec encoder for dumping. + encoder: Encoder = Encoder() def dumps(self, obj: Any, unstructure_as: Any = None, **kwargs: Any) -> bytes: """Unstructure and encode `obj` into JSON bytes.""" diff --git a/tests/preconf/test_msgspec_cpython.py b/tests/preconf/test_msgspec_cpython.py index f4b23e2d..f03100d4 100644 --- a/tests/preconf/test_msgspec_cpython.py +++ b/tests/preconf/test_msgspec_cpython.py @@ -4,7 +4,6 @@ from attrs import define from hypothesis import given from msgspec import Struct, to_builtins -from msgspec.json import encode from pytest import fixture from cattrs.fns import identity From 3aca70832bc45264ee7f7c787d81f806513e6444 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tin=20Tvrtkovi=C4=87?= Date: Mon, 8 Jan 2024 02:33:31 +0100 Subject: [PATCH 13/18] Fix --- src/cattrs/preconf/msgspec.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/cattrs/preconf/msgspec.py b/src/cattrs/preconf/msgspec.py index d85f0c31..364b73be 100644 --- a/src/cattrs/preconf/msgspec.py +++ b/src/cattrs/preconf/msgspec.py @@ -29,7 +29,7 @@ class MsgspecJsonConverter(Converter): def dumps(self, obj: Any, unstructure_as: Any = None, **kwargs: Any) -> bytes: """Unstructure and encode `obj` into JSON bytes.""" - return self._encoder.encode( + return self.encoder.encode( self.unstructure(obj, unstructure_as=unstructure_as), **kwargs ) @@ -39,7 +39,7 @@ def get_dumps_hook( """Produce a `dumps` hook for the given type.""" unstruct_hook = self.get_unstructure_hook(unstructure_as) if unstruct_hook in (identity, to_builtins): - return self._encoder.encode + return self.encoder.encode 
return self.dumps def loads(self, data: bytes, cl: type[T], **kwargs: Any) -> T: From ec69ae8d2b7cb08a37e15e893d572a4f87919ae6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tin=20Tvrtkovi=C4=87?= Date: Mon, 8 Jan 2024 02:38:02 +0100 Subject: [PATCH 14/18] Docs --- docs/preconf.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/preconf.md b/docs/preconf.md index a3b9bc67..4abe7352 100644 --- a/docs/preconf.md +++ b/docs/preconf.md @@ -44,6 +44,9 @@ Optional install targets should match the name of the {mod}`cattrs.preconf` modu # Using pip $ pip install cattrs[ujson] +# Using pdm +$ pdm add cattrs[orjson] + # Using poetry $ poetry add --extras tomlkit cattrs ``` From 2333e72eeae35336a413d7e3d8ab281c3970804d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tin=20Tvrtkovi=C4=87?= Date: Mon, 8 Jan 2024 10:02:52 +0100 Subject: [PATCH 15/18] Fix test --- tests/preconf/test_msgspec_cpython.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/preconf/test_msgspec_cpython.py b/tests/preconf/test_msgspec_cpython.py index f03100d4..aaa59cc6 100644 --- a/tests/preconf/test_msgspec_cpython.py +++ b/tests/preconf/test_msgspec_cpython.py @@ -50,7 +50,7 @@ def test_unstructure_pt_attrs(converter: Conv): def test_dump_hook_attrs(converter: Conv): """Passthrough for dump hooks works.""" - assert converter.get_dumps_hook(A) == converter._encoder.encode + assert converter.get_dumps_hook(A) == converter.encoder.encode def test_basic_structs(converter: Conv): From b8578e243055b6c29c5f3996a39f2389919fed83 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tin=20Tvrtkovi=C4=87?= Date: Fri, 12 Jan 2024 00:45:59 +0100 Subject: [PATCH 16/18] More msgspec work --- docs/preconf.md | 17 +++++++++++++++++ src/cattrs/preconf/msgspec.py | 7 +++++++ tests/preconf/test_msgspec_cpython.py | 6 ++++++ 3 files changed, 30 insertions(+) diff --git a/docs/preconf.md b/docs/preconf.md index 4abe7352..a6ce06a3 100644 --- a/docs/preconf.md +++ b/docs/preconf.md @@ -92,6 +92,23 @@ Compatibility 
notes: - _attrs_ classes, dataclasses and sequences are handled directly by _msgspec_ if possible, otherwise by the normal _cattrs_ machinery. This means it's possible the validation errors produced may be _msgspec_ validation errors instead of _cattrs_ validation errors. +This converter supports {meth}`get_loads_hook() <cattrs.preconf.msgspec.MsgspecJsonConverter.get_loads_hook>` and {meth}`get_dumps_hook() <cattrs.preconf.msgspec.MsgspecJsonConverter.get_dumps_hook>`. +These are factories for dumping and loading functions (as opposed to unstructuring and structuring); the hooks returned by this may be further optimized to offload as much work as possible to _msgspec_. + +```python +>>> from cattrs.preconf.msgspec import make_converter + +>>> @define +... class Test: +... a: int + +>>> converter = make_converter() +>>> dumps = converter.get_dumps_hook(Test) + +>>> dumps(Test(1)) # Will use msgspec directly. +b'{"a":1}' +``` + _msgspec_ doesn't support PyPy. ```{versionadded} 24.1.0 diff --git a/src/cattrs/preconf/msgspec.py b/src/cattrs/preconf/msgspec.py index 364b73be..1dd3edd1 100644 --- a/src/cattrs/preconf/msgspec.py +++ b/src/cattrs/preconf/msgspec.py @@ -3,6 +3,7 @@ from base64 import b64decode from datetime import date, datetime +from functools import partial from typing import Any, Callable, TypeVar, Union from attrs import has as attrs_has @@ -24,6 +25,8 @@ class MsgspecJsonConverter(Converter): + """A converter specialized for the _msgspec_ library.""" + #: The msgspec encoder for dumping. encoder: Encoder = Encoder() @@ -46,6 +49,10 @@ def loads(self, data: bytes, cl: type[T], **kwargs: Any) -> T: """Decode and structure `cl` from the provided JSON bytes.""" return self.structure(decode(data, **kwargs), cl) + def get_loads_hook(self, cl: type[T]) -> Callable[[bytes], T]: + """Produce a `loads` hook for the given type.""" + return partial(self.loads, cl=cl) + def configure_converter(converter: Converter) -> None: """Configure the converter for the msgspec library. 
diff --git a/tests/preconf/test_msgspec_cpython.py b/tests/preconf/test_msgspec_cpython.py index aaa59cc6..19a9add4 100644 --- a/tests/preconf/test_msgspec_cpython.py +++ b/tests/preconf/test_msgspec_cpython.py @@ -53,6 +53,12 @@ def test_dump_hook_attrs(converter: Conv): assert converter.get_dumps_hook(A) == converter.encoder.encode +def test_get_loads_hook(converter: Conv): + """`Converter.get_loads_hook` works.""" + hook = converter.get_loads_hook(A) + assert hook(b'{"a": 1}') == A(1) + + def test_basic_structs(converter: Conv): """Handling msgspec structs works.""" From 208b66ffa13f158e4dabfe85b1e15e33a7f7712d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tin=20Tvrtkovi=C4=87?= Date: Fri, 12 Jan 2024 01:01:54 +0100 Subject: [PATCH 17/18] Pass through mapping to msgspec --- docs/preconf.md | 2 ++ src/cattrs/preconf/msgspec.py | 36 +++++++++++++++++++++- tests/preconf/test_msgspec_cpython.py | 44 +++++++++++++++++++++++++-- 3 files changed, 79 insertions(+), 3 deletions(-) diff --git a/docs/preconf.md b/docs/preconf.md index a6ce06a3..95f47aa9 100644 --- a/docs/preconf.md +++ b/docs/preconf.md @@ -109,6 +109,8 @@ These are factories for dumping and loading functions (as opposed to unstructuri b'{"a":1}' ``` +Due to its complexity, this converter is currently _provisional_ and may slightly change as the best integration patterns are discovered. + _msgspec_ doesn't support PyPy. 
```{versionadded} 24.1.0 diff --git a/src/cattrs/preconf/msgspec.py b/src/cattrs/preconf/msgspec.py index 1dd3edd1..1ebafebd 100644 --- a/src/cattrs/preconf/msgspec.py +++ b/src/cattrs/preconf/msgspec.py @@ -11,7 +11,7 @@ from msgspec import Struct, convert, to_builtins from msgspec.json import Encoder, decode -from cattrs._compat import fields, get_origin, has, is_bare, is_sequence +from cattrs._compat import fields, get_origin, has, is_bare, is_mapping, is_sequence from cattrs.dispatch import HookFactory, UnstructureHook from cattrs.fns import identity @@ -85,6 +85,9 @@ def configure_passthroughs(converter: Converter) -> None: A passthrough is when we let msgspec handle something automatically. """ converter.register_unstructure_hook(bytes, to_builtins) + converter.register_unstructure_hook_factory( + is_mapping, make_unstructure_mapping_factory(converter) + ) converter.register_unstructure_hook_factory( is_sequence, make_unstructure_seq_factory(converter) ) @@ -111,6 +114,37 @@ def unstructure_seq_factory(type) -> UnstructureHook: return unstructure_seq_factory +def make_unstructure_mapping_factory( + converter: Converter, +) -> HookFactory[UnstructureHook]: + def unstructure_mapping_factory(type) -> UnstructureHook: + if is_bare(type): + key_arg = Any + value_arg = Any + key_handler = converter.get_unstructure_hook(key_arg, cache_result=False) + value_handler = converter.get_unstructure_hook( + value_arg, cache_result=False + ) + elif getattr(type, "__args__", None) not in (None, ()): + key_arg = type.__args__[0] + value_arg = type.__args__[1] + key_handler = converter.get_unstructure_hook(key_arg, cache_result=False) + value_handler = converter.get_unstructure_hook( + value_arg, cache_result=False + ) + else: + key_handler = value_handler = None + + if key_handler in (identity, to_builtins) and value_handler in ( + identity, + to_builtins, + ): + return to_builtins + return converter.gen_unstructure_iterable(type) + + return unstructure_mapping_factory + + def 
make_attrs_unstruct_factory(converter: Converter) -> HookFactory[UnstructureHook]: """Short-circuit attrs and dataclass handling if it matches msgspec.""" diff --git a/tests/preconf/test_msgspec_cpython.py b/tests/preconf/test_msgspec_cpython.py index 19a9add4..c4ba29d4 100644 --- a/tests/preconf/test_msgspec_cpython.py +++ b/tests/preconf/test_msgspec_cpython.py @@ -1,5 +1,14 @@ """Tests for msgspec functionality.""" -from typing import Callable, List +from typing import ( + Any, + Callable, + Dict, + List, + Mapping, + MutableMapping, + MutableSequence, + Sequence, +) from attrs import define from hypothesis import given @@ -19,6 +28,20 @@ class A: a: int +@define +class B: + """This class should not be passed through to msgspec.""" + + a: Any + + +@define +class C: + """This class should not be passed through to msgspec.""" + + _a: int + + @fixture def converter() -> Conv: return make_converter() @@ -41,16 +64,33 @@ def test_unstructure_passthrough(converter: Conv): assert not is_passthrough(converter.get_unstructure_hook(List)) assert is_passthrough(converter.get_unstructure_hook(List[int])) + assert is_passthrough(converter.get_unstructure_hook(Sequence[int])) + assert is_passthrough(converter.get_unstructure_hook(MutableSequence[int])) def test_unstructure_pt_attrs(converter: Conv): """Passthrough for attrs works.""" assert is_passthrough(converter.get_unstructure_hook(A)) + assert not is_passthrough(converter.get_unstructure_hook(B)) + assert not is_passthrough(converter.get_unstructure_hook(C)) + + +def test_unstructure_pt_mappings(converter: Conv): + """Mapping are passed through for unstructuring.""" + assert is_passthrough(converter.get_unstructure_hook(Dict[str, str])) + assert is_passthrough(converter.get_unstructure_hook(Dict[int, int])) + + assert is_passthrough(converter.get_unstructure_hook(Dict[int, A])) + assert not is_passthrough(converter.get_unstructure_hook(Dict[int, B])) + + assert is_passthrough(converter.get_unstructure_hook(Mapping[int, 
int])) + assert is_passthrough(converter.get_unstructure_hook(MutableMapping[int, int])) -def test_dump_hook_attrs(converter: Conv): +def test_dump_hook(converter: Conv): """Passthrough for dump hooks works.""" assert converter.get_dumps_hook(A) == converter.encoder.encode + assert converter.get_dumps_hook(Dict[str, str]) == converter.encoder.encode def test_get_loads_hook(converter: Conv): From a973cae81ae16ce63d98f7ce8690d732b162a21f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tin=20Tvrtkovi=C4=87?= Date: Fri, 12 Jan 2024 01:18:56 +0100 Subject: [PATCH 18/18] Fix counters --- src/cattrs/gen/__init__.py | 17 ++++++++++------- src/cattrs/gen/_shared.py | 6 +++--- src/cattrs/preconf/msgspec.py | 21 ++++++++++----------- 3 files changed, 23 insertions(+), 21 deletions(-) diff --git a/src/cattrs/gen/__init__.py b/src/cattrs/gen/__init__.py index f60ab142..f1acb6a4 100644 --- a/src/cattrs/gen/__init__.py +++ b/src/cattrs/gen/__init__.py @@ -166,7 +166,9 @@ def make_dict_unstructure_fn( # type of the default to dispatch on. t = a.default.__class__ try: - handler = converter._unstructure_func.dispatch(t) + handler = converter.get_unstructure_hook( + t, cache_result=False + ) except RecursionError: # There's a circular reference somewhere down the line handler = converter.unstructure @@ -674,7 +676,7 @@ def make_iterable_unstructure_fn( # We don't know how to handle the TypeVar on this level, # so we skip doing the dispatch here. if not isinstance(type_arg, TypeVar): - handler = converter._unstructure_func.dispatch(type_arg) + handler = converter.get_unstructure_hook(type_arg, cache_result=False) globs = {"__cattr_seq_cl": unstructure_to or cl, "__cattr_u": handler} lines = [] @@ -703,7 +705,8 @@ def make_hetero_tuple_unstructure_fn( # We can do the dispatch here and now. 
handlers = [ - converter._unstructure_func.dispatch(type_arg) for type_arg in type_args + converter.get_unstructure_hook(type_arg, cache_result=False) + for type_arg in type_args ] globs = {f"__cattr_u_{i}": h for i, h in enumerate(handlers)} @@ -758,11 +761,11 @@ def make_mapping_unstructure_fn( # Probably a Counter key_arg, val_arg = args, Any # We can do the dispatch here and now. - kh = key_handler or converter._unstructure_func.dispatch(key_arg) + kh = key_handler or converter.get_unstructure_hook(key_arg, cache_result=False) if kh == identity: kh = None - val_handler = converter._unstructure_func.dispatch(val_arg) + val_handler = converter.get_unstructure_hook(val_arg, cache_result=False) if val_handler == identity: val_handler = None @@ -830,11 +833,11 @@ def make_mapping_structure_fn( is_bare_dict = val_type is Any and key_type is Any if not is_bare_dict: # We can do the dispatch here and now. - key_handler = converter.get_structure_hook(key_type) + key_handler = converter.get_structure_hook(key_type, cache_result=False) if key_handler == converter._structure_call: key_handler = key_type - val_handler = converter.get_structure_hook(val_type) + val_handler = converter.get_structure_hook(val_type, cache_result=False) if val_handler == converter._structure_call: val_handler = val_type diff --git a/src/cattrs/gen/_shared.py b/src/cattrs/gen/_shared.py index 2bd1007f..78c2bc09 100644 --- a/src/cattrs/gen/_shared.py +++ b/src/cattrs/gen/_shared.py @@ -23,7 +23,7 @@ def find_structure_handler( # so it falls back to that. handler = None elif a.converter is not None and not prefer_attrs_converters and type is not None: - handler = c.get_structure_hook(type) + handler = c.get_structure_hook(type, cache_result=False) if handler == raise_error: handler = None elif type is not None: @@ -35,7 +35,7 @@ def find_structure_handler( # This is a special case where we can use the # type of the default to dispatch on. 
type = a.default.__class__ - handler = c.get_structure_hook(type) + handler = c.get_structure_hook(type, cache_result=False) if handler == c._structure_call: # Finals can't really be used with _structure_call, so # we wrap it so the rest of the toolchain doesn't get @@ -45,7 +45,7 @@ def handler(v, _, _h=handler): return _h(v, type) else: - handler = c.get_structure_hook(type) + handler = c.get_structure_hook(type, cache_result=False) else: handler = c.structure return handler diff --git a/src/cattrs/preconf/msgspec.py b/src/cattrs/preconf/msgspec.py index 1ebafebd..a9225970 100644 --- a/src/cattrs/preconf/msgspec.py +++ b/src/cattrs/preconf/msgspec.py @@ -120,18 +120,17 @@ def make_unstructure_mapping_factory( def unstructure_mapping_factory(type) -> UnstructureHook: if is_bare(type): key_arg = Any - value_arg = Any + val_arg = Any key_handler = converter.get_unstructure_hook(key_arg, cache_result=False) - value_handler = converter.get_unstructure_hook( - value_arg, cache_result=False - ) - elif getattr(type, "__args__", None) not in (None, ()): - key_arg = type.__args__[0] - value_arg = type.__args__[1] + value_handler = converter.get_unstructure_hook(val_arg, cache_result=False) + elif (args := getattr(type, "__args__", None)) not in (None, ()): + if len(args) == 2: + key_arg, val_arg = args + else: + # Probably a Counter + key_arg, val_arg = args, Any key_handler = converter.get_unstructure_hook(key_arg, cache_result=False) - value_handler = converter.get_unstructure_hook( - value_arg, cache_result=False - ) + value_handler = converter.get_unstructure_hook(val_arg, cache_result=False) else: key_handler = value_handler = None @@ -140,7 +139,7 @@ def unstructure_mapping_factory(type) -> UnstructureHook: to_builtins, ): return to_builtins - return converter.gen_unstructure_iterable(type) + return converter.gen_unstructure_mapping(type) return unstructure_mapping_factory