From 6d706aeb56d4f08bd0c37a7218b55f8302be1cc4 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Sun, 18 Aug 2024 12:01:54 +0100 Subject: [PATCH 1/4] refactor: Fix `C901` complexity in `SchemaBase.copy` The nested functions do not reference `self` and can be split out, much like `.to_dict` -> `_todict`. Saw this as a chance to add annotations as well. I think this helps illustrate that `.copy` is a noop for anything other than `SchemaBase | dict | list` - which I think might need to be addressed in the future. --- altair/utils/schemapi.py | 87 +++++++++++++++++++++++--------------- tools/schemapi/schemapi.py | 87 +++++++++++++++++++++++--------------- 2 files changed, 106 insertions(+), 68 deletions(-) diff --git a/altair/utils/schemapi.py b/altair/utils/schemapi.py index fdf0d6594..3b058252e 100644 --- a/altair/utils/schemapi.py +++ b/altair/utils/schemapi.py @@ -25,6 +25,7 @@ Sequence, TypeVar, Union, + cast, overload, ) from typing_extensions import TypeAlias @@ -833,6 +834,49 @@ def is_undefined(obj: Any) -> TypeIs[UndefinedType]: return obj is Undefined +@overload +def _shallow_copy(obj: _CopyImpl) -> _CopyImpl: ... +@overload +def _shallow_copy(obj: Any) -> Any: ... +def _shallow_copy(obj: _CopyImpl | Any) -> _CopyImpl | Any: + if isinstance(obj, SchemaBase): + return obj.copy(deep=False) + elif isinstance(obj, list): + return obj[:] + elif isinstance(obj, dict): + return obj.copy() + else: + return obj + + +@overload +def _deep_copy(obj: _CopyImpl, ignore: Any = ...) -> _CopyImpl: ... +@overload +def _deep_copy(obj: Any, ignore: Any = ...) -> Any: ... +def _deep_copy( + obj: _CopyImpl | Any, ignore: list[str] | None = None +) -> _CopyImpl | Any: + if ignore is None: + ignore = [] + if isinstance(obj, SchemaBase): + args = tuple(_deep_copy(arg) for arg in obj._args) + kwds = { + k: (_deep_copy(v, ignore=ignore) if k not in ignore else v) + for k, v in obj._kwds.items() + } + with debug_mode(False): + return obj.__class__(*args, **kwds) + elif isinstance(obj, list): + return [_deep_copy(v, ignore=ignore) for v in obj] + elif isinstance(obj, dict): + return { + k: (_deep_copy(v, ignore=ignore) if k not in ignore else v) + for k, v in obj.items() + } + else: + return obj + + class SchemaBase: """ Base class for schema wrappers. @@ -870,7 +914,7 @@ def __init__(self, *args: Any, **kwds: Any) -> None: if DEBUG_MODE and self._class_is_valid_at_instantiation: self.to_dict(validate=True) - def copy( # noqa: C901 + def copy( self, deep: bool | Iterable[Any] = True, ignore: list[str] | None = None ) -> Self: """ @@ -887,38 +931,6 @@ def copy( # noqa: C901 A list of keys for which the contents should not be copied, but only stored by reference. """ - - def _shallow_copy(obj): - if isinstance(obj, SchemaBase): - return obj.copy(deep=False) - elif isinstance(obj, list): - return obj[:] - elif isinstance(obj, dict): - return obj.copy() - else: - return obj - - def _deep_copy(obj, ignore: list[str] | None = None): - if ignore is None: - ignore = [] - if isinstance(obj, SchemaBase): - args = tuple(_deep_copy(arg) for arg in obj._args) - kwds = { - k: (_deep_copy(v, ignore=ignore) if k not in ignore else v) - for k, v in obj._kwds.items() - } - with debug_mode(False): - return obj.__class__(*args, **kwds) - elif isinstance(obj, list): - return [_deep_copy(v, ignore=ignore) for v in obj] - elif isinstance(obj, dict): - return { - k: (_deep_copy(v, ignore=ignore) if k not in ignore else v) - for k, v in obj.items() - } - else: - return obj - try: deep = list(deep) # type: ignore[arg-type] except TypeError: @@ -927,7 +939,7 @@ def _deep_copy(obj, ignore: list[str] | None = None): deep_is_list = True if deep and not deep_is_list: - return _deep_copy(self, ignore=ignore) + return cast("Self", _deep_copy(self, ignore=ignore)) with debug_mode(False): copy = self.__class__(*self._args, **self._kwds) @@ -1240,6 +1252,13 @@ def __dir__(self) -> list[str]: TSchemaBase = TypeVar("TSchemaBase", bound=SchemaBase) +_CopyImpl = TypeVar("_CopyImpl", SchemaBase, Dict[Any, Any], List[Any]) +""" +Types which have an implementation in ``SchemaBase.copy()``. + +All other types are returned **by reference**. +""" + def _is_dict(obj: Any | dict[Any, Any]) -> TypeIs[dict[Any, Any]]: return isinstance(obj, dict) diff --git a/tools/schemapi/schemapi.py b/tools/schemapi/schemapi.py index 1c756c2a2..c1ab46483 100644 --- a/tools/schemapi/schemapi.py +++ b/tools/schemapi/schemapi.py @@ -23,6 +23,7 @@ Sequence, TypeVar, Union, + cast, overload, ) from typing_extensions import TypeAlias @@ -831,6 +832,49 @@ def is_undefined(obj: Any) -> TypeIs[UndefinedType]: return obj is Undefined +@overload +def _shallow_copy(obj: _CopyImpl) -> _CopyImpl: ... +@overload +def _shallow_copy(obj: Any) -> Any: ... +def _shallow_copy(obj: _CopyImpl | Any) -> _CopyImpl | Any: + if isinstance(obj, SchemaBase): + return obj.copy(deep=False) + elif isinstance(obj, list): + return obj[:] + elif isinstance(obj, dict): + return obj.copy() + else: + return obj + + +@overload +def _deep_copy(obj: _CopyImpl, ignore: Any = ...) -> _CopyImpl: ... +@overload +def _deep_copy(obj: Any, ignore: Any = ...) -> Any: ... +def _deep_copy( + obj: _CopyImpl | Any, ignore: list[str] | None = None +) -> _CopyImpl | Any: + if ignore is None: + ignore = [] + if isinstance(obj, SchemaBase): + args = tuple(_deep_copy(arg) for arg in obj._args) + kwds = { + k: (_deep_copy(v, ignore=ignore) if k not in ignore else v) + for k, v in obj._kwds.items() + } + with debug_mode(False): + return obj.__class__(*args, **kwds) + elif isinstance(obj, list): + return [_deep_copy(v, ignore=ignore) for v in obj] + elif isinstance(obj, dict): + return { + k: (_deep_copy(v, ignore=ignore) if k not in ignore else v) + for k, v in obj.items() + } + else: + return obj + + class SchemaBase: """ Base class for schema wrappers. @@ -868,7 +912,7 @@ def __init__(self, *args: Any, **kwds: Any) -> None: if DEBUG_MODE and self._class_is_valid_at_instantiation: self.to_dict(validate=True) - def copy( # noqa: C901 + def copy( self, deep: bool | Iterable[Any] = True, ignore: list[str] | None = None ) -> Self: """ @@ -885,38 +929,6 @@ def copy( # noqa: C901 A list of keys for which the contents should not be copied, but only stored by reference. """ - - def _shallow_copy(obj): - if isinstance(obj, SchemaBase): - return obj.copy(deep=False) - elif isinstance(obj, list): - return obj[:] - elif isinstance(obj, dict): - return obj.copy() - else: - return obj - - def _deep_copy(obj, ignore: list[str] | None = None): - if ignore is None: - ignore = [] - if isinstance(obj, SchemaBase): - args = tuple(_deep_copy(arg) for arg in obj._args) - kwds = { - k: (_deep_copy(v, ignore=ignore) if k not in ignore else v) - for k, v in obj._kwds.items() - } - with debug_mode(False): - return obj.__class__(*args, **kwds) - elif isinstance(obj, list): - return [_deep_copy(v, ignore=ignore) for v in obj] - elif isinstance(obj, dict): - return { - k: (_deep_copy(v, ignore=ignore) if k not in ignore else v) - for k, v in obj.items() - } - else: - return obj - try: deep = list(deep) # type: ignore[arg-type] except TypeError: @@ -925,7 +937,7 @@ def _deep_copy(obj, ignore: list[str] | None = None): deep_is_list = True if deep and not deep_is_list: - return _deep_copy(self, ignore=ignore) + return cast("Self", _deep_copy(self, ignore=ignore)) with debug_mode(False): copy = self.__class__(*self._args, **self._kwds) @@ -1238,6 +1250,13 @@ def __dir__(self) -> list[str]: TSchemaBase = TypeVar("TSchemaBase", bound=SchemaBase) +_CopyImpl = TypeVar("_CopyImpl", SchemaBase, Dict[Any, Any], List[Any]) +""" +Types which have an implementation in ``SchemaBase.copy()``. + +All other types are returned **by reference**. +""" + def _is_dict(obj: Any | dict[Any, Any]) -> TypeIs[dict[Any, Any]]: return isinstance(obj, dict) From 4ce42d9346041ca2e4688fba517b78ead11a7e7e Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Sun, 18 Aug 2024 13:00:05 +0100 Subject: [PATCH 2/4] refactor: Further simplify `SchemaBase.copy` 10 lines shorter and is no longer constrained to `list` on the assert. May be slightly faster on `python<3.11` which do not have zero-cost exceptions https://docs.python.org/3.11/whatsnew/3.11.html#misc --- altair/utils/schemapi.py | 14 ++------------ tools/schemapi/schemapi.py | 14 ++------------ 2 files changed, 4 insertions(+), 24 deletions(-) diff --git a/altair/utils/schemapi.py b/altair/utils/schemapi.py index 3b058252e..089708224 100644 --- a/altair/utils/schemapi.py +++ b/altair/utils/schemapi.py @@ -931,21 +931,11 @@ def copy( A list of keys for which the contents should not be copied, but only stored by reference. """ - try: - deep = list(deep) # type: ignore[arg-type] - except TypeError: - deep_is_list = False - else: - deep_is_list = True - - if deep and not deep_is_list: + if deep is True: return cast("Self", _deep_copy(self, ignore=ignore)) - with debug_mode(False): copy = self.__class__(*self._args, **self._kwds) - if deep_is_list: - # Assert statement is for the benefit of Mypy - assert isinstance(deep, list) + if _is_iterable(deep): for attr in deep: copy[attr] = _shallow_copy(copy._get(attr)) return copy diff --git a/tools/schemapi/schemapi.py b/tools/schemapi/schemapi.py index c1ab46483..9a1f93d25 100644 --- a/tools/schemapi/schemapi.py +++ b/tools/schemapi/schemapi.py @@ -929,21 +929,11 @@ def copy( A list of keys for which the contents should not be copied, but only stored by reference. """ - try: - deep = list(deep) # type: ignore[arg-type] - except TypeError: - deep_is_list = False - else: - deep_is_list = True - - if deep and not deep_is_list: + if deep is True: return cast("Self", _deep_copy(self, ignore=ignore)) - with debug_mode(False): copy = self.__class__(*self._args, **self._kwds) - if deep_is_list: - # Assert statement is for the benefit of Mypy - assert isinstance(deep, list) + if _is_iterable(deep): for attr in deep: copy[attr] = _shallow_copy(copy._get(attr)) return copy From dcb34e934f43af78cb1b456d8b83f5840d07fdf9 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Sun, 18 Aug 2024 13:11:16 +0100 Subject: [PATCH 3/4] refactor(perf): Merge identical `_shallow_copy` branches Remembered a related `ruff` rule [FURB145](https://docs.astral.sh/ruff/rules/slice-copy/). Wouldn't have made this fix, but likely would have led someone there --- altair/utils/schemapi.py | 4 +--- tools/schemapi/schemapi.py | 4 +--- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/altair/utils/schemapi.py b/altair/utils/schemapi.py index 089708224..69067d238 100644 --- a/altair/utils/schemapi.py +++ b/altair/utils/schemapi.py @@ -841,9 +841,7 @@ def _shallow_copy(obj: Any) -> Any: ... def _shallow_copy(obj: _CopyImpl | Any) -> _CopyImpl | Any: if isinstance(obj, SchemaBase): return obj.copy(deep=False) - elif isinstance(obj, list): - return obj[:] - elif isinstance(obj, dict): + elif isinstance(obj, (list, dict)): return obj.copy() else: return obj diff --git a/tools/schemapi/schemapi.py b/tools/schemapi/schemapi.py index 9a1f93d25..2cff79aed 100644 --- a/tools/schemapi/schemapi.py +++ b/tools/schemapi/schemapi.py @@ -839,9 +839,7 @@ def _shallow_copy(obj: Any) -> Any: ... def _shallow_copy(obj: _CopyImpl | Any) -> _CopyImpl | Any: if isinstance(obj, SchemaBase): return obj.copy(deep=False) - elif isinstance(obj, list): - return obj[:] - elif isinstance(obj, dict): + elif isinstance(obj, (list, dict)): return obj.copy() else: return obj From d0558d26ecdfee8275888d709db17859a6a56122 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Sun, 18 Aug 2024 14:32:36 +0100 Subject: [PATCH 4/4] refactor(perf): Reduce `_deep_copy` - Initialize an empty set **once** at origin, rather than creating a new list per iteration - Renamed to `by_ref` in the new private function, to better describe the operation. - No change to public API. - Define a partial `copy` to reduce repetition - Use a genexpr for `args`, to avoid unpacking twice --- altair/utils/schemapi.py | 27 +++++++++------------------ tools/schemapi/schemapi.py | 27 +++++++++------------------ 2 files changed, 18 insertions(+), 36 deletions(-) diff --git a/altair/utils/schemapi.py b/altair/utils/schemapi.py index 69067d238..9bcf039c3 100644 --- a/altair/utils/schemapi.py +++ b/altair/utils/schemapi.py @@ -848,29 +848,20 @@ def _shallow_copy(obj: _CopyImpl | Any) -> _CopyImpl | Any: @overload -def _deep_copy(obj: _CopyImpl, ignore: Any = ...) -> _CopyImpl: ... +def _deep_copy(obj: _CopyImpl, by_ref: set[str]) -> _CopyImpl: ... @overload -def _deep_copy(obj: Any, ignore: Any = ...) -> Any: ... -def _deep_copy( - obj: _CopyImpl | Any, ignore: list[str] | None = None -) -> _CopyImpl | Any: - if ignore is None: - ignore = [] +def _deep_copy(obj: Any, by_ref: set[str]) -> Any: ... +def _deep_copy(obj: _CopyImpl | Any, by_ref: set[str]) -> _CopyImpl | Any: + copy = partial(_deep_copy, by_ref=by_ref) if isinstance(obj, SchemaBase): - args = tuple(_deep_copy(arg) for arg in obj._args) - kwds = { - k: (_deep_copy(v, ignore=ignore) if k not in ignore else v) - for k, v in obj._kwds.items() - } + args = (copy(arg) for arg in obj._args) + kwds = {k: (copy(v) if k not in by_ref else v) for k, v in obj._kwds.items()} with debug_mode(False): return obj.__class__(*args, **kwds) elif isinstance(obj, list): - return [_deep_copy(v, ignore=ignore) for v in obj] + return [copy(v) for v in obj] elif isinstance(obj, dict): - return { - k: (_deep_copy(v, ignore=ignore) if k not in ignore else v) - for k, v in obj.items() - } + return {k: (copy(v) if k not in by_ref else v) for k, v in obj.items()} else: return obj @@ -930,7 +921,7 @@ def copy( only stored by reference. """ if deep is True: - return cast("Self", _deep_copy(self, ignore=ignore)) + return cast("Self", _deep_copy(self, set(ignore) if ignore else set())) with debug_mode(False): copy = self.__class__(*self._args, **self._kwds) if _is_iterable(deep): diff --git a/tools/schemapi/schemapi.py b/tools/schemapi/schemapi.py index 2cff79aed..a77afd4a4 100644 --- a/tools/schemapi/schemapi.py +++ b/tools/schemapi/schemapi.py @@ -846,29 +846,20 @@ def _shallow_copy(obj: _CopyImpl | Any) -> _CopyImpl | Any: @overload -def _deep_copy(obj: _CopyImpl, ignore: Any = ...) -> _CopyImpl: ... +def _deep_copy(obj: _CopyImpl, by_ref: set[str]) -> _CopyImpl: ... @overload -def _deep_copy(obj: Any, ignore: Any = ...) -> Any: ... -def _deep_copy( - obj: _CopyImpl | Any, ignore: list[str] | None = None -) -> _CopyImpl | Any: - if ignore is None: - ignore = [] +def _deep_copy(obj: Any, by_ref: set[str]) -> Any: ... +def _deep_copy(obj: _CopyImpl | Any, by_ref: set[str]) -> _CopyImpl | Any: + copy = partial(_deep_copy, by_ref=by_ref) if isinstance(obj, SchemaBase): - args = tuple(_deep_copy(arg) for arg in obj._args) - kwds = { - k: (_deep_copy(v, ignore=ignore) if k not in ignore else v) - for k, v in obj._kwds.items() - } + args = (copy(arg) for arg in obj._args) + kwds = {k: (copy(v) if k not in by_ref else v) for k, v in obj._kwds.items()} with debug_mode(False): return obj.__class__(*args, **kwds) elif isinstance(obj, list): - return [_deep_copy(v, ignore=ignore) for v in obj] + return [copy(v) for v in obj] elif isinstance(obj, dict): - return { - k: (_deep_copy(v, ignore=ignore) if k not in ignore else v) - for k, v in obj.items() - } + return {k: (copy(v) if k not in by_ref else v) for k, v in obj.items()} else: return obj @@ -928,7 +919,7 @@ def copy( only stored by reference. """ if deep is True: - return cast("Self", _deep_copy(self, ignore=ignore)) + return cast("Self", _deep_copy(self, set(ignore) if ignore else set())) with debug_mode(False): copy = self.__class__(*self._args, **self._kwds) if _is_iterable(deep):