diff --git a/docs/changelog.md b/docs/changelog.md index 5dd4663c1..fb478ed8f 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -2,6 +2,14 @@ ## Version 1.3 +### WIP + +#### Bug Fixes + +* Avoid the constructor when creating new histograms from existing ones. [#759][] + +[#759]: https://github.com/scikit-hep/boost-histogram/pull/759 + ### Version 1.3.2 * Include PyPy 3.9 binary wheels [#730][] diff --git a/src/boost_histogram/_core/axis/__init__.pyi b/src/boost_histogram/_core/axis/__init__.pyi index 63f24a4fe..0a78a444c 100644 --- a/src/boost_histogram/_core/axis/__init__.pyi +++ b/src/boost_histogram/_core/axis/__init__.pyi @@ -29,6 +29,8 @@ class _BaseAxis: def traits_ordered(self) -> bool: ... @property def metadata(self) -> Any: ... + @metadata.setter + def metadata(self, item: Any) -> None: ... @property def size(self) -> int: ... @property diff --git a/src/boost_histogram/_core/hist.pyi b/src/boost_histogram/_core/hist.pyi index 523f37419..704b0d1ec 100644 --- a/src/boost_histogram/_core/hist.pyi +++ b/src/boost_histogram/_core/hist.pyi @@ -12,7 +12,10 @@ _axes_limit: int class _BaseHistogram: _storage_type: ClassVar[Type[storage._BaseStorage]] - def __init__(self, axis: axis._BaseAxis, storage: storage._BaseStorage) -> None: ... + # Note that storage has a default simply because subclasses always handle it. + def __init__( + self, axis: list[axis._BaseAxis], storage: storage._BaseStorage = ... + ) -> None: ... def rank(self) -> int: ... def size(self) -> int: ... def reset(self) -> None: ... @@ -25,10 +28,17 @@ class _BaseHistogram: def to_numpy(self, flow: bool = ...) -> Tuple["np.typing.NDArray[Any]", ...]: ... def view(self, flow: bool = ...) -> "np.typing.NDArray[Any]": ... def axis(self, i: int = ...) -> axis._BaseAxis: ... - def fill(self, *args: ArrayLike, weight: ArrayLike | None = ...) -> None: ... + def fill( + self, + *args: ArrayLike, + weight: ArrayLike | None = ..., + sample: ArrayLike | None = ..., + ) -> None: ... def empty(self, flow: bool = ...) -> bool: ... def reduce(self: T, *args: Any) -> T: ... def project(self: T, *args: int) -> T: ... + def sum(self, flow: bool = ...) -> Any: ... + def at(self, *args: int) -> Any: ... class any_int64(_BaseHistogram): def __idiv__(self: T, other: any_int64) -> T: ... @@ -73,7 +83,7 @@ class any_mean(_BaseHistogram): self, *args: ArrayLike, weight: ArrayLike | None = ..., - sample: ArrayLike | None = ... + sample: ArrayLike | None = ..., ) -> None: ... class any_weighted_mean(_BaseHistogram): @@ -84,5 +94,5 @@ class any_weighted_mean(_BaseHistogram): self, *args: ArrayLike, weight: ArrayLike | None = ..., - sample: ArrayLike | None = ... + sample: ArrayLike | None = ..., ) -> None: ... diff --git a/src/boost_histogram/_internal/hist.py b/src/boost_histogram/_internal/hist.py index 6c5927cf8..80d27c4bf 100644 --- a/src/boost_histogram/_internal/hist.py +++ b/src/boost_histogram/_internal/hist.py @@ -9,6 +9,7 @@ TYPE_CHECKING, Any, Callable, + ClassVar, Dict, Iterable, List, @@ -129,7 +130,10 @@ class Histogram: ) # .metadata and ._variance_known are part of the dict - _family: object = boost_histogram + _family: ClassVar[object] = boost_histogram + + axes: AxesTuple + _hist: CppHistogram def __init_subclass__(cls, *, family: Optional[object] = None) -> None: """ @@ -185,22 +189,24 @@ def __init__( # Allow construction from a raw histogram object (internal) if len(axes) == 1 and isinstance(axes[0], tuple(_histograms)): - self._hist: Any = axes[0] - self.metadata = metadata - self.axes = self._generate_axes_() + cpp_hist: CppHistogram = axes[0] # type: ignore[assignment] + self._from_histogram_cpp(cpp_hist) + if metadata: + self.metadata = metadata return # If we construct with another Histogram as the only positional argument, # support that too if len(axes) == 1 and isinstance(axes[0], Histogram): - # Special case - we can recursively call __init__ here - self.__init__(axes[0]._hist) # type: ignore[misc] # pylint: disable=non-parent-init-called - self._from_histogram_object(axes[0]) + normal_hist: Histogram = axes[0] + self._from_histogram_object(normal_hist) + if metadata: + self.metadata = metadata return # Support objects that provide a to_boost method, like Uproot if len(axes) == 1 and hasattr(axes[0], "_to_boost_histogram_"): - self.__init__(axes[0]._to_boost_histogram_()) # type: ignore[misc, union-attr] # pylint: disable=non-parent-init-called + self._from_histogram_object(axes[0]._to_boost_histogram_()) # type: ignore[union-attr] return if storage is None: @@ -232,6 +238,60 @@ def __init__( raise TypeError("Unsupported storage") + @classmethod + def _clone( + cls: Type[H], + _hist: "Histogram | CppHistogram", + *, + other: "Histogram | None" = None, + memo: Any = NOTHING, + ) -> H: + """ + Clone a histogram (possibly of a different base). Does not trigger __init__. + This will copy data from `other=` if non-None, otherwise metadata gets copied from the input. + """ + + self = cls.__new__(cls) + if isinstance(_hist, tuple(_histograms)): + self._from_histogram_cpp(_hist) # type: ignore[arg-type] + if other is not None: + return cls._clone(self, other=other, memo=memo) + return self + + assert isinstance(_hist, Histogram) + + if other is None: + other = _hist + + self._from_histogram_object(_hist) + + if memo is NOTHING: + self.__dict__ = copy.copy(other.__dict__) + else: + self.__dict__ = copy.deepcopy(other.__dict__, memo) + + for ax in self.axes: + if memo is NOTHING: + ax.__dict__ = copy.copy(ax._ax.metadata) + else: + ax.__dict__ = copy.deepcopy(ax._ax.metadata, memo) + return self + + def _new_hist(self: H, _hist: CppHistogram, memo: Any = NOTHING) -> H: + """ + Return a new histogram given a new _hist, copying current metadata. + """ + return self.__class__._clone(_hist, other=self, memo=memo) + + def _from_histogram_cpp(self, other: CppHistogram) -> None: + """ + Import a Cpp histogram. + """ + self._variance_known = True + self._hist = other + self.metadata = None + self.axes = self._generate_axes_() + def _from_histogram_object(self, other: "Histogram") -> None: """ Convert self into a new histogram object based on another, possibly @@ -270,32 +330,12 @@ def _generate_axes_(self) -> AxesTuple: return AxesTuple(self._axis(i) for i in range(self.ndim)) - def _new_hist(self: H, _hist: CppHistogram, memo: Any = NOTHING) -> H: - """ - Return a new histogram given a new _hist, copying metadata. - """ - - other = self.__class__(_hist) - if memo is NOTHING: - other.__dict__ = copy.copy(self.__dict__) - else: - other.__dict__ = copy.deepcopy(self.__dict__, memo) - other.axes = other._generate_axes_() - - for ax in other.axes: - if memo is NOTHING: - ax.__dict__ = copy.copy(ax._ax.metadata) - else: - ax.__dict__ = copy.deepcopy(ax._ax.metadata, memo) - - return other - @property def ndim(self) -> int: """ Number of axes (dimensions) of the histogram. """ - return self._hist.rank() # type: ignore[no-any-return] + return self._hist.rank() def view( self, flow: bool = False @@ -469,7 +509,7 @@ def fill( threads = cpu_count() if threads is None or threads == 1: - self._hist.fill(*args_ars, weight=weight_ars, sample=sample_ars) + self._hist.fill(*args_ars, weight=weight_ars, sample=sample_ars) # type: ignore[arg-type] return self if self._hist._storage_type in { @@ -640,7 +680,7 @@ def _compute_uhi_index(self, index: InnerIndexing, axis: int) -> SimpleIndexing: if isinstance(index, SupportsIndex): if abs(int(index)) >= self._hist.axis(axis).size: raise IndexError("histogram index is out of range") - return index % self._hist.axis(axis).size # type: ignore[no-any-return] + return int(index) % self._hist.axis(axis).size return index @@ -719,7 +759,7 @@ def to_numpy( hist, *edges = self._hist.to_numpy(flow) hist = self.view(flow=flow) if view else self.values(flow=flow) - return (hist, edges) if dd else (hist, *edges) + return (hist, edges) if dd else (hist, *edges) # type: ignore[return-value] def copy(self: H, *, deep: bool = True) -> H: """ @@ -742,7 +782,7 @@ def empty(self, flow: bool = False) -> bool: Check to see if the histogram has any non-default values. You can use flow=True to check flow bins too. """ - return self._hist.empty(flow) # type: ignore[no-any-return] + return self._hist.empty(flow) def sum(self, flow: bool = False) -> Union[float, Accumulator]: """ @@ -758,7 +798,7 @@ def size(self) -> int: """ Total number of bins in the histogram (including underflow/overflow). """ - return self._hist.size() # type: ignore[no-any-return] + return self._hist.size() @property def shape(self) -> Tuple[int, ...]: @@ -779,7 +819,7 @@ def __getitem__( # noqa: C901 if not hasattr(indexes, "items") and all( isinstance(a, SupportsIndex) for a in indexes ): - return self._hist.at(*indexes) # type: ignore[no-any-return] + return self._hist.at(*indexes) # type: ignore[no-any-return, arg-type] integrations: Set[int] = set() slices: List[_core.algorithm.reduce_command] = [] @@ -885,7 +925,7 @@ def __getitem__( # noqa: C901 if ax.traits_overflow and ax.size not in pick_set[i]: selection.append(ax.size) - new_axis = axes[i].__class__([axes[i].value(j) for j in pick_set[i]]) + new_axis = axes[i].__class__([axes[i].value(j) for j in pick_set[i]]) # type: ignore[call-arg] new_axis.metadata = axes[i].metadata axes[i] = new_axis reduced_view = np.take(reduced_view, selection, axis=i) diff --git a/tests/test_subclassing.py b/tests/test_subclassing.py index 0b8f2ca78..b2d6e321f 100644 --- a/tests/test_subclassing.py +++ b/tests/test_subclassing.py @@ -32,3 +32,20 @@ class MyHist(bh.Histogram): assert type(h) == MyHist assert type(h.axes[0]) == bh.axis.Regular + + +def test_copy(): + class MyHist(bh.Histogram): + def __init__(self, var, bins, weight, **kwargs): + super().__init__( + bh.axis.Regular(*bins), storage=bh.storage.Weight(), **kwargs + ) + + self.fill(var, weight=weight) + + b = (2, 0, 1) + v = [0.1, 0.5, 0.9] + w = [1, 0.5, 1] + hist = MyHist(v, b, w) + + hist.copy()