Skip to content

Commit 9398302

Browse files
authored
Direct memory access to accumulators (#194)
* Allow direct memory access, no longer throws error/segfault * Internalize accumulators * First working version of conversion * Fix several bugs, and change the meaning of np.asarray() slightly * Fix view not returning an editable view * Normalize names * Nicer repr and str for views * Cleanup views with a decorator * Some small polish * Fix #195, unlimited and atomic single access * Adding tests for storages * README update * CHANGELOG and renable a test * Fix unneeded change * Address review comments * Move ostream, remove some extra headers * Fix reprs, add missing tests * Adding accum[key] access * Addressing second review * Rename wsum and wsum2
1 parent 8c62518 commit 9398302

File tree

17 files changed

+830
-58
lines changed

17 files changed

+830
-58
lines changed

CHANGELOG.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
#### User changes
44

55
* Histogram and Axis classes now follow PEP 8 naming scheme (`histogram`->`Histogram`, `regular`->`Regular`, etc.) [#192][]
6+
* You can now view a histogram with accumulators, with property access such as `h.view().value` [#194][]
67
* Added axes transforms [#192][]
78
* You can now sum over a range with endpoints [#185][]
89
* You can now access the functional regular axis directly, `regular_sqrt` becomes `regular.sqrt`, etc.. [#183][]
@@ -12,6 +13,10 @@
1213
* Signatures are much nicer in Python 3 [#188][]
1314

1415

16+
#### Bug fixes:
17+
* Unlimited and AtomicInt storages now allow single item access [#194][]
18+
* `.view()` now no longer makes a copy [#194][]
19+
1520
#### Developer changes
1621

1722
* The hist/axis classes are now pure Python, with a C++ object inside [#183][]

README.md

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@ Python bindings for [Boost::Histogram][] ([source][Boost::Histogram source]), a
1818
> Join the [discussion on gitter][gitter-link] or [open an issue](https://github.com/scikit-hep/boost-histogram/issues)!
1919
>
2020
> #### Known issues (develop):
21-
> * Non-simple storages do not support `.view()` or the buffer interface; you can access and set one element at a time.
2221
> * Setting with an array is not yet supported (`h[...] = np.array(...)`).
2322
2423

boost_histogram/_internal/hist.py

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
from .. import _core
66
from .axis import _to_axis, Axis
7+
from .view import _to_view
78
from .axistuple import AxesTuple
89
from .sig_tools import inject_signature
910
from .storage import Double
@@ -53,13 +54,13 @@ def _expand_ellipsis(indexes, rank):
5354
raise IndexError("an index can only have a single ellipsis ('...')")
5455

5556

56-
def _compute_commonindex(hist, index, expand):
57+
def _compute_commonindex(hist, index, expand_ellipsis):
5758
# Normalize -> h[i] == h[i,]
5859
if not isinstance(index, tuple):
5960
index = (index,)
6061

6162
# Now a list
62-
if expand:
63+
if expand_ellipsis:
6364
indexes = _expand_ellipsis(index, hist.rank())
6465
else:
6566
indexes = list(index)
@@ -135,9 +136,7 @@ def __repr__(self):
135136
return self.__class__.__name__ + repr(self._hist)[9:]
136137

137138
def __array__(self):
138-
return np.asarray(self._hist)
139-
# TODO: .view does not seem to return an editable view
140-
# so we have to use the buffer interface here
139+
return self.view()
141140

142141
def __add__(self, other):
143142
return self.__class__(self._hist + other._hist)
@@ -296,7 +295,7 @@ def view(self, flow=False):
296295
"""
297296
Return a view into the data, optionally with overflow turned on.
298297
"""
299-
return self._hist.view(flow)
298+
return _to_view(self._hist.view(flow))
300299

301300
def reset(self):
302301
"""
@@ -334,7 +333,7 @@ def size(self):
334333

335334
def __getitem__(self, index):
336335

337-
indexes = _compute_commonindex(self._hist, index, expand=True)
336+
indexes = _compute_commonindex(self._hist, index, expand_ellipsis=True)
338337

339338
# If this is (now) all integers, return the bin contents
340339
try:
@@ -406,5 +405,5 @@ def __getitem__(self, index):
406405
)
407406

408407
def __setitem__(self, index, value):
409-
indexes = _compute_commonindex(self._hist, index, expand=True)
408+
indexes = _compute_commonindex(self._hist, index, expand_ellipsis=False)
410409
self._hist._at_set(value, *indexes)

boost_histogram/_internal/view.py

Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
from __future__ import absolute_import, division, print_function
2+
3+
from ..accumulators import Mean, WeightedMean, WeightedSum
4+
5+
import numpy as np
6+
7+
8+
class View(np.ndarray):
9+
__slots__ = ()
10+
11+
def __getitem__(self, ind):
12+
sliced = super(View, self).__getitem__(ind)
13+
14+
# If the shape is empty, return the parent type
15+
if not sliced.shape:
16+
return self._PARENT._make(*sliced)
17+
# If the dtype has changed, return a normal array (no longer a record)
18+
elif sliced.dtype != self.dtype:
19+
return np.asarray(sliced)
20+
# Otherwise, no change, return the same View type
21+
else:
22+
return sliced
23+
24+
def __repr__(self):
25+
# Numpy starts the ndarray class name with "array", so we replace it
26+
# with our class name
27+
return (
28+
"{self.__class__.__name__}(\n ".format(self=self)
29+
+ repr(self.view(np.ndarray))[6:]
30+
)
31+
32+
def __str__(self):
33+
fields = ", ".join(self._FIELDS)
34+
return "{self.__class__.__name__}: ({fields})\n{arr}".format(
35+
self=self, fields=fields, arr=self.view(np.ndarray)
36+
)
37+
38+
39+
def fields(*names):
40+
"""
41+
This decorator adds the name to the _FIELDS
42+
class property (for printing in reprs), and
43+
adds a property that looks like this:
44+
45+
@property
46+
def name(self):
47+
return self["name"]
48+
@name.setter
49+
def name(self, value):
50+
self["name"] = value
51+
"""
52+
53+
def injector(cls):
54+
if hasattr(cls, "_FIELDS"):
55+
raise RuntimeError(
56+
"{0} already has had a fields decorator applied".format(
57+
self.__class__.__name__
58+
)
59+
)
60+
fields = []
61+
for name in names:
62+
fields.append(name)
63+
64+
def fget(self):
65+
return self[name]
66+
67+
def fset(self, value):
68+
self[name] = value
69+
70+
setattr(cls, name, property(fget, fset))
71+
cls._FIELDS = tuple(fields)
72+
return cls
73+
74+
return injector
75+
76+
77+
@fields("value", "variance")
78+
class WeightedSumView(View):
79+
__slots__ = ()
80+
_PARENT = WeightedSum
81+
82+
83+
@fields(
84+
"sum_of_weights",
85+
"sum_of_weights_squared",
86+
"value",
87+
"sum_of_weighted_deltas_squared",
88+
)
89+
class WeightedMeanView(View):
90+
__slots__ = ()
91+
_PARENT = WeightedMean
92+
93+
@property
94+
def variance(self):
95+
return self["sum_of_weighted_deltas_squared"] / (
96+
self["sum_of_weights"]
97+
- self["sum_of_weights_squared"] / self["sum_of_weights"]
98+
)
99+
100+
101+
@fields("count", "value", "sum_of_deltas_squared")
102+
class MeanView(View):
103+
__slots__ = ()
104+
_PARENT = Mean
105+
106+
# Variance is a computation
107+
@property
108+
def variance(self):
109+
return self["sum_of_deltas_squared"] / (self["count"] - 1)
110+
111+
112+
def _to_view(item, value=False):
113+
for cls in View.__subclasses__():
114+
if cls._FIELDS == item.dtype.names:
115+
ret = item.view(cls)
116+
if value and ret.shape:
117+
return ret.value
118+
else:
119+
return ret
120+
return item
Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
// Copyright 2015-2019 Hans Dembinski and Henry Schreiner
2+
//
3+
// Distributed under the Boost Software License, version 1.0.
4+
// (See accompanying file LICENSE_1_0.txt
5+
// or copy at http://www.boost.org/LICENSE_1_0.txt)
6+
//
7+
// Based on boost/histogram/accumulators/mean.hpp
8+
// Changes:
9+
// * Internal values are public for access from Python
10+
// * A special constructor added for construction from Python
11+
12+
#pragma once
13+
14+
#include <boost/core/nvp.hpp>
15+
#include <boost/histogram/weight.hpp>
16+
17+
namespace accumulators {
18+
19+
/** Calculates mean and variance of sample.
20+
21+
Uses Welford's incremental algorithm to improve the numerical
22+
stability of mean and variance computation.
23+
*/
24+
template <class RealType>
25+
struct mean {
26+
mean() = default;
27+
mean(const RealType &n, const RealType &mean, const RealType &variance) noexcept
28+
: count(n)
29+
, value(mean)
30+
, sum_of_deltas_squared(variance * (n - 1)) {}
31+
32+
mean(const RealType &sum,
33+
const RealType &mean,
34+
const RealType &sum_of_deltas_squared,
35+
bool /* Tag to trigger python internal constructor */)
36+
: count(sum)
37+
, value(mean)
38+
, sum_of_deltas_squared(sum_of_deltas_squared) {}
39+
40+
void operator()(const RealType &x) noexcept {
41+
count += static_cast<RealType>(1);
42+
const auto delta = x - value;
43+
value += delta / count;
44+
sum_of_deltas_squared += delta * (x - value);
45+
}
46+
47+
void operator()(const boost::histogram::weight_type<RealType> &w,
48+
const RealType &x) noexcept {
49+
count += w.value;
50+
const auto delta = x - value;
51+
value += w.value * delta / count;
52+
sum_of_deltas_squared += w.value * delta * (x - value);
53+
}
54+
55+
template <class T>
56+
mean &operator+=(const mean<T> &rhs) noexcept {
57+
if(count != 0 || rhs.count != 0) {
58+
const auto tmp
59+
= value * count + static_cast<RealType>(rhs.value * rhs.count);
60+
count += rhs.count;
61+
value = tmp / count;
62+
}
63+
sum_of_deltas_squared += static_cast<RealType>(rhs.sum_of_deltas_squared);
64+
return *this;
65+
}
66+
67+
mean &operator*=(const RealType &s) noexcept {
68+
value *= s;
69+
sum_of_deltas_squared *= s * s;
70+
return *this;
71+
}
72+
73+
template <class T>
74+
bool operator==(const mean<T> &rhs) const noexcept {
75+
return count == rhs.count && value == rhs.value
76+
&& sum_of_deltas_squared == rhs.sum_of_deltas_squared;
77+
}
78+
79+
template <class T>
80+
bool operator!=(const mean<T> &rhs) const noexcept {
81+
return !operator==(rhs);
82+
}
83+
84+
RealType variance() const noexcept { return sum_of_deltas_squared / (count - 1); }
85+
86+
template <class Archive>
87+
void serialize(Archive &ar, unsigned) {
88+
ar &boost::make_nvp("count", count);
89+
ar &boost::make_nvp("value", value);
90+
ar &boost::make_nvp("sum_of_deltas_squared", sum_of_deltas_squared);
91+
}
92+
93+
RealType count = 0, value = 0, sum_of_deltas_squared = 0;
94+
};
95+
96+
} // namespace accumulators

0 commit comments

Comments
 (0)