introduction of an abstraction layer for the "results array"

Blosc · May 5, 2015 · a8fee6c · a8fee6c
1 parent 5ebe7c1
commit a8fee6c
Show file tree

Hide file tree

Showing 2 changed files with 73 additions and 15 deletions.
diff --git a/bcolz/ctable.py b/bcolz/ctable.py
@@ -1013,11 +1013,9 @@ def _where(self, boolarr, colnames=None):
 
         if colnames is None:
             colnames = self.names
-        cols = [self.cols[name][boolarr] for name in colnames]
-        dtype = np.dtype([(name, self.cols[name].dtype) for name in colnames])
-        result = np.rec.fromarrays(cols, dtype=dtype).view(np.ndarray)
+        result = self._outstruc_fromboolarr(boolarr, colnames)
 
-        return result
+        return result.ra
 
     def __getitem__(self, key):
         """Returns values based on `key`.
@@ -1043,10 +1041,10 @@ def __getitem__(self, key):
         # First, check for integer
         if isinstance(key, _inttypes):
             # Get a copy of the len-1 array
-            ra = self._arr1.copy()
+            result = self._outstruc_allocate(1)
             # Fill it
-            ra[0] = tuple([self.cols[name][key] for name in self.names])
-            return ra[0]
+            result[0] = tuple([self.cols[name][key] for name in self.names])
+            return result.ra
         # Slices
         elif type(key) == slice:
             (start, stop, step) = key.start, key.stop, key.step
@@ -1060,7 +1058,7 @@ def __getitem__(self, key):
         # List of integers (case of fancy indexing), or list of column names
         elif type(key) is list:
             if len(key) == 0:
-                return np.empty(0, self.dtype)
+                return self._outstruc_allocate(0, self.dtype).ra
             strlist = [type(v) for v in key] == [str for v in key]
             # Range of column names
             if strlist:
@@ -1072,15 +1070,14 @@ def __getitem__(self, key):
             except:
                 raise IndexError(
                     "key cannot be converted to an array of indices")
-            return np.fromiter((self[i] for i in key),
-                               dtype=self.dtype, count=len(key))
+            return self._outstruc_fromindices(key).ra
         # A boolean array (case of fancy indexing)
         elif hasattr(key, "dtype"):
             if key.dtype.type == np.bool_:
                 return self._where(key)
             elif np.issubsctype(key, np.int_):
                 # An integer array
-                return np.array([self[i] for i in key], dtype=self.dtype)
+                return self._outstruc_fromindices(key).ra
             else:
                 raise IndexError(
                     "arrays used as indices must be integer (or boolean)")
@@ -1105,12 +1102,12 @@ def __getitem__(self, key):
         (start, stop, step) = slice(start, stop, step).indices(self.len)
         # Build a numpy container
         n = utils.get_len_of_range(start, stop, step)
-        ra = np.empty(shape=(n,), dtype=self.dtype)
+        result = self._outstruc_allocate(n, self.dtype)
         # Fill it
         for name in self.names:
-            ra[name][:] = self.cols[name][start:stop:step]
+            result[name] = self.cols[name][start:stop:step]
 
-        return ra
+        return result.ra
 
     def __setitem__(self, key, value):
         """Sets values based on `key`.
@@ -1247,7 +1244,40 @@ def __repr__(self):
         return fullrepr
 
 
-# Local Variables:
+class OutputStructure_numpy(object):
+    """Default output structure: a thin wrapper around a numpy array.
+
+    The wrapped array is exposed through the ``ra`` attribute.  The three
+    static factories (`allocate`, `fromindices`, `fromboolarr`) all take
+    the source table as their first argument and return a new wrapper.
+
+    NOTE(review): the factories look intended to be attached to the table
+    class as ``_outstruc_*`` hooks and called as ``self._outstruc_*(...)``,
+    which is why they are plain static functions taking the table
+    explicitly -- confirm before turning them into classmethods.
+    """
+
+    def __init__(self, ra=None):
+        # `ra` is optional so that pre-existing no-argument construction
+        # keeps working.
+        self.ra = ra
+
+    @staticmethod
+    def allocate(ctable_, size, dtype=None):
+        """Return a wrapper around an uninitialised array of `size` rows.
+
+        Parameters
+        ----------
+        ctable_ : table object
+            Must provide ``_arr1`` (a length-1 template array) and
+            ``dtype``.
+        size : int
+            Number of rows to allocate.
+        dtype : dtype-like, optional
+            Row dtype.  Defaults to ``ctable_.dtype``.
+        """
+        # Reuse the table's length-1 template only when it really has the
+        # requested dtype; an explicit, different dtype must be honoured.
+        if size == 1 and (dtype is None or
+                          ctable_._arr1.dtype == np.dtype(dtype)):
+            return OutputStructure_numpy(ctable_._arr1.copy())
+        if dtype is None:
+            # np.empty(size, None) would silently build a float64 array.
+            dtype = ctable_.dtype
+        return OutputStructure_numpy(np.empty(size, dtype))
+
+    @staticmethod
+    def fromindices(ctable_, iter):
+        """Return a wrapper holding the rows ``ctable_[i]`` for i in `iter`.
+
+        `iter` must support ``len()`` because the row count is passed to
+        ``np.fromiter``.  (The parameter name shadows the builtin; it is
+        kept so existing keyword callers do not break.)
+        """
+        rows = (ctable_[i] for i in iter)
+        return OutputStructure_numpy(
+            np.fromiter(rows, dtype=ctable_.dtype, count=len(iter)))
+
+    @staticmethod
+    def fromboolarr(ctable_, boolarr, colnames):
+        """Return a wrapper with the rows selected by `boolarr`.
+
+        Only the columns listed in `colnames` are included, in that order.
+        """
+        dtype = np.dtype(
+            [(name, ctable_.cols[name].dtype) for name in colnames])
+        cols = [ctable_.cols[name][boolarr] for name in colnames]
+        selected = np.rec.fromarrays(cols, dtype=dtype).view(np.ndarray)
+        return OutputStructure_numpy(selected)
+
+    def __setitem__(self, key, value):
+        """Assign a whole row (integer `key`) or fill a column in place."""
+        # numpy integer scalars are row indices too; treating them as
+        # column keys would attempt `ra[key][:] = value` on a single record.
+        if isinstance(key, (int, np.integer)):
+            self.ra[key] = value
+        else:
+            self.ra[key][:] = value
+
+
+
 # mode: python
 # tab-width: 4
 # fill-column: 78

diff --git a/bcolz/defaults.py b/bcolz/defaults.py
@@ -12,6 +12,7 @@
 from __future__ import absolute_import
 
 import bcolz
+from bcolz.ctable import OutputStructure_numpy
 
 
 class Defaults(object):
@@ -70,6 +71,27 @@ def eval_out_flavor(self, value):
         self.check_choices('eval_out_flavor', value)
         self.__eval_out_flavor = value
 
+    @property
+    def ctable_out_implementation(self):
+        """The output structure implementation currently installed."""
+        return self.__ctable_out_implementation
+
+    @ctable_out_implementation.setter
+    def ctable_out_implementation(self, value):
+        """Install `value` as the output structure implementation.
+
+        `value` must provide ``allocate``, ``fromindices``, ``fromboolarr``
+        and ``__setitem__``.  ``None`` selects `OutputStructure_numpy`.
+
+        Raises
+        ------
+        NotImplementedError
+            If `value` lacks any of the required attributes.  In that case
+            the numpy default is installed (and recorded) before raising.
+        """
+        if value is None:
+            value = OutputStructure_numpy
+        # Validate explicitly rather than with `assert`, which is stripped
+        # under `python -O`, and do it before touching any hook.
+        required = ('allocate', 'fromindices', 'fromboolarr', '__setitem__')
+        complete = all(hasattr(value, name) for name in required)
+        if not complete:
+            value = OutputStructure_numpy
+        bcolz.ctable._outstruc_allocate = value.allocate
+        bcolz.ctable._outstruc_fromindices = value.fromindices
+        bcolz.ctable._outstruc_fromboolarr = value.fromboolarr
+        # Record what was actually installed, so the property never
+        # disagrees with the hooks after a failed assignment.
+        self.__ctable_out_implementation = value
+        if not complete:
+            raise NotImplementedError('The output structure implementation is incomplete')
+
     @property
     def cparams(self):
         return self.__cparams
@@ -90,6 +112,12 @@ def cparams(self, value):
 'numpy'.  Default is 'carray'.
 """
 
+# Assigning None makes the property setter on Defaults fall back to the
+# numpy-backed implementation (OutputStructure_numpy).
+defaults.ctable_out_implementation = None
+"""
+The implementation of the output structure abstraction layer for the 
+output object in `__getitem__()`.
+"""
+
 defaults.eval_vm = "numexpr" if bcolz.numexpr_here else "python"
 """
 The virtual machine to be used in computations (via `eval`).  It can