Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP: n-dimensional support #20

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 9 additions & 26 deletions jupyterlab_hdf/baseHandler.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,11 @@
# Distributed under the terms of the Modified BSD License.

import h5py
import json
# import json
import os
from tornado import gen
from tornado.httpclient import HTTPError
import simplejson

from notebook.base.handlers import APIHandler
from notebook.utils import url_path_join
Expand All @@ -24,28 +25,20 @@ class HdfBaseManager:
def __init__(self, notebook_dir):
    """Store the notebook server's root directory.

    :param notebook_dir: root dir of the notebook server; relative
        file paths in requests are resolved against it (see ``get``).
    """
    self.notebook_dir = notebook_dir
def _get(self, f, uri, row, col):
def _get(self, f, uri, select):
    """Fetch content for the object at ``uri`` within the open h5py file ``f``.

    Abstract hook: concrete managers (contents vs. data) override this.
    ``select`` is the raw SELECT query-param string, or None.
    """
    raise NotImplementedError
def get(self, relfpath, uri, row, col):
def get(self, relfpath, uri, select):
def _handleErr(code, msg):
raise HTTPError(code, '\n'.join((
msg,
f'relfpath: {relfpath}, uri: {uri}, row: {row}, col: {col}'
f'relfpath: {relfpath}, uri: {uri}, select: {select}'
)))

if not relfpath:
msg = f'The request was malformed; fpath should not be empty.'
_handleErr(400, msg)

if row and not col:
msg = f'The request was malformed; row slice was specified, but col slice was empty.'
_handleErr(400, msg)

if col and not row:
msg = f'The request was malformed; col slice was specified, but row slice was empty.'
_handleErr(400, msg)

fpath = url_path_join(self.notebook_dir, relfpath)

if not os.path.exists(fpath):
Expand All @@ -61,15 +54,14 @@ def _handleErr(code, msg):
_handleErr(401, msg)
try:
with h5py.File(fpath, 'r') as f:
out = self._get(f, uri, row, col)
out = self._get(f, uri, select)
except Exception as e:
msg = (f'Found and opened file, error getting contents from object specified by the uri.\n'
f'Error: {e}')
_handleErr(500, msg)

return out


## handler
class HdfBaseHandler(APIHandler):
managerClass = None
Expand All @@ -86,25 +78,16 @@ def initialize(self, notebook_dir):
@gen.coroutine
def get(self, path):
    """Based on an api request, get either the contents of a group or a
    selected hyperslab of a dataset and return it as serialized JSON.
    """
    uri = '/' + self.get_query_argument('uri').lstrip('/')
    # select is optional; None means "no hyperslab selection"
    select = self.get_query_argument('select', default=None)
    try:
        contents = self.manager.get(path, uri, select)
        # simplejson with ignore_nan=True serializes NaN as null,
        # keeping the payload valid JSON
        self.finish(simplejson.dumps(contents, ignore_nan=True))
    except HTTPError as err:
        self.set_status(err.code)
        response = err.response.body if err.response else str(err.code)
        self.finish('\n'.join((response, err.message)))

def getQueryArguments(self, key, func=None):
    """Split the comma-separated query argument ``key`` into a list.

    Each element is passed through ``func`` when one is supplied.
    Returns None when the argument is absent from the request.
    """
    if key not in self.request.query_arguments:
        return None
    values = self.get_query_argument(key).split(',')
    if func is None:
        return list(values)
    return [func(x) for x in values]
9 changes: 4 additions & 5 deletions jupyterlab_hdf/contents.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,21 +14,20 @@
class HdfContentsManager(HdfBaseManager):
    """Implements HDF5 contents handling
    """
    def _get(self, f, uri, select):
        obj = f[uri]

        if isinstance(obj, h5py.Group):
            # a group: list its children as content-free dicts
            return [(groupDict if isinstance(val, h5py.Group) else dsetDict)
                    (name=name, uri=uriJoin(uri, name))
                    for name, val in obj.items()]

        # a dataset: return its metadata, plus data if a selection was made
        return dsetDict(
            name=uriName(uri),
            uri=uri,
            content=dsetContentDict(obj, select)
        )


## handler
class HdfContentsHandler(HdfBaseHandler):
"""A handler for HDF5 contents
Expand Down
4 changes: 2 additions & 2 deletions jupyterlab_hdf/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@
class HdfDataManager(HdfBaseManager):
    """Implements HDF5 data handling
    """
    def _get(self, f, uri, select):
        """Return the selected hyperslab of the dataset at ``uri`` as nested lists."""
        dset = f[uri]
        return dsetChunk(dset, select)


## handler
Expand Down
107 changes: 101 additions & 6 deletions jupyterlab_hdf/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,17 @@

import re

__all__ = ['chunkSlice', 'dsetChunk', 'dsetContentDict', 'dsetDict', 'groupDict', 'uriJoin', 'uriName']
from tornado.web import HTTPError

__all__ = [
'chunkSlice',
'dsetChunk',
'dsetContentDict',
'dsetDict',
'groupDict',
'uriJoin',
'uriName'
]

## chunk handling
def chunkSlice(chunk, s):
Expand All @@ -14,12 +24,12 @@ def chunkSlice(chunk, s):

return slice(s.start*chunk, s.stop*chunk, s.step)

def dsetChunk(dset, row, col):
return dset[slice(*row), slice(*col)].tolist()

def dsetChunk(dset, select):
    """Read the hyperslab of ``dset`` described by the SELECT query
    param string and return it as (JSON-serializable) nested lists.
    """
    return dset[_getHyperslabSlices(dset.shape, select)].tolist()

## create dicts to be converted to json
def dsetContentDict(dset, row=None, col=None):
def dsetContentDict(dset, select=None):
return dict([
# metadata
('attrs', dict(*dset.attrs.items())),
Expand All @@ -28,7 +38,7 @@ def dsetContentDict(dset, row=None, col=None):
('shape', dset.shape),

# actual data
('data', dsetChunk(dset, row, col) if row and col else None)
('data', dsetChunk(dset, select) if select else None)
])

def dsetDict(name, uri, content=None):
Expand All @@ -55,3 +65,88 @@ def uriJoin(*parts):

def uriName(uri):
    """Return the final path component of an hdf uri."""
    *_, name = uri.split('/')
    return name


def _getHyperslabSlices(dsetshape, select):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

One of my big design goals in how I put the frontend, API, and backend together was to avoid the need for any complex parsing function. They're never worth the trouble, there's almost always an edge case or three that you miss, and maintenance is a pain.

Instead, what I'd really like to do is make sure that the frontend is structured such that the slices it asks for don't require anything other than some simple splitting, if even that. I was even thinking that it might be good to split up the slice input box into n segments given a nD dataset (or it might be really annoying; I need to try that one out). The job of the API and backend is then to not lose the information that gets handed to them (by, eg concating all the info on all the slices into a single string).

If you do ever really want a parsing function like this, I'd strongly recommend against rolling your own. Instead, you should search for prior art, or even better, come up with something that works off of ast.literal_eval (which is safe, so long as you can tolerate the possibility of a user causing a segfault at will).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agreed. I think sending a list of slices in a post body as you've proposed is a cleaner approach. The idea for the select query param came directly from the HDF group's API, so it seemed like a good idea at the time. The complexity of the parser did get out of hand pretty quick, and as you mentioned is error prone.

"""
Parse SELECT query param and return tuple of Python slice objects

:param dsetshape: The shape of the dataset as returned by dset.shape
:param select: The SELECT query param should be in the form:
[<dim1>, <dim2>, ... , <dimn>]
For each dimension, valid formats are:
single integer: n
start and end: n:m
start, end, and stride: n:m:s
:type select: str

:returns: tuple of Python slices based on the SELECT query param
"""

if select is None:
# Default: return entire dataset
return tuple(slice(0, extent) for extent in dsetshape)

if not select.startswith('['):
msg = "Bad Request: selection query missing start bracket"
raise HTTPError(400, reason=msg)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's good practice/encapsulation for a function like this to raise semantic errors (in this case, probably a ValueError). It would then be the responsibility of eg HdfBaseManager (the thing that's actually interacting directly with the HTTP requests) to catch said semantic errors and raise HTTPError as appropriate

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes that makes sense, good call!

if not select.endswith(']'):
msg = "Bad Request: selection query missing end bracket"
raise HTTPError(400, reason=msg)

# strip brackets
select = select[1:-1]

select_array = select.split(',')
if len(select_array) > len(dsetshape):
msg = "Bad Request: number of selected dimensions exceeds the rank of the dataset"
raise HTTPError(400, reason=msg)

slices = []
for dim, dim_slice in enumerate(select_array):
extent = dsetshape[dim]

# default slice values
start = 0
stop = extent
step = 1
if dim_slice.find(':') < 0:
# just a number - append to SLICES, and continue to next iteration
try:
slices.append(int(dim_slice))
continue
except ValueError:
msg = "Bad Request: invalid selection parameter (can't convert to int) for dimension: " + str(dim)
raise HTTPError(400, reason=msg)
stop = start + 1
elif dim_slice == ':':
# select everything (default)
pass
else:
fields = dim_slice.split(":")
if len(fields) > 3:
msg = "Bad Request: Too many ':' seperators for dimension: " + str(dim)
raise HTTPError(400, reason=msg)
try:
if fields[0]:
start = int(fields[0])
if fields[1]:
stop = int(fields[1])
if len(fields) > 2 and fields[2]:
step = int(fields[2])
except ValueError:
msg = "Bad Request: invalid selection parameter (can't convert to int) for dimension: " + str(dim)
raise HTTPError(400, reason=msg)

if start < 0 or start > extent:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here's a good example of why you generally shouldn't write your own parsing function. If we're claiming to support Numpy-like syntax, then both negative indices and indices out of bounds should be treated as valid in slices.

msg = "Bad Request: Invalid selection start parameter for dimension: " + str(dim)
raise HTTPError(400, reason=msg)
if stop > extent:
msg = "Bad Request: Invalid selection stop parameter for dimension: " + str(dim)
raise HTTPError(400, reason=msg)
if step <= 0:
msg = "Bad Request: invalid selection step parameter for dimension: " + str(dim)
raise HTTPError(400, reason=msg)
slices.append(slice(start, stop, step))

return tuple(slices)
3 changes: 2 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,8 @@
],
install_requires=[
'h5py',
'notebook'
'notebook',
'simplejson'
]
)

Expand Down
3 changes: 1 addition & 2 deletions src/dataset.ts
Original file line number Diff line number Diff line change
Expand Up @@ -251,8 +251,7 @@ export class HdfDatasetModelBase extends DataModel {
const params = {
fpath: this._fpath,
uri: this._uri,
col: [colStart, colStop],
row: [rowStart, rowStop]
select: `[${rowStart}:${rowStop}, ${colStart}:${colStop}]`
};
hdfDataRequest(params, this._serverSettings).then(data => {
this._blocks[rowBlock][colBlock] = data;
Expand Down
19 changes: 9 additions & 10 deletions src/hdf.ts
Original file line number Diff line number Diff line change
Expand Up @@ -73,12 +73,12 @@ export function hdfDataRequest(
parameters: IContentsParameters,
settings: ServerConnection.ISettings
): Promise<number[][]> {
// require the uri, row, and col query parameters
const { fpath, uri, row, col } = parameters;
// require the uri query parameter, select is optional
const { fpath, uri, ...select } = parameters;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The ... doesn't really mean optional in typescript. Check out https://basarat.gitbooks.io/typescript/docs/spread-operator.html

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oops. I'm familiar with spread syntax -- not sure why I have it there!


const fullUrl =
URLExt.join(settings.baseUrl, "hdf", "data", fpath).split("?")[0] +
URLExt.objectToQueryString({ uri, row, col });
URLExt.objectToQueryString({ uri, ...select });

return ServerConnection.makeRequest(fullUrl, {}, settings).then(response => {
if (response.status !== 200) {
Expand All @@ -105,14 +105,13 @@ export interface IContentsParameters {
uri: string;

/**
* Row slice. Up to 3 integers, same syntax as for Python `slice` built-in.
* String representing an array of slices which determine a
* hyperslab selection of an n-dimensional HDF5 dataset.
* Syntax and semantics match those of h5py's Dataset indexing.
* E.g, ?select=[0:3, 1:5, :, 9]
* See jupyterlab_hdf/util.py for full details.
*/
row?: number[];

/**
* Column slice. Up to 3 integers, same syntax as for Python `slice` built-in.
*/
col?: number[];
select?: string;
}

/**
Expand Down
8 changes: 2 additions & 6 deletions src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -226,13 +226,9 @@ function addBrowserCommands(
fpath: args["fpath"] as string,
uri: args["uri"] as string
};
if (args["col"]) {
params.col = args["col"] as number[];
if (args["select"]) {
params.select = args["select"] as string;
}
if (args["row"]) {
params.row = args["row"] as number[];
}

return hdfContentsRequest(params, serverSettings);
},
label: "For an HDF5 file at `fpath`, fetch the contents at `uri`"
Expand Down