add splitting to contrib

Fabian-Robert Stöter · Fabian-Robert Stöter · commit 7869350d4201 · 2018-11-16T15:51:47.000+01:00
diff --git a/norbert/__init__.py b/norbert/__init__.py
@@ -18,13 +18,13 @@ class Processor(object):
 
     Parameters
     ----------
-    pipeline : list of norbert objects
+    pipeline : list of norbert modules
 
     """
     def __init__(self, pipeline):
         super(Processor, self).__init__()
-        # set up modules
 
+        # set up modules
         self.pipeline = pipeline
 
     def forward(self, input):
diff --git a/norbert/contrib.py b/norbert/contrib.py
@@ -128,3 +128,253 @@ def compress_filter(W, eps, thresh=0.6, slope=15, multichannel=True):
     else:
         W = _logit(W, thresh, slope)
     return W
+
+
+
+import numpy as np
+import itertools
+
+
+def splitinfo(sigShape, frameShape, hop):
+    """Compute the frame layout used to split an array into frames.
+
+    sigShape : shape of the signal to split.
+    frameShape : int or tuple, frame size along the leading dimensions;
+                 missing trailing dimensions get a frame size of 1.
+    hop : int or tuple, hop size along the leading dimensions; missing
+          trailing dimensions get a hop size of 1.
+
+    Returns the tuple (framesPos, framedShape, frameShape, hop, fdim,
+    nDim, frameShapeAligned): per-dimension frame start positions, shape
+    of the framed array, frame shape as an array, padded hop, number of
+    frame dimensions, number of signal dimensions, padded frame shape.
+    """
+    # accept plain integers as well as tuples
+    if np.isscalar(frameShape):
+        frameShape = (frameShape,)
+    if np.isscalar(hop):
+        hop = (hop,)
+
+    # pad frame shape and hop with ones up to the signal dimensionality,
+    # e.g. frameShape=(1024,), sigShape=(10000, 2) -> aligned (1024, 1)
+    frameShape = np.array(frameShape)
+    fdim = len(frameShape)
+    frameShapeAligned = np.append(
+        frameShape, np.ones(
+            (len(sigShape) - len(frameShape)))).astype(int)
+    hop = np.array(hop)
+    hop = np.append(hop, np.ones((len(sigShape) - len(hop)))).astype(int)
+
+    # candidate start positions per dimension: 0, hop, 2 * hop, ...
+    framesPos = [np.arange(0, size, step)
+                 for (size, step) in zip(sigShape, hop)]
+    nDim = len(framesPos)
+
+    # keep at most one frame running past the end of the signal
+    for dim in range(nDim):
+        # drop every frame whose start + size reaches the signal size
+        framesPos[dim] = framesPos[dim][
+            np.nonzero(
+                np.add(
+                    framesPos[dim],
+                    frameShapeAligned[dim]) < sigShape[dim])]
+        if len(framesPos[dim]):
+            # re-add one trailing frame when its start is still inside the
+            # signal (not always possible, e.g. hop > 1 with frame size 1)
+            if framesPos[dim][-1] + hop[dim] < sigShape[dim]:
+                framesPos[dim] = np.append(
+                    framesPos[dim], framesPos[dim][-1] + hop[dim])
+        else:
+            # signal too short for any frame here: use a single frame at 0
+            framesPos[dim] = [0]
+
+    framedShape = np.append(frameShape, [len(x) for x in framesPos])
+    return (framesPos, framedShape, frameShape, hop,
+            fdim, nDim, frameShapeAligned)
+
+
+def split(sig, frames_shape, hop, weight_frames=False, verbose=False):
+    """Split a ndarray into (possibly overlapping) frames.
+
+    sig : ndarray to split
+    frames_shape : int or tuple giving the size of each frame. If it has
+                   fewer entries than sig has dimensions, the frame is of
+                   size 1 for all missing dimensions
+    hop : int or tuple giving the hopsize in each dimension. If it has
+          fewer entries than sig has dimensions, the hopsize is 1 for all
+          missing dimensions
+    weight_frames : if True, weight each frame by a ND hamming window
+    verbose : whether to output progress during computation
+
+    Returns the framed ndarray: frame dimensions first, then one axis per
+    dimension of sig indexing the frames. Short frames are zero-padded."""
+    sigShape = np.array(sig.shape)
+    (framesPos, framedShape, frameShape,
+     hop, fdim, nDim, frameShapeAligned) = splitinfo(
+                        sigShape, frames_shape, hop)
+
+    if weight_frames:
+        # weighting window: outer product of 1D hamming windows (never 0).
+        # multiply.outer keeps one axis per frame dimension, so win has
+        # exactly the shape of a frame
+        win = 1
+        for dim in range(len(frameShape) - 1, -1, -1):
+            win = np.multiply.outer(np.hamming(frameShapeAligned[dim]), win)
+
+    # allocating memory for framed signal
+    framed = np.zeros(framedShape, dtype=sig.dtype)
+
+    # total number of frames (for displaying)
+    nFrames = np.prod([len(x) for x in framesPos])
+
+    # for each frame
+    for iframe, index in enumerate(
+                itertools.product(*[range(len(x)) for x in framesPos])):
+        # display from time to time if asked for
+        if verbose and (not iframe % 100):
+            print('Splitting : frame ' + str(iframe) + '/' + str(nFrames))
+
+        # build the slice to use for extracting the signal of this frame.
+        frameRange = [Ellipsis]
+        for dim in range(nDim):
+            frameRange += [slice(framesPos[dim][index[dim]],
+                                 min(sigShape[dim],
+                                     framesPos[dim][index[dim]]
+                                     + frameShapeAligned[dim]),
+                                 1)]
+
+        # extract the signal and drop the trailing size-1 (non-frame) axes
+        sigFrame = sig[tuple(frameRange)]
+        sigFrame = sigFrame.reshape(sigFrame.shape[:fdim])
+
+        # the signal may be shorter than the normal size of a frame (at the
+        # end of the signal). We build a slice that corresponds to the actual
+        # size we got here
+        sigFrameRange = [slice(0, x, 1) for x in sigFrame.shape[:fdim]]
+
+        # puts the signal in the output variable
+        framed[tuple(sigFrameRange + list(index))] = sigFrame
+
+        if weight_frames:
+            # multiply by the weighting window
+            framed[(Ellipsis,) + tuple(index)] *= win
+
+    return framed
+
+
+def overlapadd(S, fdim, hop, shape=None, weighted_frames=True, verbose=False):
+    """n-dimensional overlap-add
+    S    : ndarray of frames to recombine (left unmodified)
+    fdim : the number of leading dimensions of S that hold the
+           content of one frame
+    hop  : int or tuple containing hopsizes along dimensions.
+           Missing hopsizes are assumed to be 1
+    shape: the original shape of the signal, for truncating (or
+           zero-padding) the result. If None: no truncating is done
+    weighted_frames: True if we need to compensate for the analysis
+                     weighting (weight_frames of the split function)
+    verbose: whether or not to display progress
+    Returns the reconstructed signal as a ndarray"""
+
+    # number of dimensions
+    nDim = len(S.shape)
+
+    frameShape = S.shape[:fdim]
+    trueFrameShape = np.append(
+        frameShape,
+        np.ones(
+            (nDim - len(frameShape)))).astype(int)
+
+    # same thing for hop
+    if np.isscalar(hop):
+        hop = (hop,)
+    hop = np.array(hop)
+    hop = np.append(hop, np.ones((nDim - len(hop)))).astype(int)
+
+    sigShape = [
+        (nframedim - 1) * hopdim + frameshapedim for (
+            nframedim,
+            hopdim,
+            frameshapedim) in zip(S.shape[fdim:], hop, trueFrameShape)]
+
+    # building the positions of the frames. For each dimension, gridding from
+    # 0 to sigShape[dim] every hop[dim]
+    framesPos = [np.arange(size) * step for (size, step)
+                 in zip(S.shape[fdim:], hop)]
+
+    # constructing the weighting window. Choosing hamming for convenience
+    # (never 0)
+    win = np.array(1)
+    for dim in range(fdim):
+        if trueFrameShape[dim] == 1:
+            win = win[..., None]
+        else:
+            key = ((None,) * len(win.shape) + (Ellipsis,))
+            win = (win[..., None]
+                   * np.hamming(trueFrameShape[dim])[key])
+
+    # if we need to compensate for analysis weighting, simply square window
+    if weighted_frames:
+        win2 = win ** 2
+    else:
+        win2 = win
+
+    sig = np.zeros(sigShape, dtype=S.dtype)
+
+    # will also store the sum of all weighting windows applied during
+    # overlap and add. Traditionally, window function and overlap are chosen
+    # so that these weights end up being 1 everywhere. However, we are not
+    # restricted here to any particular hopsize. Hence, the price to pay
+    # is this further memory burden
+    weights = np.zeros(sigShape)
+
+    # total number of frames (for displaying)
+    nFrames = np.prod(S.shape[fdim:])
+
+    # apply the synthesis window on a copy, so the caller's S is untouched
+    S = S * win[tuple([Ellipsis] + [None] * (len(S.shape) - len(win.shape)))]
+
+    # for each frame
+    for iframe, index in enumerate(
+            itertools.product(*[range(len(x)) for x in framesPos])):
+        # display from time to time if asked for
+        if verbose and (not iframe % 100):
+            print('overlap-add : frame ' + str(iframe) + '/' + str(nFrames))
+
+        # build the slice to use for overlap-adding the signal of this frame.
+        frameRange = [Ellipsis]
+        for dim in range(nDim-fdim):
+            frameRange += [slice(framesPos[dim][index[dim]],
+                                 min(sigShape[dim],
+                                     framesPos[dim][index[dim]]
+                                     + trueFrameShape[dim]),
+                                 1)]
+
+        # put back the reconstructed weighted frame into place
+        frameSig = S[tuple([Ellipsis] + list(index))]
+        sig[tuple(frameRange)] += frameSig[
+                tuple([Ellipsis] +
+                      [None] *
+                      (len(sig[tuple(frameRange)].shape) -
+                      len(frameSig.shape)))]
+
+        # also store the corresponding window contribution
+        weights[tuple(frameRange)] += win2[
+                tuple([Ellipsis] +
+                      [None] *
+                      (len(weights[tuple(frameRange)].shape) -
+                       len(win2.shape)))]
+
+    # account for different weighting at different places
+    sig /= weights
+
+    # truncate (or zero-pad) the signal if asked for, keeping sig's dtype
+    if shape is not None:
+        sig_res = np.zeros(shape, sig.dtype)
+        truncateRange = [slice(0, min(x, sig.shape[i]), 1)
+                         for (i, x) in enumerate(shape)]
+        sig_res[tuple(truncateRange)] = sig[tuple(truncateRange)]
+        sig = sig_res
+
+    # finished
+    return sig
diff --git a/setup.py b/setup.py
@@ -15,7 +15,7 @@
 
 setup(
     name='norbert',
-    version='0.1.0b',
+    version='0.1.0c',
     description='Spectrogram Models',
     long_description=long_description,
     long_description_content_type='text/markdown',
diff --git a/tests/test_contrib.py b/tests/test_contrib.py
@@ -0,0 +1,56 @@
+import numpy as np
+import pytest
+from norbert.contrib import split, overlapadd
+
+
+@pytest.fixture(params=[100, 256, 1001])
+def nb_frames(request):
+    """Length of the test signal along its first (framed) dimension."""
+    return int(request.param)
+
+
+@pytest.fixture(params=[1024, 777])
+def nb_bins(request):
+    """Size of the second dimension of the test signal."""
+    return request.param
+
+
+@pytest.fixture(params=[1, 2])
+def nb_channels(request):
+    """Size of the third dimension of the test signal."""
+    return request.param
+
+
+@pytest.fixture(params=[np.float64])
+def dtype(request):
+    """dtype of the test signal (np.float was removed in NumPy 1.24)."""
+    return request.param
+
+
+@pytest.fixture(params=[1, 2, 3])
+def test_len(request, nb_frames):
+    """Frame length: the signal length divided by 1, 2 or 3."""
+    return int(nb_frames/request.param)
+
+
+@pytest.fixture(params=[1, 2, 3])
+def test_hop(request, test_len):
+    """Hop size: the frame length divided by 1, 2 or 3."""
+    return int(test_len/request.param)
+
+
+@pytest.fixture
+def X(request, nb_frames, nb_bins, nb_channels, dtype):
+    """Seeded random signal, shape (nb_frames, nb_bins, nb_channels)."""
+    np.random.seed(0)
+    X = np.random.random((nb_frames, nb_bins, nb_channels)).astype(dtype)
+    return X
+
+
+def test_split(X, test_len, test_hop):
+    """split followed by overlapadd must give back the original signal."""
+    # same hop as for splitting, and no compensation for analysis weighting
+    # since split was called with its default weight_frames=False
+    patches = split(X, test_len, test_hop)
+    X_out = overlapadd(patches, 1, test_hop, X.shape, weighted_frames=False)
+    assert np.allclose(X, X_out)