Byteskip minus1 fixing (#74)

1. Fixed byte_skip -1 reading to work on compressed data (issue #70). 2. Added error checking for line_skip < 0 and byte_skip < -1. 3. Added tests for the error checking and the byte_skip functionality.
mhe · Nov 12, 2018 · 3b084ed · 3b084ed
1 parent 5310894
commit 3b084ed
Show file tree

Hide file tree

Showing 9 changed files with 134 additions and 12 deletions.
diff --git a/nrrd/reader.py b/nrrd/reader.py
@@ -357,22 +357,30 @@ def read_data(header, fh=None, filename=None):
     # Get the total number of data points by multiplying the size of each dimension together
     total_data_points = header['sizes'].prod()
 
-    # If encoding is raw and byte skip is -1, then seek backwards to the data
-    # Otherwise skip the number of lines requested
-    if header['encoding'] == 'raw' and byte_skip == -1:
-        fh.seek(-dtype.itemsize * total_data_points, 2)
-    else:
+    # Skip the requested number of lines when line_skip >= 0.
+    # This applies whether the NRRD file has an attached or a detached header:
+    # the lines are skipped before reaching the beginning of the data.
+    if line_skip >= 0:
         for _ in range(line_skip):
             fh.readline()
-
-    # If a compression encoding is used, then byte skip AFTER decompressing
-    if header['encoding'] == 'raw':
-        # Skip the requested number of bytes and then parse the data using NumPy
+    else:
+        raise NRRDError('Invalid lineskip, allowed values are greater than or equal to 0')
+
+    # Skip the requested number of bytes or seek backward, and then parse the data using NumPy
+    if byte_skip < -1:
+        raise NRRDError('Invalid byteskip, allowed values are greater than or equal to -1')
+    elif byte_skip >= 0:
         fh.seek(byte_skip, os.SEEK_CUR)
+    elif byte_skip == -1 and header['encoding'] not in ['gzip', 'gz', 'bzip2', 'bz2']:
+        fh.seek(-dtype.itemsize * total_data_points, os.SEEK_END)
+    else:
+        # The only case left should be: byte_skip == -1 and a compressed encoding (gzip, gz, bzip2 or bz2)
+        byte_skip = -dtype.itemsize * total_data_points
+
+    # Raw and ASCII data are parsed directly; if a compression encoding is used, then byte skip AFTER decompressing
+    if header['encoding'] == 'raw':             
         data = np.fromfile(fh, dtype)
     elif header['encoding'] in ['ASCII', 'ascii', 'text', 'txt']:
-        # Skip the requested number of bytes and then parse the data using NumPy
-        fh.seek(byte_skip, os.SEEK_CUR)
         data = np.fromfile(fh, dtype, sep=' ')
     else:
         # Handle compressed data now
@@ -441,7 +449,6 @@ def read(filename, custom_field_map=None):
     """
 
     """Read a NRRD file and return a tuple (data, header)."""
-
     with open(filename, 'rb') as fh:
         header = read_header(fh, custom_field_map)
         data = read_data(header, fh, filename)

diff --git a/nrrd/tests/data/BallBinary30x30x30.nii.gz b/nrrd/tests/data/BallBinary30x30x30.nii.gz
diff --git a/nrrd/tests/data/BallBinary30x30x30_byteskip_minus_five.nhdr b/nrrd/tests/data/BallBinary30x30x30_byteskip_minus_five.nhdr
@@ -0,0 +1,14 @@
+NRRD0004
+# Complete NRRD file format specification at:
+# http://teem.sourceforge.net/nrrd/format.html
+type: short
+dimension: 3
+space: left-posterior-superior
+sizes: 30 30 30
+byte skip: -5
+space directions: (1,0,0) (0,1,0) (0,0,1)
+kinds: domain domain domain
+endian: little
+encoding: raw
+space origin: (0,0,0)
+data file: BallBinary30x30x30.raw
diff --git a/nrrd/tests/data/BallBinary30x30x30_byteskip_minus_one.nhdr b/nrrd/tests/data/BallBinary30x30x30_byteskip_minus_one.nhdr
@@ -0,0 +1,14 @@
+NRRD0004
+# Complete NRRD file format specification at:
+# http://teem.sourceforge.net/nrrd/format.html
+type: short
+dimension: 3
+space: left-posterior-superior
+sizes: 30 30 30
+byte skip: -1
+space directions: (1,0,0) (0,1,0) (0,0,1)
+kinds: domain domain domain
+endian: little
+encoding: raw
+space origin: (0,0,0)
+data file: BallBinary30x30x30.raw
diff --git a/nrrd/tests/data/BallBinary30x30x30_byteskip_minus_one_nifti.nhdr b/nrrd/tests/data/BallBinary30x30x30_byteskip_minus_one_nifti.nhdr
@@ -0,0 +1,14 @@
+NRRD0004
+# Complete NRRD file format specification at:
+# http://teem.sourceforge.net/nrrd/format.html
+type: short
+dimension: 3
+space: left-posterior-superior
+sizes: 30 30 30
+byte skip: -1
+space directions: (1,0,0) (0,1,0) (0,0,1)
+kinds: domain domain domain
+endian: little
+encoding: gzip
+space origin: (0,0,0)
+data file: BallBinary30x30x30.nii.gz
diff --git a/nrrd/tests/data/BallBinary30x30x30_gz_byteskip_minus_one.nrrd b/nrrd/tests/data/BallBinary30x30x30_gz_byteskip_minus_one.nrrd
diff --git a/nrrd/tests/data/BallBinary30x30x30_nifti.nhdr b/nrrd/tests/data/BallBinary30x30x30_nifti.nhdr
@@ -0,0 +1,14 @@
+NRRD0004
+# Complete NRRD file format specification at:
+# http://teem.sourceforge.net/nrrd/format.html
+type: short
+dimension: 3
+space: left-posterior-superior
+sizes: 30 30 30
+# byte skip: -1
+space directions: (1,0,0) (0,1,0) (0,0,1)
+kinds: domain domain domain
+endian: little
+encoding: gzip
+space origin: (0,0,0)
+data file: BallBinary30x30x30.nii.gz
diff --git a/nrrd/tests/test_reading.py b/nrrd/tests/test_reading.py
@@ -68,6 +68,46 @@ def test_read_detached_header_and_data(self):
 
         # Test that the data read is able to be edited
         self.assertTrue(data.flags['WRITEABLE'])
+
+    def test_read_detached_header_and_data_with_byteskip_minus1(self):
+        expected_header = self.expected_header
+        expected_header[u'data file'] = os.path.basename(RAW_DATA_FILE_PATH)
+        expected_header[u'byte skip'] = -1
+
+        data, header = nrrd.read(RAW_BYTESKIP_NHDR_FILE_PATH)
+
+        np.testing.assert_equal(self.expected_header, header)
+        np.testing.assert_equal(data, self.expected_data)
+
+        # Test that the data read is able to be edited
+        self.assertTrue(data.flags['WRITEABLE'])
+
+    def test_read_detached_header_and_nifti_data_with_byteskip_minus1(self):
+        expected_header = self.expected_header
+        expected_header[u'data file'] = os.path.basename(RAW_DATA_FILE_PATH)
+        expected_header[u'byte skip'] = -1
+        expected_header[u'encoding'] = 'gzip'
+        expected_header[u'data file'] = 'BallBinary30x30x30.nii.gz'
+
+        data, header = nrrd.read(GZ_BYTESKIP_NIFTI_NHDR_FILE_PATH)
+
+        np.testing.assert_equal(self.expected_header, header)
+        np.testing.assert_equal(data, self.expected_data)
+
+        # Test that the data read is able to be edited
+        self.assertTrue(data.flags['WRITEABLE'])
+
+    def test_read_detached_header_and_nifti_data(self):
+
+        with self.assertRaisesRegex(nrrd.NRRDError, 'Size of the data does not equal '
+            + 'the product of all the dimensions: 27000-27176=-176'):
+            nrrd.read(GZ_NIFTI_NHDR_FILE_PATH)
+
+    def test_read_detached_header_and_data_with_byteskip_minus5(self):
+
+        with self.assertRaisesRegex(nrrd.NRRDError, 'Invalid byteskip, allowed values '
+            +'are greater than or equal to -1'):
+            nrrd.read(RAW_INVALID_BYTESKIP_NHDR_FILE_PATH)
 
     def test_read_header_and_gz_compressed_data(self):
         expected_header = self.expected_header
@@ -80,6 +120,20 @@ def test_read_header_and_gz_compressed_data(self):
 
         # Test that the data read is able to be edited
         self.assertTrue(data.flags['WRITEABLE'])
+
+    def test_read_header_and_gz_compressed_data_with_byteskip_minus1(self):
+        expected_header = self.expected_header
+        expected_header[u'encoding'] = 'gzip'
+        expected_header[u'type'] = 'int16'
+        expected_header[u'byte skip'] = -1
+
+        data, header = nrrd.read(GZ_BYTESKIP_NRRD_FILE_PATH)
+
+        np.testing.assert_equal(self.expected_header, header)
+        np.testing.assert_equal(data, self.expected_data)
+
+        # Test that the data read is able to be edited
+        self.assertTrue(data.flags['WRITEABLE'])
 
     def test_read_header_and_bz2_compressed_data(self):
         expected_header = self.expected_header

diff --git a/nrrd/tests/util.py b/nrrd/tests/util.py
@@ -4,10 +4,15 @@
 DATA_DIR_PATH = os.path.join(os.path.dirname(__file__), 'data')
 RAW_NRRD_FILE_PATH = os.path.join(DATA_DIR_PATH, 'BallBinary30x30x30.nrrd')
 RAW_NHDR_FILE_PATH = os.path.join(DATA_DIR_PATH, 'BallBinary30x30x30.nhdr')
+RAW_BYTESKIP_NHDR_FILE_PATH = os.path.join(DATA_DIR_PATH, 'BallBinary30x30x30_byteskip_minus_one.nhdr')
+GZ_BYTESKIP_NIFTI_NHDR_FILE_PATH = os.path.join(DATA_DIR_PATH, 'BallBinary30x30x30_byteskip_minus_one_nifti.nhdr')
+GZ_NIFTI_NHDR_FILE_PATH = os.path.join(DATA_DIR_PATH, 'BallBinary30x30x30_nifti.nhdr')
+RAW_INVALID_BYTESKIP_NHDR_FILE_PATH = os.path.join(DATA_DIR_PATH, 'BallBinary30x30x30_byteskip_minus_five.nhdr')
 RAW_DATA_FILE_PATH = os.path.join(DATA_DIR_PATH, 'BallBinary30x30x30.raw')
 GZ_NRRD_FILE_PATH = os.path.join(DATA_DIR_PATH, 'BallBinary30x30x30_gz.nrrd')
 BZ2_NRRD_FILE_PATH = os.path.join(DATA_DIR_PATH, 'BallBinary30x30x30_bz2.nrrd')
 GZ_LINESKIP_NRRD_FILE_PATH = os.path.join(DATA_DIR_PATH, 'BallBinary30x30x30_gz_lineskip.nrrd')
+GZ_BYTESKIP_NRRD_FILE_PATH = os.path.join(DATA_DIR_PATH, 'BallBinary30x30x30_gz_byteskip_minus_one.nrrd')
 RAW_4D_NRRD_FILE_PATH = os.path.join(DATA_DIR_PATH, 'test_simple4d_raw.nrrd')
 
 ASCII_1D_NRRD_FILE_PATH = os.path.join(DATA_DIR_PATH, 'test1d_ascii.nrrd')