tashrifbillah · tashrifbillah · Jan 24, 2019 · Nov 9, 2018 · Nov 12, 2018 · Jan 18, 2019
diff --git a/docs/source/examples.rst b/docs/source/examples.rst
@@ -62,3 +62,21 @@ Example with fields and custom fields
        [0., 1., 0.],
        [0., 0., 1.]])), ('kinds', ['domain', 'domain', 'domain']), ('encoding', 'ASCII'), ('spacings', array([1.0458, 1.0458, 2.5   ])), ('units', ['mm', 'mm', 'mm']), ('custom_field_here1', 24.34), ('custom_field_here2', array([1, 2, 3, 4]))])
 
+Example reading NRRD file with duplicated header field
+------------------------------------------------------
+.. code-block:: python
+
+    import nrrd
+
+    # Set this field to True to enable the reading of files with duplicated header fields
+    nrrd.reader.ALLOW_DUPLICATE_FIELD = True
+
+    # Name of the file you want to read with a duplicated header field
+    filename = "filename.nrrd"
+
+    # Read the file
+    # filedata = numpy array
+    # fileheader = header of the NRRD file
+    # A warning is now received about duplicate headers rather than an error being thrown
+    filedata, fileheader = nrrd.read(filename)
+    >>> UserWarning: Duplicate header field: 'space' warnings.warn(dup_message)
diff --git a/docs/source/pynrrd.rst b/docs/source/pynrrd.rst
@@ -11,6 +11,7 @@ Reading NRRD files
     nrrd.read
     nrrd.read_header
     nrrd.read_data
+    nrrd.reader.ALLOW_DUPLICATE_FIELD
 
 Writing NRRD files
 ~~~~~~~~~~~~~~~~~~
@@ -47,3 +48,5 @@ NRRD Module
     :members:
     :undoc-members:
     :show-inheritance:
+
+.. autodata:: nrrd.reader.ALLOW_DUPLICATE_FIELD
diff --git a/docs/source/user-guide.rst b/docs/source/user-guide.rst
@@ -188,6 +188,8 @@ The :obj:`file` parameter of :meth:`read_header` accepts a filename or a string
 
 The :meth:`read_data` will not typically be used besides within the :meth:`read` function because the header is a required parameter (:obj:`header`) to this function. The remaining two parameters :obj:`fh` and :obj:`filename` are optional depending on the parameters but it never hurts to specify both. The file handle (:obj:`fh`) is necessary if the header contains the NRRD data as well (AKA it is not a detached file). However, if the NRRD data is detached from the header, then the :obj:`filename` parameter is required to obtain the absolute path to the data file. The :meth:`read_data` function returns a :class:`numpy.ndarray` of the data.
 
+Some NRRD files, although prohibited by the specification, may contain duplicated header fields, which prevents the file from being read properly. Setting :data:`nrrd.reader.ALLOW_DUPLICATE_FIELD` to :obj:`True` will show a warning instead of raising an error when reading such a file.
+
 Writing NRRD files
 ------------------
 Writing to NRRD files can be done with the single function :meth:`write`. The :obj:`filename` parameter to the function specifies the absolute or relative filename to write the NRRD file. If the :obj:`filename` extension is .nhdr, then the :obj:`detached_header` parameter is set to true automatically. If the :obj:`detached_header` parameter is set to :obj:`True` and the :obj:`filename` ends in .nrrd, then the header file will have the same path and base name as the :obj:`filename` but with an extension of .nhdr. In all other cases, the header and data are saved in the same file.

diff --git a/nrrd/reader.py b/nrrd/reader.py
@@ -2,8 +2,8 @@
 import bz2
 import os
 import re
-import zlib
 import warnings
+import zlib
 from collections import OrderedDict
 
 from nrrd.parsers import *
@@ -16,9 +16,27 @@
 
 _NRRD_REQUIRED_FIELDS = ['dimension', 'type', 'encoding', 'sizes']
 
-# Duplicated fields are prohibited by the spec, but do occur in the wild.
-# Set True to allow duplicate fields, with a warning.
 ALLOW_DUPLICATE_FIELD = False
+"""Allow duplicate header fields when reading NRRD files
+
+When there are duplicated fields in a NRRD file header, pynrrd raises an error by default. Setting this field to
+:obj:`True` will show a warning instead.
+
+Example:
+    Reading a NRRD file with duplicated header field 'space' with field set to :obj:`False`.
+
+    >>> filedata, fileheader = nrrd.read('filename_duplicatedheader.nrrd')
+    nrrd.errors.NRRDError: Duplicate header field: 'space'
+
+    Set the field as :obj:`True` to receive a warning instead.
+
+    >>> nrrd.reader.ALLOW_DUPLICATE_FIELD = True
+    >>> filedata, fileheader = nrrd.read('filename_duplicatedheader.nrrd')
+    UserWarning: Duplicate header field: 'space' warnings.warn(dup_message)
+
+Note:
+    Duplicated fields are prohibited by the NRRD file specification.
+"""
 
 _TYPEMAP_NRRD2NUMPY = {
     'signed char': 'i1',
@@ -83,7 +101,7 @@ def _get_field_type(field, custom_field_map):
     elif field in ['space origin']:
         return 'double vector'
     elif field in ['measurement frame']:
-        return 'int matrix'
+        return 'double matrix'
     elif field in ['space directions']:
         return 'double matrix'
     else:
@@ -339,22 +357,30 @@ def read_data(header, fh=None, filename=None):
     # Get the total number of data points by multiplying the size of each dimension together
     total_data_points = header['sizes'].prod()
 
-    # If encoding is raw and byte skip is -1, then seek backwards to the data
-    # Otherwise skip the number of lines requested
-    if header['encoding'] == 'raw' and byte_skip == -1:
-        fh.seek(-dtype.itemsize * total_data_points, 2)
-    else:
+    # Skip the number of lines requested when line_skip >= 0
+    # Irrespective of the NRRD file having attached/detached header
+    # Lines are skipped before getting to the beginning of the data
+    if line_skip >= 0:
         for _ in range(line_skip):
             fh.readline()
-
-    # If a compression encoding is used, then byte skip AFTER decompressing
-    if header['encoding'] == 'raw':
-        # Skip the requested number of bytes and then parse the data using NumPy
+    else:
+        raise NRRDError('Invalid lineskip, allowed values are greater than or equal to 0')
+
+    # Skip the requested number of bytes or seek backward, and then parse the data using NumPy
+    if byte_skip < -1:
+        raise NRRDError('Invalid byteskip, allowed values are greater than or equal to -1')
+    elif byte_skip >= 0:
         fh.seek(byte_skip, os.SEEK_CUR)
+    elif byte_skip == -1 and header['encoding'] not in ['gzip', 'gz', 'bzip2', 'bz2']:
+        fh.seek(-dtype.itemsize * total_data_points, os.SEEK_END)
+    else:
+        # The only case left is byte_skip == -1 with a compressed encoding (gzip, gz, bzip2 or bz2)
+        byte_skip = -dtype.itemsize * total_data_points
+
+    # If a compression encoding is used, then byte skip AFTER decompressing
+    if header['encoding'] == 'raw':             
         data = np.fromfile(fh, dtype)
     elif header['encoding'] in ['ASCII', 'ascii', 'text', 'txt']:
-        # Skip the requested number of bytes and then parse the data using NumPy
-        fh.seek(byte_skip, os.SEEK_CUR)
         data = np.fromfile(fh, dtype, sep=' ')
     else:
         # Handle compressed data now
@@ -423,7 +449,6 @@ def read(filename, custom_field_map=None):
     """
 
     """Read a NRRD file and return a tuple (data, header)."""
-
     with open(filename, 'rb') as fh:
         header = read_header(fh, custom_field_map)
         data = read_data(header, fh, filename)

diff --git a/nrrd/tests/data/BallBinary30x30x30.nii.gz b/nrrd/tests/data/BallBinary30x30x30.nii.gz
diff --git a/nrrd/tests/data/BallBinary30x30x30_byteskip_minus_five.nhdr b/nrrd/tests/data/BallBinary30x30x30_byteskip_minus_five.nhdr
@@ -0,0 +1,14 @@
+NRRD0004
+# Complete NRRD file format specification at:
+# http://teem.sourceforge.net/nrrd/format.html
+type: short
+dimension: 3
+space: left-posterior-superior
+sizes: 30 30 30
+byte skip: -5
+space directions: (1,0,0) (0,1,0) (0,0,1)
+kinds: domain domain domain
+endian: little
+encoding: raw
+space origin: (0,0,0)
+data file: BallBinary30x30x30.raw
diff --git a/nrrd/tests/data/BallBinary30x30x30_byteskip_minus_one.nhdr b/nrrd/tests/data/BallBinary30x30x30_byteskip_minus_one.nhdr
@@ -0,0 +1,14 @@
+NRRD0004
+# Complete NRRD file format specification at:
+# http://teem.sourceforge.net/nrrd/format.html
+type: short
+dimension: 3
+space: left-posterior-superior
+sizes: 30 30 30
+byte skip: -1
+space directions: (1,0,0) (0,1,0) (0,0,1)
+kinds: domain domain domain
+endian: little
+encoding: raw
+space origin: (0,0,0)
+data file: BallBinary30x30x30.raw
diff --git a/nrrd/tests/data/BallBinary30x30x30_byteskip_minus_one_nifti.nhdr b/nrrd/tests/data/BallBinary30x30x30_byteskip_minus_one_nifti.nhdr
@@ -0,0 +1,14 @@
+NRRD0004
+# Complete NRRD file format specification at:
+# http://teem.sourceforge.net/nrrd/format.html
+type: short
+dimension: 3
+space: left-posterior-superior
+sizes: 30 30 30
+byte skip: -1
+space directions: (1,0,0) (0,1,0) (0,0,1)
+kinds: domain domain domain
+endian: little
+encoding: gzip
+space origin: (0,0,0)
+data file: BallBinary30x30x30.nii.gz
diff --git a/nrrd/tests/data/BallBinary30x30x30_gz_byteskip_minus_one.nrrd b/nrrd/tests/data/BallBinary30x30x30_gz_byteskip_minus_one.nrrd
diff --git a/nrrd/tests/data/BallBinary30x30x30_nifti.nhdr b/nrrd/tests/data/BallBinary30x30x30_nifti.nhdr
@@ -0,0 +1,14 @@
+NRRD0004
+# Complete NRRD file format specification at:
+# http://teem.sourceforge.net/nrrd/format.html
+type: short
+dimension: 3
+space: left-posterior-superior
+sizes: 30 30 30
+# byte skip: -1
+space directions: (1,0,0) (0,1,0) (0,0,1)
+kinds: domain domain domain
+endian: little
+encoding: gzip
+space origin: (0,0,0)
+data file: BallBinary30x30x30.nii.gz
diff --git a/nrrd/tests/data/test_simple4d_raw.nrrd b/nrrd/tests/data/test_simple4d_raw.nrrd
@@ -8,6 +8,6 @@ sizes: 1 1 1 1
 space directions: (1.5,0,0) (0,1.5,0) (0,0,1) none
 endian: little
 encoding: raw
-measurement frame: (1,0,0) (0,1,0) (0,0,1)
+measurement frame: (1.0001,0,0) (0,1.0000000006,0) (0,0,1.000000000000009)
 
-������?
+������?
diff --git a/nrrd/tests/test_reading.py b/nrrd/tests/test_reading.py
@@ -7,7 +7,6 @@
 from nrrd.tests.util import *
 import nrrd
 
-
 class TestReadingFunctions(unittest.TestCase):
     def setUp(self):
         self.expected_header = {u'dimension': 3,
@@ -68,6 +67,46 @@ def test_read_detached_header_and_data(self):
 
         # Test that the data read is able to be edited
         self.assertTrue(data.flags['WRITEABLE'])
+
+    def test_read_detached_header_and_data_with_byteskip_minus1(self):
+        expected_header = self.expected_header
+        expected_header[u'data file'] = os.path.basename(RAW_DATA_FILE_PATH)
+        expected_header[u'byte skip'] = -1
+
+        data, header = nrrd.read(RAW_BYTESKIP_NHDR_FILE_PATH)
+
+        np.testing.assert_equal(self.expected_header, header)
+        np.testing.assert_equal(data, self.expected_data)
+
+        # Test that the data read is able to be edited
+        self.assertTrue(data.flags['WRITEABLE'])
+
+    def test_read_detached_header_and_nifti_data_with_byteskip_minus1(self):
+        expected_header = self.expected_header
+        expected_header[u'data file'] = os.path.basename(RAW_DATA_FILE_PATH)
+        expected_header[u'byte skip'] = -1
+        expected_header[u'encoding'] = 'gzip'
+        expected_header[u'data file'] = 'BallBinary30x30x30.nii.gz'
+
+        data, header = nrrd.read(GZ_BYTESKIP_NIFTI_NHDR_FILE_PATH)
+
+        np.testing.assert_equal(self.expected_header, header)
+        np.testing.assert_equal(data, self.expected_data)
+
+        # Test that the data read is able to be edited
+        self.assertTrue(data.flags['WRITEABLE'])
+
+    def test_read_detached_header_and_nifti_data(self):
+
+        with self.assertRaisesRegex(nrrd.NRRDError, 'Size of the data does not equal '
+            + 'the product of all the dimensions: 27000-27176=-176'):
+            nrrd.read(GZ_NIFTI_NHDR_FILE_PATH)
+
+    def test_read_detached_header_and_data_with_byteskip_minus5(self):
+
+        with self.assertRaisesRegex(nrrd.NRRDError, 'Invalid byteskip, allowed values '
+            +'are greater than or equal to -1'):
+            nrrd.read(RAW_INVALID_BYTESKIP_NHDR_FILE_PATH)
 
     def test_read_header_and_gz_compressed_data(self):
         expected_header = self.expected_header
@@ -80,6 +119,20 @@ def test_read_header_and_gz_compressed_data(self):
 
         # Test that the data read is able to be edited
         self.assertTrue(data.flags['WRITEABLE'])
+
+    def test_read_header_and_gz_compressed_data_with_byteskip_minus1(self):
+        expected_header = self.expected_header
+        expected_header[u'encoding'] = 'gzip'
+        expected_header[u'type'] = 'int16'
+        expected_header[u'byte skip'] = -1
+
+        data, header = nrrd.read(GZ_BYTESKIP_NRRD_FILE_PATH)
+
+        np.testing.assert_equal(self.expected_header, header)
+        np.testing.assert_equal(data, self.expected_data)
+
+        # Test that the data read is able to be edited
+        self.assertTrue(data.flags['WRITEABLE'])
 
     def test_read_header_and_bz2_compressed_data(self):
         expected_header = self.expected_header
@@ -130,7 +183,7 @@ def test_read_dup_field_error_and_warn(self):
             self.assertTrue("Duplicate header field: 'type'" in str(w[0].message))
 
             self.assertEqual(expected_header, header)
-            nrrd.reader._NRRD_ALLOW_DUPLICATE_FIELD = False
+            nrrd.reader.ALLOW_DUPLICATE_FIELD = False
 
     def test_read_header_and_ascii_1d_data(self):
         expected_header = {u'dimension': 1,
@@ -177,14 +230,16 @@ def test_read_simple_4d_nrrd(self):
                                                          [np.NaN, np.NaN, np.NaN]]),
                            'endian': 'little',
                            'encoding': 'raw',
-                           'measurement frame': np.array([[1., 0., 0.],
-                                                          [0., 1., 0.],
-                                                          [0., 0., 1.]])}
+                           'measurement frame': np.array([[1.0001,         0.,      0.],
+                                                          [0., 1.0000000006,      0.],
+                                                          [0., 0., 1.000000000000009]])}
+
 
         data, header = nrrd.read(RAW_4D_NRRD_FILE_PATH)
 
         np.testing.assert_equal(header, expected_header)
         np.testing.assert_equal(data.dtype, np.float64)
+        np.testing.assert_equal(header['measurement frame'].dtype, np.float64)
         np.testing.assert_equal(data, np.array([[[[0.76903426]]]]))
 
         # Test that the data read is able to be edited

diff --git a/nrrd/tests/util.py b/nrrd/tests/util.py
@@ -4,10 +4,15 @@
 DATA_DIR_PATH = os.path.join(os.path.dirname(__file__), 'data')
 RAW_NRRD_FILE_PATH = os.path.join(DATA_DIR_PATH, 'BallBinary30x30x30.nrrd')
 RAW_NHDR_FILE_PATH = os.path.join(DATA_DIR_PATH, 'BallBinary30x30x30.nhdr')
+RAW_BYTESKIP_NHDR_FILE_PATH = os.path.join(DATA_DIR_PATH, 'BallBinary30x30x30_byteskip_minus_one.nhdr')
+GZ_BYTESKIP_NIFTI_NHDR_FILE_PATH = os.path.join(DATA_DIR_PATH, 'BallBinary30x30x30_byteskip_minus_one_nifti.nhdr')
+GZ_NIFTI_NHDR_FILE_PATH = os.path.join(DATA_DIR_PATH, 'BallBinary30x30x30_nifti.nhdr')
+RAW_INVALID_BYTESKIP_NHDR_FILE_PATH = os.path.join(DATA_DIR_PATH, 'BallBinary30x30x30_byteskip_minus_five.nhdr')
 RAW_DATA_FILE_PATH = os.path.join(DATA_DIR_PATH, 'BallBinary30x30x30.raw')
 GZ_NRRD_FILE_PATH = os.path.join(DATA_DIR_PATH, 'BallBinary30x30x30_gz.nrrd')
 BZ2_NRRD_FILE_PATH = os.path.join(DATA_DIR_PATH, 'BallBinary30x30x30_bz2.nrrd')
 GZ_LINESKIP_NRRD_FILE_PATH = os.path.join(DATA_DIR_PATH, 'BallBinary30x30x30_gz_lineskip.nrrd')
+GZ_BYTESKIP_NRRD_FILE_PATH = os.path.join(DATA_DIR_PATH, 'BallBinary30x30x30_gz_byteskip_minus_one.nrrd')
 RAW_4D_NRRD_FILE_PATH = os.path.join(DATA_DIR_PATH, 'test_simple4d_raw.nrrd')
 
 ASCII_1D_NRRD_FILE_PATH = os.path.join(DATA_DIR_PATH, 'test1d_ascii.nrrd')