Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow override of dup. field error, and zlib compression level #65

Merged
merged 7 commits into from
Aug 25, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion nrrd/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import os
import re
import zlib
import warnings
from collections import OrderedDict

from nrrd.parsers import *
Expand All @@ -15,6 +16,10 @@

_NRRD_REQUIRED_FIELDS = ['dimension', 'type', 'encoding', 'sizes']

# Duplicated header fields are prohibited by the NRRD spec, but do occur in the wild.
# When False (the default), read_header raises NRRDError on a duplicated field.
# When True, read_header emits a warning for the duplicated field instead of raising.
ALLOW_DUPLICATE_FIELD = False

_TYPEMAP_NRRD2NUMPY = {
'signed char': 'i1',
'int8': 'i1',
Expand Down Expand Up @@ -250,7 +255,12 @@ def read_header(file, custom_field_map=None):

# Check if the field has been added already
if field in header.keys():
raise NRRDError('Duplicate header field: %s' % repr(field))
dup_message = "Duplicate header field: '%s'" % str(field)

if not ALLOW_DUPLICATE_FIELD:
raise NRRDError(dup_message)

warnings.warn(dup_message)

# Get the datatype of the field based on its field name and custom field map
field_type = _get_field_type(field, custom_field_map)
Expand Down
17 changes: 17 additions & 0 deletions nrrd/tests/test_reading.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,23 @@ def test_read_raw_header(self):
header = nrrd.read_header(('NRRD0005', 'my extra info:=my : colon-separated : values'))
np.testing.assert_equal(expected_header, header)

def test_read_dup_field_error_and_warn(self):
    """Duplicate header fields raise by default and only warn when allowed.

    With ``nrrd.reader.ALLOW_DUPLICATE_FIELD`` left at its default, reading a
    header that repeats a field must raise :class:`nrrd.NRRDError`. With the
    flag set to True, the same header must be read successfully and a warning
    carrying the same message must be emitted.
    """
    import warnings

    expected_header = {u'type': 'float', u'dimension': 3}
    header_txt_tuple = ('NRRD0005', 'type: float', 'dimension: 3', 'type: float')

    # Default behaviour: the duplicated 'type' field is rejected
    with self.assertRaisesRegex(nrrd.NRRDError, "Duplicate header field: 'type'"):
        nrrd.read_header(header_txt_tuple)

    # The flag is module-level state shared by every test, so remember the
    # current value and restore it even if an assertion below fails.
    previous_setting = nrrd.reader.ALLOW_DUPLICATE_FIELD
    nrrd.reader.ALLOW_DUPLICATE_FIELD = True
    try:
        with warnings.catch_warnings(record=True) as w:
            # Make sure the warning is recorded even if an identical one was
            # already issued from the same location earlier in the test run.
            warnings.simplefilter('always')
            header = nrrd.read_header(header_txt_tuple)

        self.assertTrue("Duplicate header field: 'type'" in str(w[0].message))
        self.assertEqual(expected_header, header)
    finally:
        nrrd.reader.ALLOW_DUPLICATE_FIELD = previous_setting

def test_read_header_and_ascii_1d_data(self):
expected_header = {u'dimension': 1,
u'encoding': 'ASCII',
Expand Down
24 changes: 20 additions & 4 deletions nrrd/tests/test_writing.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,24 +17,40 @@ def setUp(self):
with open(RAW_DATA_FILE_PATH, 'rb') as f:
self.expected_data = f.read()

def write_and_read_back_with_encoding(self, encoding, level=9):
    """Write the fixture data with ``encoding`` and check that it reads back unchanged.

    Parameters
    ----------
    encoding : str
        Value stored in the header's ``encoding`` field when writing.
    level : int, optional
        Forwarded to :func:`nrrd.write` as ``compression_level``. Defaults to 9.

    Returns
    -------
    str
        Path of the file that was written, so callers can inspect it further.
    """
    # Encoding and level are both part of the name so different runs do not overwrite each other
    output_filename = os.path.join(self.temp_write_dir,
                                   'testfile_{}_{}.nrrd'.format(encoding, level))
    nrrd.write(output_filename, self.data_input, {u'encoding': encoding},
               compression_level=level)

    # Read back the same file
    data, header = nrrd.read(output_filename)

    # tobytes() replaces the deprecated tostring() alias and returns the same bytes
    self.assertEqual(self.expected_data, data.tobytes(order='F'))
    self.assertEqual(header['encoding'], encoding)

    return output_filename

def test_write_raw(self):
    """Round-trip the fixture data through the 'raw' encoding."""
    self.write_and_read_back_with_encoding(encoding=u'raw')

def test_write_gz(self):
    """Round-trip the fixture data through the 'gzip' encoding at the default level."""
    self.write_and_read_back_with_encoding(encoding=u'gzip')

def test_write_bzip2(self):
    """Round-trip the fixture data through the 'bzip2' encoding at the default level."""
    self.write_and_read_back_with_encoding(encoding=u'bzip2')

def test_write_gz_level1(self):
    """Check that gzip level 1 round-trips and yields a larger file than the reference.

    The file written at compression level 1 must be strictly larger than the
    reference file at ``GZ_NRRD_FILE_PATH``.
    NOTE(review): this presumes the reference file was compressed at a higher
    level -- confirm against how the fixture was generated.
    """
    # `os` is already available at module scope; no function-local import is needed
    level1_path = self.write_and_read_back_with_encoding(u'gzip', level=1)

    self.assertLess(os.path.getsize(GZ_NRRD_FILE_PATH), os.path.getsize(level1_path))

def test_write_bzip2_level1(self):
    """Round-trip the fixture data through the 'bzip2' encoding at compression level 1.

    Only the write/read round trip is verified; the returned path is not needed.
    """
    # NOTE: no size reduction is asserted against BZ2_NRRD_FILE_PATH here, because with the
    # binary ball test data the output size does not change at different bz2 levels.
    self.write_and_read_back_with_encoding(u'bzip2', level=1)

def test_write_ascii_1d(self):
output_filename = os.path.join(self.temp_write_dir, 'testfile_ascii_1d.nrrd')

Expand Down
18 changes: 11 additions & 7 deletions nrrd/writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,6 @@
'B': 'big'
}


def _format_field_value(value, field_type):
if field_type == 'int':
return format_number(value)
Expand All @@ -94,7 +93,8 @@ def _format_field_value(value, field_type):
raise NRRDError('Invalid field type given: %s' % field_type)


def write(filename, data, header={}, detached_header=False, custom_field_map=None):
def write(filename, data, header={}, detached_header=False, custom_field_map=None,
compression_level = 9):
"""Write :class:`numpy.ndarray` to NRRD file

The :obj:`filename` parameter specifies the absolute or relative filename to write the NRRD file to. If the
Expand Down Expand Up @@ -124,6 +124,10 @@ def write(filename, data, header={}, detached_header=False, custom_field_map=Non
custom_field_map : :class:`dict` (:class:`str`, :class:`str`), optional
Dictionary used for parsing custom field types where the key is the custom field name and the value is a
string identifying datatype for the custom field.
compression_level : :class:`int`, optional
Integer compression level used when writing with a compressed encoding (gzip or bzip2). Defaults to 9.
- For gzip (zlib): 1-9 select low to high compression; 0 disables compression; -1 uses the zlib default.
- For bzip2: 1-9 select low to high compression.

See Also
--------
Expand Down Expand Up @@ -236,15 +240,15 @@ def write(filename, data, header={}, detached_header=False, custom_field_map=Non

# If header & data in the same file is desired, write data in the file
if not detached_header:
_write_data(data, fh, header)
_write_data(data, fh, header, compression_level=compression_level)

# If detached header desired, write data to different file
if detached_header:
with open(data_filename, 'wb') as data_fh:
_write_data(data, data_fh, header)
_write_data(data, data_fh, header, compression_level=compression_level)


def _write_data(data, fh, header):
def _write_data(data, fh, header, compression_level = None):
if header['encoding'] == 'raw':
# Convert the data into a string
raw_data = data.tostring(order='F')
Expand All @@ -263,9 +267,9 @@ def _write_data(data, fh, header):

# Construct the compressor object based on encoding
if header['encoding'] in ['gzip', 'gz']:
compressobj = zlib.compressobj(9, zlib.DEFLATED, zlib.MAX_WBITS | 16)
compressobj = zlib.compressobj(compression_level, zlib.DEFLATED, zlib.MAX_WBITS | 16)
elif header['encoding'] in ['bzip2', 'bz2']:
compressobj = bz2.BZ2Compressor()
compressobj = bz2.BZ2Compressor(compression_level)
else:
raise NRRDError('Unsupported encoding: "%s"' % header['encoding'])

Expand Down