Fix data-corruption error in CBF compression (#314)
The byte-offset compression introduced by #238, #228 was incorrect, causing
problems for anything _writing_ CBF files, i.e. dxtbx.dlsnxs2cbf (and probably
dials.merge_cbf).

- -0xf777 -> -0x7fff, i.e. one greater than -0x8000: deltas with too large a
  negative value were being incorrectly encoded (the encoding is sketched in
  Python after the file summary below)
- rework the fall-through / last case in compression to match the layout of
  decompression
- more thorough testing, in particular tests with negative deltas of magnitude
  greater than 0x8000
- add a test verifying that the HDF5 and CBF representations of the data
  written by dlsnxs2cbf are the same
graeme-winter authored Feb 22, 2021
1 parent af49159 commit d1f9583
Showing 4 changed files with 59 additions and 24 deletions.
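For context, CBF byte-offset compression stores each pixel as a delta from the
previous pixel, escaping to wider integers through sentinel values. Below is a
minimal Python sketch of the corrected encoder logic; byte_offset_compress is a
hypothetical name for illustration only, not the dxtbx API (the real
implementation is the C++ cbf_compress in the diff that follows):

import struct


def byte_offset_compress(values):
    """Encode pixel values with the CBF byte-offset scheme (little-endian).

    Each pixel is stored as the delta from the previous pixel: one signed
    byte if it fits, else the sentinel -0x80 followed by a signed short,
    else a second sentinel -0x8000 followed by a signed 32-bit int.
    """
    packed = bytearray()
    current = 0
    for value in values:
        delta = value - current
        if -0x7F <= delta < 0x80:  # one byte; -0x80 is reserved as sentinel
            packed += struct.pack("<b", delta)
        elif -0x7FFF <= delta < 0x8000:  # the corrected bound (was -0xf777)
            packed += struct.pack("<bh", -0x80, delta)
        else:  # fall through: the 2-byte sentinel, then the full delta
            packed += struct.pack("<bhi", -0x80, -0x8000, delta)
        current = value
    return bytes(packed)

Under the old -0xf777 bound, a delta of e.g. -40000 passed the short-range
test and was truncated to 16 bits, silently wrapping to +25536; a delta of
exactly -0x8000 was emitted as the bytes of the sentinel itself. Both cases
now fall through to the four-byte form.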
31 changes: 16 additions & 15 deletions boost_python/compression.cc
@@ -1,4 +1,5 @@
#include "compression.h"
#include <assert.h>

typedef union {
char b[2];
@@ -62,7 +63,7 @@ std::vector<char> dxtbx::boost_python::cbf_compress(const int *values,
 
     packed.push_back(-0x80);
 
-    if ((-0xf777 <= delta) && (delta < 0x8000)) {
+    if ((-0x7fff <= delta) && (delta < 0x8000)) {
       s = (short)delta;
       b = ((union_short *)&s)[0].b;
 
@@ -75,6 +76,7 @@ std::vector<char> dxtbx::boost_python::cbf_compress(const int *values,
       current += delta;
       continue;
     }
+
     s = -0x8000;
     b = ((union_short *)&s)[0].b;
 
@@ -85,21 +87,20 @@ std::vector<char> dxtbx::boost_python::cbf_compress(const int *values,
     packed.push_back(b[0]);
     packed.push_back(b[1]);
 
-    if ((-0x7fffffff <= delta) && (delta < 0x80000000)) {
-      i = delta;
-      b = ((union_int *)&i)[0].b;
+    assert((-0x7fffffff <= delta) && (delta < 0x80000000));
 
-      if (!le) {
-        byte_swap_int(b);
-      }
+    i = delta;
+    b = ((union_int *)&i)[0].b;
 
-      packed.push_back(b[0]);
-      packed.push_back(b[1]);
-      packed.push_back(b[2]);
-      packed.push_back(b[3]);
-      current += delta;
-      continue;
+    if (!le) {
+      byte_swap_int(b);
     }
+
+    packed.push_back(b[0]);
+    packed.push_back(b[1]);
+    packed.push_back(b[2]);
+    packed.push_back(b[3]);
+    current += delta;
   }
 
   return packed;
@@ -119,7 +120,7 @@ void dxtbx::boost_python::cbf_decompress(const char *packed,
     c = packed[j];
     j += 1;
 
-    if (c != -128) {
+    if (c != -0x80) {
       current += c;
       *values = current;
       values++;
@@ -134,7 +135,7 @@ void dxtbx::boost_python::cbf_decompress(const char *packed,
       byte_swap_short((char *)&s);
     }
 
-    if (s != -32768) {
+    if (s != -0x8000) {
       current += s;
       *values = current;
       values++;
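To see why the fall-through case was rearranged "for similarity with
decompression layout", a matching minimal decoder sketch helps (again
hypothetical Python mirroring the C++ cbf_decompress above, not the dxtbx
API): each sentinel simply widens the next read, so the encoder's final case
must unconditionally emit the four-byte form.

import struct


def byte_offset_decompress(packed, n):
    """Decode n pixel values from a byte-offset stream (little-endian)."""
    values = []
    current = 0
    j = 0
    for _ in range(n):
        (c,) = struct.unpack_from("<b", packed, j)
        j += 1
        if c != -0x80:  # plain one-byte delta
            current += c
        else:
            (s,) = struct.unpack_from("<h", packed, j)
            j += 2
            if s != -0x8000:  # two-byte delta
                current += s
            else:  # four-byte delta
                (i,) = struct.unpack_from("<i", packed, j)
                j += 4
                current += i
        values.append(current)
    return values

Paired with the encoder sketch above,
byte_offset_decompress(byte_offset_compress([1, 44369, 214]), 3) round-trips
correctly, including the -44155 delta that the old bound corrupted.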
2 changes: 2 additions & 0 deletions newsfragments/314.bugfix
@@ -0,0 +1,2 @@
+Fix error corrupting data when writing CBF files with large pixel values.
+This affected ``dxtbx.dlsnxs2cbf`` and ``dials.merge_cbf``
35 changes: 26 additions & 9 deletions tests/command_line/test_dlsnxs2cbf.py
@@ -1,24 +1,41 @@
+import h5py
+import numpy as np
 import procrunner
 import pytest
 
+from dxtbx.format.FormatCBFMiniEigerDLS16MSN160 import FormatCBFMiniEigerDLS16MSN160
+from dxtbx.model.experiment_list import ExperimentListFactory
+
 
 @pytest.mark.xfail(
     "os.name == 'nt'", reason="crashes python process on Windows", run=False
 )
-def test_dlsnxs2cbf(dials_data, tmpdir):
+def test_dlsnxs2cbf(dials_data, tmp_path):
     screen = dials_data("thaumatin_eiger_screen")
     master = screen.join("Therm_6_1_master.h5")
     result = procrunner.run(
-        ["dxtbx.dlsnxs2cbf", master, "junk_%04d.cbf"], working_directory=tmpdir
+        ["dxtbx.dlsnxs2cbf", master, "junk_%04d.cbf"], working_directory=tmp_path
     )
     assert not result.returncode and not result.stderr
 
-    expected_output = "\n".join("junk_%04d.cbf" % j for j in (1, 2, 3))
-
-    for record in expected_output.split("\n"):
-        assert record.strip().encode("latin-1") in result.stdout, record
+    output_files = ["junk_%04d.cbf" % j for j in (1, 2, 3)]
+    for file in output_files:
+        assert file.encode("latin-1") in result.stdout
+        assert tmp_path.joinpath(file).is_file()
 
+    # check files on disk
+    expts = ExperimentListFactory.from_filenames(
+        str(tmp_path / file) for file in output_files
+    )
+    assert all(
+        imgset.get_format_class() == FormatCBFMiniEigerDLS16MSN160
+        for imgset in expts.imagesets()
+    )
 
-    for j in (1, 2, 3):
-        assert tmpdir.join("junk_%04d.cbf" % j).check()
+    with h5py.File(master) as fh:
+        for i, imgset in enumerate(expts.imagesets()):
+            original = fh["/entry/data/data_000001"][i][()]
+            sel = np.where(original < original.max())
+            np.testing.assert_equal(
+                fh["/entry/data/data_000001"][i][sel],
+                imgset.get_raw_data(0)[0].as_numpy_array()[sel],
+            )
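One detail of the new HDF5-vs-CBF check is the sel mask: Eiger HDF5 frames
flag masked or defective pixels with the maximum value in the frame, and the
CBF conversion is free to remap those, which is presumably why the comparison
is restricted to pixels below original.max(). A toy illustration of the same
masked comparison, using hypothetical values rather than data from the test
set:

import numpy as np

# 65535 marks a masked pixel in the HDF5 frame; the CBF writer may remap
# it (here, to -1), so it must be excluded from the comparison.
hdf5_frame = np.array([10, 44369, 65535, 214], dtype=np.int64)
cbf_frame = np.array([10, 44369, -1, 214], dtype=np.int64)

sel = np.where(hdf5_frame < hdf5_frame.max())  # keep only unmasked pixels
np.testing.assert_equal(hdf5_frame[sel], cbf_frame[sel])  # passes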
15 changes: 15 additions & 0 deletions tests/test_compression.py
@@ -0,0 +1,15 @@
+from scitbx.array_family import flex
+
+from dxtbx.ext import compress, uncompress
+
+
+def test_compress_decompress():
+    x, y = 10, 10
+
+    data = flex.int(x * y, 1)
+    data[10] = 44369
+    data[11] = 214
+    compressed = compress(data)
+    uncompressed = uncompress(compressed, x, y)
+
+    assert list(data) == list(uncompressed)
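The magic numbers in this test target the bug directly: 44369 - 1 = 44368
requires a four-byte delta, and 214 - 44369 = -44155 is a negative delta of
magnitude greater than 0x8000, which the old -0xf777 bound silently wrapped
to +21381. A sketch of a further boundary-value test one might add, assuming
the same compress/uncompress interface as above (test name and parameter
values are illustrative, not part of this commit):

import pytest
from scitbx.array_family import flex

from dxtbx.ext import compress, uncompress


@pytest.mark.parametrize("delta", [-0x8001, -0x8000, -0x7FFF, 0x7FFF, 0x8000])
def test_compress_decompress_boundary(delta):
    # Two pixels whose difference sits at a sentinel boundary of the
    # byte-offset encoding; 0x10000 keeps both pixel values positive.
    data = flex.int([0x10000, 0x10000 + delta])
    assert list(uncompress(compress(data), 2, 1)) == list(data)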
