Skip to content

Commit

Permalink
FormatMultiImage: when file indices are provided, only read models fo…
Browse files Browse the repository at this point in the history
…r the specified images (#210)

We were loading the models (beam, detector, goniometer, scan) for every image in a multi-image file, even when only one image was requested. Reading only the requested models makes constructing small imagesets from large containers roughly 8x faster.
  • Loading branch information
dwpaley authored Nov 18, 2020
1 parent 55ce283 commit cb6e61b
Show file tree
Hide file tree
Showing 3 changed files with 47 additions and 18 deletions.
38 changes: 20 additions & 18 deletions format/FormatMultiImage.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,7 @@ def get_imageset(
assert not (as_sequence and lazy), "No lazy support for sequences"

if single_file_indices is not None:
assert len(single_file_indices)
single_file_indices = flex.size_t(single_file_indices)

# Create an imageset or sequence
Expand Down Expand Up @@ -228,29 +229,30 @@ def get_imageset(
indices=single_file_indices,
)

if single_file_indices is None:
single_file_indices = range(format_instance.get_num_images())

# If any are None then read from format
if [beam, detector, goniometer, scan].count(None) != 0:
if not all((beam, detector, goniometer, scan)):

# Get list of models
beam = []
detector = []
goniometer = []
scan = []
for i in range(format_instance.get_num_images()):
beam.append(format_instance.get_beam(i))
detector.append(format_instance.get_detector(i))
goniometer.append(format_instance.get_goniometer(i))
scan.append(format_instance.get_scan(i))

if single_file_indices is None:
single_file_indices = list(range(format_instance.get_num_images()))
num_images = format_instance.get_num_images()
beam = [None] * num_images
detector = [None] * num_images
goniometer = [None] * num_images
scan = [None] * num_images
for i in single_file_indices:
beam[i] = format_instance.get_beam(i)
detector[i] = format_instance.get_detector(i)
goniometer[i] = format_instance.get_goniometer(i)
scan[i] = format_instance.get_scan(i)

# Set the list of models
for i in range(len(single_file_indices)):
iset.set_beam(beam[single_file_indices[i]], i)
iset.set_detector(detector[single_file_indices[i]], i)
iset.set_goniometer(goniometer[single_file_indices[i]], i)
iset.set_scan(scan[single_file_indices[i]], i)
for i, index in enumerate(single_file_indices):
iset.set_beam(beam[index], i)
iset.set_detector(detector[index], i)
iset.set_goniometer(goniometer[index], i)
iset.set_scan(scan[index], i)

else:

Expand Down
3 changes: 3 additions & 0 deletions newsfragments/210.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
FormatMultiImage: When constructing an imageset with the indices of some (not
all) single images in the container, we skip reading models for the images that
were not requested. In some cases this speeds up imageset construction by 8x.
24 changes: 24 additions & 0 deletions tests/test_imageset.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,20 +3,44 @@
import os
from builtins import range

import mock
import pytest
import six.moves.cPickle as pickle

from scitbx.array_family import flex

import dxtbx.format.FormatHDF5SaclaMPCCD
import dxtbx.format.image
import dxtbx.format.Registry
import dxtbx.tests.imagelist
from dxtbx.format.FormatCBFMiniPilatus import FormatCBFMiniPilatus as FormatClass
from dxtbx.imageset import ExternalLookup, ImageSequence, ImageSetData, ImageSetFactory
from dxtbx.model import Beam, Detector, Panel
from dxtbx.model.beam import BeamFactory
from dxtbx.model.experiment_list import ExperimentListFactory


@pytest.mark.parametrize("indices,expected_call_count", ((None, 4), ([1], 2)))
def test_single_file_indices(indices, expected_call_count, dials_regression):
    """Check how often the beam model is built when constructing an imageset.

    ``FormatHDF5SaclaMPCCD._beam`` is replaced by a mock that returns a simple
    beam, an imageset is built from the SACLA example file with the given
    ``single_file_indices``, and the number of calls made to the mock is
    compared against ``expected_call_count``.

    NOTE(review): the expected counts (4 for all images, 2 for ``[1]``)
    presumably include a call made outside the per-image model loop -- confirm
    against the format class.
    """
    image_path = os.path.join(
        dials_regression,
        "image_examples",
        "SACLA_MPCCD_Cheetah",
        "run266702-0-subset.h5",
    )
    sacla_format = dxtbx.format.FormatHDF5SaclaMPCCD.FormatHDF5SaclaMPCCD

    # Every call to the patched ``_beam`` yields a fresh simple beam and is
    # recorded by the mock so that the calls can be counted afterwards.
    with mock.patch.object(
        sacla_format, "_beam", side_effect=lambda: BeamFactory.simple(1.0)
    ) as beam_mock:
        resolved_format = dxtbx.format.Registry.get_format_class_for_file(image_path)
        resolved_format.get_imageset([image_path], single_file_indices=indices)
        assert beam_mock.call_count == expected_call_count


@pytest.mark.parametrize(
"image",
dxtbx.tests.imagelist.smv_images
Expand Down

0 comments on commit cb6e61b

Please sign in to comment.