diff --git a/src/main/java/htsjdk/samtools/cram/structure/Container.java b/src/main/java/htsjdk/samtools/cram/structure/Container.java index d7ef2144b0..36acbca9fa 100644 --- a/src/main/java/htsjdk/samtools/cram/structure/Container.java +++ b/src/main/java/htsjdk/samtools/cram/structure/Container.java @@ -30,6 +30,7 @@ public class Container { private final ReferenceContext referenceContext; // container header as defined in the specs, in addition to sequenceId from ReferenceContext + /** * Byte size of the content excluding header. */ @@ -44,7 +45,16 @@ public class Container { public long bases = 0; public int blockCount = -1; + + // Slice byte boundaries within this container, after the header. landmarks[i] is the byte offset of slice i (Slice.offset). + // e.g. if landmarks[0] = 9000 and landmarks[1] = 109000, we know: + // landmarks[0] is treated as the container's header size = 9000 (TODO confirm against the CRAM spec) + // Slice[0].offset = 9000 + // Slice[0].size = 109000 - 9000 = 100000 + // Slice[1].offset = 109000 + public int[] landmarks; + public int checksum = 0; /** @@ -59,7 +69,7 @@ public class Container { // for indexing: /** - * Container start in the stream. + * Container start in the stream, in bytes. 
*/ public long offset; @@ -136,6 +146,44 @@ else if (sliceRefContexts.size() > 1) { return container; } + /** + * Assign this Container's slices, and populate each slice's indexing parameters + * (containerOffset, index, offset, size) from this Container's offset, landmarks and containerByteSize. + * @param slicesToPopulate the slices to assign, in container order; landmarks must already hold an entry for every slice + */ + void populateSlicesAndIndexingParameters(final ArrayList slicesToPopulate) { + + slices = new Slice[slicesToPopulate.size()]; + + if (slicesToPopulate.isEmpty()) { + return; + } + + final int lastSliceIndex = slicesToPopulate.size() - 1; + for (int i = 0; i < lastSliceIndex; i++) { + final Slice slice = slicesToPopulate.get(i); + slice.containerOffset = offset; + slice.index = i; + slice.offset = landmarks[i]; + slice.size = landmarks[i + 1] - slice.offset; + slices[i] = slice; + } + + final Slice lastSlice = slicesToPopulate.get(lastSliceIndex); + lastSlice.containerOffset = offset; + lastSlice.index = lastSliceIndex; + lastSlice.offset = landmarks[lastSliceIndex]; + + // the last slice has no following landmark: calculate a "final landmark" (the byte offset of the end of the container) + // as landmarks[0] + containerByteSize. NOTE(review): landmarks[0] is treated as the header size - confirm against the CRAM spec + + final int containerHeaderSize = landmarks[0]; + final int containerTotalByteSize = containerHeaderSize + containerByteSize; + lastSlice.size = containerTotalByteSize - lastSlice.offset; + + this.slices[lastSliceIndex] = lastSlice; + } + /** * Retrieve the list of CRAI Index entries corresponding to this Container. 
* diff --git a/src/main/java/htsjdk/samtools/cram/structure/ContainerIO.java b/src/main/java/htsjdk/samtools/cram/structure/ContainerIO.java index 1dc0e19e46..a9697f29df 100644 --- a/src/main/java/htsjdk/samtools/cram/structure/ContainerIO.java +++ b/src/main/java/htsjdk/samtools/cram/structure/ContainerIO.java @@ -3,9 +3,9 @@ import htsjdk.samtools.cram.build.CramIO; import htsjdk.samtools.cram.common.CramVersionPolicies; import htsjdk.samtools.cram.common.Version; -import htsjdk.samtools.util.Log; import htsjdk.samtools.cram.structure.block.Block; import htsjdk.samtools.cram.structure.block.BlockContentType; +import htsjdk.samtools.util.Log; import htsjdk.samtools.util.RuntimeIOException; import java.io.*; @@ -78,38 +78,18 @@ private static Container readContainer(final int major, final InputStream inputS throw new RuntimeIOException(e); } - final List slices = new ArrayList(); + final ArrayList slices = new ArrayList<>(); for (int sliceCount = fromSlice; sliceCount < howManySlices - fromSlice; sliceCount++) { - final Slice slice = SliceIO.read(major, inputStream); - slice.index = sliceCount; - slices.add(slice); + slices.add(SliceIO.read(major, inputStream)); } - container.slices = slices.toArray(new Slice[slices.size()]); - - calculateSliceOffsetsAndSizes(container); + container.populateSlicesAndIndexingParameters(slices); log.debug("READ CONTAINER: " + container.toString()); return container; } - private static void calculateSliceOffsetsAndSizes(final Container container) { - if (container.slices.length == 0) return; - for (int i = 0; i < container.slices.length - 1; i++) { - final Slice slice = container.slices[i]; - slice.offset = container.landmarks[i]; - slice.size = container.landmarks[i + 1] - slice.offset; - slice.containerOffset = container.offset; - slice.index = i; - } - final Slice lastSlice = container.slices[container.slices.length - 1]; - lastSlice.offset = container.landmarks[container.landmarks.length - 1]; - lastSlice.size = 
container.containerByteSize - lastSlice.offset; - lastSlice.containerOffset = container.offset; - lastSlice.index = container.slices.length - 1; - } - /** * Writes a {@link Container} header information to a {@link OutputStream}. * @@ -173,7 +153,6 @@ public static int writeContainer(final Version version, final Container containe container.landmarks[i] = landmarks.get(i); container.containerByteSize = byteArrayOutputStream.size(); - calculateSliceOffsetsAndSizes(container); int length = ContainerHeaderIO.writeContainerHeader(version.major, container, outputStream); try { diff --git a/src/test/java/htsjdk/samtools/cram/structure/ContainerTest.java b/src/test/java/htsjdk/samtools/cram/structure/ContainerTest.java index 7771352d09..789c739f0f 100644 --- a/src/test/java/htsjdk/samtools/cram/structure/ContainerTest.java +++ b/src/test/java/htsjdk/samtools/cram/structure/ContainerTest.java @@ -9,6 +9,7 @@ import org.testng.annotations.DataProvider; import org.testng.annotations.Test; +import java.util.ArrayList; import java.util.Arrays; import java.util.List; import java.util.Map; @@ -150,4 +151,103 @@ public void getSpansTest(final List records, Assert.assertEquals(spanMap.get(expectedReferenceContext), expectedAlignmentSpan); } + // show that we can populate all of the slice indexing fields from the + // values in the container's header + + // this is part of the deserialization process, and supports index creation + + // single slice + + @Test + public static void populateSlicesAndIndexingParametersOneSlice() { + // this container starts 100,000 bytes into the CRAM stream + final int containerStreamByteOffset = 100000; + + // this Container consists of: + // a header of size 1234 bytes + // a Slice of size 6262 bytes + + final int containerHeaderSize = 1234; + final int sliceSize = 6262; + + final Container container = createOneSliceContainer(containerStreamByteOffset, containerHeaderSize, sliceSize); + + assertSliceIndexingParams(container.slices[0], 0, 
containerStreamByteOffset, sliceSize, containerHeaderSize); + } + + // two slices: slice 1 starts where slice 0 ends + + @Test + public static void populateSlicesAndIndexingParametersTwoSlices() { + // this container starts 200,000 bytes into the CRAM stream + final int containerStreamByteOffset = 200000; + + // this Container consists of: + // a header of size 3234 bytes + // slice 0, of size 7890 bytes + // slice 1, of size 5555 bytes + + final int containerHeaderSize = 3234; + final int slice0size = 7890; + final int slice1size = 5555; + + final Container container = createTwoSliceContainer(containerStreamByteOffset, containerHeaderSize, slice0size, slice1size); + + assertSliceIndexingParams(container.slices[0], 0, containerStreamByteOffset, slice0size, containerHeaderSize); + assertSliceIndexingParams(container.slices[1], 1, containerStreamByteOffset, slice1size, containerHeaderSize + slice0size); + } + + private static Container createOneSliceContainer(final int containerStreamByteOffset, + final int containerHeaderSize, + final int slice0size) { + final ReferenceContext refContext = new ReferenceContext(0); + + final Container container = new Container(refContext); + container.offset = containerStreamByteOffset; + container.containerByteSize = slice0size; + container.landmarks = new int[]{ + containerHeaderSize, // landmarks[0]: beginning of slice 0, the only slice + }; + + final ArrayList slices = new ArrayList() {{ + add(new Slice(refContext)); + }}; + container.populateSlicesAndIndexingParameters(slices); + return container; + } + + private static Container createTwoSliceContainer(final int containerStreamByteOffset, + final int containerHeaderSize, + final int slice0size, + final int slice1size) { + final int containerDataSize = slice0size + slice1size; + + final ReferenceContext refContext = new ReferenceContext(0); + + final Container container = new Container(refContext); + container.offset = containerStreamByteOffset; + container.containerByteSize = containerDataSize; + container.landmarks = new int[]{ + 
containerHeaderSize, // landmarks[0]: beginning of slice 0 + containerHeaderSize + slice0size // landmarks[1]: beginning of slice 1 + }; + + final ArrayList slices = new ArrayList() {{ + add(new Slice(refContext)); + add(new Slice(refContext)); + }}; + container.populateSlicesAndIndexingParameters(slices); + return container; + } + + private static void assertSliceIndexingParams(final Slice slice, + final int expectedIndex, + final int expectedContainerOffset, + final int expectedSize, + final int expectedOffset) { + Assert.assertEquals(slice.index, expectedIndex); + Assert.assertEquals(slice.containerOffset, expectedContainerOffset); + Assert.assertEquals(slice.size, expectedSize); + Assert.assertEquals(slice.offset, expectedOffset); + } }