Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Test ContainerIO.calculateSliceOffsetsAndSizes() and fix the slice size calculation #1326

Merged
merged 1 commit into from
Mar 19, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 49 additions & 1 deletion src/main/java/htsjdk/samtools/cram/structure/Container.java
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ public class Container {
private final ReferenceContext referenceContext;

// container header as defined in the specs, in addition to sequenceId from ReferenceContext

/**
* Byte size of the content excluding header.
*/
Expand All @@ -44,7 +45,16 @@ public class Container {

public long bases = 0;
public int blockCount = -1;

// Slice byte boundaries within this container, after the header. Equal to Slice.offset.
// e.g. if landmarks[0] = 9000 and landmarks[1] = 109000, we know:
// the container's header size = 9000
// Slice[0].offset = 9000
// Slice[0].size = 109000 - 9000 = 100000
// Slice[1].offset = 109000
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you.


public int[] landmarks;

public int checksum = 0;

/**
Expand All @@ -59,7 +69,7 @@ public class Container {

// for indexing:
/**
* Container start in the stream.
* Container start in the stream, in bytes.
*/
public long offset;

Expand Down Expand Up @@ -136,6 +146,44 @@ else if (sliceRefContexts.size() > 1) {
return container;
}

/**
 * Adopts the given slices as this Container's slices and fills in each slice's
 * indexing fields (container offset, index, byte offset and byte size) from this
 * Container's {@code offset}, {@code landmarks} and {@code containerByteSize}.
 *
 * @param slicesToPopulate the slices to populate; slice {@code i} is paired with {@code landmarks[i]}
 */
void populateSlicesAndIndexingParameters(final ArrayList<Slice> slicesToPopulate) {
    final int sliceCount = slicesToPopulate.size();
    slices = new Slice[sliceCount];

    final int finalIndex = sliceCount - 1;
    for (int i = 0; i < sliceCount; i++) {
        final Slice current = slicesToPopulate.get(i);
        current.containerOffset = offset;
        current.index = i;
        current.offset = landmarks[i];

        if (i < finalIndex) {
            // every slice but the last ends where the next landmark begins
            current.size = landmarks[i + 1] - current.offset;
        } else {
            // the last slice has no following landmark, so compute a "final landmark":
            // the first landmark (treated as the header size) plus the content byte size,
            // i.e. the byte offset of the end of the container
            final int headerSize = landmarks[0];
            final int endOfContainer = headerSize + containerByteSize;
            current.size = endOfContainer - current.offset;
        }

        slices[i] = current;
    }
}

/**
* Retrieve the list of CRAI Index entries corresponding to this Container.
*
Expand Down
29 changes: 4 additions & 25 deletions src/main/java/htsjdk/samtools/cram/structure/ContainerIO.java
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@
import htsjdk.samtools.cram.build.CramIO;
import htsjdk.samtools.cram.common.CramVersionPolicies;
import htsjdk.samtools.cram.common.Version;
import htsjdk.samtools.util.Log;
import htsjdk.samtools.cram.structure.block.Block;
import htsjdk.samtools.cram.structure.block.BlockContentType;
import htsjdk.samtools.util.Log;
import htsjdk.samtools.util.RuntimeIOException;

import java.io.*;
Expand Down Expand Up @@ -78,38 +78,18 @@ private static Container readContainer(final int major, final InputStream inputS
throw new RuntimeIOException(e);
}

final List<Slice> slices = new ArrayList<Slice>();
final ArrayList<Slice> slices = new ArrayList<>();
for (int sliceCount = fromSlice; sliceCount < howManySlices - fromSlice; sliceCount++) {
final Slice slice = SliceIO.read(major, inputStream);
slice.index = sliceCount;
slices.add(slice);
slices.add(SliceIO.read(major, inputStream));
}

container.slices = slices.toArray(new Slice[slices.size()]);

calculateSliceOffsetsAndSizes(container);
container.populateSlicesAndIndexingParameters(slices);

log.debug("READ CONTAINER: " + container.toString());

return container;
}

private static void calculateSliceOffsetsAndSizes(final Container container) {
if (container.slices.length == 0) return;
for (int i = 0; i < container.slices.length - 1; i++) {
final Slice slice = container.slices[i];
slice.offset = container.landmarks[i];
slice.size = container.landmarks[i + 1] - slice.offset;
slice.containerOffset = container.offset;
slice.index = i;
}
final Slice lastSlice = container.slices[container.slices.length - 1];
lastSlice.offset = container.landmarks[container.landmarks.length - 1];
lastSlice.size = container.containerByteSize - lastSlice.offset;
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this was the error

lastSlice.containerOffset = container.offset;
lastSlice.index = container.slices.length - 1;
}

/**
* Writes a {@link Container} header information to a {@link OutputStream}.
*
Expand Down Expand Up @@ -173,7 +153,6 @@ public static int writeContainer(final Version version, final Container containe
container.landmarks[i] = landmarks.get(i);

container.containerByteSize = byteArrayOutputStream.size();
calculateSliceOffsetsAndSizes(container);

int length = ContainerHeaderIO.writeContainerHeader(version.major, container, outputStream);
try {
Expand Down
100 changes: 100 additions & 0 deletions src/test/java/htsjdk/samtools/cram/structure/ContainerTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
Expand Down Expand Up @@ -150,4 +151,103 @@ public void getSpansTest(final List<CramCompressionRecord> records,
Assert.assertEquals(spanMap.get(expectedReferenceContext), expectedAlignmentSpan);
}

// show that we can populate all of the slice indexing fields from the
// values in the container's header

// this is part of the deserialization process, and supports index creation

// single slice

@Test
public static void populateSlicesAndIndexingParametersOneSlice() {
    // layout under test:
    //   the container begins 100,000 bytes into the CRAM stream
    //   it has a 1234-byte header followed by a single 6262-byte Slice
    final int streamOffset = 100000;
    final int headerBytes = 1234;
    final int onlySliceBytes = 6262;

    final Container populated = createOneSliceContainer(streamOffset, headerBytes, onlySliceBytes);

    // the only slice is expected at index 0, with an offset equal to the header size
    assertSliceIndexingParams(populated.slices[0], 0, streamOffset, onlySliceBytes, headerBytes);
}

// two slices

@Test
public static void populateSlicesAndIndexingParametersTwoSlices() {
    // layout under test:
    //   the container begins 200,000 bytes into the CRAM stream
    //   it has a 3234-byte header, then a 7890-byte Slice, then a 5555-byte Slice
    final int streamOffset = 200000;
    final int headerBytes = 3234;
    final int firstSliceBytes = 7890;
    final int secondSliceBytes = 5555;

    final Container populated =
            createTwoSliceContainer(streamOffset, headerBytes, firstSliceBytes, secondSliceBytes);

    // expected slice offsets: the first equals the header size, the second adds the first slice's size
    final int firstSliceStart = headerBytes;
    final int secondSliceStart = headerBytes + firstSliceBytes;

    assertSliceIndexingParams(populated.slices[0], 0, streamOffset, firstSliceBytes, firstSliceStart);
    assertSliceIndexingParams(populated.slices[1], 1, streamOffset, secondSliceBytes, secondSliceStart);
}

/**
 * Builds a Container holding a single Slice and runs
 * {@code populateSlicesAndIndexingParameters} on it.
 *
 * @param containerStreamByteOffset value assigned to the container's {@code offset}
 * @param containerHeaderSize value used as the container's only landmark
 * @param slice0size value assigned to the container's {@code containerByteSize}
 * @return the Container after its slice has been populated
 */
private static Container createOneSliceContainer(final int containerStreamByteOffset,
                                                 final int containerHeaderSize,
                                                 final int slice0size) {
    final ReferenceContext refContext = new ReferenceContext(0);

    final Container container = new Container(refContext);
    container.offset = containerStreamByteOffset;
    // with a single slice, the container's content size is just that slice's size
    container.containerByteSize = slice0size;
    container.landmarks = new int[]{
            containerHeaderSize, // beginning of slice 0
    };

    // a plain ArrayList: double-brace initialization would create an anonymous ArrayList subclass
    final ArrayList<Slice> slices = new ArrayList<>();
    slices.add(new Slice(refContext));

    container.populateSlicesAndIndexingParameters(slices);
    return container;
}

/**
 * Builds a Container holding two Slices and runs
 * {@code populateSlicesAndIndexingParameters} on it.
 *
 * @param containerStreamByteOffset value assigned to the container's {@code offset}
 * @param containerHeaderSize value used as the first landmark
 * @param slice0size size of the first slice; the second landmark is {@code containerHeaderSize + slice0size}
 * @param slice1size size of the second slice
 * @return the Container after its slices have been populated
 */
private static Container createTwoSliceContainer(final int containerStreamByteOffset,
                                                 final int containerHeaderSize,
                                                 final int slice0size,
                                                 final int slice1size) {
    // the container's content size is the sum of both slice sizes
    final int containerDataSize = slice0size + slice1size;

    final ReferenceContext refContext = new ReferenceContext(0);

    final Container container = new Container(refContext);
    container.offset = containerStreamByteOffset;
    container.containerByteSize = containerDataSize;
    container.landmarks = new int[]{
            containerHeaderSize,                // beginning of slice 0
            containerHeaderSize + slice0size    // beginning of slice 1
    };

    // a plain ArrayList: double-brace initialization would create an anonymous ArrayList subclass
    final ArrayList<Slice> slices = new ArrayList<>();
    slices.add(new Slice(refContext));
    slices.add(new Slice(refContext));

    container.populateSlicesAndIndexingParameters(slices);
    return container;
}

/**
 * Asserts that a Slice's four indexing fields hold the expected values.
 * Each assertion carries a message naming the field, so a failure shows
 * which indexing parameter was wrong.
 *
 * @param slice the Slice to inspect
 * @param expectedIndex expected value of {@code slice.index}
 * @param expectedContainerOffset expected value of {@code slice.containerOffset}
 * @param expectedSize expected value of {@code slice.size}
 * @param expectedOffset expected value of {@code slice.offset}
 */
private static void assertSliceIndexingParams(final Slice slice,
                                              final int expectedIndex,
                                              final int expectedContainerOffset,
                                              final int expectedSize,
                                              final int expectedOffset) {
    Assert.assertEquals(slice.index, expectedIndex, "slice.index");
    Assert.assertEquals(slice.containerOffset, expectedContainerOffset, "slice.containerOffset");
    Assert.assertEquals(slice.size, expectedSize, "slice.size");
    Assert.assertEquals(slice.offset, expectedOffset, "slice.offset");
}
}