Skip to content

Commit

Permalink
Revert #1326 because it was correct before
Browse files Browse the repository at this point in the history
- added comments clarifying the real situation
- see samtools/hts-specs#396
- see samtools/hts-specs#398
  • Loading branch information
jmthibault79 committed Apr 1, 2019
1 parent aa89809 commit 3c5de2a
Show file tree
Hide file tree
Showing 16 changed files with 118 additions and 99 deletions.
2 changes: 1 addition & 1 deletion src/main/java/htsjdk/samtools/BAMIndexMetaData.java
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ void recordMetaData(final Slice slice) {
unAlignedRecords += slice.unmappedReadsCount;
}

final long start = slice.byteOffsetFromContainer;
final long start = slice.byteOffsetFromCompressionHeaderStart;

if (BlockCompressedFilePointerUtil.compare(start, firstOffset) < 1 || firstOffset == -1) {
this.firstOffset = start;
Expand Down
4 changes: 2 additions & 2 deletions src/main/java/htsjdk/samtools/CRAMBAIIndexer.java
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ void processContainer(final Container container, final ValidationStringency vali
final AlignmentSpan span = spanMap.get(refContext);
final Slice fakeSlice = new Slice(refContext);
fakeSlice.containerByteOffset = slice.containerByteOffset;
fakeSlice.byteOffsetFromContainer = slice.byteOffsetFromContainer;
fakeSlice.byteOffsetFromCompressionHeaderStart = slice.byteOffsetFromCompressionHeaderStart;
fakeSlice.index = slice.index;

fakeSlice.alignmentStart = span.getStart();
Expand All @@ -167,7 +167,7 @@ void processContainer(final Container container, final ValidationStringency vali
if (unmappedSpan != null) {
final Slice fakeSlice = new Slice(ReferenceContext.UNMAPPED_UNPLACED_CONTEXT);
fakeSlice.containerByteOffset = slice.containerByteOffset;
fakeSlice.byteOffsetFromContainer = slice.byteOffsetFromContainer;
fakeSlice.byteOffsetFromCompressionHeaderStart = slice.byteOffsetFromCompressionHeaderStart;
fakeSlice.index = slice.index;

fakeSlice.alignmentStart = SAMRecord.NO_ALIGNMENT_START;
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/htsjdk/samtools/CRAMFileReader.java
Original file line number Diff line number Diff line change
Expand Up @@ -401,7 +401,7 @@ public CloseableIterator<SAMRecord> queryUnmapped() {
newIterator = new CRAMIterator(seekableStream, referenceSource, validationStringency);
seekableStream.seek(startOfLastLinearBin >>> 16);
final Container container = ContainerHeaderIO.readContainerHeader(newIterator.getCramHeader().getVersion().major, seekableStream);
seekableStream.seek(seekableStream.position() + container.containerByteSize);
seekableStream.seek(seekableStream.position() + container.containerBlocksByteSize);
iterator = newIterator;
boolean atAlignments;
do {
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/htsjdk/samtools/cram/CRAIEntry.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ public class CRAIEntry implements Comparable<CRAIEntry> {
// equal to Slice.containerByteOffset and Container.byteOffset
private final long containerStartByteOffset;
// this Slice's offset in bytes from the start of its Container's Compression Header (i.e. the end of the Container Header)
// equal to Slice.byteOffsetFromContainer and Container.landmarks[Slice.index]
// equal to Slice.byteOffsetFromCompressionHeaderStart and Container.landmarks[Slice.index]
private final int sliceByteOffset;
private final int sliceByteSize;

Expand Down
2 changes: 1 addition & 1 deletion src/main/java/htsjdk/samtools/cram/CRAIIndex.java
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ public static SeekableStream openCraiFileAsBaiStream(final InputStream indexStre
slice.containerByteOffset = entry.getContainerStartByteOffset();
slice.alignmentStart = entry.getAlignmentStart();
slice.alignmentSpan = entry.getAlignmentSpan();
slice.byteOffsetFromContainer = entry.getSliceByteOffset();
slice.byteOffsetFromCompressionHeaderStart = entry.getSliceByteOffset();

// NOTE: the sliceIndex and read count fields can't be derived from the CRAM index
// so we can only set them to zero
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ public CramContainerHeaderIterator(final InputStream inputStream) {
@Override
protected Container containerFromStream(final CountingInputStream countingStream) {
final Container container = ContainerHeaderIO.readContainerHeader(getCramHeader().getVersion().major, countingStream);
InputStreamUtils.skipFully(countingStream, container.containerByteSize);
InputStreamUtils.skipFully(countingStream, container.containerBlocksByteSize);
return container;
}

Expand Down
12 changes: 6 additions & 6 deletions src/main/java/htsjdk/samtools/cram/build/CramIO.java
Original file line number Diff line number Diff line change
Expand Up @@ -273,16 +273,16 @@ private static long writeContainerForSamFileHeader(final int major, final SAMFil

final ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
block.write(major, byteArrayOutputStream);
container.containerByteSize = byteArrayOutputStream.size();
container.containerBlocksByteSize = byteArrayOutputStream.size();

final int containerHeaderByteSize = ContainerHeaderIO.writeContainerHeader(major, container, os);
try {
os.write(byteArrayOutputStream.toByteArray(), 0, byteArrayOutputStream.size());
os.write(byteArrayOutputStream.toByteArray(), 0, container.containerBlocksByteSize);
} catch (final IOException e) {
throw new RuntimeIOException(e);
}

return containerHeaderByteSize + byteArrayOutputStream.size();
return containerHeaderByteSize + container.containerBlocksByteSize;
}

private static SAMFileHeader readSAMFileHeader(final Version version,
Expand All @@ -293,13 +293,13 @@ private static SAMFileHeader readSAMFileHeader(final Version version,
final Block block;
{
if (version.compatibleWith(CramVersions.CRAM_v3)) {
final byte[] bytes = new byte[container.containerByteSize];
final byte[] bytes = new byte[container.containerBlocksByteSize];
InputStreamUtils.readFully(inputStream, bytes, 0, bytes.length);
block = Block.read(version.major, new ByteArrayInputStream(bytes));
// ignore the rest of the container
} else {
/*
* pending issue: container.containerByteSize inputStream 2 bytes shorter
* pending issue: container.containerBlocksByteSize is 2 bytes shorter
* than needed in the v21 test cram files.
*/
block = Block.read(version.major, inputStream);
Expand Down Expand Up @@ -355,7 +355,7 @@ public static boolean replaceCramHeader(final File file, final CramHeader newHea
final Block block = Block.createRawFileHeaderBlock(toByteArray(newHeader.getSamFileHeader()));
final ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
block.write(newHeader.getVersion().major, byteArrayOutputStream);
if (byteArrayOutputStream.size() > c.containerByteSize) {
if (byteArrayOutputStream.size() > c.containerBlocksByteSize) {
log.error("Failed to replace CRAM header because the new header does not fit.");
return false;
}
Expand Down
48 changes: 25 additions & 23 deletions src/main/java/htsjdk/samtools/cram/structure/Container.java
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,10 @@ public class Container {
// container header as defined in the specs, in addition to sequenceId from ReferenceContext

/**
* Byte size of the content excluding header.
* The total length of all blocks in this container, of all types.
* @see htsjdk.samtools.cram.structure.block.BlockContentType
*/
public int containerByteSize = 0;
public int containerBlocksByteSize = 0;

// minimum alignment start of the reads in this Container
// uses a 1-based coordinate system
Expand All @@ -46,13 +47,20 @@ public class Container {
public long bases = 0;
public int blockCount = -1;

// Slice byte boundaries within this container, after the header. Equal to Slice.offset.
// e.g. if landmarks[0] = 9000 and landmarks[1] = 109000, we know:
// the container's header size = 9000
// Slice[0].offset = 9000
// Slice[0].size = 109000 - 9000 = 100000
// Slice[1].offset = 109000

/**
* Slice byte boundaries as offsets within this container, counted from the start of the
* compression header (i.e. the end of the container header), which has offset 0.
* Equal to {@link Slice#byteOffsetFromCompressionHeaderStart}.
*
* As an example, suppose we have:
* - landmarks[0] = 9000
* - landmarks[1] = 109000
* - containerBlocksByteSize = 123456
*
* We therefore know:
* - the compression header size = 9000
* - Slice 0 has offset 9000 and size 100000 (109000 - 9000)
* - Slice 1 has offset 109000 and size 14456 (123456 - 109000)
*/
public int[] landmarks;

public int checksum = 0;
Expand Down Expand Up @@ -170,7 +178,7 @@ else if (sliceRefContexts.size() > 1) {
/**
* Populate the indexing parameters of this Container's slices
*
* Requires: valid landmarks and containerByteSize
* Requires: valid landmarks and containerBlocksByteSize
*
* @throws CRAMException when the Container is in an invalid state
*/
Expand All @@ -188,28 +196,22 @@ public void distributeIndexingParametersToSlices() {
throw new CRAMException(String.format(format, landmarks.length, slices.length));
}

if (containerByteSize == 0) {
if (containerBlocksByteSize == 0) {
throw new CRAMException("Cannot set Slice indexing parameters if this Container's byte size is unknown");
}

final int lastSliceIndex = slices.length - 1;
for (int i = 0; i < lastSliceIndex; i++) {
final Slice slice = slices[i];
slice.index = i;
slice.byteOffsetFromContainer = landmarks[i];
slice.byteSize = landmarks[i + 1] - slice.byteOffsetFromContainer;
slice.byteOffsetFromCompressionHeaderStart = landmarks[i];
slice.byteSize = landmarks[i + 1] - slice.byteOffsetFromCompressionHeaderStart;
}

final Slice lastSlice = slices[lastSliceIndex];
lastSlice.index = lastSliceIndex;
lastSlice.byteOffsetFromContainer = landmarks[lastSliceIndex];

// calculate a "final landmark" indicating the byte offset of the end of the container
// equivalent to the container's total byte size

final int containerHeaderSize = landmarks[0];
final int containerTotalByteSize = containerHeaderSize + containerByteSize;
lastSlice.byteSize = containerTotalByteSize - lastSlice.byteOffsetFromContainer;
lastSlice.byteOffsetFromCompressionHeaderStart = landmarks[lastSliceIndex];
lastSlice.byteSize = containerBlocksByteSize - lastSlice.byteOffsetFromCompressionHeaderStart;
}

/**
Expand Down Expand Up @@ -237,11 +239,11 @@ public String toString() {
}

public boolean isEOF() {
final boolean v3 = containerByteSize == 15 && referenceContext.isUnmappedUnplaced()
final boolean v3 = containerBlocksByteSize == 15 && referenceContext.isUnmappedUnplaced()
&& alignmentStart == 4542278 && blockCount == 1
&& nofRecords == 0 && (getSlices() == null || getSlices().length == 0);

final boolean v2 = containerByteSize == 11 && referenceContext.isUnmappedUnplaced()
final boolean v2 = containerBlocksByteSize == 11 && referenceContext.isUnmappedUnplaced()
&& alignmentStart == 4542278 && blockCount == 1
&& nofRecords == 0 && (getSlices() == null || getSlices().length == 0);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ public static Container readContainerHeader(final int major,
final int containerByteSize = CramInt.readInt32(peek);
final ReferenceContext refContext = new ReferenceContext(ITF8.readUnsignedITF8(inputStream));
final Container container = new Container(refContext);
container.containerByteSize = containerByteSize;
container.containerBlocksByteSize = containerByteSize;

container.alignmentStart = ITF8.readUnsignedITF8(inputStream);
container.alignmentSpan = ITF8.readUnsignedITF8(inputStream);
Expand Down Expand Up @@ -123,7 +123,7 @@ public static Container readContainerHeader(final int major, final CountingInput
public static int writeContainerHeader(final int major, final Container container, final OutputStream outputStream) {
final CRC32OutputStream crc32OutputStream = new CRC32OutputStream(outputStream);

int length = (CramInt.writeInt32(container.containerByteSize, crc32OutputStream) + 7) / 8;
int length = (CramInt.writeInt32(container.containerBlocksByteSize, crc32OutputStream) + 7) / 8;
length += (ITF8.writeUnsignedITF8(container.getReferenceContext().getSerializableId(), crc32OutputStream) + 7) / 8;
length += (ITF8.writeUnsignedITF8(container.alignmentStart, crc32OutputStream) + 7) / 8;
length += (ITF8.writeUnsignedITF8(container.alignmentSpan, crc32OutputStream) + 7) / 8;
Expand Down
20 changes: 11 additions & 9 deletions src/main/java/htsjdk/samtools/cram/structure/ContainerIO.java
Original file line number Diff line number Diff line change
Expand Up @@ -131,26 +131,28 @@ public static int writeContainer(final Version version, final Container containe
if (isFileHeaderContainer) {
final ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
firstBlock.write(version.major, byteArrayOutputStream);
container.containerByteSize = byteArrayOutputStream.size();
container.containerBlocksByteSize = byteArrayOutputStream.size();

final int containerHeaderByteSize = ContainerHeaderIO.writeContainerHeader(version.major, container, outputStream);
try {
outputStream.write(byteArrayOutputStream.toByteArray(), 0, byteArrayOutputStream.size());
outputStream.write(byteArrayOutputStream.toByteArray(), 0, container.containerBlocksByteSize);
} catch (final IOException e) {
throw new RuntimeIOException(e);
}
return containerHeaderByteSize + byteArrayOutputStream.size();
return containerHeaderByteSize + container.containerBlocksByteSize;
}
}
}

// use this BAOS for two purposes: writing out and counting bytes for landmarks/containerBlocksByteSize
final ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();

container.compressionHeader.write(version, byteArrayOutputStream);
container.blockCount = 1;

final List<Integer> landmarks = new ArrayList<>();
for (final Slice slice : container.getSlices()) {
// landmark 0 = byte length of the compression header
// landmarks after 0 = byte length of the compression header plus all slices before this one
landmarks.add(byteArrayOutputStream.size());
SliceIO.write(version.major, slice, byteArrayOutputStream);
container.blockCount++;
Expand All @@ -159,21 +161,21 @@ public static int writeContainer(final Version version, final Container containe
container.blockCount += slice.external.size();
}
container.landmarks = landmarks.stream().mapToInt(Integer::intValue).toArray();
container.containerByteSize = byteArrayOutputStream.size();
// compression header plus all slices, if any (EOF Containers have no slices; File Header Containers are handled above)
container.containerBlocksByteSize = byteArrayOutputStream.size();

// Slices require the Container's landmarks and containerByteSize before indexing
// Slices require the Container's landmarks and containerBlocksByteSize before indexing
container.distributeIndexingParametersToSlices();

int length = ContainerHeaderIO.writeContainerHeader(version.major, container, outputStream);
final int containerHeaderLength = ContainerHeaderIO.writeContainerHeader(version.major, container, outputStream);
try {
outputStream.write(byteArrayOutputStream.toByteArray(), 0, byteArrayOutputStream.size());
} catch (final IOException e) {
throw new RuntimeIOException(e);
}
length += byteArrayOutputStream.size();

log.debug("CONTAINER WRITTEN: " + container.toString());

return length;
return containerHeaderLength + container.containerBlocksByteSize;
}
}
50 changes: 31 additions & 19 deletions src/main/java/htsjdk/samtools/cram/structure/Slice.java
Original file line number Diff line number Diff line change
Expand Up @@ -77,19 +77,31 @@ public class Slice {

public static final int UNINITIALIZED_INDEXING_PARAMETER = -1;

// the Slice's offset in bytes from the beginning of its Container
// equal to Container.landmarks[Slice.index] of its enclosing Container
// BAI and CRAI
public int byteOffsetFromContainer = UNINITIALIZED_INDEXING_PARAMETER;
// this Slice's Container's offset in bytes from the beginning of the stream
// equal to Container.byteOffset of its enclosing Container
// BAI and CRAI
/**
* The Slice's offset in bytes from the beginning of the Container's Compression Header
* (or the end of the Container Header), equal to the entry for this Slice's {@link #index}
* in {@link Container#landmarks}
*
* Used by BAI and CRAI indexing
*/
public int byteOffsetFromCompressionHeaderStart = UNINITIALIZED_INDEXING_PARAMETER;
/**
* The Slice's Container's offset in bytes from the beginning of the stream,
* equal to {@link Container#byteOffset}
*
* Used by BAI and CRAI indexing
*/
public long containerByteOffset = UNINITIALIZED_INDEXING_PARAMETER;
// this Slice's size in bytes
// CRAI only
/**
* The Slice's size in bytes
*
* Used by CRAI indexing only
*/
public int byteSize = UNINITIALIZED_INDEXING_PARAMETER;
// this Slice's index number within its Container
// BAI only
/**
* The Slice's index number within its Container
*
* Used by BAI indexing only
*/
public int index = UNINITIALIZED_INDEXING_PARAMETER;

// to pass this to the container:
Expand Down Expand Up @@ -121,13 +133,13 @@ public ReferenceContext getReferenceContext() {

/**
* Confirm that we have initialized the 3 BAI index parameters:
* byteOffsetFromContainer, containerByteOffset, and index
* byteOffsetFromCompressionHeaderStart, containerByteOffset, and index
*/
public void baiIndexInitializationCheck() {
final StringBuilder error = new StringBuilder();

if (byteOffsetFromContainer == UNINITIALIZED_INDEXING_PARAMETER) {
error.append("Cannot index this Slice for BAI because its byteOffsetFromContainer is unknown.").append(System.lineSeparator());
if (byteOffsetFromCompressionHeaderStart == UNINITIALIZED_INDEXING_PARAMETER) {
error.append("Cannot index this Slice for BAI because its byteOffsetFromCompressionHeaderStart is unknown.").append(System.lineSeparator());
}

if (containerByteOffset == UNINITIALIZED_INDEXING_PARAMETER) {
Expand All @@ -145,15 +157,15 @@ public void baiIndexInitializationCheck() {

/**
* Confirm that we have initialized the 3 CRAI index parameters:
* byteOffsetFromContainer, containerByteOffset, and byteSize
* byteOffsetFromCompressionHeaderStart, containerByteOffset, and byteSize
*
* NOTE: this is currently unused because we always use BAI
*/
void craiIndexInitializationCheck() {
final StringBuilder error = new StringBuilder();

if (byteOffsetFromContainer == UNINITIALIZED_INDEXING_PARAMETER) {
error.append("Cannot index this Slice for CRAI because its byteOffsetFromContainer is unknown.").append(System.lineSeparator());
if (byteOffsetFromCompressionHeaderStart == UNINITIALIZED_INDEXING_PARAMETER) {
error.append("Cannot index this Slice for CRAI because its byteOffsetFromCompressionHeaderStart is unknown.").append(System.lineSeparator());
}

if (containerByteOffset == UNINITIALIZED_INDEXING_PARAMETER) {
Expand Down Expand Up @@ -415,7 +427,7 @@ public List<CRAIEntry> getCRAIEntries(final CompressionHeader compressionHeader)
e.getValue().getStart(),
e.getValue().getSpan(),
containerByteOffset,
byteOffsetFromContainer,
byteOffsetFromCompressionHeaderStart,
byteSize))
.sorted()
.collect(Collectors.toList());
Expand All @@ -427,7 +439,7 @@ public List<CRAIEntry> getCRAIEntries(final CompressionHeader compressionHeader)
alignmentStart,
alignmentSpan,
containerByteOffset,
byteOffsetFromContainer,
byteOffsetFromCompressionHeaderStart,
byteSize));
}
}
Expand Down
Loading

0 comments on commit 3c5de2a

Please sign in to comment.