Skip to content

Commit

Permalink
Remove multiple versions of Slice/Container getCRAIEntries()
Browse files Browse the repository at this point in the history
- Encapsulate Container.byteOffset and distribute to Slices on set
  • Loading branch information
jmthibault79 committed Mar 26, 2019
1 parent f1b63fe commit c031882
Show file tree
Hide file tree
Showing 16 changed files with 220 additions and 147 deletions.
2 changes: 1 addition & 1 deletion src/main/java/htsjdk/samtools/BAMIndexMetaData.java
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ void recordMetaData(final Slice slice) {
unAlignedRecords += slice.unmappedReadsCount;
}

final long start = slice.offset;
final long start = slice.byteOffsetFromContainer;

if (BlockCompressedFilePointerUtil.compare(start, firstOffset) < 1 || firstOffset == -1) {
this.firstOffset = start;
Expand Down
18 changes: 8 additions & 10 deletions src/main/java/htsjdk/samtools/CRAMBAIIndexer.java
Original file line number Diff line number Diff line change
Expand Up @@ -134,14 +134,12 @@ public void processContainer(final Container container, final ValidationStringen

int sliceIndex = 0;
for (final Slice slice : container.slices) {
slice.containerOffset = container.offset;
slice.index = sliceIndex++;
if (slice.getReferenceContext().isMultiRef()) {
final Map<ReferenceContext, AlignmentSpan> spanMap = container.getSpans(validationStringency);

// TODO why are we updating the original slice here?

slice.containerOffset = container.offset;
slice.index = sliceIndex++;

/**
Expand All @@ -151,8 +149,8 @@ public void processContainer(final Container container, final ValidationStringen
for (final ReferenceContext refContext : new TreeSet<>(spanMap.keySet())) {
final AlignmentSpan span = spanMap.get(refContext);
final Slice fakeSlice = new Slice(refContext);
fakeSlice.containerOffset = slice.containerOffset;
fakeSlice.offset = slice.offset;
fakeSlice.containerByteOffset = slice.containerByteOffset;
fakeSlice.byteOffsetFromContainer = slice.byteOffsetFromContainer;
fakeSlice.index = slice.index;

fakeSlice.alignmentStart = span.getStart();
Expand All @@ -165,8 +163,8 @@ public void processContainer(final Container container, final ValidationStringen

if (unmappedSpan != null) {
final Slice fakeSlice = new Slice(ReferenceContext.UNMAPPED_UNPLACED_CONTEXT);
fakeSlice.containerOffset = slice.containerOffset;
fakeSlice.offset = slice.offset;
fakeSlice.containerByteOffset = slice.containerByteOffset;
fakeSlice.byteOffsetFromContainer = slice.byteOffsetFromContainer;
fakeSlice.index = slice.index;

fakeSlice.alignmentStart = SAMRecord.NO_ALIGNMENT_START;
Expand Down Expand Up @@ -273,7 +271,7 @@ public static void createIndex(final SeekableStream stream,
break;
}

container.offset = offset;
container.setByteOffset(offset);

indexer.processContainer(container, validationStringency);

Expand Down Expand Up @@ -368,7 +366,7 @@ private int computeIndexingBin(final Slice slice) {
* Record any index information for a given CRAM slice
*
* Reads these Slice fields:
* sequenceId, alignmentStart, alignmentSpan, containerOffset, index
* sequenceId, alignmentStart, alignmentSpan, containerByteOffset, index
*
* @param slice CRAM slice, single ref only.
*/
Expand Down Expand Up @@ -411,8 +409,8 @@ private void processSingleReferenceSlice(final Slice slice) {

// process chunks

final long chunkStart = (slice.containerOffset << 16) | slice.index;
final long chunkEnd = ((slice.containerOffset << 16) | slice.index) + 1;
final long chunkStart = (slice.containerByteOffset << 16) | slice.index;
final long chunkEnd = ((slice.containerByteOffset << 16) | slice.index) + 1;

final Chunk newChunk = new Chunk(chunkStart, chunkEnd);

Expand Down
3 changes: 1 addition & 2 deletions src/main/java/htsjdk/samtools/CRAMCRAIIndexer.java
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
import java.io.OutputStream;
import java.io.IOException;
import java.util.Collection;
import java.util.List;
import java.util.Scanner;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;
Expand Down Expand Up @@ -104,7 +103,7 @@ public static void writeIndex(final SeekableStream cramStream, OutputStream crai
Container container = ContainerIO.readContainer(cramVersion, cramStream);

while (container != null && !container.isEOF()) {
container.offset = offset;
container.setByteOffset(offset);
indexer.processContainer(container);
offset = cramStream.position();
container = ContainerIO.readContainer(cramVersion, cramStream);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -446,7 +446,7 @@ protected void flushContainer() throws IllegalArgumentException {
for (final Slice slice : container.slices) {
slice.setRefMD5(referenceBases);
}
container.offset = offset;
container.setByteOffset(offset);
offset += ContainerIO.writeContainer(cramVersion, container, outputStream);
if (indexer != null) {
/**
Expand Down
4 changes: 2 additions & 2 deletions src/main/java/htsjdk/samtools/CRAMIterator.java
Original file line number Diff line number Diff line change
Expand Up @@ -212,8 +212,8 @@ void nextContainer() throws IllegalArgumentException, CRAMException {
samRecord.setValidationStringency(validationStringency);

if (mReader != null) {
final long chunkStart = (container.offset << 16) | cramRecord.sliceIndex;
final long chunkEnd = ((container.offset << 16) | cramRecord.sliceIndex) + 1;
final long chunkStart = (container.getByteOffset() << 16) | cramRecord.sliceIndex;
final long chunkEnd = ((container.getByteOffset() << 16) | cramRecord.sliceIndex) + 1;
samRecord.setFileSource(new SAMFileSource(mReader, new BAMFileSpan(new Chunk(chunkStart, chunkEnd))));
}

Expand Down
4 changes: 2 additions & 2 deletions src/main/java/htsjdk/samtools/cram/CRAIEntry.java
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,10 @@ public class CRAIEntry implements Comparable<CRAIEntry> {
private final int alignmentSpan;

// this Slice's Container's offset in bytes from the beginning of the stream
// equal to Slice.containerOffset and Container.offset
// equal to Slice.containerByteOffset and Container.byteOffset
private final long containerStartByteOffset;
// this Slice's offset in bytes from the beginning of its Container
// equal to Slice.offset and Container.landmarks[Slice.index]
// equal to Slice.byteOffsetFromContainer and Container.landmarks[Slice.index]
private final int sliceByteOffset;
private final int sliceByteSize;

Expand Down
16 changes: 7 additions & 9 deletions src/main/java/htsjdk/samtools/cram/CRAIIndex.java
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ public void writeIndex(final OutputStream os) {
* @param container the container to index
*/
public void processContainer(final Container container) {
addEntries(container.getCRAIEntriesSplittingMultiRef());
addEntries(container.getCRAIEntries());
}

public static SeekableStream openCraiFileAsBaiStream(final File cramIndexFile, final SAMSequenceDictionary dictionary) {
Expand All @@ -84,14 +84,15 @@ public static SeekableStream openCraiFileAsBaiStream(final InputStream indexStre

for (final CRAIEntry entry : full) {
final Slice slice = new Slice(new ReferenceContext(entry.getSequenceId()));
slice.containerOffset = entry.getContainerStartByteOffset();
slice.containerByteOffset = entry.getContainerStartByteOffset();
slice.alignmentStart = entry.getAlignmentStart();
slice.alignmentSpan = entry.getAlignmentSpan();
slice.offset = entry.getSliceByteOffset();
slice.byteOffsetFromContainer = entry.getSliceByteOffset();

// NOTE: the sliceIndex and read count fields can't be derived from the CRAM index
// so we can only set them to zero
// see https://github.com/samtools/htsjdk/issues/531

slice.mappedReadsCount = 0;
slice.unmappedReadsCount = 0;
slice.unplacedReadsCount = 0;
Expand All @@ -106,12 +107,8 @@ public static SeekableStream openCraiFileAsBaiStream(final InputStream indexStre

public static List<CRAIEntry> find(final List<CRAIEntry> list, final int seqId, final int start, final int span) {
final boolean matchEntireSequence = start < 1 || span < 1;
final CRAIEntry query = new CRAIEntry(seqId,
start,
span,
Long.MAX_VALUE,
Integer.MAX_VALUE,
Integer.MAX_VALUE);
final int dummyValue = 1;
final CRAIEntry query = new CRAIEntry(seqId, start, span, dummyValue, dummyValue, dummyValue);

return list.stream()
.filter(e -> e.getSequenceId() == seqId)
Expand All @@ -124,6 +121,7 @@ public static CRAIEntry getLeftmost(final List<CRAIEntry> list) {
if (list == null || list.isEmpty()) {
return null;
}

return list.stream()
.sorted()
.findFirst()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,7 @@ public Container buildContainer(final List<CramCompressionRecord> records) {
slices.add(slice);
}

final Container container = Container.initializeFromSlices(slices);
container.compressionHeader = compressionHeader;
final Container container = Container.initializeFromSlices(slices, compressionHeader);
container.nofRecords = records.size();
container.globalRecordCounter = lastGlobalRecordCounter;
container.blockCount = 0;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ void readNextContainer() {
nextContainer = containerFromStream(cramHeader.getVersion(), countingInputStream);
final long containerSizeInBytes = countingInputStream.getCount() - offset;

nextContainer.offset = offset;
nextContainer.setByteOffset(offset);
offset += containerSizeInBytes;

if (nextContainer.isEOF()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ public Container next() {

final long offset = seekableStream.position();
final Container c = ContainerIO.readContainer(cramHeader.getVersion(), seekableStream);
c.offset = offset;
c.setByteOffset(offset);
return c;
} catch (final IOException e) {
throw new RuntimeIOException(e);
Expand Down
59 changes: 29 additions & 30 deletions src/main/java/htsjdk/samtools/cram/structure/Container.java
Original file line number Diff line number Diff line change
Expand Up @@ -67,11 +67,28 @@ public class Container {
// slices found in the container:
public Slice[] slices;

// for indexing:
// this Container's byte offset from the start of the stream.
// Used for indexing.
private long byteOffset;

/**
 * Get this Container's byte offset from the start of the stream.
 * Used for indexing.
 * @return the current value of the byteOffset field
 */
public long getByteOffset() {
return byteOffset;
}

/**
* Container start in the stream, in bytes.
* Set this Container's byte offset from the start of the stream.
* Also distribute this value to the Container's constituent {@link Slice}s.
* For indexing.
* @param byteOffset the byte location in the stream where this Container begins
*/
public long offset;
public void setByteOffset(final long byteOffset) {
this.byteOffset = byteOffset;
if (slices != null) {
for (final Slice slice : slices) {
slice.containerByteOffset = byteOffset;
}
}
}

/**
* Construct this Container by providing its {@link ReferenceContext}
Expand Down Expand Up @@ -105,10 +122,11 @@ public ReferenceContext getReferenceContext() {
* TODO for general Container refactoring: make this part of construction
*
* @param containerSlices the constituent Slices of the Container
* @param compressionHeader the CRAM {@link CompressionHeader} to assign to the Container
* @throws CRAMException for invalid Container states
* @return the initialized Container
*/
public static Container initializeFromSlices(final List<Slice> containerSlices) {
public static Container initializeFromSlices(final List<Slice> containerSlices, final CompressionHeader compressionHeader) {
final Set<ReferenceContext> sliceRefContexts = containerSlices.stream()
.map(Slice::getReferenceContext)
.collect(Collectors.toSet());
Expand All @@ -124,6 +142,7 @@ else if (sliceRefContexts.size() > 1) {

final Container container = new Container(commonRefContext);
container.slices = containerSlices.toArray(new Slice[0]);
container.compressionHeader = compressionHeader;

if (commonRefContext.isMappedSingleRef()) {
int start = Integer.MAX_VALUE;
Expand Down Expand Up @@ -162,57 +181,37 @@ void populateSlicesAndIndexingParameters(final ArrayList<Slice> slicesToPopulate
final int lastSliceIndex = slicesToPopulate.size() - 1;
for (int i = 0; i < lastSliceIndex; i++) {
final Slice slice = slicesToPopulate.get(i);
slice.containerOffset = offset;
slice.index = i;
slice.offset = landmarks[i];
slice.size = landmarks[i + 1] - slice.offset;
slice.byteOffsetFromContainer = landmarks[i];
slice.byteSize = landmarks[i + 1] - slice.byteOffsetFromContainer;
slices[i] = slice;
}

final Slice lastSlice = slicesToPopulate.get(lastSliceIndex);
lastSlice.containerOffset = offset;
lastSlice.index = lastSliceIndex;
lastSlice.offset = landmarks[lastSliceIndex];
lastSlice.byteOffsetFromContainer = landmarks[lastSliceIndex];

// calculate a "final landmark" indicating the byte offset of the end of the container
// equivalent to the container's total byte size

final int containerHeaderSize = landmarks[0];
final int containerTotalByteSize = containerHeaderSize + containerByteSize;
lastSlice.size = containerTotalByteSize - lastSlice.offset;
lastSlice.byteSize = containerTotalByteSize - lastSlice.byteOffsetFromContainer;

this.slices[lastSliceIndex] = lastSlice;
}

/**
* Retrieve the list of CRAI Index entries corresponding to this Container.
*
* Retrieve the list of CRAI Index entries corresponding to this Container
* @return the list of CRAI Index entries
*/
public List<CRAIEntry> getCRAIEntries() {
return Arrays.stream(slices)
.map(Slice::getCRAIEntry)
.collect(Collectors.toList());
}

/**
* Retrieve the list of CRAI Index entries corresponding to this Container.
*
* TODO: investigate why we sometimes split multi-ref Slices
* into different entries and sometimes do not
*
* TODO: clearly identify and enforce preconditions, e.g.
* a Container built from Slices which were in turn built from records
*
* @return the list of CRAI Index entries
*/
public List<CRAIEntry> getCRAIEntriesSplittingMultiRef() {
if (isEOF()) {
return Collections.emptyList();
}

return Arrays.stream(slices)
.map(s -> s.getCRAIEntriesSplittingMultiRef(compressionHeader, landmarks, offset))
.map(s -> s.getCRAIEntries(compressionHeader))
.flatMap(List::stream)
.sorted()
.collect(Collectors.toList());
Expand Down
Loading

0 comments on commit c031882

Please sign in to comment.