Skip to content

Commit

Permalink
# This is a combination of 10 commits.
Browse files Browse the repository at this point in the history
# This is the 1st commit message:

move Container.setByteOffset() call inside ContainerIO.readContainer()
- also ContainerHeaderIO.readContainerHeader()

restrict access to setByteOffset() and encapsulate Container.slices

# This is the commit message #2:

oops

# This is the commit message #3:

a little unrelated cleanup

# This is the commit message #4:

oops

# This is the commit message #5:

better CRAIEntryTest

# This is the commit message #6:

test improvements and a fix

# This is the commit message #7:

comment

# This is the commit message #8:

javadoc

# This is the commit message #9:

review comments

# This is the commit message #10:

comments and clarification for CRAMBAIIndexer
  • Loading branch information
jmthibault79 committed Mar 26, 2019
1 parent c031882 commit d89b341
Show file tree
Hide file tree
Showing 23 changed files with 349 additions and 244 deletions.
52 changes: 21 additions & 31 deletions src/main/java/htsjdk/samtools/CRAMBAIIndexer.java
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,6 @@
import htsjdk.samtools.util.ProgressLogger;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.Arrays;
Expand Down Expand Up @@ -133,7 +132,7 @@ public void processContainer(final Container container, final ValidationStringen
}

int sliceIndex = 0;
for (final Slice slice : container.slices) {
for (final Slice slice : container.getSlices()) {
slice.index = sliceIndex++;
if (slice.getReferenceContext().isMultiRef()) {
final Map<ReferenceContext, AlignmentSpan> spanMap = container.getSpans(validationStringency);
Expand Down Expand Up @@ -260,40 +259,31 @@ public static void createIndex(final SeekableStream stream,
}
final CRAMBAIIndexer indexer = new CRAMBAIIndexer(output, cramHeader.getSamFileHeader());

int totalRecords = 0;
Container container = null;
ProgressLogger progressLogger = new ProgressLogger(log, 1, "indexed", "slices");
do {
try {
final long offset = stream.position();
container = ContainerIO.readContainer(cramHeader.getVersion(), stream);
if (container == null || container.isEOF()) {
break;
}
container = ContainerIO.readContainer(cramHeader.getVersion(), stream);
if (container == null || container.isEOF()) {
break;
}

container.setByteOffset(offset);

indexer.processContainer(container, validationStringency);

if (null != log) {
String sequenceName;
final ReferenceContext containerContext = container.getReferenceContext();
switch (containerContext.getType()) {
case UNMAPPED_UNPLACED_TYPE:
sequenceName = "?";
break;
case MULTIPLE_REFERENCE_TYPE:
sequenceName = "???";
break;
default:
sequenceName = cramHeader.getSamFileHeader().getSequence(containerContext.getSequenceId()).getSequenceName();
break;
}
progressLogger.record(sequenceName, container.alignmentStart);
indexer.processContainer(container, validationStringency);

if (null != log) {
String sequenceName;
final ReferenceContext containerContext = container.getReferenceContext();
switch (containerContext.getType()) {
case UNMAPPED_UNPLACED_TYPE:
sequenceName = "?";
break;
case MULTIPLE_REFERENCE_TYPE:
sequenceName = "???";
break;
default:
sequenceName = cramHeader.getSamFileHeader().getSequence(containerContext.getSequenceId()).getSequenceName();
break;
}

} catch (final IOException e) {
throw new RuntimeException("Failed to read cram container", e);
progressLogger.record(sequenceName, container.alignmentStart);
}

} while (!container.isEOF());
Expand Down
30 changes: 11 additions & 19 deletions src/main/java/htsjdk/samtools/CRAMCRAIIndexer.java
Original file line number Diff line number Diff line change
Expand Up @@ -93,27 +93,19 @@ public void finish() {
* @param craiStream stream for output index
*/
public static void writeIndex(final SeekableStream cramStream, OutputStream craiStream) {
try {
final CramHeader cramHeader = CramIO.readCramHeader(cramStream);
final CRAMCRAIIndexer indexer = new CRAMCRAIIndexer(craiStream, cramHeader.getSamFileHeader());
final Version cramVersion = cramHeader.getVersion();

// get the first container and its offset
long offset = cramStream.position();
Container container = ContainerIO.readContainer(cramVersion, cramStream);

while (container != null && !container.isEOF()) {
container.setByteOffset(offset);
indexer.processContainer(container);
offset = cramStream.position();
container = ContainerIO.readContainer(cramVersion, cramStream);
}
final CramHeader cramHeader = CramIO.readCramHeader(cramStream);
final CRAMCRAIIndexer indexer = new CRAMCRAIIndexer(craiStream, cramHeader.getSamFileHeader());
final Version cramVersion = cramHeader.getVersion();

indexer.finish();
}
catch (IOException e) {
throw new RuntimeIOException("Error writing CRAI index to output stream");
// get the first container
Container container = ContainerIO.readContainer(cramVersion, cramStream);

while (container != null && !container.isEOF()) {
indexer.processContainer(container);
container = ContainerIO.readContainer(cramVersion, cramStream);
}

indexer.finish();
}

/**
Expand Down
5 changes: 2 additions & 3 deletions src/main/java/htsjdk/samtools/CRAMContainerStreamWriter.java
Original file line number Diff line number Diff line change
Expand Up @@ -442,11 +442,10 @@ protected void flushContainer() throws IllegalArgumentException {
}
}

final Container container = containerFactory.buildContainer(cramRecords);
for (final Slice slice : container.slices) {
final Container container = containerFactory.buildContainer(cramRecords, offset);
for (final Slice slice : container.getSlices()) {
slice.setRefMD5(referenceBases);
}
container.setByteOffset(offset);
offset += ContainerIO.writeContainer(cramVersion, container, outputStream);
if (indexer != null) {
/**
Expand Down
4 changes: 2 additions & 2 deletions src/main/java/htsjdk/samtools/CRAMFileReader.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
import htsjdk.samtools.cram.ref.CRAMReferenceSource;
import htsjdk.samtools.cram.ref.ReferenceSource;
import htsjdk.samtools.cram.structure.Container;
import htsjdk.samtools.cram.structure.ContainerIO;
import htsjdk.samtools.cram.structure.ContainerHeaderIO;
import htsjdk.samtools.seekablestream.SeekableFileStream;
import htsjdk.samtools.seekablestream.SeekableStream;
import htsjdk.samtools.util.CloseableIterator;
Expand Down Expand Up @@ -400,7 +400,7 @@ public CloseableIterator<SAMRecord> queryUnmapped() {
seekableStream.seek(0);
newIterator = new CRAMIterator(seekableStream, referenceSource, validationStringency);
seekableStream.seek(startOfLastLinearBin >>> 16);
final Container container = ContainerIO.readContainerHeader(newIterator.getCramHeader().getVersion().major, seekableStream);
final Container container = ContainerHeaderIO.readContainerHeader(newIterator.getCramHeader().getVersion().major, seekableStream);
seekableStream.seek(seekableStream.position() + container.containerByteSize);
iterator = newIterator;
boolean atAlignments;
Expand Down
3 changes: 1 addition & 2 deletions src/main/java/htsjdk/samtools/CRAMIterator.java
Original file line number Diff line number Diff line change
Expand Up @@ -177,8 +177,7 @@ void nextContainer() throws IllegalArgumentException, CRAMException {
}
}

for (int i = 0; i < container.slices.length; i++) {
final Slice slice = container.slices[i];
for (final Slice slice : container.getSlices()) {
final ReferenceContext sliceContext = slice.getReferenceContext();

if (! sliceContext.isMappedSingleRef())
Expand Down
10 changes: 8 additions & 2 deletions src/main/java/htsjdk/samtools/cram/build/ContainerFactory.java
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,13 @@ public ContainerFactory(final SAMFileHeader samFileHeader, final int recordsPerS
this.recordsPerSlice = recordsPerSlice;
}

public Container buildContainer(final List<CramCompressionRecord> records) {
/**
* Build a Container (and its constituent Slices) from {@link CramCompressionRecord}s
* @param records the records used to build the Container
* @param containerByteOffset the Container's byte offset from the start of the stream
* @return the container built from these records
*/
public Container buildContainer(final List<CramCompressionRecord> records, final long containerByteOffset) {
// sets header APDelta
final boolean coordinateSorted = samFileHeader.getSortOrder() == SAMFileHeader.SortOrder.coordinate;
final CompressionHeader compressionHeader = new CompressionHeaderFactory().build(records, null, coordinateSorted);
Expand All @@ -58,7 +64,7 @@ public Container buildContainer(final List<CramCompressionRecord> records) {
slices.add(slice);
}

final Container container = Container.initializeFromSlices(slices, compressionHeader);
final Container container = Container.initializeFromSlices(slices, compressionHeader, containerByteOffset);
container.nofRecords = records.size();
container.globalRecordCounter = lastGlobalRecordCounter;
container.blockCount = 0;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ public List<CramCompressionRecord> getRecords(final Container container,
records = new ArrayList<>(container.nofRecords);
}

for (final Slice slice : container.slices) {
for (final Slice slice : container.getSlices()) {
records.addAll(getRecords(slice, container.compressionHeader, validationStringency));
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import htsjdk.samtools.cram.io.CountingInputStream;
import htsjdk.samtools.cram.io.InputStreamUtils;
import htsjdk.samtools.cram.structure.Container;
import htsjdk.samtools.cram.structure.ContainerIO;
import htsjdk.samtools.cram.structure.ContainerHeaderIO;

import java.io.InputStream;

Expand All @@ -22,8 +22,19 @@ public CramContainerHeaderIterator(final InputStream inputStream) {
super(inputStream);
}

/**
* Consume the entirety of the next container from the stream, but retain only the header.
* This is intended as a performance optimization, because it does not decode block data.
*
* @see CramContainerIterator#containerFromStream(Version, CountingInputStream)
*
* @param cramVersion the expected CRAM version of the stream
* @param countingStream the {@link CountingInputStream} to read from
* @return The next Container's header from the stream, returned as a Container.
*/
@Override
protected Container containerFromStream(final Version cramVersion, final CountingInputStream countingStream) {
final Container container = ContainerIO.readContainerHeader(cramVersion.major, countingStream);
final Container container = ContainerHeaderIO.readContainerHeader(cramVersion.major, countingStream);
InputStreamUtils.skipFully(countingStream, container.containerByteSize);
return container;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,20 +17,14 @@ public class CramContainerIterator implements Iterator<Container> {
private CountingInputStream countingInputStream;
private Container nextContainer;
private boolean eof = false;
private long offset = 0;

public CramContainerIterator(final InputStream inputStream) {
this.countingInputStream = new CountingInputStream(inputStream);
cramHeader = CramIO.readCramHeader(countingInputStream);
this.offset = countingInputStream.getCount();
}

void readNextContainer() {
private void readNextContainer() {
nextContainer = containerFromStream(cramHeader.getVersion(), countingInputStream);
final long containerSizeInBytes = countingInputStream.getCount() - offset;

nextContainer.setByteOffset(offset);
offset += containerSizeInBytes;

if (nextContainer.isEOF()) {
eof = true;
Expand All @@ -40,8 +34,11 @@ void readNextContainer() {

/**
* Consume the entirety of the next container from the stream.
* @param cramVersion
* @param countingStream
*
* @see CramContainerHeaderIterator#containerFromStream(Version, CountingInputStream)
*
* @param cramVersion the expected CRAM version of the stream
* @param countingStream the {@link CountingInputStream} to read from
* @return The next Container from the stream.
*/
protected Container containerFromStream(final Version cramVersion, final CountingInputStream countingStream) {
Expand Down
64 changes: 37 additions & 27 deletions src/main/java/htsjdk/samtools/cram/build/CramIO.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,14 +24,12 @@
import htsjdk.samtools.cram.io.CountingInputStream;
import htsjdk.samtools.cram.io.InputStreamUtils;
import htsjdk.samtools.cram.ref.ReferenceContext;
import htsjdk.samtools.cram.structure.*;
import htsjdk.samtools.cram.structure.block.Block;
import htsjdk.samtools.cram.structure.Container;
import htsjdk.samtools.cram.structure.ContainerIO;
import htsjdk.samtools.cram.structure.CramHeader;
import htsjdk.samtools.cram.structure.Slice;
import htsjdk.samtools.seekablestream.SeekableFileStream;
import htsjdk.samtools.seekablestream.SeekableStream;
import htsjdk.samtools.util.BufferedLineReader;
import htsjdk.samtools.util.LineReader;
import htsjdk.samtools.util.Log;
import htsjdk.samtools.util.RuntimeIOException;

Expand Down Expand Up @@ -215,7 +213,14 @@ public static CramHeader readCramHeader(final InputStream inputStream) {
try {
final CramHeader header = readFormatDefinition(inputStream);

final SAMFileHeader samFileHeader = readSAMFileHeader(header.getVersion(), inputStream, new String(header.getId()));
// the location of the stream pointer after the CramHeader has been read
final long containerByteOffset = CramIO.DEFINITION_LENGTH;

final SAMFileHeader samFileHeader = readSAMFileHeader(
header.getVersion(),
inputStream,
new String(header.getId()),
containerByteOffset);

return new CramHeader(header.getVersion(), new String(header.getId()), samFileHeader);
} catch (final IOException e) {
Expand Down Expand Up @@ -262,7 +267,6 @@ private static long writeContainerForSamFileHeader(final int major, final SAMFil
container.blockCount = 1;
container.blocks = new Block[]{block};
container.landmarks = new int[0];
container.slices = new Slice[0];
container.bases = 0;
container.globalRecordCounter = 0;
container.nofRecords = 0;
Expand All @@ -271,7 +275,7 @@ private static long writeContainerForSamFileHeader(final int major, final SAMFil
block.write(major, byteArrayOutputStream);
container.containerByteSize = byteArrayOutputStream.size();

final int containerHeaderByteSize = ContainerIO.writeContainerHeader(major, container, os);
final int containerHeaderByteSize = ContainerHeaderIO.writeContainerHeader(major, container, os);
try {
os.write(byteArrayOutputStream.toByteArray(), 0, byteArrayOutputStream.size());
} catch (final IOException e) {
Expand All @@ -281,8 +285,11 @@ private static long writeContainerForSamFileHeader(final int major, final SAMFil
return containerHeaderByteSize + byteArrayOutputStream.size();
}

private static SAMFileHeader readSAMFileHeader(final Version version, InputStream inputStream, final String id) {
final Container container = ContainerIO.readContainerHeader(version.major, inputStream);
private static SAMFileHeader readSAMFileHeader(final Version version,
final InputStream inputStream,
final String id,
final long containerByteOffset) {
final Container container = ContainerHeaderIO.readContainerHeader(version.major, inputStream, containerByteOffset);
final Block block;
{
if (version.compatibleWith(CramVersions.CRAM_v3)) {
Expand All @@ -293,36 +300,39 @@ private static SAMFileHeader readSAMFileHeader(final Version version, InputStrea
} else {
/*
* pending issue: container.containerByteSize inputStream 2 bytes shorter
* then needed in the v21 test cram files.
*/
* than needed in the v21 test cram files.
*/
block = Block.read(version.major, inputStream);
}
}

inputStream = new ByteArrayInputStream(block.getUncompressedContent());
byte[] bytes;
try (final InputStream blockStream = new ByteArrayInputStream(block.getUncompressedContent())) {

final ByteBuffer buffer = ByteBuffer.allocate(4);
buffer.order(ByteOrder.LITTLE_ENDIAN);

final ByteBuffer buffer = ByteBuffer.allocate(4);
buffer.order(ByteOrder.LITTLE_ENDIAN);
try {
for (int i = 0; i < 4; i++)
buffer.put((byte) inputStream.read());
} catch (final IOException e) {
throw new RuntimeIOException(e);
}
buffer.flip();
final int size = buffer.asIntBuffer().get();
buffer.put((byte) blockStream.read());

final DataInputStream dataInputStream = new DataInputStream(inputStream);
final byte[] bytes = new byte[size];
try {
buffer.flip();
final int size = buffer.asIntBuffer().get();

final DataInputStream dataInputStream = new DataInputStream(blockStream);
bytes = new byte[size];
dataInputStream.readFully(bytes);
} catch (final IOException e) {
throw new RuntimeIOException(e);
}

final BufferedLineReader bufferedLineReader = new BufferedLineReader(new ByteArrayInputStream(bytes));
final SAMTextHeaderCodec codec = new SAMTextHeaderCodec();
return codec.decode(bufferedLineReader, id);

try (final InputStream byteStream = new ByteArrayInputStream(bytes);
final LineReader lineReader = new BufferedLineReader(byteStream)) {
return codec.decode(lineReader, id);
} catch (final IOException e) {
throw new RuntimeIOException(e);
}
}

/**
Expand All @@ -338,7 +348,7 @@ public static boolean replaceCramHeader(final File file, final CramHeader newHea
try (final CountingInputStream countingInputStream = new CountingInputStream(new FileInputStream(file));
final RandomAccessFile randomAccessFile = new RandomAccessFile(file, "rw")) {
final CramHeader header = readFormatDefinition(countingInputStream);
final Container c = ContainerIO.readContainerHeader(header.getVersion().major, countingInputStream);
final Container c = ContainerHeaderIO.readContainerHeader(header.getVersion().major, countingInputStream);
final long pos = countingInputStream.getCount();
countingInputStream.close();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -99,10 +99,7 @@ public Container next() {
throw new RuntimeException("No more containers in this boundary.");
}

final long offset = seekableStream.position();
final Container c = ContainerIO.readContainer(cramHeader.getVersion(), seekableStream);
c.setByteOffset(offset);
return c;
return ContainerIO.readContainer(cramHeader.getVersion(), seekableStream);
} catch (final IOException e) {
throw new RuntimeIOException(e);
}
Expand Down
Loading

0 comments on commit d89b341

Please sign in to comment.