Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Immutable CRAIEntry #1256

Merged
merged 7 commits into from
Jan 10, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
119 changes: 63 additions & 56 deletions src/main/java/htsjdk/samtools/cram/CRAIEntry.java
Original file line number Diff line number Diff line change
Expand Up @@ -7,27 +7,41 @@

import java.io.IOException;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.*;
import java.util.stream.Collectors;

/**
* A class representing CRAI index entry: file and alignment offsets for each slice.
* Created by vadim on 10/08/2015.
*/
public class CRAIEntry implements Comparable<CRAIEntry>, Cloneable {
public int sequenceId;
public int alignmentStart;
public int alignmentSpan;
public long containerStartOffset;
public int sliceOffset;
jmthibault79 marked this conversation as resolved.
Show resolved Hide resolved
public int sliceSize;
public int sliceIndex;
jmthibault79 marked this conversation as resolved.
Show resolved Hide resolved

private static int CRAI_INDEX_COLUMNS = 6;
private static String entryFormat = "%d\t%d\t%d\t%d\t%d\t%d";

public CRAIEntry() {
public class CRAIEntry implements Comparable<CRAIEntry> {
private final int sequenceId;
private final int alignmentStart;
private final int alignmentSpan;
jmthibault79 marked this conversation as resolved.
Show resolved Hide resolved

// this Slice's Container's offset in bytes from the beginning of the stream
// equal to Slice.containerOffset and Container.offset
private final long containerStartByteOffset;
// this Slice's offset in bytes from the beginning of its Container
// equal to Slice.offset and Container.landmarks[Slice.index]
private final int sliceByteOffset;
private final int sliceByteSize;

private static final int CRAI_INDEX_COLUMNS = 6;
private static final String ENTRY_FORMAT = "%d\t%d\t%d\t%d\t%d\t%d";

public CRAIEntry(final int sequenceId,
final int alignmentStart,
final int alignmentSpan,
final long containerStartByteOffset,
final int sliceByteOffset,
final int sliceByteSize) {
this.sequenceId = sequenceId;
this.alignmentStart = alignmentStart;
this.alignmentSpan = alignmentSpan;
this.containerStartByteOffset = containerStartByteOffset;
this.sliceByteOffset = sliceByteOffset;
this.sliceByteSize = sliceByteSize;
}

/**
Expand All @@ -47,9 +61,9 @@ public CRAIEntry(final String line) throws CRAIIndex.CRAIIndexException {
sequenceId = Integer.parseInt(chunks[0]);
alignmentStart = Integer.parseInt(chunks[1]);
alignmentSpan = Integer.parseInt(chunks[2]);
containerStartOffset = Long.parseLong(chunks[3]);
sliceOffset = Integer.parseInt(chunks[4]);
sliceSize = Integer.parseInt(chunks[5]);
containerStartByteOffset = Long.parseLong(chunks[3]);
sliceByteOffset = Integer.parseInt(chunks[4]);
sliceByteSize = Integer.parseInt(chunks[5]);
} catch (final NumberFormatException e) {
throw new CRAIIndex.CRAIIndexException(e);
}
Expand All @@ -73,32 +87,14 @@ public void writeToStream(OutputStream os) {
* Format the entry as a string suitable for serialization in the CRAI index
*/
private String serializeToString() {
return String.format(entryFormat,
return String.format(ENTRY_FORMAT,
sequenceId, alignmentStart, alignmentSpan,
containerStartOffset, sliceOffset, sliceSize);
containerStartByteOffset, sliceByteOffset, sliceByteSize);
}

@Override
public String toString() { return serializeToString(); }

public static List<CRAIEntry> fromContainer(final Container container) {
final List<CRAIEntry> entries = new ArrayList<>(container.slices.length);
for (int i = 0; i < container.slices.length; i++) {
final Slice s = container.slices[i];
final CRAIEntry e = new CRAIEntry();
e.sequenceId = s.sequenceId;
e.alignmentStart = s.alignmentStart;
e.alignmentSpan = s.alignmentSpan;
e.containerStartOffset = s.containerOffset;
e.sliceOffset = container.landmarks[i];
e.sliceSize = s.size;

e.sliceIndex = i;
entries.add(e);
}
return entries;
}

@Override
public int compareTo(final CRAIEntry o) {
if (o == null) {
Expand All @@ -111,20 +107,7 @@ public int compareTo(final CRAIEntry o) {
return alignmentStart - o.alignmentStart;
}

return (int) (containerStartOffset - o.containerStartOffset);
}

@Override
public CRAIEntry clone() throws CloneNotSupportedException {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

only tests were using this

super.clone();
final CRAIEntry entry = new CRAIEntry();
entry.sequenceId = sequenceId;
entry.alignmentStart = alignmentStart;
entry.alignmentSpan = alignmentSpan;
entry.containerStartOffset = containerStartOffset;
entry.sliceOffset = sliceOffset;
entry.sliceSize = sliceSize;
return entry;
return (int) (containerStartByteOffset - o.containerStartByteOffset);
}

public static Comparator<CRAIEntry> byEnd = new Comparator<CRAIEntry>() {
Expand All @@ -138,7 +121,7 @@ public int compare(final CRAIEntry o1, final CRAIEntry o2) {
return o1.alignmentStart + o1.alignmentSpan - o2.alignmentStart - o2.alignmentSpan;
}

return (int) (o1.containerStartOffset - o2.containerStartOffset);
return (int) (o1.containerStartByteOffset - o2.containerStartByteOffset);
}
};

Expand All @@ -153,7 +136,7 @@ public int compare(final CRAIEntry o1, final CRAIEntry o2) {
return o1.alignmentStart - o2.alignmentStart;
}

return (int) (o1.containerStartOffset - o2.containerStartOffset);
return (int) (o1.containerStartByteOffset - o2.containerStartByteOffset);
}
};

Expand All @@ -171,7 +154,7 @@ public int compare(CRAIEntry o1, CRAIEntry o2) {
if (o1.alignmentStart != o2.alignmentStart)
return o1.alignmentStart - o2.alignmentStart;

return (int) (o1.containerStartOffset - o2.containerStartOffset);
return (int) (o1.containerStartByteOffset - o2.containerStartByteOffset);
}
};

Expand All @@ -192,4 +175,28 @@ public static boolean intersect(final CRAIEntry e0, final CRAIEntry e1) {
return Math.abs(a0 + b0 - a1 - b1) < (e0.alignmentSpan + e1.alignmentSpan);

}

public int getSequenceId() {
return sequenceId;
}

public int getAlignmentStart() {
return alignmentStart;
}

public int getAlignmentSpan() {
return alignmentSpan;
}

public long getContainerStartByteOffset() {
return containerStartByteOffset;
}

public int getSliceByteOffset() {
return sliceByteOffset;
}

public int getSliceByteSize() {
return sliceByteSize;
}
}
121 changes: 39 additions & 82 deletions src/main/java/htsjdk/samtools/cram/CRAIIndex.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,24 +4,15 @@
import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.CRAMBAIIndexer;
import htsjdk.samtools.CRAMCRAIIndexer;
import htsjdk.samtools.cram.encoding.reader.MultiRefSliceAlignmentSpanReader;
import htsjdk.samtools.cram.structure.*;
import htsjdk.samtools.seekablestream.SeekableMemoryStream;
import htsjdk.samtools.seekablestream.SeekableStream;
import htsjdk.samtools.ValidationStringency;

import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Map;
import java.io.*;
import java.util.*;
import java.util.stream.Collectors;

import java.util.List;

/**
* CRAI index used for CRAM files.
Expand Down Expand Up @@ -55,63 +46,30 @@ public void writeIndex(final OutputStream os) {

/**
* Create index entries for a single container.
* @param c the container to index
* @param container the container to index
*/
public void processContainer(final Container c) {
public void processContainer(final Container container) {
// TODO: this should be refactored and delegate to container/slice
if (!c.isEOF()) {
for (int i = 0; i < c.slices.length; i++) {
Slice s = c.slices[i];
if (!container.isEOF()) {
for (final Slice s: container.slices) {
if (s.sequenceId == Slice.MULTI_REFERENCE) {
this.entries.addAll(getCRAIEntriesForMultiRefSlice(s, c.header, c.offset, c.landmarks));
}
else {
CRAIEntry e = new CRAIEntry();

e.sequenceId = c.sequenceId;
e.alignmentStart = s.alignmentStart;
e.alignmentSpan = s.alignmentSpan;
e.containerStartOffset = c.offset;
e.sliceOffset = c.landmarks[i];
e.sliceSize = s.size;
e.sliceIndex = i;

entries.add(e);
final Map<Integer, AlignmentSpan> spans = s.getMultiRefAlignmentSpans(container.header, ValidationStringency.DEFAULT_STRINGENCY);
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

equivalent to what getCRAIEntriesForMultiRefSlice() did


this.entries.addAll(spans.entrySet().stream()
.map(e -> new CRAIEntry(e.getKey(),
e.getValue().getStart(),
e.getValue().getSpan(),
container.offset,
container.landmarks[s.index],
s.size))
.collect(Collectors.toList()));
} else {
entries.add(s.getCRAIEntry(container.offset));
jmthibault79 marked this conversation as resolved.
Show resolved Hide resolved
}
}
}
}

/**
* Return a list of CRAI Entries; one for each reference in the multireference slice.
* TODO: this should be refactored and delegate to container/slice
*/
private static Collection<CRAIEntry> getCRAIEntriesForMultiRefSlice(
final Slice slice,
final CompressionHeader header,
final long containerOffset,
final int[] landmarks)
{
final Map<Integer, AlignmentSpan> spans = slice.getMultiRefAlignmentSpans(header, ValidationStringency.DEFAULT_STRINGENCY);

List<CRAIEntry> entries = new ArrayList<>(spans.size());
for (int seqId : spans.keySet()) {
CRAIEntry e = new CRAIEntry();
e.sequenceId = seqId;
AlignmentSpan span = spans.get(seqId);
e.alignmentStart = span.getStart();
e.alignmentSpan = span.getSpan();
e.sliceSize = slice.size;
e.sliceIndex = slice.index;
e.containerStartOffset = containerOffset;
e.sliceOffset = landmarks[slice.index];

entries.add(e);
}

return entries;
}

public static SeekableStream openCraiFileAsBaiStream(final File cramIndexFile, final SAMSequenceDictionary dictionary) throws IOException {
return openCraiFileAsBaiStream(new FileInputStream(cramIndexFile), dictionary);
}
Expand All @@ -128,16 +86,16 @@ public static SeekableStream openCraiFileAsBaiStream(final InputStream indexStre

for (final CRAIEntry entry : full) {
final Slice slice = new Slice();
slice.containerOffset = entry.containerStartOffset;
slice.alignmentStart = entry.alignmentStart;
slice.alignmentSpan = entry.alignmentSpan;
slice.sequenceId = entry.sequenceId;
// https://github.com/samtools/htsjdk/issues/531
// entry.sliceSize is the slice size in bytes, not the number of
// records; this results in the BAMIndex metadata being wrong
slice.nofRecords = entry.sliceSize;
slice.index = entry.sliceIndex;
slice.offset = entry.sliceOffset;
slice.containerOffset = entry.getContainerStartByteOffset();
slice.alignmentStart = entry.getAlignmentStart();
slice.alignmentSpan = entry.getAlignmentSpan();
slice.sequenceId = entry.getSequenceId();
// NOTE: the recordCount and sliceIndex fields can't be derived from the CRAM index
// so we can only set them to zero
// see https://github.com/samtools/htsjdk/issues/531
slice.nofRecords = 0;
slice.index = 0;
slice.offset = entry.getSliceByteOffset();

indexer.processSingleReferenceSlice(slice);
jmthibault79 marked this conversation as resolved.
Show resolved Hide resolved
}
Expand All @@ -148,17 +106,16 @@ public static SeekableStream openCraiFileAsBaiStream(final InputStream indexStre

public static List<CRAIEntry> find(final List<CRAIEntry> list, final int seqId, final int start, final int span) {
final boolean whole = start < 1 || span < 1;
final CRAIEntry query = new CRAIEntry();
query.sequenceId = seqId;
query.alignmentStart = start < 1 ? 1 : start;
query.alignmentSpan = span < 1 ? Integer.MAX_VALUE : span;
query.containerStartOffset = Long.MAX_VALUE;
query.sliceOffset = Integer.MAX_VALUE;
query.sliceSize = Integer.MAX_VALUE;
final CRAIEntry query = new CRAIEntry(seqId,
start < 1 ? 1 : start,
span < 1 ? Integer.MAX_VALUE : span,
Long.MAX_VALUE,
Integer.MAX_VALUE,
Integer.MAX_VALUE);

final List<CRAIEntry> l = new ArrayList<>();
for (final CRAIEntry e : list) {
if (e.sequenceId != seqId) {
if (e.getSequenceId() != seqId) {
continue;
}
if (whole || CRAIEntry.intersect(e, query)) {
Expand All @@ -176,7 +133,7 @@ public static CRAIEntry getLeftmost(final List<CRAIEntry> list) {
CRAIEntry left = list.get(0);

for (final CRAIEntry e : list) {
if (e.alignmentStart < left.alignmentStart) {
if (e.getAlignmentStart() < left.getAlignmentStart()) {
left = e;
}
}
Expand All @@ -202,7 +159,7 @@ public static int findLastAlignedEntry(final List<CRAIEntry> list) {
final int mid = (low + high) >>> 1;
final CRAIEntry midVal = list.get(mid);

if (midVal.sequenceId >= 0) {
if (midVal.getSequenceId() >= 0) {
low = mid + 1;
} else {
high = mid - 1;
Expand All @@ -211,7 +168,7 @@ public static int findLastAlignedEntry(final List<CRAIEntry> list) {
if (low >= list.size()) {
return list.size() - 1;
}
for (; low >= 0 && list.get(low).sequenceId == -1; low--) {
for (; low >= 0 && list.get(low).getSequenceId() == -1; low--) {
}
return low;
}
Expand Down
11 changes: 11 additions & 0 deletions src/main/java/htsjdk/samtools/cram/structure/Container.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,13 @@
package htsjdk.samtools.cram.structure;

import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.cram.CRAIEntry;
import htsjdk.samtools.cram.structure.block.Block;

import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;

public class Container {
// container header as defined in the specs:
/**
Expand Down Expand Up @@ -53,6 +58,12 @@ public class Container {
*/
public long offset;

public List<CRAIEntry> getCRAIEntries() {
return Arrays.stream(slices)
.map(Slice::getCRAIEntry)
.collect(Collectors.toList());
}

@Override
public String toString() {
return String
Expand Down
Loading