diff --git a/src/main/java/htsjdk/samtools/cram/CRAIEntry.java b/src/main/java/htsjdk/samtools/cram/CRAIEntry.java index dd22a00a1b..6a31fad1bd 100644 --- a/src/main/java/htsjdk/samtools/cram/CRAIEntry.java +++ b/src/main/java/htsjdk/samtools/cram/CRAIEntry.java @@ -7,27 +7,41 @@ import java.io.IOException; import java.io.OutputStream; -import java.util.ArrayList; -import java.util.Comparator; -import java.util.List; +import java.util.*; +import java.util.stream.Collectors; /** * A class representing CRAI index entry: file and alignment offsets for each slice. * Created by vadim on 10/08/2015. */ -public class CRAIEntry implements Comparable, Cloneable { - public int sequenceId; - public int alignmentStart; - public int alignmentSpan; - public long containerStartOffset; - public int sliceOffset; - public int sliceSize; - public int sliceIndex; - - private static int CRAI_INDEX_COLUMNS = 6; - private static String entryFormat = "%d\t%d\t%d\t%d\t%d\t%d"; - - public CRAIEntry() { +public class CRAIEntry implements Comparable { + private final int sequenceId; + private final int alignmentStart; + private final int alignmentSpan; + + // this Slice's Container's offset in bytes from the beginning of the stream + // equal to Slice.containerOffset and Container.offset + private final long containerStartByteOffset; + // this Slice's offset in bytes from the beginning of its Container + // equal to Slice.offset and Container.landmarks[Slice.index] + private final int sliceByteOffset; + private final int sliceByteSize; + + private static final int CRAI_INDEX_COLUMNS = 6; + private static final String ENTRY_FORMAT = "%d\t%d\t%d\t%d\t%d\t%d"; + + public CRAIEntry(final int sequenceId, + final int alignmentStart, + final int alignmentSpan, + final long containerStartByteOffset, + final int sliceByteOffset, + final int sliceByteSize) { + this.sequenceId = sequenceId; + this.alignmentStart = alignmentStart; + this.alignmentSpan = alignmentSpan; + this.containerStartByteOffset = 
containerStartByteOffset; + this.sliceByteOffset = sliceByteOffset; + this.sliceByteSize = sliceByteSize; } /** @@ -47,9 +61,9 @@ public CRAIEntry(final String line) throws CRAIIndex.CRAIIndexException { sequenceId = Integer.parseInt(chunks[0]); alignmentStart = Integer.parseInt(chunks[1]); alignmentSpan = Integer.parseInt(chunks[2]); - containerStartOffset = Long.parseLong(chunks[3]); - sliceOffset = Integer.parseInt(chunks[4]); - sliceSize = Integer.parseInt(chunks[5]); + containerStartByteOffset = Long.parseLong(chunks[3]); + sliceByteOffset = Integer.parseInt(chunks[4]); + sliceByteSize = Integer.parseInt(chunks[5]); } catch (final NumberFormatException e) { throw new CRAIIndex.CRAIIndexException(e); } @@ -73,32 +87,14 @@ public void writeToStream(OutputStream os) { * Format the entry as a string suitable for serialization in the CRAI index */ private String serializeToString() { - return String.format(entryFormat, + return String.format(ENTRY_FORMAT, sequenceId, alignmentStart, alignmentSpan, - containerStartOffset, sliceOffset, sliceSize); + containerStartByteOffset, sliceByteOffset, sliceByteSize); } @Override public String toString() { return serializeToString(); } - public static List fromContainer(final Container container) { - final List entries = new ArrayList<>(container.slices.length); - for (int i = 0; i < container.slices.length; i++) { - final Slice s = container.slices[i]; - final CRAIEntry e = new CRAIEntry(); - e.sequenceId = s.sequenceId; - e.alignmentStart = s.alignmentStart; - e.alignmentSpan = s.alignmentSpan; - e.containerStartOffset = s.containerOffset; - e.sliceOffset = container.landmarks[i]; - e.sliceSize = s.size; - - e.sliceIndex = i; - entries.add(e); - } - return entries; - } - @Override public int compareTo(final CRAIEntry o) { if (o == null) { @@ -111,20 +107,7 @@ public int compareTo(final CRAIEntry o) { return alignmentStart - o.alignmentStart; } - return (int) (containerStartOffset - o.containerStartOffset); - } - - @Override - 
public CRAIEntry clone() throws CloneNotSupportedException { - super.clone(); - final CRAIEntry entry = new CRAIEntry(); - entry.sequenceId = sequenceId; - entry.alignmentStart = alignmentStart; - entry.alignmentSpan = alignmentSpan; - entry.containerStartOffset = containerStartOffset; - entry.sliceOffset = sliceOffset; - entry.sliceSize = sliceSize; - return entry; + return Long.compare(containerStartByteOffset, o.containerStartByteOffset); /* not (int) (a - b): narrowing the long difference can flip its sign */ } public static Comparator byEnd = new Comparator() { @@ -138,7 +121,7 @@ public int compare(final CRAIEntry o1, final CRAIEntry o2) { return o1.alignmentStart + o1.alignmentSpan - o2.alignmentStart - o2.alignmentSpan; } - return (int) (o1.containerStartOffset - o2.containerStartOffset); + return Long.compare(o1.containerStartByteOffset, o2.containerStartByteOffset); } }; @@ -153,7 +136,7 @@ public int compare(final CRAIEntry o1, final CRAIEntry o2) { return o1.alignmentStart - o2.alignmentStart; } - return (int) (o1.containerStartOffset - o2.containerStartOffset); + return Long.compare(o1.containerStartByteOffset, o2.containerStartByteOffset); } }; @@ -171,7 +154,7 @@ public int compare(CRAIEntry o1, CRAIEntry o2) { if (o1.alignmentStart != o2.alignmentStart) return o1.alignmentStart - o2.alignmentStart; - return (int) (o1.containerStartOffset - o2.containerStartOffset); + return Long.compare(o1.containerStartByteOffset, o2.containerStartByteOffset); } }; @@ -192,4 +175,28 @@ public static boolean intersect(final CRAIEntry e0, final CRAIEntry e1) { return Math.abs(a0 + b0 - a1 - b1) < (e0.alignmentSpan + e1.alignmentSpan); } + + public int getSequenceId() { + return sequenceId; + } + + public int getAlignmentStart() { + return alignmentStart; + } + + public int getAlignmentSpan() { + return alignmentSpan; + } + + public long getContainerStartByteOffset() { + return containerStartByteOffset; + } + + public int getSliceByteOffset() { + return sliceByteOffset; + } + + public int getSliceByteSize() { + return sliceByteSize; + } } diff --git
a/src/main/java/htsjdk/samtools/cram/CRAIIndex.java b/src/main/java/htsjdk/samtools/cram/CRAIIndex.java index 88391cbcda..964023e6d8 100644 --- a/src/main/java/htsjdk/samtools/cram/CRAIIndex.java +++ b/src/main/java/htsjdk/samtools/cram/CRAIIndex.java @@ -4,24 +4,15 @@ import htsjdk.samtools.SAMFileHeader; import htsjdk.samtools.CRAMBAIIndexer; import htsjdk.samtools.CRAMCRAIIndexer; -import htsjdk.samtools.cram.encoding.reader.MultiRefSliceAlignmentSpanReader; import htsjdk.samtools.cram.structure.*; import htsjdk.samtools.seekablestream.SeekableMemoryStream; import htsjdk.samtools.seekablestream.SeekableStream; import htsjdk.samtools.ValidationStringency; -import java.io.ByteArrayOutputStream; -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import java.util.ArrayList; -import java.util.Collection; -import java.util.Collections; -import java.util.Map; +import java.io.*; +import java.util.*; +import java.util.stream.Collectors; -import java.util.List; /** * CRAI index used for CRAM files. @@ -55,63 +46,30 @@ public void writeIndex(final OutputStream os) { /** * Create index entries for a single container. 
- * @param c the container to index + * @param container the container to index */ - public void processContainer(final Container c) { + public void processContainer(final Container container) { // TODO: this should be refactored and delegate to container/slice - if (!c.isEOF()) { - for (int i = 0; i < c.slices.length; i++) { - Slice s = c.slices[i]; + if (!container.isEOF()) { + for (final Slice s: container.slices) { if (s.sequenceId == Slice.MULTI_REFERENCE) { - this.entries.addAll(getCRAIEntriesForMultiRefSlice(s, c.header, c.offset, c.landmarks)); - } - else { - CRAIEntry e = new CRAIEntry(); - - e.sequenceId = c.sequenceId; - e.alignmentStart = s.alignmentStart; - e.alignmentSpan = s.alignmentSpan; - e.containerStartOffset = c.offset; - e.sliceOffset = c.landmarks[i]; - e.sliceSize = s.size; - e.sliceIndex = i; - - entries.add(e); + final Map spans = s.getMultiRefAlignmentSpans(container.header, ValidationStringency.DEFAULT_STRINGENCY); + + this.entries.addAll(spans.entrySet().stream() + .map(e -> new CRAIEntry(e.getKey(), + e.getValue().getStart(), + e.getValue().getSpan(), + container.offset, + container.landmarks[s.index], + s.size)) + .collect(Collectors.toList())); + } else { + entries.add(s.getCRAIEntry(container.offset)); } } } } - /** - * Return a list of CRAI Entries; one for each reference in the multireference slice. 
- * TODO: this should be refactored and delegate to container/slice - */ - private static Collection getCRAIEntriesForMultiRefSlice( - final Slice slice, - final CompressionHeader header, - final long containerOffset, - final int[] landmarks) - { - final Map spans = slice.getMultiRefAlignmentSpans(header, ValidationStringency.DEFAULT_STRINGENCY); - - List entries = new ArrayList<>(spans.size()); - for (int seqId : spans.keySet()) { - CRAIEntry e = new CRAIEntry(); - e.sequenceId = seqId; - AlignmentSpan span = spans.get(seqId); - e.alignmentStart = span.getStart(); - e.alignmentSpan = span.getSpan(); - e.sliceSize = slice.size; - e.sliceIndex = slice.index; - e.containerStartOffset = containerOffset; - e.sliceOffset = landmarks[slice.index]; - - entries.add(e); - } - - return entries; - } - public static SeekableStream openCraiFileAsBaiStream(final File cramIndexFile, final SAMSequenceDictionary dictionary) throws IOException { return openCraiFileAsBaiStream(new FileInputStream(cramIndexFile), dictionary); } @@ -128,16 +86,16 @@ public static SeekableStream openCraiFileAsBaiStream(final InputStream indexStre for (final CRAIEntry entry : full) { final Slice slice = new Slice(); - slice.containerOffset = entry.containerStartOffset; - slice.alignmentStart = entry.alignmentStart; - slice.alignmentSpan = entry.alignmentSpan; - slice.sequenceId = entry.sequenceId; - // https://github.com/samtools/htsjdk/issues/531 - // entry.sliceSize is the slice size in bytes, not the number of - // records; this results in the BAMIndex metadata being wrong - slice.nofRecords = entry.sliceSize; - slice.index = entry.sliceIndex; - slice.offset = entry.sliceOffset; + slice.containerOffset = entry.getContainerStartByteOffset(); + slice.alignmentStart = entry.getAlignmentStart(); + slice.alignmentSpan = entry.getAlignmentSpan(); + slice.sequenceId = entry.getSequenceId(); + // NOTE: the recordCount and sliceIndex fields can't be derived from the CRAM index + // so we can only set them to 
zero + // see https://github.com/samtools/htsjdk/issues/531 + slice.nofRecords = 0; + slice.index = 0; + slice.offset = entry.getSliceByteOffset(); indexer.processSingleReferenceSlice(slice); } @@ -148,17 +106,16 @@ public static SeekableStream openCraiFileAsBaiStream(final InputStream indexStre public static List find(final List list, final int seqId, final int start, final int span) { final boolean whole = start < 1 || span < 1; - final CRAIEntry query = new CRAIEntry(); - query.sequenceId = seqId; - query.alignmentStart = start < 1 ? 1 : start; - query.alignmentSpan = span < 1 ? Integer.MAX_VALUE : span; - query.containerStartOffset = Long.MAX_VALUE; - query.sliceOffset = Integer.MAX_VALUE; - query.sliceSize = Integer.MAX_VALUE; + final CRAIEntry query = new CRAIEntry(seqId, + start < 1 ? 1 : start, + span < 1 ? Integer.MAX_VALUE : span, + Long.MAX_VALUE, + Integer.MAX_VALUE, + Integer.MAX_VALUE); final List l = new ArrayList<>(); for (final CRAIEntry e : list) { - if (e.sequenceId != seqId) { + if (e.getSequenceId() != seqId) { continue; } if (whole || CRAIEntry.intersect(e, query)) { @@ -176,7 +133,7 @@ public static CRAIEntry getLeftmost(final List list) { CRAIEntry left = list.get(0); for (final CRAIEntry e : list) { - if (e.alignmentStart < left.alignmentStart) { + if (e.getAlignmentStart() < left.getAlignmentStart()) { left = e; } } @@ -202,7 +159,7 @@ public static int findLastAlignedEntry(final List list) { final int mid = (low + high) >>> 1; final CRAIEntry midVal = list.get(mid); - if (midVal.sequenceId >= 0) { + if (midVal.getSequenceId() >= 0) { low = mid + 1; } else { high = mid - 1; @@ -211,7 +168,7 @@ public static int findLastAlignedEntry(final List list) { if (low >= list.size()) { return list.size() - 1; } - for (; low >= 0 && list.get(low).sequenceId == -1; low--) { + for (; low >= 0 && list.get(low).getSequenceId() == -1; low--) { } return low; } diff --git a/src/main/java/htsjdk/samtools/cram/structure/Container.java 
b/src/main/java/htsjdk/samtools/cram/structure/Container.java index bdeaa9d058..78c974c1e4 100644 --- a/src/main/java/htsjdk/samtools/cram/structure/Container.java +++ b/src/main/java/htsjdk/samtools/cram/structure/Container.java @@ -18,8 +18,13 @@ package htsjdk.samtools.cram.structure; import htsjdk.samtools.SAMRecord; +import htsjdk.samtools.cram.CRAIEntry; import htsjdk.samtools.cram.structure.block.Block; +import java.util.Arrays; +import java.util.List; +import java.util.stream.Collectors; + public class Container { // container header as defined in the specs: /** @@ -53,6 +58,12 @@ public class Container { */ public long offset; + public List getCRAIEntries() { + return Arrays.stream(slices) + .map(Slice::getCRAIEntry) + .collect(Collectors.toList()); + } + @Override public String toString() { return String diff --git a/src/main/java/htsjdk/samtools/cram/structure/Slice.java b/src/main/java/htsjdk/samtools/cram/structure/Slice.java index 611cc9559a..ab881f430d 100644 --- a/src/main/java/htsjdk/samtools/cram/structure/Slice.java +++ b/src/main/java/htsjdk/samtools/cram/structure/Slice.java @@ -18,6 +18,7 @@ package htsjdk.samtools.cram.structure; import htsjdk.samtools.*; +import htsjdk.samtools.cram.CRAIEntry; import htsjdk.samtools.cram.encoding.reader.CramRecordReader; import htsjdk.samtools.cram.encoding.reader.MultiRefSliceAlignmentSpanReader; import htsjdk.samtools.cram.io.BitInputStream; @@ -59,10 +60,17 @@ public class Slice { public Block embeddedRefBlock; public Map external; - // for indexing purposes: + // for indexing purposes + + // the Slice's offset in bytes from the beginning of its Container + // equal to Container.landmarks[Slice.index] of its enclosing Container public int offset = -1; + // this Slice's Container's offset in bytes from the beginning of the stream + // equal to Container.offset of its enclosing Container public long containerOffset = -1; + // this Slice's size in bytes public int size = -1; + // this Slice's index within 
its Container public int index = -1; // to pass this to the container: @@ -291,4 +299,23 @@ public Map getMultiRefAlignmentSpans(final CompressionHe return reader.getReferenceSpans(); } + /** + * Generate a CRAI Index entry from this Slice + * @return a new CRAI Index Entry + */ + public CRAIEntry getCRAIEntry() { + return new CRAIEntry(sequenceId, alignmentStart, alignmentSpan, containerOffset, offset, size); + } + /** + * Generate a CRAI Index entry from this Slice and the container offset. + * + * TODO: investigate why we sometimes need to pass in an external containerStartOffset + * because this Slice's containerOffset is incorrect + * + * @param containerStartOffset the byte offset of this Slice's Container + * @return a new CRAI Index Entry + */ + public CRAIEntry getCRAIEntry(final long containerStartOffset) { + return new CRAIEntry(sequenceId, alignmentStart, alignmentSpan, containerStartOffset, offset, size); + } } diff --git a/src/test/java/htsjdk/samtools/SamIndexesTest.java b/src/test/java/htsjdk/samtools/SamIndexesTest.java index f78b0f3719..51bfb2b825 100644 --- a/src/test/java/htsjdk/samtools/SamIndexesTest.java +++ b/src/test/java/htsjdk/samtools/SamIndexesTest.java @@ -2,7 +2,7 @@ import htsjdk.HtsjdkTest; import htsjdk.samtools.cram.CRAIEntry; -import htsjdk.samtools.cram.CRAIIndex; +import htsjdk.samtools.cram.CRAIEntryTest; import htsjdk.samtools.seekablestream.SeekableFileStream; import htsjdk.samtools.seekablestream.SeekableMemoryStream; import htsjdk.samtools.seekablestream.SeekableStream; @@ -16,8 +16,6 @@ import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; -import java.util.ArrayList; -import java.util.List; import java.util.zip.GZIPOutputStream; public class SamIndexesTest extends HtsjdkTest { @@ -71,13 +69,7 @@ public void testCraiInMemory() throws IOException { SAMFileHeader header = new SAMFileHeader(); header.setSortOrder(SAMFileHeader.SortOrder.coordinate); CRAMCRAIIndexer indexer = new 
CRAMCRAIIndexer(baos, header); - final CRAIEntry entry = new CRAIEntry(); - entry.sequenceId = 0; - entry.alignmentStart = 1; - entry.alignmentSpan = 2; - entry.sliceOffset = 3; - entry.sliceSize = 4; - entry.containerStartOffset = 5; + final CRAIEntry entry = CRAIEntryTest.newEntry(0, 1, 2, 5, 3, 4); indexer.addEntry(entry); indexer.finish(); baos.close(); @@ -91,11 +83,11 @@ public void testCraiInMemory() throws IOException { baos = new ByteArrayOutputStream(); IOUtil.copyStream(baiStream, baos); final CachingBAMFileIndex bamIndex = new CachingBAMFileIndex(new SeekableMemoryStream(baos.toByteArray(), null), dictionary); - final BAMFileSpan span = bamIndex.getSpanOverlapping(entry.sequenceId, entry.alignmentStart, entry.alignmentStart); + final BAMFileSpan span = bamIndex.getSpanOverlapping(entry.getSequenceId(), entry.getAlignmentStart(), entry.getAlignmentStart()); Assert.assertNotNull(span); final long[] coordinateArray = span.toCoordinateArray(); Assert.assertEquals(coordinateArray.length, 2); - Assert.assertEquals(coordinateArray[0] >> 16, entry.containerStartOffset); + Assert.assertEquals(coordinateArray[0] >> 16, entry.getContainerStartByteOffset()); Assert.assertEquals(coordinateArray[1] & 0xFFFF, 1); } @@ -108,13 +100,7 @@ public void testCraiFromFile() throws IOException { SAMFileHeader header = new SAMFileHeader(); header.setSortOrder(SAMFileHeader.SortOrder.coordinate); CRAMCRAIIndexer indexer = new CRAMCRAIIndexer(fos, header); - final CRAIEntry entry = new CRAIEntry(); - entry.sequenceId = 0; - entry.alignmentStart = 1; - entry.alignmentSpan = 2; - entry.sliceOffset = 3; - entry.sliceSize = 4; - entry.containerStartOffset = 5; + final CRAIEntry entry = CRAIEntryTest.newEntry(0, 1, 2, 5, 3, 4); indexer.addEntry(entry); indexer.finish(); fos.close(); @@ -126,11 +112,11 @@ public void testCraiFromFile() throws IOException { Assert.assertNotNull(baiStream); final CachingBAMFileIndex bamIndex = new CachingBAMFileIndex(baiStream, dictionary); - final 
BAMFileSpan span = bamIndex.getSpanOverlapping(entry.sequenceId, entry.alignmentStart, entry.alignmentStart); + final BAMFileSpan span = bamIndex.getSpanOverlapping(entry.getSequenceId(), entry.getAlignmentStart(), entry.getAlignmentStart()); Assert.assertNotNull(span); final long[] coordinateArray = span.toCoordinateArray(); Assert.assertEquals(coordinateArray.length, 2); - Assert.assertEquals(coordinateArray[0] >> 16, entry.containerStartOffset); + Assert.assertEquals(coordinateArray[0] >> 16, entry.getContainerStartByteOffset()); Assert.assertEquals(coordinateArray[1] & 0xFFFF, 1); } @@ -167,13 +153,7 @@ public void testOpenIndexUrlAsBaiOrNull() throws IOException { SAMFileHeader header = new SAMFileHeader(); header.setSortOrder(SAMFileHeader.SortOrder.coordinate); CRAMCRAIIndexer indexer = new CRAMCRAIIndexer(fos, header); - final CRAIEntry entry = new CRAIEntry(); - entry.sequenceId = 0; - entry.alignmentStart = 1; - entry.alignmentSpan = 2; - entry.sliceOffset = 3; - entry.sliceSize = 4; - entry.containerStartOffset = 5; + final CRAIEntry entry = CRAIEntryTest.newEntry(0, 1, 2, 5, 3, 4); indexer.addEntry(entry); indexer.finish(); fos.close(); @@ -184,11 +164,11 @@ public void testOpenIndexUrlAsBaiOrNull() throws IOException { final ByteArrayOutputStream baos = new ByteArrayOutputStream(); IOUtil.copyStream(baiStream, baos); final CachingBAMFileIndex bamIndex = new CachingBAMFileIndex(new SeekableMemoryStream(baos.toByteArray(), null), dictionary); - final BAMFileSpan span = bamIndex.getSpanOverlapping(entry.sequenceId, entry.alignmentStart, entry.alignmentStart); + final BAMFileSpan span = bamIndex.getSpanOverlapping(entry.getSequenceId(), entry.getAlignmentStart(), entry.getAlignmentStart()); Assert.assertNotNull(span); final long[] coordinateArray = span.toCoordinateArray(); Assert.assertEquals(coordinateArray.length, 2); - Assert.assertEquals(coordinateArray[0] >> 16, entry.containerStartOffset); + Assert.assertEquals(coordinateArray[0] >> 16, 
entry.getContainerStartByteOffset()); Assert.assertEquals(coordinateArray[1] & 0xFFFF, 1); } } diff --git a/src/test/java/htsjdk/samtools/cram/CRAIEntryTest.java b/src/test/java/htsjdk/samtools/cram/CRAIEntryTest.java index d43f2fc146..f767f0c257 100644 --- a/src/test/java/htsjdk/samtools/cram/CRAIEntryTest.java +++ b/src/test/java/htsjdk/samtools/cram/CRAIEntryTest.java @@ -14,7 +14,6 @@ * Created by vadim on 25/08/2015. */ public class CRAIEntryTest extends HtsjdkTest { - @Test public void testFromContainer() { final Container container = new Container(); @@ -23,18 +22,22 @@ public void testFromContainer() { slice.alignmentStart = 2; slice.alignmentSpan = 3; slice.containerOffset = 4; - container.landmarks = new int[]{5}; + slice.offset = 5; + slice.size = 6; + container.landmarks = new int[]{7}; container.slices = new Slice[]{slice}; - final List entries = CRAIEntry.fromContainer(container); + final List entries = container.getCRAIEntries(); Assert.assertNotNull(entries); Assert.assertEquals(entries.size(), 1); final CRAIEntry entry = entries.get(0); - Assert.assertEquals(entry.sequenceId, slice.sequenceId); - Assert.assertEquals(entry.alignmentStart, slice.alignmentStart); - Assert.assertEquals(entry.alignmentSpan, slice.alignmentSpan); - Assert.assertEquals(entry.containerStartOffset, slice.containerOffset); + Assert.assertEquals(entry.getSequenceId(), slice.sequenceId); + Assert.assertEquals(entry.getAlignmentStart(), slice.alignmentStart); + Assert.assertEquals(entry.getAlignmentSpan(), slice.alignmentSpan); + Assert.assertEquals(entry.getContainerStartByteOffset(), slice.containerOffset); + Assert.assertEquals(entry.getSliceByteOffset(), slice.offset); + Assert.assertEquals(entry.getSliceByteSize(), slice.size); } @Test @@ -45,101 +48,121 @@ public void testFromCraiLine() { final int alignmentSpan = counter++; final int containerOffset = Integer.MAX_VALUE + counter++; final int sliceOffset = counter++; - final int sliceSise = counter++; + final int 
sliceSize = counter++; - final String line = String.format("%d\t%d\t%d\t%d\t%d\t%d", sequenceId, alignmentStart, alignmentSpan, containerOffset, sliceOffset, sliceSise); + final String line = String.format("%d\t%d\t%d\t%d\t%d\t%d", sequenceId, alignmentStart, alignmentSpan, containerOffset, sliceOffset, sliceSize); final CRAIEntry entry = new CRAIEntry(line); Assert.assertNotNull(entry); - Assert.assertEquals(entry.sequenceId, sequenceId); - Assert.assertEquals(entry.alignmentStart, alignmentStart); - Assert.assertEquals(entry.alignmentSpan, alignmentSpan); - Assert.assertEquals(entry.containerStartOffset, containerOffset); + Assert.assertEquals(entry.getSequenceId(), sequenceId); + Assert.assertEquals(entry.getAlignmentStart(), alignmentStart); + Assert.assertEquals(entry.getAlignmentSpan(), alignmentSpan); + Assert.assertEquals(entry.getContainerStartByteOffset(), containerOffset); + Assert.assertEquals(entry.getSliceByteOffset(), sliceOffset); + Assert.assertEquals(entry.getSliceByteSize(), sliceSize); } @Test - public void testIntersetcsZeroSpan() { - Assert.assertFalse(CRAIEntry.intersect(newEntry(1, 1), newEntry(1, 0))); + public void testIntersectsZeroSpan() { + Assert.assertFalse(CRAIEntry.intersect(newEntry(1, 1, 1), newEntry(1, 1, 0))); } @Test - public void testIntersetcsSame() { - Assert.assertTrue(CRAIEntry.intersect(newEntry(1, 1), newEntry(1, 1))); + public void testIntersectsSame() { + Assert.assertTrue(CRAIEntry.intersect(newEntry(1, 1, 1), newEntry(1, 1, 1))); } @Test - public void testIntersetcsIncluded() { - Assert.assertTrue(CRAIEntry.intersect(newEntry(1, 2), newEntry(1, 1))); - Assert.assertTrue(CRAIEntry.intersect(newEntry(1, 2), newEntry(2, 1))); + public void testIntersectsIncluded() { + Assert.assertTrue(CRAIEntry.intersect(newEntry(1, 1, 2), newEntry(1, 1, 1))); + Assert.assertTrue(CRAIEntry.intersect(newEntry(1, 1, 2), newEntry(1, 2, 1))); // is symmetrical? 
- Assert.assertTrue(CRAIEntry.intersect(newEntry(1, 1), newEntry(1, 2))); - Assert.assertTrue(CRAIEntry.intersect(newEntry(2, 1), newEntry(1, 2))); + Assert.assertTrue(CRAIEntry.intersect(newEntry(1, 1, 1), newEntry(1, 1, 2))); + Assert.assertTrue(CRAIEntry.intersect(newEntry(1, 2, 1), newEntry(1, 1, 2))); } @Test - public void testIntersetcsOvertlaping() { - Assert.assertFalse(CRAIEntry.intersect(newEntry(1, 2), newEntry(0, 1))); - Assert.assertTrue(CRAIEntry.intersect(newEntry(1, 2), newEntry(0, 2))); - Assert.assertTrue(CRAIEntry.intersect(newEntry(1, 2), newEntry(2, 1))); - Assert.assertFalse(CRAIEntry.intersect(newEntry(1, 2), newEntry(3, 1))); + public void testIntersectsOvertlaping() { + Assert.assertFalse(CRAIEntry.intersect(newEntry(1, 1, 2), newEntry(1, 0, 1))); + Assert.assertTrue(CRAIEntry.intersect(newEntry(1, 1, 2), newEntry(1, 0, 2))); + Assert.assertTrue(CRAIEntry.intersect(newEntry(1, 1, 2), newEntry(1, 2, 1))); + Assert.assertFalse(CRAIEntry.intersect(newEntry(1, 1, 2), newEntry(1, 3, 1))); } @Test - public void testIntersetcsAnotherSequence() { + public void testIntersectsAnotherSequence() { Assert.assertTrue(CRAIEntry.intersect(newEntry(10, 1, 2), newEntry(10, 2, 1))); Assert.assertFalse(CRAIEntry.intersect(newEntry(10, 1, 2), newEntry(11, 2, 1))); } @Test public void testCompareTo () { - final List list = new ArrayList(2); + final List list = new ArrayList<>(2); CRAIEntry e1; CRAIEntry e2; - e1 = new CRAIEntry(); - e1.sequenceId = 100; - e2 = new CRAIEntry(); - e2.sequenceId = 200; + e1 = newEntry(100, 0, 0); + e2 = newEntry(200, 0, 0); list.add(e2); list.add(e1); - Assert.assertTrue(list.get(1).sequenceId < list.get(0).sequenceId); + Assert.assertTrue(list.get(1).getSequenceId() < list.get(0).getSequenceId()); Collections.sort(list); - Assert.assertTrue(list.get(0).sequenceId < list.get(1).sequenceId); + Assert.assertTrue(list.get(0).getSequenceId() < list.get(1).getSequenceId()); list.clear(); - e1 = new CRAIEntry(); - e1.alignmentStart = 
100; - e2 = new CRAIEntry(); - e2.alignmentStart = 200; + e1 = newEntry(1, 100, 0); + e2 = newEntry(1, 200, 0); list.add(e2); list.add(e1); - Assert.assertTrue(list.get(1).alignmentStart < list.get(0).alignmentStart); + Assert.assertTrue(list.get(1).getAlignmentStart() < list.get(0).getAlignmentStart()); Collections.sort(list); - Assert.assertTrue(list.get(0).alignmentStart < list.get(1).alignmentStart); + Assert.assertTrue(list.get(0).getAlignmentStart() < list.get(1).getAlignmentStart()); list.clear(); - e1 = new CRAIEntry(); - e1.containerStartOffset = 100; - e2 = new CRAIEntry(); - e2.containerStartOffset = 200; + e1 = newEntryContOffset(100); + e2 = newEntryContOffset(200); list.add(e2); list.add(e1); - Assert.assertTrue(list.get(1).containerStartOffset < list.get(0).containerStartOffset); + Assert.assertTrue(list.get(1).getContainerStartByteOffset() < list.get(0).getContainerStartByteOffset()); Collections.sort(list); - Assert.assertTrue(list.get(0).containerStartOffset < list.get(1).containerStartOffset); + Assert.assertTrue(list.get(0).getContainerStartByteOffset() < list.get(1).getContainerStartByteOffset()); + } + + public static CRAIEntry newEntry(final int seqId, final int start, final int span) { + return newEntry(seqId, start, span, 0, 0, 0); } - private static CRAIEntry newEntry(final int start, final int span) { - return newEntry(1, start, span); + public static CRAIEntry newEntry(final int sequenceId, + final int start, + final int span, + final int containerStartOffset, + final int sliceOffset, + final int sliceSize) { + return new CRAIEntry(sequenceId, start, span, containerStartOffset, sliceOffset, sliceSize); } - private static CRAIEntry newEntry(final int seqId, final int start, final int span) { - final CRAIEntry e1 = new CRAIEntry(); - e1.sequenceId = seqId; - e1.alignmentStart = start; - e1.alignmentSpan = span; - return e1; + public static CRAIEntry newEntrySeqStart(final int seqId, final int start) { + return newEntry(seqId, start, 0); } 
+ public static CRAIEntry newEntryContOffset(final int containerStartOffset) { + return newEntry(1, 0, 0, containerStartOffset, 0, 0); + } + public static CRAIEntry updateStart(final CRAIEntry toClone, final int alignmentStart) { + return newEntry(toClone.getSequenceId(), + alignmentStart, + toClone.getAlignmentSpan()); + } + + public static CRAIEntry updateStartContOffset(final CRAIEntry toClone, + final int alignmentStart, + final int containerStartOffset) { + return newEntry(toClone.getSequenceId(), + alignmentStart, + toClone.getAlignmentSpan(), + containerStartOffset, + toClone.getSliceByteOffset(), + toClone.getSliceByteSize()); + } } diff --git a/src/test/java/htsjdk/samtools/cram/CRAIIndexTest.java b/src/test/java/htsjdk/samtools/cram/CRAIIndexTest.java index 9e48d6b4e0..2dd4133b5f 100644 --- a/src/test/java/htsjdk/samtools/cram/CRAIIndexTest.java +++ b/src/test/java/htsjdk/samtools/cram/CRAIIndexTest.java @@ -20,27 +20,17 @@ public class CRAIIndexTest extends HtsjdkTest { @Test - public void testFind() throws IOException, CloneNotSupportedException { - final List index = new ArrayList(); + public void testFind() { + final List index = new ArrayList<>(); final int sequenceId = 1; - CRAIEntry e = new CRAIEntry(); - e.sequenceId = sequenceId; - e.alignmentStart = 1; - e.alignmentSpan = 1; - e.containerStartOffset = 1; - e.sliceOffset = 1; - e.sliceSize = 0; + CRAIEntry e = CRAIEntryTest.newEntry(sequenceId, 1, 1, 1, 1, 0); index.add(e); - e = e.clone(); - e.alignmentStart = 2; - e.containerStartOffset = 2; + e = CRAIEntryTest.updateStartContOffset(e, 2, 2); index.add(e); - e = e.clone(); - e.alignmentStart = 3; - e.containerStartOffset = 3; + e = CRAIEntryTest.updateStartContOffset(e, 3, 3); index.add(e); Assert.assertFalse(allFoundEntriesIntersectQueryInFind(index, sequenceId, 1, 0)); @@ -60,11 +50,11 @@ private boolean allFoundEntriesIntersectQueryInFind(final List index, int foundCount = 0; for (final CRAIEntry found : CRAIIndex.find(index, sequenceId, 
start, span)) { foundCount++; - Assert.assertEquals(found.sequenceId, sequenceId); + Assert.assertEquals(found.getSequenceId(), sequenceId); boolean intersects = false; - for (int pos = Math.min(found.alignmentStart, start); pos <= Math.max(found.alignmentStart + found.alignmentSpan, start + span); pos++) { - if (pos >= found.alignmentStart && pos >= start && - pos <= found.alignmentStart + found.alignmentSpan && pos <= start + span) { + for (int pos = Math.min(found.getAlignmentStart(), start); pos <= Math.max(found.getAlignmentStart() + found.getAlignmentSpan(), start + span); pos++) { + if (pos >= found.getAlignmentStart() && pos >= start && + pos <= found.getAlignmentStart() + found.getAlignmentSpan() && pos <= start + span) { intersects = true; break; } @@ -95,15 +85,9 @@ public void testCraiFromFile() throws IOException { doCRAITest(this::getBaiStreamFromFile); } - private void doCRAITest(BiFunction, SeekableStream> getBaiStreamForIndex) throws IOException { - final ArrayList index = new ArrayList(); - final CRAIEntry entry = new CRAIEntry(); - entry.sequenceId = 0; - entry.alignmentStart = 1; - entry.alignmentSpan = 2; - entry.sliceOffset = 3; - entry.sliceSize = 4; - entry.containerStartOffset = 5; + private void doCRAITest(BiFunction, SeekableStream> getBaiStreamForIndex) { + final ArrayList index = new ArrayList<>(); + final CRAIEntry entry = CRAIEntryTest.newEntry(0, 1, 2, 5, 3, 4); index.add(entry); final SAMSequenceDictionary dictionary = new SAMSequenceDictionary(); @@ -112,25 +96,31 @@ private void doCRAITest(BiFunction, Seeka final SeekableStream baiStream = getBaiStreamForIndex.apply(dictionary, index); final DiskBasedBAMFileIndex bamIndex = new DiskBasedBAMFileIndex(baiStream, dictionary); - final BAMFileSpan span = bamIndex.getSpanOverlapping(entry.sequenceId, entry.alignmentStart, entry.alignmentStart); + final BAMFileSpan span = bamIndex.getSpanOverlapping(entry.getSequenceId(), entry.getAlignmentStart(), entry.getAlignmentStart()); 
Assert.assertNotNull(span); final long[] coordinateArray = span.toCoordinateArray(); Assert.assertEquals(coordinateArray.length, 2); - Assert.assertEquals(coordinateArray[0] >> 16, entry.containerStartOffset); + Assert.assertEquals(coordinateArray[0] >> 16, entry.getContainerStartByteOffset()); Assert.assertEquals(coordinateArray[1] & 0xFFFF, 1); } public SeekableStream getBaiStreamFromMemory(SAMSequenceDictionary dictionary, final List index) { - try { - ByteArrayOutputStream baos = new ByteArrayOutputStream(); + byte[] written; + try (final ByteArrayOutputStream baos = new ByteArrayOutputStream()) { SAMFileHeader samHeader = new SAMFileHeader(); samHeader.setSortOrder(SAMFileHeader.SortOrder.coordinate); CRAMCRAIIndexer indexer = new CRAMCRAIIndexer(baos, samHeader); - for (CRAIEntry entry: index) { + for (CRAIEntry entry : index) { indexer.addEntry(entry); } indexer.finish(); - final SeekableStream baiStream = CRAIIndex.openCraiFileAsBaiStream(new ByteArrayInputStream(baos.toByteArray()), dictionary); + written = baos.toByteArray(); + } + catch (IOException e) { + throw new RuntimeException(e); + } + + try (final SeekableStream baiStream = CRAIIndex.openCraiFileAsBaiStream(new ByteArrayInputStream(written), dictionary)) { Assert.assertNotNull(baiStream); return baiStream; } @@ -161,23 +151,16 @@ private SeekableStream getBaiStreamFromFile(SAMSequenceDictionary dictionary, fi } @Test - public void testGetLeftmost() throws CloneNotSupportedException { - final List index = new ArrayList(); + public void testGetLeftmost() { + final List index = new ArrayList<>(); Assert.assertNull(CRAIIndex.getLeftmost(index)); - final CRAIEntry e1 = new CRAIEntry(); - e1.sequenceId = 1; - e1.alignmentStart = 2; - e1.alignmentSpan = 3; - e1.containerStartOffset = 4; - e1.sliceOffset = 5; - e1.sliceSize = 6; + final CRAIEntry e1 = CRAIEntryTest.newEntry(1, 2, 3, 4, 5, 6); index.add(e1); // trivial case of single entry in index: Assert.assertEquals(e1, CRAIIndex.getLeftmost(index)); - 
final CRAIEntry e2 = e1.clone(); - e2.alignmentStart = e1.alignmentStart + 1; + final CRAIEntry e2 = CRAIEntryTest.updateStart(e1, e1.getAlignmentStart() + 1); index.add(e2); Assert.assertEquals(e1, CRAIIndex.getLeftmost(index)); } @@ -192,16 +175,13 @@ public void testFindLastAlignedEntry() { for (int lastAligned = 0; lastAligned < indexSize; lastAligned++) { index.clear(); for (int i = 0; i < indexSize; i++) { - final CRAIEntry e = new CRAIEntry(); - - e.sequenceId = (i <= lastAligned ? 0 : -1); - e.alignmentStart = i; + final CRAIEntry e = CRAIEntryTest.newEntrySeqStart(i <= lastAligned ? 0 : -1, i); index.add(e); } // check expectations are correct before calling findLastAlignedEntry method: - Assert.assertTrue(index.get(lastAligned).sequenceId != -1); + Assert.assertTrue(index.get(lastAligned).getSequenceId() != -1); if (lastAligned < index.size() - 1) { - Assert.assertTrue(index.get(lastAligned + 1).sequenceId == -1); + Assert.assertTrue(index.get(lastAligned + 1).getSequenceId() == -1); } // assert the the found value matches the expectation: Assert.assertEquals(CRAIIndex.findLastAlignedEntry(index), lastAligned);