From 06070a0343b2d74c137e8743d5f8a3ad968450fb Mon Sep 17 00:00:00 2001 From: Joel Thibault Date: Tue, 8 Jan 2019 16:04:29 -0500 Subject: [PATCH 1/7] encapsulate CRAIEntry fields - explicitly set nOfRecords and index to 0 - remove unused sliceIndex --- .../java/htsjdk/samtools/cram/CRAIEntry.java | 106 ++++++++------- .../java/htsjdk/samtools/cram/CRAIIndex.java | 121 ++++++----------- .../htsjdk/samtools/cram/structure/Slice.java | 12 ++ .../java/htsjdk/samtools/SamIndexesTest.java | 40 ++---- .../htsjdk/samtools/cram/CRAIEntryTest.java | 125 ++++++++++-------- .../htsjdk/samtools/cram/CRAIIndexTest.java | 82 +++++------- 6 files changed, 221 insertions(+), 265 deletions(-) diff --git a/src/main/java/htsjdk/samtools/cram/CRAIEntry.java b/src/main/java/htsjdk/samtools/cram/CRAIEntry.java index dd22a00a1b..a83acbb569 100644 --- a/src/main/java/htsjdk/samtools/cram/CRAIEntry.java +++ b/src/main/java/htsjdk/samtools/cram/CRAIEntry.java @@ -7,27 +7,36 @@ import java.io.IOException; import java.io.OutputStream; -import java.util.ArrayList; -import java.util.Comparator; -import java.util.List; +import java.util.*; +import java.util.stream.Collectors; /** * A class representing CRAI index entry: file and alignment offsets for each slice. * Created by vadim on 10/08/2015. */ -public class CRAIEntry implements Comparable, Cloneable { - public int sequenceId; - public int alignmentStart; - public int alignmentSpan; - public long containerStartOffset; - public int sliceOffset; - public int sliceSize; - public int sliceIndex; - - private static int CRAI_INDEX_COLUMNS = 6; - private static String entryFormat = "%d\t%d\t%d\t%d\t%d\t%d"; - - public CRAIEntry() { +public class CRAIEntry implements Comparable { + private final int sequenceId; + private final int alignmentStart; + private final int alignmentSpan; + private final long containerStartOffset; + private final int sliceByteOffset; + private final int sliceByteSize; + + private static final int CRAI_INDEX_COLUMNS = 6; + private static final String ENTRY_FORMAT = "%d\t%d\t%d\t%d\t%d\t%d"; + + public CRAIEntry(final int sequenceId, + final int alignmentStart, + final int alignmentSpan, + final long containerStartOffset, + final int sliceByteOffset, + final int sliceByteSize) { + this.sequenceId = sequenceId; + this.alignmentStart = alignmentStart; + this.alignmentSpan = alignmentSpan; + this.containerStartOffset = containerStartOffset; + this.sliceByteOffset = sliceByteOffset; + this.sliceByteSize = sliceByteSize; } /** @@ -36,7 +45,7 @@ public CRAIEntry() { * @param line string formatted as a CRAI index entry * @throws CRAIIndex.CRAIIndexException */ - public CRAIEntry(final String line) throws CRAIIndex.CRAIIndexException { + public CRAIEntry(final String line) throws CRAIIndex.CRAIIndexException { final String[] chunks = line.split("\t"); if (chunks.length != CRAI_INDEX_COLUMNS) { throw new CRAIIndex.CRAIIndexException( @@ -48,8 +57,8 @@ public CRAIEntry(final String line) throws CRAIIndex.CRAIIndexException { alignmentStart = Integer.parseInt(chunks[1]); alignmentSpan = Integer.parseInt(chunks[2]); containerStartOffset = Long.parseLong(chunks[3]); - sliceOffset = Integer.parseInt(chunks[4]); - sliceSize = Integer.parseInt(chunks[5]); + sliceByteOffset = Integer.parseInt(chunks[4]); + sliceByteSize = Integer.parseInt(chunks[5]); } catch (final NumberFormatException e) { throw new CRAIIndex.CRAIIndexException(e); } @@ -73,30 +82,18 @@ public void writeToStream(OutputStream os) { * Format the entry as a string suitable for serialization in the CRAI index */ private String serializeToString() { - return String.format(entryFormat, + return String.format(ENTRY_FORMAT, sequenceId, alignmentStart, alignmentSpan, - containerStartOffset, sliceOffset, sliceSize); + containerStartOffset, sliceByteOffset, sliceByteSize); } @Override public String toString() { return serializeToString(); } public static List fromContainer(final Container container) { - final List entries = new ArrayList<>(container.slices.length); - for (int i = 0; i < container.slices.length; i++) { - final Slice s = container.slices[i]; - final CRAIEntry e = new CRAIEntry(); - e.sequenceId = s.sequenceId; - e.alignmentStart = s.alignmentStart; - e.alignmentSpan = s.alignmentSpan; - e.containerStartOffset = s.containerOffset; - e.sliceOffset = container.landmarks[i]; - e.sliceSize = s.size; - - e.sliceIndex = i; - entries.add(e); - } - return entries; + return Arrays.stream(container.slices) + .map(slice -> slice.getCRAIEntry(slice.containerOffset)) + .collect(Collectors.toList()); } @Override @@ -114,19 +111,6 @@ public int compareTo(final CRAIEntry o) { return (int) (containerStartOffset - o.containerStartOffset); } - @Override - public CRAIEntry clone() throws CloneNotSupportedException { - super.clone(); - final CRAIEntry entry = new CRAIEntry(); - entry.sequenceId = sequenceId; - entry.alignmentStart = alignmentStart; - entry.alignmentSpan = alignmentSpan; - entry.containerStartOffset = containerStartOffset; - entry.sliceOffset = sliceOffset; - entry.sliceSize = sliceSize; - return entry; - } - public static Comparator byEnd = new Comparator() { @Override @@ -192,4 +176,28 @@ public static boolean intersect(final CRAIEntry e0, final CRAIEntry e1) { return Math.abs(a0 + b0 - a1 - b1) < (e0.alignmentSpan + e1.alignmentSpan); } + + public int getSequenceId() { + return sequenceId; + } + + public int getAlignmentStart() { + return alignmentStart; + } + + public int getAlignmentSpan() { + return alignmentSpan; + } + + public long getContainerStartOffset() { + return containerStartOffset; + } + + public int getSliceByteOffset() { + return sliceByteOffset; + } + + public int getSliceByteSize() { + return sliceByteSize; + } } diff --git a/src/main/java/htsjdk/samtools/cram/CRAIIndex.java b/src/main/java/htsjdk/samtools/cram/CRAIIndex.java index 88391cbcda..a684a3be39 100644 --- a/src/main/java/htsjdk/samtools/cram/CRAIIndex.java +++ b/src/main/java/htsjdk/samtools/cram/CRAIIndex.java @@ -4,24 +4,15 @@ import htsjdk.samtools.SAMFileHeader; import htsjdk.samtools.CRAMBAIIndexer; import htsjdk.samtools.CRAMCRAIIndexer; -import htsjdk.samtools.cram.encoding.reader.MultiRefSliceAlignmentSpanReader; import htsjdk.samtools.cram.structure.*; import htsjdk.samtools.seekablestream.SeekableMemoryStream; import htsjdk.samtools.seekablestream.SeekableStream; import htsjdk.samtools.ValidationStringency; -import java.io.ByteArrayOutputStream; -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import java.util.ArrayList; -import java.util.Collection; -import java.util.Collections; -import java.util.Map; +import java.io.*; +import java.util.*; +import java.util.stream.Collectors; -import java.util.List; /** * CRAI index used for CRAM files. @@ -55,63 +46,30 @@ public void writeIndex(final OutputStream os) { /** * Create index entries for a single container. - * @param c the container to index + * @param container the container to index */ - public void processContainer(final Container c) { + public void processContainer(final Container container) { // TODO: this should be refactored and delegate to container/slice - if (!c.isEOF()) { - for (int i = 0; i < c.slices.length; i++) { - Slice s = c.slices[i]; + if (!container.isEOF()) { + for (final Slice s: container.slices) { if (s.sequenceId == Slice.MULTI_REFERENCE) { - this.entries.addAll(getCRAIEntriesForMultiRefSlice(s, c.header, c.offset, c.landmarks)); - } - else { - CRAIEntry e = new CRAIEntry(); - - e.sequenceId = c.sequenceId; - e.alignmentStart = s.alignmentStart; - e.alignmentSpan = s.alignmentSpan; - e.containerStartOffset = c.offset; - e.sliceOffset = c.landmarks[i]; - e.sliceSize = s.size; - e.sliceIndex = i; - - entries.add(e); + final Map spans = s.getMultiRefAlignmentSpans(container.header, ValidationStringency.DEFAULT_STRINGENCY); + + this.entries.addAll(spans.entrySet().stream() + .map(e -> new CRAIEntry(e.getKey(), + e.getValue().getStart(), + e.getValue().getSpan(), + container.offset, + container.landmarks[s.index], + s.size)) + .collect(Collectors.toList())); + } else { + entries.add(s.getCRAIEntry(container.offset)); } } } } - /** - * Return a list of CRAI Entries; one for each reference in the multireference slice. - * TODO: this should be refactored and delegate to container/slice - */ - private static Collection getCRAIEntriesForMultiRefSlice( - final Slice slice, - final CompressionHeader header, - final long containerOffset, - final int[] landmarks) - { - final Map spans = slice.getMultiRefAlignmentSpans(header, ValidationStringency.DEFAULT_STRINGENCY); - - List entries = new ArrayList<>(spans.size()); - for (int seqId : spans.keySet()) { - CRAIEntry e = new CRAIEntry(); - e.sequenceId = seqId; - AlignmentSpan span = spans.get(seqId); - e.alignmentStart = span.getStart(); - e.alignmentSpan = span.getSpan(); - e.sliceSize = slice.size; - e.sliceIndex = slice.index; - e.containerStartOffset = containerOffset; - e.sliceOffset = landmarks[slice.index]; - - entries.add(e); - } - - return entries; - } - public static SeekableStream openCraiFileAsBaiStream(final File cramIndexFile, final SAMSequenceDictionary dictionary) throws IOException { return openCraiFileAsBaiStream(new FileInputStream(cramIndexFile), dictionary); } @@ -128,16 +86,16 @@ public static SeekableStream openCraiFileAsBaiStream(final InputStream indexStre for (final CRAIEntry entry : full) { final Slice slice = new Slice(); - slice.containerOffset = entry.containerStartOffset; - slice.alignmentStart = entry.alignmentStart; - slice.alignmentSpan = entry.alignmentSpan; - slice.sequenceId = entry.sequenceId; - // https://github.com/samtools/htsjdk/issues/531 - // entry.sliceSize is the slice size in bytes, not the number of - // records; this results in the BAMIndex metadata being wrong - slice.nofRecords = entry.sliceSize; - slice.index = entry.sliceIndex; - slice.offset = entry.sliceOffset; + slice.containerOffset = entry.getContainerStartOffset(); + slice.alignmentStart = entry.getAlignmentStart(); + slice.alignmentSpan = entry.getAlignmentSpan(); + slice.sequenceId = entry.getSequenceId(); + // NOTE: the recordCount and sliceIndex fields can't be derived from the CRAM index + // so we can only set them to zero + // see https://github.com/samtools/htsjdk/issues/531 + slice.nofRecords = 0; + slice.index = 0; + slice.offset = entry.getSliceByteOffset(); indexer.processSingleReferenceSlice(slice); } @@ -148,17 +106,16 @@ public static SeekableStream openCraiFileAsBaiStream(final InputStream indexStre public static List find(final List list, final int seqId, final int start, final int span) { final boolean whole = start < 1 || span < 1; - final CRAIEntry query = new CRAIEntry(); - query.sequenceId = seqId; - query.alignmentStart = start < 1 ? 1 : start; - query.alignmentSpan = span < 1 ? Integer.MAX_VALUE : span; - query.containerStartOffset = Long.MAX_VALUE; - query.sliceOffset = Integer.MAX_VALUE; - query.sliceSize = Integer.MAX_VALUE; + final CRAIEntry query = new CRAIEntry(seqId, + start < 1 ? 1 : start, + span < 1 ? Integer.MAX_VALUE : span, + Long.MAX_VALUE, + Integer.MAX_VALUE, + Integer.MAX_VALUE); final List l = new ArrayList<>(); for (final CRAIEntry e : list) { - if (e.sequenceId != seqId) { + if (e.getSequenceId() != seqId) { continue; } if (whole || CRAIEntry.intersect(e, query)) { @@ -176,7 +133,7 @@ public static CRAIEntry getLeftmost(final List list) { CRAIEntry left = list.get(0); for (final CRAIEntry e : list) { - if (e.alignmentStart < left.alignmentStart) { + if (e.getAlignmentStart() < left.getAlignmentStart()) { left = e; } } @@ -202,7 +159,7 @@ public static int findLastAlignedEntry(final List list) { final int mid = (low + high) >>> 1; final CRAIEntry midVal = list.get(mid); - if (midVal.sequenceId >= 0) { + if (midVal.getSequenceId() >= 0) { low = mid + 1; } else { high = mid - 1; @@ -211,7 +168,7 @@ public static int findLastAlignedEntry(final List list) { if (low >= list.size()) { return list.size() - 1; } - for (; low >= 0 && list.get(low).sequenceId == -1; low--) { + for (; low >= 0 && list.get(low).getSequenceId() == -1; low--) { } return low; } diff --git a/src/main/java/htsjdk/samtools/cram/structure/Slice.java b/src/main/java/htsjdk/samtools/cram/structure/Slice.java index 611cc9559a..492fd9143c 100644 --- a/src/main/java/htsjdk/samtools/cram/structure/Slice.java +++ b/src/main/java/htsjdk/samtools/cram/structure/Slice.java @@ -18,6 +18,7 @@ package htsjdk.samtools.cram.structure; import htsjdk.samtools.*; +import htsjdk.samtools.cram.CRAIEntry; import htsjdk.samtools.cram.encoding.reader.CramRecordReader; import htsjdk.samtools.cram.encoding.reader.MultiRefSliceAlignmentSpanReader; import htsjdk.samtools.cram.io.BitInputStream; @@ -291,4 +292,15 @@ public Map getMultiRefAlignmentSpans(final CompressionHe return reader.getReferenceSpans(); } + /** + * Generate a CRAI Index entry from this Slice and the container offset. + * + * TODO: investigate why we can't simply use the Slice's own containerOffset here + * + * @param containerStartOffset the byte offset of this Slice's Container + * @return a new CRAI Index Entry + */ + public CRAIEntry getCRAIEntry(final long containerStartOffset) { + return new CRAIEntry(sequenceId, alignmentStart, alignmentSpan, containerStartOffset, offset, size); + } } diff --git a/src/test/java/htsjdk/samtools/SamIndexesTest.java b/src/test/java/htsjdk/samtools/SamIndexesTest.java index f78b0f3719..43d8c2ff2f 100644 --- a/src/test/java/htsjdk/samtools/SamIndexesTest.java +++ b/src/test/java/htsjdk/samtools/SamIndexesTest.java @@ -2,7 +2,7 @@ import htsjdk.HtsjdkTest; import htsjdk.samtools.cram.CRAIEntry; -import htsjdk.samtools.cram.CRAIIndex; +import htsjdk.samtools.cram.CRAIEntryTest; import htsjdk.samtools.seekablestream.SeekableFileStream; import htsjdk.samtools.seekablestream.SeekableMemoryStream; import htsjdk.samtools.seekablestream.SeekableStream; @@ -16,8 +16,6 @@ import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; -import java.util.ArrayList; -import java.util.List; import java.util.zip.GZIPOutputStream; public class SamIndexesTest extends HtsjdkTest { @@ -71,13 +69,7 @@ public void testCraiInMemory() throws IOException { SAMFileHeader header = new SAMFileHeader(); header.setSortOrder(SAMFileHeader.SortOrder.coordinate); CRAMCRAIIndexer indexer = new CRAMCRAIIndexer(baos, header); - final CRAIEntry entry = new CRAIEntry(); - entry.sequenceId = 0; - entry.alignmentStart = 1; - entry.alignmentSpan = 2; - entry.sliceOffset = 3; - entry.sliceSize = 4; - entry.containerStartOffset = 5; + final CRAIEntry entry = CRAIEntryTest.newEntry(0, 1, 2, 5, 3, 4); indexer.addEntry(entry); indexer.finish(); baos.close(); @@ -91,11 +83,11 @@ public void testCraiInMemory() throws IOException { baos = new ByteArrayOutputStream(); IOUtil.copyStream(baiStream, baos); final CachingBAMFileIndex bamIndex = new CachingBAMFileIndex(new SeekableMemoryStream(baos.toByteArray(), null), dictionary); - final BAMFileSpan span = bamIndex.getSpanOverlapping(entry.sequenceId, entry.alignmentStart, entry.alignmentStart); + final BAMFileSpan span = bamIndex.getSpanOverlapping(entry.getSequenceId(), entry.getAlignmentStart(), entry.getAlignmentStart()); Assert.assertNotNull(span); final long[] coordinateArray = span.toCoordinateArray(); Assert.assertEquals(coordinateArray.length, 2); - Assert.assertEquals(coordinateArray[0] >> 16, entry.containerStartOffset); + Assert.assertEquals(coordinateArray[0] >> 16, entry.getContainerStartOffset()); Assert.assertEquals(coordinateArray[1] & 0xFFFF, 1); } @@ -108,13 +100,7 @@ public void testCraiFromFile() throws IOException { SAMFileHeader header = new SAMFileHeader(); header.setSortOrder(SAMFileHeader.SortOrder.coordinate); CRAMCRAIIndexer indexer = new CRAMCRAIIndexer(fos, header); - final CRAIEntry entry = new CRAIEntry(); - entry.sequenceId = 0; - entry.alignmentStart = 1; - entry.alignmentSpan = 2; - entry.sliceOffset = 3; - entry.sliceSize = 4; - entry.containerStartOffset = 5; + final CRAIEntry entry = CRAIEntryTest.newEntry(0, 1, 2, 5, 3, 4); indexer.addEntry(entry); indexer.finish(); fos.close(); @@ -126,11 +112,11 @@ public void testCraiFromFile() throws IOException { Assert.assertNotNull(baiStream); final CachingBAMFileIndex bamIndex = new CachingBAMFileIndex(baiStream, dictionary); - final BAMFileSpan span = bamIndex.getSpanOverlapping(entry.sequenceId, entry.alignmentStart, entry.alignmentStart); + final BAMFileSpan span = bamIndex.getSpanOverlapping(entry.getSequenceId(), entry.getAlignmentStart(), entry.getAlignmentStart()); Assert.assertNotNull(span); final long[] coordinateArray = span.toCoordinateArray(); Assert.assertEquals(coordinateArray.length, 2); - Assert.assertEquals(coordinateArray[0] >> 16, entry.containerStartOffset); + Assert.assertEquals(coordinateArray[0] >> 16, entry.getContainerStartOffset()); Assert.assertEquals(coordinateArray[1] & 0xFFFF, 1); } @@ -167,13 +153,7 @@ public void testOpenIndexUrlAsBaiOrNull() throws IOException { SAMFileHeader header = new SAMFileHeader(); header.setSortOrder(SAMFileHeader.SortOrder.coordinate); CRAMCRAIIndexer indexer = new CRAMCRAIIndexer(fos, header); - final CRAIEntry entry = new CRAIEntry(); - entry.sequenceId = 0; - entry.alignmentStart = 1; - entry.alignmentSpan = 2; - entry.sliceOffset = 3; - entry.sliceSize = 4; - entry.containerStartOffset = 5; + final CRAIEntry entry = CRAIEntryTest.newEntry(0, 1, 2, 5, 3, 4); indexer.addEntry(entry); indexer.finish(); fos.close(); @@ -184,11 +164,11 @@ public void testOpenIndexUrlAsBaiOrNull() throws IOException { final ByteArrayOutputStream baos = new ByteArrayOutputStream(); IOUtil.copyStream(baiStream, baos); final CachingBAMFileIndex bamIndex = new CachingBAMFileIndex(new SeekableMemoryStream(baos.toByteArray(), null), dictionary); - final BAMFileSpan span = bamIndex.getSpanOverlapping(entry.sequenceId, entry.alignmentStart, entry.alignmentStart); + final BAMFileSpan span = bamIndex.getSpanOverlapping(entry.getSequenceId(), entry.getAlignmentStart(), entry.getAlignmentStart()); Assert.assertNotNull(span); final long[] coordinateArray = span.toCoordinateArray(); Assert.assertEquals(coordinateArray.length, 2); - Assert.assertEquals(coordinateArray[0] >> 16, entry.containerStartOffset); + Assert.assertEquals(coordinateArray[0] >> 16, entry.getContainerStartOffset()); Assert.assertEquals(coordinateArray[1] & 0xFFFF, 1); } } diff --git a/src/test/java/htsjdk/samtools/cram/CRAIEntryTest.java b/src/test/java/htsjdk/samtools/cram/CRAIEntryTest.java index d43f2fc146..b697cbbd05 100644 --- a/src/test/java/htsjdk/samtools/cram/CRAIEntryTest.java +++ b/src/test/java/htsjdk/samtools/cram/CRAIEntryTest.java @@ -14,7 +14,6 @@ * Created by vadim on 25/08/2015. */ public class CRAIEntryTest extends HtsjdkTest { - @Test public void testFromContainer() { final Container container = new Container(); @@ -31,10 +30,10 @@ public void testFromContainer() { Assert.assertEquals(entries.size(), 1); final CRAIEntry entry = entries.get(0); - Assert.assertEquals(entry.sequenceId, slice.sequenceId); - Assert.assertEquals(entry.alignmentStart, slice.alignmentStart); - Assert.assertEquals(entry.alignmentSpan, slice.alignmentSpan); - Assert.assertEquals(entry.containerStartOffset, slice.containerOffset); + Assert.assertEquals(entry.getSequenceId(), slice.sequenceId); + Assert.assertEquals(entry.getAlignmentStart(), slice.alignmentStart); + Assert.assertEquals(entry.getAlignmentSpan(), slice.alignmentSpan); + Assert.assertEquals(entry.getContainerStartOffset(), slice.containerOffset); } @Test @@ -45,101 +44,121 @@ public void testFromCraiLine() { final int alignmentSpan = counter++; final int containerOffset = Integer.MAX_VALUE + counter++; final int sliceOffset = counter++; - final int sliceSise = counter++; + final int sliceSize = counter++; - final String line = String.format("%d\t%d\t%d\t%d\t%d\t%d", sequenceId, alignmentStart, alignmentSpan, containerOffset, sliceOffset, sliceSise); + final String line = String.format("%d\t%d\t%d\t%d\t%d\t%d", sequenceId, alignmentStart, alignmentSpan, containerOffset, sliceOffset, sliceSize); final CRAIEntry entry = new CRAIEntry(line); Assert.assertNotNull(entry); - Assert.assertEquals(entry.sequenceId, sequenceId); - Assert.assertEquals(entry.alignmentStart, alignmentStart); - Assert.assertEquals(entry.alignmentSpan, alignmentSpan); - Assert.assertEquals(entry.containerStartOffset, containerOffset); + Assert.assertEquals(entry.getSequenceId(), sequenceId); + Assert.assertEquals(entry.getAlignmentStart(), alignmentStart); + Assert.assertEquals(entry.getAlignmentSpan(), alignmentSpan); + Assert.assertEquals(entry.getContainerStartOffset(), containerOffset); + Assert.assertEquals(entry.getSliceByteOffset(), sliceOffset); + Assert.assertEquals(entry.getSliceByteSize(), sliceSize); } @Test - public void testIntersetcsZeroSpan() { - Assert.assertFalse(CRAIEntry.intersect(newEntry(1, 1), newEntry(1, 0))); + public void testIntersectsZeroSpan() { + Assert.assertFalse(CRAIEntry.intersect(newEntry(1, 1, 1), newEntry(1, 1, 0))); } @Test - public void testIntersetcsSame() { - Assert.assertTrue(CRAIEntry.intersect(newEntry(1, 1), newEntry(1, 1))); + public void testIntersectsSame() { + Assert.assertTrue(CRAIEntry.intersect(newEntry(1, 1, 1), newEntry(1, 1, 1))); } @Test - public void testIntersetcsIncluded() { - Assert.assertTrue(CRAIEntry.intersect(newEntry(1, 2), newEntry(1, 1))); - Assert.assertTrue(CRAIEntry.intersect(newEntry(1, 2), newEntry(2, 1))); + public void testIntersectsIncluded() { + Assert.assertTrue(CRAIEntry.intersect(newEntry(1, 1, 2), newEntry(1, 1, 1))); + Assert.assertTrue(CRAIEntry.intersect(newEntry(1, 1, 2), newEntry(1, 2, 1))); // is symmetrical? - Assert.assertTrue(CRAIEntry.intersect(newEntry(1, 1), newEntry(1, 2))); - Assert.assertTrue(CRAIEntry.intersect(newEntry(2, 1), newEntry(1, 2))); + Assert.assertTrue(CRAIEntry.intersect(newEntry(1, 1, 1), newEntry(1, 1, 2))); + Assert.assertTrue(CRAIEntry.intersect(newEntry(1, 2, 1), newEntry(1, 1, 2))); } @Test - public void testIntersetcsOvertlaping() { - Assert.assertFalse(CRAIEntry.intersect(newEntry(1, 2), newEntry(0, 1))); - Assert.assertTrue(CRAIEntry.intersect(newEntry(1, 2), newEntry(0, 2))); - Assert.assertTrue(CRAIEntry.intersect(newEntry(1, 2), newEntry(2, 1))); - Assert.assertFalse(CRAIEntry.intersect(newEntry(1, 2), newEntry(3, 1))); + public void testIntersectsOvertlaping() { + Assert.assertFalse(CRAIEntry.intersect(newEntry(1, 1, 2), newEntry(1, 0, 1))); + Assert.assertTrue(CRAIEntry.intersect(newEntry(1, 1, 2), newEntry(1, 0, 2))); + Assert.assertTrue(CRAIEntry.intersect(newEntry(1, 1, 2), newEntry(1, 2, 1))); + Assert.assertFalse(CRAIEntry.intersect(newEntry(1, 1, 2), newEntry(1, 3, 1))); } @Test - public void testIntersetcsAnotherSequence() { + public void testIntersectsAnotherSequence() { Assert.assertTrue(CRAIEntry.intersect(newEntry(10, 1, 2), newEntry(10, 2, 1))); Assert.assertFalse(CRAIEntry.intersect(newEntry(10, 1, 2), newEntry(11, 2, 1))); } @Test public void testCompareTo () { - final List list = new ArrayList(2); + final List list = new ArrayList<>(2); CRAIEntry e1; CRAIEntry e2; - e1 = new CRAIEntry(); - e1.sequenceId = 100; - e2 = new CRAIEntry(); - e2.sequenceId = 200; + e1 = newEntry(100, 0, 0); + e2 = newEntry(200, 0, 0); list.add(e2); list.add(e1); - Assert.assertTrue(list.get(1).sequenceId < list.get(0).sequenceId); + Assert.assertTrue(list.get(1).getSequenceId() < list.get(0).getSequenceId()); Collections.sort(list); - Assert.assertTrue(list.get(0).sequenceId < list.get(1).sequenceId); + Assert.assertTrue(list.get(0).getSequenceId() < list.get(1).getSequenceId()); list.clear(); - e1 = new CRAIEntry(); - e1.alignmentStart = 100; - e2 = new CRAIEntry(); - e2.alignmentStart = 200; + e1 = newEntry(1, 100, 0); + e2 = newEntry(1, 200, 0); list.add(e2); list.add(e1); - Assert.assertTrue(list.get(1).alignmentStart < list.get(0).alignmentStart); + Assert.assertTrue(list.get(1).getAlignmentStart() < list.get(0).getAlignmentStart()); Collections.sort(list); - Assert.assertTrue(list.get(0).alignmentStart < list.get(1).alignmentStart); + Assert.assertTrue(list.get(0).getAlignmentStart() < list.get(1).getAlignmentStart()); list.clear(); - e1 = new CRAIEntry(); - e1.containerStartOffset = 100; - e2 = new CRAIEntry(); - e2.containerStartOffset = 200; + e1 = newEntryContOffset(100); + e2 = newEntryContOffset(200); list.add(e2); list.add(e1); - Assert.assertTrue(list.get(1).containerStartOffset < list.get(0).containerStartOffset); + Assert.assertTrue(list.get(1).getContainerStartOffset() < list.get(0).getContainerStartOffset()); Collections.sort(list); - Assert.assertTrue(list.get(0).containerStartOffset < list.get(1).containerStartOffset); + Assert.assertTrue(list.get(0).getContainerStartOffset() < list.get(1).getContainerStartOffset()); + } + + public static CRAIEntry newEntry(final int seqId, final int start, final int span) { + return newEntry(seqId, start, span, 0, 0, 0); } - private static CRAIEntry newEntry(final int start, final int span) { - return newEntry(1, start, span); + public static CRAIEntry newEntry(final int sequenceId, + final int start, + final int span, + final int containerStartOffset, + final int sliceOffset, + final int sliceSize) { + return new CRAIEntry(sequenceId, start, span, containerStartOffset, sliceOffset, sliceSize); } - private static CRAIEntry newEntry(final int seqId, final int start, final int span) { - final CRAIEntry e1 = new CRAIEntry(); - e1.sequenceId = seqId; - e1.alignmentStart = start; - e1.alignmentSpan = span; - return e1; + public static CRAIEntry newEntrySeqStart(final int seqId, final int start) { + return newEntry(seqId, start, 0); } + public static CRAIEntry newEntryContOffset(final int containerStartOffset) { + return newEntry(1, 0, 0, containerStartOffset, 0, 0); + } + public static CRAIEntry updateStart(final CRAIEntry toClone, final int alignmentStart) { + return newEntry(toClone.getSequenceId(), + alignmentStart, + toClone.getAlignmentSpan()); + } + + public static CRAIEntry updateStartContOffset(final CRAIEntry toClone, + final int alignmentStart, + final int containerStartOffset) { + return newEntry(toClone.getSequenceId(), + alignmentStart, + toClone.getAlignmentSpan(), + containerStartOffset, + toClone.getSliceByteOffset(), + toClone.getSliceByteSize()); + } } diff --git a/src/test/java/htsjdk/samtools/cram/CRAIIndexTest.java b/src/test/java/htsjdk/samtools/cram/CRAIIndexTest.java index 9e48d6b4e0..af82b66740 100644 --- a/src/test/java/htsjdk/samtools/cram/CRAIIndexTest.java +++ b/src/test/java/htsjdk/samtools/cram/CRAIIndexTest.java @@ -20,27 +20,17 @@ public class CRAIIndexTest extends HtsjdkTest { @Test - public void testFind() throws IOException, CloneNotSupportedException { - final List index = new ArrayList(); + public void testFind() { + final List index = new ArrayList<>(); final int sequenceId = 1; - CRAIEntry e = new CRAIEntry(); - e.sequenceId = sequenceId; - e.alignmentStart = 1; - e.alignmentSpan = 1; - e.containerStartOffset = 1; - e.sliceOffset = 1; - e.sliceSize = 0; + CRAIEntry e = CRAIEntryTest.newEntry(sequenceId, 1, 1, 1, 1, 0); index.add(e); - e = e.clone(); - e.alignmentStart = 2; - e.containerStartOffset = 2; + e = CRAIEntryTest.updateStartContOffset(e, 2, 2); index.add(e); - e = e.clone(); - e.alignmentStart = 3; - e.containerStartOffset = 3; + e = CRAIEntryTest.updateStartContOffset(e, 3, 3); index.add(e); Assert.assertFalse(allFoundEntriesIntersectQueryInFind(index, sequenceId, 1, 0)); @@ -60,11 +50,11 @@ private boolean allFoundEntriesIntersectQueryInFind(final List index, int foundCount = 0; for (final CRAIEntry found : CRAIIndex.find(index, sequenceId, start, span)) { foundCount++; - Assert.assertEquals(found.sequenceId, sequenceId); + Assert.assertEquals(found.getSequenceId(), sequenceId); boolean intersects = false; - for (int pos = Math.min(found.alignmentStart, start); pos <= Math.max(found.alignmentStart + found.alignmentSpan, start + span); pos++) { - if (pos >= found.alignmentStart && pos >= start && - pos <= found.alignmentStart + found.alignmentSpan && pos <= start + span) { + for (int pos = Math.min(found.getAlignmentStart(), start); pos <= Math.max(found.getAlignmentStart() + found.getAlignmentSpan(), start + span); pos++) { + if (pos >= found.getAlignmentStart() && pos >= start && + pos <= found.getAlignmentStart() + found.getAlignmentSpan() && pos <= start + span) { intersects = true; break; } @@ -95,15 +85,9 @@ public void testCraiFromFile() throws IOException { doCRAITest(this::getBaiStreamFromFile); } - private void doCRAITest(BiFunction, SeekableStream> getBaiStreamForIndex) throws IOException { - final ArrayList index = new ArrayList(); - final CRAIEntry entry = new CRAIEntry(); - entry.sequenceId = 0; - entry.alignmentStart = 1; - entry.alignmentSpan = 2; - entry.sliceOffset = 3; - entry.sliceSize = 4; - entry.containerStartOffset = 5; + private void doCRAITest(BiFunction, SeekableStream> getBaiStreamForIndex) { + final ArrayList index = new ArrayList<>(); + final CRAIEntry entry = CRAIEntryTest.newEntry(0, 1, 2, 5, 3, 4); index.add(entry); final SAMSequenceDictionary dictionary = new SAMSequenceDictionary(); @@ -112,25 +96,31 @@ private void doCRAITest(BiFunction, Seeka final SeekableStream baiStream = getBaiStreamForIndex.apply(dictionary, index); final DiskBasedBAMFileIndex bamIndex = new DiskBasedBAMFileIndex(baiStream, dictionary); - final BAMFileSpan span = bamIndex.getSpanOverlapping(entry.sequenceId, entry.alignmentStart, entry.alignmentStart); + final BAMFileSpan span = bamIndex.getSpanOverlapping(entry.getSequenceId(), entry.getAlignmentStart(), entry.getAlignmentStart()); Assert.assertNotNull(span); final long[] coordinateArray = span.toCoordinateArray(); Assert.assertEquals(coordinateArray.length, 2); - Assert.assertEquals(coordinateArray[0] >> 16, entry.containerStartOffset); + Assert.assertEquals(coordinateArray[0] >> 16, entry.getContainerStartOffset()); Assert.assertEquals(coordinateArray[1] & 0xFFFF, 1); } public SeekableStream getBaiStreamFromMemory(SAMSequenceDictionary dictionary, final List index) { - try { - ByteArrayOutputStream baos = new ByteArrayOutputStream(); + byte[] written; + try (final ByteArrayOutputStream baos = new ByteArrayOutputStream()) { SAMFileHeader samHeader = new SAMFileHeader(); samHeader.setSortOrder(SAMFileHeader.SortOrder.coordinate); CRAMCRAIIndexer indexer = new CRAMCRAIIndexer(baos, samHeader); - for (CRAIEntry entry: index) { + for (CRAIEntry entry : index) { indexer.addEntry(entry); } indexer.finish(); - final SeekableStream baiStream = CRAIIndex.openCraiFileAsBaiStream(new ByteArrayInputStream(baos.toByteArray()), dictionary); + written = baos.toByteArray(); + } + catch (IOException e) { + throw new RuntimeException(e); + } + + try (final SeekableStream baiStream = CRAIIndex.openCraiFileAsBaiStream(new ByteArrayInputStream(written), dictionary)) { Assert.assertNotNull(baiStream); return baiStream; } @@ -161,23 +151,16 @@ private SeekableStream getBaiStreamFromFile(SAMSequenceDictionary dictionary, fi } @Test - public void testGetLeftmost() throws CloneNotSupportedException { - final List index = new ArrayList(); + public void testGetLeftmost() { + final List index = new ArrayList<>(); Assert.assertNull(CRAIIndex.getLeftmost(index)); - final CRAIEntry e1 = new CRAIEntry(); - e1.sequenceId = 1; - e1.alignmentStart = 2; - e1.alignmentSpan = 3; - e1.containerStartOffset = 4; - e1.sliceOffset = 5; - e1.sliceSize = 6; + final CRAIEntry e1 = CRAIEntryTest.newEntry(1, 2, 3, 4, 5, 6); index.add(e1); // trivial case of single entry in index: Assert.assertEquals(e1, CRAIIndex.getLeftmost(index)); - final CRAIEntry e2 = e1.clone(); - e2.alignmentStart = e1.alignmentStart + 1; + final CRAIEntry e2 = CRAIEntryTest.updateStart(e1, e1.getAlignmentStart() + 1); index.add(e2); Assert.assertEquals(e1, CRAIIndex.getLeftmost(index)); } @@ -192,16 +175,13 @@ public void testFindLastAlignedEntry() { for (int lastAligned = 0; lastAligned < indexSize; lastAligned++) { index.clear(); for (int i = 0; i < indexSize; i++) { - final CRAIEntry e = new CRAIEntry(); - - e.sequenceId = (i <= lastAligned ? 0 : -1); - e.alignmentStart = i; + final CRAIEntry e = CRAIEntryTest.newEntrySeqStart(i <= lastAligned ? 0 : -1, i); index.add(e); } // check expectations are correct before calling findLastAlignedEntry method: - Assert.assertTrue(index.get(lastAligned).sequenceId != -1); + Assert.assertTrue(index.get(lastAligned).getSequenceId() != -1); if (lastAligned < index.size() - 1) { - Assert.assertTrue(index.get(lastAligned + 1).sequenceId == -1); + Assert.assertTrue(index.get(lastAligned + 1).getSequenceId() == -1); } // assert the the found value matches the expectation: Assert.assertEquals(CRAIIndex.findLastAlignedEntry(index), lastAligned); From 448548caccad5cda76886651af7dd7a0e7db3af2 Mon Sep 17 00:00:00 2001 From: Joel Thibault Date: Wed, 9 Jan 2019 10:59:33 -0500 Subject: [PATCH 2/7] containerStartByteOffset --- .../java/htsjdk/samtools/cram/CRAIEntry.java | 25 +++++++++---------- .../java/htsjdk/samtools/cram/CRAIIndex.java | 2 +- .../java/htsjdk/samtools/SamIndexesTest.java | 6 ++--- .../htsjdk/samtools/cram/CRAIEntryTest.java | 8 +++--- .../htsjdk/samtools/cram/CRAIIndexTest.java | 2 +- 5 files changed, 21 insertions(+), 22 deletions(-) diff --git a/src/main/java/htsjdk/samtools/cram/CRAIEntry.java b/src/main/java/htsjdk/samtools/cram/CRAIEntry.java index a83acbb569..e2ad84ab0a 100644 --- a/src/main/java/htsjdk/samtools/cram/CRAIEntry.java +++ b/src/main/java/htsjdk/samtools/cram/CRAIEntry.java @@ -2,7 +2,6 @@ import htsjdk.samtools.SAMRecord; import htsjdk.samtools.cram.structure.Container; -import htsjdk.samtools.cram.structure.Slice; import htsjdk.samtools.util.RuntimeIOException; import java.io.IOException; @@ -18,7 +17,7 @@ public class CRAIEntry implements Comparable { private final int sequenceId; private final int alignmentStart; private final int alignmentSpan; - private final long containerStartOffset; + private final long containerStartByteOffset; private final int sliceByteOffset; private final int sliceByteSize; @@ -28,13 +27,13 @@ public class CRAIEntry implements Comparable { public CRAIEntry(final int sequenceId, final int alignmentStart, final int alignmentSpan, - final long containerStartOffset, + final long containerStartByteOffset, final int sliceByteOffset, final int sliceByteSize) { this.sequenceId = sequenceId; this.alignmentStart = alignmentStart; this.alignmentSpan = alignmentSpan; - this.containerStartOffset = containerStartOffset; + this.containerStartByteOffset = containerStartByteOffset; this.sliceByteOffset = sliceByteOffset; this.sliceByteSize = sliceByteSize; } @@ -45,7 +44,7 @@ public CRAIEntry(final int sequenceId, * @param line string formatted as a CRAI index entry * @throws CRAIIndex.CRAIIndexException */ - public CRAIEntry(final String line) throws CRAIIndex.CRAIIndexException { + public CRAIEntry(final String line) throws CRAIIndex.CRAIIndexException { final String[] chunks = line.split("\t"); if (chunks.length != CRAI_INDEX_COLUMNS) { throw new CRAIIndex.CRAIIndexException( @@ -56,7 +55,7 @@ public CRAIEntry(final String line) throws CRAIIndex.CRAIIndexException { sequenceId = Integer.parseInt(chunks[0]); alignmentStart = Integer.parseInt(chunks[1]); alignmentSpan = Integer.parseInt(chunks[2]); - containerStartOffset = Long.parseLong(chunks[3]); + containerStartByteOffset = Long.parseLong(chunks[3]); sliceByteOffset = Integer.parseInt(chunks[4]); sliceByteSize = Integer.parseInt(chunks[5]); } catch (final NumberFormatException e) { @@ -84,7 +83,7 @@ public void writeToStream(OutputStream os) { private String serializeToString() { return String.format(ENTRY_FORMAT, sequenceId, alignmentStart, alignmentSpan, - containerStartOffset, sliceByteOffset, sliceByteSize); + containerStartByteOffset, sliceByteOffset, sliceByteSize); } @Override @@ -108,7 +107,7 @@ public int compareTo(final CRAIEntry o) { return alignmentStart - o.alignmentStart; } - return (int) (containerStartOffset - o.containerStartOffset); + return (int) (containerStartByteOffset - o.containerStartByteOffset); } public static Comparator byEnd = new Comparator() { @@ -122,7 +121,7 @@ public int compare(final CRAIEntry o1, final CRAIEntry o2) { return o1.alignmentStart + o1.alignmentSpan - o2.alignmentStart - o2.alignmentSpan; } - return (int) (o1.containerStartOffset - o2.containerStartOffset); + return (int) (o1.containerStartByteOffset - o2.containerStartByteOffset); } }; @@ -137,7 +136,7 @@ public int compare(final CRAIEntry o1, final CRAIEntry o2) { return o1.alignmentStart - o2.alignmentStart; } - return (int) (o1.containerStartOffset - o2.containerStartOffset); + return (int) (o1.containerStartByteOffset - o2.containerStartByteOffset); } }; @@ -155,7 +154,7 @@ public int compare(CRAIEntry o1, CRAIEntry o2) { if (o1.alignmentStart != o2.alignmentStart) return o1.alignmentStart - o2.alignmentStart; - return (int) (o1.containerStartOffset - o2.containerStartOffset); + return (int) (o1.containerStartByteOffset - o2.containerStartByteOffset); } }; @@ -189,8 +188,8 @@ public int getAlignmentSpan() { return alignmentSpan; } - public long getContainerStartOffset() { - return containerStartOffset; + public long getContainerStartByteOffset() { + return containerStartByteOffset; } public int getSliceByteOffset() { diff --git a/src/main/java/htsjdk/samtools/cram/CRAIIndex.java b/src/main/java/htsjdk/samtools/cram/CRAIIndex.java index a684a3be39..964023e6d8 100644 --- a/src/main/java/htsjdk/samtools/cram/CRAIIndex.java +++ b/src/main/java/htsjdk/samtools/cram/CRAIIndex.java @@ -86,7 +86,7 @@ public static SeekableStream openCraiFileAsBaiStream(final InputStream indexStre for (final CRAIEntry entry : full) { final Slice slice = new Slice(); - slice.containerOffset = entry.getContainerStartOffset(); + slice.containerOffset = entry.getContainerStartByteOffset(); slice.alignmentStart = entry.getAlignmentStart(); slice.alignmentSpan = entry.getAlignmentSpan(); slice.sequenceId = entry.getSequenceId(); diff --git a/src/test/java/htsjdk/samtools/SamIndexesTest.java b/src/test/java/htsjdk/samtools/SamIndexesTest.java index 43d8c2ff2f..51bfb2b825 100644 --- a/src/test/java/htsjdk/samtools/SamIndexesTest.java +++ b/src/test/java/htsjdk/samtools/SamIndexesTest.java @@ -87,7 +87,7 @@ public void testCraiInMemory() throws IOException { Assert.assertNotNull(span); final long[] coordinateArray = span.toCoordinateArray(); Assert.assertEquals(coordinateArray.length, 2); - Assert.assertEquals(coordinateArray[0] >> 16, entry.getContainerStartOffset()); + Assert.assertEquals(coordinateArray[0] >> 16, entry.getContainerStartByteOffset()); Assert.assertEquals(coordinateArray[1] & 0xFFFF, 1); } @@ -116,7 +116,7 @@ public void testCraiFromFile() throws IOException { Assert.assertNotNull(span); final long[] coordinateArray = span.toCoordinateArray(); Assert.assertEquals(coordinateArray.length, 2); - Assert.assertEquals(coordinateArray[0] >> 16, entry.getContainerStartOffset()); + Assert.assertEquals(coordinateArray[0] >> 16, entry.getContainerStartByteOffset()); Assert.assertEquals(coordinateArray[1] & 0xFFFF, 1); } @@ -168,7 +168,7 @@ public void testOpenIndexUrlAsBaiOrNull() throws IOException { Assert.assertNotNull(span); final long[] coordinateArray = span.toCoordinateArray(); Assert.assertEquals(coordinateArray.length, 2); - Assert.assertEquals(coordinateArray[0] >> 16, entry.getContainerStartOffset()); + Assert.assertEquals(coordinateArray[0] >> 16, entry.getContainerStartByteOffset()); Assert.assertEquals(coordinateArray[1] & 0xFFFF, 1); } } diff --git a/src/test/java/htsjdk/samtools/cram/CRAIEntryTest.java b/src/test/java/htsjdk/samtools/cram/CRAIEntryTest.java index b697cbbd05..171dbda0b9 100644 --- a/src/test/java/htsjdk/samtools/cram/CRAIEntryTest.java +++ b/src/test/java/htsjdk/samtools/cram/CRAIEntryTest.java @@ -33,7 +33,7 @@ public void testFromContainer() { Assert.assertEquals(entry.getSequenceId(), slice.sequenceId); Assert.assertEquals(entry.getAlignmentStart(), slice.alignmentStart); Assert.assertEquals(entry.getAlignmentSpan(), slice.alignmentSpan); - Assert.assertEquals(entry.getContainerStartOffset(), slice.containerOffset); + Assert.assertEquals(entry.getContainerStartByteOffset(), slice.containerOffset); } @Test @@ -52,7 +52,7 @@ public void testFromCraiLine() { Assert.assertEquals(entry.getSequenceId(), sequenceId); Assert.assertEquals(entry.getAlignmentStart(), alignmentStart); Assert.assertEquals(entry.getAlignmentSpan(), alignmentSpan); - Assert.assertEquals(entry.getContainerStartOffset(), containerOffset); + Assert.assertEquals(entry.getContainerStartByteOffset(), containerOffset); Assert.assertEquals(entry.getSliceByteOffset(), sliceOffset); Assert.assertEquals(entry.getSliceByteSize(), sliceSize); } @@ -119,9 +119,9 @@ public void testCompareTo () { e2 = newEntryContOffset(200); list.add(e2); list.add(e1); - Assert.assertTrue(list.get(1).getContainerStartOffset() < list.get(0).getContainerStartOffset()); + Assert.assertTrue(list.get(1).getContainerStartByteOffset() < list.get(0).getContainerStartByteOffset()); Collections.sort(list); - Assert.assertTrue(list.get(0).getContainerStartOffset() < list.get(1).getContainerStartOffset()); + Assert.assertTrue(list.get(0).getContainerStartByteOffset() < list.get(1).getContainerStartByteOffset()); } public static CRAIEntry newEntry(final int seqId, final int start, final int span) { diff --git a/src/test/java/htsjdk/samtools/cram/CRAIIndexTest.java b/src/test/java/htsjdk/samtools/cram/CRAIIndexTest.java index af82b66740..2dd4133b5f 100644 --- a/src/test/java/htsjdk/samtools/cram/CRAIIndexTest.java +++ b/src/test/java/htsjdk/samtools/cram/CRAIIndexTest.java @@ -100,7 +100,7 @@ private void doCRAITest(BiFunction, Seeka Assert.assertNotNull(span); final long[] coordinateArray = span.toCoordinateArray(); Assert.assertEquals(coordinateArray.length, 2); - Assert.assertEquals(coordinateArray[0] >> 16, entry.getContainerStartOffset()); + Assert.assertEquals(coordinateArray[0] >> 16, entry.getContainerStartByteOffset()); Assert.assertEquals(coordinateArray[1] & 0xFFFF, 1); } From 3774387e8d2f772909e19e9fb533aa65c8d58be1 Mon Sep 17 00:00:00 2001 From: Joel Thibault Date: Wed, 9 Jan 2019 11:08:17 -0500 Subject: [PATCH 3/7] slightly better test --- src/test/java/htsjdk/samtools/cram/CRAIEntryTest.java | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/test/java/htsjdk/samtools/cram/CRAIEntryTest.java b/src/test/java/htsjdk/samtools/cram/CRAIEntryTest.java index 171dbda0b9..8b031a2ca8 100644 --- a/src/test/java/htsjdk/samtools/cram/CRAIEntryTest.java +++ b/src/test/java/htsjdk/samtools/cram/CRAIEntryTest.java @@ -22,7 +22,9 @@ public void testFromContainer() { slice.alignmentStart = 2; slice.alignmentSpan = 3; slice.containerOffset = 4; - container.landmarks = new int[]{5}; + slice.offset = 5; + slice.size = 6; + container.landmarks = new int[]{7}; container.slices = new Slice[]{slice}; final List entries = CRAIEntry.fromContainer(container); @@ -34,6 +36,8 @@ public void testFromContainer() { Assert.assertEquals(entry.getAlignmentStart(), slice.alignmentStart); Assert.assertEquals(entry.getAlignmentSpan(), slice.alignmentSpan); Assert.assertEquals(entry.getContainerStartByteOffset(), slice.containerOffset); + Assert.assertEquals(entry.getSliceByteOffset(), slice.offset); + Assert.assertEquals(entry.getSliceByteSize(), slice.size); } @Test From bbd533f8743c0626fd48092dea95b484b22b3378 Mon Sep 17 00:00:00 2001 From: Joel Thibault Date: Wed, 9 Jan 2019 17:17:58 -0500 Subject: [PATCH 4/7] comments about Slice indexing values --- src/main/java/htsjdk/samtools/cram/structure/Slice.java | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/main/java/htsjdk/samtools/cram/structure/Slice.java b/src/main/java/htsjdk/samtools/cram/structure/Slice.java index 492fd9143c..3ff03ff007 100644 --- a/src/main/java/htsjdk/samtools/cram/structure/Slice.java +++ b/src/main/java/htsjdk/samtools/cram/structure/Slice.java @@ -60,10 +60,17 @@ public class Slice { public Block embeddedRefBlock; public Map external; - // for indexing purposes: + // for indexing purposes + + // the Slice's offset in bytes from the beginning of its Container + // equal to Container.landmarks[Slice.index] of its enclosing Container public int offset = -1; + // this Slice's Container's offset in bytes from the beginning of the stream + // equal to Container.offset of its enclosing Container public long containerOffset = -1; + // this Slice's size in bytes public int size = -1; + // this Slice's index within its Container public int index = -1; // to pass this to the container: From 91ced436391f232e5d6217cf8ae630bb37e48676 Mon Sep 17 00:00:00 2001 From: Joel Thibault Date: Wed, 9 Jan 2019 17:22:37 -0500 Subject: [PATCH 5/7] Add getCraiEntry() and getCraiEntries() --- src/main/java/htsjdk/samtools/cram/CRAIEntry.java | 7 +------ .../htsjdk/samtools/cram/structure/Container.java | 11 +++++++++++ .../java/htsjdk/samtools/cram/structure/Slice.java | 10 +++++++++- src/test/java/htsjdk/samtools/cram/CRAIEntryTest.java | 2 +- 4 files changed, 22 insertions(+), 8 deletions(-) diff --git a/src/main/java/htsjdk/samtools/cram/CRAIEntry.java b/src/main/java/htsjdk/samtools/cram/CRAIEntry.java index e2ad84ab0a..4a76334ae4 100644 --- a/src/main/java/htsjdk/samtools/cram/CRAIEntry.java +++ b/src/main/java/htsjdk/samtools/cram/CRAIEntry.java @@ -2,6 +2,7 @@ import htsjdk.samtools.SAMRecord; import htsjdk.samtools.cram.structure.Container; +import htsjdk.samtools.cram.structure.Slice; import htsjdk.samtools.util.RuntimeIOException; import java.io.IOException; @@ -89,12 +90,6 @@ private String serializeToString() { @Override public String toString() { return serializeToString(); } - public static List fromContainer(final Container container) { - return Arrays.stream(container.slices) - .map(slice -> slice.getCRAIEntry(slice.containerOffset)) - .collect(Collectors.toList()); - } - @Override public int compareTo(final CRAIEntry o) { if (o == null) { diff --git a/src/main/java/htsjdk/samtools/cram/structure/Container.java b/src/main/java/htsjdk/samtools/cram/structure/Container.java index bdeaa9d058..082ce3a0d8 100644 --- a/src/main/java/htsjdk/samtools/cram/structure/Container.java +++ b/src/main/java/htsjdk/samtools/cram/structure/Container.java @@ -18,8 +18,13 @@ package htsjdk.samtools.cram.structure; import htsjdk.samtools.SAMRecord; +import htsjdk.samtools.cram.CRAIEntry; import htsjdk.samtools.cram.structure.block.Block; +import java.util.Arrays; +import java.util.List; +import java.util.stream.Collectors; + public class Container { // container header as defined in the specs: /** @@ -53,6 +58,12 @@ public class Container { */ public long offset; + public List getCraiEntries() { + return Arrays.stream(slices) + .map(Slice::getCRAIEntry) + .collect(Collectors.toList()); + } + @Override public String toString() { return String diff --git a/src/main/java/htsjdk/samtools/cram/structure/Slice.java b/src/main/java/htsjdk/samtools/cram/structure/Slice.java index 3ff03ff007..ab881f430d 100644 --- a/src/main/java/htsjdk/samtools/cram/structure/Slice.java +++ b/src/main/java/htsjdk/samtools/cram/structure/Slice.java @@ -299,10 +299,18 @@ public Map getMultiRefAlignmentSpans(final CompressionHe return reader.getReferenceSpans(); } + /** + * Generate a CRAI Index entry from this Slice + * @return a new CRAI Index Entry + */ + public CRAIEntry getCRAIEntry() { + return new CRAIEntry(sequenceId, alignmentStart, alignmentSpan, containerOffset, offset, size); + } /** * Generate a CRAI Index entry from this Slice and the container offset. * - * TODO: investigate why we can't simply use the Slice's own containerOffset here + * TODO: investigate why we sometimes need to pass in an external containerStartOffset + * because this Slice's containerOffset is incorrect * * @param containerStartOffset the byte offset of this Slice's Container * @return a new CRAI Index Entry diff --git a/src/test/java/htsjdk/samtools/cram/CRAIEntryTest.java b/src/test/java/htsjdk/samtools/cram/CRAIEntryTest.java index 8b031a2ca8..6afd9a8d10 100644 --- a/src/test/java/htsjdk/samtools/cram/CRAIEntryTest.java +++ b/src/test/java/htsjdk/samtools/cram/CRAIEntryTest.java @@ -27,7 +27,7 @@ public void testFromContainer() { container.landmarks = new int[]{7}; container.slices = new Slice[]{slice}; - final List entries = CRAIEntry.fromContainer(container); + final List entries = container.getCraiEntries(); Assert.assertNotNull(entries); Assert.assertEquals(entries.size(), 1); final CRAIEntry entry = entries.get(0); From 5ed180f5b94748ae0193910f362ec686843c720f Mon Sep 17 00:00:00 2001 From: Joel Thibault Date: Wed, 9 Jan 2019 17:23:37 -0500 Subject: [PATCH 6/7] all caps CRAI --- src/main/java/htsjdk/samtools/cram/structure/Container.java | 2 +- src/test/java/htsjdk/samtools/cram/CRAIEntryTest.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/htsjdk/samtools/cram/structure/Container.java b/src/main/java/htsjdk/samtools/cram/structure/Container.java index 082ce3a0d8..78c974c1e4 100644 --- a/src/main/java/htsjdk/samtools/cram/structure/Container.java +++ b/src/main/java/htsjdk/samtools/cram/structure/Container.java @@ -58,7 +58,7 @@ public class Container { */ public long offset; - public List getCraiEntries() { + public List getCRAIEntries() { return Arrays.stream(slices) .map(Slice::getCRAIEntry) .collect(Collectors.toList()); diff --git a/src/test/java/htsjdk/samtools/cram/CRAIEntryTest.java b/src/test/java/htsjdk/samtools/cram/CRAIEntryTest.java index 6afd9a8d10..f767f0c257 100644 --- a/src/test/java/htsjdk/samtools/cram/CRAIEntryTest.java +++ b/src/test/java/htsjdk/samtools/cram/CRAIEntryTest.java @@ -27,7 +27,7 @@ public void testFromContainer() { container.landmarks = new int[]{7}; container.slices = new Slice[]{slice}; - final List entries = container.getCraiEntries(); + final List entries = container.getCRAIEntries(); Assert.assertNotNull(entries); Assert.assertEquals(entries.size(), 1); final CRAIEntry entry = entries.get(0); From 8e4098cd72c07dafabe8f22a78325278b83aa132 Mon Sep 17 00:00:00 2001 From: Joel Thibault Date: Wed, 9 Jan 2019 17:32:25 -0500 Subject: [PATCH 7/7] offset comments in CRAIEntry --- src/main/java/htsjdk/samtools/cram/CRAIEntry.java | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/main/java/htsjdk/samtools/cram/CRAIEntry.java b/src/main/java/htsjdk/samtools/cram/CRAIEntry.java index 4a76334ae4..6a31fad1bd 100644 --- a/src/main/java/htsjdk/samtools/cram/CRAIEntry.java +++ b/src/main/java/htsjdk/samtools/cram/CRAIEntry.java @@ -18,7 +18,12 @@ public class CRAIEntry implements Comparable { private final int sequenceId; private final int alignmentStart; private final int alignmentSpan; + + // this Slice's Container's offset in bytes from the beginning of the stream + // equal to Slice.containerOffset and Container.offset private final long containerStartByteOffset; + // this Slice's offset in bytes from the beginning of its Container + // equal to Slice.offset and Container.landmarks[Slice.index] private final int sliceByteOffset; private final int sliceByteSize;