diff --git a/src/main/java/htsjdk/samtools/cram/io/CramInt.java b/src/main/java/htsjdk/samtools/cram/io/CramInt.java index 1b0432f9b6..58a14d15db 100644 --- a/src/main/java/htsjdk/samtools/cram/io/CramInt.java +++ b/src/main/java/htsjdk/samtools/cram/io/CramInt.java @@ -17,7 +17,7 @@ public class CramInt { * @return an integer value read * @throws IOException as per java IO contract */ - public static int int32(final InputStream inputStream) throws IOException { + public static int readInt32(final InputStream inputStream) throws IOException { return inputStream.read() | inputStream.read() << 8 | inputStream.read() << 16 | inputStream.read() << 24; } @@ -27,7 +27,7 @@ public static int int32(final InputStream inputStream) throws IOException { * @param data input stream to read from * @return an integer value read */ - public static int int32(final byte[] data) { + public static int readInt32(final byte[] data) { if (data.length != 4) throw new IllegalArgumentException("Expecting a 4-byte integer. "); return (0xFF & data[0]) | ((0xFF & data[1]) << 8) | ((0xFF & data[2]) << 16) | ((0xFF & data[3]) << 24); @@ -39,8 +39,11 @@ public static int int32(final byte[] data) { * @param buffer {@link ByteBuffer} to read from * @return an integer value read from the buffer */ - public static int int32(final ByteBuffer buffer) { - return buffer.get() | buffer.get() << 8 | buffer.get() << 16 | buffer.get() << 24; + public static int readInt32(final ByteBuffer buffer) { + return (0xFF & buffer.get()) | + (0xFF & buffer.get()) << 8 | + (0xFF & buffer.get()) << 16 | + (0xFF & buffer.get()) << 24; } /** @@ -61,7 +64,7 @@ public static int writeInt32(final int value, final OutputStream outputStream) t } /** - * Write int value to {@link OutputStream} encoded as CRAM int data type. + * Write int value to an array of bytes encoded as CRAM int data type. * * @param value value to be written out * @return the byte array holding the value encoded as CRAM int data type @@ -74,5 +77,4 @@ public static byte[] writeInt32(final int value) { data[3] = (byte) (value >> 24 & 0xFF); return data; } - } diff --git a/src/main/java/htsjdk/samtools/cram/io/CramArray.java b/src/main/java/htsjdk/samtools/cram/io/CramIntArray.java similarity index 97% rename from src/main/java/htsjdk/samtools/cram/io/CramArray.java rename to src/main/java/htsjdk/samtools/cram/io/CramIntArray.java index 819319e476..8ce5421de5 100644 --- a/src/main/java/htsjdk/samtools/cram/io/CramArray.java +++ b/src/main/java/htsjdk/samtools/cram/io/CramIntArray.java @@ -7,7 +7,7 @@ /** * Methods to read and write CRAM array of integers data type. */ -public class CramArray { +public class CramIntArray { /** * Read CRAM int array from a {@link InputStream}. * diff --git a/src/main/java/htsjdk/samtools/cram/structure/Block.java b/src/main/java/htsjdk/samtools/cram/structure/Block.java index 2e99b8a76c..b74577a2f8 100644 --- a/src/main/java/htsjdk/samtools/cram/structure/Block.java +++ b/src/main/java/htsjdk/samtools/cram/structure/Block.java @@ -91,7 +91,7 @@ public static Block readFromInputStream(final int major, InputStream inputStream InputStreamUtils.readFully(inputStream, block.compressedContent, 0, block.compressedContent.length); if (v3OrHigher) { final int actualChecksum = ((CRC32InputStream) inputStream).getCRC32(); - final int checksum = CramInt.int32(inputStream); + final int checksum = CramInt.readInt32(inputStream); if (checksum != actualChecksum) throw new RuntimeException(String.format("Block CRC32 mismatch: %04x vs %04x", checksum, actualChecksum)); } diff --git a/src/main/java/htsjdk/samtools/cram/structure/ContainerHeaderIO.java b/src/main/java/htsjdk/samtools/cram/structure/ContainerHeaderIO.java index fd6edfe178..7e6e203f13 100644 --- a/src/main/java/htsjdk/samtools/cram/structure/ContainerHeaderIO.java +++ b/src/main/java/htsjdk/samtools/cram/structure/ContainerHeaderIO.java @@ -18,7 +18,7 @@ package htsjdk.samtools.cram.structure; import htsjdk.samtools.cram.io.CRC32OutputStream; -import htsjdk.samtools.cram.io.CramArray; +import htsjdk.samtools.cram.io.CramIntArray; import htsjdk.samtools.cram.io.CramInt; import htsjdk.samtools.cram.io.ITF8; import htsjdk.samtools.cram.io.LTF8; @@ -49,7 +49,7 @@ public boolean readContainerHeader(final int major, final Container container, f peek[i] = (byte) character; } - container.containerByteSize = CramInt.int32(peek); + container.containerByteSize = CramInt.readInt32(peek); container.sequenceId = ITF8.readUnsignedITF8(inputStream); container.alignmentStart = ITF8.readUnsignedITF8(inputStream); container.alignmentSpan = ITF8.readUnsignedITF8(inputStream); @@ -57,9 +57,9 @@ public boolean readContainerHeader(final int major, final Container container, f container.globalRecordCounter = LTF8.readUnsignedLTF8(inputStream); container.bases = LTF8.readUnsignedLTF8(inputStream); container.blockCount = ITF8.readUnsignedITF8(inputStream); - container.landmarks = CramArray.array(inputStream); + container.landmarks = CramIntArray.array(inputStream); if (major >= 3) - container.checksum = CramInt.int32(inputStream); + container.checksum = CramInt.readInt32(inputStream); return true; } @@ -84,7 +84,7 @@ public int writeContainerHeader(final int major, final Container container, fina length += (LTF8.writeUnsignedLTF8(container.globalRecordCounter, crc32OutputStream) + 7) / 8; length += (LTF8.writeUnsignedLTF8(container.bases, crc32OutputStream) + 7) / 8; length += (ITF8.writeUnsignedITF8(container.blockCount, crc32OutputStream) + 7) / 8; - length += (CramArray.write(container.landmarks, crc32OutputStream) + 7) / 8; + length += (CramIntArray.write(container.landmarks, crc32OutputStream) + 7) / 8; if (major >= 3) { outputStream.write(crc32OutputStream.getCrc32_LittleEndian()); diff --git a/src/main/java/htsjdk/samtools/cram/structure/SliceIO.java b/src/main/java/htsjdk/samtools/cram/structure/SliceIO.java index 4ccbb36969..3afca735dc 100644 --- a/src/main/java/htsjdk/samtools/cram/structure/SliceIO.java +++ b/src/main/java/htsjdk/samtools/cram/structure/SliceIO.java @@ -22,7 +22,7 @@ import htsjdk.samtools.SAMTagUtil; import htsjdk.samtools.ValidationStringency; import htsjdk.samtools.cram.common.CramVersions; -import htsjdk.samtools.cram.io.CramArray; +import htsjdk.samtools.cram.io.CramIntArray; import htsjdk.samtools.cram.io.ITF8; import htsjdk.samtools.cram.io.InputStreamUtils; import htsjdk.samtools.cram.io.LTF8; @@ -54,7 +54,7 @@ private static void parseSliceHeaderBlock(final int major, final Slice slice) th slice.globalRecordCounter = LTF8.readUnsignedLTF8(inputStream); slice.nofBlocks = ITF8.readUnsignedITF8(inputStream); - slice.contentIDs = CramArray.array(inputStream); + slice.contentIDs = CramIntArray.array(inputStream); slice.embeddedRefBlockContentID = ITF8.readUnsignedITF8(inputStream); slice.refMD5 = new byte[16]; InputStreamUtils.readFully(inputStream, slice.refMD5, 0, slice.refMD5.length); @@ -85,7 +85,7 @@ private static byte[] createSliceHeaderBlockContent(final int major, final Slice int i = 0; for (final int id : slice.external.keySet()) slice.contentIDs[i++] = id; - CramArray.write(slice.contentIDs, byteArrayOutputStream); + CramIntArray.write(slice.contentIDs, byteArrayOutputStream); ITF8.writeUnsignedITF8(slice.embeddedRefBlockContentID, byteArrayOutputStream); byteArrayOutputStream.write(slice.refMD5 == null ? new byte[16] : slice.refMD5); diff --git a/src/test/java/htsjdk/cram/io/ExternalCompressionTest.java b/src/test/java/htsjdk/cram/io/ExternalCompressionTest.java deleted file mode 100644 index 60a65197de..0000000000 --- a/src/test/java/htsjdk/cram/io/ExternalCompressionTest.java +++ /dev/null @@ -1,29 +0,0 @@ -package htsjdk.cram.io; - -import htsjdk.HtsjdkTest; -import htsjdk.samtools.cram.io.ExternalCompression; -import org.testng.Assert; -import org.testng.annotations.Test; - -import java.io.File; -import java.io.IOException; -import java.nio.file.Files; - -public class ExternalCompressionTest extends HtsjdkTest { - public static final File BZIP2_FILE = new File("src/test/resources/htsjdk/samtools/cram/io/bzip2-test.bz2"); - public static final byte [] TEST_BYTES = "This is a simple string to test BZip2".getBytes(); - - @Test - public void testBZip2Decompression() throws IOException { - final byte [] input = Files.readAllBytes(BZIP2_FILE.toPath()); - final byte [] output = ExternalCompression.unbzip2(input); - Assert.assertEquals(output, "BZip2 worked".getBytes()); - } - - @Test - public void testBZip2Roundtrip() throws IOException { - final byte [] compressed = ExternalCompression.bzip2(TEST_BYTES); - final byte [] restored = ExternalCompression.unbzip2(compressed); - Assert.assertEquals(TEST_BYTES, restored); - } -} diff --git a/src/test/java/htsjdk/samtools/cram/VersionTest.java b/src/test/java/htsjdk/samtools/cram/VersionTest.java index be2851eb6d..c4b27d89b1 100644 --- a/src/test/java/htsjdk/samtools/cram/VersionTest.java +++ b/src/test/java/htsjdk/samtools/cram/VersionTest.java @@ -91,7 +91,7 @@ public void test_V3() throws IOException { CRC32 digester = new CRC32(); digester.update(containerHeaderBytes); Assert.assertEquals(container.checksum, (int) digester.getValue()); - Assert.assertEquals(CramInt.int32(crcBytes), container.checksum); + Assert.assertEquals(CramInt.readInt32(crcBytes), container.checksum); // test block's crc: cramSeekableStream.seek(firstBlockStart); @@ -102,6 +102,6 @@ public void test_V3() throws IOException { crcBytes = InputStreamUtils.readFully(cramSeekableStream, crcByteSize); digester = new CRC32(); digester.update(blockBytes); - Assert.assertEquals(CramInt.int32(crcBytes), (int) digester.getValue()); + Assert.assertEquals(CramInt.readInt32(crcBytes), (int) digester.getValue()); } } diff --git a/src/test/java/htsjdk/samtools/cram/io/CramIntArrayTest.java b/src/test/java/htsjdk/samtools/cram/io/CramIntArrayTest.java new file mode 100644 index 0000000000..22f5b5dc2a --- /dev/null +++ b/src/test/java/htsjdk/samtools/cram/io/CramIntArrayTest.java @@ -0,0 +1,27 @@ +package htsjdk.samtools.cram.io; + +import htsjdk.HtsjdkTest; +import org.testng.Assert; +import org.testng.annotations.Test; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.util.List; + +public class CramIntArrayTest extends HtsjdkTest { + + @Test(dataProvider = "testInt32Arrays", dataProviderClass = IOTestCases.class) + public void runTest(List ints) throws IOException { + + int[] inputArray = ints.stream().mapToInt(Integer::intValue).toArray(); + try (ByteArrayOutputStream baos = new ByteArrayOutputStream()) { + CramIntArray.write(inputArray, baos); + + try (ByteArrayInputStream bais = new ByteArrayInputStream(baos.toByteArray())) { + int[] outputArray = CramIntArray.array(bais); + Assert.assertEquals(inputArray, outputArray, "Arrays did not match"); + } + } + } +} diff --git a/src/test/java/htsjdk/samtools/cram/io/CramIntTest.java b/src/test/java/htsjdk/samtools/cram/io/CramIntTest.java new file mode 100644 index 0000000000..7e3c2de67a --- /dev/null +++ b/src/test/java/htsjdk/samtools/cram/io/CramIntTest.java @@ -0,0 +1,106 @@ +package htsjdk.samtools.cram.io; + +import htsjdk.HtsjdkTest; +import org.testng.Assert; +import org.testng.annotations.Test; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.List; + +public class CramIntTest extends HtsjdkTest { + private byte[] streamWritten(List ints) throws IOException { + try (ByteArrayOutputStream baos = new ByteArrayOutputStream()) { + for (int value : ints) { + CramInt.writeInt32(value, baos); + } + return baos.toByteArray(); + } + } + + private byte[] byteArrayWritten(List ints) { + final int bufSize = 4; + final int arraySize = bufSize * ints.size(); + byte[] array = new byte[arraySize]; + + int offset = 0; + byte[] arrayBuffer; + + for (int value : ints) { + arrayBuffer = CramInt.writeInt32(value); + System.arraycopy(arrayBuffer, 0, array, offset, bufSize); + offset += bufSize; + } + + return array; + } + + @Test(dataProvider = "littleEndianTests32", dataProviderClass = IOTestCases.class) + public void checkStreamLittleEndian(Integer testInt, byte[] expected) throws IOException { + List ints = new ArrayList<>(); + ints.add(testInt); + + byte[] actual = streamWritten(ints); + Assert.assertEquals(actual, expected); + } + + @Test(dataProvider = "littleEndianTests32", dataProviderClass = IOTestCases.class) + public void checkByteArrayLittleEndian(Integer testInt, byte[] expected) { + List ints = new ArrayList<>(); + ints.add(testInt); + + byte[] actual = byteArrayWritten(ints); + Assert.assertEquals(actual, expected); + } + + // Combinatorial tests of 2 CramInt write methods x 3 CramInt read methods + + @Test(dataProvider = "testInt32Arrays", dataProviderClass = IOTestCases.class) + public void matchStreamRead(List ints) throws IOException { + byte[][] inputs = {streamWritten(ints), byteArrayWritten(ints)}; + + for (byte[] byteArray : inputs) { + try (ByteArrayInputStream bais = new ByteArrayInputStream(byteArray)) { + for (int value : ints) { + int fromStream = CramInt.readInt32(bais); + Assert.assertEquals(fromStream, value, "Value did not match"); + } + } + } + } + + @Test(dataProvider = "testInt32Arrays", dataProviderClass = IOTestCases.class) + public void matchBufferRead(List ints) throws IOException { + byte[][] inputs = {streamWritten(ints), byteArrayWritten(ints)}; + + for (byte[] byteArray : inputs) { + ByteBuffer bb = ByteBuffer.wrap(byteArray); + + for (int value : ints) { + int fromBuffer = CramInt.readInt32(bb); + Assert.assertEquals(fromBuffer, value, "Value did not match"); + } + } + } + + @Test(dataProvider = "testInt32Arrays", dataProviderClass = IOTestCases.class) + public void matchByteArrayRead(List ints) throws IOException { + byte[][] inputs = {streamWritten(ints), byteArrayWritten(ints)}; + + for (byte[] inputArray : inputs) { + final int bufSize = 4; + byte[] outBuf = new byte[bufSize]; + int offset = 0; + + for (int value : ints) { + System.arraycopy(inputArray, offset, outBuf, 0, bufSize); + int fromBuffer = CramInt.readInt32(outBuf); + Assert.assertEquals(fromBuffer, value, "Value did not match"); + offset += bufSize; + } + } + } +} diff --git a/src/test/java/htsjdk/samtools/cram/io/ExternalCompressionTest.java b/src/test/java/htsjdk/samtools/cram/io/ExternalCompressionTest.java new file mode 100644 index 0000000000..7ee63e3c76 --- /dev/null +++ b/src/test/java/htsjdk/samtools/cram/io/ExternalCompressionTest.java @@ -0,0 +1,54 @@ +package htsjdk.samtools.cram.io; + +import htsjdk.HtsjdkTest; +import htsjdk.samtools.cram.encoding.rans.RANS; +import org.testng.Assert; +import org.testng.annotations.Test; + +import java.io.File; +import java.io.IOException; +import java.nio.file.Files; + +public class ExternalCompressionTest extends HtsjdkTest { + public static final File BZIP2_FILE = new File("src/test/resources/htsjdk/samtools/cram/io/bzip2-test.bz2"); + public static final byte[] TEST_BYTES = "This is a simple string to test compression".getBytes(); + + @Test + public void testBZip2Decompression() throws IOException { + final byte [] input = Files.readAllBytes(BZIP2_FILE.toPath()); + final byte [] output = ExternalCompression.unbzip2(input); + Assert.assertEquals(output, "BZip2 worked".getBytes()); + } + + @Test + public void testGZipRoundtrip() throws IOException { + final byte [] compressed = ExternalCompression.gzip(TEST_BYTES); + final byte [] restored = ExternalCompression.gunzip(compressed); + Assert.assertEquals(TEST_BYTES, restored); + } + + @Test + public void testBZip2Roundtrip() throws IOException { + final byte [] compressed = ExternalCompression.bzip2(TEST_BYTES); + final byte [] restored = ExternalCompression.unbzip2(compressed); + Assert.assertEquals(TEST_BYTES, restored); + } + + @Test + public void testRANSRoundtrip() { + for(RANS.ORDER order : RANS.ORDER.values()) { + final byte[] compressed = ExternalCompression.rans(TEST_BYTES, order); + final byte[] restored = ExternalCompression.unrans(compressed); + Assert.assertEquals(TEST_BYTES, restored); + } + } + + @Test + public void testXZRoundtrip() throws IOException { + final byte [] compressed = ExternalCompression.xz(TEST_BYTES); + final byte [] restored = ExternalCompression.unxz(compressed); + Assert.assertEquals(TEST_BYTES, restored); + } + + +} diff --git a/src/test/java/htsjdk/samtools/cram/io/IOTestCases.java b/src/test/java/htsjdk/samtools/cram/io/IOTestCases.java new file mode 100644 index 0000000000..e2a3b88b61 --- /dev/null +++ b/src/test/java/htsjdk/samtools/cram/io/IOTestCases.java @@ -0,0 +1,135 @@ +package htsjdk.samtools.cram.io; + +import htsjdk.HtsjdkTest; +import org.testng.annotations.DataProvider; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +public class IOTestCases extends HtsjdkTest { + + @DataProvider(name = "littleEndianTests32") + public static Object[][] littleEndianTests32() { + return new Object[][] { + {1, new byte[]{1, 0, 0, 0}}, // 0x01 + {127, new byte[]{127, 0, 0, 0}}, // 0x7F + {128, new byte[]{-128, 0, 0, 0}}, // 0x80 + {129, new byte[]{-127, 0, 0, 0}}, // 0x81 + {255, new byte[]{-1, 0, 0, 0}}, // 0xFF + {256, new byte[]{0, 1, 0, 0}}, // 0x0100 + {257, new byte[]{1, 1, 0, 0}}, // 0x0101 + {65535, new byte[]{-1, -1, 0, 0}}, // 0xFFFF + {65536, new byte[]{0, 0, 1, 0}}, // 0x010000 + {16777216, new byte[]{0, 0, 0, 1}}, // 0x01000000 + {2147483647, new byte[]{-1, -1, -1, 127}}, // 0x7FFFFFFF + {-2147483648, new byte[]{0, 0, 0, -128}}, // 0x80000000 + {-1, new byte[]{-1, -1, -1, -1}} // 0xFFFFFFFF + }; + } + + private static Object[][] asDataProvider(List list) { + Object[][] params = new Object[list.size()][]; + for (int i = 0; i < params.length; i++) + params[i] = new Object[]{list.get(i)}; + return params; + } + + static List int32Tests() { + List list = new ArrayList(); + + // basics: + list.add(0); + list.add(1); + list.add(127); + list.add(128); + list.add(255); + list.add(256); + list.add(-1); + + // scan with bits: + for (int i = 0; i <= 32; i++) { + list.add((1 << i) - 2); + list.add((1 << i) - 1); + list.add(1 << i); + list.add((1 << i) + 1); + list.add((1 << i) + 1); + } + + // special cases: + list.add(Integer.MAX_VALUE); + list.add(Integer.MIN_VALUE); + list.add(268435456); + + return list; + } + + @DataProvider(name = "testInt32") + public static Object[][] testInt32() { + return asDataProvider(IOTestCases.int32Tests()); + } + + @DataProvider(name = "testInt32Arrays") + public static Object[][] testValues32() { + List int32Tests = IOTestCases.int32Tests(); + List shuffled = new ArrayList<>(int32Tests); + Collections.shuffle(shuffled); + + return new Object[][]{ + {int32Tests}, + {shuffled} + }; + } + + static List int64Tests() { + List list = new ArrayList() ; + + // basics: + list.add(0L); + list.add(0L); + list.add(1L); + list.add(127L); + list.add(128L); + list.add(255L); + list.add(256L); + + // scan with bits: + for (int i = 0; i <= 64; i++) { + list.add((1L << i) - 2); + list.add((1L << i) - 1); + list.add(1L << i); + list.add((1L << i) + 1); + list.add((1L << i) + 1); + } + + // special cases: + list.add(Long.MAX_VALUE); + list.add(Long.MIN_VALUE); + list.add(1125899906842622L) ; + list.add(1125899906842622L); + list.add(562949953421312L); + list.add(4294967296L); + list.add(268435456L); + list.add(2147483648L); + list.add(-1L); + + return list; + } + + @DataProvider(name = "testInt64") + public static Object[][] testInt64() { + return asDataProvider(IOTestCases.int64Tests()); + } + + @DataProvider(name = "testInt64Arrays") + public static Object[][] testValues64() { + List int64Tests = IOTestCases.int64Tests(); + List shuffled = new ArrayList<>(int64Tests); + Collections.shuffle(shuffled); + + return new Object[][]{ + {int64Tests}, + {shuffled} + }; + } +} diff --git a/src/test/java/htsjdk/samtools/cram/io/ITF8Test.java b/src/test/java/htsjdk/samtools/cram/io/ITF8Test.java index a206ad1f04..749d0451bb 100644 --- a/src/test/java/htsjdk/samtools/cram/io/ITF8Test.java +++ b/src/test/java/htsjdk/samtools/cram/io/ITF8Test.java @@ -11,7 +11,6 @@ import java.io.ByteArrayInputStream; import java.io.IOException; import java.util.ArrayList; -import java.util.Arrays; import java.util.List; /** @@ -34,41 +33,7 @@ public void reset() { testBAIS.reset(); } - - @DataProvider(name = "testITF8") - public static Object[][] testValues() { - List list = new ArrayList() ; - - // basics: - list.add(0); - list.add(1); - list.add(127); - list.add(128); - list.add(255); - list.add(256); - list.add(-1); - - // scan with bits: - for (int i = 0; i <= 32; i++) { - list.add((1 << i) - 2); - list.add((1 << i) - 1); - list.add(1 << i); - list.add((1 << i) + 1); - list.add((1 << i) + 1); - } - - // special cases: - list.add(Integer.MAX_VALUE) ; - list.add(Integer.MIN_VALUE); - list.add(268435456); - - Object[][] params = new Object[list.size()][] ; - for (int i=0; i list = new ArrayList() ; - - // basics: - list.add(0L); - list.add(0L); - list.add(1L); - list.add(127L); - list.add(128L); - list.add(255L); - list.add(256L); - - // scan with bits: - for (int i = 0; i <= 64; i++) { - list.add((1L << i) - 2); - list.add((1L << i) - 1); - list.add(1L << i); - list.add((1L << i) + 1); - list.add((1L << i) + 1); - } - - // special cases: - list.add(1125899906842622L) ; - list.add(1125899906842622L); - list.add(562949953421312L); - list.add(4294967296L); - list.add(268435456L); - list.add(2147483648L); - list.add(-1L); - - Object[][] params = new Object[list.size()][] ; - for (int i=0; i