Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BetaIntegerCodecTest and bugfixes #1199

Merged
merged 11 commits into from
Oct 19, 2018
64 changes: 49 additions & 15 deletions src/main/java/htsjdk/samtools/cram/encoding/BetaIntegerCodec.java
Original file line number Diff line number Diff line change
Expand Up @@ -22,36 +22,70 @@

import java.io.IOException;


/**
* Encodes integers by adding a constant offset value to a range of values in order to reduce
* the necessary number of bits needed to store each value.
*
* As a simple example, consider a data series with values all in the range 10,000 - 10,100.
* Choosing the offset -10,000 means every encoded value will be stored as 0 - 100,
* requiring only ceil(log2(100 + 1)) = 7 bits per value.
*/
class BetaIntegerCodec extends AbstractBitCodec<Integer> {
private int offset = 0;
private final int readNofBits;
private final int offset;
private final int bitsPerValue;
private final long valueLimit; // 1 << bitsPerValue (max 32) so int is too small

/**
* Given integers to encode in the range MIN to MAX:
*
* @param offset the common value to be added to all values before storage.
* Setting this to (-MIN) will ensure all stored values will be in the range (0 .. MAX - MIN)
* @param bitsPerValue the smallest number of bits which can represent the largest stored value (MAX - MIN)
*/
public BetaIntegerCodec(final int offset, final int bitsPerValue) {
if (bitsPerValue <= 0) {
throw new IllegalArgumentException("Number of bits per value must be positive");
} else if (bitsPerValue > 32) {
throw new IllegalArgumentException("Number of bits per value must be 32 or lower");
}

public BetaIntegerCodec(final int offset, final int readNofBits) {
this.offset = offset;
this.readNofBits = readNofBits;
this.bitsPerValue = bitsPerValue;
this.valueLimit = 1L << bitsPerValue;
}

@Override
public final Integer read(final BitInputStream bitInputStream) throws IOException {
return bitInputStream.readBits(readNofBits) - offset;
return bitInputStream.readBits(bitsPerValue) - offset;
}

/**
 * Applies the codec offset to a value and verifies that the result can be stored.
 *
 * The sum is computed as a long: with int arithmetic a sum below Integer.MIN_VALUE
 * would silently wrap around to a positive number and could pass the range checks,
 * while a sum above Integer.MAX_VALUE would wrap negative and be misreported.
 *
 * @param value the raw value about to be encoded
 * @return value + offset, which is never negative and always below the storable limit
 * @throws IllegalArgumentException if value + offset is negative, or is too large to store
 */
private int getAndCheckOffsetValue(int value) {
    final long newValue = (long) value + offset;

    // the result is handed back as a non-negative int, so it can never reach 2^31
    // even if valueLimit is larger than that
    final long limit = Math.min(valueLimit, 1L << 31);

    if (newValue < 0) {
        String negative = String.format("Value %s plus offset %s must be positive",
                value, offset);
        throw new IllegalArgumentException(negative);
    } else if (newValue >= limit) {
        String tooBig = String.format("Value %s plus offset %s is greater than or equal to limit %s",
                value, offset, limit);
        throw new IllegalArgumentException(tooBig);
    }

    // safe narrowing: 0 <= newValue < 2^31 has been established above
    return (int) newValue;
}

@Override
public final long write(final BitOutputStream bitOutputStream, final Integer value) throws IOException {
final int nofBits = (int) numberOfBits(value);
final long newValue = value + offset;
bitOutputStream.write(newValue, nofBits);
return nofBits;
bitOutputStream.write(getAndCheckOffsetValue(value), bitsPerValue);
// every value is encoded using the same number of bits
return bitsPerValue;
}

@Override
public final long numberOfBits(final Integer value) {
jmthibault79 marked this conversation as resolved.
Show resolved Hide resolved
if (value > (1L << readNofBits))
throw new IllegalArgumentException("Value written is bigger then allowed: value=" + value
+ ", max nof bits=" + readNofBits);

return readNofBits;
// every value is encoded using the same number of bits
return bitsPerValue;
}

@Override
Expand Down
140 changes: 140 additions & 0 deletions src/test/java/htsjdk/samtools/cram/encoding/BetaIntegerCodecTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
package htsjdk.samtools.cram.encoding;

import htsjdk.HtsjdkTest;
import htsjdk.samtools.cram.io.BitOutputStream;
import htsjdk.samtools.cram.io.DefaultBitInputStream;
import htsjdk.samtools.cram.io.DefaultBitOutputStream;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;

import java.io.*;

/**
 * Tests for BetaIntegerCodec: round-tripping values with and without an offset,
 * validation of the bitsPerValue constructor argument, and rejection of values
 * which (after the offset is applied) are negative or too large to store.
 */
public class BetaIntegerCodecTest extends HtsjdkTest {

    /**
     * Writes every value through a codec built from the given parameters, reads the
     * same number of values back from the produced bytes, and asserts they are equal.
     *
     * @param offset       the offset passed to the codec constructor
     * @param bitsPerValue the bit width passed to the codec constructor
     * @param values       the values to round-trip
     * @throws IOException if the underlying streams fail
     */
    private void testCodec(int offset, int bitsPerValue, int[] values) throws IOException {
        final BitCodec<Integer> codec = new BetaIntegerCodec(offset, bitsPerValue);

        final byte[] encoded;
        try (ByteArrayOutputStream os = new ByteArrayOutputStream();
             BitOutputStream bos = new DefaultBitOutputStream(os)) {

            for (final int value : values) {
                codec.write(bos, value);
            }

            // Flush before taking the snapshot: when the total number of bits written
            // is not a multiple of 8, the trailing bits may still be buffered in the
            // bit stream and would otherwise be missing from the byte array.
            bos.flush();
            encoded = os.toByteArray();
        }

        final int[] actual = new int[values.length];
        try (InputStream is = new ByteArrayInputStream(encoded);
             DefaultBitInputStream dbis = new DefaultBitInputStream(is)) {

            for (int i = 0; i < actual.length; i++) {
                actual[i] = codec.read(dbis);
            }
        }

        Assert.assertEquals(actual, values);
    }

    /**
     * Builds a codec from the given parameters and writes a single value with it.
     * Shared by the tests which expect the write to be rejected.
     *
     * @param bitsPerValue the bit width passed to the codec constructor
     * @param offset       the offset passed to the codec constructor
     * @param value        the value to write
     * @throws IOException if the underlying streams fail
     */
    private void writeSingleValue(int bitsPerValue, int offset, int value) throws IOException {
        final BitCodec<Integer> codec = new BetaIntegerCodec(offset, bitsPerValue);

        try (ByteArrayOutputStream os = new ByteArrayOutputStream();
             BitOutputStream bos = new DefaultBitOutputStream(os)) {
            codec.write(bos, value);
        }
    }

    // test that the offsets enable the data series to be stored in N bits

    @DataProvider(name = "basicTest")
    public Object[][] basicTestData() {
        // tuples of bitsPerValue, offset, and values to round-trip
        return new Object[][] {
                {8, -100, new int[]{100, 101, 102, (1<<8) + 98, (1<<8) + 99}},
                {4, 10015, new int[]{-10015, -10014, -10001, -10000}},
        };
    }

    @Test(dataProvider = "basicTest")
    public void basicTest(int bitsPerValue, int offset, int[] values) throws IOException {
        testCodec(offset, bitsPerValue, values);
    }

    // test that values fit into N bits without offsets

    @DataProvider(name = "basicTestNoOffset")
    public Object[][] basicTestNoOffsetData() {
        // tuples of bitsPerValue and values to round-trip
        return new Object[][] {
                {8, new int[]{0, 1, 2, 100, (1 << 8) - 2, (1 << 8) - 1}},
                {16, new int[]{0, 1, 255, (1 << 16) - 2, (1 << 16) - 1}},
                // 5 values * 3 bits = 15 bits: the encoded stream does not end on a byte boundary
                {3, new int[]{0, 1, 5, 6, (1 << 3) - 1}},
        };
    }

    @Test(dataProvider = "basicTestNoOffset")
    public void basicTestNoOffset(int bitsPerValue, int[] values) throws IOException {
        testCodec(0, bitsPerValue, values);
    }

    // sanity checks for bitsPerValue. Must be > 0 and <= 32

    @DataProvider(name = "bitsPerValue")
    public Object[][] bitsPerValueData() {
        return new Object[][] {
                {0},
                {-1},
                {33}
        };
    }

    @Test(dataProvider = "bitsPerValue", expectedExceptions = IllegalArgumentException.class)
    public void bitsPerValue(int bitsPerValue) {
        new BetaIntegerCodec(0, bitsPerValue);
    }

    // throw Exceptions when offsets + values are too big to store in N bits

    @DataProvider(name = "overflow")
    public Object[][] overflowData() {
        // tuples of bitsPerValue, offset, and value where (offset + value) is too big to store
        return new Object[][] {
                // first with zero offset
                {1, 0, (1 << 1)},
                {2, 0, (1 << 2)},
                {4, 0, (1 << 4)},
                {8, 0, (1 << 8)},
                {16, 0, (1 << 16)},

                // adding offset of 1 will put it over
                {1, 1, (1 << 1) - 1},
                {2, 1, (1 << 2) - 1},
                {4, 1, (1 << 4) - 1},
                {8, 1, (1 << 8) - 1},
                {16, 1, (1 << 16) - 1},
        };
    }

    @Test(dataProvider = "overflow", expectedExceptions = IllegalArgumentException.class)
    public void overflow(int bitsPerValue, int offset, int value) throws IOException {
        writeSingleValue(bitsPerValue, offset, value);
    }

    // throw Exceptions when offsets + values are negative

    @DataProvider(name = "negativeTest")
    public Object[][] negativeTestData() {
        // tuples of bitsPerValue, offset, and value where (offset + value) is negative
        return new Object[][] {
                {1, 0, -1},
                {1, -1, 0},
                {2, 0, -1},
                {2, -1, 0},
                {8, -100, 99},
                {8, 99, -100},
        };
    }

    @Test(dataProvider = "negativeTest", expectedExceptions = IllegalArgumentException.class)
    public void negativeTest(int bitsPerValue, int offset, int value) throws IOException {
        writeSingleValue(bitsPerValue, offset, value);
    }
}