Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Small CRAM refactor: common ExternalEncoding Abstract Base Class #1346

Merged
merged 2 commits into from
May 15, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,7 @@
import htsjdk.samtools.cram.compression.ExternalCompressor;
import htsjdk.samtools.cram.encoding.*;
import htsjdk.samtools.cram.encoding.core.CanonicalHuffmanIntegerEncoding;
import htsjdk.samtools.cram.encoding.external.ByteArrayStopEncoding;
import htsjdk.samtools.cram.encoding.external.ExternalByteArrayEncoding;
import htsjdk.samtools.cram.encoding.external.ExternalByteEncoding;
import htsjdk.samtools.cram.encoding.external.ExternalIntegerEncoding;
import htsjdk.samtools.cram.encoding.external.*;
import htsjdk.samtools.cram.compression.rans.RANS;
import htsjdk.samtools.cram.encoding.readfeatures.ReadFeature;
import htsjdk.samtools.cram.encoding.readfeatures.Substitution;
Expand Down Expand Up @@ -80,36 +77,36 @@ public CompressionHeader build(final List<CramCompressionRecord> records, Substi

final CompressionHeaderBuilder builder = new CompressionHeaderBuilder(coordinateSorted);

builder.addExternalIntegerRansOrderZeroEncoding(DataSeries.AP_AlignmentPositionOffset);
builder.addExternalByteRansOrderOneEncoding(DataSeries.BA_Base);
builder.addExternalRansOrderZeroEncoding(DataSeries.AP_AlignmentPositionOffset);
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The typed versions (Byte, Integer, etc.) actually all did the same thing because their encoding parameters are identical. The Codecs differ, but that's not relevant here.

builder.addExternalRansOrderOneEncoding(DataSeries.BA_Base);
// BB is not used
builder.addExternalIntegerRansOrderOneEncoding(DataSeries.BF_BitFlags);
builder.addExternalByteGzipEncoding(DataSeries.BS_BaseSubstitutionCode);
builder.addExternalIntegerRansOrderOneEncoding(DataSeries.CF_CompressionBitFlags);
builder.addExternalIntegerGzipEncoding(DataSeries.DL_DeletionLength);
builder.addExternalByteGzipEncoding(DataSeries.FC_FeatureCode);
builder.addExternalIntegerGzipEncoding(DataSeries.FN_NumberOfReadFeatures);
builder.addExternalIntegerGzipEncoding(DataSeries.FP_FeaturePosition);
builder.addExternalIntegerGzipEncoding(DataSeries.HC_HardClip);
builder.addExternalRansOrderOneEncoding(DataSeries.BF_BitFlags);
builder.addExternalGzipEncoding(DataSeries.BS_BaseSubstitutionCode);
builder.addExternalRansOrderOneEncoding(DataSeries.CF_CompressionBitFlags);
builder.addExternalGzipEncoding(DataSeries.DL_DeletionLength);
builder.addExternalGzipEncoding(DataSeries.FC_FeatureCode);
builder.addExternalGzipEncoding(DataSeries.FN_NumberOfReadFeatures);
builder.addExternalGzipEncoding(DataSeries.FP_FeaturePosition);
builder.addExternalGzipEncoding(DataSeries.HC_HardClip);
builder.addExternalByteArrayStopTabGzipEncoding(DataSeries.IN_Insertion);
builder.addExternalIntegerGzipEncoding(DataSeries.MF_MateBitFlags);
builder.addExternalIntegerGzipEncoding(DataSeries.MQ_MappingQualityScore);
builder.addExternalIntegerGzipEncoding(DataSeries.NF_RecordsToNextFragment);
builder.addExternalIntegerGzipEncoding(DataSeries.NP_NextFragmentAlignmentStart);
builder.addExternalIntegerRansOrderOneEncoding(DataSeries.NS_NextFragmentReferenceSequenceID);
builder.addExternalIntegerGzipEncoding(DataSeries.PD_padding);
builder.addExternalGzipEncoding(DataSeries.MF_MateBitFlags);
builder.addExternalGzipEncoding(DataSeries.MQ_MappingQualityScore);
builder.addExternalGzipEncoding(DataSeries.NF_RecordsToNextFragment);
builder.addExternalGzipEncoding(DataSeries.NP_NextFragmentAlignmentStart);
builder.addExternalRansOrderOneEncoding(DataSeries.NS_NextFragmentReferenceSequenceID);
builder.addExternalGzipEncoding(DataSeries.PD_padding);
// QQ is not used
builder.addExternalByteRansOrderOneEncoding(DataSeries.QS_QualityScore);
builder.addExternalIntegerRansOrderOneEncoding(DataSeries.RG_ReadGroup);
builder.addExternalIntegerRansOrderZeroEncoding(DataSeries.RI_RefId);
builder.addExternalIntegerRansOrderOneEncoding(DataSeries.RL_ReadLength);
builder.addExternalRansOrderOneEncoding(DataSeries.QS_QualityScore);
builder.addExternalRansOrderOneEncoding(DataSeries.RG_ReadGroup);
builder.addExternalRansOrderZeroEncoding(DataSeries.RI_RefId);
builder.addExternalRansOrderOneEncoding(DataSeries.RL_ReadLength);
builder.addExternalByteArrayStopTabGzipEncoding(DataSeries.RN_ReadName);
builder.addExternalIntegerGzipEncoding(DataSeries.RS_RefSkip);
builder.addExternalGzipEncoding(DataSeries.RS_RefSkip);
builder.addExternalByteArrayStopTabGzipEncoding(DataSeries.SC_SoftClip);
builder.addExternalIntegerGzipEncoding(DataSeries.TC_TagCount);
builder.addExternalIntegerEncoding(DataSeries.TL_TagIdList, ExternalCompressor.createGZIP());
builder.addExternalIntegerGzipEncoding(DataSeries.TN_TagNameAndType);
builder.addExternalIntegerRansOrderOneEncoding(DataSeries.TS_InsertSize);
builder.addExternalGzipEncoding(DataSeries.TC_TagCount);
builder.addExternalGzipEncoding(DataSeries.TL_TagIdList);
builder.addExternalGzipEncoding(DataSeries.TN_TagNameAndType);
builder.addExternalRansOrderOneEncoding(DataSeries.TS_InsertSize);

builder.setTagIdDictionary(buildTagIdDictionary(records));

Expand Down Expand Up @@ -512,36 +509,23 @@ private void addExternalByteArrayStopTabGzipEncoding(final DataSeries dataSeries
ExternalCompressor.createGZIP());
}

private void addExternalIntegerEncoding(final DataSeries dataSeries, final ExternalCompressor compressor) {
addExternalEncoding(dataSeries,
new ExternalIntegerEncoding(dataSeries.getExternalBlockContentId()).toParam(),
compressor);
}

private void addExternalIntegerGzipEncoding(final DataSeries dataSeries) {
addExternalEncoding(dataSeries,
new ExternalIntegerEncoding(dataSeries.getExternalBlockContentId()).toParam(),
ExternalCompressor.createGZIP());
/**
 * Adds an external encoding for the given data series using the supplied compressor.
 * Builds the encoding parameters from the data series' external block content id and
 * delegates to the three-argument {@code addExternalEncoding} overload.
 *
 * @param dataSeries the data series whose external block content id is used
 * @param compressor the compressor passed through to the three-argument overload
 */
private void addExternalEncoding(final DataSeries dataSeries, final ExternalCompressor compressor) {
    // A concrete encoding type is required to produce the params; Byte is an arbitrary pick
    // because the params are equal for all External Encoding value types.
    final int contentId = dataSeries.getExternalBlockContentId();
    addExternalEncoding(dataSeries, new ExternalByteEncoding(contentId).toParam(), compressor);
}

private void addExternalByteGzipEncoding(final DataSeries dataSeries) {
addExternalEncoding(dataSeries,
new ExternalByteEncoding(dataSeries.getExternalBlockContentId()).toParam(),
ExternalCompressor.createGZIP());
}

private void addExternalByteRansOrderOneEncoding(final DataSeries dataSeries) {
addExternalEncoding(dataSeries,
new ExternalByteEncoding(dataSeries.getExternalBlockContentId()).toParam(),
ExternalCompressor.createRANS(RANS.ORDER.ONE));
/**
 * Delegates to {@code addExternalEncoding} with the compressor returned by
 * {@code ExternalCompressor.createGZIP()}.
 *
 * @param dataSeries the data series to add the external encoding for
 */
private void addExternalGzipEncoding(final DataSeries dataSeries) {
    final ExternalCompressor gzipCompressor = ExternalCompressor.createGZIP();
    addExternalEncoding(dataSeries, gzipCompressor);
}

private void addExternalIntegerRansOrderOneEncoding(final DataSeries dataSeries) {
addExternalIntegerEncoding(dataSeries, ExternalCompressor.createRANS(RANS.ORDER.ONE));
/**
 * Delegates to {@code addExternalEncoding} with the compressor returned by
 * {@code ExternalCompressor.createRANS(RANS.ORDER.ONE)}.
 *
 * @param dataSeries the data series to add the external encoding for
 */
private void addExternalRansOrderOneEncoding(final DataSeries dataSeries) {
    final ExternalCompressor ransOrderOne = ExternalCompressor.createRANS(RANS.ORDER.ONE);
    addExternalEncoding(dataSeries, ransOrderOne);
}

private void addExternalIntegerRansOrderZeroEncoding(final DataSeries dataSeries) {
addExternalIntegerEncoding(dataSeries, ExternalCompressor.createRANS(RANS.ORDER.ZERO));
/**
 * Delegates to {@code addExternalEncoding} with the compressor returned by
 * {@code ExternalCompressor.createRANS(RANS.ORDER.ZERO)}.
 *
 * @param dataSeries the data series to add the external encoding for
 */
private void addExternalRansOrderZeroEncoding(final DataSeries dataSeries) {
    final ExternalCompressor ransOrderZero = ExternalCompressor.createRANS(RANS.ORDER.ZERO);
    addExternalEncoding(dataSeries, ransOrderZero);
}

void addTagEncoding(final int tagId, final EncodingDetails encodingDetails) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,34 +18,24 @@
package htsjdk.samtools.cram.encoding.external;

import htsjdk.samtools.cram.encoding.CRAMCodec;
import htsjdk.samtools.cram.encoding.CRAMEncoding;
import htsjdk.samtools.cram.io.BitInputStream;
import htsjdk.samtools.cram.io.BitOutputStream;
import htsjdk.samtools.cram.io.ITF8;
import htsjdk.samtools.cram.structure.EncodingID;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.util.Map;

public class ExternalByteEncoding extends CRAMEncoding<Byte> {
private final int externalBlockContentId;

public class ExternalByteEncoding extends ExternalEncoding<Byte> {
public ExternalByteEncoding(final int externalBlockContentId) {
super(EncodingID.EXTERNAL);
this.externalBlockContentId = externalBlockContentId;
super(externalBlockContentId);
}

/**
 * Creates an {@code ExternalByteEncoding} from serialized parameters.
 *
 * @param params bytes from which the content id is read with {@code ITF8.readUnsignedITF8}
 * @return a new encoding constructed with the content id read from {@code params}
 */
public static ExternalByteEncoding fromParams(byte[] params) {
    return new ExternalByteEncoding(ITF8.readUnsignedITF8(params));
}

@Override
public byte[] toByteArray() {
return ITF8.writeUnsignedITF8(externalBlockContentId);
}

@Override
public CRAMCodec<Byte> buildCodec(final BitInputStream coreBlockInputStream,
final BitOutputStream coreBlockOutputStream,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
package htsjdk.samtools.cram.encoding.external;

import htsjdk.samtools.cram.encoding.CRAMEncoding;
import htsjdk.samtools.cram.io.ITF8;
import htsjdk.samtools.cram.structure.EncodingID;

/**
 * Common abstract base for encodings constructed with {@link EncodingID#EXTERNAL}.
 * Stores the external block content id and provides the shared byte-array
 * serialization of that id.
 *
 * @param <T> the value type handled by the concrete encoding
 */
public abstract class ExternalEncoding<T> extends CRAMEncoding<T> {
    /** Content id supplied at construction time; visible to subclasses. */
    protected final int externalBlockContentId;

    /**
     * Package-private constructor for concrete external encodings.
     *
     * @param externalBlockContentId the content id to store and later serialize
     */
    ExternalEncoding(final int externalBlockContentId) {
        super(EncodingID.EXTERNAL);
        this.externalBlockContentId = externalBlockContentId;
    }

    /**
     * @return the stored content id as written by {@code ITF8.writeUnsignedITF8}
     */
    @Override
    public byte[] toByteArray() {
        final byte[] serializedContentId = ITF8.writeUnsignedITF8(externalBlockContentId);
        return serializedContentId;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,34 +18,24 @@
package htsjdk.samtools.cram.encoding.external;

import htsjdk.samtools.cram.encoding.CRAMCodec;
import htsjdk.samtools.cram.encoding.CRAMEncoding;
import htsjdk.samtools.cram.io.BitInputStream;
import htsjdk.samtools.cram.io.BitOutputStream;
import htsjdk.samtools.cram.io.ITF8;
import htsjdk.samtools.cram.structure.EncodingID;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.util.Map;

public class ExternalIntegerEncoding extends CRAMEncoding<Integer> {
private final int externalBlockContentId;

public class ExternalIntegerEncoding extends ExternalEncoding<Integer> {
public ExternalIntegerEncoding(final int externalBlockContentId) {
super(EncodingID.EXTERNAL);
this.externalBlockContentId = externalBlockContentId;
super(externalBlockContentId);
}

/**
 * Creates an {@code ExternalIntegerEncoding} from serialized parameters.
 *
 * @param params bytes from which the content id is read with {@code ITF8.readUnsignedITF8}
 * @return a new encoding constructed with the content id read from {@code params}
 */
public static ExternalIntegerEncoding fromParams(byte[] params) {
    return new ExternalIntegerEncoding(ITF8.readUnsignedITF8(params));
}

@Override
public byte[] toByteArray() {
return ITF8.writeUnsignedITF8(externalBlockContentId);
}

@Override
public CRAMCodec<Integer> buildCodec(final BitInputStream coreBlockInputStream,
final BitOutputStream coreBlockOutputStream,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,34 +18,24 @@
package htsjdk.samtools.cram.encoding.external;

import htsjdk.samtools.cram.encoding.CRAMCodec;
import htsjdk.samtools.cram.encoding.CRAMEncoding;
import htsjdk.samtools.cram.io.BitInputStream;
import htsjdk.samtools.cram.io.BitOutputStream;
import htsjdk.samtools.cram.io.ITF8;
import htsjdk.samtools.cram.structure.EncodingID;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.util.Map;

public class ExternalLongEncoding extends CRAMEncoding<Long> {
private final int externalBlockContentId;

ExternalLongEncoding(final int externalBlockContentId) {
super(EncodingID.EXTERNAL);
this.externalBlockContentId = externalBlockContentId;
public class ExternalLongEncoding extends ExternalEncoding<Long> {
public ExternalLongEncoding(final int externalBlockContentId) {
super(externalBlockContentId);
}

/**
 * Creates an {@code ExternalLongEncoding} from serialized parameters.
 *
 * @param params bytes from which the content id is read with {@code ITF8.readUnsignedITF8}
 * @return a new encoding constructed with the content id read from {@code params}
 */
public static ExternalLongEncoding fromParams(byte[] params) {
    return new ExternalLongEncoding(ITF8.readUnsignedITF8(params));
}

@Override
public byte[] toByteArray() {
return ITF8.writeUnsignedITF8(externalBlockContentId);
}

@Override
public CRAMCodec<Long> buildCodec(final BitInputStream coreBlockInputStream,
final BitOutputStream coreBlockOutputStream,
Expand Down