-
Notifications
You must be signed in to change notification settings - Fork 242
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
VCFHeader, VCFCodec and VCFHeaderLine refactoring for VCF4.3/BCF2.2 #835
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
package htsjdk.utils; | ||
|
||
import java.util.function.Supplier; | ||
|
||
public class Utils { | ||
/** | ||
* Checks that an Object {@code object} is not null and returns the same object or throws an {@link IllegalArgumentException} | ||
* @param object any Object | ||
* @return the same object | ||
* @throws IllegalArgumentException if a {@code o == null} | ||
*/ | ||
public static <T> T nonNull(final T object) { | ||
return Utils.nonNull(object, "Null object is not allowed here."); | ||
} | ||
|
||
/** | ||
* Checks that an {@link Object} is not {@code null} and returns the same object or throws an {@link IllegalArgumentException} | ||
* @param object any Object | ||
* @param message the text message that would be passed to the exception thrown when {@code o == null}. | ||
* @return the same object | ||
* @throws IllegalArgumentException if a {@code o == null} | ||
*/ | ||
public static <T> T nonNull(final T object, final String message) { | ||
if (object == null) { | ||
throw new IllegalArgumentException(message); | ||
} | ||
return object; | ||
} | ||
|
||
/** | ||
* Checks that an {@link Object} is not {@code null} and returns the same object or throws an {@link IllegalArgumentException} | ||
* @param object any Object | ||
* @param message the text message that would be passed to the exception thrown when {@code o == null}. | ||
* @return the same object | ||
* @throws IllegalArgumentException if a {@code o == null} | ||
*/ | ||
public static <T> T nonNull(final T object, final Supplier<String> message) { | ||
if (object == null) { | ||
throw new IllegalArgumentException(message.get()); | ||
} | ||
return object; | ||
} | ||
|
||
|
||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -150,6 +150,10 @@ public FeatureCodecHeader readHeader( final PositionalBufferedStream inputStream | |
|
||
if ( bcfVersion.getMajorVersion() != ALLOWED_MAJOR_VERSION ) | ||
error("BCF2Codec can only process BCF2 files, this file has major version " + bcfVersion.getMajorVersion()); | ||
|
||
// TODO: fixing this breaks GATK GenomicsDB integration/tests | ||
// Require the minor version to match exactly | ||
//if ( bcfVersion.getMinorVersion() != MIN_MINOR_VERSION ) | ||
if ( bcfVersion.getMinorVersion() < MIN_MINOR_VERSION ) | ||
error("BCF2Codec can only process BCF2 files with minor version >= " + MIN_MINOR_VERSION + " but this file has minor version " + bcfVersion.getMinorVersion()); | ||
|
||
|
@@ -206,6 +210,10 @@ public FeatureCodecHeader readHeader( final PositionalBufferedStream inputStream | |
|
||
@Override | ||
public boolean canDecode( final String path ) { | ||
// TODO: this is broken in a couple of ways: | ||
// First, the version check is too permissive - it accepts any minor version, including BCF 2.2, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For the version check, I suggest to extract the checking code in |
||
// which it shouldn't. Second, it doesn't recognize that BCF can be block gzipped, so it rejects | ||
// those files because the header never matches, but only because the stream isn't decompressed. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. A block-gzipped file cannot be read by using any of the codecs implemented in htsjdk unless it has a |
||
try (InputStream fis = Files.newInputStream(IOUtil.getPath(path)) ){ | ||
final BCFVersion version = BCFVersion.readBCFVersion(fis); | ||
return version != null && version.getMajorVersion() == ALLOWED_MAJOR_VERSION; | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -26,15 +26,19 @@ | |
package htsjdk.variant.variantcontext.writer; | ||
|
||
import htsjdk.samtools.SAMSequenceDictionary; | ||
import htsjdk.samtools.util.Log; | ||
import htsjdk.samtools.util.RuntimeIOException; | ||
import htsjdk.tribble.TribbleException; | ||
import htsjdk.tribble.index.IndexCreator; | ||
import htsjdk.utils.Utils; | ||
import htsjdk.variant.variantcontext.VariantContext; | ||
import htsjdk.variant.variantcontext.VariantContextBuilder; | ||
import htsjdk.variant.vcf.VCFConstants; | ||
import htsjdk.variant.vcf.VCFEncoder; | ||
import htsjdk.variant.vcf.VCFHeader; | ||
import htsjdk.variant.vcf.VCFHeaderLine; | ||
import htsjdk.variant.vcf.VCFHeaderVersion; | ||
import htsjdk.variant.vcf.VCFUtils; | ||
|
||
import java.io.BufferedWriter; | ||
import java.io.ByteArrayOutputStream; | ||
|
@@ -43,14 +47,15 @@ | |
import java.io.OutputStream; | ||
import java.io.OutputStreamWriter; | ||
import java.io.Writer; | ||
import java.util.stream.Collectors; | ||
|
||
/** | ||
* this class writes VCF files | ||
*/ | ||
class VCFWriter extends IndexingVariantContextWriter { | ||
protected final static Log logger = Log.getInstance(VCFWriter.class); | ||
|
||
private static final String VERSION_LINE = | ||
VCFHeader.METADATA_INDICATOR + VCFHeaderVersion.VCF4_2.getFormatString() + "=" + VCFHeaderVersion.VCF4_2.getVersionString(); | ||
private static final String DEFAULT_VERSION_LINE = VCFHeader.METADATA_INDICATOR + VCFHeader.DEFAULT_VCF_VERSION.getVersionLine(); | ||
|
||
// Initialized when the header is written to the output stream | ||
private VCFEncoder vcfEncoder = null; | ||
|
@@ -141,7 +146,7 @@ public void writeHeader(final VCFHeader header) { | |
} | ||
|
||
public static String getVersionLine() { | ||
return VERSION_LINE; | ||
return DEFAULT_VERSION_LINE; | ||
} | ||
|
||
public static VCFHeader writeHeader(VCFHeader header, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Because you are already here, what's about adding the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There are already a lot of changes in this PR, so I'd prefer to keep this focused for now. |
||
|
@@ -152,12 +157,18 @@ public static VCFHeader writeHeader(VCFHeader header, | |
header = doNotWriteGenotypes ? new VCFHeader(header.getMetaDataInSortedOrder()) : header; | ||
|
||
try { | ||
// the file format field needs to be written first | ||
// Validate that the file version we're writing is version-compatible this header's version. | ||
validateHeaderVersion(header, versionLine); | ||
|
||
// The file format field needs to be written first; below any file format lines | ||
// embedded in the header will be removed | ||
writer.write(versionLine + "\n"); | ||
|
||
for (final VCFHeaderLine line : header.getMetaDataInSortedOrder() ) { | ||
if ( VCFHeaderVersion.isFormatString(line.getKey()) ) | ||
// Remove the fileformat header lines | ||
if ( VCFHeaderVersion.isFormatString(line.getKey()) ) { | ||
continue; | ||
} | ||
|
||
writer.write(VCFHeader.METADATA_INDICATOR); | ||
writer.write(line.toString()); | ||
|
@@ -166,14 +177,9 @@ public static VCFHeader writeHeader(VCFHeader header, | |
|
||
// write out the column line | ||
writer.write(VCFHeader.HEADER_INDICATOR); | ||
boolean isFirst = true; | ||
for (final VCFHeader.HEADER_FIELDS field : header.getHeaderFields() ) { | ||
if ( isFirst ) | ||
isFirst = false; // don't write out a field separator | ||
else | ||
writer.write(VCFConstants.FIELD_SEPARATOR); | ||
writer.write(field.toString()); | ||
} | ||
writer.write(header.getHeaderFields().stream() | ||
.map(f -> f.name()) | ||
.collect(Collectors.joining(VCFConstants.FIELD_SEPARATOR)).toString()); | ||
|
||
if ( header.hasGenotypingData() ) { | ||
writer.write(VCFConstants.FIELD_SEPARATOR); | ||
|
@@ -194,6 +200,32 @@ public static VCFHeader writeHeader(VCFHeader header, | |
return header; | ||
} | ||
|
||
/** | ||
* Given a header and a target output version, see if the header's version is compatible with the | ||
* requested version. | ||
* @param header | ||
* @param versionLine | ||
*/ | ||
private static void validateHeaderVersion(final VCFHeader header, final String versionLine) { | ||
Utils.nonNull(header); | ||
Utils.nonNull(versionLine); | ||
|
||
final VCFHeaderVersion vcfVersion = header.getHeaderVersion(); | ||
if (!vcfVersion.equals(VCFHeaderVersion.getHeaderVersion(versionLine))) { | ||
final String message = String.format("Attempt to write a version %s VCF header to a version %s VCF output", | ||
vcfVersion.getVersionString(), | ||
versionLine); | ||
if (VCFHeaderVersion.versionsAreCompatible(VCFHeaderVersion.getHeaderVersion(versionLine), vcfVersion)) { | ||
if (VCFUtils.getStrictVCFVersionValidation()) { | ||
throw new TribbleException(message); | ||
} | ||
} | ||
if (VCFUtils.getVerboseVCFLogging()) { | ||
logger.warn(message); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This warning does not says anything about the compatibility checked in the previous if clause. Maybe it will be nice to add this to the warning to do not scare users writing compatible versions... There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not quite sure I get what you're saying. Are you saying to only log this message if the versions are incompatible ? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I mean that the message could include information about the compatibility... |
||
} | ||
} | ||
} | ||
|
||
/** | ||
* attempt to close the VCF file | ||
*/ | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I personally don't like that a block for fixing a bug is based on a downstream project. I will rather vote for fixing it here and fix the tests in GATK when it is updated.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Agreed. It needs to be fixed, but that will go into the next PR. This is one several places in this PR where I added notes like this for things like this.