diff --git a/src/main/java/htsjdk/variant/variantcontext/writer/AsyncVariantContextWriter.java b/src/main/java/htsjdk/variant/variantcontext/writer/AsyncVariantContextWriter.java index 481ab871e5..2c7bf3f39a 100644 --- a/src/main/java/htsjdk/variant/variantcontext/writer/AsyncVariantContextWriter.java +++ b/src/main/java/htsjdk/variant/variantcontext/writer/AsyncVariantContextWriter.java @@ -53,4 +53,10 @@ public void writeHeader(final VCFHeader header) { public boolean checkError() { return false; } + + @Override + public void setVCFHeader(final VCFHeader header) { + this.underlyingWriter.setVCFHeader(header); + } + } diff --git a/src/main/java/htsjdk/variant/variantcontext/writer/BCF2Writer.java b/src/main/java/htsjdk/variant/variantcontext/writer/BCF2Writer.java index 8c16aac97f..4a6070ed0b 100644 --- a/src/main/java/htsjdk/variant/variantcontext/writer/BCF2Writer.java +++ b/src/main/java/htsjdk/variant/variantcontext/writer/BCF2Writer.java @@ -122,6 +122,10 @@ class BCF2Writer extends IndexingVariantContextWriter { private VCFHeader lastVCFHeaderOfUnparsedGenotypes = null; private boolean canPassOnUnparsedGenotypeDataForLastVCFHeader = false; + // is the header or body written to the output stream? 
+ private boolean outputHasBeenWritten; + + public BCF2Writer(final File location, final OutputStream output, final SAMSequenceDictionary refDict, final boolean enableOnTheFlyIndexing, final boolean doNotWriteGenotypes) { super(writerName(location, output), location, output, refDict, enableOnTheFlyIndexing); @@ -145,39 +149,13 @@ public BCF2Writer(final File location, final OutputStream output, final SAMSeque @Override public void writeHeader(VCFHeader header) { - // make sure the header is sorted correctly - header = new VCFHeader(header.getMetaDataInSortedOrder(), header.getGenotypeSamples()); - - // create the config offsets map - if ( header.getContigLines().isEmpty() ) { - if ( ALLOW_MISSING_CONTIG_LINES ) { - if ( GeneralUtils.DEBUG_MODE_ENABLED ) { - System.err.println("No contig dictionary found in header, falling back to reference sequence dictionary"); - } - createContigDictionary(VCFUtils.makeContigHeaderLines(getRefDict(), null)); - } else { - throw new IllegalStateException("Cannot write BCF2 file with missing contig lines"); - } - } else { - createContigDictionary(header.getContigLines()); - } - - // set up the map from dictionary string values -> offset - final ArrayList dict = BCF2Utils.makeDictionary(header); - for ( int i = 0; i < dict.size(); i++ ) { - stringDictionaryMap.put(dict.get(i), i); - } - - sampleNames = header.getGenotypeSamples().toArray(new String[header.getNGenotypeSamples()]); - - // setup the field encodings - fieldManager.setup(header, encoder, stringDictionaryMap); + setVCFHeader(header); try { // write out the header into a byte stream, get its length, and write everything to the file final ByteArrayOutputStream capture = new ByteArrayOutputStream(); final OutputStreamWriter writer = new OutputStreamWriter(capture); - this.header = VCFWriter.writeHeader(header, writer, doNotWriteGenotypes, VCFWriter.getVersionLine(), "BCF2 stream"); + this.header = VCFWriter.writeHeader(this.header, writer, VCFWriter.getVersionLine(), "BCF2 
stream"); writer.append('\0'); // the header is null terminated by a byte writer.close(); @@ -185,6 +163,7 @@ public void writeHeader(VCFHeader header) { new BCFVersion(MAJOR_VERSION, MINOR_VERSION).write(outputStream); BCF2Type.INT32.write(headerBytes.length, outputStream); outputStream.write(headerBytes); + outputHasBeenWritten = true; } catch (IOException e) { throw new RuntimeIOException("BCF2 stream: Got IOException while trying to write BCF2 header", e); } @@ -204,6 +183,7 @@ public void add( VariantContext vc ) { // write the two blocks to disk writeBlock(infoBlock, genotypesBlock); + outputHasBeenWritten = true; } catch ( IOException e ) { throw new RuntimeIOException("Error writing record to BCF2 file: " + vc.toString(), e); @@ -221,6 +201,39 @@ public void close() { super.close(); } + @Override + public void setVCFHeader(final VCFHeader header) { + if (outputHasBeenWritten) { + throw new IllegalStateException("The header cannot be modified after the header or variants have been written to the output stream."); + } + // make sure the header is sorted correctly + this.header = doNotWriteGenotypes ? 
new VCFHeader(header.getMetaDataInSortedOrder()) : new VCFHeader( + header.getMetaDataInSortedOrder(), header.getGenotypeSamples()); + // create the contig offsets map + if ( this.header.getContigLines().isEmpty() ) { + if ( ALLOW_MISSING_CONTIG_LINES ) { + if ( GeneralUtils.DEBUG_MODE_ENABLED ) { + System.err.println("No contig dictionary found in header, falling back to reference sequence dictionary"); + } + createContigDictionary(VCFUtils.makeContigHeaderLines(getRefDict(), null)); + } else { + throw new IllegalStateException("Cannot write BCF2 file with missing contig lines"); + } + } else { + createContigDictionary(this.header.getContigLines()); + } + // set up the map from dictionary string values -> offset + final ArrayList dict = BCF2Utils.makeDictionary(this.header); + for ( int i = 0; i < dict.size(); i++ ) { + stringDictionaryMap.put(dict.get(i), i); + } + + sampleNames = this.header.getGenotypeSamples().toArray(new String[this.header.getNGenotypeSamples()]); + // setup the field encodings + fieldManager.setup(this.header, encoder, stringDictionaryMap); + + } + // -------------------------------------------------------------------------------- // // implicit block diff --git a/src/main/java/htsjdk/variant/variantcontext/writer/SortingVariantContextWriterBase.java b/src/main/java/htsjdk/variant/variantcontext/writer/SortingVariantContextWriterBase.java index 7d9273f976..dd6b15e968 100644 --- a/src/main/java/htsjdk/variant/variantcontext/writer/SortingVariantContextWriterBase.java +++ b/src/main/java/htsjdk/variant/variantcontext/writer/SortingVariantContextWriterBase.java @@ -132,6 +132,11 @@ public synchronized void add(VariantContext vc) { emitSafeRecords(); } + @Override + public void setVCFHeader(final VCFHeader header) { + innerWriter.setVCFHeader(header); + } + /** * Gets a string representation of this object. 
* @return a string representation of this object @@ -199,4 +204,4 @@ public VCFRecord(VariantContext vc) { this.vc = vc; } } -} \ No newline at end of file +} diff --git a/src/main/java/htsjdk/variant/variantcontext/writer/VCFWriter.java b/src/main/java/htsjdk/variant/variantcontext/writer/VCFWriter.java index 71aef13424..35b5d10985 100644 --- a/src/main/java/htsjdk/variant/variantcontext/writer/VCFWriter.java +++ b/src/main/java/htsjdk/variant/variantcontext/writer/VCFWriter.java @@ -66,6 +66,9 @@ class VCFWriter extends IndexingVariantContextWriter { // should we always output a complete format record, even if we could drop trailing fields? private final boolean writeFullFormatField; + // is the header or body written to the output stream? + private boolean outputHasBeenWritten; + /* * The VCF writer uses an internal Writer, based by the ByteArrayOutputStream lineBuffer, * to temp. buffer the header and per-site output before flushing the per line output @@ -128,13 +131,14 @@ private void writeAndResetBuffer() throws IOException { @Override public void writeHeader(final VCFHeader header) { + // note we need to update the mHeader object after this call because they header // may have genotypes trimmed out of it, if doNotWriteGenotypes is true + setVCFHeader(header); try { - this.mHeader = writeHeader(header, writer, doNotWriteGenotypes, getVersionLine(), getStreamName()); - this.vcfEncoder = new VCFEncoder(this.mHeader, this.allowMissingFieldsInHeader, this.writeFullFormatField); + writeHeader(this.mHeader, writer, getVersionLine(), getStreamName()); writeAndResetBuffer(); - + outputHasBeenWritten = true; } catch ( IOException e ) { throw new RuntimeIOException("Couldn't write file " + getStreamName(), e); } @@ -146,11 +150,9 @@ public static String getVersionLine() { public static VCFHeader writeHeader(VCFHeader header, final Writer writer, - final boolean doNotWriteGenotypes, final String versionLine, final String streamNameForError) { - header = 
doNotWriteGenotypes ? new VCFHeader(header.getMetaDataInSortedOrder()) : header; - + try { // the file format field needs to be written first writer.write(versionLine + "\n"); @@ -223,9 +225,18 @@ public void add(final VariantContext context) { write("\n"); writeAndResetBuffer(); - + outputHasBeenWritten = true; } catch (IOException e) { throw new RuntimeIOException("Unable to write the VCF object to " + getStreamName(), e); } } + + @Override + public void setVCFHeader(final VCFHeader header) { + if (outputHasBeenWritten) { + throw new IllegalStateException("The header cannot be modified after the header or variants have been written to the output stream."); + } + this.mHeader = doNotWriteGenotypes ? new VCFHeader(header.getMetaDataInSortedOrder()) : header; + this.vcfEncoder = new VCFEncoder(this.mHeader, this.allowMissingFieldsInHeader, this.writeFullFormatField); + } } diff --git a/src/main/java/htsjdk/variant/variantcontext/writer/VariantContextWriter.java b/src/main/java/htsjdk/variant/variantcontext/writer/VariantContextWriter.java index 843901a20d..e61dee0b55 100644 --- a/src/main/java/htsjdk/variant/variantcontext/writer/VariantContextWriter.java +++ b/src/main/java/htsjdk/variant/variantcontext/writer/VariantContextWriter.java @@ -35,6 +35,12 @@ */ public interface VariantContextWriter extends Closeable { + /** + * Writes the header + * + * @param header header + * @throws IllegalStateException if header is already written + */ public void writeHeader(VCFHeader header); /** @@ -49,4 +55,15 @@ public interface VariantContextWriter extends Closeable { public boolean checkError(); public void add(VariantContext vc); + + /** + * Sets the VCF header so that data blocks can be written without writing the header + * + * Exactly one of writeHeader() or setVCFHeader() should be called when using a writer + * + * @param header VCF header + * @throws IllegalStateException if header or body is already written + + */ + void setVCFHeader(VCFHeader header); } \ No 
newline at end of file diff --git a/src/test/java/htsjdk/variant/bcf2/BCF2WriterUnitTest.java b/src/test/java/htsjdk/variant/bcf2/BCF2WriterUnitTest.java new file mode 100644 index 0000000000..aa4871d729 --- /dev/null +++ b/src/test/java/htsjdk/variant/bcf2/BCF2WriterUnitTest.java @@ -0,0 +1,282 @@ +/* +* Copyright (c) 2017 The Broad Institute +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/ + +package htsjdk.variant.bcf2; + +import htsjdk.samtools.SAMSequenceDictionary; +import htsjdk.samtools.util.TestUtil; +import htsjdk.tribble.Tribble; +import htsjdk.tribble.readers.PositionalBufferedStream; +import htsjdk.variant.VariantBaseTest; +import htsjdk.variant.variantcontext.Allele; +import htsjdk.variant.variantcontext.Genotype; +import htsjdk.variant.variantcontext.GenotypeBuilder; +import htsjdk.variant.variantcontext.GenotypesContext; +import htsjdk.variant.variantcontext.VariantContext; +import htsjdk.variant.variantcontext.VariantContextBuilder; +import htsjdk.variant.variantcontext.VariantContextTestProvider; +import htsjdk.variant.variantcontext.writer.Options; +import htsjdk.variant.variantcontext.writer.VariantContextWriter; +import htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder; +import htsjdk.variant.vcf.VCFFormatHeaderLine; +import htsjdk.variant.vcf.VCFHeader; +import htsjdk.variant.vcf.VCFHeaderLine; +import htsjdk.variant.vcf.VCFHeaderLineType; +import htsjdk.variant.vcf.VCFInfoHeaderLine; +import org.testng.Assert; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.Test; + +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.util.ArrayList; +import java.util.EnumSet; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Set; + +/** + * @author amila + *

<p> + * Class BCF2WriterUnitTest + * <p>

+ * This class tests out the ability of the BCF writer to correctly write BCF files + */ +public class BCF2WriterUnitTest extends VariantBaseTest { + + private File tempDir; + + /** + * create a fake header of known quantity + * + * @return a fake VCF header + */ + private static VCFHeader createFakeHeader() { + final SAMSequenceDictionary sequenceDict = createArtificialSequenceDictionary(); + final Set metaData = new HashSet<>(); + final Set additionalColumns = new HashSet<>(); + metaData.add(new VCFHeaderLine("two", "2")); + additionalColumns.add("extra1"); + additionalColumns.add("extra2"); + final VCFHeader header = new VCFHeader(metaData, additionalColumns); + header.addMetaDataLine(new VCFInfoHeaderLine("DP", 1, VCFHeaderLineType.String, "x")); + header.addMetaDataLine(new VCFFormatHeaderLine("GT", 1, VCFHeaderLineType.String, "x")); + header.addMetaDataLine(new VCFFormatHeaderLine("BB", 1, VCFHeaderLineType.String, "x")); + header.addMetaDataLine(new VCFFormatHeaderLine("GQ", 1, VCFHeaderLineType.String, "x")); + header.setSequenceDictionary(sequenceDict); + return header; + } + + @BeforeClass + private void createTemporaryDirectory() { + tempDir = TestUtil.getTempDirectory("BCFWriter", "StaleIndex"); + tempDir.deleteOnExit(); + } + + + /** + * test, using the writer and reader, that we can output and input BCF without problems + */ + @Test + public void testWriteAndReadBCF() throws IOException { + final File bcfOutputFile = File.createTempFile("testWriteAndReadVCF.", ".bcf", tempDir); + bcfOutputFile.deleteOnExit(); + final VCFHeader header = createFakeHeader(); + try (final VariantContextWriter writer = new VariantContextWriterBuilder() + .setOutputFile(bcfOutputFile).setReferenceDictionary(header.getSequenceDictionary()) + .unsetOption(Options.INDEX_ON_THE_FLY) + .build()) { + writer.writeHeader(header); + writer.add(createVC(header)); + writer.add(createVC(header)); + } + VariantContextTestProvider.VariantContextContainer container = 
VariantContextTestProvider + .readAllVCs(bcfOutputFile, new BCF2Codec()); + int counter = 0; + final Iterator it = container.getVCs().iterator(); + while (it.hasNext()) { + it.next(); + counter++; + } + Assert.assertEquals(counter, 2); + + } + + + /** + * test, with index-on-the-fly option, that we can output and input BCF without problems + */ + @Test + public void testWriteAndReadBCFWithIndex() throws IOException { + final File bcfOutputFile = File.createTempFile("testWriteAndReadVCF.", ".bcf", tempDir); + bcfOutputFile.deleteOnExit(); + Tribble.indexFile(bcfOutputFile).deleteOnExit(); + final VCFHeader header = createFakeHeader(); + try (final VariantContextWriter writer = new VariantContextWriterBuilder() + .setOutputFile(bcfOutputFile).setReferenceDictionary(header.getSequenceDictionary()) + .setOptions(EnumSet.of(Options.INDEX_ON_THE_FLY)) + .build()) { + writer.writeHeader(header); + writer.add(createVC(header)); + writer.add(createVC(header)); + } + VariantContextTestProvider.VariantContextContainer container = VariantContextTestProvider + .readAllVCs(bcfOutputFile, new BCF2Codec()); + int counter = 0; + final Iterator it = container.getVCs().iterator(); + while (it.hasNext()) { + it.next(); + counter++; + } + Assert.assertEquals(counter, 2); + } + + /** + * test, using the writer and reader, that we can output and input a BCF body without header + */ + @Test + public void testWriteAndReadBCFHeaderless() throws IOException { + final File bcfOutputFile = File.createTempFile("testWriteAndReadBCFWithHeader.", ".bcf", tempDir); + bcfOutputFile.deleteOnExit(); + final File bcfOutputHeaderlessFile = File.createTempFile("testWriteAndReadBCFHeaderless.", ".bcf", tempDir); + bcfOutputHeaderlessFile.deleteOnExit(); + + final VCFHeader header = createFakeHeader(); + // we write two files, bcfOutputFile with the header, and bcfOutputHeaderlessFile with just the body + try (final VariantContextWriter fakeBCFFileWriter = new VariantContextWriterBuilder() + 
.setOutputFile(bcfOutputFile).setReferenceDictionary(header.getSequenceDictionary()) + .unsetOption(Options.INDEX_ON_THE_FLY) + .build()) { + fakeBCFFileWriter.writeHeader(header); // writes header + } + + try (final VariantContextWriter fakeBCFBodyFileWriter = new VariantContextWriterBuilder() + .setOutputFile(bcfOutputHeaderlessFile).setReferenceDictionary(header.getSequenceDictionary()) + .unsetOption(Options.INDEX_ON_THE_FLY) + .build()) { + fakeBCFBodyFileWriter.setVCFHeader(header); // does not write header + fakeBCFBodyFileWriter.add(createVC(header)); + fakeBCFBodyFileWriter.add(createVC(header)); + } + + VariantContextTestProvider.VariantContextContainer container; + + try (final PositionalBufferedStream headerPbs = new PositionalBufferedStream(new FileInputStream(bcfOutputFile)); + final PositionalBufferedStream bodyPbs = new PositionalBufferedStream(new FileInputStream(bcfOutputHeaderlessFile))) { + + BCF2Codec codec = new BCF2Codec(); + codec.readHeader(headerPbs); + // we use the header information read from identical file with header+body to read just the body of second file + + int counter = 0; + while (!bodyPbs.isDone()) { + VariantContext vc = codec.decode(bodyPbs); + counter++; + } + Assert.assertEquals(counter, 2); + } + + } + + @Test(expectedExceptions = IllegalStateException.class) + public void testWriteHeaderTwice() throws IOException { + final File bcfOutputFile = File.createTempFile("testWriteAndReadVCF.", ".bcf", tempDir); + bcfOutputFile.deleteOnExit(); + + final VCFHeader header = createFakeHeader(); + // prevent writing header twice + try (final VariantContextWriter writer = new VariantContextWriterBuilder() + .setOutputFile(bcfOutputFile).setReferenceDictionary(header.getSequenceDictionary()) + .unsetOption(Options.INDEX_ON_THE_FLY) + .build()) { + writer.writeHeader(header); + writer.writeHeader(header); + } + } + + @Test(expectedExceptions = IllegalStateException.class) + public void testChangeHeaderAfterWritingHeader() throws 
IOException { + final File bcfOutputFile = File.createTempFile("testWriteAndReadVCF.", ".bcf", tempDir); + bcfOutputFile.deleteOnExit(); + + final VCFHeader header = createFakeHeader(); + // prevent changing header if it's already written + try (final VariantContextWriter writer = new VariantContextWriterBuilder() + .setOutputFile(bcfOutputFile).setReferenceDictionary(header.getSequenceDictionary()) + .unsetOption(Options.INDEX_ON_THE_FLY) + .build()) { + writer.writeHeader(header); + writer.setVCFHeader(header); + } + } + + @Test(expectedExceptions = IllegalStateException.class) + public void testChangeHeaderAfterWritingBody() throws IOException { + final File bcfOutputFile = File.createTempFile("testWriteAndReadVCF.", ".bcf", tempDir); + bcfOutputFile.deleteOnExit(); + + final VCFHeader header = createFakeHeader(); + // prevent changing header if part of body is already written + try (final VariantContextWriter writer = new VariantContextWriterBuilder() + .setOutputFile(bcfOutputFile).setReferenceDictionary(header.getSequenceDictionary()) + .unsetOption(Options.INDEX_ON_THE_FLY) + .build()) { + writer.setVCFHeader(header); + writer.add(createVC(header)); + writer.setVCFHeader(header); + } + } + + /** + * create a fake VCF record + * + * @param header the VCF header + * @return a VCFRecord + */ + private VariantContext createVC(final VCFHeader header) { + final List alleles = new ArrayList<>(); + final Map attributes = new HashMap<>(); + final GenotypesContext genotypes = GenotypesContext.create(header.getGenotypeSamples().size()); + + alleles.add(Allele.create("A", true)); + alleles.add(Allele.create("ACC", false)); + + attributes.put("DP", "50"); + for (final String name : header.getGenotypeSamples()) { + final Genotype gt = new GenotypeBuilder(name, alleles.subList(1, 2)).GQ(0).attribute("BB", "1").phased(true) + .make(); + genotypes.add(gt); + } + return new VariantContextBuilder("RANDOM", "1", 1, 1, alleles) + 
.genotypes(genotypes).attributes(attributes).make(); + } + + +} + diff --git a/src/test/java/htsjdk/variant/variantcontext/writer/AsyncVariantContextWriterUnitTest.java b/src/test/java/htsjdk/variant/variantcontext/writer/AsyncVariantContextWriterUnitTest.java new file mode 100644 index 0000000000..9efa74e1e9 --- /dev/null +++ b/src/test/java/htsjdk/variant/variantcontext/writer/AsyncVariantContextWriterUnitTest.java @@ -0,0 +1,146 @@ +/* +* Copyright (c) 2017 The Broad Institute +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/ + +package htsjdk.variant.variantcontext.writer; + +import htsjdk.samtools.SAMSequenceDictionary; +import htsjdk.samtools.util.TestUtil; +import htsjdk.tribble.Tribble; +import htsjdk.tribble.readers.AsciiLineReader; +import htsjdk.tribble.readers.AsciiLineReaderIterator; +import htsjdk.variant.VariantBaseTest; +import htsjdk.variant.variantcontext.Allele; +import htsjdk.variant.variantcontext.Genotype; +import htsjdk.variant.variantcontext.GenotypeBuilder; +import htsjdk.variant.variantcontext.GenotypesContext; +import htsjdk.variant.variantcontext.VariantContext; +import htsjdk.variant.variantcontext.VariantContextBuilder; +import htsjdk.variant.vcf.VCFCodec; +import htsjdk.variant.vcf.VCFHeader; +import htsjdk.variant.vcf.VCFHeaderLine; +import htsjdk.variant.vcf.VCFHeaderVersion; +import org.testng.Assert; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.Test; + +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.util.ArrayList; +import java.util.EnumSet; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +/** + * @author amila + *

<p> + * Class AsyncVariantContextWriterUnitTest + * <p>

+ * This class tests out the ability of the VCF writer to correctly write VCF files with Asynchronous IO + */ +public class AsyncVariantContextWriterUnitTest extends VariantBaseTest { + + @BeforeClass + private void createTemporaryDirectory() { + File tempDir = TestUtil.getTempDirectory("VCFWriter", "StaleIndex"); + tempDir.deleteOnExit(); + } + + /** test, using the writer and reader, that we can output and input a VCF body without problems */ + @Test + public void testWriteAndReadAsyncVCFHeaderless() throws IOException { + final File fakeVCFFile = VariantBaseTest.createTempFile("testWriteAndReadAsyncVCFHeaderless.", ".vcf"); + fakeVCFFile.deleteOnExit(); + + Tribble.indexFile(fakeVCFFile).deleteOnExit(); + final Set metaData = new HashSet<>(); + final Set additionalColumns = new HashSet<>(); + final SAMSequenceDictionary sequenceDict = createArtificialSequenceDictionary(); + final VCFHeader header = createFakeHeader(metaData, additionalColumns, sequenceDict); + try (final VariantContextWriter writer = new VariantContextWriterBuilder() + .setOutputFile(fakeVCFFile).setReferenceDictionary(sequenceDict) + .setOptions(EnumSet.of(Options.ALLOW_MISSING_FIELDS_IN_HEADER, Options.INDEX_ON_THE_FLY, Options.USE_ASYNC_IO)) + .build()) { + writer.setVCFHeader(header); + writer.add(createVC(header)); + writer.add(createVC(header)); + } + final VCFCodec codec = new VCFCodec(); + codec.setVCFHeader(header, VCFHeaderVersion.VCF4_2); + + try (final FileInputStream fis = new FileInputStream(fakeVCFFile)) { + final AsciiLineReaderIterator iterator = new AsciiLineReaderIterator(new AsciiLineReader(fis)); + int counter = 0; + while (iterator.hasNext()) { + VariantContext context = codec.decode(iterator.next()); + counter++; + } + Assert.assertEquals(counter, 2); + } + } + + /** + * create a fake header of known quantity + * @param metaData the header lines + * @param additionalColumns the additional column names + * @return a fake VCF header + */ + public static VCFHeader 
createFakeHeader(final Set metaData, final Set additionalColumns, + final SAMSequenceDictionary sequenceDict) { + metaData.add(new VCFHeaderLine("two", "2")); + additionalColumns.add("extra1"); + additionalColumns.add("extra2"); + final VCFHeader ret = new VCFHeader(metaData, additionalColumns); + ret.setSequenceDictionary(sequenceDict); + return ret; + } + + /** + * create a fake VCF record + * @param header the VCF header + * @return a VCFRecord + */ + private VariantContext createVC(final VCFHeader header) { + + final List alleles = new ArrayList(); + final Map attributes = new HashMap(); + final GenotypesContext genotypes = GenotypesContext.create(header.getGenotypeSamples().size()); + + alleles.add(Allele.create("A",true)); + alleles.add(Allele.create("ACC",false)); + + attributes.put("DP","50"); + for (final String name : header.getGenotypeSamples()) { + final Genotype gt = new GenotypeBuilder(name,alleles.subList(1,2)).GQ(0).attribute("BB", "1").phased(true).make(); + genotypes.add(gt); + } + return new VariantContextBuilder("RANDOM", "1", 1, 1, alleles) + .genotypes(genotypes).attributes(attributes).make(); + } +} + diff --git a/src/test/java/htsjdk/variant/variantcontext/writer/VCFWriterUnitTest.java b/src/test/java/htsjdk/variant/variantcontext/writer/VCFWriterUnitTest.java index eaf2f95a10..2757c9b32e 100644 --- a/src/test/java/htsjdk/variant/variantcontext/writer/VCFWriterUnitTest.java +++ b/src/test/java/htsjdk/variant/variantcontext/writer/VCFWriterUnitTest.java @@ -26,10 +26,13 @@ package htsjdk.variant.variantcontext.writer; import htsjdk.samtools.SAMSequenceDictionary; +import htsjdk.samtools.util.BlockCompressedInputStream; import htsjdk.samtools.util.TestUtil; import htsjdk.tribble.AbstractFeatureReader; import htsjdk.tribble.FeatureReader; import htsjdk.tribble.Tribble; +import htsjdk.tribble.readers.AsciiLineReader; +import htsjdk.tribble.readers.AsciiLineReaderIterator; import htsjdk.tribble.util.TabixUtils; import 
htsjdk.variant.VariantBaseTest; import htsjdk.variant.variantcontext.Allele; @@ -45,6 +48,7 @@ import htsjdk.variant.vcf.VCFHeaderVersion; import java.io.File; +import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.IOException; import java.util.ArrayList; @@ -57,7 +61,6 @@ import java.util.Set; import org.testng.Assert; -import org.testng.annotations.AfterClass; import org.testng.annotations.BeforeClass; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; @@ -77,28 +80,22 @@ public class VCFWriterUnitTest extends VariantBaseTest { @BeforeClass private void createTemporaryDirectory() { tempDir = TestUtil.getTempDirectory("VCFWriter", "StaleIndex"); + tempDir.deleteOnExit(); } - @AfterClass - private void deleteTemporaryDirectory() { - for (File f : tempDir.listFiles()) { - f.delete(); - } - tempDir.delete(); - } /** test, using the writer and reader, that we can output and input a VCF file without problems */ @Test(dataProvider = "vcfExtensionsDataProvider") public void testBasicWriteAndRead(final String extension) throws IOException { - final File fakeVCFFile = File.createTempFile("testBasicWriteAndRead.", extension); + final File fakeVCFFile = File.createTempFile("testBasicWriteAndRead.", extension, tempDir); fakeVCFFile.deleteOnExit(); if (".vcf.gz".equals(extension)) { new File(fakeVCFFile.getAbsolutePath() + ".tbi").deleteOnExit(); } else { Tribble.indexFile(fakeVCFFile).deleteOnExit(); } - metaData = new HashSet(); - additionalColumns = new HashSet(); + metaData = new HashSet<>(); + additionalColumns = new HashSet<>(); final SAMSequenceDictionary sequenceDict = createArtificialSequenceDictionary(); final VCFHeader header = createFakeHeader(metaData, additionalColumns, sequenceDict); final VariantContextWriter writer = new VariantContextWriterBuilder() @@ -118,7 +115,7 @@ public void testBasicWriteAndRead(final String extension) throws IOException { 
// validate what we're reading in validateHeader(headerFromFile, sequenceDict); - + try { final Iterator it = reader.iterator(); while(it.hasNext()) { @@ -133,13 +130,102 @@ public void testBasicWriteAndRead(final String extension) throws IOException { } + /** test, using the writer and reader, that we can output and input a VCF body without problems */ + @Test(dataProvider = "vcfExtensionsDataProvider") + public void testWriteAndReadVCFHeaderless(final String extension) throws IOException { + final File fakeVCFFile = File.createTempFile("testWriteAndReadVCFHeaderless.", extension, tempDir); + fakeVCFFile.deleteOnExit(); + if (".vcf.gz".equals(extension)) { + new File(fakeVCFFile.getAbsolutePath() + ".tbi").deleteOnExit(); /* presumably the index written by INDEX_ON_THE_FLY for .vcf.gz output - confirm */ + } else { + Tribble.indexFile(fakeVCFFile).deleteOnExit(); + } + metaData = new HashSet<>(); + additionalColumns = new HashSet<>(); + final SAMSequenceDictionary sequenceDict = createArtificialSequenceDictionary(); + final VCFHeader header = createFakeHeader(metaData, additionalColumns, sequenceDict); + try (final VariantContextWriter writer = new VariantContextWriterBuilder() + .setOutputFile(fakeVCFFile).setReferenceDictionary(sequenceDict) + .setOptions(EnumSet.of(Options.ALLOW_MISSING_FIELDS_IN_HEADER, Options.INDEX_ON_THE_FLY)) + .build()) { + writer.setVCFHeader(header); + writer.add(createVC(header)); + writer.add(createVC(header)); + } + final VCFCodec codec = new VCFCodec(); + codec.setVCFHeader(header, VCFHeaderVersion.VCF4_2); + + try (BlockCompressedInputStream bcis = new BlockCompressedInputStream(fakeVCFFile); + FileInputStream fis = new FileInputStream(fakeVCFFile)) { + AsciiLineReaderIterator iterator = + new AsciiLineReaderIterator(new AsciiLineReader(".vcf.gz".equals(extension) ? 
bcis : fis)); + int counter = 0; + while (iterator.hasNext()) { + VariantContext context = codec.decode(iterator.next()); + counter++; + } + Assert.assertEquals(counter, 2); + } + + } + + @Test(expectedExceptions = IllegalStateException.class) + public void testWriteHeaderTwice() { + final File fakeVCFFile = VariantBaseTest.createTempFile("testBasicWriteAndRead.", ".vcf"); + fakeVCFFile.deleteOnExit(); + final SAMSequenceDictionary sequenceDict = createArtificialSequenceDictionary(); + final VCFHeader header = createFakeHeader(metaData, additionalColumns, sequenceDict); + // prevent writing header twice + try (final VariantContextWriter writer1 = new VariantContextWriterBuilder() + .setOutputFile(fakeVCFFile) + .setReferenceDictionary(sequenceDict) + .build()) { + writer1.writeHeader(header); + writer1.writeHeader(header); + } + } + + + @Test(expectedExceptions = IllegalStateException.class) + public void testChangeHeaderAfterWritingHeader() { + final File fakeVCFFile = VariantBaseTest.createTempFile("testBasicWriteAndRead.", ".vcf"); + fakeVCFFile.deleteOnExit(); + final SAMSequenceDictionary sequenceDict = createArtificialSequenceDictionary(); + final VCFHeader header = createFakeHeader(metaData, additionalColumns, sequenceDict); + // prevent changing header if it's already written + try (final VariantContextWriter writer2 = new VariantContextWriterBuilder() + .setOutputFile(fakeVCFFile) + .setReferenceDictionary(sequenceDict) + .build()) { + writer2.writeHeader(header); + writer2.setVCFHeader(header); + } + } + + @Test(expectedExceptions = IllegalStateException.class) + public void testChangeHeaderAfterWritingBody() { + final File fakeVCFFile = VariantBaseTest.createTempFile("testBasicWriteAndRead.", ".vcf"); + fakeVCFFile.deleteOnExit(); + final SAMSequenceDictionary sequenceDict = createArtificialSequenceDictionary(); + final VCFHeader header = createFakeHeader(metaData, additionalColumns, sequenceDict); + // prevent changing header if part of body is already 
written + try (final VariantContextWriter writer3 = new VariantContextWriterBuilder() + .setOutputFile(fakeVCFFile) + .setReferenceDictionary(sequenceDict) + .build()) { + writer3.setVCFHeader(header); + writer3.add(createVC(header)); + writer3.setVCFHeader(header); + } + } + /** * create a fake header of known quantity * @param metaData the header lines * @param additionalColumns the additional column names * @return a fake VCF header */ - public static VCFHeader createFakeHeader(final Set metaData, final Set additionalColumns, + private static VCFHeader createFakeHeader(final Set metaData, final Set additionalColumns, final SAMSequenceDictionary sequenceDict) { metaData.add(new VCFHeaderLine(VCFHeaderVersion.VCF4_0.getFormatString(), VCFHeaderVersion.VCF4_0.getVersionString())); metaData.add(new VCFHeaderLine("two", "2")); @@ -182,7 +268,7 @@ private VariantContext createVCGeneral(final VCFHeader header, final String chro * validate a VCF header * @param header the header to validate */ - public void validateHeader(final VCFHeader header, final SAMSequenceDictionary sequenceDictionary) { + private void validateHeader(final VCFHeader header, final SAMSequenceDictionary sequenceDictionary) { // check the fields int index = 0; for (final VCFHeader.HEADER_FIELDS field : header.getHeaderFields()) {