Skip to content

Commit

Permalink
fix for #366: makeSAMOrBAMWriter accepts only .sam/.bam (#834)
Browse files Browse the repository at this point in the history
- fixes  #366: output format detection with BAM as default option.
- makeWriter delegates to makeSAMOrBAMWriter for non-cram files,
- makeSAMOrBAMWriter warns about non-SAM/BAM extensions and defaults to BAM,
- SamReader.Type method to check file extension,
- BamFileIoUtils points to SamReader BAM_TYPE for BAM extension;
- Default class will inform if `-Dsamjdk.default_reference` file is found (INFO) or not (WARNING)
  • Loading branch information
vadimzalunin authored and Yossi Farjoun committed Feb 1, 2019
1 parent c189878 commit e86af96
Show file tree
Hide file tree
Showing 11 changed files with 431 additions and 182 deletions.
21 changes: 16 additions & 5 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ dependencies {
testCompile "org.testng:testng:6.14.3"
testCompile "com.google.jimfs:jimfs:1.1"
testCompile "com.google.guava:guava:26.0-jre"
}
}

sourceCompatibility = 1.8
targetCompatibility = 1.8
Expand All @@ -61,7 +61,7 @@ defaultTasks 'jar'
jar {
manifest {
attributes 'Implementation-Title': 'HTSJDK',
'Implementation-Vendor' : 'Samtools Organization',
'Implementation-Vendor': 'Samtools Organization',
'Implementation-Version': version
}
}
Expand All @@ -87,6 +87,16 @@ task findScalaAndJavaTypes(type: Exec) {
commandLine './scripts/checkScalaAndJavaFiles.sh'
}


task testWithDefaultReference(type: Test) {
description = "Run tests with a default reference File"
jvmArgs += '-Dsamjdk.reference_fasta=src/test/resources/htsjdk/samtools/cram/ce.fa'

tags {
include "defaultReference"
}
}

test {
description = "Runs the unit tests other than the SRA tests"

Expand All @@ -101,11 +111,12 @@ test {
tags {
exclude "slow"
exclude "broken"
exclude "defaultReference"
exclude "ftp"
if (System.env.CI == "false") exclude "sra"
if (!OperatingSystem.current().isUnix()) exclude "unix"
}
} dependsOn findScalaAndJavaTypes
} dependsOn findScalaAndJavaTypes, testWithDefaultReference


task testFTP(type: Test) {
Expand Down Expand Up @@ -149,7 +160,7 @@ task sourcesJar(type: Jar) {
}

/**
*This specifies what artifacts will be built and uploaded when performing a maven upload.
* This specifies what artifacts will be built and uploaded when performing a maven upload.
*/
artifacts {
archives jar
Expand Down Expand Up @@ -213,7 +224,7 @@ uploadArchives {
}
}
}
doFirst{
doFirst {
System.out.println("Uploading version $version")
}
}
4 changes: 2 additions & 2 deletions src/main/java/htsjdk/samtools/BamFileIoUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,10 @@
public class BamFileIoUtils {
private static final Log LOG = Log.getInstance(BamFileIoUtils.class);

public static final String BAM_FILE_EXTENSION = ".bam";
public static final String BAM_FILE_EXTENSION = "." + SamReader.Type.BAM_TYPE.fileExtension();

public static boolean isBamFile(final File file) {
return ((file != null) && file.getName().endsWith(BAM_FILE_EXTENSION));
return ((file != null) && SamReader.Type.BAM_TYPE.hasValidFileExtension(file.getName()));
}

public static void reheaderBamFile(final SAMFileHeader samFileHeader, final File inputFile, final File outputFile) {
Expand Down
32 changes: 28 additions & 4 deletions src/main/java/htsjdk/samtools/Defaults.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

import java.io.File;
import java.util.Collections;
import java.util.Optional;
import java.util.SortedMap;
import java.util.TreeMap;

Expand Down Expand Up @@ -46,6 +47,18 @@ public class Defaults {
/** The output format of the flag field when writing SAM text. Ignored for reading SAM text. Default = DECIMAL */
public static final SamFlagField SAM_FLAG_FIELD_FORMAT;

/**
* The extension to assume a sam file has when the actual file doesn't have an extension, useful
* for when outputing to /dev/stdout, for example.
*/
public static final String DEFAULT_SAM_EXTENSION;

/**
* The extension to assume a vcf has when the actual file doesn't have an extension, useful
* for when outputing to /dev/stdout, for example.
*/
public static final String DEFAULT_VCF_EXTENSION;

/**
* Even if BUFFER_SIZE is 0, this is guaranteed to be non-zero. If BUFFER_SIZE is non-zero,
* this == BUFFER_SIZE (Default = 128k).
Expand Down Expand Up @@ -97,13 +110,17 @@ public class Defaults {
*/
public static final boolean DISABLE_SNAPPY_COMPRESSOR;


public static final String SAMJDK_PREFIX = "samjdk.";
static {
CREATE_INDEX = getBooleanProperty("create_index", false);
CREATE_MD5 = getBooleanProperty("create_md5", false);
USE_ASYNC_IO_READ_FOR_SAMTOOLS = getBooleanProperty("use_async_io_read_samtools", false);
USE_ASYNC_IO_WRITE_FOR_SAMTOOLS = getBooleanProperty("use_async_io_write_samtools", false);
USE_ASYNC_IO_WRITE_FOR_TRIBBLE = getBooleanProperty("use_async_io_write_tribble", false);
COMPRESSION_LEVEL = getIntProperty("compression_level", 5);
DEFAULT_SAM_EXTENSION = getStringProperty("default_sam_type", "bam");
DEFAULT_VCF_EXTENSION = getStringProperty("default_vcf_type", "vcf");
BUFFER_SIZE = getIntProperty("buffer_size", 1024 * 128);
if (BUFFER_SIZE == 0) {
NON_ZERO_BUFFER_SIZE = 1024 * 128;
Expand Down Expand Up @@ -148,7 +165,7 @@ public static SortedMap<String, Object> allDefaults(){
* applications started with -Djava.security.manager . */
private static String getStringProperty(final String name, final String def) {
try {
return System.getProperty("samjdk." + name, def);
return System.getProperty(Defaults.SAMJDK_PREFIX + name, def);
} catch (final java.security.AccessControlException error) {
log.warn(error,"java Security Manager forbids 'System.getProperty(\"" + name + "\")' , returning default value: " + def );
return def;
Expand All @@ -160,7 +177,7 @@ private static String getStringProperty(final String name, final String def) {
* applications started with -Djava.security.manager this method returns false. */
private static boolean hasProperty(final String name){
try {
return null != System.getProperty("samjdk." + name);
return null != System.getProperty(Defaults.SAMJDK_PREFIX + name);
} catch (final java.security.AccessControlException error) {
log.warn(error,"java Security Manager forbids 'System.getProperty(\"" + name + "\")' , returning false");
return false;
Expand All @@ -182,7 +199,14 @@ private static int getIntProperty(final String name, final int def) {
/** Gets a File system property, prefixed with "samjdk." using the default if the property does not exist. */
private static File getFileProperty(final String name, final String def) {
final String value = getStringProperty(name, def);
// TODO: assert that it is readable
return (null == value) ? null : new File(value);
Optional<File> maybeFile = Optional.ofNullable(value).map(File::new);
maybeFile.ifPresent(f -> {
if (!f.exists()) {
log.warn(String.format("File property for %s has value %s. However file %s doesn't exist.", SAMJDK_PREFIX + name, value, f.getAbsolutePath()));
} else {
log.info(String.format("Found file for property %s: %s ", SAMJDK_PREFIX + name, f.getAbsolutePath()));
}
});
return maybeFile.orElse(null);
}
}
53 changes: 34 additions & 19 deletions src/main/java/htsjdk/samtools/SAMFileWriterFactory.java
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
*/
package htsjdk.samtools;

import htsjdk.samtools.cram.ref.CRAMReferenceSource;
import htsjdk.samtools.cram.ref.ReferenceSource;
import htsjdk.samtools.util.BlockCompressedOutputStream;
import htsjdk.samtools.util.IOUtil;
Expand All @@ -37,6 +38,7 @@
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.zip.Deflater;
import static htsjdk.samtools.SamReader.Type.*;

/**
* Create a writer for writing SAM, BAM, or CRAM files.
Expand Down Expand Up @@ -410,7 +412,7 @@ private SAMFileWriter initWriter(final SAMFileHeader header, final boolean preso
}

/**
* Create either a SAM or a BAM writer based on examination of the outputFile extension.
* Create either a SAM or a BAM writer based on examination of the outputFile extension, defaults to BAM writer.
*
* @param header entire header. Sort order is determined by the sortOrder property of this arg.
* @param presorted presorted if true, SAMRecords must be added to the SAMFileWriter in order that agrees with header.sortOrder.
Expand All @@ -431,24 +433,26 @@ public SAMFileWriter makeSAMOrBAMWriter(final SAMFileHeader header, final boolea
*/
public SAMFileWriter makeSAMOrBAMWriter(final SAMFileHeader header, final boolean presorted, final Path outputPath) {
final String filename = outputPath.getFileName().toString();
if (filename.endsWith(BamFileIoUtils.BAM_FILE_EXTENSION)) {
return makeBAMWriter(header, presorted, outputPath);
}
if (filename.endsWith(".sam")) {
if (SAM_TYPE.hasValidFileExtension(filename)) {
return makeSAMWriter(header, presorted, outputPath);
} else {
if (!BAM_TYPE.hasValidFileExtension(filename)) {
log.info("Unknown file extension, assuming BAM format when writing file: " + outputPath.toUri().toString());
}
return makeBAMWriter(header, presorted, outputPath);
}
return makeBAMWriter(header, presorted, outputPath);
}

/**
*
* Create a SAM, BAM or CRAM writer based on examination of the outputFile extension.
* The method assumes BAM file format for unknown file extensions.
*
* @param header header. Sort order is determined by the sortOrder property of this arg.
* @param presorted if true, SAMRecords must be added to the SAMFileWriter in order that agrees with header.sortOrder.
* @param outputFile where to write the output. Must end with .sam, .bam or .cram.
* @param outputFile where to write the output. Should end with .sam, .bam or .cram.
* @param referenceFasta reference sequence file
* @return SAMFileWriter appropriate for the file type specified in outputFile
* @return SAMFileWriter appropriate for SAM and CRAM file types specified in outputFile, or a BAM writer for all other types
*
*/
public SAMFileWriter makeWriter(final SAMFileHeader header, final boolean presorted, final File outputFile, final File referenceFasta) {
Expand Down Expand Up @@ -483,11 +487,11 @@ public SAMFileWriter makeWriter(final SAMFileHeader header, final boolean presor
*
*/
public SAMFileWriter makeWriter(final SAMFileHeader header, final boolean presorted, final Path outputPath, final Path referenceFasta) {
if (null != outputPath && outputPath.toString().endsWith(SamReader.Type.CRAM_TYPE.fileExtension())) {
final String filename = outputPath.getFileName().toString();
if (CRAM_TYPE.hasValidFileExtension(filename)) {
return makeCRAMWriter(header, presorted, outputPath, referenceFasta);
}
else {
return makeSAMOrBAMWriter(header, presorted, outputPath);
} else {
return makeSAMOrBAMWriter (header, presorted, outputPath);
}
}

Expand Down Expand Up @@ -628,17 +632,29 @@ private CRAMFileWriter createCRAMWriterWithSettings(
final boolean presorted,
final Path outputFile,
final Path referenceFasta) {

final CRAMReferenceSource referenceSource;
if (referenceFasta == null) {
log.info("Reference fasta is not provided when writing CRAM file " + outputFile.toUri().toString());
log.info("Will attempt to use a default reference or download as set by defaults:");
log.info("Default REFERENCE_FASTA (-Dsamjdk.reference_fasta): " + Defaults.REFERENCE_FASTA);
log.info("Default USE_CRAM_REF_DOWNLOAD (-Dsamjdk.use_cram_ref_download): " + Defaults.USE_CRAM_REF_DOWNLOAD);

referenceSource = ReferenceSource.getDefaultCRAMReferenceSource();
} else {
referenceSource = new ReferenceSource(referenceFasta);
}
OutputStream cramOS = null;
OutputStream indexOS = null ;
OutputStream indexOS = null;

if (createIndex) {
if (!IOUtil.isRegularPath(outputFile)) {
log.warn("Cannot create index for CRAM because output file is not a regular file: " + outputFile.toUri());
}
else {
} else {
final Path indexPath = IOUtil.addExtension(outputFile, BAMIndex.BAI_INDEX_SUFFIX);
try {
indexOS = Files.newOutputStream(indexPath);

indexOS = Files.newOutputStream(indexPath) ;
}
catch (final IOException ioe) {
throw new RuntimeIOException("Error creating index file for: " + indexPath.toUri(), ioe);
Expand All @@ -648,8 +664,7 @@ private CRAMFileWriter createCRAMWriterWithSettings(

try {
cramOS = IOUtil.maybeBufferOutputStream(Files.newOutputStream(outputFile), bufferSize);
}
catch (final IOException ioe) {
} catch (final IOException ioe) {
throw new RuntimeIOException("Error creating CRAM file: " + outputFile.toUri(), ioe);
}

Expand All @@ -658,7 +673,7 @@ private CRAMFileWriter createCRAMWriterWithSettings(
createMd5File ? new Md5CalculatingOutputStream(cramOS, md5Path) : cramOS,
indexOS,
presorted,
new ReferenceSource(referenceFasta),
referenceSource,
header,
outputFile.toUri().toString());
setCRAMWriterDefaults(writer);
Expand Down
4 changes: 4 additions & 0 deletions src/main/java/htsjdk/samtools/SamReader.java
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,10 @@ public String toString() {
public static final Type BAM_TYPE = new TypeImpl("BAM", "bam", "bai");
public static final Type SAM_TYPE = new TypeImpl("SAM", "sam", null);
public static final Type BAM_CSI_TYPE = new TypeImpl("BAM", "bam", "csi");

public boolean hasValidFileExtension(final String fileName) {
return fileName != null && fileName.endsWith("." + fileExtension());
}
}

/**
Expand Down
9 changes: 7 additions & 2 deletions src/main/java/htsjdk/samtools/cram/ref/ReferenceSource.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,10 @@
import htsjdk.samtools.reference.ReferenceSequence;
import htsjdk.samtools.reference.ReferenceSequenceFile;
import htsjdk.samtools.reference.ReferenceSequenceFileFactory;
import htsjdk.samtools.util.*;
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.Log;
import htsjdk.samtools.util.SequenceUtil;
import htsjdk.samtools.util.StringUtil;

import java.io.File;
import java.io.IOException;
Expand Down Expand Up @@ -88,6 +91,7 @@ public ReferenceSource(final ReferenceSequenceFile rsFile) {
public static CRAMReferenceSource getDefaultCRAMReferenceSource() {
if (null != Defaults.REFERENCE_FASTA) {
if (Defaults.REFERENCE_FASTA.exists()) {
log.info(String.format("Default reference file %s exists, so going to use that.", Defaults.REFERENCE_FASTA.getAbsolutePath()));
return new ReferenceSource(Defaults.REFERENCE_FASTA);
}
else {
Expand All @@ -96,11 +100,12 @@ public static CRAMReferenceSource getDefaultCRAMReferenceSource() {
}
}
else if (Defaults.USE_CRAM_REF_DOWNLOAD) {
log.info("USE_CRAM_REF_DOWNLOAD=true, so attmpting to download reference file as needed.");
return new ReferenceSource((ReferenceSequenceFile)null);
}
else {
throw new IllegalStateException(
"A valid CRAM reference was not supplied and one cannot be acquired via the property settings reference_fasta or use_cram_ref_download");
String.format("A valid CRAM reference was not supplied and one cannot be acquired via the property settings %s.reference_fasta or %s.use_cram_ref_download",Defaults.SAMJDK_PREFIX,Defaults.SAMJDK_PREFIX));
}
}

Expand Down
Loading

0 comments on commit e86af96

Please sign in to comment.