-
Notifications
You must be signed in to change notification settings - Fork 596
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
PrintFileDiagnostics for cram, crai and bai. (#8577)
* New experimental tool to print out human readable file diagnostics for cram/crai/bai files.
- Loading branch information
Showing
10 changed files
with
1,861 additions
and
0 deletions.
There are no files selected for viewing
79 changes: 79 additions & 0 deletions
79
src/main/java/org/broadinstitute/hellbender/tools/PrintFileDiagnostics.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,79 @@ | ||
package org.broadinstitute.hellbender.tools; | ||
|
||
import org.broadinstitute.barclay.argparser.*; | ||
import org.broadinstitute.hellbender.cmdline.CommandLineProgram; | ||
import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions; | ||
import org.broadinstitute.hellbender.engine.GATKPath; | ||
import org.broadinstitute.hellbender.tools.filediagnostics.HTSAnalyzer; | ||
import org.broadinstitute.hellbender.tools.filediagnostics.HTSAnalyzerFactory; | ||
import picard.cmdline.programgroups.OtherProgramGroup; | ||
|
||
import java.io.File; | ||
|
||
/** | ||
* A diagnostic tool that prints meta information about a GATK input file. | ||
* | ||
* Works on files ending in .cram, .crai, and .bai. | ||
* | ||
* Sample Usage: | ||
* | ||
* gatk PrintFileDiagnostics \ | ||
* -I input.cram \ | ||
* -count-limit 10 | ||
*/ | ||
@ExperimentalFeature | ||
@WorkflowProperties | ||
@CommandLineProgramProperties( | ||
summary = "Print diagnostic information about a genomics file to stdout", | ||
oneLineSummary = "Print diagnostic information about a genomics file to stdout", | ||
programGroup = OtherProgramGroup.class | ||
) | ||
public class PrintFileDiagnostics extends CommandLineProgram { | ||
|
||
@Argument(fullName = StandardArgumentDefinitions.INPUT_LONG_NAME, | ||
shortName = StandardArgumentDefinitions.INPUT_SHORT_NAME, | ||
doc = "Input path for diagnostics", | ||
optional = false, | ||
common = true) | ||
@WorkflowInput | ||
public GATKPath inputPath; | ||
|
||
@Argument(fullName = StandardArgumentDefinitions.OUTPUT_LONG_NAME, | ||
shortName = StandardArgumentDefinitions.OUTPUT_SHORT_NAME, | ||
doc = "Outut file for diagnostics (must be a local file)", | ||
optional = false, | ||
common = true) | ||
@WorkflowInput | ||
public File outputFile; | ||
|
||
@Argument(shortName="count-limit", | ||
fullName="count-limit", | ||
doc="Limit on how much output to emit (.cram only)") | ||
private long countLimit = 1000; | ||
|
||
private HTSAnalyzer htsAnalyzer; | ||
|
||
@Override | ||
protected void onStartup() { | ||
super.onStartup(); | ||
htsAnalyzer = HTSAnalyzerFactory.getFileAnalyzer(inputPath, outputFile, countLimit); | ||
} | ||
|
||
@Override | ||
protected Object doWork() { | ||
htsAnalyzer.analyze(); | ||
return 0; | ||
} | ||
|
||
@Override | ||
protected void onShutdown() { | ||
if ( htsAnalyzer != null ) { | ||
try { | ||
htsAnalyzer.close(); | ||
} catch (Exception e) { | ||
throw new RuntimeException(e); | ||
} | ||
} | ||
} | ||
|
||
} |
31 changes: 31 additions & 0 deletions
31
src/main/java/org/broadinstitute/hellbender/tools/filediagnostics/BAIAnalyzer.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
package org.broadinstitute.hellbender.tools.filediagnostics; | ||
|
||
import htsjdk.samtools.BAMIndexer; | ||
import org.broadinstitute.hellbender.engine.GATKPath; | ||
|
||
import java.io.File; | ||
import java.io.IOException; | ||
|
||
/** | ||
* Analyzer for BAI files. | ||
*/ | ||
public class BAIAnalyzer extends HTSAnalyzer { | ||
|
||
public BAIAnalyzer(final GATKPath inputPath, final File outputFile) { | ||
super(inputPath, outputFile); | ||
} | ||
|
||
/** | ||
* Run the analyzer for the file. | ||
*/ | ||
protected void doAnalysis() { | ||
System.out.println(String.format("\nOutput written to %s\n", outputFile)); | ||
BAMIndexer.createAndWriteIndex(inputPath.toPath().toFile(), outputFile, true); | ||
} | ||
|
||
@Override | ||
public void close() throws IOException { | ||
} | ||
|
||
} | ||
|
59 changes: 59 additions & 0 deletions
59
src/main/java/org/broadinstitute/hellbender/tools/filediagnostics/CRAIAnalyzer.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
package org.broadinstitute.hellbender.tools.filediagnostics; | ||
|
||
import htsjdk.samtools.CRAMCRAIIndexer; | ||
import htsjdk.samtools.cram.CRAIIndex; | ||
import htsjdk.samtools.util.RuntimeIOException; | ||
import org.broadinstitute.hellbender.engine.GATKPath; | ||
|
||
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
|
||
/** | ||
* Analyzer for CRAM (.crai) index files. | ||
*/ | ||
public class CRAIAnalyzer extends HTSAnalyzer { | ||
|
||
final FileOutputStream fos; | ||
|
||
public CRAIAnalyzer(final GATKPath inputPath, final File outputFile) { | ||
super(inputPath, outputFile); | ||
try { | ||
fos = new FileOutputStream(outputFile); | ||
} catch (final IOException e) { | ||
throw new RuntimeIOException(e); | ||
} | ||
} | ||
|
||
protected void emitln(final String s) { | ||
try { | ||
fos.write(s.getBytes()); | ||
fos.write('\n'); | ||
} catch (IOException e) { | ||
throw new RuntimeException(e); | ||
} | ||
} | ||
|
||
/** | ||
* Run the analyzer for the file. | ||
*/ | ||
protected void doAnalysis() { | ||
try (final InputStream is = inputPath.getInputStream()) { | ||
final CRAIIndex craiIndex = CRAMCRAIIndexer.readIndex(is); | ||
emitln("\nSeqId AlignmentStart AlignmentSpan ContainerOffset SliceOffset SliceSize\n"); | ||
craiIndex.getCRAIEntries().stream().forEach(e -> emitln(e.toString())); | ||
} catch (IOException e) { | ||
throw new RuntimeException(e); | ||
} | ||
} | ||
|
||
@Override | ||
public void close() throws IOException { | ||
if (fos != null) { | ||
fos.close(); | ||
} | ||
} | ||
|
||
} | ||
|
Oops, something went wrong.