Skip to content

Commit

Permalink
Fix bug when loading indexed bgzip fasta file. (#1311)
Browse files Browse the repository at this point in the history
* There was a bug that prevented loading an indexed bgzip fasta file if the index location was specified but wasn't next to the fasta in the file system.
* Fixes #1290
  • Loading branch information
lbergelson authored Mar 4, 2019
1 parent 9f84b7b commit 7b3c7a6
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 11 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -26,19 +26,15 @@

import htsjdk.samtools.SAMException;
import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.samtools.seekablestream.ReadableSeekableStreamByteChannel;
import htsjdk.samtools.seekablestream.SeekablePathStream;
import htsjdk.samtools.seekablestream.SeekableStream;
import htsjdk.samtools.util.BlockCompressedInputStream;
import htsjdk.samtools.util.GZIIndex;
import htsjdk.samtools.util.IOUtil;

import java.io.BufferedInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.nio.file.Files;
import java.nio.file.Path;

/**
Expand All @@ -55,7 +51,7 @@ public class BlockCompressedIndexedFastaSequenceFile extends AbstractIndexedFast

/**
 * Creates a reader for the FASTA file at {@code path} by delegating to the
 * {@code (Path, FastaSequenceIndex)} constructor, passing a {@link FastaSequenceIndex}
 * built from whatever {@code findRequiredFastaIndexFile(path)} returns.
 *
 * @param path location of the FASTA file to open
 * @throws FileNotFoundException declared by this constructor; NOTE(review): presumably raised
 *         when the required FASTA index cannot be located -- confirm against
 *         {@code findRequiredFastaIndexFile}
 */
public BlockCompressedIndexedFastaSequenceFile(final Path path)
throws FileNotFoundException {
// NOTE(review): the two delegation lines below are the same statement shown twice and differ
// only in the space after the comma -- presumably the removed and added sides of a diff hunk
// whose +/- markers are missing; only one of them can exist in the real source file.
this(path,new FastaSequenceIndex((findRequiredFastaIndexFile(path))));
this(path, new FastaSequenceIndex((findRequiredFastaIndexFile(path))));
}

public BlockCompressedIndexedFastaSequenceFile(final Path path, final FastaSequenceIndex index) {
Expand All @@ -67,9 +63,7 @@ public BlockCompressedIndexedFastaSequenceFile(final Path path, final FastaSeque
if (gziIndex == null) {
throw new IllegalArgumentException("null gzi index");
}
if (!canCreateBlockCompresedIndexedFastaSequence(path)) {
throw new SAMException("Invalid block-compressed Fasta file");
}
assertIsBlockCompressed(path);
try {
stream = new BlockCompressedInputStream(new SeekablePathStream(path));
gzindex = gziIndex;
Expand Down Expand Up @@ -103,12 +97,14 @@ private static GZIIndex loadFastaGziIndex(final Path path) {
}
}

/**
 * Validation helper for the block-compressed FASTA at {@code path}.
 *
 * NOTE(review): this span shows two variants of the helper interleaved (two signatures and two
 * outcomes per branch) -- presumably the removed and added sides of a diff hunk whose +/-
 * markers are missing.
 *
 * Boolean variant ({@code canCreateBlockCompresedIndexedFastaSequence}): returns whether
 * {@code IOUtil.isBlockCompressed(path, true)} holds AND the file named by
 * {@code GZIIndex.resolveIndexNameForBgzipFile(path)} exists; returns {@code false} when an
 * {@link IOException} occurs.
 *
 * Void variant ({@code assertIsBlockCompressed}): checks only
 * {@code IOUtil.isBlockCompressed(path, true)} and throws a {@link SAMException} naming the
 * path when the check fails or when an {@link IOException} occurs (the cause is preserved).
 *
 * @param path location of the FASTA file to validate
 */
private static boolean canCreateBlockCompresedIndexedFastaSequence(final Path path) {
private static void assertIsBlockCompressed(final Path path) {
try {
// Check that it is a valid block-compressed file. Only the boolean-returning line directly
// below additionally requires the .gzi index resolved from the path to exist; the
// throwing variant performs no index-existence check.
return IOUtil.isBlockCompressed(path, true) && Files.exists(GZIIndex.resolveIndexNameForBgzipFile(path));
if (!IOUtil.isBlockCompressed(path, true)) {
throw new SAMException("Invalid block-compressed Fasta file: " + path);
}
} catch (IOException e) {
return false;
throw new SAMException("Invalid block-compressed Fasta file: " + path, e);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
import htsjdk.samtools.seekablestream.SeekableFileStream;
import htsjdk.samtools.util.CloserUtil;
import htsjdk.samtools.util.GZIIndex;
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.RuntimeIOException;
import htsjdk.samtools.util.StringUtil;
import org.testng.Assert;
Expand All @@ -39,6 +40,9 @@
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardCopyOption;

/**
* Test the indexed fasta sequence file reader.
Expand Down Expand Up @@ -353,4 +357,16 @@ public void testBadInputForIndexedFastaSequenceFile() throws Exception {
// Attempts to construct a BlockCompressedIndexedFastaSequenceFile from SEQUENCE_FILE and
// discards the result.
// NOTE(review): judging by the test name this construction is presumably expected to throw;
// the annotation that would declare the expected exception is outside this view -- confirm.
public void testBadInputForBlockCompressedIndexedFastaSequenceFile() throws Exception {
new BlockCompressedIndexedFastaSequenceFile(SEQUENCE_FILE.toPath());
}

/**
 * Verifies that a block-compressed FASTA can be opened when its FASTA index and GZI index are
 * supplied explicitly rather than discovered next to the FASTA file.
 *
 * The bgzipped FASTA is copied to a fresh temp file, so no index files sit beside the copy.
 * The copy is then opened with the indexes built from {@code SEQUENCE_FILE_INDEX} and
 * {@code SEQUENCE_FILE_GZI}, and the bases it returns for a region of {@code chrM} must equal
 * those returned by a reader opened on the original {@code SEQUENCE_FILE_BGZ} path.
 *
 * @throws IOException if creating or copying the temp file, or closing either reader, fails
 */
@Test
public void testCanCreateBlockCompressedIndexedWithSpecifiedGZIAndDict() throws IOException {
    final Path moved = Files.createTempFile("moved", ".fasta.gz");
    // Register cleanup before copying: createTempFile has already created the file, so a
    // failed copy would otherwise leave it behind.
    IOUtil.deleteOnExit(moved);
    // REPLACE_EXISTING is required because the (empty) temp file already exists.
    Files.copy(SEQUENCE_FILE_BGZ.toPath(), moved, StandardCopyOption.REPLACE_EXISTING);

    try (ReferenceSequenceFile withNoAdjacentIndex = new BlockCompressedIndexedFastaSequenceFile(
                 moved,
                 new FastaSequenceIndex(SEQUENCE_FILE_INDEX),
                 GZIIndex.loadIndex(SEQUENCE_FILE_GZI.toPath()));
         ReferenceSequenceFile withFilesAdjacent =
                 new BlockCompressedIndexedFastaSequenceFile(SEQUENCE_FILE_BGZ.toPath())) {
        Assert.assertEquals(withNoAdjacentIndex.getSubsequenceAt("chrM", 100, 1000).getBases(),
                withFilesAdjacent.getSubsequenceAt("chrM", 100, 1000).getBases());
    }
}
}

0 comments on commit 7b3c7a6

Please sign in to comment.