Skip to content

Commit

Permalink
Update filter to use new IO interface
Browse files Browse the repository at this point in the history
  • Loading branch information
huddlej committed Mar 10, 2021
1 parent c77bcb7 commit 071023d
Showing 1 changed file with 4 additions and 3 deletions.
7 changes: 4 additions & 3 deletions augur/filter.py
Original file line number Diff line number Diff line change
@@ -14,6 +14,7 @@
import treetime.utils

from .index import index_sequences
+ from .io import open_file, read_sequences, write_sequences
from .utils import read_metadata, read_strains, get_numerical_dates, run_shell_command, shquote, is_date_ambiguous

comment_char = '#'
@@ -545,19 +546,19 @@ def run(args):
dropped_samps = list(available_strains - seq_keep)
write_vcf(args.sequences, args.output, dropped_samps)
elif args.sequences and args.output:
- sequences = SeqIO.parse(args.sequences, "fasta")
+ sequences = read_sequences(args.sequences)

# Stream to disk all sequences that passed all filters to avoid reading
# sequences into memory first. Track the observed strain names in the
# sequence file as part of the single pass to allow comparison with the
# provided sequence index.
observed_sequence_strains = set()
- with open(args.output, "w") as output_handle:
+ with open_file(args.output, "wt") as output_handle:
for sequence in sequences:
observed_sequence_strains.add(sequence.id)

if sequence.id in seq_keep:
- SeqIO.write(sequence, output_handle, 'fasta')
+ write_sequences(sequence, output_handle, 'fasta')

if sequence_strains != observed_sequence_strains:
# Warn the user if the expected strains from the sequence index are
Expand Down

0 comments on commit 071023d

Please sign in to comment.