Skip to content

Commit a367d19

Browse files
committed
🚧 add error handling
1 parent 6805150 commit a367d19

File tree

2 files changed

+66
-36
lines changed

2 files changed

+66
-36
lines changed

Diff for: augur/merge.py

+50-36
Original file line numberDiff line numberDiff line change
@@ -390,45 +390,59 @@ def merge_sequences(args):
390390
if args.quiet:
391391
print_info = lambda *_: None
392392

393-
# FIXME: restore this error handling:
394-
# except subprocess.CalledProcessError as err:
395-
# raise SeqKitError(f"seqkit invocation failed") from err
396-
397-
with seqkit('rmdup',
398-
stdin =subprocess.PIPE,
399-
stdout=subprocess.PIPE,
400-
stderr=subprocess.PIPE) as process:
401-
# Reversed because seqkit rmdup keeps the first entry but this command
402-
# should keep the last entry.
403-
# NOTE(or FIXME?): For duplicates within a file, seqkit rmdup will
404-
# keep the first entry which is somewhat at odds with the "last one
405-
# wins" approach used for the rest of augur merge. We could error in
406-
# this case (which is what augur filter does), however that requires
407-
# reading all the IDs which is unnecessary complexity for a wrapper
408-
# around seqkit rmdup.
409-
for s in reversed(args.sequences):
410-
print_info(f"Reading sequences from {s!r}…")
411-
subprocess.run([*augur, "read-file", s], stdout=process.stdin)
412-
413-
# Add an newline character to support FASTA files that are missing one at the end.
414-
# Extraneous newline characters are stripped by seqkit.
415-
print(file=process.stdin)
416-
417-
# Let seqkit know that there are no more sequences.
418-
process.stdin.close()
419-
420-
print_info(f"Merging sequences and writing to {args.output_sequences!r}…")
421-
422-
for line in iter(process.stderr.readline, ''):
423-
print_info(f"[SeqKit] {line.rstrip()}")
424-
425-
# Write merged sequences.
426-
subprocess.run([*augur, "write-file", args.output_sequences], stdin=process.stdout)
393+
try:
394+
with seqkit('rmdup',
395+
stdin =subprocess.PIPE,
396+
stdout=subprocess.PIPE,
397+
stderr=subprocess.PIPE) as process:
398+
# Reversed because seqkit rmdup keeps the first entry but this command
399+
# should keep the last entry.
400+
# NOTE(or FIXME?): For duplicates within a file, seqkit rmdup will
401+
# keep the first entry which is somewhat at odds with the "last one
402+
# wins" approach used for the rest of augur merge. We could error in
403+
# this case (which is what augur filter does), however that requires
404+
# reading all the IDs which is unnecessary complexity for a wrapper
405+
# around seqkit rmdup.
406+
for s in reversed(args.sequences):
407+
print_info(f"Reading sequences from {s!r}…")
408+
read_process = subprocess.run([*augur, "read-file", s], stdout=process.stdin)
409+
try:
410+
read_process.check_returncode()
411+
except subprocess.CalledProcessError as e:
412+
# FIXME: capture augur read-file errors but not seqkit errors
413+
# print_info(f"Cannot read file {s}: {e}")
414+
pass
415+
416+
# Add an newline character to support FASTA files that are missing one at the end.
417+
# Extraneous newline characters are stripped by seqkit.
418+
print(file=process.stdin)
419+
420+
# Let seqkit know that there are no more sequences.
421+
try:
422+
process.stdin.close()
423+
except BrokenPipeError:
424+
# Let the return code handle this
425+
pass
426+
427+
print_info(f"Merging sequences and writing to {args.output_sequences!r}…")
428+
429+
# FIXME: add error handling?
430+
for line in iter(process.stderr.readline, ''):
431+
print_info(f"[SeqKit] {line.rstrip()}")
432+
433+
# Write merged sequences.
434+
# FIXME: add error handling
435+
subprocess.run([*augur, "write-file", args.output_sequences], stdin=process.stdout)
427436

437+
finally:
428438
# Wait for the seqkit process to finish.
429-
process.wait()
439+
returncode = process.wait()
430440
# FIXME: address potential deadlock issue?
431441
# <https://docs.python.org/3/library/subprocess.html#subprocess.Popen.wait>
442+
# Can't use Popen.communicate() because the data read is buffered in memory.
443+
# <https://docs.python.org/3/library/subprocess.html#subprocess.Popen.communicate>
444+
if returncode != 0:
445+
raise SeqKitError(f"seqkit invocation failed, see error message above.")
432446

433447

434448
def sqlite3(*args, **kwargs):
@@ -517,7 +531,7 @@ def seqkit(*args, **kwargs):
517531
return subprocess.Popen(argv, encoding="utf-8", text=True, **kwargs)
518532

519533

520-
class SeqKitError(Exception):
534+
class SeqKitError(AugurError):
521535
pass
522536

523537

Diff for: tests/functional/merge/cram/merge-sequences.t

+16
Original file line numberDiff line numberDiff line change
@@ -60,3 +60,19 @@ This behavior is a reflection of the seqkit rmdup internals.
6060
ATCG
6161
>seq2
6262
GCTA
63+
64+
Error handling
65+
66+
$ cat >y.fasta <<~~
67+
> invalid fasta file
68+
> ~~
69+
70+
$ ${AUGUR} merge \
71+
> --sequences x.fasta y.fasta \
72+
> --output-sequences - > merged.fasta
73+
Reading sequences from 'y.fasta'
74+
Reading sequences from 'x.fasta'
75+
Merging sequences and writing to '-'
76+
[SeqKit] \x1b[31m[ERRO]\x1b[0m fastx: invalid FASTA/Q format (esc)
77+
ERROR: seqkit invocation failed, see error message above.
78+
[2]

0 commit comments

Comments
 (0)