@@ -390,45 +390,59 @@ def merge_sequences(args):
390
390
if args .quiet :
391
391
print_info = lambda * _ : None
392
392
393
- # FIXME: restore this error handling:
394
- # except subprocess.CalledProcessError as err:
395
- # raise SeqKitError(f"seqkit invocation failed") from err
396
-
397
- with seqkit ('rmdup' ,
398
- stdin = subprocess .PIPE ,
399
- stdout = subprocess .PIPE ,
400
- stderr = subprocess .PIPE ) as process :
401
- # Reversed because seqkit rmdup keeps the first entry but this command
402
- # should keep the last entry.
403
- # NOTE(or FIXME?): For duplicates within a file, seqkit rmdup will
404
- # keep the first entry which is somewhat at odds with the "last one
405
- # wins" approach used for the rest of augur merge. We could error in
406
- # this case (which is what augur filter does), however that requires
407
- # reading all the IDs which is unnecessary complexity for a wrapper
408
- # around seqkit rmdup.
409
- for s in reversed (args .sequences ):
410
- print_info (f"Reading sequences from { s !r} …" )
411
- subprocess .run ([* augur , "read-file" , s ], stdout = process .stdin )
412
-
413
- # Add an newline character to support FASTA files that are missing one at the end.
414
- # Extraneous newline characters are stripped by seqkit.
415
- print (file = process .stdin )
416
-
417
- # Let seqkit know that there are no more sequences.
418
- process .stdin .close ()
419
-
420
- print_info (f"Merging sequences and writing to { args .output_sequences !r} …" )
421
-
422
- for line in iter (process .stderr .readline , '' ):
423
- print_info (f"[SeqKit] { line .rstrip ()} " )
424
-
425
- # Write merged sequences.
426
- subprocess .run ([* augur , "write-file" , args .output_sequences ], stdin = process .stdout )
393
+ try :
394
+ with seqkit ('rmdup' ,
395
+ stdin = subprocess .PIPE ,
396
+ stdout = subprocess .PIPE ,
397
+ stderr = subprocess .PIPE ) as process :
398
+ # Reversed because seqkit rmdup keeps the first entry but this command
399
+ # should keep the last entry.
400
+ # NOTE(or FIXME?): For duplicates within a file, seqkit rmdup will
401
+ # keep the first entry which is somewhat at odds with the "last one
402
+ # wins" approach used for the rest of augur merge. We could error in
403
+ # this case (which is what augur filter does), however that requires
404
+ # reading all the IDs which is unnecessary complexity for a wrapper
405
+ # around seqkit rmdup.
406
+ for s in reversed (args .sequences ):
407
+ print_info (f"Reading sequences from { s !r} …" )
408
+ read_process = subprocess .run ([* augur , "read-file" , s ], stdout = process .stdin )
409
+ try :
410
+ read_process .check_returncode ()
411
+ except subprocess .CalledProcessError as e :
412
+ # FIXME: capture augur read-file errors but not seqkit errors
413
+ # print_info(f"Cannot read file {s}: {e}")
414
+ pass
415
+
416
+ # Add an newline character to support FASTA files that are missing one at the end.
417
+ # Extraneous newline characters are stripped by seqkit.
418
+ print (file = process .stdin )
419
+
420
+ # Let seqkit know that there are no more sequences.
421
+ try :
422
+ process .stdin .close ()
423
+ except BrokenPipeError :
424
+ # Let the return code handle this
425
+ pass
426
+
427
+ print_info (f"Merging sequences and writing to { args .output_sequences !r} …" )
428
+
429
+ # FIXME: add error handling?
430
+ for line in iter (process .stderr .readline , '' ):
431
+ print_info (f"[SeqKit] { line .rstrip ()} " )
432
+
433
+ # Write merged sequences.
434
+ # FIXME: add error handling
435
+ subprocess .run ([* augur , "write-file" , args .output_sequences ], stdin = process .stdout )
427
436
437
+ finally :
428
438
# Wait for the seqkit process to finish.
429
- process .wait ()
439
+ returncode = process .wait ()
430
440
# FIXME: address potential deadlock issue?
431
441
# <https://docs.python.org/3/library/subprocess.html#subprocess.Popen.wait>
442
+ # Can't use Popen.communicate() because the data read is buffered in memory.
443
+ # <https://docs.python.org/3/library/subprocess.html#subprocess.Popen.communicate>
444
+ if returncode != 0 :
445
+ raise SeqKitError (f"seqkit invocation failed, see error message above." )
432
446
433
447
434
448
def sqlite3 (* args , ** kwargs ):
@@ -517,7 +531,7 @@ def seqkit(*args, **kwargs):
517
531
return subprocess .Popen (argv , encoding = "utf-8" , text = True , ** kwargs )
518
532
519
533
520
- class SeqKitError (Exception ):
534
+ class SeqKitError (AugurError ):
521
535
pass
522
536
523
537
0 commit comments