File tree 5 files changed +24
-6
lines changed
5 files changed +24
-6
lines changed Original file line number Diff line number Diff line change @@ -4,7 +4,9 @@ All notable changes to this project will be documented in this file.
4
4
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
5
5
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
6
6
7
- ## [unreleased]
7
+ ## [v1.11.2]
8
+ ### Added
9
+ - Parsing model information from fastq headers output by Guppy and MinKNOW.
8
10
### Changed
9
11
- Additional explanatory information in VCF INFO fields concerning depth calculations.
10
12
Original file line number Diff line number Diff line change 5
5
import subprocess
6
6
import sys
7
7
8
- __version__ = "1.11.1 "
8
+ __version__ = "1.11.2 "
9
9
10
10
try :
11
11
import pyabpoa as abpoa
Original file line number Diff line number Diff line change @@ -173,16 +173,24 @@ def _model_from_fastq(fname):
173
173
models = set ()
174
174
with pysam .FastxFile (fname , 'r' ) as fastq :
175
175
for rec in itertools .islice (fastq , 100 ):
176
- # model is embedded in RG:Z: tag of comment as
177
- # <run_id>_<model>_<barcode>, but model has _
178
- # characters in also so search for known models
179
176
try :
177
+ # dorado SAM converted to FASTQ with e.g. samtools fastq
178
+ # model is embedded in RG:Z: tag of comment as
179
+ # <run_id>_<model>_<barcode>, but model has _
180
+ # characters in also so search for known models
180
181
read_group = rec .comment .split ("RG:Z:" )[1 ].split ()[0 ]
181
182
for model in known_models :
182
183
if model in read_group :
183
184
models .add (model )
184
185
except Exception :
185
- pass
186
+ # minknow/guppy
187
+ # basecall_model_version_id=<model>
188
+ try :
189
+ model = rec .comment .split (
190
+ "basecall_model_version_id=" )[1 ].split ()[0 ]
191
+ models .add (model )
192
+ except Exception :
193
+ pass
186
194
if len (models ) > 1 :
187
195
# filter out any models without an `@`. These are likely FPs of
188
196
# the search above (there are unversioned models whose name
Original file line number Diff line number Diff line change @@ -71,6 +71,7 @@ class TestScrapBasecaller(unittest.TestCase):
71
71
root_dir = os .path .abspath (os .path .dirname (__file__ ))
72
72
bam = os .path .join (root_dir , 'data/bc_model_scrape.bam' )
73
73
fastq = os .path .join (root_dir , 'data/bc_model_scrape.fastq.gz' )
74
+ fastq_minknow = os .path .join (root_dir , 'data/bc_model_scrape_minknow.fastq.gz' )
74
75
75
76
def test_000_from_bam_consensus (self ):
76
77
model = models .model_from_basecaller (self .bam , variant = False )
@@ -88,6 +89,13 @@ def test_011_from_fastq_variant(self):
88
89
model = models .model_from_basecaller (self .fastq , variant = True )
89
90
self .assertEqual (model , "r1041_e82_400bps_hac_variant_v4.2.0" )
90
91
92
+ def test_020_from_fastq_minknow (self ):
93
+ model = models .model_from_basecaller (self .fastq_minknow , variant = False )
94
+ self .assertEqual (model , "r1041_e82_400bps_sup_v4.2.0" )
95
+
96
+ def test_021_from_fastq_minknow_variant (self ):
97
+ model = models .model_from_basecaller (self .fastq_minknow , variant = True )
98
+ self .assertEqual (model , "r1041_e82_400bps_sup_variant_v4.2.0" )
91
99
92
100
class TestBuildModel (unittest .TestCase ):
93
101
You can’t perform that action at this time.
0 commit comments