23
23
24
24
from talos .utils import read_json_from_path
25
25
26
# key used for the per-sample mean in each category's summary dictionary
MEAN_SLASH_SAMPLE = 'Mean/sample'


class NoVariantsFoundError(Exception):
    """raise if a report subset contains no data"""


def get_variant_summary(results: ResultData) -> dict:
    """
    Run the numbers across all variant categories
    Treat each primary-secondary comp-het pairing as one event
    i.e. the thing being counted here is the number of events
    which passed through the MOI process, not the absolute number
    of variants in the report

    For each category (plus the catch-all 'any') this reports:
        - Description: the category description from the report metadata
        - Total: sum of the per-sample variant counts
        - Unique: number of distinct variant strings seen across all samples
        - Peak #/sample: the largest per-sample count
        - Mean/sample: Total divided by the number of samples

    Args:
        results (ResultData): the results object in full

    Returns:
        a dictionary summarising the categorised variants, with one entry
        per category and a 'samples_no_variants' count

    Raises:
        NoVariantsFoundError: if the report contains no samples, as the
            per-sample statistics cannot be computed
    """

    # get the categories this report was aware of, led by the catch-all
    ordered_categories = ['any', *results.metadata.categories]

    # max() and the mean are undefined without samples, so fail with the
    # dedicated exception before attempting to compute them
    if not results.results:
        raise NoVariantsFoundError('No categorised variants found')

    # per-category list holding one variant count per sample
    category_count: dict[str, list[int]] = {key: [] for key in ordered_categories}

    # per-category set of every distinct variant string seen in any sample
    category_strings: dict[str, set[str]] = {key: set() for key in ordered_categories}

    for sample_data in results.results.values():
        sample_variants: dict[str, set[str]] = {key: set() for key in ordered_categories}

        # iterate over the list of variants
        for variant in sample_data.variants:
            var_string = variant.var_data.coordinates.string_format

            # every variant counts towards 'any', plus each of its categories
            for category_value in ('any', *variant.categories):
                sample_variants[category_value].add(var_string)

        # update the global lists and sets with this sample's variants
        for key, var_strings in sample_variants.items():
            category_count[key].append(len(var_strings))
            category_strings[key] |= var_strings

    summary_dicts: dict = {
        key: {
            'Description': results.metadata.categories.get(key, 'All Variants'),
            'Total': sum(category_count[key]),
            'Unique': len(category_strings[key]),
            'Peak #/sample': max(category_count[key]),
            MEAN_SLASH_SAMPLE: sum(category_count[key]) / len(category_count[key]),
        }
        for key in ordered_categories
    }

    # samples whose 'any' count is zero had no variants at all
    summary_dicts['samples_no_variants'] = category_count['any'].count(0)

    return summary_dicts
90
+
26
91
27
92
def main (input_path : str , output_path : str | None = None , prefix : int | None = None ):
28
93
"""
29
- read the target report, and summarise the number of affected samples involved
94
+ read the target report, and summarise the content:
95
+ - the number of affected samples involved
96
+ - the number of variants in each category
97
+ - the number of samples with no variants
30
98
31
99
Args:
32
100
input_path (str): where to read the report from
@@ -37,21 +105,22 @@ def main(input_path: str, output_path: str | None = None, prefix: int | None = N
37
105
# read the report file, local or cloud
38
106
report = read_json_from_path (input_path , return_model = ResultData )
39
107
40
- # this is a simple overview
41
- family_breakdown = report .metadata .family_breakdown
108
+ summarised_content : dict = {'family_breakdown' : report .metadata .family_breakdown }
42
109
43
110
if prefix :
44
111
# set up a section in the dictionary for this
45
- family_breakdown ['grouped_by_prefix' ] = defaultdict (int )
112
+ summarised_content [ ' family_breakdown' ] ['grouped_by_prefix' ] = defaultdict (int )
46
113
for proband in report .results .values ():
47
- family_breakdown ['grouped_by_prefix' ][proband .metadata .ext_id [:prefix ]] += 1
114
+ summarised_content ['family_breakdown' ]['grouped_by_prefix' ][proband .metadata .ext_id [:prefix ]] += 1
115
+
116
+ summarised_content ['variant_summary' ] = get_variant_summary (report )
48
117
49
- print (json .dumps (family_breakdown , indent = 4 ))
118
+ print (json .dumps (summarised_content , indent = 4 ))
50
119
51
120
if output_path :
52
121
# write the output to file
53
122
with to_path (output_path ).open ('w' ) as handle :
54
- json .dump (family_breakdown , handle , indent = 4 )
123
+ json .dump (summarised_content , handle , indent = 4 )
55
124
56
125
57
126
def cli_main ():
0 commit comments