-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path2023-05-01--norovirus2.py
71 lines (58 loc) · 2.33 KB
/
2023-05-01--norovirus2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
# Tally CDC NORS norovirus outbreaks per year, together with how many of them
# report a genogroup breakdown.  When run as a script, prints one
# "year<TAB>all<TAB>with-genogroup" line per year, in sorted year order.

# Every combination of norovirus etiology labels this script knows how to
# interpret, mapped to a short genogroup tag.  None means the outbreak was
# attributed to norovirus without any genogroup being reported.
_GENOGROUP_TAGS = {
    frozenset(("Norovirus",)): None,
    frozenset(("Norovirus Genogroup I",)): "I",
    frozenset(("Norovirus Genogroup II",)): "II",
    frozenset(("Norovirus Genogroup IV",)): "IV",
    frozenset(("Norovirus Genogroup IX",)): "IX",
    frozenset(("Norovirus Genogroup I",
               "Norovirus Genogroup II")): "I+II",
    frozenset(("Norovirus Genogroup II",
               "Norovirus Genogroup IV")): "II+IV",
    frozenset(("Norovirus Genogroup I",
               "Norovirus Genogroup II",
               "Norovirus Genogroup IX")): "I+II+IX",
}


def _norovirus_etiologies(etiology_field):
    """Return the norovirus labels found in a "; "-separated etiology field.

    Non-norovirus etiologies are dropped and the " unknown" / " other"
    qualifiers are removed from the remaining labels.  A bare "Norovirus"
    label is dropped when a more specific label is also present, because it
    adds no information.  The result is an empty frozenset when the field
    names no norovirus etiology at all.
    """
    labels = {
        label.replace(" unknown", "").replace(" other", "")
        for label in etiology_field.split("; ")
        if "Norovirus" in label
    }
    if len(labels) > 1:
        labels.discard("Norovirus")
    return frozenset(labels)


def count_norovirus_outbreaks(lines):
    """Count norovirus outbreaks per year from tab-separated outbreak rows.

    Args:
        lines: iterable of text lines.  The first non-blank line is the
            header and must name the "Year" and "Etiology" columns; blank
            lines are ignored.

    Returns:
        dict mapping each year (the string exactly as it appears in the
        input) to a two-item list:
        [all norovirus outbreaks, outbreaks with a genogroup reported].

    Raises:
        ValueError: if a required column is missing from the header, or an
            outbreak lists a combination of norovirus etiologies that has no
            entry in _GENOGROUP_TAGS.
        IndexError: if a data row has fewer columns than the header requires.
    """
    counts = {}
    year_col = etiology_col = None
    for line in lines:
        if not line.strip():
            continue
        # Remove only the line terminator.  A bare strip() would also eat
        # leading/trailing tabs, dropping empty edge columns and shifting the
        # remaining cells out from under their headers.
        row = line.rstrip("\r\n").split("\t")
        if year_col is None:
            # Header row: resolve the column positions once, not per row.
            year_col = row.index("Year")
            etiology_col = row.index("Etiology")
            continue

        etiologies = _norovirus_etiologies(row[etiology_col])
        if not etiologies:
            continue  # not a norovirus outbreak

        try:
            tag = _GENOGROUP_TAGS[etiologies]
        except KeyError:
            # Fail loudly so a new combination gets an explicit table entry
            # instead of being silently miscounted.
            raise ValueError(
                "unexpected norovirus etiology combination: %r"
                % (sorted(etiologies),)) from None

        year_counts = counts.setdefault(row[year_col], [0, 0])
        year_counts[0] += 1
        if tag:
            year_counts[1] += 1
    return counts


if __name__ == "__main__":
    with open("prevalence-data/cdc-nors-outbreak-data.tsv") as inf:
        data = count_norovirus_outbreaks(inf)  # year -> [all, has etiology breakdown]
    for year in sorted(data):
        print(year, *data[year], sep="\t")