-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathinsert_coverage_stats.py
executable file
·90 lines (80 loc) · 2.57 KB
/
insert_coverage_stats.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
#!/usr/bin/env python
"""Generate summary statistics of read coverage by region defined in a bedfile.
Use to evaluate per-amplicon performance of primers for a sample."""
import argparse
from io import StringIO
import subprocess
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
def parse_args():
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument("bedfile", help="Amplicon insert regions bedfile path")
parser.add_argument("alignments", help="Alignments .bam path")
parser.add_argument(
"--prop_cov",
help="Proportion of a read that must be mapped to a region "
"for it to count towards coverage. Default: 0.90",
default="0.90",
)
args = parser.parse_args()
return args
def run(args):
# Run bedtools to get per-base coverage for reads that map to each insert
cmd = [
"bedtools",
"coverage",
"-F",
args.prop_cov,
"-d",
"-a",
args.bedfile,
"-b",
args.alignments,
]
a = subprocess.run(cmd, capture_output=True, check=True)
df = pd.read_csv(
StringIO(a.stdout.decode("utf-8")),
sep="\t",
header=None,
names=[
"chrom",
"chrom_start",
"chrom_end",
"region_name",
"primer_pool",
"strand",
"position",
"depth",
],
)
# Generate summary statistics of coverage by insert and save to file
insert_stats_data = []
for insert in df["region_name"].unique():
insert_depth_stats = df[df["region_name"] == insert]["depth"].describe()
insert_depth_stats["insert_name"] = str(insert)
insert_stats_data.append(insert_depth_stats)
insert_stats_df = pd.DataFrame(insert_stats_data).reset_index(drop=True)
insert_stats_df.rename(columns={"count": "insert_length"}, inplace=True)
insert_stats_df = insert_stats_df[
[
"insert_name",
"insert_length",
"mean",
"std",
"min",
"25%",
"50%",
"75%",
"max",
]
]
insert_stats_df.to_csv("depth_by_insert_stats.tsv", index=False, sep="\t")
# Save a boxplot of coverage by insert
ax, fig = plt.subplots(figsize=[18, 5])
sns.boxplot(x="region_name", y="depth", data=df, color="lightcyan", linewidth=1)
plt.xticks(rotation=90, size=8)
plt.savefig("depth_by_insert_boxplot.png", dpi=300, bbox_inches="tight")
if __name__ == "__main__":
args = parse_args()
run(args)