From e90f06e7c7b36397b94023628fffb89dccad195a Mon Sep 17 00:00:00 2001 From: Manuel Holtgrewe Date: Tue, 27 Feb 2024 11:16:31 +0100 Subject: [PATCH] feat: implement ingest and aggregate for sniffles2 (#296) --- README.md | 1 + src/strucvars/ingest/header.rs | 2 + src/strucvars/ingest/mod.rs | 4 + ...ld_output_header_37@sniffles2-min.vcf.snap | 62 ++++++++++++++ ...ld_output_header_38@sniffles2-min.vcf.snap | 62 ++++++++++++++ ...s__ingest__test__smoke_test_singleton.snap | 8 +- tests/strucvars/ingest/sniffles2-min.ped | 1 + tests/strucvars/ingest/sniffles2-min.vcf | 78 ++++++++++++++++++ tests/strucvars/ingest/sniffles2-min.vcf.gz | Bin 0 -> 1942 bytes .../strucvars/ingest/sniffles2-min.vcf.gz.tbi | Bin 0 -> 116 bytes 10 files changed, 216 insertions(+), 2 deletions(-) create mode 100644 src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@sniffles2-min.vcf.snap create mode 100644 src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@sniffles2-min.vcf.snap create mode 100644 tests/strucvars/ingest/sniffles2-min.ped create mode 100644 tests/strucvars/ingest/sniffles2-min.vcf create mode 100644 tests/strucvars/ingest/sniffles2-min.vcf.gz create mode 100644 tests/strucvars/ingest/sniffles2-min.vcf.gz.tbi diff --git a/README.md b/README.md index a1c3af05..6bdbc490 100644 --- a/README.md +++ b/README.md @@ -162,6 +162,7 @@ The command supports the following variant callers and can guess the caller from - Manta - MELT - PopDel +- Sniffles2 One record will be written out for each variant, each with a single alternate allele. diff --git a/src/strucvars/ingest/header.rs b/src/strucvars/ingest/header.rs index 5b8c98a6..0dad5291 100644 --- a/src/strucvars/ingest/header.rs +++ b/src/strucvars/ingest/header.rs @@ -293,6 +293,7 @@ mod test { #[case("tests/strucvars/ingest/manta-min.vcf")] #[case("tests/strucvars/ingest/melt-min.vcf")] #[case("tests/strucvars/ingest/popdel-min.vcf")] + #[case("tests/strucvars/ingest/sniffles2-min.vcf")] #[tokio::test] async fn build_output_header_37(#[case] path: &str) -> Result<(), anyhow::Error> { mehari::common::set_snapshot_suffix!("{}", path.split('/').last().unwrap()); @@ -338,6 +339,7 @@ mod test { #[case("tests/strucvars/ingest/manta-min.vcf")] #[case("tests/strucvars/ingest/melt-min.vcf")] #[case("tests/strucvars/ingest/popdel-min.vcf")] + #[case("tests/strucvars/ingest/sniffles2-min.vcf")] #[tokio::test] async fn build_output_header_38(#[case] path: &str) -> Result<(), anyhow::Error> { mehari::common::set_snapshot_suffix!("{}", path.split('/').last().unwrap()); diff --git a/src/strucvars/ingest/mod.rs b/src/strucvars/ingest/mod.rs index 0e0a9a3e..8aaddeea 100644 --- a/src/strucvars/ingest/mod.rs +++ b/src/strucvars/ingest/mod.rs @@ -230,6 +230,8 @@ async fn write_ingest_record( Ok(Some("Popdel".to_string())) } else if caller.starts_with("MELTv") { Ok(Some("Melt".to_string())) + } else if caller.starts_with("SNIFFLESv") { + Ok(Some("Sniffles".to_string())) } else { anyhow::bail!("unknown caller: {}", caller) } @@ -486,6 +488,7 @@ mod test { String::from("tests/strucvars/ingest/gcnv-min.vcf"), String::from("tests/strucvars/ingest/manta-min.vcf"), String::from("tests/strucvars/ingest/melt-min.vcf"), + String::from("tests/strucvars/ingest/sniffles2-min.vcf"), ], path_cov_vcf: vec![], path_ped: "tests/strucvars/ingest/dragen-cnv-min.ped".into(), @@ -558,6 +561,7 @@ mod test { String::from("tests/strucvars/ingest/gcnv-min.vcf.gz"), String::from("tests/strucvars/ingest/manta-min.vcf.gz"), String::from("tests/strucvars/ingest/melt-min.vcf.gz"), + String::from("tests/strucvars/ingest/sniffles2-min.vcf.gz"), ], path_cov_vcf: vec![], path_ped: "tests/strucvars/ingest/dragen-cnv-min.ped".into(), diff --git a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@sniffles2-min.vcf.snap b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@sniffles2-min.vcf.snap new file mode 100644 index 00000000..fe327e12 --- /dev/null +++ b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@sniffles2-min.vcf.snap @@ -0,0 +1,62 @@ +--- +source: src/strucvars/ingest/header.rs +expression: "std::fs::read_to_string(out_path_str)?" +--- +##fileformat=VCFv4.4 +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##FILTER= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##ALT= +##ALT= +##ALT= +##ALT= +##ALT= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##fileDate=20230421 +##x-varfish-genome-build=GRCh37 +##SAMPLE= +##PEDIGREE= +##x-varfish-case-uuid=d2bad2ec-a75d-44b9-bd0a-83a3f1331b7c +##x-varfish-version= +##x-varfish-version= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE diff --git a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@sniffles2-min.vcf.snap b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@sniffles2-min.vcf.snap new file mode 100644 index 00000000..41225361 --- /dev/null +++ b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@sniffles2-min.vcf.snap @@ -0,0 +1,62 @@ +--- +source: src/strucvars/ingest/header.rs +expression: "std::fs::read_to_string(out_path_str)?" +--- +##fileformat=VCFv4.4 +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##FILTER= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##ALT= +##ALT= +##ALT= +##ALT= +##ALT= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##fileDate=20230421 +##x-varfish-genome-build=GRCh38 +##SAMPLE= +##PEDIGREE= +##x-varfish-case-uuid=d2bad2ec-a75d-44b9-bd0a-83a3f1331b7c +##x-varfish-version= +##x-varfish-version= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE diff --git a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__test__smoke_test_singleton.snap b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__test__smoke_test_singleton.snap index 0c1e99fe..7f437a9b 100644 --- a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__test__smoke_test_singleton.snap +++ b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__test__smoke_test_singleton.snap @@ -63,11 +63,15 @@ expression: "std::fs::read_to_string(&args.path_out)?" ##x-varfish-version= ##x-varfish-version= ##x-varfish-version= +##x-varfish-version= #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE 1 1000 . N . . SVCLAIM=J;SVTYPE=INS;END=1000;SVLEN=1;callers=Melt GT:GQ:pec:pev:src:srv:amq:cn:anc:pc 1/1:2:1:1:2:2:.:.:.:. -1 2000 . N . . SVCLAIM=J;SVTYPE=INS;END=2000;SVLEN=1;callers=Melt GT:GQ:pec:pev:src:srv:amq:cn:anc:pc 0/0:4:4:0:4:1:.:.:.:. +1 1000 . N . . SVCLAIM=DJ;SVTYPE=DEL;END=1049;SVLEN=50;callers=Sniffles GT:GQ:pec:pev:src:srv:amq:cn:anc:pc 1/1:60:.:.:33:33:.:.:.:. +1 2000 . N . . SVCLAIM=J;SVTYPE=INS;END=2000;SVLEN=1;callers=Melt GT:GQ:pec:pev:src:srv:amq:cn:anc:pc 0/0:60:4:0:24:24:.:.:.:. 1 3000 . N . . SVCLAIM=J;SVTYPE=INS;END=3000;SVLEN=1;callers=Melt GT:GQ:pec:pev:src:srv:amq:cn:anc:pc 0/0:28:23:0:24:1:.:.:.:. +1 3000 . N . . SVCLAIM=J;SVTYPE=INV;END=3589;SVLEN=590;callers=Sniffles GT:GQ:pec:pev:src:srv:amq:cn:anc:pc 0/1:24:.:.:37:10:.:.:.:. +1 4000 . N . . SVCLAIM=DJ;SVTYPE=DUP;END=17584;SVLEN=13585;callers=Sniffles GT:GQ:pec:pev:src:srv:amq:cn:anc:pc 0/1:60:.:.:61:18:.:.:.:. +1 5000 . N N]chrUn_JTFH01000344v1_decoy:679] . . SVCLAIM=J;SVTYPE=BND;END=679;chr2=chrUn_JTFH01000344v1_decoy;callers=Sniffles GT:GQ:pec:pev:src:srv:amq:cn:anc:pc 0/1:15:.:.:31:8:.:.:.:. 1 1283844 . N . . SVCLAIM=D;SVTYPE=CNV;END=1284844;SVLEN=1001;callers=DragenCnv GT:GQ:pec:pev:src:srv:amq:cn:anc:pc 0/1:.:.:.:.:.:.:1:.:1 1 1598413 . N . . SVCLAIM=DJ;SVTYPE=DEL;END=1598580;SVLEN=168;callers=DragenSv GT:GQ:pec:pev:src:srv:amq:cn:anc:pc 1/1:53:2:2:20:20:.:.:.:. 1 4124001 . N . . SVCLAIM=D;SVTYPE=DEL;END=4125000;SVLEN=1000;callers=Gcnv GT:GQ:pec:pev:src:srv:amq:cn:anc:pc 1:.:.:.:.:.:.:1:.:1 - diff --git a/tests/strucvars/ingest/sniffles2-min.ped b/tests/strucvars/ingest/sniffles2-min.ped new file mode 100644 index 00000000..de863440 --- /dev/null +++ b/tests/strucvars/ingest/sniffles2-min.ped @@ -0,0 +1 @@ +FAM SAMPLE 0 0 1 2 diff --git a/tests/strucvars/ingest/sniffles2-min.vcf b/tests/strucvars/ingest/sniffles2-min.vcf new file mode 100644 index 00000000..72ec3edd --- /dev/null +++ b/tests/strucvars/ingest/sniffles2-min.vcf @@ -0,0 +1,78 @@ +##fileformat=VCFv4.2 +##source=Sniffles2_2.2 +##command="sniffles --input out/SAMPLE.hac.sniffles.snf --vcf SAMPLE.multi.vcf" +##fileDate="2024/02/09 14:19:35" +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##ALT= +##ALT= +##ALT= +##ALT= +##ALT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE +chr1 1000 Sniffles2.DEL.CM0 N 54 PASS PRECISE;SVTYPE=DEL;SVLEN=-50;END=1049;SUPPORT=26;COVERAGE=41,41,42,41,39;STRAND=+-;AC=4;STDEV_LEN=23.094;STDEV_POS=2.887;SUPP_VEC=11 GT:GQ:DR:DV:ID 1/1:60:0:33:Sniffles2.DEL.3720S0 +chr1 2000 Sniffles2.INS.0M0 N GTCCCTCTGTCTCTGCCAACCAGTTAACCTGCTGCTTCCTGGAGGAAGACAGTCCCTCTGTCCCTCTGTCTCTGCCAACCAGTTAACCTGCTGCTTCCTGGAGGCAGACAGTCCCTCT 56 PASS IMPRECISE;SVTYPE=INS;SVLEN=118;END=2000;SUPPORT=30;COVERAGE=31,30,30,30,30;STRAND=+-;AC=4;STDEV_LEN=0.000;STDEV_POS=0.000;SUPP_VEC=11 GT:GQ:DR:DV:ID 1/1:60:0:24:Sniffles2.INS.5S0 +chr1 3000 Sniffles2.INV.215M0 N 59 PASS PRECISE;SVTYPE=INV;SVLEN=590;END=3589;SUPPORT=16;COVERAGE=39,19,20,20,40;STRAND=-;AC=2;STDEV_LEN=0.000;STDEV_POS=0.000;SUPP_VEC=11 GT:GQ:DR:DV:ID 0/1:24:27:10:Sniffles2.INV.9A95S0 +chr1 4000 Sniffles2.DUP.2F0M1 N 60 PASS PRECISE;SVTYPE=DUP;SVLEN=13584;END=17584;SUPPORT=20;COVERAGE=42,44,70,42,44;STRAND=+-;AC=2;STDEV_LEN=0.000;STDEV_POS=0.000;SUPP_VEC=11 GT:GQ:DR:DV:ID 0/1:60:43:18:Sniffles2.DUP.954CS1 +chr1 5000 Sniffles2.BND.FFM0 N N]chrUn_JTFH01000344v1_decoy:679] 58 PASS PRECISE;SVTYPE=BND;SUPPORT=7;COVERAGE=44,30,42,42,40;STRAND=+-;AC=1;STDEV_LEN=0.000;STDEV_POS=0.000;SUPP_VEC=11 GT:GQ:DR:DV:ID 0/1:15:23:8:Sniffles2.BND.C83CS0 diff --git a/tests/strucvars/ingest/sniffles2-min.vcf.gz b/tests/strucvars/ingest/sniffles2-min.vcf.gz new file mode 100644 index 0000000000000000000000000000000000000000..444d6e3f369ef0affd5034174bb29c99306a6b42 GIT binary patch literal 1942 zcmV;H2Wj{piwFb&00000{{{d;LjnMK2c=kTlcKm5{>=P}RL_^$x@srk-Cn1P0kpZL z=m>4y{jk-Qf!NKx5F7~Z?SH=oC^pfaaki@{Me>|;@;o``B=Yg&Hc9C=FEYX>8+*D} zM);z!&(j`A!cES}tyhXF7gCTv%6kXQURuokPiJ9;M3=r}$q z*%l0k`)CUeqgj>mWVDaAHy=JWu^qzc$;>y+S~! zsYoP>@ChLsDl$w}M4Z$_D$-0tH$=oqPN>U@sVY!Js3!zgWh9#-S=0$?LRnRGRTcrA zpaw#wiqTn&PEg%|x{j5zX?B9DCc=hkh#_#NrzS(Js#pwxJ2@2tVe?!o+^MM|87itK z%Ke{HR;XBq>RDd>ahjv7%jXUjVdp_pHM1VI=3z7B`N6=UWmF_Pp5$zD>`Ot(|0s*^Kv)n~rui%1)8%?$ zwRW>`;8)o9=E^?l7K%lXTWo*91?j6KHb6D=-VgYOkd6aFH4an{#( zX(1rrf`V>oL0LpWK}lSmx!e4jyFDoq!uVxpo^ypf9*dKG5?3SzCCPRv1;GY*4%p@e zXu9<9@HnsduHtPBZyvhqCdSGNgv7Ga6l{}})1m}BQkFCpZD!#c8ngLvM4} zKl9d>-}RpC<>qZ(QNE`Id7?kCweKx|=Jw6)qgnJ!*cH}~)3s&4;bL2msD4D!AA4IY z1Ix1?t*O8yFG&)CjQqbLMADR|wNv`NqAYrO!KMm8a<3EcLrKBvAZ9-!}O0cUq;mMwYtjU3X zm`3GmI^AK}^DQVZgcO~If2`aqT3$VHolE z*{|*xBrcCxo3JOqpXoJf9J;3G&pqFLXGp?;=LffrJFj<#Pb_D{a+f9?&^=|m1UXAz ze!CohzVv6m7QM{3d>Ez4Zb#$8RF27%zKBAkIeP-h@hk&^O@9`y?_3kyWii<0WzzV# zF4?*brSE>4=7fvIa<&<~5u)BV3f|4+QpEGRd0)h~s|i`Z+_-CN)+Np8%|OR}q6CbR z0#v(_7nEd!mdF4^lyO=N0A=w2#Q9&W9luuMricH${MyD|HeOHJ(PmCvi()9o`++Q5 zV0tk}takH?@`~{RD61@^F{pN+mZ09*4#%epCI7f_?JH=^lae2+@3c$hKX7g@{lN8u zhv4xYorOOW-i516>WMO1kb09n)zv4@4}Jqt&PvM4ssv@bgVX+e*1olH13}D7RDF`s z%j@+i*#gSQx0J^1rZpV42hU-_Y9pb`Sks?Z!(rj3$5Th&gTMHBnlC6}fOWckuW0cC zNSZvcj52;a8JwL>E>jea4)<2zUZL3PnUuAQgtc#J>P3lZk?d&V%Vw)=f(LFm=pJ>6bR6m)oFMIK>4a#1*R<^W^>QJtmVx9s(%PL$ z);yHHK3H?9#Yi=6sr`o{wQqtxM9&415QI?bUNemxcRsQgQ1YeU9e4g&QWdHGpCKKN zKzHp|_%K4eATmpR#$^wwB4ihIf`dQSQ3d zI>GV?n#TvLWiY`b!_e=JRXjmRnuX)p*Riu6JDaiRNaz-gH8_T2SswQp$U24rYzg8% zLC+6Hupu}LZQBm*a27V7wryFqZOy{4{tjo&71nn%Yc{j2nN<&VCfA$VJyS{5T1C5K zDT7|6jBE~&Il!>Kl#X7*4!?&TyhSxJtd9{K_bHf` z*)ddvl6Y8+@DwgkQxp%Y&yof&7sbQsxPY>1C~aBj^<7sNuhy`x2xXwdL37hz!*{dP z;8>Bz$Qbu&n5tq2sAZ}4S$^@I(RA7@qyK00Ts*MHe}~h1Soa56QTFIDrcwSf)^zhv zNi{AC@WppJs`@LYN}Z=QP5f#(Tc+r}Op!Xq^7u8;nu%@5b^t&84Y2Itja?T203VA8 c1ONa4009360763o02=@U000000000000nTjBme*a literal 0 HcmV?d00001 diff --git a/tests/strucvars/ingest/sniffles2-min.vcf.gz.tbi b/tests/strucvars/ingest/sniffles2-min.vcf.gz.tbi new file mode 100644 index 0000000000000000000000000000000000000000..9d19ac422ae1949ad73919222fecaf85a5ad45bd GIT binary patch literal 116 zcmb2|=3rp}f&Xj_PR>jW;SAh`pHfm%5)u-U5)v9N@&Li9fLT);8x<`VpAgV_c#!0RNQ0*sf$M=B?A~Y&9Vy&v1E{t(7VwIG)^APTxkYour(k80F3q>2mk;8 literal 0 HcmV?d00001