-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: implement ingest for Clair3+GLNexus (#297)
- Loading branch information
Showing
5 changed files
with
191 additions
and
11 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
68 changes: 68 additions & 0 deletions
68
...arfish_server_worker__seqvars__ingest__test__result_snapshot_test@clair3_glnexus_vcf.snap
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
--- | ||
source: src/seqvars/ingest/mod.rs | ||
expression: "std::fs::read_to_string(&args.path_out)?" | ||
--- | ||
##fileformat=VCFv4.4 | ||
##INFO=<ID=gnomad_exomes_an,Number=1,Type=Integer,Description="Number of alleles in gnomAD exomes"> | ||
##INFO=<ID=gnomad_exomes_hom,Number=1,Type=Integer,Description="Number of hom. alt. carriers in gnomAD exomes"> | ||
##INFO=<ID=gnomad_exomes_het,Number=1,Type=Integer,Description="Number of het. alt. carriers in gnomAD exomes"> | ||
##INFO=<ID=gnomad_exomes_hemi,Number=1,Type=Integer,Description="Number of hemi. alt. carriers in gnomAD exomes"> | ||
##INFO=<ID=gnomad_genomes_an,Number=1,Type=Integer,Description="Number of alleles in gnomAD genomes"> | ||
##INFO=<ID=gnomad_genomes_hom,Number=1,Type=Integer,Description="Number of hom. alt. carriers in gnomAD genomes"> | ||
##INFO=<ID=gnomad_genomes_het,Number=1,Type=Integer,Description="Number of het. alt. carriers in gnomAD genomes"> | ||
##INFO=<ID=gnomad_genomes_hemi,Number=1,Type=Integer,Description="Number of hemi. alt. carriers in gnomAD genomes"> | ||
##INFO=<ID=helix_an,Number=1,Type=Integer,Description="Number of alleles in HelixMtDb"> | ||
##INFO=<ID=helix_hom,Number=1,Type=Integer,Description="Number of hom. alt. carriers in HelixMtDb"> | ||
##INFO=<ID=helix_het,Number=1,Type=Integer,Description="Number of het. alt. carriers in HelixMtDb"> | ||
##INFO=<ID=ANN,Number=.,Type=String,Description="Functional annotations: 'Allele | Annotation | Annotation_Impact | Gene_Name | Gene_ID | Feature_Type | Feature_ID | Transcript_BioType | Rank | HGVS.c | HGVS.p | cDNA.pos / cDNA.length | CDS.pos / CDS.length | AA.pos / AA.length | Distance | ERRORS / WARNINGS / INFO'"> | ||
##FILTER=<ID=PASS,Description="All filters passed"> | ||
##FORMAT=<ID=AD,Number=R,Type=Integer,Description="Read depth for each allele"> | ||
##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read depth"> | ||
##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Conditional genotype quality"> | ||
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> | ||
##FORMAT=<ID=PS,Number=1,Type=Integer,Description="Phase set"> | ||
##contig=<ID=1,length=249250621,assembly="GRCh37",species="Homo sapiens"> | ||
##contig=<ID=2,length=243199373,assembly="GRCh37",species="Homo sapiens"> | ||
##contig=<ID=3,length=198022430,assembly="GRCh37",species="Homo sapiens"> | ||
##contig=<ID=4,length=191154276,assembly="GRCh37",species="Homo sapiens"> | ||
##contig=<ID=5,length=180915260,assembly="GRCh37",species="Homo sapiens"> | ||
##contig=<ID=6,length=171115067,assembly="GRCh37",species="Homo sapiens"> | ||
##contig=<ID=7,length=159138663,assembly="GRCh37",species="Homo sapiens"> | ||
##contig=<ID=8,length=146364022,assembly="GRCh37",species="Homo sapiens"> | ||
##contig=<ID=9,length=141213431,assembly="GRCh37",species="Homo sapiens"> | ||
##contig=<ID=10,length=135534747,assembly="GRCh37",species="Homo sapiens"> | ||
##contig=<ID=11,length=135006516,assembly="GRCh37",species="Homo sapiens"> | ||
##contig=<ID=12,length=133851895,assembly="GRCh37",species="Homo sapiens"> | ||
##contig=<ID=13,length=115169878,assembly="GRCh37",species="Homo sapiens"> | ||
##contig=<ID=14,length=107349540,assembly="GRCh37",species="Homo sapiens"> | ||
##contig=<ID=15,length=102531392,assembly="GRCh37",species="Homo sapiens"> | ||
##contig=<ID=16,length=90354753,assembly="GRCh37",species="Homo sapiens"> | ||
##contig=<ID=17,length=81195210,assembly="GRCh37",species="Homo sapiens"> | ||
##contig=<ID=18,length=78077248,assembly="GRCh37",species="Homo sapiens"> | ||
##contig=<ID=19,length=59128983,assembly="GRCh37",species="Homo sapiens"> | ||
##contig=<ID=20,length=63025520,assembly="GRCh37",species="Homo sapiens"> | ||
##contig=<ID=21,length=48129895,assembly="GRCh37",species="Homo sapiens"> | ||
##contig=<ID=22,length=51304566,assembly="GRCh37",species="Homo sapiens"> | ||
##contig=<ID=X,length=155270560,assembly="GRCh37",species="Homo sapiens"> | ||
##contig=<ID=Y,length=59373566,assembly="GRCh37",species="Homo sapiens"> | ||
##contig=<ID=MT,length=16569,assembly="GRCh37",species="Homo sapiens"> | ||
##fileDate=20230421 | ||
##SAMPLE=<ID=SAMPLE1,Sex="Male",Disease="Affected"> | ||
##SAMPLE=<ID=SAMPLE2,Sex="Female",Disease="Affected"> | ||
##PEDIGREE=<ID=SAMPLE1> | ||
##PEDIGREE=<ID=SAMPLE2> | ||
##x-varfish-case-uuid=00000000-0000-0000-0000-000000000000 | ||
##x-varfish-version=<ID=varfish-server-worker,Version="x.y.z"> | ||
##x-varfish-version=<ID=orig-caller,Name="Glnexus",Version="v1.4.1-0-g68e25e5",ConfigName="/tmp/clair3.yml"> | ||
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE1 SAMPLE2 | ||
1 10108 . C CT . . . GT:DP:AD:GQ ./.:17:17,0:0 1/1:15:1,14:1 | ||
1 10109 . A T . . . GT:DP:AD:GQ 0/1:11:8,3:9 0/0:15:15,0:44 | ||
1 10147 . C CCCCTAA . . . GT:DP:AD:GQ 0/1:11:8,3:5 0/1:15:4,11:4 | ||
1 10206 . A AC . . . GT:DP:AD:GQ ./.:15:15,0:0 0/1:15:4,11:2 | ||
1 10212 . A AC . . . GT:DP:AD:GQ ./.:15:15,0:0 0/1:15:4,11:1 | ||
1 10218 . A AC . . . GT:DP:AD:GQ ./.:13:13,0:0 0/1:15:4,11:0 | ||
1 10230 . AC ACCCTAA . . . GT:DP:AD:GQ 1/0:11:8,3:5 0/1:15:4,11:2 | ||
1 10230 . AC A . . . GT:DP:AD:GQ 0/1:11:5,6:5 0/0:15:15,0:2 | ||
1 10254 . T C . . . GT:DP:AD:GQ 0/1:11:9,2:5 1/1:15:4,11:2 | ||
1 10257 . A C . . . GT:DP:AD:GQ 0/1:11:7,4:4 1/1:15:4,11:2 | ||
1 10291 . C T . . . GT:DP:AD:GQ 0/1:11:3,8:7 0/1:15:2,13:6 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
FAM SAMPLE1 0 0 1 2 | ||
FAM SAMPLE2 0 0 2 2 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
##fileformat=VCFv4.2 | ||
##NOTE=We split the RNC values with a comma where originally they are a 2-character string. | ||
##FILTER=<ID=PASS,Description="All filters passed"> | ||
##GLnexusVersion=v1.4.1-0-g68e25e5 | ||
##GLnexusConfigName=/tmp/clair3.yml | ||
##GLnexusConfigCRC32C=496041348 | ||
##GLnexusConfig={unifier_config: {drop_filtered: false, min_allele_copy_number: 1, min_AQ1: 10, min_AQ2: 10, min_GQ: 2, max_alleles_per_site: 32, monoallelic_sites_for_lost_alleles: true, preference: common}, genotyper_config: {revise_genotypes: true, min_assumed_allele_frequency: 9.99999975e-05, snv_prior_calibration: 1, indel_prior_calibration: 1, required_dp: 1, allow_partial_data: true, allele_dp_format: AD, ref_dp_format: MIN_DP, output_residuals: false, more_PL: true, squeeze: false, trim_uncalled_alleles: true, top_two_half_calls: false, output_format: BCF, liftover_fields: [{orig_names: [MIN_DP, DP], name: DP, description: "##FORMAT=<ID=DP,Number=1,Type=Integer,Description=\"Approximate read depth (reads with MQ=255 or with bad mates are filtered)\">", type: int, number: basic, default_type: missing, count: 1, combi_method: min, ignore_non_variants: true}, {orig_names: [AD], name: AD, description: "##FORMAT=<ID=AD,Number=R,Type=Integer,Description=\"Allelic depths for the ref and alt alleles in the order listed\">", type: int, number: alleles, default_type: zero, count: 0, combi_method: min, ignore_non_variants: false}, {orig_names: [GQ], name: GQ, description: "##FORMAT=<ID=GQ,Number=1,Type=Integer,Description=\"Genotype Quality\">", type: int, number: basic, default_type: missing, count: 1, combi_method: min, ignore_non_variants: true}, {orig_names: [PL], name: PL, description: "##FORMAT=<ID=PL,Number=G,Type=Integer,Description=\"Phred-scaled genotype Likelihoods\">", type: int, number: genotype, default_type: missing, count: 0, combi_method: missing, ignore_non_variants: true}]}} | ||
##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency estimate for each alternate allele"> | ||
##INFO=<ID=AQ,Number=A,Type=Integer,Description="Allele Quality score reflecting evidence for each alternate allele (Phred scale)"> | ||
##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes"> | ||
##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes"> | ||
##FILTER=<ID=MONOALLELIC,Description="Site represents one ALT allele in a region with multiple variants that could not be unified into non-overlapping multi-allelic sites"> | ||
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> | ||
##FORMAT=<ID=RNC,Number=2,Type=Character,Description="Reason for No Call in GT: . = n/a, M = Missing data, P = Partial data, I = gVCF input site is non-called, D = insufficient Depth of coverage, - = unrepresentable overlapping deletion, L = Lost/unrepresentable allele (other than deletion), U = multiple Unphased variants present, O = multiple Overlapping variants present, 1 = site is Monoallelic, no assertion about presence of REF or ALT allele"> | ||
##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth (reads with MQ=255 or with bad mates are filtered)"> | ||
##FORMAT=<ID=AD,Number=R,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed"> | ||
##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality"> | ||
##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Phred-scaled genotype Likelihoods"> | ||
##contig=<ID=1,length=249250621> | ||
##contig=<ID=2,length=243199373> | ||
##contig=<ID=3,length=198022430> | ||
##contig=<ID=4,length=191154276> | ||
##contig=<ID=5,length=180915260> | ||
##contig=<ID=6,length=171115067> | ||
##contig=<ID=7,length=159138663> | ||
##contig=<ID=8,length=146364022> | ||
##contig=<ID=9,length=141213431> | ||
##contig=<ID=10,length=135534747> | ||
##contig=<ID=11,length=135006516> | ||
##contig=<ID=12,length=133851895> | ||
##contig=<ID=13,length=115169878> | ||
##contig=<ID=14,length=107349540> | ||
##contig=<ID=15,length=102531392> | ||
##contig=<ID=16,length=90354753> | ||
##contig=<ID=17,length=81195210> | ||
##contig=<ID=18,length=78077248> | ||
##contig=<ID=19,length=59128983> | ||
##contig=<ID=20,length=63025520> | ||
##contig=<ID=21,length=48129895> | ||
##contig=<ID=22,length=51304566> | ||
##contig=<ID=X,length=155270560> | ||
##contig=<ID=Y,length=59373566> | ||
##bcftools_viewVersion=1.19+htslib-1.19.1 | ||
##bcftools_viewCommand=view test.bcf; Date=Thu Feb 8 15:26:01 2024 | ||
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE1 SAMPLE2 | ||
1 10108 1_10108_C_CT C CT 16 . AF=0.5;AQ=16 GT:DP:AD:GQ:PL:RNC ./.:17:17,0:0:128,0,278:II 1/1:15:1,14:1:16,4,0:.. | ||
1 10109 1_10109_A_T A T 10 . AF=0.25;AQ=10 GT:DP:AD:GQ:PL:RNC 0/1:11:7,3:9:10,0,25:.. 0/0:15:15,0:44:0,45,449:.. | ||
1 10147 1_10147_C_CCCCTAA C CCCCTAA 24 . AF=0.5;AQ=24 GT:DP:AD:GQ:PL:RNC 0/1:11:3,3:5:16,0,15:.. 0/1:15:1,11:4:24,0,10:.. | ||
1 10206 1_10206_A_AC A AC 11 . AF=0.25;AQ=11 GT:DP:AD:GQ:PL:RNC ./.:15:15,0:0:104,0,254:I,I 0/1:15:4,11:2:11,0,8:.. | ||
1 10212 1_10212_A_AC A AC 10 . AF=0.25;AQ=10 GT:DP:AD:GQ:PL:RNC ./.:15:15,0:0:104,0,254:I,I 0/1:15:3,11:1:10,0,5:.. | ||
1 10218 1_10218_A_AC A AC 10 . AF=0.25;AQ=10 GT:DP:AD:GQ:PL:RNC ./.:13:13,0:0:50,0,260:I,I 0/1:15:2,11:0:10,0,5:.. | ||
1 10230 1_10231_C_CCCTAA;1_10230_AC_A AC ACCCTAA,A 23 . AF=0.5,0.25;AQ=19,23 GT:DP:AD:GQ:PL:RNC 1/2:11:.,3,6:5:0,0,0,0,0,0:.. 0/1:15:2,11,0:2:19,0,8,990,990,990:.. | ||
1 10254 1_10254_T_C T C 26 . AF=0.75;AQ=26 GT:DP:AD:GQ:PL:RNC 0/1:11:6,2:5:5,0,23:.. 1/1:15:2,11:2:26,3,0:.. | ||
1 10257 1_10257_A_C A C 29 . AF=0.75;AQ=29 GT:DP:AD:GQ:PL:RNC 0/1:11:6,4:4:7,0,7:.. 1/1:15:3,11:2:29,3,0:.. | ||
1 10291 1_10291_C_T C T 27 . AF=0.5;AQ=27 GT:DP:AD:GQ:PL:RNC 0/1:11:1,8:7:20,0,7:.. 0/1:15:1,13:6:27,0,3:.. |