1
1
/* test/test-vcf-api.c -- VCF test harness.
2
2
3
- Copyright (C) 2013, 2014, 2017-2021, 2023 Genome Research Ltd.
3
+ Copyright (C) 2013, 2014, 2017-2021, 2023, 2025 Genome Research Ltd.
4
4
5
5
Author: Petr Danecek <[email protected] >
6
6
@@ -658,6 +658,135 @@ void test_open_format(void) {
658
658
error ("Expected failure for wrong extension 'vcf.bvcf.bgz'" );
659
659
}
660
660
661
+ //tests on rlen calculation
662
+ void test_rlen_values (void )
663
+ {
664
+ bcf_hdr_t * hdr ;
665
+ bcf1_t * rec , * rec2 ;
666
+ int i , j ;
667
+ int32_t tmpi ;
668
+ //data common for all versions, interpreted differently based on version
669
+ #define data "##reference=file://tmp\n" \
670
+ "##FILTER=<ID=PASS,Description=\"All filters passed\">\n" \
671
+ "##INFO=<ID=END,Number=1,Type=Integer,Description=\"end\">\n" \
672
+ "##FORMAT=<ID=GT,Number=1,Type=String,Description=\"gt\">\n" \
673
+ "##INFO=<ID=SVLEN,Number=A,Type=Integer,Description=\"svlen\">\n" \
674
+ "##INFO=<ID=SVCLAIM,Number=A,Type=String,Description=\"svclaim\">\n" \
675
+ "##FORMAT=<ID=LEN,Number=1,Type=Integer,Description=\"fmt len\">\n" \
676
+ "##contig=<ID=1,Length=40>\n" \
677
+ "##ALT=<ID=INS,Description=\"INS\">\n" \
678
+ "##ALT=<ID=DEL,Description=\"DEL\">\n" \
679
+ "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tSample1\tSample2\n" \
680
+ "1\t4310\t.\tG\tA\t.\t.\t.\tGT\t0/0\t0|1\n" \
681
+ "1\t4311\t.\tC\tCT\t.\t.\t.\tGT\t0/0\t1/0\n" \
682
+ "1\t4312\t.\tTC\tT\t213.73\t.\t.\tGT\t0/1\t0|0\n" \
683
+ "1\t4314\t.\tG\t<INS>\t213.73\t.\tSVLEN=10;SVCLAIM=J\tGT\t0/1\t0|0\n" \
684
+ "1\t4315\t.\tG\t<DEL>\t213.73\t.\tSVLEN=-10;SVCLAIM=D\tGT\t0/1\t0|0\n" \
685
+ "1\t4326\t.\tG\t<INS>\t213.73\t.\tEND=4326;SVLEN=10;SVCLAIM=J\tGT\t0/1\t0|0\n" \
686
+ "1\t4327\t.\tG\t<DEL>\t213.73\t.\tEND=4337;SVLEN=-10;SVCLAIM=J\tGT\t0/1\t0|0\n" \
687
+ "1\t4338\t.\tG\t<*>\t213.73\t.\tEND=4342;SVLEN=.;SVCLAIM=.\tGT:LEN\t0/1:7\t0|0:8\n" \
688
+ "1\t4353\t.\tG\t<*>\t213.73\t.\tEND=4357;SVLEN=.;SVCLAIM=.\tGT:LEN\t0/1:7\t0|0:.\n" \
689
+ "1\t4363\t.\tG\t<*>\t213.73\t.\tEND=4367;SVLEN=.;SVCLAIM=.\tGT:LEN\t0/1:7\t0|0:.\n" \
690
+ "1\t4373\t.\tG\tT\t213.73\t.\tEND=4375;SVLEN=8;SVCLAIM=.\tGT:LEN\t0/1:.\t0|0:10\n"
691
+ //this last one is invalid one, to showcase how SVLEN/LEN are considered even when ALT is no SV/gvcf
692
+ //this is because data are not cross checked with allele types, to make it simple
693
+
694
+ //test vcf with different versions
695
+ const int vcfsz = 11 , testsz = 3 ;
696
+ static char d43 [] = "data:,"
697
+ "##fileformat=VCFv4.3\n" data ;
698
+ static char d44 [] = "data:,"
699
+ "##fileformat=VCFv4.4\n" data ;
700
+ static char d45 [] = "data:,"
701
+ "##fileformat=VCFv4.5\n" data ;
702
+ //expected rlen for tests
703
+ int rlen43 [] = {1 , 1 , 2 , 1 , 1 , 1 , 11 , 5 , 5 , 5 , 3 };
704
+ int rlen44 [] = {1 , 1 , 2 , 1 , 11 , 1 , 11 , 5 , 5 , 5 , 9 };
705
+ int rlen45 [] = {1 , 1 , 2 , 1 , 11 , 1 , 11 , 8 , 7 , 7 , 10 };
706
+
707
+ char * darr [] = {& d43 [0 ], & d44 [0 ], & d45 [0 ]}; //data array
708
+ int * rarr [] = {& rlen43 [0 ], & rlen44 [0 ], & rlen45 [0 ]}; //result array
709
+
710
+ enum htsLogLevel logging = hts_get_log_level ();
711
+
712
+ // Silence warning messages
713
+ hts_set_log_level (HTS_LOG_ERROR );
714
+
715
+ rec = bcf_init1 (); rec2 = bcf_init1 ();
716
+ if (!rec || !rec2 ) error ("Failed to allocate BCF record : %s" , strerror (errno ));
717
+ //calculating rlen with different vcf versions
718
+ for (i = 0 ; i < testsz ; ++ i ) {
719
+ htsFile * fp = hts_open (darr [i ], "r" );
720
+ if (!fp ) error ("Failed to open vcf data with %d : %s" , i + 1 , strerror (errno ));
721
+ bcf_clear1 (rec );
722
+ hdr = bcf_hdr_read (fp );
723
+ if (!hdr ) error ("Failed to read BCF header with %d : %s" , i + 1 , strerror (errno ));
724
+ for (j = 0 ; j < vcfsz ; ++ j ) {
725
+ check0 (bcf_read (fp , hdr , rec ));
726
+ if (rec -> rlen != rarr [i ][j ]) {
727
+ error ("Incorrect rlen @ vcf %d on test %d - expected %d got %" PRIhts_pos "\n" , j + 1 , i + 1 , rarr [i ][j ], rec -> rlen );
728
+ }
729
+ }
730
+ bcf_hdr_destroy (hdr );
731
+ hts_close (fp );
732
+ }
733
+
734
+ //calculating rlen with update to fields
735
+ htsFile * fp = hts_open (d45 , "r" );
736
+ int id = 1 , val []= { 1 , 15 };
737
+ bcf_clear1 (rec );
738
+ hdr = bcf_hdr_read (fp );
739
+ if (!hdr ) error ("Failed to read BCF header : %s" , strerror (errno ));
740
+ check0 (bcf_read (fp , hdr , rec ));
741
+ //"1\t4310\t.\tG\tA\t.\t.\t.\tGT\t0/0\t0|1\n"
742
+ if (rec -> rlen != 1 )
743
+ error ("Incorrect rlen set, expected 1 got %" PRIhts_pos " @ %d\n" , rec -> rlen , id );
744
+ //alt change A->AT
745
+ ++ id ; check0 (bcf_update_alleles_str (hdr , rec , "G,AT" ));
746
+ if (rec -> rlen != 1 )
747
+ error ("Incorrect rlen set, expected 1 got %" PRIhts_pos " @ %d\n" , rec -> rlen , id );
748
+ //ref change G->GC, alt AT -> A, "1\t4310\t.\tGC\tA\t.\t.\t.\tGT\t0/0\t0|1\n"
749
+ id ++ ; check0 (bcf_update_alleles_str (hdr , rec , "GC,A" ));
750
+ if (rec -> rlen != 2 )
751
+ error ("Incorrect rlen set, expected 2 got %" PRIhts_pos " @ %d\n" , rec -> rlen , id );
752
+ ++ id ; check0 (bcf_update_alleles_str (hdr , rec , "G,<*>" ));
753
+ if (rec -> rlen != 1 )
754
+ error ("Incorrect rlen set, expected 1 got %" PRIhts_pos " @ %d\n" , rec -> rlen , id );
755
+ tmpi = 4323 ;
756
+ ++ id ; check0 (bcf_update_info_int32 (hdr , rec , "END" , & tmpi , 1 )); //SVLEN to infer from END and use
757
+ if (rec -> rlen != 14 )
758
+ error ("Incorrect rlen set, expected 14 got %" PRIhts_pos " @ %d\n" , rec -> rlen , id );
759
+ ++ id ; check0 (bcf_update_format_int32 (hdr , rec , "LEN" , & val , 2 ));
760
+ if (rec -> rlen != 15 )
761
+ error ("Incorrect rlen set, expected 15 got %" PRIhts_pos " @ %d\n" , rec -> rlen , id );
762
+ ++ id ; check0 (bcf_update_info_int32 (hdr , rec , "END" , & tmpi , 0 )); //removes END
763
+ if (rec -> rlen != 15 )
764
+ error ("Incorrect rlen set, expected 15 got %" PRIhts_pos " @ %d\n" , rec -> rlen , id );
765
+ ++ id ; check0 (bcf_update_format_int32 (hdr , rec , "LEN" , & tmpi , 0 )); //removes LEN
766
+ if (rec -> rlen != 1 )
767
+ error ("Incorrect rlen set, expected 1 got %" PRIhts_pos " @ %d\n" , rec -> rlen , id );
768
+ //ALT -> DEL, "1\t4310\t.\tG\tT,<DEL>\t.\t.\t.\tGT\t0/0\t0|1\n"
769
+ ++ id ; check0 (bcf_update_alleles_str (hdr , rec , "G,T,<DEL>" ));
770
+ if (rec -> rlen != 1 )
771
+ error ("Incorrect rlen set, expected 1 got %" PRIhts_pos " @ %d\n" , rec -> rlen , id );
772
+ val [0 ] = 0 ; val [1 ] = -5 ;
773
+ ++ id ; check0 (bcf_update_info_int32 (hdr , rec , "SVLEN" , & val , 2 )); //Add svlen
774
+ if (rec -> rlen != 6 )
775
+ error ("Incorrect rlen set, expected 6 got %" PRIhts_pos " @ %d\n" , rec -> rlen , id );
776
+ ++ id ; bcf_copy (rec2 , rec );
777
+ if (rec2 -> rlen != 6 )
778
+ error ("Incorrect rlen set, expected 6 got %" PRIhts_pos " @ %d\n" , rec -> rlen , id );
779
+
780
+ //needs update when header version change is handled
781
+ bcf_destroy1 (rec );
782
+ bcf_destroy1 (rec2 );
783
+ bcf_hdr_destroy (hdr );
784
+
785
+ hts_close (fp );
786
+
787
+ hts_set_log_level (logging );
788
+ }
789
+
661
790
int main (int argc , char * * argv )
662
791
{
663
792
char * fname = argc > 1 ? argv [1 ] : "rmme.bcf" ;
@@ -674,5 +803,6 @@ int main(int argc, char **argv)
674
803
test_get_info_values (fname );
675
804
test_invalid_end_tag ();
676
805
test_open_format ();
806
+ test_rlen_values ();
677
807
return 0 ;
678
808
}
0 commit comments