Skip to content

Commit

Permalink
Add 6 CRAM compliance tests from htslib (#1185)
Browse files Browse the repository at this point in the history
  • Loading branch information
jmthibault79 authored Oct 5, 2018
1 parent ff3db93 commit 23f3223
Show file tree
Hide file tree
Showing 23 changed files with 1,135 additions and 2 deletions.
10 changes: 8 additions & 2 deletions src/test/java/htsjdk/samtools/CRAMComplianceTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -41,14 +41,19 @@ public Object[][] getPartialVerificationData() {
{"c1#noseq"}, // unsigned attributes: https://github.com/samtools/htsjdk/issues/499
{"c1#unknown"}, // unsigned attributes: https://github.com/samtools/htsjdk/issues/499
{"ce#5b"}, // reads with no read bases: https://github.com/samtools/htsjdk/issues/509
{"ce#1000"}, // SAMRecord mismatch: https://github.com/samtools/htsjdk/issues/1189
{"ce#tag_depadded"},// reads with no read bases: https://github.com/samtools/htsjdk/issues/509
{"ce#tag_padded"}, // reads with no read bases: https://github.com/samtools/htsjdk/issues/509
{"ce#unmap"}, // unmapped reads with non-zero MAPQ value that is not restored
// https://github.com/samtools/htsjdk/issues/714
{"xx#triplet"}, // the version 2.1 variant of this file has a bad insertSize, which is
// probably residual detritus from https://github.com/samtools/htsjdk/issues/364
{"xx#minimal"}, // cigar string "5H0M5H" is restored as "10H"
// https://github.com/samtools/htsjdk/issues/713
{"xx#repeated"}, // SAMRecord mismatch: https://github.com/samtools/htsjdk/issues/1189
{"xx#tlen"}, // SAMRecord mismatch: https://github.com/samtools/htsjdk/issues/1189
{"xx#tlen2"}, // SAMRecord mismatch: https://github.com/samtools/htsjdk/issues/1189
{"xx#triplet"}, // the version 2.1 variant of this file has a bad insertSize, which is
// probably residual detritus from https://github.com/samtools/htsjdk/issues/364
//{"md#1"}, // fails with "offensive record" errors: https://github.com/samtools/htsjdk/issues/1187
};
}

Expand All @@ -67,6 +72,7 @@ public Object[][] getFullVerificationData() {
{"c1#pad1"},
{"c1#pad2"},
{"c1#pad3"},
{"c2#pad"},
{"ce#1"},
{"ce#2"},
{"ce#5"},
Expand Down
Binary file modified src/test/resources/htsjdk/samtools/cram/c2#pad.2.1.cram
Binary file not shown.
Binary file modified src/test/resources/htsjdk/samtools/cram/c2#pad.3.0.cram
Binary file not shown.
26 changes: 26 additions & 0 deletions src/test/resources/htsjdk/samtools/cram/c2#pad.sam
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
@SQ SN:c2 LN:9
@CO
@CO mpileup example from https://github.com/samtools/htslib/issues/59
@CO with additional Pad cigar operations
@CO
@CO c2 CC***AA**T**AA***CC
@CO
@CO +s1 CT***AA**T**AA***TC
@CO +s1b CT*******T*******TC
@CO +s2 CT*****G***G*****TC
@CO +s2p CT*****G***G*****TC
@CO +s3 CT*****GG*GG*****TC
@CO +s3b CT****CGGCGGC****TC
@CO +s4 CT***AAG***GAA***TC
@CO +s4p CT***AAG***GAA***TC
@CO +s5 CTGGG*********GGGTC
@CO
s1 0 c2 1 0 9M * 0 0 CTAATAATC XXXXXXXXX
s1b 0 c2 1 0 2M2D1M2D2M * 0 0 CTTTC *
s2 0 c2 1 0 2M2D1I1D1I2D2M * 0 0 CTGGTC *
s2p 0 c2 1 0 2M2D1I1P1D1P1I2D2M * 0 0 CTGGTC *
s3 0 c2 1 0 2M2D2I1D2I2D2M * 0 0 CTGGGGTC *
s3b 0 c2 1 0 2M1D1M2I1M2I1M1D2M * 0 0 CTCGGCGGCTC *
s4 0 c2 1 0 4M1I1D1I4M * 0 0 CTAAGGAATC *
s4p 0 c2 1 0 4M1I1P1D1P1I4M * 0 0 CTAAGGAATC *
s5 0 c2 1 0 2M3I5D3I2M * 0 0 CTGGGGGGTC *
2 changes: 2 additions & 0 deletions src/test/resources/htsjdk/samtools/cram/c2.fa
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
>c2
CCAATAACC
1 change: 1 addition & 0 deletions src/test/resources/htsjdk/samtools/cram/c2.fa.fai
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
c2 9 4 9 10
Binary file not shown.
Binary file not shown.
1,005 changes: 1,005 additions & 0 deletions src/test/resources/htsjdk/samtools/cram/ce#1000.sam

Large diffs are not rendered by default.

Binary file not shown.
Binary file not shown.
12 changes: 12 additions & 0 deletions src/test/resources/htsjdk/samtools/cram/md#1.sam
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
@HD VN:1.0 SO:coordinate
@SQ SN:a LN:40
x 0 a 1 255 40M * 0 0 AAAAAAAAAACCCCCCCCYNNRGGGGGGGGTTTTTTTTTT * NM:i:2 MD:Z:19N0N19
x* 0 a 1 255 40M * 0 0 CAAAAAAAANNCCCCCCCYNNRGGGGGGGGTTTTTTTTTC * NM:i:6 MD:Z:0A8A0C8N0N18T0
xIP 0 a 1 255 1I10M1I10M2I10M2P3I2P10M1I * 0 0 NAAAAAAAAAAGCCCCCCCCCCAAGGGGGGGGGGCCCTTTTTTTTTTN * NM:i:12 MD:Z:18Y0N0N0R18
xIP* 0 a 1 255 1I10M1I10M2I10M2P3I2P10M1I * 0 0 NCAAAAAAAAAGCCCCCCCCCCAAGGGGGGGGGACCCATTTTTTTTCN * NM:i:16 MD:Z:0A17Y0N0N0R7G0T8T0
xD 0 a 1 255 9M1D8M2D7M3D6M * 0 0 AAAAAAAAACCCCCCYNNRGGGGGTTTTTT * NM:i:9 MD:Z:9^A6C0C0^YN0N6^GGG6
xD* 0 a 1 255 9M1D8M2D7M3D6M * 0 0 CAAAAAAANNCCCCCCTTGGGGGGTTTTTA * NM:i:13 MD:Z:0A7A0^A0C6C0^YN0N0R5^GGG5T0
xN 0 a 1 255 10M20N10M * 0 0 AAAAAAAAAATTTTTTTTTT * NM:i:0 MD:Z:20
xN* 0 a 1 255 10M20N10M * 0 0 CAAAAAAACCAATTTTTTTA * NM:i:6 MD:Z:0A7A0A0T0T7T0
xS 0 a 11 255 5H10S20M10S5H * 0 0 AAAAAAAAAACCCCCCCCYNNRGGGGGGGGTTTTTTTTTT * NM:i:2 MD:Z:9N0N9
xS* 0 a 11 255 5H10S20M10S5H * 0 0 CAAAATAAAACCCCCCCCYNNRGGGGGGGGTTTTATTTTC * NM:i:2 MD:Z:9N0N9
3 changes: 3 additions & 0 deletions src/test/resources/htsjdk/samtools/cram/md.fa
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
>a
AAAAAAAAAACCCCCCCCYNNRGGGGGGGGTTTTTTTTTT

1 change: 1 addition & 0 deletions src/test/resources/htsjdk/samtools/cram/md.fa.fai
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
a 40 3 40 41
Binary file not shown.
Binary file not shown.
7 changes: 7 additions & 0 deletions src/test/resources/htsjdk/samtools/cram/xx#repeated.sam
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
@SQ SN:xx LN:20
S 67 xx 1 1 10M = 11 20 AAAAAAAAAA **********
S 131 xx 11 1 10M = 1 -20 TTTTTTTTTT **********
S 67 xx 1 1 10M = 11 20 AAAAAAAAAA **********
S 131 xx 11 1 10M = 1 -20 TTTTTTTTTT **********
S 67 xx 1 1 10M = 11 20 AAAAAAAAAA **********
S 131 xx 11 1 10M = 1 -20 TTTTTTTTTT **********
Binary file modified src/test/resources/htsjdk/samtools/cram/xx#tlen.2.1.cram
Binary file not shown.
Binary file modified src/test/resources/htsjdk/samtools/cram/xx#tlen.3.0.cram
Binary file not shown.
34 changes: 34 additions & 0 deletions src/test/resources/htsjdk/samtools/cram/xx#tlen.sam
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
@CO xx has been encoded using the SAM spec; leftmost to rightmost
@CO yy has been encoded using bwa/picard methods; 5' to 3'
@CO
@CO 00000000011111111112
@CO 12345678901234567890
@CO AAAAAAAAAATTTTTTTTTT
@CO 1>>>> <<<<2 x1
@CO
@CO 1>>>> x2
@CO <<<<2
@CO
@CO 1>>>> x3 (7..15 vs 10..11)
@CO <<<<2
@CO
@CO <<<<2 1>>>> x4 (1..20 vs 16..5)
@CO
@SQ SN:xx LN:20
@SQ SN:yy LN:20
x1 99 xx 1 1 5M = 16 20 AAAAA *****
x1 147 xx 16 1 5M = 1 -20 TTTTT *****
x2 99 xx 7 1 5M = 10 8 AAAAT *****
x2 147 xx 10 1 5M = 7 -8 ATTTT *****
x3 147 xx 7 1 5M = 10 8 AAAAT *****
x3 99 xx 10 1 5M = 7 -8 ATTTT *****
x4 147 xx 1 1 5M = 16 20 AAAAA *****
x4 99 xx 16 1 5M = 1 -20 TTTTT *****
y1 99 yy 1 1 5M = 16 20 AAAAA *****
y1 147 yy 16 1 5M = 1 -20 TTTTT *****
y2 99 yy 7 1 5M = 10 8 AAAAT *****
y2 147 yy 10 1 5M = 7 -8 ATTTT *****
y3 147 yy 7 1 5M = 10 -2 AAAAT *****
y3 99 yy 10 1 5M = 7 2 ATTTT *****
y4 147 yy 1 1 5M = 16 10 AAAAA *****
y4 99 yy 16 1 5M = 1 -10 TTTTT *****
Binary file modified src/test/resources/htsjdk/samtools/cram/xx#tlen2.2.1.cram
Binary file not shown.
Binary file modified src/test/resources/htsjdk/samtools/cram/xx#tlen2.3.0.cram
Binary file not shown.
36 changes: 36 additions & 0 deletions src/test/resources/htsjdk/samtools/cram/xx#tlen2.sam
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
@CO As per xx#tlen.sam but every read is unsorted with partner
@CO
@CO xx has been encoded using the SAM spec; leftmost to rightmost
@CO yy has been encoded using bwa/picard methods; 5' to 3'
@CO
@CO 00000000011111111112
@CO 12345678901234567890
@CO AAAAAAAAAATTTTTTTTTT
@CO 1>>>> <<<<2 x1
@CO
@CO 1>>>> x2
@CO <<<<2
@CO
@CO 1>>>> x3 (7..15 vs 10..11)
@CO <<<<2
@CO
@CO <<<<2 1>>>> x4 (1..20 vs 16..5)
@CO
@SQ SN:xx LN:20
@SQ SN:yy LN:20
x1 147 xx 16 1 5M = 1 -20 TTTTT *****
x1 99 xx 1 1 5M = 16 20 AAAAA *****
x2 147 xx 10 1 5M = 7 -8 ATTTT *****
x2 99 xx 7 1 5M = 10 8 AAAAT *****
x3 99 xx 10 1 5M = 7 -8 ATTTT *****
x3 147 xx 7 1 5M = 10 8 AAAAT *****
x4 99 xx 16 1 5M = 1 -20 TTTTT *****
x4 147 xx 1 1 5M = 16 20 AAAAA *****
y1 147 yy 16 1 5M = 1 -20 TTTTT *****
y1 99 yy 1 1 5M = 16 20 AAAAA *****
y2 147 yy 10 1 5M = 7 -8 ATTTT *****
y2 99 yy 7 1 5M = 10 8 AAAAT *****
y3 99 yy 10 1 5M = 7 2 ATTTT *****
y3 147 yy 7 1 5M = 10 -2 AAAAT *****
y4 99 yy 16 1 5M = 1 -10 TTTTT *****
y4 147 yy 1 1 5M = 16 10 AAAAA *****

0 comments on commit 23f3223

Please sign in to comment.