Skip to content

Commit 37d1a13

Browse files
committed
test: update Ensembl file's test
1 parent 84ed497 commit 37d1a13

File tree

3 files changed

+28
-5
lines changed

3 files changed

+28
-5
lines changed
+13
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
chr1 HAVANA transcript 910578 917473 . - . ID=ENST00000433179.4;Parent=ENSG00000187642.10;gene_id=ENSG00000187642.10_9;transcript_id=ENST00000433179.4_7;gene_type=protein_coding;gene_name=PERM1;transcript_type=protein_coding;transcript_name=PERM1-202;level=2;protein_id=ENSP00000414022.3;transcript_support_level=5;hgnc_id=HGNC:28208;tag=RNA_Seq_supported_only,basic,Ensembl_canonical,appris_principal_2,CCDS;ccdsid=CCDS76083.1;havana_gene=OTTHUMG00000040757.3_9;remap_num_mappings=1;remap_status=full_contig;remap_target_status=overlap
2+
chr1 HAVANA exon 910578 911649 . - . ID=exon:ENST00000433179.4:4;Parent=ENST00000433179.4;gene_id=ENSG00000187642.10_9;transcript_id=ENST00000433179.4_7;gene_type=protein_coding;gene_name=PERM1;transcript_type=protein_coding;transcript_name=PERM1-202;exon_number=4;exon_id=ENSE00003963238.1_1;level=2;protein_id=ENSP00000414022.3;transcript_support_level=5;hgnc_id=HGNC:28208;tag=RNA_Seq_supported_only,basic,Ensembl_canonical,appris_principal_2,CCDS;ccdsid=CCDS76083.1;havana_gene=OTTHUMG00000040757.3_9;remap_original_location=chr1:-:975198-976269;remap_status=full_contig
3+
chr1 HAVANA three_prime_UTR 910578 911551 . - . ID=UTR3:ENST00000433179.4;Parent=ENST00000433179.4;gene_id=ENSG00000187642.10_9;transcript_id=ENST00000433179.4_7;gene_type=protein_coding;gene_name=PERM1;transcript_type=protein_coding;transcript_name=PERM1-202;exon_number=3;exon_id=ENSE00003963238.1;level=2;protein_id=ENSP00000414022.3;transcript_support_level=5;hgnc_id=HGNC:28208;tag=RNA_Seq_supported_only,basic,Ensembl_canonical,appris_principal_2,CCDS;ccdsid=CCDS76083.1;havana_gene=OTTHUMG00000040757.3_9;remap_original_location=chr1:-:975198-976171;remap_status=full_contig
4+
chr1 HAVANA CDS 911552 911649 . - 2 ID=CDS:ENST00000433179.4;Parent=ENST00000433179.4;gene_id=ENSG00000187642.10_9;transcript_id=ENST00000433179.4_7;gene_type=protein_coding;gene_name=PERM1;transcript_type=protein_coding;transcript_name=PERM1-202;exon_number=3;exon_id=ENSE00003963238.1;level=2;protein_id=ENSP00000414022.3;transcript_support_level=5;hgnc_id=HGNC:28208;tag=RNA_Seq_supported_only,basic,Ensembl_canonical,appris_principal_2,CCDS;ccdsid=CCDS76083.1;havana_gene=OTTHUMG00000040757.3_9;remap_original_location=chr1:-:976172-976269;remap_status=full_contig
5+
chr1 HAVANA stop_codon 911552 911554 . - 0 ID=stop_codon:ENST00000433179.4;Parent=ENST00000433179.4;gene_id=ENSG00000187642.10_9;transcript_id=ENST00000433179.4_7;gene_type=protein_coding;gene_name=PERM1;transcript_type=protein_coding;transcript_name=PERM1-202;exon_number=3;exon_id=ENSE00003963238.1;level=2;protein_id=ENSP00000414022.3;transcript_support_level=5;hgnc_id=HGNC:28208;tag=RNA_Seq_supported_only,basic,Ensembl_canonical,appris_principal_2,CCDS;ccdsid=CCDS76083.1;havana_gene=OTTHUMG00000040757.3_9;remap_original_location=chr1:-:976172-976174;remap_status=full_contig
6+
chr1 HAVANA exon 911879 912004 . - . ID=exon:ENST00000433179.4:3;Parent=ENST00000433179.4;gene_id=ENSG00000187642.10_9;transcript_id=ENST00000433179.4_7;gene_type=protein_coding;gene_name=PERM1;transcript_type=protein_coding;transcript_name=PERM1-202;exon_number=3;exon_id=ENSE00002263633.3_1;level=2;protein_id=ENSP00000414022.3;transcript_support_level=5;hgnc_id=HGNC:28208;tag=RNA_Seq_supported_only,basic,Ensembl_canonical,appris_principal_2,CCDS;ccdsid=CCDS76083.1;havana_gene=OTTHUMG00000040757.3_9;remap_original_location=chr1:-:976499-976624;remap_status=full_contig
7+
chr1 HAVANA CDS 911879 912004 . - 2 ID=CDS:ENST00000433179.4;Parent=ENST00000433179.4;gene_id=ENSG00000187642.10_9;transcript_id=ENST00000433179.4_7;gene_type=protein_coding;gene_name=PERM1;transcript_type=protein_coding;transcript_name=PERM1-202;exon_number=2;exon_id=ENSE00002263633.3;level=2;protein_id=ENSP00000414022.3;transcript_support_level=5;hgnc_id=HGNC:28208;tag=RNA_Seq_supported_only,basic,Ensembl_canonical,appris_principal_2,CCDS;ccdsid=CCDS76083.1;havana_gene=OTTHUMG00000040757.3_9;remap_original_location=chr1:-:976499-976624;remap_status=full_contig
8+
chr1 HAVANA exon 914261 916553 . - . ID=exon:ENST00000433179.4:2;Parent=ENST00000433179.4;gene_id=ENSG00000187642.10_9;transcript_id=ENST00000433179.4_7;gene_type=protein_coding;gene_name=PERM1;transcript_type=protein_coding;transcript_name=PERM1-202;exon_number=2;exon_id=ENSE00001389758.2_1;level=2;protein_id=ENSP00000414022.3;transcript_support_level=5;hgnc_id=HGNC:28208;tag=RNA_Seq_supported_only,basic,Ensembl_canonical,appris_principal_2,CCDS;ccdsid=CCDS76083.1;havana_gene=OTTHUMG00000040757.3_9;remap_original_location=chr1:-:978881-981173;remap_status=full_contig
9+
chr1 HAVANA CDS 914261 916409 . - 0 ID=CDS:ENST00000433179.4;Parent=ENST00000433179.4;gene_id=ENSG00000187642.10_9;transcript_id=ENST00000433179.4_7;gene_type=protein_coding;gene_name=PERM1;transcript_type=protein_coding;transcript_name=PERM1-202;exon_number=1;exon_id=ENSE00001389758.2;level=2;protein_id=ENSP00000414022.3;transcript_support_level=5;hgnc_id=HGNC:28208;tag=RNA_Seq_supported_only,basic,Ensembl_canonical,appris_principal_2,CCDS;ccdsid=CCDS76083.1;havana_gene=OTTHUMG00000040757.3_9;remap_original_location=chr1:-:978881-981029;remap_status=full_contig
10+
chr1 HAVANA start_codon 916407 916409 . - 0 ID=start_codon:ENST00000433179.4;Parent=ENST00000433179.4;gene_id=ENSG00000187642.10_9;transcript_id=ENST00000433179.4_7;gene_type=protein_coding;gene_name=PERM1;transcript_type=protein_coding;transcript_name=PERM1-202;exon_number=1;exon_id=ENSE00001389758.2;level=2;protein_id=ENSP00000414022.3;transcript_support_level=5;hgnc_id=HGNC:28208;tag=RNA_Seq_supported_only,basic,Ensembl_canonical,appris_principal_2,CCDS;ccdsid=CCDS76083.1;havana_gene=OTTHUMG00000040757.3_9;remap_original_location=chr1:-:981027-981029;remap_status=full_contig
11+
chr1 HAVANA five_prime_UTR 916410 916553 . - . ID=UTR5:ENST00000433179.4;Parent=ENST00000433179.4;gene_id=ENSG00000187642.10_9;transcript_id=ENST00000433179.4_7;gene_type=protein_coding;gene_name=PERM1;transcript_type=protein_coding;transcript_name=PERM1-202;exon_number=1;exon_id=ENSE00001389758.2;level=2;protein_id=ENSP00000414022.3;transcript_support_level=5;hgnc_id=HGNC:28208;tag=RNA_Seq_supported_only,basic,Ensembl_canonical,appris_principal_2,CCDS;ccdsid=CCDS76083.1;havana_gene=OTTHUMG00000040757.3_9;remap_original_location=chr1:-:981030-981173;remap_status=full_contig
12+
chr1 HAVANA exon 917445 917473 . - . ID=exon:ENST00000433179.4:1;Parent=ENST00000433179.4;gene_id=ENSG00000187642.10_9;transcript_id=ENST00000433179.4_7;gene_type=protein_coding;gene_name=PERM1;transcript_type=protein_coding;transcript_name=PERM1-202;exon_number=1;exon_id=ENSE00003728856.2_1;level=2;protein_id=ENSP00000414022.3;transcript_support_level=5;hgnc_id=HGNC:28208;tag=RNA_Seq_supported_only,basic,Ensembl_canonical,appris_principal_2,CCDS;ccdsid=CCDS76083.1;havana_gene=OTTHUMG00000040757.3_9;remap_original_location=chr1:-:982065-982093;remap_status=full_contig
13+
chr1 HAVANA five_prime_UTR 917445 917473 . - . ID=UTR5:ENST00000433179.4;Parent=ENST00000433179.4;gene_id=ENSG00000187642.10_9;transcript_id=ENST00000433179.4_7;gene_type=protein_coding;gene_name=PERM1;transcript_type=protein_coding;transcript_name=PERM1-202;exon_number=0;exon_id=ENSE00003728856.2;level=2;protein_id=ENSP00000414022.3;transcript_support_level=5;hgnc_id=HGNC:28208;tag=RNA_Seq_supported_only,basic,Ensembl_canonical,appris_principal_2,CCDS;ccdsid=CCDS76083.1;havana_gene=OTTHUMG00000040757.3_9;remap_original_location=chr1:-:982065-982093;remap_status=full_contig

test/varity/ref_gene_test.clj

+13-5
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
test-load-refgene-file
1313
test-load-refseq-file
1414
test-gff3-file
15+
test-lift-gff3-file
1516
test-gtf-file]]))
1617

1718
(def parse-ref-gene-line #'varity.ref-gene/parse-ref-gene-line)
@@ -69,7 +70,7 @@
6970
(dissoc test-gtf-row :attribute)))))
7071

7172
(deftest load-ncbi-file-test
72-
(testing "refSeq.txt and ncbiRefGene.txt produces identical data instead of its accession number"
73+
(testing "refGene.txt and ncbiRefGene.txt produce identical data except for the accession number"
7374
(is (apply = (map #(-> % first (dissoc :name))
7475
[(#'rg/load-ncbi-file test-load-refgene-file)
7576
(#'rg/load-ncbi-file test-load-refseq-file)])))))
@@ -78,7 +79,9 @@
7879

7980
(def parsed-gff3-region (first (rg/load-gff3 test-gff3-file)))
8081

81-
(deftest load-gencode
82+
(def parsed-lift-gff3-region (first (rg/load-gff3 test-lift-gff3-file)))
83+
84+
(deftest load-gencode-test
8285
(let [extract (fn [region] (select-keys region [:name2
8386
:exon-ranges
8487
:tx-start
@@ -88,12 +91,17 @@
8891
:exon-count
8992
:chr
9093
:cds-end]))]
91-
(testing "load-gff3"
94+
(testing "load-gff3 produces same data as load-ref-genes"
9295
(is (= (extract parsed-gff3-region)
9396
(extract test-ref-gene))))
94-
(testing "load-gtf"
97+
(testing "load-gtf produces same data as load-ref-genes"
9598
(is (= (extract parsed-gtf-region)
96-
(extract test-ref-gene))))))
99+
(extract test-ref-gene))))
100+
(testing "liftover file's trailing string is omitted"
101+
(is (= (select-keys (first (rg/load-gff3 test-lift-gff3-file))
102+
[:name :gene-id])
103+
{:name "ENST00000433179.4",
104+
:gene-id "ENSG00000187642.10"})))))
97105

98106
(defslowtest in-any-exon?-test
99107
(cavia-testing "in-any-exon? (slow)"

test/varity/t_common.clj

+2
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,8 @@
6666

6767
(def test-gff3-file "./test-resources/gff3_parse_test.gff3")
6868

69+
(def test-lift-gff3-file "./test-resources/gff3_lift_parse_test.gff3")
70+
6971
(def test-load-refgene-file "./test-resources/test-refgene.txt")
7072

7173
(def test-load-refseq-file "./test-resources/test-refseq.txt")

0 commit comments

Comments
 (0)