-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathprepare_requirements.nf
155 lines (118 loc) · 3 KB
/
prepare_requirements.nf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
// ---------------------------------------------------------------------------
// Remote resources consumed by the workflow at the bottom of this script.
// ---------------------------------------------------------------------------

// Git repositories that get cloned.
esm_git_url = "https://github.com/facebookresearch/esm.git"
taxallnomy_git_url = "https://github.com/tetsufmbio/taxallnomy.git"

// Gene Ontology files: ontology, "do not annotate" subset, UniProt GOA annotations.
go_basic_url = "https://purl.obolibrary.org/obo/go/go-basic.obo"
gocheck_url = "https://current.geneontology.org/ontology/subsets/gocheck_do_not_annotate.json"
goa_all_url = "https://ftp.ebi.ac.uk/pub/databases/GO/goa/UNIPROT/goa_uniprot_all.gaf.gz"

// UniProt Swiss-Prot sequences (gzipped FASTA).
uniprot_url = "https://ftp.uniprot.org/pub/databases/uniprot/knowledgebase/complete/uniprot_sprot.fasta.gz"

// ProtT5 per-protein embeddings for Swiss-Prot (HDF5).
prot_t5_embs_url = "https://ftp.ebi.ac.uk/pub/databases/uniprot/current_release/knowledgebase/embeddings/uniprot_sprot/per-protein.h5"

// Path to extract.py inside an `esm` checkout.
// NOTE(review): not referenced by any process or workflow visible in this script.
esm_script_path = "esm/scripts/extract.py"
// Download go-basic.obo from the given URL and copy it into `databases/`.
//
// input:  url      - address handed to wget
// output: go_basic - the downloaded file, always named go-basic.obo
process download_go {
    publishDir "databases", mode: 'copy'

    input:
    val url

    output:
    path "go-basic.obo", emit: go_basic

    script:
    // -O pins the local file name to the one declared under `output:`, so the
    // task no longer relies on the URL's basename matching it. Quoting the URL
    // keeps the shell from word-splitting or glob-expanding it.
    """
    wget -O go-basic.obo "$url"
    """
}
// Download gocheck_do_not_annotate.json from the given URL and copy it into
// `databases/`.
//
// input:  url                     - address handed to wget
// output: gocheck_do_not_annotate - the downloaded JSON file
process download_gocheck_do_not_annotate {
    publishDir "databases", mode: 'copy'

    input:
    val url

    output:
    path "gocheck_do_not_annotate.json", emit: gocheck_do_not_annotate

    script:
    // -O pins the local file name to the one declared under `output:`, so the
    // task no longer relies on the URL's basename matching it. Quoting the URL
    // keeps the shell from word-splitting or glob-expanding it.
    """
    wget -O gocheck_do_not_annotate.json "$url"
    """
}
// Clone the ESM git repository and copy the resulting `esm` directory into
// `libs/`.
//
// input:  repository URL passed to `git clone`
// output: esm_dir - the cloned `esm` directory
process download_esm {
    publishDir "libs/", mode: 'copy'

    input:
    val repo_url

    output:
    path "esm", emit: esm_dir

    script:
    // git names the checkout directory after the repository, which is what the
    // `output:` declaration above expects.
    """
    git clone ${repo_url}
    """
}
// Download uniprot_sprot.fasta.gz from the given URL and copy it into
// `databases/`.
//
// input:  url           - address handed to wget
// output: uniprot_fasta - the downloaded gzipped FASTA file
process download_uniprot {
    publishDir "databases", mode: 'copy'

    input:
    val url

    output:
    path "uniprot_sprot.fasta.gz", emit: uniprot_fasta

    script:
    // -O pins the local file name to the one declared under `output:`, so the
    // task no longer relies on the URL's basename matching it. Quoting the URL
    // keeps the shell from word-splitting or glob-expanding it.
    """
    wget -O uniprot_sprot.fasta.gz "$url"
    """
}
// Download per-protein.h5 from the given URL and copy it into `databases/`.
//
// input:  url           - address handed to wget
// output: prot5_embs_h5 - the downloaded HDF5 file
process download_prot5 {
    publishDir "databases", mode: 'copy'

    input:
    val url

    output:
    path "per-protein.h5", emit: prot5_embs_h5

    script:
    // -O pins the local file name to the one declared under `output:`, so the
    // task no longer relies on the URL's basename matching it. Quoting the URL
    // keeps the shell from word-splitting or glob-expanding it.
    """
    wget -O per-protein.h5 "$url"
    """
}
// Download goa_uniprot_all.gaf.gz from the given URL and copy it into
// `databases/`.
//
// input:  url               - address handed to wget
// output: go_annotation_raw - the downloaded gzipped GAF file
process download_goa {
    publishDir "databases", mode: 'copy'

    input:
    val url

    output:
    path "goa_uniprot_all.gaf.gz", emit: go_annotation_raw

    script:
    // -O pins the local file name to the one declared under `output:`, so the
    // task no longer relies on the URL's basename matching it. Quoting the URL
    // keeps the shell from word-splitting or glob-expanding it.
    """
    wget -O goa_uniprot_all.gaf.gz "$url"
    """
}
// Clone the taxallnomy git repository. The checkout stays in the task work
// directory (publishing is disabled below).
//
// input:  repository URL passed to `git clone`
// output: taxallnomy_dir - the cloned `taxallnomy` directory
process clone_taxallnomy {
    //publishDir "libs/", mode: 'copy'

    input:
    val repo_url

    output:
    path "taxallnomy", emit: taxallnomy_dir

    script:
    // git names the checkout directory after the repository, which is what the
    // `output:` declaration above expects.
    """
    git clone ${repo_url}
    """
}
// Run taxallnomy's generate_taxallnomy.pl from a cloned checkout.
//
// input:  taxallnomy_dir - directory containing generate_taxallnomy.pl
// output: taxallnomy_lin - taxallnomy_data/taxallnomy_lin.tab, expected to be
//                          written by the Perl script in the task directory
process run_taxallnomy {
    input:
    // Declared as `path` (not `val`) so Nextflow stages the checkout into this
    // task's work directory. With `val` the script reached into another task's
    // work directory by absolute path, which is not available on container,
    // remote or cloud executors.
    path taxallnomy_dir

    output:
    path "taxallnomy_data/taxallnomy_lin.tab", emit: taxallnomy_lin

    script:
    """
    perl $taxallnomy_dir/generate_taxallnomy.pl
    """
}
// Gzip the taxallnomy lineage table and copy the archive into `databases/`.
//
// input:  taxallnomy_lin_path - the table file to compress
// output: taxallnomy_tsv_path - taxallnomy.tsv.gz
process compress_and_save_taxallnomy {
    publishDir "databases", mode: 'copy'

    input:
    // Declared as `path` (not `val`) so Nextflow stages the file into this
    // task's work directory instead of the script reading it from another
    // task's work directory by absolute path.
    path taxallnomy_lin_path

    output:
    path "taxallnomy.tsv.gz", emit: taxallnomy_tsv_path

    script:
    // `gzip -c` writes to stdout, so the staged input is left untouched.
    """
    gzip -c $taxallnomy_lin_path > taxallnomy.tsv.gz
    """
}
// Entry workflow: fetch every external resource and build the compressed
// taxallnomy table.
workflow {
    // Independent downloads; none of them feeds another process.
    download_go(go_basic_url)
    download_gocheck_do_not_annotate(gocheck_url)
    download_goa(goa_all_url)
    download_uniprot(uniprot_url)
    download_prot5(prot_t5_embs_url)
    download_esm(esm_git_url)

    // taxallnomy chain: clone the repository, run its generator script, then
    // gzip the resulting table.
    clone_taxallnomy(taxallnomy_git_url)
    run_taxallnomy(clone_taxallnomy.out.taxallnomy_dir)
    compress_and_save_taxallnomy(run_taxallnomy.out.taxallnomy_lin)
}