1
1
#!/usr/bin/env ruby
2
-
3
- require 'trollop'
4
2
require 'transrate'
5
- require 'csv'
6
- require 'bindeps'
7
- require 'colorize'
8
-
9
3
include Transrate
10
4
11
- # Show the help message if no arguments provided
12
- ARGV [ 0 ] = "--help" if ARGV . length ( ) == 0
13
-
14
5
# We want clean error messages through the logger, no ugly backtraces
15
6
# because the user doesn't care about them, unless they specifically ask for
16
7
# them with --loglevel debug
@@ -28,369 +19,5 @@ module Kernel
28
19
end
29
20
end
30
21
31
- txp = '░▓▓▓^▓▓▓░'
32
- toptxp = txp . green
33
- midtxp = txp . yellow
34
- bottxp = txp . red
35
-
36
- opts = Trollop ::options do
37
- version Transrate ::VERSION ::STRING . dup
38
- banner <<-EOS
39
- _ _
40
- | |_ _ __ __ _ _ __ ___ _ __ __ _ | |_ ___
41
- #{ toptxp } | __|| '__|/ _` || '_ \\ / __|| '__|/ _` || __|/ _ \\ #{ toptxp }
42
- #{ midtxp } | |_ | | | (_| || | | |\\ __ \\ | | | (_| || |_| __/ #{ midtxp }
43
- #{ bottxp } \\ __||_| \\ __,_||_| |_||___/|_| \\ __,_| \\ __|\\ ___| #{ bottxp }
44
-
45
- Transrate v#{ Transrate ::VERSION ::STRING . dup }
46
- by Richard Smith-Unna, Chris Boursnell, Rob Patro,
47
- Julian Hibberd, and Steve Kelly
48
-
49
- DESCRIPTION:
50
- Analyse a de-novo transcriptome assembly using three kinds of metrics:
51
-
52
- 1. sequence based (if --assembly is given)
53
- 2. read mapping based (if --left and --right are given)
54
- 3. reference based (if --reference is given)
55
-
56
- Documentation at http://hibberdlab.com/transrate
57
-
58
- USAGE:
59
- transrate <options>
60
-
61
- OPTIONS:
62
-
63
- EOS
64
- opt :assembly , "Assembly file(s) in FASTA format, comma-separated" ,
65
- :type => String
66
- opt :left , "Left reads file in FASTQ format" ,
67
- :type => String
68
- opt :right , "Right reads file in FASTQ format" ,
69
- :type => String
70
- opt :reference , "Reference proteome or transcriptome file in FASTA format" ,
71
- :type => String
72
- opt :threads , "Number of threads to use" ,
73
- :default => 8 ,
74
- :type => Integer
75
- opt :merge_assemblies , "Merge best contigs from multiple assemblies into file" ,
76
- :type => String
77
- opt :outfile , "Prefix filename to use for CSV output" ,
78
- :default => 'transrate'
79
- opt :loglevel , "Log level. " +
80
- "One of [error, info, warn, debug]" ,
81
- :default => 'info'
82
- opt :install_deps , "Install any missing dependencies. One of [all, read, ref]" ,
83
- :type => String , :default => nil
84
- opt :examples , "Show some example commands with explanations"
85
- end
86
-
87
- if opts . examples
88
- puts <<-EOS
89
-
90
- Transrate v#{ Transrate ::VERSION ::STRING . dup }
91
-
92
- EXAMPLE COMMANDS:
93
-
94
- # check dependencies and install any that are missing
95
- transrate --install-deps
96
-
97
- # get the transrate score for the assembly and each contig
98
- transrate --assembly contigs.fa --left left.fq --right right.fq
99
-
100
- # basic assembly metrics only
101
- transrate --assembly contigs.fa
102
-
103
- # basic and reference-based metrics with 8 threads
104
- transrate --assembly contigs.fa --reference ref.fa --threads 8
105
-
106
- # contig and read-based metrics for two assemblies with 32 threads
107
- transrate --assembly one.fa,two.fa --left l.fq --right r.fq --threads 32
108
-
109
- EOS
110
- exit ( 0 )
111
- end
112
-
113
- # Check dependencies if they are relevant to the command issued,
114
- # and handle any commands to install missing ones
115
- gem_dir = Gem . loaded_specs [ 'transrate' ] . full_gem_path
116
- gem_deps = File . join ( gem_dir , 'deps' , 'deps.yaml' )
117
- blast_dep = File . join ( gem_dir , 'deps' , 'blast.yaml' )
118
-
119
- deps , read_deps , ref_deps = nil
120
- unless opts . install_deps . nil?
121
-
122
- unless %w[ all read ref ] . include? opts . install_deps
123
- raise TransrateError . new "install-deps #{ opts . install_deps } is not valid. " +
124
- "You must specify one of: all, read, ref."
125
- end
126
-
127
- deps = opts . install_deps == 'all'
128
- read_deps = opts . install_deps == 'read'
129
- ref_deps = opts . install_deps == 'ref'
130
- end
131
-
132
- if deps || read_deps || ref_deps
133
- # user has requested dependency installation
134
- puts "Checking dependencies"
135
-
136
- missing = [ ]
137
- if deps || read_deps
138
- Bindeps . require gem_deps
139
- missing += Bindeps . missing gem_deps
140
- end
141
-
142
- if deps || ref_deps
143
- Bindeps . require blast_dep
144
- missing += Bindeps . missing blast_dep
145
- end
146
-
147
- unless missing . empty?
148
- list = missing . collect { |i | "#{ i . name } :#{ i . version } " } . join ( "\n - " )
149
- msg = "Failed to install: \n - #{ list } "
150
- raise TransrateError . new msg
151
- end
152
-
153
- puts "All dependencies installed"
154
- exit
155
-
156
- else
157
- # no dependency installation requested, but check dependencies
158
- # for the commands provided are installed
159
- missing = [ ]
160
- missing = Bindeps . missing gem_deps if opts . left
161
- blast_missing = [ ]
162
- blast_missing = Bindeps . missing blast_dep if opts . reference
163
-
164
- if missing . length + blast_missing . length > 0
165
- puts "Dependencies are missing:"
166
-
167
- missing . each do |dep |
168
- puts " - #{ dep . name } (#{ dep . version } )"
169
- end
170
-
171
- blast_missing . each do |dep |
172
- puts " - #{ dep . name } (#{ dep . version } )"
173
- end
174
-
175
- puts "To install all missing dependencies, run:"
176
- puts " transrate --install-deps a;;"
177
- puts "If you only want the read-metrics dependencies:"
178
- puts " transrate --install-deps read"
179
- puts "Or if you only want the reference-metrics dependencies: "
180
- puts " transrate --install-deps ref"
181
-
182
- exit 1
183
- end
184
-
185
- end
186
-
187
- # Handle commands
188
- unless %w[ error info warn debug ] . include? opts . loglevel
189
- raise TransrateError . new "Loglevel #{ opts . loglevel } is not valid. " +
190
- "It must be one of: error, info, warn, debug."
191
- end
192
-
193
- logger . level = Yell ::Level . new opts . loglevel . to_sym
194
-
195
- if opts . assembly
196
- opts . assembly . split ( ',' ) . each do |assembly_file |
197
- unless File . exist? ( assembly_file )
198
- raise TransrateIOError . new "Assembly fasta file does not exist: " +
199
- " #{ assembly_file } "
200
- end
201
- end
202
- else
203
- raise TransrateArgError . new "Option --assembly must be specified. " +
204
- "Try --help for help."
205
- end
206
-
207
- if opts . reference && !File . exist? ( opts . reference )
208
- raise TransrateIOError . new "Reference fasta file does not exist: " +
209
- " #{ opts . reference } "
210
- end
211
-
212
- if opts . left and opts . right
213
- if opts . left . split ( "," ) . length != opts . right . split ( "," ) . length
214
- msg = "Please provide the same number of left reads as right reads"
215
- raise TransrateArgError . new msg
216
- end
217
- opts . left . split ( "," ) . zip ( opts . right . split ( "," ) ) . each do |left , right |
218
- if !File . exist? ( left )
219
- raise TransrateIOError . new "Left read fastq file does not exist: #{ left } "
220
- end
221
- if !File . exist? ( right )
222
- raise TransrateIOError . new "Right read fastq file does not exist: #{ right } "
223
- end
224
- end
225
- end
226
-
227
- def pretty_print_hash hash , width , round = 2
228
- hash . map do |k , v |
229
- # show as float if there are any decimal places
230
- if v . to_f . round ( round ) . to_s . split ( '.' ) . last . to_i > 0
231
- v = v . to_f . round ( round )
232
- end
233
- if v . is_a? Float
234
- v = v . round ( round )
235
- end
236
- pad = ( width - ( k . to_s . length + v . to_s . length ) )
237
- pad = [ pad , 0 ] . max
238
- logger . info "#{ k . to_s . split ( '_' ) . join ( ' ' ) } " +
239
- "#{ " " * pad } " +
240
- "#{ v } "
241
- end
242
- end
243
-
244
- r = opts . reference ? Assembly . new ( opts . reference ) : nil
245
- report_width = 35
246
-
247
- # loop through the assemblies, storing their outputs in an array of hashes
248
- all = [ ]
249
-
250
- assemblies = opts . assembly
251
- if opts . merge_assemblies
252
- merged_file = opts . merge_assemblies
253
- merged = { }
254
- assemblies . split ( "," ) . each do |file |
255
- Bio ::FastaFormat . open ( file ) . each do |entry |
256
- contig_name = "#{ File . basename ( file , File . extname ( file ) ) } :"
257
- contig_name << "#{ entry . entry_id } "
258
- merged [ contig_name ] = entry . seq
259
- end
260
- end
261
- logger . info "Merging assemblies into one file...'#{ merged_file } '"
262
- File . open ( merged_file , "wb" ) do |out |
263
- merged . each do |name , seq |
264
- out . write ">#{ name } \n "
265
- out . write "#{ seq } \n "
266
- end
267
- end
268
-
269
- assemblies = merged_file
270
- end
271
-
272
- assemblies . split ( ',' ) . each do |assembly |
273
-
274
- logger . info "Loading assembly: #{ assembly } "
275
-
276
- a = Assembly . new assembly
277
- transrater = Transrater . new ( a , r , threads : opts . threads )
278
-
279
- logger . info "Analysing assembly: #{ assembly } "
280
-
281
- contig_results = { }
282
-
283
- logger . info "Calculating contig metrics..."
284
- t0 = Time . now
285
- contig_results = transrater . assembly_metrics . basic_stats
286
- contig_results . merge! transrater . assembly . contig_metrics . results
287
- if contig_results
288
- logger . info "Contig metrics:"
289
- logger . info "-" * report_width
290
- pretty_print_hash ( contig_results , report_width )
291
- end
292
-
293
- logger . info "Contig metrics done in #{ ( Time . now - t0 ) . round } seconds"
294
-
295
- read_results = { }
296
-
297
- if ( opts . left && opts . right )
298
- logger . info "Calculating read diagnostics..."
299
- t0 = Time . now
300
- read_results = transrater . read_metrics ( opts . left , opts . right ) . read_stats
301
-
302
- if read_results
303
- logger . info "Read mapping metrics:"
304
- logger . info "-" * report_width
305
- pretty_print_hash ( read_results , report_width )
306
- end
307
-
308
- logger . info "Read metrics done in #{ ( Time . now - t0 ) . round } seconds"
309
- else
310
- logger . info "No reads provided, skipping read diagnostics"
311
- end
312
-
313
- comparative_results = { }
314
-
315
- if opts . reference
316
- logger . info "Calculating comparative metrics..."
317
- t0 = Time . now
318
- comparative_metrics = transrater . comparative_metrics
319
- comparative_results = comparative_metrics . comp_stats
320
-
321
- if comparative_results
322
- logger . info "Comparative metrics:"
323
- logger . info "-" * report_width
324
- pretty_print_hash ( comparative_results , report_width )
325
- end
326
-
327
- logger . info "Comparative metrics done in #{ ( Time . now - t0 ) . round } seconds"
328
-
329
- logger . info "-" * report_width
330
- else
331
- logger . info "No reference provided, skipping comparative diagnostics"
332
- end
333
-
334
- prefix = "#{ opts . outfile } _#{ File . basename ( assembly ) } "
335
-
336
- if ( opts . left && opts . right )
337
- score = transrater . assembly_score
338
-
339
- optimal , cutoff = transrater . assembly_optimal_score prefix
340
- unless score . nil?
341
- pretty_print_hash ( { :TRANSRATE_ASSEMBLY_SCORE => score } , report_width , 4 )
342
- logger . info "-" * report_width
343
- pretty_print_hash ( { :TRANSRATE_OPTIMAL_SCORE => optimal } , report_width , 4 )
344
- pretty_print_hash ( { :TRANSRATE_OPTIMAL_CUTOFF => cutoff } , report_width , 4 )
345
- pretty_print_hash ( transrater . good_contigs , report_width )
346
- end
347
- end
348
-
349
- # write contig metrics to file for each contig
350
- outfile = "#{ prefix } _contigs.csv"
351
- logger . info "Writing contig metrics for each contig to #{ outfile } "
352
- # have option to turn off, default on
353
- first = true
354
- CSV . open ( outfile , 'wb' ) do |csv |
355
- a . each do |name , contig |
356
- basic_metrics = { :contig_name => name } . merge ( contig . basic_metrics )
357
- if opts . reference
358
- comp_metrics = contig . comparative_metrics
359
- basic_metrics . merge! ( comp_metrics )
360
- end
361
- if opts . left and opts . right
362
- read_metrics = contig . read_metrics
363
- basic_metrics . merge! ( read_metrics )
364
- end
365
- if first
366
- csv << basic_metrics . keys
367
- first = false
368
- end
369
- csv << basic_metrics . values . map { |x | x . is_a? ( Float ) ? x . round ( 6 ) : x }
370
- end
371
- end
372
-
373
- all << contig_results . merge ( read_results )
374
- . merge ( comparative_results )
375
- . merge ( { :assembly => assembly } )
376
- . merge ( { :score => score } )
377
- . merge ( { :optimal_score => optimal } )
378
- . merge ( { :cutoff => cutoff } )
379
-
380
- end
381
-
382
- # write out all resuls to .csv
383
- outfile = "#{ opts . outfile } _assemblies.csv"
384
- logger . info "Writing analysis results to #{ outfile } "
385
- CSV . open ( outfile , 'wb' ) do |file |
386
- keys = all [ 0 ] . keys
387
- keys . delete ( :assembly )
388
- head = [ :assembly ] + keys
389
- file << head
390
- all . each do |row |
391
- file << head . map { |x |
392
- entry = row [ x ]
393
- entry . is_a? ( Float ) ? entry . round ( 5 ) : entry
394
- }
395
- end
396
- end
22
+ cmdline = Cmdline . new ARGV
23
+ cmdline . run
0 commit comments