Skip to content

Commit 250fbb2

Browse files
committed
Refactor CLI
Move all binary functionality to a Cmdline class (closes #113). Tidy up output into a clear directory structure (closes #148). Add path expansion of all file paths and some more logging.
1 parent cbc79ff commit 250fbb2

File tree

3 files changed

+512
-375
lines changed

3 files changed

+512
-375
lines changed

bin/transrate

+2-375
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,7 @@
11
#!/usr/bin/env ruby
2-
3-
require 'trollop'
42
require 'transrate'
5-
require 'csv'
6-
require 'bindeps'
7-
require 'colorize'
8-
93
include Transrate
104

11-
# Show the help message if no arguments provided
12-
ARGV[0] = "--help" if ARGV.length() == 0
13-
145
# We want clean error messages through the logger, no ugly backtraces
156
# because the user doesn't care about them, unless they specifically ask for
167
# them with --loglevel debug
@@ -28,369 +19,5 @@ module Kernel
2819
end
2920
end
3021

31-
txp = '░▓▓▓^▓▓▓░'
32-
toptxp = txp.green
33-
midtxp = txp.yellow
34-
bottxp = txp.red
35-
36-
opts = Trollop::options do
37-
version Transrate::VERSION::STRING.dup
38-
banner <<-EOS
39-
_ _
40-
| |_ _ __ __ _ _ __ ___ _ __ __ _ | |_ ___
41-
#{toptxp} | __|| '__|/ _` || '_ \\ / __|| '__|/ _` || __|/ _ \\ #{toptxp}
42-
#{midtxp} | |_ | | | (_| || | | |\\__ \\| | | (_| || |_| __/ #{midtxp}
43-
#{bottxp} \\__||_| \\__,_||_| |_||___/|_| \\__,_| \\__|\\___| #{bottxp}
44-
45-
Transrate v#{Transrate::VERSION::STRING.dup}
46-
by Richard Smith-Unna, Chris Boursnell, Rob Patro,
47-
Julian Hibberd, and Steve Kelly
48-
49-
DESCRIPTION:
50-
Analyse a de-novo transcriptome assembly using three kinds of metrics:
51-
52-
1. sequence based (if --assembly is given)
53-
2. read mapping based (if --left and --right are given)
54-
3. reference based (if --reference is given)
55-
56-
Documentation at http://hibberdlab.com/transrate
57-
58-
USAGE:
59-
transrate <options>
60-
61-
OPTIONS:
62-
63-
EOS
64-
opt :assembly, "Assembly file(s) in FASTA format, comma-separated",
65-
:type => String
66-
opt :left, "Left reads file in FASTQ format",
67-
:type => String
68-
opt :right, "Right reads file in FASTQ format",
69-
:type => String
70-
opt :reference, "Reference proteome or transcriptome file in FASTA format",
71-
:type => String
72-
opt :threads, "Number of threads to use",
73-
:default => 8,
74-
:type => Integer
75-
opt :merge_assemblies, "Merge best contigs from multiple assemblies into file",
76-
:type => String
77-
opt :outfile, "Prefix filename to use for CSV output",
78-
:default => 'transrate'
79-
opt :loglevel, "Log level. " +
80-
"One of [error, info, warn, debug]",
81-
:default => 'info'
82-
opt :install_deps, "Install any missing dependencies. One of [all, read, ref]",
83-
:type => String, :default => nil
84-
opt :examples, "Show some example commands with explanations"
85-
end
86-
87-
if opts.examples
88-
puts <<-EOS
89-
90-
Transrate v#{Transrate::VERSION::STRING.dup}
91-
92-
EXAMPLE COMMANDS:
93-
94-
# check dependencies and install any that are missing
95-
transrate --install-deps
96-
97-
# get the transrate score for the assembly and each contig
98-
transrate --assembly contigs.fa --left left.fq --right right.fq
99-
100-
# basic assembly metrics only
101-
transrate --assembly contigs.fa
102-
103-
# basic and reference-based metrics with 8 threads
104-
transrate --assembly contigs.fa --reference ref.fa --threads 8
105-
106-
# contig and read-based metrics for two assemblies with 32 threads
107-
transrate --assembly one.fa,two.fa --left l.fq --right r.fq --threads 32
108-
109-
EOS
110-
exit(0)
111-
end
112-
113-
# Check dependencies if they are relevant to the command issued,
114-
# and handle any commands to install missing ones
115-
gem_dir = Gem.loaded_specs['transrate'].full_gem_path
116-
gem_deps = File.join(gem_dir, 'deps', 'deps.yaml')
117-
blast_dep = File.join(gem_dir, 'deps', 'blast.yaml')
118-
119-
deps, read_deps, ref_deps = nil
120-
unless opts.install_deps.nil?
121-
122-
unless %w[all read ref].include? opts.install_deps
123-
raise TransrateError.new "install-deps #{opts.install_deps} is not valid. " +
124-
"You must specify one of: all, read, ref."
125-
end
126-
127-
deps = opts.install_deps == 'all'
128-
read_deps = opts.install_deps == 'read'
129-
ref_deps = opts.install_deps == 'ref'
130-
end
131-
132-
if deps || read_deps || ref_deps
133-
# user has requested dependency installation
134-
puts "Checking dependencies"
135-
136-
missing = []
137-
if deps || read_deps
138-
Bindeps.require gem_deps
139-
missing += Bindeps.missing gem_deps
140-
end
141-
142-
if deps || ref_deps
143-
Bindeps.require blast_dep
144-
missing += Bindeps.missing blast_dep
145-
end
146-
147-
unless missing.empty?
148-
list = missing.collect {|i| "#{i.name}:#{i.version}"}.join("\n - ")
149-
msg = "Failed to install: \n - #{list}"
150-
raise TransrateError.new msg
151-
end
152-
153-
puts "All dependencies installed"
154-
exit
155-
156-
else
157-
# no dependency installation requested, but check dependencies
158-
# for the commands provided are installed
159-
missing = []
160-
missing = Bindeps.missing gem_deps if opts.left
161-
blast_missing = []
162-
blast_missing = Bindeps.missing blast_dep if opts.reference
163-
164-
if missing.length + blast_missing.length > 0
165-
puts "Dependencies are missing:"
166-
167-
missing.each do |dep|
168-
puts " - #{dep.name} (#{dep.version})"
169-
end
170-
171-
blast_missing.each do |dep|
172-
puts " - #{dep.name} (#{dep.version})"
173-
end
174-
175-
puts "To install all missing dependencies, run:"
176-
puts " transrate --install-deps all"
177-
puts "If you only want the read-metrics dependencies:"
178-
puts " transrate --install-deps read"
179-
puts "Or if you only want the reference-metrics dependencies: "
180-
puts " transrate --install-deps ref"
181-
182-
exit 1
183-
end
184-
185-
end
186-
187-
# Handle commands
188-
unless %w[error info warn debug].include? opts.loglevel
189-
raise TransrateError.new "Loglevel #{opts.loglevel} is not valid. " +
190-
"It must be one of: error, info, warn, debug."
191-
end
192-
193-
logger.level = Yell::Level.new opts.loglevel.to_sym
194-
195-
if opts.assembly
196-
opts.assembly.split(',').each do |assembly_file|
197-
unless File.exist?(assembly_file)
198-
raise TransrateIOError.new "Assembly fasta file does not exist: " +
199-
" #{assembly_file}"
200-
end
201-
end
202-
else
203-
raise TransrateArgError.new "Option --assembly must be specified. " +
204-
"Try --help for help."
205-
end
206-
207-
if opts.reference && !File.exist?(opts.reference)
208-
raise TransrateIOError.new "Reference fasta file does not exist: " +
209-
" #{opts.reference}"
210-
end
211-
212-
if opts.left and opts.right
213-
if opts.left.split(",").length != opts.right.split(",").length
214-
msg = "Please provide the same number of left reads as right reads"
215-
raise TransrateArgError.new msg
216-
end
217-
opts.left.split(",").zip(opts.right.split(",")).each do |left,right|
218-
if !File.exist?(left)
219-
raise TransrateIOError.new "Left read fastq file does not exist: #{left}"
220-
end
221-
if !File.exist?(right)
222-
raise TransrateIOError.new "Right read fastq file does not exist: #{right}"
223-
end
224-
end
225-
end
226-
227-
def pretty_print_hash hash, width, round=2
228-
hash.map do |k, v|
229-
# show as float if there are any decimal places
230-
if v.to_f.round(round).to_s.split('.').last.to_i > 0
231-
v = v.to_f.round(round)
232-
end
233-
if v.is_a? Float
234-
v = v.round(round)
235-
end
236-
pad = (width - (k.to_s.length + v.to_s.length))
237-
pad = [pad, 0].max
238-
logger.info "#{k.to_s.split('_').join(' ')}" +
239-
"#{" " * pad}" +
240-
"#{v}"
241-
end
242-
end
243-
244-
r = opts.reference ? Assembly.new(opts.reference) : nil
245-
report_width = 35
246-
247-
# loop through the assemblies, storing their outputs in an array of hashes
248-
all = []
249-
250-
assemblies=opts.assembly
251-
if opts.merge_assemblies
252-
merged_file = opts.merge_assemblies
253-
merged = {}
254-
assemblies.split(",").each do |file|
255-
Bio::FastaFormat.open(file).each do |entry|
256-
contig_name = "#{File.basename(file,File.extname(file))}:"
257-
contig_name << "#{entry.entry_id}"
258-
merged[contig_name] = entry.seq
259-
end
260-
end
261-
logger.info "Merging assemblies into one file...'#{merged_file}'"
262-
File.open(merged_file, "wb") do |out|
263-
merged.each do |name, seq|
264-
out.write ">#{name}\n"
265-
out.write "#{seq}\n"
266-
end
267-
end
268-
269-
assemblies = merged_file
270-
end
271-
272-
assemblies.split(',').each do |assembly|
273-
274-
logger.info "Loading assembly: #{assembly}"
275-
276-
a = Assembly.new assembly
277-
transrater = Transrater.new(a, r, threads: opts.threads)
278-
279-
logger.info "Analysing assembly: #{assembly}"
280-
281-
contig_results = {}
282-
283-
logger.info "Calculating contig metrics..."
284-
t0 = Time.now
285-
contig_results = transrater.assembly_metrics.basic_stats
286-
contig_results.merge! transrater.assembly.contig_metrics.results
287-
if contig_results
288-
logger.info "Contig metrics:"
289-
logger.info "-" * report_width
290-
pretty_print_hash(contig_results, report_width)
291-
end
292-
293-
logger.info "Contig metrics done in #{(Time.now - t0).round} seconds"
294-
295-
read_results = {}
296-
297-
if (opts.left && opts.right)
298-
logger.info "Calculating read diagnostics..."
299-
t0 = Time.now
300-
read_results = transrater.read_metrics(opts.left, opts.right).read_stats
301-
302-
if read_results
303-
logger.info "Read mapping metrics:"
304-
logger.info "-" * report_width
305-
pretty_print_hash(read_results, report_width)
306-
end
307-
308-
logger.info "Read metrics done in #{(Time.now - t0).round} seconds"
309-
else
310-
logger.info "No reads provided, skipping read diagnostics"
311-
end
312-
313-
comparative_results={}
314-
315-
if opts.reference
316-
logger.info "Calculating comparative metrics..."
317-
t0 = Time.now
318-
comparative_metrics = transrater.comparative_metrics
319-
comparative_results = comparative_metrics.comp_stats
320-
321-
if comparative_results
322-
logger.info "Comparative metrics:"
323-
logger.info "-" * report_width
324-
pretty_print_hash(comparative_results, report_width)
325-
end
326-
327-
logger.info "Comparative metrics done in #{(Time.now - t0).round} seconds"
328-
329-
logger.info "-" * report_width
330-
else
331-
logger.info "No reference provided, skipping comparative diagnostics"
332-
end
333-
334-
prefix = "#{opts.outfile}_#{File.basename(assembly)}"
335-
336-
if (opts.left && opts.right)
337-
score = transrater.assembly_score
338-
339-
optimal, cutoff = transrater.assembly_optimal_score prefix
340-
unless score.nil?
341-
pretty_print_hash({:TRANSRATE_ASSEMBLY_SCORE => score}, report_width, 4)
342-
logger.info "-" * report_width
343-
pretty_print_hash({:TRANSRATE_OPTIMAL_SCORE => optimal}, report_width, 4)
344-
pretty_print_hash({:TRANSRATE_OPTIMAL_CUTOFF => cutoff}, report_width, 4)
345-
pretty_print_hash(transrater.good_contigs, report_width)
346-
end
347-
end
348-
349-
# write contig metrics to file for each contig
350-
outfile = "#{prefix}_contigs.csv"
351-
logger.info "Writing contig metrics for each contig to #{outfile}"
352-
# have option to turn off, default on
353-
first=true
354-
CSV.open(outfile, 'wb') do |csv|
355-
a.each do |name, contig|
356-
basic_metrics = {:contig_name => name}.merge(contig.basic_metrics)
357-
if opts.reference
358-
comp_metrics = contig.comparative_metrics
359-
basic_metrics.merge!(comp_metrics)
360-
end
361-
if opts.left and opts.right
362-
read_metrics = contig.read_metrics
363-
basic_metrics.merge!(read_metrics)
364-
end
365-
if first
366-
csv << basic_metrics.keys
367-
first = false
368-
end
369-
csv << basic_metrics.values.map{ |x| x.is_a?(Float) ? x.round(6) : x }
370-
end
371-
end
372-
373-
all << contig_results.merge(read_results)
374-
.merge(comparative_results)
375-
.merge({ :assembly => assembly })
376-
.merge({ :score => score })
377-
.merge({ :optimal_score => optimal })
378-
.merge({ :cutoff => cutoff })
379-
380-
end
381-
382-
# write out all results to .csv
383-
outfile = "#{opts.outfile}_assemblies.csv"
384-
logger.info "Writing analysis results to #{outfile}"
385-
CSV.open(outfile, 'wb') do |file|
386-
keys = all[0].keys
387-
keys.delete(:assembly)
388-
head = [:assembly] + keys
389-
file << head
390-
all.each do |row|
391-
file << head.map { |x|
392-
entry = row[x]
393-
entry.is_a?(Float) ? entry.round(5) : entry
394-
}
395-
end
396-
end
22+
cmdline = Cmdline.new ARGV
23+
cmdline.run

lib/transrate.rb

+1
Original file line numberDiff line numberDiff line change
@@ -38,3 +38,4 @@ class TransrateArgError < TransrateError; end
3838
require 'transrate/comparative_metrics'
3939
require 'transrate/contig_metrics'
4040
require 'transrate/cmd'
41+
require 'transrate/cmdline'

0 commit comments

Comments
 (0)