forked from inukshuk/anystyle
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Rakefile
166 lines (148 loc) · 3.94 KB
/
Rakefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
# Let Bundler prepare the gem environment before anything else is loaded.
# When Bundler reports a problem, show its message plus a hint on stderr and
# exit with the status code carried by the error.
require 'bundler'

begin
  Bundler.setup
rescue Bundler::BundlerError => error
  $stderr.puts error.message, "Run `bundle install` to install missing gems"
  exit error.status_code
end
require 'rake'
require 'rake/clean'
$:.unshift(File.join(File.dirname(__FILE__), './lib'))
require 'anystyle/version'
# Declared up front without prerequisites or actions; a later declaration of
# :default in this file attaches its prerequisite.
task :default

# Runs :clean first, then packages the gem from its gemspec.
task build: [:clean] do
  system 'gem build anystyle-parser.gemspec'
end

# Runs :build first, then tags the current version in git and pushes the gem.
task release: [:build] do
  version = AnyStyle::VERSION
  system "git tag #{version}"
  system "gem push anystyle-parser-#{version}.gem"
end
# Loads the parser with Ruby's verbose mode enabled and then prints the
# library version.
task :check_warnings do |_task|
  # Verbose mode has to be on before the require so that it applies while
  # the library files are being loaded.
  $VERBOSE = true
  require 'anystyle/parser'

  $stdout.puts AnyStyle::VERSION
end
# Defines the :spec task; its pattern selects every *_spec.rb file below spec/.
require 'rspec/core'
require 'rspec/core/rake_task'

RSpec::Core::RakeTask.new(:spec) { |t| t.pattern = FileList['spec/**/*_spec.rb'] }
# Only when the CI environment variable is set: load the Coveralls rake task
# and define :test_with_coveralls, which depends on :spec and coveralls:push.
if ENV['CI']
  begin
    require 'coveralls/rake/task'
    Coveralls::RakeTask.new
    task test_with_coveralls: [:spec, 'coveralls:push']
  rescue LoadError
    # The coveralls gem could not be loaded; carry on without the task.
  end
end

# A bare `rake` runs the spec suite.
task default: :spec
# Instantiate YARD's rake task (YARD::Rake::YardocTask) when the yard gem
# can be required.
begin
require 'yard'
YARD::Rake::YardocTask.new
rescue LoadError
# yard could not be loaded: skip the task rather than fail the whole Rakefile
end
desc 'Run an IRB session with AnyStyle loaded'
# Optional task argument :script is handed to IRB as its :SCRIPT setting.
task :console, [:script] do |_task, options|
  # Empty ARGV before IRB starts (presumably so IRB does not try to interpret
  # rake's own command-line arguments -- confirm against IRB's behaviour).
  ARGV.clear

  require 'irb'
  require 'anystyle'

  IRB.conf[:SCRIPT] = options[:script]
  IRB.start
end
desc 'Update model using latest source and training data'
# Task arguments:
#   model   - 'finder' trains the finder model; anything else (default
#             'parser') trains the parser model.
#   threads - number of threads stored in the model defaults (default 4).
#
# Raises ArgumentError when :threads is given but is not a valid integer.
task :train, :model, :threads do |_task, args|
  model = args[:model] || 'parser'

  # Rake hands task arguments over as Strings (`rake "train[parser,8]"`),
  # whereas the fallback is an Integer. Normalise so the value stored in the
  # defaults always has the same type.
  threads = Integer(args[:threads] || 4)

  require 'anystyle'
  Wapiti.debug!

  case model
  when 'finder'
    AnyStyle::Finder.defaults[:threads] = threads
    AnyStyle.finder.train
    AnyStyle.finder.model.save
  else
    AnyStyle::Parser.defaults[:threads] = threads
    AnyStyle.parser.train
    AnyStyle.parser.model.save
  end
end
desc 'Check all tagged datasets'
# Task argument :model selects what is checked: 'finder' checks every
# ./res/finder/*.ttx file with AnyStyle.finder, anything else (default
# 'parser') checks every ./res/parser/*.xml file with AnyStyle.parser.
# Each file's result and elapsed time are passed to `report`.
task :check, :model do |_task, args|
  require 'anystyle'

  # Keep only the engine's name here and look it up per file, so that no
  # engine is touched when there is nothing to check.
  engine, pattern =
    if (args[:model] || 'parser') == 'finder'
      [:finder, './res/finder/*.ttx']
    else
      [:parser, './res/parser/*.xml']
    end

  Dir[pattern].sort.each do |path|
    print 'Checking %.25s' % "#{File.basename(path)}....................."
    start = Time.now
    # The path is passed as-is: Object#untaint was removed from Ruby (3.2),
    # so calling it would raise NoMethodError.
    stats = AnyStyle.public_send(engine).check path
    report stats, Time.now - start
  end
end
desc "Save delta of a tagged dataset with itself"
# Task argument :input is a file or a directory. For a directory every entry
# whose name does not start with '.' is processed, in sorted order. Each file
# is labelled again (.ttx files with the finder, everything else with the
# parser) and the difference `labelled - tagged` is saved to
# delta_<name><ext> in the current directory when it is not empty.
#
# Raises ArgumentError when no input is given.
task :delta, :input do |_task, args|
  source = args[:input]
  raise ArgumentError, 'no input given; usage: rake "delta[FILE_OR_DIRECTORY]"' if source.nil?

  require 'anystyle'

  # Paths are used as-is: Object#untaint was removed from Ruby (3.2).
  files =
    if File.directory?(source)
      Dir.entries(source)
         .reject { |entry| entry.start_with?('.') }
         .sort
         .map { |entry| File.join(source, entry) }
    else
      [source]
    end

  files.each do |file|
    extn = File.extname(file)
    print 'Checking %.25s' % "#{File.basename(file)}....................."

    case extn
    when '.ttx'
      tagged = Wapiti::Dataset.new([AnyStyle::Document.open(file)])
      labelled = AnyStyle.finder.label tagged
      fmt = 'txt'
    else
      tagged = Wapiti::Dataset.open(file)
      labelled = AnyStyle.parser.label tagged
      fmt = 'xml'
    end

    delta = labelled - tagged
    if delta.length == 0
      puts ' ✓'
    else
      name = File.basename(file, extn)
      delta.save "delta_#{name}#{extn}", indent: 2, tagged: true, format: fmt
      puts "delta saved to delta_#{name}#{extn} (#{delta.length})"
    end
  end
end
desc "Find references in document"
# Task argument :input is the document to scan. The references found in the
# first result are joined with newlines, labelled by the parser and printed
# as XML. Nothing is printed when no references are found.
#
# Raises ArgumentError when no input is given.
task :find, :input do |_task, args|
  file = args[:input]
  raise ArgumentError, 'no input given; usage: rake "find[FILE]"' if file.nil?

  require 'anystyle'

  # The path is used as-is: Object#untaint was removed from Ruby (3.2).
  refs = AnyStyle.finder.find(file, format: :references)[0]

  # `next` ends the task body early. `break` cannot be used here: rake stores
  # the block and calls it after `task` has returned, where `break` raises
  # LocalJumpError.
  next if refs.nil? || refs.empty?

  output = AnyStyle.parser.label refs.join("\n")
  puts output.to_xml(indent: 2)
end
# Prints the outcome of one dataset check on a single line.
#
# stats - Hash with :token and :sequence entries, each a Hash providing
#         :errors and :rate.
# time  - elapsed time; printed through %d, so only the integer part shows.
#
# With zero token errors a check mark and the time are printed; otherwise the
# sequence and token error counts and rates are printed before the time.
def report(stats, time)
  tokens = stats[:token]
  return puts(format(' ✓ %2ds', time)) if tokens[:errors] == 0

  sequences = stats[:sequence]
  puts format(
    '%4d seq %6.2f%% %6d tok %5.2f%% %2ds',
    sequences[:errors], sequences[:rate], tokens[:errors], tokens[:rate], time
  )
end
# Add built gem packages and *.rbc files in the project root to the file
# list used by the clean task.
CLEAN.include('*.gem', '*.rbc')