-
Notifications
You must be signed in to change notification settings - Fork 3
/
Rakefile
99 lines (84 loc) · 2.9 KB
/
Rakefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
require 'csv'
require 'delegate'
require 'securerandom'
require 'active_support/core_ext/hash/except'
require 'hashdiff'
require 'kramdown'
require 'nokogiri'
require 'regexp-examples'
require_relative 'lib/build_node'
require_relative 'lib/table_builder'
require_relative 'lib/xml_base'
require_relative 'lib/xml_builder'
# Roman numerals for the digits '1' to '7', keyed by the digit as a string.
# Frozen so that this shared lookup table cannot be modified by accident.
ROMAN_NUMERALS = {
  '1' => 'I',
  '2' => 'II',
  '3' => 'III',
  '4' => 'IV',
  '5' => 'V',
  '6' => 'VI',
  '7' => 'VII',
}.freeze
# Patch RegexpExamples' character sets to leave out control characters.
# https://github.com/tom-lord/regexp-examples/blob/master/lib/regexp-examples/char_sets.rb
module RegexpExamples
  module CharSets
    class << self
      # Replaces the constant named +const+ on this module with +value+.
      def redefine(const, value)
        remove_const(const)
        const_set(const, value)
      end
    end

    # libxml2 errors on control characters.
    redefine(:Any, Any - Control)
    redefine(:AnyNoNewLine, AnyNoNewLine - Control)

    # libxml2 errors on \f and \v, and some types restrict \r and \n.
    redefine(:Whitespace, Whitespace - ["\f", "\v", "\r", "\n"])

    # Must come after Whitespace is redefined, so that the 's' entry uses the
    # narrowed set.
    redefine(:BackslashCharMap, BackslashCharMap.merge('s' => Whitespace))
  end
end
# Returns the sorted paths that match +glob+, after replacing the first "{}"
# in it.
#
# The "{}" placeholder becomes "{<FILES>}" if the FILES environment variable
# is set, and "*" otherwise. Raises a RuntimeError if nothing matches.
def files(glob)
  replacement = ENV['FILES'] ? "{#{ENV['FILES']}}" : '*'
  # Use the non-destructive +sub+, so that the caller's string is left intact
  # and frozen strings are accepted.
  pattern = glob.sub('{}', replacement)
  matches = Dir[pattern]
  raise "expected files to be found (#{pattern})" if matches.empty?
  matches.sort
end
# Returns the text of the PDF at +path+, with endnotes and footers removed.
#
# If a file with the same path but a ".txt" extension exists, its lines are
# used. Otherwise, the output of the `pdftotext -layout` command is used.
# The remaining lines are returned as one newline-separated string.
def pdftotext(path)
  # Anchor the pattern, so that only the file extension is replaced (not an
  # earlier ".pdf" in a directory name).
  text_path = path.sub(/\.pdf\z/, '.txt')
  lines =
    if File.exist?(text_path)
      File.readlines(text_path, chomp: true)
    else
      # Pass the arguments as an array, so that no shell interprets the path
      # (spaces and shell metacharacters are safe).
      IO.popen(['pdftotext', '-layout', path, '-'], &:read).split("\n")
    end

  # Remove endnotes: keep the lines before the first <<HD_reminder>> line, plus
  # the lines from the first <<annex_dN>> line onwards. An index of -1 is used
  # if a marker is absent. Neither slice includes the final line, as in the
  # original implementation (presumably a trailing form feed - to confirm).
  endnotes_start = lines.index { |line| line['<<HD_reminder>>'] } || -1
  annex_start = lines.index { |line| line[/<<annex_d\d>>/] } || -1
  # Slicing an empty array from -1 returns nil, so fall back to an empty array.
  lines = lines[0...endnotes_start] + (lines[annex_start...-1] || [])

  # Remove footers.
  lines = lines.reject { |line| line[/\A<<HD_ln>> <<standardform>> \d+ – <<\S+>> +\d+\z/] }
  lines.join("\n")
end
# Returns the keys that appear between "<<" and ">>" in +text+, in order of
# appearance.
def label_keys(text)
  pattern = /<<([^>]+)>>/
  # With a single capture group, each scan result is a one-element array.
  text.scan(pattern).map(&:first)
end
# Returns the indices referenced in +text+: first the Roman numerals for each
# "section_N" reference (looked up in ROMAN_NUMERALS, raising KeyError for an
# unknown digit), then each run of "I" and "V" characters with its optional
# ".N" suffixes, such as "II.1.2".
def indices(text)
  from_sections = text.scan(/\bsection_(\d)\b/).map do |(digit)|
    ROMAN_NUMERALS.fetch(digit)
  end
  written_out = text.scan(/\b[IV]+(?:\.\d+)*/)
  from_sections + written_out
end
# Returns whether +key+ is treated as help text, that is, whether it:
#
# * starts with "H_" or "HD_" and isn't "H_agree_to_publish", or
# * is one of a fixed list of keys, or
# * is included in +override+.
#
# The +number+ keyword is accepted but not used.
def help_text?(key, number: nil, override: [])
  return true if key[/\AHD?_/] && key != 'H_agree_to_publish'

  listed = %w(allocation_rest excl_vat icar_H_provide_numbers notice_design_cont request_qualification social_transfer_staff)
  return true if listed.include?(key)

  override.include?(key)
end
# The same XSD is used for both T01 and T02, but each form uses different parts.
#
# Sets the "label-key" of the /OBJECT_CONTRACT/OBJECT_DESCR/DURATION row (in
# place) to the label key for the form +number+, and returns the rows of +data+
# whose "xpath" doesn't start with a part that the form leaves out.
#
# Raises a RuntimeError if +number+ is neither "T01" nor "T02", and a
# NoMethodError if +data+ has no DURATION row.
def select_move_rows(data, number)
  label_key, reject_pattern =
    case number
    when 'T01'
      ['envisaged_start', 'AWARD_CONTRACT|LEFTI|OBJECT_CONTRACT/OBJECT_DESCR/ESSENTIAL_ASSETS']
    when 'T02'
      ['start_date_duration', 'PROCEDURE']
    else
      raise "unexpected form: #{number}"
    end

  duration_row = data.find { |row| row['xpath'] == '/OBJECT_CONTRACT/OBJECT_DESCR/DURATION' }
  duration_row['label-key'] = label_key

  unused = %r{\A/(?:#{reject_pattern})}
  data.reject { |row| row['xpath'][unused] }
end
# Import each Rake file in the tasks directory.
Dir['tasks/*.rake'].each do |rake_file|
  import rake_file
end