-
Notifications
You must be signed in to change notification settings - Fork 253
/
Copy pathmod.rs
210 lines (186 loc) · 6.67 KB
/
mod.rs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
use std::collections::{HashMap, HashSet};
use std::fs::File;
use std::path::{Path, PathBuf};
use std::rc::Rc;
use failure::Error;
use log::warn;
use regex::Regex;
use serde_derive::Deserialize;
/// One entry of a compilation database, as deserialized by `get_compile_commands`.
///
/// Only `directory`, `file` and `output` are read from the input; the command line
/// itself (`_command` / `_arguments`) is deliberately skipped during deserialization.
#[derive(Deserialize, Debug, Default, Clone)]
pub struct CompileCmd {
/// The working directory of the compilation. All paths specified in the command
/// or file fields must be either absolute or relative to this directory.
/// `abs_file` joins a relative `file` onto this directory.
directory: PathBuf,
/// The main translation unit source processed by this compilation step. This is
/// used by tools as the key into the compilation database. There can be multiple
/// command objects for the same file, for example if the same source file is compiled
/// with different configurations.
pub file: PathBuf,
/// The compile command executed. After JSON unescaping, this must be a valid command
/// to rerun the exact compilation step for the translation unit in the environment
/// the build system uses. Parameters use shell quoting and shell escaping of quotes,
/// with `"` and `\` being the only special characters. Shell expansion is not supported.
///
/// Marked `skip_deserializing`: the value is never read from the input and the field
/// keeps its default (`None`).
#[serde(skip_deserializing)]
_command: Option<String>,
/// The compile command executed as list of strings. Either arguments or command is required.
///
/// Marked `skip_deserializing`: the value is never read from the input and the field
/// keeps its default (an empty vector).
#[serde(default, skip_deserializing)]
_arguments: Vec<String>,
/// The name of the output created by this compilation step. This field is optional. It can
/// be used to distinguish different processing modes of the same input file.
/// `build_link_commands` also uses it to match the inputs of a link command to the
/// compile command that produced them.
output: Option<String>,
}
impl CompileCmd {
    /// Returns the absolute path of the source file of this compile command.
    ///
    /// An already-absolute `file` is returned as a clone, without canonicalization.
    /// A relative `file` is joined onto `directory` and the result is canonicalized.
    ///
    /// # Panics
    ///
    /// Panics with a `could not canonicalize <path>: <error>` message if `file` is
    /// relative and `Path::canonicalize` fails for the joined path.
    pub fn abs_file(&self) -> PathBuf {
        if self.file.is_absolute() {
            return self.file.clone();
        }

        let path = self.directory.join(&self.file);
        // Build the panic message only on failure; the text is the same as what
        // `expect` would produce (`<message>: <error Debug>`).
        path.canonicalize().unwrap_or_else(|err| {
            panic!("could not canonicalize {}: {:?}", path.display(), err)
        })
    }
}
/// The kind of artifact a link command produces.
///
/// Because of `rename_all = "snake_case"`, the variants are deserialized from the
/// strings `exe`, `shared` and `static`.
#[derive(Deserialize, Debug, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum LinkType {
/// Not a library (`is_library` returns `false`); presumably an executable.
Exe,
/// A library; maps to the `cdylib` crate type.
Shared,
/// A library; maps to the `staticlib` and `rlib` crate types.
Static,
}
impl LinkType {
pub fn is_library(&self) -> bool {
match self {
LinkType::Exe => false,
LinkType::Shared => true,
LinkType::Static => true,
}
}
pub fn as_cargo_types(&self) -> &str {
match self {
LinkType::Exe => "\"rlib\"",
LinkType::Shared => "\"cdylib\"",
LinkType::Static => "\"staticlib\", \"rlib\"",
}
}
}
/// A single link step and the compile commands that feed into it.
///
/// `build_link_commands` either deserializes one of these from the part of a
/// compile command's `file` path that follows the `/c2rust/link/` prefix, or
/// synthesizes one for compile commands that no link step consumed.
#[derive(Deserialize, Debug)]
pub struct LinkCmd {
/// All input files going into this link
pub inputs: Vec<String>,
/// The output file; this is taken from the `CompileCmd`
/// (any deserialized value is overwritten by `build_link_commands`).
#[serde(default)]
pub output: Option<String>,
/// List of libraries to link in (without `-l` prefix)
pub libs: Vec<String>,
/// List of library directories
pub lib_dirs: Vec<PathBuf>,
/// What type of binary we're building
pub r#type: LinkType,
/// Input files in `CompileCmd` form; empty after deserialization and filled in by
/// `build_link_commands` for every input whose name matches a compile command's output.
#[serde(default)]
pub cmd_inputs: Vec<Rc<CompileCmd>>,
/// `false` when deserialized without the field; `build_link_commands` sets it to
/// `true` only on the synthesized command that collects left-over compile commands.
#[serde(default)]
pub top_level: bool,
}
/// Turn a flat list of `CompileCmd`s into a DAG of `LinkCmd`s and `CompileCmd`s.
///
/// Entries whose `file` starts with `/c2rust/link/` are link steps: the rest of the
/// path is decoded with `serde_bencode` into a `LinkCmd`, which then receives the
/// entry's `output` and a reference to every compile command whose output it lists
/// as an input. Compile commands not consumed by any link step are gathered into one
/// extra top-level `LinkCmd` of type `Static`.
///
/// Returns an error if a link entry cannot be decoded. Panics if the path remainder
/// of a link entry is not valid UTF-8.
fn build_link_commands(mut v: Vec<Rc<CompileCmd>>) -> Result<Vec<LinkCmd>, Error> {
    // Output name -> position in `v`. If several commands share an output,
    // the last one wins.
    let output_map: HashMap<_, _> = v
        .iter()
        .enumerate()
        .filter_map(|(idx, ccmd)| ccmd.output.as_ref().map(|output| (output, idx)))
        .collect();

    let mut consumed = HashSet::new();
    let mut link_cmds = Vec::new();
    for (idx, ccmd) in v.iter().enumerate() {
        let encoded = if let Ok(rest) = ccmd.file.strip_prefix("/c2rust/link/") {
            rest.to_str().unwrap()
        } else {
            // An ordinary compile command; handled below if nothing links it.
            continue;
        };

        let mut lcmd: LinkCmd = serde_bencode::from_str(encoded)?;
        lcmd.output = ccmd.output.clone();
        for inp in &lcmd.inputs {
            if let Some(&input_idx) = output_map.get(&inp) {
                lcmd.cmd_inputs.push(Rc::clone(&v[input_idx]));
                consumed.insert(input_idx);
            }
        }
        link_cmds.push(lcmd);
        // The link entry itself must not show up among the left-overs.
        consumed.insert(idx);
    }

    // TODO: add binaries

    // Drop every command that was consumed above. `retain` visits the elements
    // in order, so a running counter reproduces the original indices.
    let mut position = 0;
    v.retain(|_| {
        let keep = !consumed.contains(&position);
        position += 1;
        keep
    });

    if v.is_empty() {
        return Ok(link_cmds);
    }

    // Left-over compile commands are bound to the crate itself
    // (which becomes a `staticlib` or `rlib`).
    // FIXME: this doesn't catch all of them; do we need to???
    let inputs = v.iter().filter_map(|ccmd| ccmd.output.clone()).collect();
    link_cmds.push(LinkCmd {
        inputs,
        output: None,
        libs: Vec::new(),
        lib_dirs: Vec::new(),
        r#type: LinkType::Static,
        cmd_inputs: v,
        top_level: true,
    });
    Ok(link_cmds)
}
/// Drop compile commands that refer to a source file already seen earlier in `v`.
///
/// Some build scripts compile the same input file several times with different
/// command line flags, producing multiple outputs. Only the first command for each
/// absolute source path (as computed by `abs_file`) is kept, in the original order;
/// a warning is logged for every command that is skipped.
fn filter_duplicate_cmds(v: Vec<Rc<CompileCmd>>) -> Vec<Rc<CompileCmd>> {
    let mut visited = HashSet::new();
    v.into_iter()
        .filter(|cmd| {
            let source = cmd.abs_file();
            let duplicate = visited.contains(&source);
            if duplicate {
                warn!("Skipping duplicate compilation cmd for {}", source.display());
            } else {
                visited.insert(source);
            }
            !duplicate
        })
        .collect()
}
/// Load the compilation database at `compile_commands` and group it into link commands.
///
/// Entries whose `file` does not match `filter` (when one is given) are dropped, as
/// are entries whose `file` ends in `.S` or `.s`, since those are likely assembly
/// files. The remaining entries go through `build_link_commands`, and each resulting
/// link command then has duplicate inputs removed by `filter_duplicate_cmds`.
///
/// Returns an error if the file cannot be opened or parsed as JSON, or if building
/// the link commands fails. Panics if an entry's `file` is not valid UTF-8.
pub fn get_compile_commands(
    compile_commands: &Path,
    filter: &Option<Regex>,
) -> Result<Vec<LinkCmd>, Error> {
    // `File::open` opens the database read-only.
    let reader = std::io::BufReader::new(File::open(compile_commands)?);
    // Deserialize the JSON array directly into compile commands.
    let parsed: Vec<Rc<CompileCmd>> = serde_json::from_reader(reader)?;

    let kept: Vec<Rc<CompileCmd>> = parsed
        .into_iter()
        .filter(|cmd| {
            let name = cmd.file.to_str().unwrap();
            // Without a filter every entry is wanted.
            let wanted = filter.as_ref().map_or(true, |re| re.is_match(name));
            let likely_asm = name.ends_with(".S") || name.ends_with(".s");
            wanted && !likely_asm
        })
        .collect();

    let mut link_cmds = build_link_commands(kept)?;
    for lcmd in link_cmds.iter_mut() {
        // Move the inputs out, de-duplicate them, and put the result back.
        lcmd.cmd_inputs = filter_duplicate_cmds(std::mem::take(&mut lcmd.cmd_inputs));
    }
    Ok(link_cmds)
}