forked from septract/starling-tool
-
Notifications
You must be signed in to change notification settings - Fork 1
/
bench.py
executable file
·345 lines (250 loc) · 10.7 KB
/
bench.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
#!/usr/bin/env python3
import argparse
import collections
import os
import pathlib
import statistics
import sys
import platform
import timeit
from dataclasses import dataclass
from typing import Any, List, Optional, Mapping, DefaultDict, Union
from starling_common import *
# This file should contain records of the form
# descr:bucket:name:(grasshopper module path)
SPEC_FILE: str = 'bench.in'
# Number of times to run Z3 time benchmarks.
Z3_TIMES: int = 3
# Number of times to run GH time benchmarks.
GH_TIMES: int = 3
ARGS: Any = None
def example(value: Any) -> None:
"""Prints out an example description.
Args:
value: The value of the description.
"""
print("#", value)
def fact(key: str, value: Any) -> None:
"""Prints out a benchmark fact.
Args:
key: The name of the fact to print.
value: The value to print out.
"""
print(" - ", key, ": ", value, sep="")
def loc(path: os.PathLike) -> int:
"""Gets the line count of the file at 'path'.
Args:
path: The path to the file to measure.
Returns:
The number of lines in the file at 'path'.
"""
return sum(1 for _ in open(path))
def count_terms(starling: Starling, path: os.PathLike) -> Tuple[int, int]:
"""Runs Starling in Z3 mode on a file, and counts the outputted terms.
Args:
starling: The Starling object to use.
path: The path to the Starling file to run.
Returns:
A pair of the total number of terms, and the number of
those that generated an 'success' result from SMT
elimination.
"""
num_total: int = 0
num_success: int = 0
output: Output = starling.run_z3(path, extra_args=["-Btry-approx"])
for line in output.stdout:
_, sep, result = line.partition(':')
if sep == '':
continue
num_total = num_total + 1
is_success = result.strip() == 'success'
num_success = num_success + 1 if is_success else num_success
return num_total, num_success
@dataclass
class Z3TimeAnalysis:
working_set: int
total_time: float
tool_time: float
z3_time: float
def scrape_z3_times(lines: List[str]) -> Mapping[str, Mapping[str, List[float]]]:
"""Reads raw Z3 time analysis from a list of output lines.
Args:
lines: The list of lines from which we're scraping.
Returns:
The raw mapping of phases to metrics to time/memory observations.
"""
# Accumulate metrics with a map from phase to metric to value,
# auto-generating the map structure where it's missing.
phases: Mapping[str, DefaultDict[str, List[float]]] = collections.defaultdict(
lambda: collections.defaultdict(list)
)
for _ in range(Z3_TIMES):
for line in lines:
phase, sep, rest = line.partition(';')
if sep == '':
continue
metric, _, value_raw = rest.partition(':')
# This is always 'Phase XYZ': we want just 'XYZ'.
phase = phase[6:]
# The value sometimes comes with 'ms' on the end of it.
# We don't want, or need, that.
value_str : str = value_raw[:-2] if value_raw[-2:] == 'ms' else value_raw
value: float = float(value_str)
phases[phase.strip()][metric.strip()].append(value)
return phases
def z3_times(starling: Starling, starling_path: os.PathLike) -> Z3TimeAnalysis:
"""Performs time and memory analysis on Starling/Z3.
Args:
starling: The Starling object to use.
starling_path: The path to the Starling file.
Returns:
A tuple of the working set size, in KB; the total time
spent in Starling; the time, of that, spent in the tool;
and the time, of that, spent in Z3.
"""
output: Output = starling.run_z3(starling_path, ["-Pphase-time,phase-working-set"])
phases: Mapping[str, Mapping[str, List[float]]] = scrape_z3_times(output.stderr)
# Working set is returned in KiB, but Starling emits in B.
working_set_bytes: float = statistics.mean(phases['Eliminate']['WorkingSet'])
working_set: int = int(working_set_bytes / 1024.0)
# Times are emitted in milliseconds, so convert to seconds.
elapsed_averages: Mapping[str, float] = {phase: statistics.mean(phases[phase]['Elapsed']) for phase in phases}
# Total time is literally just sum of all phase times.
total_time_micros: float = sum(elapsed_averages[phase] for phase in phases)
total_time: float = total_time_micros / 1000.0
# Time 'in Z3' is taken to be the eliminate phase.
z3_time_micros: float = elapsed_averages['Eliminate']
z3_time: float = z3_time_micros / 1000.0
# Time 'on tool' is total time less Z3 time.
tool_time: float = total_time - z3_time
return Z3TimeAnalysis(working_set=working_set, total_time=total_time, tool_time=tool_time, z3_time=z3_time)
def proof_lines(starling_path: os.PathLike) -> Iterator[str]:
"""Yields the proof-carrying lines in a Starling file.
This is a rough guess based on some string matching.
Args:
starling_path: The path to the Starling file.
Yields:
Each proof-carrying line.
"""
view_nest: int = 0
decl_nest: int = 0
with open(starling_path) as f:
for line in f:
# TODO: This is very clumsy and ad-hoc.
# Ideally Starling should be able to do this analysis
# for us.
in_proof = False
view_nest = view_nest + line.count("{|")
if view_nest > 0:
in_proof = True
view_nest = view_nest - line.count("|}")
# Some comments in the current examples contain the
# word 'constraints', which breaks this checker.
# For now, insist on a space after 'constraint', etc.
# This still over-reports proof LoC but not as much.
decl_nest = (decl_nest +
line.count("constraint ") +
line.count("view "))
if decl_nest > 0:
in_proof = True
decl_nest = decl_nest - line.count(";")
if in_proof:
yield line
def guess_proof_loc(starling_path: os.PathLike) -> int:
"""Guesses the lines of proof code in a Starling file.
Args:
starling_path: The path to the Starling file.
Returns:
A rough estimate of the lines of proof code.
"""
return sum(1 for _ in proof_lines(starling_path))
@dataclass
class LocAnalysis:
starling: int
grass: Optional[int]
proof: int
def count_loc(starling_path: os.PathLike, grass_path: Optional[os.PathLike]) -> LocAnalysis:
"""Performs lines of code analysis for one Starling example.
Args:
starling_path: The path to the Starling file.
grass_path: The path to the GRASShopper auxiliary module.
If None, the example is taken to be a Z3 example.
Returns:
A triple of:
- Number of Starling input lines;
- Number of GRASShopper input lines;
- Number of proof input lines (guesstimate).
"""
starling_loc: int = loc(starling_path)
grass_loc: Optional[int] = None if grass_path is None else loc(grass_path)
proof_loc: int = 0 if grass_loc is None else grass_loc
proof_loc = proof_loc + guess_proof_loc(starling_path)
return LocAnalysis(starling_loc, grass_loc, proof_loc)
def make_grass_path(grass_name: str) -> Optional[pathlib.Path]:
"""If 'grass_name' is empty, returns None;
otherwise, creates the full path to the GRASShopper example given by the name.
Args:
grass_name: The name of the GRASShopper example; can be empty, but can't be None.
Returns:
The full GRASShopper path, if appropriate.
"""
return None if grass_name == '' else pathlib.Path('grasshopper', grass_name + '.spl')
def bench(starling: Starling, bucket: str, name: str, grass: str):
"""Performs benchmarking for one Starling example.
Dumps results to stdout.
Args:
starling: The Starling object to use.
bucket: The category (Pass, Fail, etc.) in which the example
lives.
name: The name, less the file extension, of the example.
grass: The name of the GRASShopper auxiliary module.
If empty, the example is taken to be a Z3 example.
"""
starling_path: pathlib.Path = inflate_example_name(bucket, name)
fact("Path", starling_path)
grass_path: Optional[pathlib.Path] = make_grass_path(grass)
is_grass: bool = grass_path is not None
if (is_grass and ARGS.nogh) or (ARGS.noz3 and not is_grass):
print(" *SKIPPED*")
return
fact("Mode", "GH" if is_grass else "Z3")
line_counts: LocAnalysis = count_loc(starling_path, grass_path)
fact("No. lines input", line_counts.starling)
fact("No. lines auxiliary GRASShopper", "-" if line_counts.grass is None else line_counts.grass)
fact("No. lines proof input", line_counts.proof)
ntotal, nsuccess = count_terms(starling, starling_path)
fact("No. generated terms", ntotal)
fact("No. SMT-elim terms", nsuccess)
z3: Z3TimeAnalysis = z3_times(starling, starling_path)
fact("Time, total excl GH (s)", z3.total_time)
fact("Time, on tool (s)", z3.tool_time)
fact("Time, SMT (s)", z3.z3_time)
fact("Mem use, Starling (KiB)", z3.working_set)
if not is_grass:
fact("No. lines gen GH", "-")
fact("Time, GH (s)", "-")
return
# For GRASShopper, we can do the translation once, report the
# LoC from that, and then run the last benchmarks directly
# on GRASShopper alone.
ghpath = starling.run_grasshopper_translate(starling_path)
fact("No. lines gen GH", loc(ghpath))
# This overestimates by adding Python overhead, but should be ok.
ghtime = timeit.timeit('starling_common.run_and_cook(["grasshopper.native","' + os.fspath(ghpath) + '"])',
setup='import starling_common',
number=GH_TIMES)
fact("Time, GH (s)", ghtime)
ghpath.unlink()
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument('-z', '--noz3', action='store_true', help='do not benchmark Z3 examples')
parser.add_argument('-g', '--nogh', action='store_true', help='do not benchmark GRASShopper examples')
ARGS = parser.parse_args()
starling: Starling = get_starling()
# TODO: Find out why this happens.
if platform.system() == 'FreeBSD':
print("WARNING: Memory usage metrics may be incorrectly reported as '0'.", file=sys.stderr)
print("If this happens, try running eg. `command time -l ./starling.sh FILE`", file=sys.stderr)
for desc, bucket, name, grass in read_infile(pathlib.Path(SPEC_FILE)):
example(desc)
bench(starling, bucket, name, grass)