Skip to content

Commit

Permalink
Benchmark improvements (emscripten-core#8398)
Browse files (browse the repository at this point in the history)
Normalize too-short scores, handle 0 reps, do some general cleanup, and prepare for the wasm backend (MINIMAL_RUNTIME is disabled for now, but the tests where it can be enabled soon are kept marked).
  • Loading branch information
kripken authored and VirtualTim committed May 21, 2019
1 parent 5c25fc5 commit 15f323a
Show file tree
Hide file tree
Showing 3 changed files with 50 additions and 61 deletions.
10 changes: 5 additions & 5 deletions tests/havlak.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -770,11 +770,11 @@ int main(int argc, char **argv) {
int arg = argc > 1 ? argv[1][0] - '0' : 3;
switch(arg) {
case 0: return 0; break;
case 1: NUM = 3; break;
case 2: NUM = 10; break;
case 3: NUM = 20; break;
case 4: NUM = 30; break;
case 5: NUM = 50; break;
case 1: NUM = 10; break;
case 2: NUM = 30; break;
case 3: NUM = 60; break;
case 4: NUM = 100; break;
case 5: NUM = 150; break;
default: printf("error: %d\\n", arg); return -1;
}

Expand Down
10 changes: 5 additions & 5 deletions tests/lzma/benchmark.c
Original file line number Diff line number Diff line change
Expand Up @@ -88,11 +88,11 @@ int main(int argc, char **argv) {
int arg = argc > 1 ? argv[1][0] - '0' : 3;
switch(arg) {
case 0: return 0; break;
case 1: size = 100000; iters = 1; break;
case 2: size = 100000; iters = 10; break;
case 3: size = 100000; iters = 22; break;
case 4: size = 100000; iters = 125; break;
case 5: size = 100000; iters = 225; break;
case 1: size = 100000; iters = 4*1; break;
case 2: size = 100000; iters = 4*10; break;
case 3: size = 100000; iters = 4*22; break;
case 4: size = 100000; iters = 4*125; break;
case 5: size = 100000; iters = 4*225; break;
default: printf("error: %d\\n", arg); return -1;
}

Expand Down
91 changes: 40 additions & 51 deletions tests/test_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
# 5: 10 seconds
DEFAULT_ARG = '4'

TEST_REPS = 3
TEST_REPS = 5

# by default, run just core benchmarks
CORE_BENCHMARKS = True
Expand Down Expand Up @@ -72,24 +72,25 @@ def bench(self, args, output_parser=None, reps=TEST_REPS):
def display(self, baseline=None):
# speed

if baseline == self:
baseline = None
mean = sum(self.times) / len(self.times)
squared_times = [x * x for x in self.times]
mean_of_squared = sum(squared_times) / len(self.times)
std = math.sqrt(mean_of_squared - mean * mean)
sorted_times = self.times[:]
sorted_times.sort()
median = sum(sorted_times[len(sorted_times) // 2 - 1:len(sorted_times) // 2 + 1]) / 2

print(' %10s: mean: %4.3f (+-%4.3f) secs median: %4.3f range: %4.3f-%4.3f (noise: %4.3f%%) (%d runs)' % (self.name, mean, std, median, min(self.times), max(self.times), 100 * std / mean, self.reps), end=' ')

if baseline:
mean_baseline = sum(baseline.times) / len(baseline.times)
final = mean / mean_baseline
print(' Relative: %.2f X slower' % final)
else:
print()
if self.times:
if baseline == self:
baseline = None
mean = sum(self.times) / len(self.times)
squared_times = [x * x for x in self.times]
mean_of_squared = sum(squared_times) / len(self.times)
std = math.sqrt(mean_of_squared - mean * mean)
sorted_times = self.times[:]
sorted_times.sort()
median = sum(sorted_times[len(sorted_times) // 2 - 1:len(sorted_times) // 2 + 1]) / 2

print(' %10s: mean: %4.3f (+-%4.3f) secs median: %4.3f range: %4.3f-%4.3f (noise: %4.3f%%) (%d runs)' % (self.name, mean, std, median, min(self.times), max(self.times), 100 * std / mean, self.reps), end=' ')

if baseline:
mean_baseline = sum(baseline.times) / len(baseline.times)
final = mean / mean_baseline
print(' Relative: %.2f X slower' % final)
else:
print()

# size

Expand Down Expand Up @@ -302,33 +303,21 @@ def cleanup(self):


# Benchmarkers
benchmarkers = [
NativeBenchmarker('clang', CLANG_CC, CLANG),
# NativeBenchmarker('gcc', 'gcc', 'g++')
]

benchmarkers = []

if CLANG_CC and CLANG:
benchmarkers += [
# NativeBenchmarker('clang', CLANG_CC, CLANG),
# NativeBenchmarker('gcc', 'gcc', 'g++')
]
if SPIDERMONKEY_ENGINE and SPIDERMONKEY_ENGINE in shared.JS_ENGINES:
benchmarkers += [
# EmscriptenBenchmarker('sm-asmjs', SPIDERMONKEY_ENGINE, ['-s', 'WASM=0']),
# EmscriptenBenchmarker('sm-asm2wasm', SPIDERMONKEY_ENGINE + ['--no-wasm-baseline'], []),
# EmscriptenBenchmarker('v8-wasmbc', V8_ENGINE, env={
# 'LLVM': os.path.expanduser('~/Dev/llvm/build/bin'),
# }),
# EmscriptenBenchmarker('v8-wasmobj', V8_ENGINE, ['-s', 'WASM_OBJECT_FILES=1'], env={
# 'LLVM': os.path.expanduser('~/Dev/llvm/build/bin'),
# }),
# EmscriptenBenchmarker('sm', SPIDERMONKEY_ENGINE),
]
if V8_ENGINE and V8_ENGINE in shared.JS_ENGINES:
benchmarkers += [
EmscriptenBenchmarker('v8-asmjs', V8_ENGINE, ['-s', 'WASM=0']),
EmscriptenBenchmarker('v8-asm2wasm', V8_ENGINE, env={
'LLVM': os.path.expanduser('~/Dev/fastcomp/build/bin'),
}),
EmscriptenBenchmarker('v8-wasmbc', V8_ENGINE, env={
'LLVM': os.path.expanduser('~/Dev/llvm/build/bin'),
}),
EmscriptenBenchmarker('v8-wasmobj', V8_ENGINE, ['-s', 'WASM_OBJECT_FILES=1'], env={
'LLVM': os.path.expanduser('~/Dev/llvm/build/bin'),
}),
EmscriptenBenchmarker('v8', V8_ENGINE),
]
if os.path.exists(CHEERP_BIN):
benchmarkers += [
Expand Down Expand Up @@ -449,7 +438,7 @@ def test_primes(self, check=True):
return 0;
}
'''
self.do_benchmark('primes' if check else 'primes-nocheck', src, 'lastprime:' if check else '', shared_args=['-DCHECK'] if check else [], emcc_args=['-s', 'MINIMAL_RUNTIME=1'])
self.do_benchmark('primes' if check else 'primes-nocheck', src, 'lastprime:' if check else '', shared_args=['-DCHECK'] if check else [], emcc_args=['-s', 'MINIMAL_RUNTIME=0'])

# Also interesting to test it without the printfs which allow checking the output. Without
# printf, code size is dominated by the runtime itself (the compiled code is just a few lines).
Expand Down Expand Up @@ -487,7 +476,7 @@ def test_memops(self):
return 0;
}
'''
self.do_benchmark('memops', src, 'final:', emcc_args=['-s', 'MINIMAL_RUNTIME=1'])
self.do_benchmark('memops', src, 'final:', emcc_args=['-s', 'MINIMAL_RUNTIME=0'])

def zzztest_files(self):
src = r'''
Expand Down Expand Up @@ -601,11 +590,11 @@ def test_ifs(self):
int arg = argc > 1 ? argv[1][0] - '0' : 3;
switch(arg) {
case 0: return 0; break;
case 1: arg = 75; break;
case 2: arg = 625; break;
case 3: arg = 1250; break;
case 4: arg = 5*1250; break;
case 5: arg = 10*1250; break;
case 1: arg = 5*75; break;
case 2: arg = 5*625; break;
case 3: arg = 5*1250; break;
case 4: arg = 5*5*1250; break;
case 5: arg = 5*10*1250; break;
default: printf("error: %d\\n", arg); return -1;
}
Expand All @@ -629,7 +618,7 @@ def test_ifs(self):
return sum;
}
'''
self.do_benchmark('ifs', src, 'ok', reps=TEST_REPS)
self.do_benchmark('ifs', src, 'ok')

def test_conditionals(self):
src = r'''
Expand Down Expand Up @@ -665,7 +654,7 @@ def test_conditionals(self):
return x;
}
'''
self.do_benchmark('conditionals', src, 'ok', reps=TEST_REPS, emcc_args=['-s', 'MINIMAL_RUNTIME=1'])
self.do_benchmark('conditionals', src, 'ok', reps=TEST_REPS, emcc_args=['-s', 'MINIMAL_RUNTIME=0'])

def test_fannkuch(self):
src = open(path_from_root('tests', 'fannkuch.cpp'), 'r').read().replace(
Expand Down Expand Up @@ -803,7 +792,7 @@ def test_life(self):
def test_linpack(self):
def output_parser(output):
mflops = re.search(r'Unrolled Double Precision ([\d\.]+) Mflops', output).group(1)
return 100.0 / float(mflops)
return 10000.0 / float(mflops)
self.do_benchmark('linpack_double', open(path_from_root('tests', 'linpack2.c')).read(), '''Unrolled Double Precision''', force_c=True, output_parser=output_parser)

# Benchmarks the synthetic performance of calling native functions.
Expand Down

0 comments on commit 15f323a

Please sign in to comment.