benchmark: ignore significance when using --runs 1
Because the standard deviation can't be calculated when there is only
one observation, the R scripts raise an error. However, it may still be
useful to run them for non-statistical purposes.

This changes the behaviour such that when there is only one observation,
the values that depend on the standard deviation become Not Applicable
(NA).

Fixes: #8288
PR-URL: #8299
Reviewed-By: Anna Henningsen <[email protected]>
AndreasMadsen committed Sep 16, 2016
1 parent 6f9157f commit d3834a1
Showing 2 changed files with 37 additions and 21 deletions.
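
For reference, a minimal sketch of the failure mode the commit message describes. The data frame is invented: the column names mirror the name/binary/rate layout that compare.R reads, the benchmark name is hypothetical and the rates are made-up numbers.

dat = data.frame(
  name   = "buffers/buffer-creation.js",  # hypothetical benchmark name
  binary = c("old", "new"),               # one run per binary, i.e. --runs 1
  rate   = c(21500, 23100)                # made-up ops/sec rates
);

# A single observation has no spread, so the standard deviation is NA ...
sd(subset(dat, binary == "old")$rate);    # NA

# ... and t.test() refuses to run with one observation per group, which is
# how the R scripts used to die.
# t.test(rate ~ binary, data=dat);        # would stop: too few observations per group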
41 changes: 25 additions & 16 deletions benchmark/compare.R
@@ -33,30 +33,39 @@ if (!is.null(plot.filename)) {
 
 # Print a table with results
 statistics = ddply(dat, "name", function(subdat) {
-  # Perform a statistics test to see of there actually is a difference in
-  # performace.
-  w = t.test(rate ~ binary, data=subdat);
+  old.rate = subset(subdat, binary == "old")$rate;
+  new.rate = subset(subdat, binary == "new")$rate;
 
   # Calculate improvement for the "new" binary compared with the "old" binary
-  new_mu = mean(subset(subdat, binary == "new")$rate);
-  old_mu = mean(subset(subdat, binary == "old")$rate);
-  improvement = sprintf("%.2f %%", ((new_mu - old_mu) / old_mu * 100));
+  old.mu = mean(old.rate);
+  new.mu = mean(new.rate);
+  improvement = sprintf("%.2f %%", ((new.mu - old.mu) / old.mu * 100));
 
-  # Add user friendly stars to the table. There should be at least one star
-  # before you can say that there is an improvement.
-  significant = '';
-  if (w$p.value < 0.001) {
-    significant = '***';
-  } else if (w$p.value < 0.01) {
-    significant = '**';
-  } else if (w$p.value < 0.05) {
-    significant = '*';
+  p.value = NA;
+  significant = 'NA';
+  # Check if there is enough data to calculate the p-value
+  if (length(old.rate) > 1 && length(new.rate) > 1) {
+    # Perform a statistics test to see if there actually is a difference in
+    # performance.
+    w = t.test(rate ~ binary, data=subdat);
+    p.value = w$p.value;
+
+    # Add user friendly stars to the table. There should be at least one star
+    # before you can say that there is an improvement.
+    significant = '';
+    if (p.value < 0.001) {
+      significant = '***';
+    } else if (p.value < 0.01) {
+      significant = '**';
+    } else if (p.value < 0.05) {
+      significant = '*';
+    }
   }
 
   r = list(
     improvement = improvement,
     significant = significant,
-    p.value = w$p.value
+    p.value = p.value
   );
   return(data.frame(r));
 });
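
To see what the patched compare.R reports for a single run, here is a standalone sketch of the new guard. The variable names follow the patch; the rates are invented.

old.rate = c(21500);   # the only run of the "old" binary (made-up rate)
new.rate = c(23100);   # the only run of the "new" binary (made-up rate)

old.mu = mean(old.rate);
new.mu = mean(new.rate);
improvement = sprintf("%.2f %%", ((new.mu - old.mu) / old.mu * 100));

# With a single run per binary the t.test() branch is skipped entirely,
# so the NA defaults are what end up in the table.
p.value = NA;
significant = 'NA';
if (length(old.rate) > 1 && length(new.rate) > 1) {
  # not reached when either binary has only one observation
}

print(data.frame(improvement, significant, p.value));
# improvement is "7.44 %", significant is "NA", p.value is NA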
17 changes: 12 additions & 5 deletions benchmark/scatter.R
@@ -51,13 +51,17 @@ if (length(aggregate) > 0) {
 stats = ddply(dat, c(x.axis.name, category.name), function(subdat) {
   rate = subdat$rate;
 
-  # calculate standard error of the mean
-  se = sqrt(var(rate)/length(rate));
+  # calculate confidence interval of the mean
+  ci = NA;
+  if (length(rate) > 1) {
+    se = sqrt(var(rate)/length(rate));
+    ci = se * qt(0.975, length(rate) - 1)
+  }
 
   # calculate mean and 95 % confidence interval
   r = list(
     rate = mean(rate),
-    confidence.interval = se * qt(0.975, length(rate) - 1)
+    confidence.interval = ci
   );
 
   return(data.frame(r));
@@ -66,11 +70,14 @@ stats = ddply(dat, c(x.axis.name, category.name), function(subdat) {
 print(stats, row.names=F);
 
 if (!is.null(plot.filename)) {
-  p = ggplot(stats, aes_string(x=x.axis.name, y='mean', colour=category.name));
+  p = ggplot(stats, aes_string(x=x.axis.name, y='rate', colour=category.name));
   if (use.log2) {
     p = p + scale_x_continuous(trans='log2');
   }
-  p = p + geom_errorbar(aes(ymin=mean-confidence.interval, ymax=mean+confidence.interval), width=.1);
+  p = p + geom_errorbar(
+    aes(ymin=rate-confidence.interval, ymax=rate+confidence.interval),
+    width=.1, na.rm=TRUE
+  );
   p = p + geom_point();
   p = p + ylab("rate of operations (higher is better)");
   p = p + ggtitle(dat[1, 1]);
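
Likewise for scatter.R, a quick sketch with an invented rate vector of why the confidence interval needs the same guard (the switch from y='mean' to y='rate' in aes_string() simply matches the column the stats data frame actually contains):

rate = c(52000);   # a single observation, i.e. --runs 1 (made-up rate)

ci = NA;
if (length(rate) > 1) {
  se = sqrt(var(rate) / length(rate));    # standard error of the mean
  ci = se * qt(0.975, length(rate) - 1);  # 95 % confidence interval
}

# var() of one observation is NA, so the guard keeps ci at NA here;
# geom_errorbar(..., na.rm=TRUE) then drops the missing error bars quietly
# instead of warning about removed rows.
print(ci);   # NA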
