Skip to content

Commit

Permalink
Compare results using standard deviation for better stability
Browse files Browse the repository at this point in the history
  • Loading branch information
vinistock committed Feb 15, 2023
1 parent 9726b3a commit d75c92d
Show file tree
Hide file tree
Showing 2 changed files with 80 additions and 34 deletions.
44 changes: 22 additions & 22 deletions .github/workflows/benchmark.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,31 +7,40 @@ jobs:
runs-on: ubuntu-latest
name: Benchmark
steps:
- uses: actions/checkout@v3
# Setup the main branch to run benchmarks there as a baseline for comparison
- name: Checkout to main
uses: actions/checkout@v3
with:
ref: main

- name: Set up Ruby
uses: ruby/setup-ruby@v1
with:
ruby-version: 3.2
bundler-cache: true

# Restore the cached benchmark results from the main branch to compare it with the current branch
- name: Restore main benchmark results
id: restore-main-results
uses: actions/cache/restore@v3
if: github.event_name == 'pull_request'
- name: Main benchmark
id: main-benchmark
run: |
bin/benchmark
# Setup the PR's branch to run benchmarks and compare with main results
- name: Checkout to branch
uses: actions/checkout@v3

- name: Set up Ruby
uses: ruby/setup-ruby@v1
with:
path: |
/tmp/ruby_lsp_benchmark_results.json
key: main-benchmark-results
ruby-version: 3.2
bundler-cache: true

# Run the benchmark. The script reads the cache and compares the results between the PR's branch and main
- name: Benchmark
- name: Branch benchmark
id: benchmark
run: |
result=$(bin/benchmark) || true
echo "REPORT=$(echo "$result" | tr '\n' '#')" >> $GITHUB_OUTPUT
# Post the results as a comment on the PR
- name: Comment report
uses: actions/github-script@v6
if: github.event_name == 'pull_request'
Expand All @@ -45,20 +54,11 @@ jobs:
body: '```#${{ steps.benchmark.outputs.REPORT }}#```'.split("#").join("\n"),
});
# Fail the job based on the benchmark output
- name: Finalize job
run: |
if [[ "${{ steps.benchmark.outputs.REPORT }}" == *"At least one benchmark is 15% slower than the main branch"* ]]; then
if [[ "${{ steps.benchmark.outputs.REPORT }}" == *"At least one benchmark is slower than the main branch"* ]]; then
exit 1
else
exit 0
fi
# Only cache benchmark results from the main branch
- name: Save main benchmark results
id: save-main-results
uses: actions/cache/save@v3
if: github.ref_name == 'main'
with:
path: |
/tmp/ruby_lsp_benchmark_results.json
key: main-benchmark-results
70 changes: 58 additions & 12 deletions bin/benchmark
Original file line number Diff line number Diff line change
Expand Up @@ -6,27 +6,58 @@ $LOAD_PATH.unshift(File.expand_path("../lib", __dir__))
$VERBOSE = nil

require "bundler/setup"
require "sorbet-runtime"

# Disable any Sorbet checks
begin
T::Configuration.default_checked_level = :never
T::Configuration.call_validation_error_handler = ->(*) {}
T::Configuration.inline_type_error_handler = ->(*) {}
T::Configuration.sig_validation_error_handler = ->(*) {}
rescue
nil
end

require "ruby_lsp/internal"
require "benchmark"

ITERATIONS = 500
# Run signature blocks ahead of time
T::Utils.run_all_sig_blocks

# Fetch Rails documents ahead of time
RubyLsp::Requests::Support::RailsDocumentClient.send(:search_index)

# Build gem file paths ahead of time
RubyLsp::Requests::DocumentLink.gem_paths

ITERATIONS = 1000
CACHE_FILE_PATH = "/tmp/ruby_lsp_benchmark_results.json"

def avg_bench(method, params)
(0...ITERATIONS).map do
results = (0...ITERATIONS).map do
# Create a new store every time to prevent caching
store = RubyLsp::Store.new
store.set(FILE_URI, FIXTURE)
# Parse ahead of time or else one of the requests will do it
store.get(FILE_URI).parse

Benchmark.realtime do
GC.disable
result = Benchmark.measure do
RubyLsp::Executor.new(store).execute({
method: method,
params: params,
})
end
end.sum.to_f / ITERATIONS
end.utime
GC.enable
result
end

average = results.sum.to_f / ITERATIONS

# Calculate standard deviation
variance = results.map { |r| (r - average)**2 }.sum / ITERATIONS
standard_deviation = Math.sqrt(variance)
[average, standard_deviation]
end

FILE_URI = "file://#{File.expand_path(__FILE__)}"
Expand All @@ -35,7 +66,7 @@ range = {
start: { line: 50, character: 0 },
end: { line: 75, character: 0 },
}
position = { line: 50, character: 8 }
position = { line: 54, character: 8 }

# The purpose of this fixture is not to make sense semantically, but to be syntatically complex. It also contains style
# violations on purpose to ensure RuboCop finds at least some
Expand Down Expand Up @@ -102,6 +133,7 @@ FIXTURE = <<~RUBY
validate :safe_content
# source://mutex_m//mutex_m.rb#1
scope :by_user, ->(user_id) { where(user_id: user_id) }
scope :by_post, ->(post_id) { where(post_id: post_id) }
Expand All @@ -121,6 +153,7 @@ FIXTURE = <<~RUBY
class User < ApplicationRecord
extend T::Sig
# source://mutex_m//mutex_m.rb#1
has_many :comments, dependent: :destroy
has_many :favourites, dependent: :destroy, class_name: "Post"
Expand All @@ -141,6 +174,7 @@ FIXTURE = <<~RUBY
has_many :taggings, dependent: :destroy
has_many :posts, through: :taggings
# source://mutex_m//mutex_m.rb#1
scope :by_name, ->(name) { where(name: name) }
end
Expand Down Expand Up @@ -179,7 +213,12 @@ requests.each { |method, params| results[method] = avg_bench(method, params) }
longest_key_length = requests.keys.max_by(&:length).length

puts "Benchmark results in seconds (slowest at top)\n\n"
puts results.sort_by { |_, v| -v }.map { |k, v| "#{k.rjust(longest_key_length)} #{v}" }.join("\n")
puts results
.sort_by { |_method, (average, _std_dev)| -average }
.map { |k, (average, std_dev)|
"#{k.rjust(longest_key_length)} average: #{average.round(6)} std_dev: #{std_dev.round(6)}"
}
.join("\n")

if File.exist?(CACHE_FILE_PATH)
main_results = JSON.parse(File.read(CACHE_FILE_PATH))
Expand All @@ -188,13 +227,18 @@ if File.exist?(CACHE_FILE_PATH)
puts "\n\n"
puts "=" * 80
puts "Comparison with main branch:\n\n"
results.each do |method, current|
ratio = main_results[method].nil? ? 1.0 : 1.0 - (current / main_results[method])
results.each do |method, (new_average, new_std_dev)|
current_average, _current_std_dev = main_results[method]
next if new_average.nil?

ratio = 1.0 - (new_average / current_average)
absolute_ratio = ratio.abs
percentage = 100 * absolute_ratio
percentage = (100 * absolute_ratio).round(3)
adjusted_method = method.rjust(longest_key_length)

if absolute_ratio < 0.15
# If the difference between the new average and the average of the main branch is less than the a standard deviation
# then we consider it unchanged. This is necessary to avoid flaky benchmarks on CI, where there's a lot of variance
if (new_average - current_average).abs < new_std_dev
puts "#{adjusted_method} unchanged"
elsif ratio.negative?
puts "#{adjusted_method} slower by #{percentage} %"
Expand All @@ -205,7 +249,9 @@ if File.exist?(CACHE_FILE_PATH)
end

unless success
puts "\n\nAt least one benchmark is 15% slower than the main branch."
# If this phrase is changed, we have to update .github/workflows/benchmark.yml since this is used to determine
# whether to fail the build or not
puts "\n\nAt least one benchmark is slower than the main branch."
exit(1)
end
end
Expand Down

0 comments on commit d75c92d

Please sign in to comment.