Optimize base16 encoding (#7274)
Conversation
- `<<` turns out to be faster than `[].join`
|
|
||
# The IRS has requested data be encoded this way. Loosely emulate the Base64 class.
|
|
||
| def self.encode16(str) |
There was a problem hiding this comment.
I think it will matter more in context as well, but another option is for us to shell out to xxd. I think once we start experimenting with encrypting big payloads, we might just write the raw data to a file and shell out to `xxd | gzip` or something, and I think that will be faster than doing things in Ruby.
|
Updated benchmark, 10k bytes vs 100k bytes, require 'benchmark/ips'
require 'base16'
require 'securerandom'
require 'open3'
require 'tempfile'
# Benchmark fixture: the implementation proposed in this PR.
# Encoding builds an intermediate array with #map and joins it once;
# decoding appends into a single mutable buffer with String#<<.
# NOTE(review): the allocation pattern here is deliberate — it is what
# gets measured against Base16V3 and the base16 gem below.
class PrBase16
  # Hex-encode a binary string, uppercase, two digits per byte.
  def self.encode16(str)
    str.bytes.map { |byte| byte.to_s(16).upcase.rjust(2, "0") }.join
  end

  # Decode a hex string back to its binary form.
  def self.decode16(str)
    decoded = ''
    str.chars.each_slice(2) { |pair| decoded << pair.join.to_i(16).chr }
    decoded
  end
end
# Benchmark fixture: the "v3" candidate. Encoding appends into one mutable
# buffer with String#<< (the optimization under discussion); decoding uses
# each_slice(2).map { ... }.join.
# NOTE(review): implementation details are deliberate — this variant exists
# to be timed against PrBase16 and the base16 gem, not to be refactored.
class Base16V3
  # Hex-encode a binary string, uppercase, two digits per byte.
  def self.encode16(str)
    hex = ''
    str.bytes.each { |byte| hex << byte.to_s(16).upcase.rjust(2, "0") }
    hex
  end

  # Decode a hex string back to its binary form.
  def self.decode16(str)
    str.chars.each_slice(2).map { |duo| duo.join.to_i(16).chr }.join
  end
end
# --- Benchmark fixtures ------------------------------------------------------
# Random payloads: raw bytes feed the encode benchmarks, hex strings feed the
# decode benchmarks. On-disk copies let xxd be timed reading from a file.
random_10k_bytes = SecureRandom.random_bytes(10_000)
random_10k_hex = SecureRandom.hex(10_000)
random_10k_bytes_file = Tempfile.new
File.open(random_10k_bytes_file.path, 'wb') { |f| f.write(random_10k_bytes) }
random_10k_hex_file = Tempfile.new
File.open(random_10k_hex_file.path, 'w') { |f| f.write(random_10k_hex) }
random_100k_bytes = SecureRandom.random_bytes(100_000)
random_100k_hex = SecureRandom.hex(100_000)
random_100k_bytes_file = Tempfile.new
File.open(random_100k_bytes_file.path, 'wb') { |f| f.write(random_100k_bytes) }
random_100k_hex_file = Tempfile.new
File.open(random_100k_hex_file.path, 'w') { |f| f.write(random_100k_hex) }
outfile = Tempfile.new

begin
  # --- Encoding, 10k payload -------------------------------------------------
  Benchmark.ips do |x|
    x.report('encode_gem_10k') { Base16.encode16(random_10k_bytes) }
    x.report('encode_pr_10k') { PrBase16.encode16(random_10k_bytes) }
    x.report('encode_3_10k') { Base16V3.encode16(random_10k_bytes) }
    # xxd reading from / writing to files — no Ruby-side string handling.
    x.report('xxd_file_10k') do
      system('xxd', '-u', '-plain', random_10k_bytes_file.path, outfile.path)
    end
    # xxd via pipes; stderr and the wait handle are deliberately ignored.
    x.report('xxd_stdin_10k') do
      Open3.popen3('xxd', '-u', '-plain') do |stdin, stdout|
        stdin.write(random_10k_bytes)
        stdin.close
        stdout.read
      end
    end
    x.compare!
  end

  # --- Encoding, 100k payload ------------------------------------------------
  Benchmark.ips do |x|
    # Extra xxd_stdin run up front, presumably to warm the pipe/exec path —
    # relabelled so it no longer collides with the measured report below
    # (the original used the label 'xxd_stdin_100k' twice in this block).
    x.report('xxd_stdin_100k_warmup') do
      Open3.popen3('xxd', '-u', '-plain') do |stdin, stdout|
        stdin.write(random_100k_bytes)
        stdin.close
        stdout.read
      end
    end
    x.report('encode_gem_100k') { Base16.encode16(random_100k_bytes) }
    x.report('encode_pr_100k') { PrBase16.encode16(random_100k_bytes) }
    x.report('encode_3_100k') { Base16V3.encode16(random_100k_bytes) }
    x.report('xxd_file_100k') do
      system('xxd', '-u', '-plain', random_100k_bytes_file.path, outfile.path)
    end
    x.report('xxd_stdin_100k') do
      Open3.popen3('xxd', '-u', '-plain') do |stdin, stdout|
        stdin.write(random_100k_bytes)
        stdin.close
        stdout.read
      end
    end
    x.compare!
  end

  # --- Decoding, 10k hex -------------------------------------------------
  Benchmark.ips do |x|
    x.report('decode_gem') { Base16.decode16(random_10k_hex) }
    x.report('decode_pr') { PrBase16.decode16(random_10k_hex) }
    x.report('decode_3') { Base16V3.decode16(random_10k_hex) }
    x.compare!
  end
ensure
  # Close and unlink every tempfile. The original only unlinked the two 10k
  # files, leaking the 100k fixtures and the xxd output file.
  [random_10k_bytes_file, random_10k_hex_file,
   random_100k_bytes_file, random_100k_hex_file, outfile].each(&:close!)
end
|
💡 that |
|
ok one last time, updated because we needed an |
* Optimize base16 encoding
`<<` turns out to be faster than `[].join`. See "encode_3" for some improvements over existing. Normally I would not try to pre-optimize this, but I know that we were planning to do this for multiple megabytes of data at a time.
Here's what I did to benchmark this against #7204