Skip to content

Commit

Permalink
use insert_all to insert WordChunks
Browse files Browse the repository at this point in the history
  • Loading branch information
charlesdeb committed Apr 10, 2020
1 parent 9f450cd commit f008dc6
Show file tree
Hide file tree
Showing 2 changed files with 201 additions and 58 deletions.
45 changes: 37 additions & 8 deletions app/models/text_sample.rb
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
# frozen_string_literal: true

class TextSample < ApplicationRecord
require 'date'

validates :description, presence: true
validates :text, presence: true

Expand All @@ -14,18 +16,12 @@ def build_word_chunks
end
end

def build_word_chunks_of_size(chunk_size)
def build_word_chunks_of_size(chunk_size, save_strategy = :insert_all)
# create a hash
chunks_hash = build_chunk_hash(chunk_size)

# store it chunk by chunk in the database
# TODO: this is very slow and inefficient; storing a single hash per
# row may be a better solution
chunks_hash.each do |chunk_text, count|
WordChunk.create!(
text: chunk_text, size: chunk_size, count: count, text_sample_id: id
)
end
save_word_chunks(chunks_hash, chunk_size, save_strategy)
end

def build_chunk_hash(chunk_size)
Expand All @@ -39,4 +35,37 @@ def build_chunk_hash(chunk_size)
end
hash
end

# TODO: both strategies are rather slow and inefficient; storing a single
# hash per row may be a better solution
#
# Persists chunks_hash (chunk text => count) for the given chunk_size by
# delegating to the saver selected by save_strategy (:insert_all, the
# default, or :create!). Any other strategy raises a RuntimeError.
def save_word_chunks(chunks_hash, chunk_size, save_strategy = :insert_all)
  unless %i[insert_all create!].include?(save_strategy)
    raise "Unknown save_strategy: #{save_strategy}"
  end

  if save_strategy == :insert_all
    save_word_chunks_by_insert_all(chunks_hash, chunk_size)
  else
    save_word_chunks_by_create(chunks_hash, chunk_size)
  end
end

# Writes every entry of chunks_hash (chunk text => count) as a WordChunk row
# belonging to this record, using a single WordChunk.insert_all call.
#
# created_at / updated_at are supplied explicitly in each row, and all rows
# share the same timestamp.
#
# Returns nil without touching the database when chunks_hash is empty;
# otherwise returns whatever WordChunk.insert_all returns.
def save_word_chunks_by_insert_all(chunks_hash, chunk_size)
  # insert_all raises ArgumentError for an empty list of rows (Rails 6.0),
  # so a sample that yields no chunks must be skipped rather than inserted.
  return if chunks_hash.empty?

  current_time = DateTime.now
  rows = chunks_hash.map do |chunk_text, count|
    { text: chunk_text, size: chunk_size,
      count: count, text_sample_id: id,
      created_at: current_time, updated_at: current_time }
  end
  WordChunk.insert_all(rows)
end

# Saves each entry of chunks_hash (chunk text => count) through its own
# WordChunk.create! call, tagging every row with chunk_size and this
# record's id. Returns chunks_hash.
def save_word_chunks_by_create(chunks_hash, chunk_size)
  chunks_hash.each_pair do |chunk_text, occurrences|
    attributes = { text: chunk_text, size: chunk_size,
                   count: occurrences, text_sample_id: id }
    WordChunk.create!(**attributes)
  end
end
end
214 changes: 164 additions & 50 deletions spec/models/text_sample_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -37,61 +37,25 @@
end
end

describe '#build_word_chunks_of_size' do # rubocop:disable Metrics/BlockLength
describe '#build_word_chunks_of_size' do
let(:text_sample) { TextSample.create!(description: 'Stuff', text: 'at') }
let(:chunk_hash) { { 'at' => 1 } }

before(:each) do
# don't hit database
allow(WordChunk).to receive(:create!)
allow(text_sample).to receive(:build_chunk_hash).and_return(chunk_hash)
allow(text_sample).to receive(:save_word_chunks)
end

context '2 letter text sample, chunk size of 2' do
let(:text_sample) { TextSample.create!(description: 'Stuff', text: 'at') }
let(:chunk_hash) { { 'at' => 1 } }

before(:each) do
allow(text_sample).to receive(:build_chunk_hash).and_return(chunk_hash)
end

it 'builds a hash' do
text_sample.build_word_chunks_of_size(2)
expect(text_sample).to have_received(:build_chunk_hash).with(2)
end

it 'saves the hash to the database' do
text_sample.build_word_chunks_of_size(2)

expect(WordChunk).to(
have_received(:create!)
.with(text: 'at', size: 2, count: 1, text_sample_id: text_sample.id)
)
end
it 'builds a hash' do
text_sample.build_word_chunks_of_size(2)
expect(text_sample).to have_received(:build_chunk_hash).with(2)
end

context '3 letter text sample, chunk size of 2' do
let(:text_sample) { TextSample.create!(description: 'Stuff', text: 'ant') }
let(:chunk_hash) { { 'an' => 1, 'nt' => 1 } }

before(:each) do
allow(text_sample).to receive(:build_chunk_hash).and_return(chunk_hash)
end

it 'builds a hash' do
text_sample.build_word_chunks_of_size(2)
expect(text_sample).to have_received(:build_chunk_hash).with(2)
end

it 'saves the hash to the database' do
allow(WordChunk).to receive(:create!)
text_sample.build_word_chunks_of_size(2)

expect(WordChunk).to(
have_received(:create!)
.with(text: 'an', size: 2, count: 1, text_sample_id: text_sample.id)
)
expect(WordChunk).to(
have_received(:create!)
.with(text: 'nt', size: 2, count: 1, text_sample_id: text_sample.id)
)
end
it 'attempts to save the hash to the database' do
text_sample.build_word_chunks_of_size(2)
expect(text_sample).to have_received(
:save_word_chunks
).with(chunk_hash, 2, :insert_all)
end
end

Expand Down Expand Up @@ -124,4 +88,154 @@
end
end
end

describe '#save_word_chunks' do # rubocop:disable Metrics/BlockLength
# Multi-line sample text shared by the examples below; the squiggly heredoc
# is stripped of leading/trailing whitespace before use.
let(:long_string) do
<<~LONG.strip
The rain in Spain falls mainly in the plain, but we do not really
know what we are missing in this much longer sentence. Will it
make a massive difference to the import time, or am I just
doing premature optimisation which by common consent is largely
seen as a waste of time. But here we go, adding a bunch more text
to see if the extra overhead of text will make the slightest bit of
difference to the import time. Right now, I am not convinced, but
who knows. The best way to know is always to measure and then
measure again - checking the hypothesis against the actual results
of the test.
LONG
end
# TextSample record built from long_string. It is created lazily, i.e. on
# first reference inside an example. The commented-out line below is a much
# shorter alternative sample.
let(:text_sample) do
TextSample.create!(description: 'Longer sample', text: long_string)
# TextSample.create!(description: 'Longer sample', text: 'anty')
end

describe '[behaviour]' do
  let(:chunk_size) { 2 }
  let(:chunks_hash) { text_sample.build_chunk_hash(chunk_size) }

  # Stub both savers so these examples exercise only the strategy dispatch.
  before do
    allow(text_sample).to receive_messages(
      save_word_chunks_by_insert_all: nil,
      save_word_chunks_by_create: nil
    )
  end

  it 'raises an exception for an unknown save_strategy' do
    expect do
      text_sample.save_word_chunks(chunks_hash, chunk_size, :bogus_strategy)
    end.to raise_exception(/Unknown save_strategy/)
  end

  it 'uses :insert_all as the default strategy' do
    text_sample.save_word_chunks(chunks_hash, chunk_size)

    expect(text_sample).to have_received(:save_word_chunks_by_insert_all)
  end

  it 'uses :insert_all when instructed' do
    text_sample.save_word_chunks(chunks_hash, chunk_size, :insert_all)

    expect(text_sample).to have_received(:save_word_chunks_by_insert_all)
  end

  it 'uses :create! when instructed' do
    text_sample.save_word_chunks(chunks_hash, chunk_size, :create!)

    expect(text_sample).to have_received(:save_word_chunks_by_create)
  end
end

describe '[performance]' do
  # Prints the wall-clock duration of every example in this group. A
  # monotonic clock is used so the difference is a Float number of seconds
  # (subtracting two DateTime values yields a fraction of days instead).
  # NOTE: the lets are lazy, so the measured span also includes creating
  # text_sample and building chunks_hash, not just the save itself.
  around(:each) do |example|
    started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)

    example.run

    seconds_elapsed =
      Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at
    chunk_size = example.metadata[:chunk_size]
    puts "saving chunks (size #{chunk_size}) took #{seconds_elapsed} seconds"
  end

  # One context per chunk size; the size is also exposed as example metadata
  # so the around hook can report it.
  [2, 3, 4, 8].each do |size|
    context "chunk size of #{size}", chunk_size: size do
      let(:chunk_size) { size }
      let(:chunks_hash) { text_sample.build_chunk_hash(chunk_size) }

      it 'uses insert_all for individual word_chunks' do
        text_sample.save_word_chunks(chunks_hash, chunk_size, :insert_all)
      end

      it 'uses individual create! for each word_chunk' do
        text_sample.save_word_chunks(chunks_hash, chunk_size, :create!)
      end
    end
  end
end
end

describe '#save_word_chunks_by_insert_all' do
  let(:text_sample) { TextSample.create!(description: 'Stuff', text: 'ant') }
  let(:chunk_hash) { { 'an' => 1, 'nt' => 1 } }

  before(:each) do
    # don't hit database
    allow(WordChunk).to receive(:insert_all)
  end

  # Checks the rows handed to insert_all, not merely that it was called, so
  # a wrong size/count/text_sample_id cannot slip through. Timestamp keys
  # are ignored by a_hash_including.
  it 'saves the hash to the database' do
    text_sample.save_word_chunks_by_insert_all(chunk_hash, 2)

    expected_rows = chunk_hash.map do |chunk_text, count|
      a_hash_including(
        text: chunk_text, size: 2, count: count,
        text_sample_id: text_sample.id
      )
    end
    expect(WordChunk).to(
      have_received(:insert_all).once.with(contain_exactly(*expected_rows))
    )
  end
end

describe '#save_word_chunks_by_create' do
  let(:text_sample) { TextSample.create!(description: 'Stuff', text: 'ant') }
  let(:chunk_hash) { { 'an' => 1, 'nt' => 1 } }

  before do
    allow(text_sample).to receive(:build_chunk_hash).and_return(chunk_hash)
  end

  # Each chunk of the sample must be written through its own create! call.
  it 'saves the hash to the database' do
    allow(WordChunk).to receive(:create!)
    text_sample.save_word_chunks_by_create(chunk_hash, 2)

    %w[an nt].each do |chunk_text|
      expect(WordChunk).to have_received(:create!).with(
        text: chunk_text, size: 2, count: 1, text_sample_id: text_sample.id
      )
    end
  end
end
end

0 comments on commit f008dc6

Please sign in to comment.