-
Notifications
You must be signed in to change notification settings - Fork 166
LG-8058 | Populate identities consented field #7374
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
564dd63
53278fb
5194f9e
9e78292
0e7b203
18d1958
d80f631
dfe643d
68ecd11
5f5572f
caa43ed
30fe64d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,72 @@ | ||
# One-off backfill job: copies `created_at` into `identities.last_consented_at`
# for live (non-deleted) rows that have no consent timestamp yet. It walks the
# table in id order, persisting its cursor in Redis so each run resumes where
# the previous one stopped.
class IdentitiesBackfillJob < ApplicationJob
  # This is a short-term solution to backfill data requiring a table scan.
  # This job can be deleted once it's done.

  queue_as :long_running

  # Let's give us the option to fine-tune this on the fly
  # (operators can override batch/slice sizes and the cursor via Redis).
  BATCH_SIZE_KEY = 'IdentitiesBackfillJob.batch_size'.freeze
  SLICE_SIZE_KEY = 'IdentitiesBackfillJob.slice_size'.freeze
  CACHE_KEY = 'IdentitiesBackfillJob.position'.freeze

  # Processes one batch (`batch_size` ids) starting at the saved Redis cursor,
  # in `slice_size`-id chunks, then advances the cursor. No-ops once the cursor
  # has passed the current max id.
  def perform
    start_time = Time.zone.now
    max_id = ServiceProviderIdentity.last.id

    # Cursor already past the end of the table: nothing left to backfill.
    if position > max_id
      logger.info "backfill reached end; skipping. position=#{position} max_id=#{max_id}"
      return
    end

    last_id = position

    (batch_size / slice_size).times.each do |slice_num|
      start_id = position + (slice_size * slice_num)
      next if start_id > max_id

      # Values are quoted through the adapter before interpolation into the
      # raw SQL below, so the format() substitution is injection-safe.
      params = {
        min_id: start_id,
        max_id: start_id + slice_size,
      }.transform_values { |v| ActiveRecord::Base.connection.quote(v) }
      sp_query = format(<<~SQL, params)
        UPDATE identities
        SET last_consented_at = created_at
        WHERE id > %{min_id}
        AND id <= %{max_id}
        AND deleted_at IS NULL
        AND last_consented_at IS NULL
        RETURNING id
      SQL

      result = ActiveRecord::Base.connection.execute(sp_query)

      # Advance the cursor: use the first returned id when rows were updated,
      # otherwise assume the whole slice was scanned and skip past it.
      # NOTE(review): taking the FIRST returned id can leave the cursor short
      # of the slice end; re-scanned rows are filtered out by the WHERE clause,
      # so this is safe but slightly wasteful — confirm intent.
      last_id = result.to_a&.first ? result.to_a.first['id'] : start_id + slice_size

      logger.info "Processed rows #{start_id} through #{last_id} (updated=#{result.ntuples})"
    end

    elapsed_time = Time.zone.now - start_time
    logger.info(
      "Finished a full batch (#{batch_size} rows to id=#{last_id}) in #{elapsed_time} seconds",
    )

    # If we made it here without error, increment the counter for next time:
    REDIS_POOL.with { |redis| redis.set(CACHE_KEY, last_id) }
  end

  # Current table-scan cursor (highest id handled so far); 0 before first run.
  def position
    redis_get(CACHE_KEY, 0)
  end

  # Total ids examined per job run (Redis-overridable; default 500k).
  def batch_size
    redis_get(BATCH_SIZE_KEY, 500_000)
  end

  # Ids per UPDATE statement within a batch (Redis-overridable; default 10k).
  def slice_size
    redis_get(SLICE_SIZE_KEY, 10_000)
  end

  # Fetches an integer setting from Redis, falling back to `default` when the
  # key is unset. Centralized so the nil-handling/to_i coercion lives in one place.
  def redis_get(key, default)
    (REDIS_POOL.with { |redis| redis.get(key) } || default).to_i
  end
end
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -7,4 +7,12 @@ | |
# Identity that has authenticated (enclosing factory defined outside this chunk).
trait :active do
  last_authenticated_at { Time.zone.now }
end

# Identity whose consent timestamp is already populated (5 minutes in the past).
# NOTE(review): per PR discussion this may eventually become the factory default.
trait :consented do
  last_consented_at { Time.zone.now - 5.minutes }
end

# Identity soft-deleted 5 minutes ago; backfill jobs must leave these untouched.
trait :soft_deleted_5m_ago do
  deleted_at { Time.zone.now - 5.minutes }
end
| end | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,107 @@ | ||
require 'rails_helper'

RSpec.describe IdentitiesBackfillJob, type: :job do
  describe '#perform' do
    # Soft-deleted row with a consent date — the backfill must not touch it.
    let!(:deleted) do
      create(:service_provider_identity, :soft_deleted_5m_ago, :consented)
    end

    # Live row that already has last_consented_at — must be left as-is.
    let!(:consented_at_set) do
      create(:service_provider_identity, :consented)
    end

    # Live row with no last_consented_at — the only one eligible for backfill.
    let!(:no_consented_at) do
      create(:service_provider_identity, :active)
    end

    subject { described_class.perform_now }

    it 'does not update rows that have been soft-deleted' do
      expect(deleted.deleted_at).to_not be_nil
      # Compare via to_i to sidestep sub-second precision differences between
      # Ruby Time objects and the database column.
      consent_time = deleted.last_consented_at.to_i

      subject
      deleted.reload

      expect(deleted.deleted_at).not_to be_nil
      expect(deleted.last_consented_at.to_i).to eq(consent_time)
    end

    it 'does not update rows that already have a date populated' do
      time = consented_at_set.last_consented_at
      expect(time).not_to be_nil

      subject

      expect(consented_at_set.reload.last_consented_at.to_i).to eq time.to_i
    end

    it 'does update rows which are not deleted and have no last_consented_at date' do
      # Preconditions: eligible row (live, has created_at, no consent date).
      expect(no_consented_at.last_consented_at).to be_nil
      expect(no_consented_at.created_at).to_not be_nil
      expect(no_consented_at.deleted_at).to be_nil

      subject

      expect(no_consented_at.reload.last_consented_at).to_not be_nil
    end

    context 'when the batch size is small' do
      let(:batch_size) { 2 }
      let(:slice_size) { 1 }
      # Seed Redis so one run covers exactly two single-row slices starting
      # just before the first fixture row's id.
      before do
        REDIS_POOL.with do |redis|
          redis.set(IdentitiesBackfillJob::BATCH_SIZE_KEY, batch_size)
          redis.set(IdentitiesBackfillJob::SLICE_SIZE_KEY, slice_size)
          redis.set(
            IdentitiesBackfillJob::CACHE_KEY,
            ServiceProviderIdentity.first.id - 1,
          )
        end
      end

      it 'increments the position properly' do
        # The first time around, the position should match the second row:
        described_class.perform_now
        position = described_class.new.position
        expect(position).to eq(ServiceProviderIdentity.second.id)

        # The second time, we finish the table, so position should point past the end:
        described_class.perform_now
        expect(described_class.new.position).to eq(position + batch_size)
      end
    end

    context 'when done updating the database' do
      it 'updates the position in Redis' do
        # Fresh Redis: cursor defaults to 0 before any run.
        expect(described_class.new.position).to eq(0)

        subject

        expect(described_class.new.position).to eq(ServiceProviderIdentity.last.id)
      end
    end
  end

  describe '#batch_size' do
    subject { described_class.new.batch_size }

    context 'when there is no cache key in redis' do
      it 'returns the default of 500,000' do
        expect(subject).to eq(500_000)
      end
    end

    context 'when there is a cache key in redis' do
      before do
        REDIS_POOL.with do |redis|
          redis.set(described_class::BATCH_SIZE_KEY, 1000)
        end
      end

      it 'returns that value' do
        expect(subject).to eq 1000
      end
    end
  end
end
Uh oh!
There was an error while loading. Please reload this page.