diff --git a/lib/tasks/backfill_idv_level.rake b/lib/tasks/backfill_idv_level.rake
new file mode 100644
index 00000000000..b06ed2a4110
--- /dev/null
+++ b/lib/tasks/backfill_idv_level.rake
@@ -0,0 +1,106 @@
+namespace :profiles do
+  desc 'Backfill the idv_level column.'
+
+  ##
+  # Usage:
+  #
+  # bundle exec rake profiles:backfill_idv_level
+  #
+  # Optional environment variables (all must be base-10 integers):
+  #
+  # BATCH_SIZE - profiles updated per batch (default 1000)
+  # REPORT_INTERVAL - minimum seconds between progress messages (default 10)
+  # STATEMENT_TIMEOUT_IN_SECONDS - per-statement database timeout (default 60)
+  #
+  task backfill_idv_level: :environment do |_task, _args|
+    with_statement_timeout do
+      # Drop NULL profile_ids from the subquery: in SQL, `id NOT IN (..., NULL)`
+      # matches no rows, so a single enrollment without a profile would
+      # silently skip every unsupervised profile.
+      in_person_profile_ids = InPersonEnrollment.where.not(profile_id: nil).select(:profile_id)
+
+      is_in_person = Profile.where(id: in_person_profile_ids)
+      is_not_in_person = Profile.where.not(id: in_person_profile_ids)
+      needs_idv_level = Profile.where(idv_level: nil)
+
+      in_person_and_needs_idv_level = Profile.and(is_in_person).and(needs_idv_level)
+      not_in_person_and_needs_idv_level = Profile.and(is_not_in_person).and(needs_idv_level)
+
+      profile_count = in_person_and_needs_idv_level.count + not_in_person_and_needs_idv_level.count
+      warn("Found #{profile_count} profile(s) needing backfill")
+
+      # Running total across both passes. profile_count covers both groups, so
+      # progress is reported against the combined number updated so far.
+      backfilled = 0
+
+      count = 0
+      in_person_and_needs_idv_level.
+        in_batches(of: batch_size) do |batch|
+        count += batch.update_all(idv_level: :legacy_in_person) # rubocop:disable Rails/SkipsModelValidations
+        report_count(backfilled + count, profile_count)
+      end
+      backfilled += count
+      warn("set idv_level for #{count} legacy_in_person profile(s)")
+
+      count = 0
+      not_in_person_and_needs_idv_level.
+        in_batches(of: batch_size) do |batch|
+        count += batch.update_all(idv_level: :legacy_unsupervised) # rubocop:disable Rails/SkipsModelValidations
+        report_count(backfilled + count, profile_count)
+      end
+
+      warn("set idv_level for #{count} legacy_unsupervised profile(s)")
+    end
+
+    with_statement_timeout do
+      warn('Profile counts by idv_level after update:')
+      [:legacy_in_person, :legacy_unsupervised, nil].each do |value|
+        count = Profile.where(idv_level: value).count
+        warn("#{value.inspect}: #{count}")
+      end
+    end
+  end
+
+  # Number of profiles to update per batch, taken from BATCH_SIZE (default 1000).
+  # Raises ArgumentError when BATCH_SIZE is not a positive base-10 integer.
+  def batch_size
+    size = Integer(ENV.fetch('BATCH_SIZE', '1000'), 10)
+    raise ArgumentError, 'BATCH_SIZE must be a positive integer' unless size.positive?
+
+    size
+  end
+
+  # Writes a progress line to stderr, at most once per REPORT_INTERVAL seconds
+  # (default 10).
+  #
+  # count - number of profiles backfilled so far
+  # profile_count - total number of profiles that needed the backfill
+  def report_count(count, profile_count)
+    report_interval = Integer(ENV.fetch('REPORT_INTERVAL', '10'), 10).seconds
+
+    # The first call only starts the clock, so nothing is printed until an
+    # interval has elapsed.
+    @last_report ||= Time.zone.now
+    return if Time.zone.now - @last_report < report_interval
+
+    percent = sprintf('%.2f', (count / profile_count.to_f) * 100)
+    warn("Backfilled #{count} profile(s) (#{percent}%)")
+
+    @last_report = Time.zone.now
+  end
+
+  # Runs the block inside a transaction with a statement timeout applied.
+  #
+  # timeout_in_seconds - timeout to apply; defaults to
+  #                      STATEMENT_TIMEOUT_IN_SECONDS, or 60 seconds when unset
+  def with_statement_timeout(timeout_in_seconds = nil)
+    timeout_in_seconds ||= Integer(ENV.fetch('STATEMENT_TIMEOUT_IN_SECONDS', '60'), 10).seconds
+
+    ActiveRecord::Base.transaction do
+      quoted_timeout = ActiveRecord::Base.connection.quote(timeout_in_seconds.in_milliseconds)
+      # SET LOCAL scopes the timeout to this transaction; a plain SET would keep
+      # applying to the connection after the transaction commits.
+      ActiveRecord::Base.connection.execute("SET LOCAL statement_timeout = #{quoted_timeout}")
+      yield
+    end
+  end
+end
diff --git a/spec/lib/tasks/backfill_idv_level_rake_spec.rb b/spec/lib/tasks/backfill_idv_level_rake_spec.rb
new file mode 100644
index 00000000000..1d084f2ee88
--- /dev/null
+++ b/spec/lib/tasks/backfill_idv_level_rake_spec.rb
@@ -0,0 +1,78 @@
+require 'rails_helper'
+require 'rake'
+
+RSpec.describe 'profiles:backfill_idv_level rake task' do
+  let(:task) do
+    Rake.application.rake_require 'tasks/backfill_idv_level'
+    Rake::Task.define_task(:environment)
+    Rake::Task['profiles:backfill_idv_level']
+  end
+
+  subject(:invoke_task) do
+    og_stderr = $stderr
+    fake_stderr = StringIO.new
+    begin
+      $stderr = fake_stderr
+      task.reenable
+      task.invoke
+      fake_stderr.string
+    ensure
+      $stderr = og_stderr
+    end
+  end
+
+  let(:profiles) do
+    {
+      unsupervised: create(:user, :proofed).active_profile,
+      unsupervised_no_level: create(:user, :proofed).active_profile.tap do |profile|
+        profile.update!(idv_level: nil)
+      end,
+      in_person: create(
+        :user,
+        :with_pending_in_person_enrollment,
+      ).pending_profile,
+      in_person_no_level: create(
+        :user,
+        :with_pending_in_person_enrollment,
+      ).pending_profile.tap { |profile| profile.update!(idv_level: nil) },
+    }
+  end
+
+  before do
+    expect(profiles[:unsupervised].idv_level).not_to be_nil
+    expect(profiles[:unsupervised_no_level].idv_level).to be_nil
+    expect(profiles[:in_person].idv_level).not_to be_nil
+    expect(profiles[:in_person_no_level].idv_level).to be_nil
+    invoke_task
+  end
+
+  it 'outputs what it did' do
+    expect(invoke_task.to_s).to eql(
+      <<~END,
+        Found 2 profile(s) needing backfill
+        set idv_level for 1 legacy_in_person profile(s)
+        set idv_level for 1 legacy_unsupervised profile(s)
+        Profile counts by idv_level after update:
+        :legacy_in_person: 2
+        :legacy_unsupervised: 2
+        nil: 0
+      END
+    )
+  end
+
+  it 'updates legacy unsupervised user that was missing value' do
+    expect(profiles[:unsupervised_no_level].reload.idv_level).to eql('legacy_unsupervised')
+  end
+
+  it 'does not mess up unsupervised user with value' do
+    expect(profiles[:unsupervised].reload.idv_level).to eql('legacy_unsupervised')
+  end
+
+  it 'updates legacy in person user that was missing value' do
+    expect(profiles[:in_person_no_level].reload.idv_level).to eql('legacy_in_person')
+  end
+
+  it 'does not mess up in person user with value' do
+    expect(profiles[:in_person].reload.idv_level).to eql('legacy_in_person')
+  end
+end