From 6b2d5bb87995af9da5509d9a1505e30f845428d3 Mon Sep 17 00:00:00 2001 From: Jonathan Hooper Date: Wed, 22 Jan 2025 16:32:46 -0500 Subject: [PATCH 1/9] LG-15394 Add tooling for an SP proofing events by UUID report We have an agency partner who requested a specialized report that involves a table with columns representing proofing events and rows representing users for the partner where the values in each cell correspond to whether that user encountered that event in the given time period. We have been manually servicing this request for a little while. This commit adds tooling to generate this report automatically so it does not require manual processing. This commit does not add the report to the job schedule with the intention being to test it first before scheduling it to run automatically. [skip changelog] --- .../reports/sp_proofing_events_by_uuid.rb | 60 +++++ lib/reporting/sp_proofing_events_by_uuid.rb | 205 ++++++++++++++++++ .../sp_proofing_events_by_uuid_spec.rb | 88 ++++++++ .../sp_proofing_events_by_uuid_spec.rb | 150 +++++++++++++ 4 files changed, 503 insertions(+) create mode 100644 app/jobs/reports/sp_proofing_events_by_uuid.rb create mode 100644 lib/reporting/sp_proofing_events_by_uuid.rb create mode 100644 spec/jobs/reports/sp_proofing_events_by_uuid_spec.rb create mode 100644 spec/lib/reporting/sp_proofing_events_by_uuid_spec.rb diff --git a/app/jobs/reports/sp_proofing_events_by_uuid.rb b/app/jobs/reports/sp_proofing_events_by_uuid.rb new file mode 100644 index 00000000000..f12da5b8821 --- /dev/null +++ b/app/jobs/reports/sp_proofing_events_by_uuid.rb @@ -0,0 +1,60 @@ +# frozen_string_literal: true + +require 'reporting/sp_proofing_events_by_uuid' + +module Reports + class SpProofingEventsByUuid < BaseReport + attr_accessor :report_date, :issuers, :agency_abbreviation + + def perform(report_date, issuers, agency_abbreviation) + return unless IdentityConfig.store.s3_reports_enabled + self.report_date = report_date + self.issuers = issuers + 
self.agency_abbreviation = agency_abbreviation + + csv = report_maker.to_csv + + save_report(agency_report_nane, csv, extension: 'csv') + + email = IdentityConfig.store.team_ada_email + if email.blank? + Rails.logger.warn "No email addresses received - #{agency_report_title} NOT SENT" + return false + end + + ReportMailer.tables_report( + email: email, + subject: "#{agency_report_title} - #{report_date.to_date}", + reports: reports, + message: message, + attachment_format: :csv, + ).deliver_now + end + + def agency_report_nane + "#{agency_abbreviation.downcase}_proofing_events_by_uuid" + end + + def agency_report_title + "#{agency_abbreviation} Proofing Events By UUID" + end + + def message + <<~HTML.html_safe # rubocop:disable Rails/OutputSafety +

#{agency_report_title}

+ HTML + end + + def reports + report_maker.as_emailable_reports + end + + def report_maker + @report_maker ||= Reporting::SpProofingEventsByUuid.new( + issuers:, + agency_abbreviation:, + time_range: report_date.all_week(:sunday), + ) + end + end +end diff --git a/lib/reporting/sp_proofing_events_by_uuid.rb b/lib/reporting/sp_proofing_events_by_uuid.rb new file mode 100644 index 00000000000..707fb29628d --- /dev/null +++ b/lib/reporting/sp_proofing_events_by_uuid.rb @@ -0,0 +1,205 @@ +# frozen_string_literal: true + +require 'csv' +begin + require 'reporting/cloudwatch_client' + require 'reporting/cloudwatch_query_quoting' + require 'reporting/command_line_options' +rescue LoadError => e + warn 'could not load paths, try running with "bundle exec rails runner"' + raise e +end + +module Reporting + class SpProofingEventsByUuid + attr_reader :issuers, :agency_abbreviation, :time_range + + def initialize( + issuers:, + agency_abbreviation:, + time_range:, + verbose: false, + progress: false, + cloudwatch_client: nil + ) + @issuers = issuers + @agency_abbreviation = agency_abbreviation + @time_range = time_range + @verbose = verbose + @progress = progress + @cloudwatch_client = cloudwatch_client + end + + def verbose? + @verbose + end + + def progress? 
+ @progress + end + + def query + <<~QUERY + filter properties.service_provider in #{issuers.inspect} or + (name = "IdV: enter verify by mail code submitted" and properties.event_properties.initiating_service_provider in #{issuers.inspect}) + | filter name in [ + "IdV: doc auth welcome visited", + "IdV: doc auth document_capture visited", + "Frontend: IdV: front image added", + "Frontend: IdV: back image added", + "idv_selfie_image_added", + "IdV: doc auth image upload vendor submitted", + "IdV: doc auth ssn submitted", + "IdV: doc auth verify proofing results", + "IdV: phone confirmation form", + "IdV: phone confirmation vendor", + "IdV: final resolution", + "IdV: enter verify by mail code submitted", + "Fraud: Profile review passed", + "Fraud: Profile review rejected", + "User registration: agency handoff visited", + "SP redirect initiated" + ] + + | fields coalesce(name = "Fraud: Profile review passed" and properties.event_properties.success, 0) * properties.event_properties.profile_age_in_seconds as fraud_review_profile_age_in_seconds, + coalesce(name = "IdV: enter verify by mail code submitted" and properties.event_properties.success and !properties.event_properties.pending_in_person_enrollment and !properties.event_properties.fraud_check_failed, 0) * properties.event_properties.profile_age_in_seconds as gpo_profile_age_in_seconds, + fraud_review_profile_age_in_seconds + gpo_profile_age_in_seconds as profile_age_in_seconds + + | stats sum(name = "IdV: doc auth welcome visited") > 0 as workflow_started, + sum(name = "IdV: doc auth document_capture visited") > 0 as doc_auth_started, + sum(name = "Frontend: IdV: front image added") > 0 and sum(name = "Frontend: IdV: back image added") > 0 as document_captured, + sum(name = "idv_selfie_image_added") > 0 as selfie_captured, + sum(name = "IdV: doc auth image upload vendor submitted" and properties.event_properties.success) > 0 as doc_auth_passed, + sum(name = "IdV: doc auth ssn submitted") > 0 as ssn_submitted, + 
sum(name = "IdV: doc auth verify proofing results") > 0 as personal_info_submitted, + sum(name = "IdV: doc auth verify proofing results" and properties.event_properties.success) > 0 as personal_info_verified, + sum(name = "IdV: phone confirmation form") > 0 as phone_submitted, + sum(name = "IdV: phone confirmation vendor" and properties.event_properties.success) > 0 as phone_verified, + sum(name = "IdV: final resolution") > 0 as online_workflow_completed, + sum(name = "IdV: final resolution" and !properties.event_properties.gpo_verification_pending and !properties.event_properties.in_person_verification_pending and !coalesce(properties.event_properties.fraud_pending_reason, 0)) > 0 as verified_in_band, + sum(name = "IdV: enter verify by mail code submitted" and properties.event_properties.success and !properties.event_properties.pending_in_person_enrollment and !properties.event_properties.fraud_check_failed) > 0 as verified_by_mail, + sum(name = "Fraud: Profile review passed" and properties.event_properties.success) > 0 as verified_fraud_review, + max(profile_age_in_seconds) as out_of_band_verification_pending_seconds, + sum(name = "User registration: agency handoff visited" and properties.event_properties.ial2) > 0 as agency_handoff, + sum(name = "SP redirect initiated" and properties.event_properties.ial == 2) > 0 as sp_redirect + by properties.user_id as login_uuid + | filter workflow_started > 0 or verified_by_mail > 0 or verified_fraud_review > 0 + | limit 10000 + QUERY + end + + def as_csv + csv = [] + csv << ['Date Range', "#{time_range.begin.to_date} - #{time_range.end.to_date}"] + csv << csv_header + data.each do |result_row| + csv << result_row + end + csv.compact + end + + def to_csv + CSV.generate do |csv| + as_csv.each do |row| + csv << row + end + end + end + + def as_emailable_reports + [ + EmailableReport.new( + title: "#{agency_abbreviation} Proofing Events By UUID", + table: report_maker.as_csv, + filename: 
"#{agency_abbreviation.downcase}_proofing_events_by_uuid", + ), + ] + end + + def csv_header + [ + 'UUID', + 'Workflow Started', + 'Documnet Capture Started', + 'Document Captured', + 'Selfie Captured', + 'Document Authentication Passed', + 'SSN Submitted', + 'Personal Information Submitted', + 'Personal Information Verified', + 'Phone Submitted', + 'Phone Verified', + 'Verification Workflow Complete', + 'Identity Verified for In-Band Users', + 'Identity Verified for Verify-By-Mail Users', + 'Identity Verified for Fraud Review Users', + 'Out-of-Band Verification Pending Seconds', + 'Agency Handoff Visited', + 'Agency Handoff Submitted', + ] + end + + def data + @data ||= fetch_results.map do |result_row| + process_result_row(result_row) + end + end + + def process_result_row(result_row) + login_uuid = result_row['login_uuid'] + agency_uuid = convert_uuid(login_uuid) + return unless agency_uuid.present? + [ + agency_uuid, + result_row['workflow_started'] == '1', + result_row['doc_auth_started'] == '1', + result_row['document_captured'] == '1', + result_row['selfie_captured'] == '1', + result_row['doc_auth_passed'] == '1', + result_row['ssn_submitted'] == '1', + result_row['personal_info_submitted'] == '1', + result_row['personal_info_verified'] == '1', + result_row['phone_submitted'] == '1', + result_row['phone_verified'] == '1', + result_row['online_workflow_completed'] == '1', + result_row['verified_in_band'] == '1', + result_row['verified_by_mail'] == '1', + result_row['verified_fraud_review'] == '1', + result_row['out_of_band_verification_pending_seconds'].to_i, + result_row['agency_handoff'] == '1', + result_row['sp_redirect'] == '1', + ] + end + + def convert_uuid(uuid) + user = User.find_by(uuid: uuid) + user&.agency_identities&.find_by(agency:)&.uuid + end + + def agency + @agency ||= begin + record = Agency.find_by(abbreviation: agency_abbreviation) + raise "Unable to find agency with abbreviation: #{agency_abbreviation}" if record.nil? 
+ record + end + end + + def fetch_results + cloudwatch_client.fetch( + query:, + from: time_range.begin.beginning_of_day, + to: time_range.end.end_of_day, + ) + end + + def cloudwatch_client + @cloudwatch_client ||= Reporting::CloudwatchClient.new( + num_threads: 1, + ensure_complete_logs: false, + slice_interval: 100.years, + progress: progress?, + logger: verbose? ? Logger.new(STDERR) : nil, + ) + end + end +end diff --git a/spec/jobs/reports/sp_proofing_events_by_uuid_spec.rb b/spec/jobs/reports/sp_proofing_events_by_uuid_spec.rb new file mode 100644 index 00000000000..0a6fa139f89 --- /dev/null +++ b/spec/jobs/reports/sp_proofing_events_by_uuid_spec.rb @@ -0,0 +1,88 @@ +require 'rails_helper' + +RSpec.describe Reports::SpProofingEventsByUuid do + let(:report_date) { Date.new(2024, 12, 1).in_time_zone('UTC') } + let(:issuers) { ['super:cool:test:issuer'] } + let(:agency_abbreviation) { 'DOL' } + + before do + allow(IdentityConfig.store).to receive(:s3_reports_enabled).and_return(true) + end + + describe '#perform' do + it 'gets a CSV from the report maker, saves it to S3, and sends email to team' do + allow(IdentityConfig.store).to receive(:team_ada_email).and_return('ada@example.com') + + report = [ + ['UUID', 'Welcome Visited', 'Welcome Submitted'], + ['123abc', true, true], + ['456def', true, false], + ] + csv_report = CSV.generate do |csv| + report.each { |row| csv << row } + end + emailable_reports = [ + Reporting::EmailableReport.new( + title: 'DOL Proofing Events By UUID - 2024-12-01', + table: report, + filename: 'dol_proofing_events_by_uuid', + ), + ] + + report_maker = double( + Reporting::SpProofingEventsByUuid, + to_csv: csv_report, + as_emailable_reports: emailable_reports, + ) + + allow(subject).to receive(:report_maker).and_return(report_maker) + expect(subject).to receive(:save_report).with( + 'dol_proofing_events_by_uuid', + csv_report, + extension: 'csv', + ) + + expect(ReportMailer).to receive(:tables_report).once.with( + email: 
IdentityConfig.store.team_ada_email, + subject: 'DOL Proofing Events By UUID - 2024-12-01', + reports: emailable_reports, + message: anything, + attachment_format: :csv, + ).and_call_original + + subject.perform(report_date, issuers, agency_abbreviation) + end + + it 'does not send report in email if the email field is empty' do + allow(IdentityConfig.store).to receive(:team_ada_email).and_return('') + + report_maker = double( + Reporting::SpProofingEventsByUuid, + to_csv: 'I am a CSV, see', + identity_verification_emailable_report: 'I am a report', + ) + allow(subject).to receive(:report_maker).and_return(report_maker) + expect(subject).to receive(:save_report).with( + 'dol_proofing_events_by_uuid', + 'I am a CSV, see', + extension: 'csv', + ) + + expect(ReportMailer).to_not receive(:tables_report) + + subject.perform(report_date, issuers, agency_abbreviation) + end + end + + describe '#report_maker' do + it 'is a identity verification report maker with the correct attributes' do + subject.report_date = Date.new(2024, 12, 1) + subject.issuers = ['super:cool:test:issuer'] + subject.agency_abbreviation = 'DOL' + + expect(subject.report_maker.time_range).to eq(Date.new(2024, 12, 1)..Date.new(2024, 12, 7)) + expect(subject.report_maker.issuers).to eq(['super:cool:test:issuer']) + expect(subject.agency_abbreviation).to eq('DOL') + end + end +end diff --git a/spec/lib/reporting/sp_proofing_events_by_uuid_spec.rb b/spec/lib/reporting/sp_proofing_events_by_uuid_spec.rb new file mode 100644 index 00000000000..944b78b033c --- /dev/null +++ b/spec/lib/reporting/sp_proofing_events_by_uuid_spec.rb @@ -0,0 +1,150 @@ +require 'rails_helper' +require 'reporting/sp_proofing_events_by_uuid' + +RSpec.describe Reporting::SpProofingEventsByUuid do + let(:issuer) { 'super_cool_test_issuer' } + let(:agency_abbreviation) { 'DOL' } + let(:agency) { Agency.find_by(abbreviation: agency_abbreviation) } + + let(:time_range) { Date.new(2024, 12, 1).all_week(:sunday) } + + 
let(:deleted_user_uuid) { 'deleted_user_test' } + let(:non_agency_user_uuid) { 'non_agency_user_test' } + let(:agency_user_login_uuid) { 'agency_user_login_uuid_test' } + let(:agency_user_agency_uuid) { 'agency_user_agency_uuid_test' } + + let(:cloudwatch_logs) do + [ + { 'login_uuid' => deleted_user_uuid, 'workflow_started' => '1' }, + { 'login_uuid' => non_agency_user_uuid, 'workflow_started' => '1' }, + { 'login_uuid' => agency_user_login_uuid, 'workflow_started' => '1' }, + ] + end + + before do + create(:user, uuid: non_agency_user_uuid) + agency_user = create(:user, uuid: agency_user_login_uuid) + create(:agency_identity, user: agency_user, uuid: agency_user_agency_uuid) + + stub_cloudwatch_logs(cloudwatch_logs) + end + + subject(:report) do + Reporting::SpProofingEventsByUuid.new( + issuers: Array(issuer), agency_abbreviation:, time_range:, + ) + end + + describe 'as_csv' do + it 'renders a CSV report with converted UUIDs' do + expected_csv = [ + ['Date Range', '2024-12-01 - 2024-12-07'], + [ + 'UUID', + 'Workflow Started', + 'Documnet Capture Started', + 'Document Captured', + 'Selfie Captured', + 'Document Authentication Passed', + 'SSN Submitted', + 'Personal Information Submitted', + 'Personal Information Verified', + 'Phone Submitted', + 'Phone Verified', + 'Verification Workflow Complete', + 'Identity Verified for In-Band Users', + 'Identity Verified for Verify-By-Mail Users', + 'Identity Verified for Fraud Review Users', + 'Out-of-Band Verification Pending Seconds', + 'Agency Handoff Visited', + 'Agency Handoff Submitted', + ], + [ + agency_user_login_uuid, + true, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + 0, + false, + false, + ], + ] + aggregate_failures do + report.as_csv.zip(expected_csv).each do |actual, expected| + expect(actual).to eq(expected) + end + end + end + end + + describe '#to_csv' do + it 'generates a csv' do + csv = CSV.parse(report.to_csv, headers: false) + 
+ expected_csv = [ + ['Date Range', '2024-12-01 - 2024-12-07'], + [ + 'UUID', + 'Workflow Started', + 'Documnet Capture Started', + 'Document Captured', + 'Selfie Captured', + 'Document Authentication Passed', + 'SSN Submitted', + 'Personal Information Submitted', + 'Personal Information Verified', + 'Phone Submitted', + 'Phone Verified', + 'Verification Workflow Complete', + 'Identity Verified for In-Band Users', + 'Identity Verified for Verify-By-Mail Users', + 'Identity Verified for Fraud Review Users', + 'Out-of-Band Verification Pending Seconds', + 'Agency Handoff Visited', + 'Agency Handoff Submitted', + ], + [ + agency_user_login_uuid, + 'true', + 'false', + 'false', + 'false', + 'false', + 'false', + 'false', + 'false', + 'false', + 'false', + 'false', + 'false', + 'false', + 'false', + '0', + 'false', + 'false', + ], + ] + + aggregate_failures do + csv.map(&:to_a).zip(expected_csv).each do |actual, expected| + expect(actual).to eq(expected) + end + end + end + end + + describe '#as_emailable_reports' do + it 'returns an emailable report' + end +end From ac224f6aa4c77360bc9b78cc14f798fac54af22b Mon Sep 17 00:00:00 2001 From: Jonathan Hooper Date: Wed, 22 Jan 2025 17:08:14 -0500 Subject: [PATCH 2/9] Test cleanup --- lib/reporting/sp_proofing_events_by_uuid.rb | 2 +- .../sp_proofing_events_by_uuid_spec.rb | 153 +++++++----------- 2 files changed, 63 insertions(+), 92 deletions(-) diff --git a/lib/reporting/sp_proofing_events_by_uuid.rb b/lib/reporting/sp_proofing_events_by_uuid.rb index 707fb29628d..2d951a93f84 100644 --- a/lib/reporting/sp_proofing_events_by_uuid.rb +++ b/lib/reporting/sp_proofing_events_by_uuid.rb @@ -110,7 +110,7 @@ def as_emailable_reports [ EmailableReport.new( title: "#{agency_abbreviation} Proofing Events By UUID", - table: report_maker.as_csv, + table: as_csv, filename: "#{agency_abbreviation.downcase}_proofing_events_by_uuid", ), ] diff --git a/spec/lib/reporting/sp_proofing_events_by_uuid_spec.rb 
b/spec/lib/reporting/sp_proofing_events_by_uuid_spec.rb index 944b78b033c..56d7df18ed3 100644 --- a/spec/lib/reporting/sp_proofing_events_by_uuid_spec.rb +++ b/spec/lib/reporting/sp_proofing_events_by_uuid_spec.rb @@ -2,7 +2,7 @@ require 'reporting/sp_proofing_events_by_uuid' RSpec.describe Reporting::SpProofingEventsByUuid do - let(:issuer) { 'super_cool_test_issuer' } + let(:issuer) { 'super:cool:test:issuer' } let(:agency_abbreviation) { 'DOL' } let(:agency) { Agency.find_by(abbreviation: agency_abbreviation) } @@ -21,10 +21,56 @@ ] end + let(:expect_csv_result) do + [ + ['Date Range', '2024-12-01 - 2024-12-07'], + [ + 'UUID', + 'Workflow Started', + 'Documnet Capture Started', + 'Document Captured', + 'Selfie Captured', + 'Document Authentication Passed', + 'SSN Submitted', + 'Personal Information Submitted', + 'Personal Information Verified', + 'Phone Submitted', + 'Phone Verified', + 'Verification Workflow Complete', + 'Identity Verified for In-Band Users', + 'Identity Verified for Verify-By-Mail Users', + 'Identity Verified for Fraud Review Users', + 'Out-of-Band Verification Pending Seconds', + 'Agency Handoff Visited', + 'Agency Handoff Submitted', + ], + [ + agency_user_agency_uuid, + true, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + 0, + false, + false, + ], + ] + end + before do create(:user, uuid: non_agency_user_uuid) agency_user = create(:user, uuid: agency_user_login_uuid) - create(:agency_identity, user: agency_user, uuid: agency_user_agency_uuid) + create(:agency_identity, user: agency_user, agency:, uuid: agency_user_agency_uuid) stub_cloudwatch_logs(cloudwatch_logs) end @@ -37,51 +83,8 @@ describe 'as_csv' do it 'renders a CSV report with converted UUIDs' do - expected_csv = [ - ['Date Range', '2024-12-01 - 2024-12-07'], - [ - 'UUID', - 'Workflow Started', - 'Documnet Capture Started', - 'Document Captured', - 'Selfie Captured', - 'Document Authentication Passed', - 'SSN 
Submitted', - 'Personal Information Submitted', - 'Personal Information Verified', - 'Phone Submitted', - 'Phone Verified', - 'Verification Workflow Complete', - 'Identity Verified for In-Band Users', - 'Identity Verified for Verify-By-Mail Users', - 'Identity Verified for Fraud Review Users', - 'Out-of-Band Verification Pending Seconds', - 'Agency Handoff Visited', - 'Agency Handoff Submitted', - ], - [ - agency_user_login_uuid, - true, - false, - false, - false, - false, - false, - false, - false, - false, - false, - false, - false, - false, - false, - 0, - false, - false, - ], - ] aggregate_failures do - report.as_csv.zip(expected_csv).each do |actual, expected| + expect_csv_result.zip(report.as_csv).each do |actual, expected| expect(actual).to eq(expected) end end @@ -92,52 +95,10 @@ it 'generates a csv' do csv = CSV.parse(report.to_csv, headers: false) - expected_csv = [ - ['Date Range', '2024-12-01 - 2024-12-07'], - [ - 'UUID', - 'Workflow Started', - 'Documnet Capture Started', - 'Document Captured', - 'Selfie Captured', - 'Document Authentication Passed', - 'SSN Submitted', - 'Personal Information Submitted', - 'Personal Information Verified', - 'Phone Submitted', - 'Phone Verified', - 'Verification Workflow Complete', - 'Identity Verified for In-Band Users', - 'Identity Verified for Verify-By-Mail Users', - 'Identity Verified for Fraud Review Users', - 'Out-of-Band Verification Pending Seconds', - 'Agency Handoff Visited', - 'Agency Handoff Submitted', - ], - [ - agency_user_login_uuid, - 'true', - 'false', - 'false', - 'false', - 'false', - 'false', - 'false', - 'false', - 'false', - 'false', - 'false', - 'false', - 'false', - 'false', - '0', - 'false', - 'false', - ], - ] + stringified_csv = expect_csv_result.map { |row| row.map(&:to_s) } aggregate_failures do - csv.map(&:to_a).zip(expected_csv).each do |actual, expected| + csv.map(&:to_a).zip(stringified_csv).each do |actual, expected| expect(actual).to eq(expected) end end @@ -145,6 +106,16 @@ end 
describe '#as_emailable_reports' do - it 'returns an emailable report' + it 'returns an emailable report' do + expect(report.as_emailable_reports).to eq( + [ + Reporting::EmailableReport.new( + title: 'DOL Proofing Events By UUID', + table: expect_csv_result, + filename: 'dol_proofing_events_by_uuid', + ), + ], + ) + end end end From 6247883202807a1f7ceb23d390b7f26ea5199fbd Mon Sep 17 00:00:00 2001 From: Jonathan Hooper Date: Wed, 22 Jan 2025 17:09:20 -0500 Subject: [PATCH 3/9] tweak wording --- spec/lib/reporting/sp_proofing_events_by_uuid_spec.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spec/lib/reporting/sp_proofing_events_by_uuid_spec.rb b/spec/lib/reporting/sp_proofing_events_by_uuid_spec.rb index 56d7df18ed3..4854478aa9a 100644 --- a/spec/lib/reporting/sp_proofing_events_by_uuid_spec.rb +++ b/spec/lib/reporting/sp_proofing_events_by_uuid_spec.rb @@ -106,7 +106,7 @@ end describe '#as_emailable_reports' do - it 'returns an emailable report' do + it 'returns an array with an emailable report' do expect(report.as_emailable_reports).to eq( [ Reporting::EmailableReport.new( From 106ab13c7435f3dfcb8d268cc65ca39357f243d1 Mon Sep 17 00:00:00 2001 From: Jonathan Hooper Date: Tue, 28 Jan 2025 11:02:46 -0500 Subject: [PATCH 4/9] Modify the job to use a JSON config --- .../reports/sp_proofing_events_by_uuid.rb | 70 +++++++-------- config/application.yml.default | 1 + lib/identity_config.rb | 1 + .../sp_proofing_events_by_uuid_spec.rb | 85 ++++++++++++------- 4 files changed, 93 insertions(+), 64 deletions(-) diff --git a/app/jobs/reports/sp_proofing_events_by_uuid.rb b/app/jobs/reports/sp_proofing_events_by_uuid.rb index f12da5b8821..92866ea81b6 100644 --- a/app/jobs/reports/sp_proofing_events_by_uuid.rb +++ b/app/jobs/reports/sp_proofing_events_by_uuid.rb @@ -4,57 +4,59 @@ module Reports class SpProofingEventsByUuid < BaseReport - attr_accessor :report_date, :issuers, :agency_abbreviation + attr_accessor :report_date - def perform(report_date, 
issuers, agency_abbreviation) + def perform(report_date) return unless IdentityConfig.store.s3_reports_enabled + self.report_date = report_date - self.issuers = issuers - self.agency_abbreviation = agency_abbreviation + + IdentityConfig.store.sp_proofing_events_by_uuid_report_configs.each do |report_config| + send_report(report_config) + end + end + + def send_report(report_config) + return unless IdentityConfig.store.s3_reports_enabled + issuers = report_config['issuers'] + agency_abbreviation = report_config['agency_abbreviation'] + emails = report_config['emails'] + + agency_report_nane = "#{agency_abbreviation.downcase}_proofing_events_by_uuid" + agency_report_title = "#{agency_abbreviation} Proofing Events By UUID" + + report_maker = build_report_maker( + issuers:, + agency_abbreviation:, + time_range: report_date.to_date.all_week(:sunday), + ) csv = report_maker.to_csv save_report(agency_report_nane, csv, extension: 'csv') - email = IdentityConfig.store.team_ada_email - if email.blank? + if emails.blank? Rails.logger.warn "No email addresses received - #{agency_report_title} NOT SENT" return false end - ReportMailer.tables_report( - email: email, - subject: "#{agency_report_title} - #{report_date.to_date}", - reports: reports, - message: message, - attachment_format: :csv, - ).deliver_now - end - - def agency_report_nane - "#{agency_abbreviation.downcase}_proofing_events_by_uuid" - end - - def agency_report_title - "#{agency_abbreviation} Proofing Events By UUID" - end - - def message - <<~HTML.html_safe # rubocop:disable Rails/OutputSafety + email_message = <<~HTML.html_safe # rubocop:disable Rails/OutputSafety

#{agency_report_title}

HTML - end - def reports - report_maker.as_emailable_reports + emails.each do |email| + ReportMailer.tables_report( + email: email, + subject: "#{agency_report_title} - #{report_date.to_date}", + reports: report_maker.as_emailable_reports, + message: email_message, + attachment_format: :csv, + ).deliver_now + end end - def report_maker - @report_maker ||= Reporting::SpProofingEventsByUuid.new( - issuers:, - agency_abbreviation:, - time_range: report_date.all_week(:sunday), - ) + def build_report_maker(issuers:, agency_abbreviation:, time_range:) + Reporting::SpProofingEventsByUuid.new(issuers:, agency_abbreviation:, time_range:) end end end diff --git a/config/application.yml.default b/config/application.yml.default index 743581627b4..fc77926b7ba 100644 --- a/config/application.yml.default +++ b/config/application.yml.default @@ -403,6 +403,7 @@ socure_reason_code_base_url: '' socure_reason_code_timeout_in_seconds: 5 sp_handoff_bounce_max_seconds: 2 sp_issuer_user_counts_report_configs: '[]' +sp_proofing_events_by_uuid_report_configs: '[]' state_tracking_enabled: true team_ada_email: '' team_all_login_emails: '[]' diff --git a/lib/identity_config.rb b/lib/identity_config.rb index 4a6557cac45..b09a7ff326a 100644 --- a/lib/identity_config.rb +++ b/lib/identity_config.rb @@ -439,6 +439,7 @@ def self.store config.add(:socure_reason_code_timeout_in_seconds, type: :integer) config.add(:sp_handoff_bounce_max_seconds, type: :integer) config.add(:sp_issuer_user_counts_report_configs, type: :json) + config.add(:sp_proofing_events_by_uuid_report_configs, type: :json) config.add(:state_tracking_enabled, type: :boolean) config.add(:team_ada_email, type: :string) config.add(:team_all_login_emails, type: :json) diff --git a/spec/jobs/reports/sp_proofing_events_by_uuid_spec.rb b/spec/jobs/reports/sp_proofing_events_by_uuid_spec.rb index 0a6fa139f89..ac0caefbae9 100644 --- a/spec/jobs/reports/sp_proofing_events_by_uuid_spec.rb +++ 
b/spec/jobs/reports/sp_proofing_events_by_uuid_spec.rb @@ -2,17 +2,30 @@ RSpec.describe Reports::SpProofingEventsByUuid do let(:report_date) { Date.new(2024, 12, 1).in_time_zone('UTC') } + let(:agency_abbreviation) { 'ABC' } + let(:report_emails) { ['test@example.com'] } let(:issuers) { ['super:cool:test:issuer'] } - let(:agency_abbreviation) { 'DOL' } + let(:sp_proofing_events_by_uuid_report_configs) do + [ + { + 'issuers' => issuers, + 'agency_abbreviation' => 'ABC', + 'emails' => report_emails, + }, + ] + end before do allow(IdentityConfig.store).to receive(:s3_reports_enabled).and_return(true) + allow(IdentityConfig.store).to receive( + :sp_proofing_events_by_uuid_report_configs, + ).and_return( + sp_proofing_events_by_uuid_report_configs, + ) end describe '#perform' do it 'gets a CSV from the report maker, saves it to S3, and sends email to team' do - allow(IdentityConfig.store).to receive(:team_ada_email).and_return('ada@example.com') - report = [ ['UUID', 'Welcome Visited', 'Welcome Submitted'], ['123abc', true, true], @@ -35,54 +48,66 @@ as_emailable_reports: emailable_reports, ) - allow(subject).to receive(:report_maker).and_return(report_maker) + allow(subject).to receive(:build_report_maker).with( + issuers: issuers, + agency_abbreviation: 'ABC', + time_range: Date.new(2024, 12, 1)..Date.new(2024, 12, 7), + ).and_return(report_maker) expect(subject).to receive(:save_report).with( - 'dol_proofing_events_by_uuid', + 'abc_proofing_events_by_uuid', csv_report, extension: 'csv', ) expect(ReportMailer).to receive(:tables_report).once.with( - email: IdentityConfig.store.team_ada_email, - subject: 'DOL Proofing Events By UUID - 2024-12-01', + email: 'test@example.com', + subject: 'ABC Proofing Events By UUID - 2024-12-01', reports: emailable_reports, message: anything, attachment_format: :csv, ).and_call_original - subject.perform(report_date, issuers, agency_abbreviation) + subject.perform(report_date) end - it 'does not send report in email if the email field 
is empty' do - allow(IdentityConfig.store).to receive(:team_ada_email).and_return('') + context 'with no emails configured' do + let(:report_emails) { [] } - report_maker = double( - Reporting::SpProofingEventsByUuid, - to_csv: 'I am a CSV, see', - identity_verification_emailable_report: 'I am a report', - ) - allow(subject).to receive(:report_maker).and_return(report_maker) - expect(subject).to receive(:save_report).with( - 'dol_proofing_events_by_uuid', - 'I am a CSV, see', - extension: 'csv', - ) + it 'does not send the report in email' do + report_maker = double( + Reporting::SpProofingEventsByUuid, + to_csv: 'I am a CSV, see', + identity_verification_emailable_report: 'I am a report', + ) + allow(subject).to receive(:build_report_maker).with( + issuers: issuers, + agency_abbreviation: 'ABC', + time_range: Date.new(2024, 12, 1)..Date.new(2024, 12, 7), + ).and_return(report_maker) + expect(subject).to receive(:save_report).with( + 'abc_proofing_events_by_uuid', + 'I am a CSV, see', + extension: 'csv', + ) - expect(ReportMailer).to_not receive(:tables_report) + expect(ReportMailer).to_not receive(:tables_report) - subject.perform(report_date, issuers, agency_abbreviation) + subject.perform(report_date) + end end end - describe '#report_maker' do + describe '#build_report_maker' do it 'is a identity verification report maker with the correct attributes' do - subject.report_date = Date.new(2024, 12, 1) - subject.issuers = ['super:cool:test:issuer'] - subject.agency_abbreviation = 'DOL' + report_maker = subject.build_report_maker( + issuers: ['super:cool:test:issuer'], + agency_abbreviation: 'ABC', + time_range: Date.new(2024, 12, 1)..Date.new(2024, 12, 7), + ) - expect(subject.report_maker.time_range).to eq(Date.new(2024, 12, 1)..Date.new(2024, 12, 7)) - expect(subject.report_maker.issuers).to eq(['super:cool:test:issuer']) - expect(subject.agency_abbreviation).to eq('DOL') + expect(report_maker.issuers).to eq(['super:cool:test:issuer']) + 
expect(report_maker.agency_abbreviation).to eq('ABC') + expect(report_maker.time_range).to eq(Date.new(2024, 12, 1)..Date.new(2024, 12, 7)) end end end From b337d20954b3e203dd9425ede99378d32ead3136 Mon Sep 17 00:00:00 2001 From: Jonathan Hooper Date: Tue, 28 Jan 2025 11:08:29 -0500 Subject: [PATCH 5/9] Add the job to the schedules with an empty default config --- app/jobs/reports/sp_proofing_events_by_uuid.rb | 2 +- config/initializers/job_configurations.rb | 6 ++++++ spec/jobs/reports/sp_proofing_events_by_uuid_spec.rb | 4 ++-- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/app/jobs/reports/sp_proofing_events_by_uuid.rb b/app/jobs/reports/sp_proofing_events_by_uuid.rb index 92866ea81b6..03a91bb63cf 100644 --- a/app/jobs/reports/sp_proofing_events_by_uuid.rb +++ b/app/jobs/reports/sp_proofing_events_by_uuid.rb @@ -28,7 +28,7 @@ def send_report(report_config) report_maker = build_report_maker( issuers:, agency_abbreviation:, - time_range: report_date.to_date.all_week(:sunday), + time_range: report_date.to_date.weeks_ago(1).all_week(:sunday), ) csv = report_maker.to_csv diff --git a/config/initializers/job_configurations.rb b/config/initializers/job_configurations.rb index 572c450ce86..d8f87610a8b 100644 --- a/config/initializers/job_configurations.rb +++ b/config/initializers/job_configurations.rb @@ -119,6 +119,12 @@ cron: cron_24h, args: -> { [Time.zone.yesterday] }, }, + # Send the SP IdV Weekly Dropoff Report + sp_idv_weekly_dropoff: { + class: 'Reports::SpProofingEventsByUuid', + cron: cron_every_monday_2am, + args: -> { [Time.zone.today] }, + }, # Sync opted out phone numbers from AWS phone_number_opt_out_sync_job: { class: 'PhoneNumberOptOutSyncJob', diff --git a/spec/jobs/reports/sp_proofing_events_by_uuid_spec.rb b/spec/jobs/reports/sp_proofing_events_by_uuid_spec.rb index ac0caefbae9..a2fe2bff220 100644 --- a/spec/jobs/reports/sp_proofing_events_by_uuid_spec.rb +++ b/spec/jobs/reports/sp_proofing_events_by_uuid_spec.rb @@ -1,7 +1,7 @@ 
require 'rails_helper' RSpec.describe Reports::SpProofingEventsByUuid do - let(:report_date) { Date.new(2024, 12, 1).in_time_zone('UTC') } + let(:report_date) { Date.new(2024, 12, 9) } let(:agency_abbreviation) { 'ABC' } let(:report_emails) { ['test@example.com'] } let(:issuers) { ['super:cool:test:issuer'] } @@ -61,7 +61,7 @@ expect(ReportMailer).to receive(:tables_report).once.with( email: 'test@example.com', - subject: 'ABC Proofing Events By UUID - 2024-12-01', + subject: 'ABC Proofing Events By UUID - 2024-12-09', reports: emailable_reports, message: anything, attachment_format: :csv, From 414d1b99ba0bb5979ab3441a3b3982afb18cd4ad Mon Sep 17 00:00:00 2001 From: Jonathan Hooper Date: Tue, 28 Jan 2025 11:12:41 -0500 Subject: [PATCH 6/9] remove command line options --- lib/reporting/sp_proofing_events_by_uuid.rb | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/lib/reporting/sp_proofing_events_by_uuid.rb b/lib/reporting/sp_proofing_events_by_uuid.rb index 2d951a93f84..2fe9dce06a9 100644 --- a/lib/reporting/sp_proofing_events_by_uuid.rb +++ b/lib/reporting/sp_proofing_events_by_uuid.rb @@ -1,14 +1,7 @@ # frozen_string_literal: true -require 'csv' -begin - require 'reporting/cloudwatch_client' - require 'reporting/cloudwatch_query_quoting' - require 'reporting/command_line_options' -rescue LoadError => e - warn 'could not load paths, try running with "bundle exec rails runner"' - raise e -end +require 'reporting/cloudwatch_client' +require 'reporting/cloudwatch_query_quoting' module Reporting class SpProofingEventsByUuid From c61063d63cdb0231af1a7c4dd5e512cdf68c36c1 Mon Sep 17 00:00:00 2001 From: Jonathan Hooper Date: Tue, 28 Jan 2025 12:13:08 -0500 Subject: [PATCH 7/9] get events after the first 10k --- lib/reporting/sp_proofing_events_by_uuid.rb | 19 +++++-- .../sp_proofing_events_by_uuid_spec.rb | 49 +++++++++++++++++-- 2 files changed, 59 insertions(+), 9 deletions(-) diff --git a/lib/reporting/sp_proofing_events_by_uuid.rb 
b/lib/reporting/sp_proofing_events_by_uuid.rb index 2fe9dce06a9..e51b5c7383e 100644 --- a/lib/reporting/sp_proofing_events_by_uuid.rb +++ b/lib/reporting/sp_proofing_events_by_uuid.rb @@ -31,8 +31,8 @@ def progress? @progress end - def query - <<~QUERY + def query(after_row:) + base_query = <<~QUERY filter properties.service_provider in #{issuers.inspect} or (name = "IdV: enter verify by mail code submitted" and properties.event_properties.initiating_service_provider in #{issuers.inspect}) | filter name in [ @@ -75,10 +75,15 @@ def query max(profile_age_in_seconds) as out_of_band_verification_pending_seconds, sum(name = "User registration: agency handoff visited" and properties.event_properties.ial2) > 0 as agency_handoff, sum(name = "SP redirect initiated" and properties.event_properties.ial == 2) > 0 as sp_redirect + toMillis(min(@timestamp)) as first_event by properties.user_id as login_uuid | filter workflow_started > 0 or verified_by_mail > 0 or verified_fraud_review > 0 | limit 10000 + | sort first_event asc QUERY + return base_query if after_row.nil? + + base_query + " | filter first_event > #{after_row['first_event']}" end def as_csv @@ -133,6 +138,8 @@ def csv_header end def data + return @data if defined? 
@data + @data ||= fetch_results.map do |result_row| process_result_row(result_row) end @@ -177,12 +184,14 @@ def agency end end - def fetch_results - cloudwatch_client.fetch( - query:, + def fetch_results(after_row: nil) + results = cloudwatch_client.fetch( + query: query(after_row:), from: time_range.begin.beginning_of_day, to: time_range.end.end_of_day, ) + return results if results.count < 10000 + results + fetch_results(after_row: results.last) end def cloudwatch_client diff --git a/spec/lib/reporting/sp_proofing_events_by_uuid_spec.rb b/spec/lib/reporting/sp_proofing_events_by_uuid_spec.rb index 4854478aa9a..674a8f83dc6 100644 --- a/spec/lib/reporting/sp_proofing_events_by_uuid_spec.rb +++ b/spec/lib/reporting/sp_proofing_events_by_uuid_spec.rb @@ -15,9 +15,21 @@ let(:cloudwatch_logs) do [ - { 'login_uuid' => deleted_user_uuid, 'workflow_started' => '1' }, - { 'login_uuid' => non_agency_user_uuid, 'workflow_started' => '1' }, - { 'login_uuid' => agency_user_login_uuid, 'workflow_started' => '1' }, + { + 'login_uuid' => deleted_user_uuid, + 'workflow_started' => '1', + 'first_event' => '1.735275676123E12', + }, + { + 'login_uuid' => non_agency_user_uuid, + 'workflow_started' => '1', + 'first_event' => '1.735275676456E12', + }, + { + 'login_uuid' => agency_user_login_uuid, + 'workflow_started' => '1', + 'first_event' => '1.735275676789E12', + }, ] end @@ -81,7 +93,7 @@ ) end - describe 'as_csv' do + describe '#as_csv' do it 'renders a CSV report with converted UUIDs' do aggregate_failures do expect_csv_result.zip(report.as_csv).each do |actual, expected| @@ -118,4 +130,33 @@ ) end end + + describe '#data' do + it 'fetches additional results if 10k results are returned' do + cloudwatch_client = double(Reporting::CloudwatchClient) + expect(cloudwatch_client).to receive(:fetch).ordered do |args| + expect(args[:query]).to_not include('| filter first_event') + [ + { + 'login_uuid' => agency_user_login_uuid, + 'workflow_started' => '1', + 'first_event' => 
'1.123456E12', + }, + ] * 10000 + end + expect(cloudwatch_client).to receive(:fetch).ordered do |args| + expect(args[:query]).to include('| filter first_event > 1.123456E12') + [ + { + 'login_uuid' => agency_user_login_uuid, + 'workflow_started' => '1', + 'first_event' => '1.123456E12', + }, + ] + end + allow(report).to receive(:cloudwatch_client).and_return(cloudwatch_client) + + expect(report.data.count).to eq(10_001) + end + end end From a6aad0b0086318a314863fa0b75b36d8a19a61a9 Mon Sep 17 00:00:00 2001 From: Jonathan Hooper Date: Tue, 28 Jan 2025 12:49:14 -0500 Subject: [PATCH 8/9] Batch lookup agency UUIDs --- lib/reporting/sp_proofing_events_by_uuid.rb | 35 ++++++++++++++++----- 1 file changed, 27 insertions(+), 8 deletions(-) diff --git a/lib/reporting/sp_proofing_events_by_uuid.rb b/lib/reporting/sp_proofing_events_by_uuid.rb index e51b5c7383e..f851fa406e8 100644 --- a/lib/reporting/sp_proofing_events_by_uuid.rb +++ b/lib/reporting/sp_proofing_events_by_uuid.rb @@ -140,17 +140,22 @@ def csv_header def data return @data if defined? @data - @data ||= fetch_results.map do |result_row| + login_uuid_data ||= fetch_results.map do |result_row| process_result_row(result_row) end + login_uuid_to_agency_uuid_map = build_uuid_map(login_uuid_data.map(&:first)) + + @data = login_uuid_data.map do |row| + login_uuid, *row_data = row + agency_uuid = login_uuid_to_agency_uuid_map[login_uuid] + next if agency_uuid.nil? + [agency_uuid, *row_data] + end.compact end def process_result_row(result_row) - login_uuid = result_row['login_uuid'] - agency_uuid = convert_uuid(login_uuid) - return unless agency_uuid.present? 
[ - agency_uuid, + result_row['login_uuid'], result_row['workflow_started'] == '1', result_row['doc_auth_started'] == '1', result_row['document_captured'] == '1', @@ -171,10 +176,24 @@ def process_result_row(result_row) ] end - def convert_uuid(uuid) - user = User.find_by(uuid: uuid) - user&.agency_identities&.find_by(agency:)&.uuid + # rubocop:disable Rails/FindEach + # Use of `find` instead of `find_each` here is safe since we are already batching the UUIDs + # that go into the query + def build_uuid_map(uuids) + uuid_map = Hash.new + + uuids.each_slice(1000) do |uuid_slice| + AgencyIdentity.joins(:user).where( + agency:, + users: { uuid: uuid_slice }, + ).each do |agency_identity| + uuid_map[agency_identity.user.uuid] = agency_identity.uuid + end + end + + uuid_map end + # rubocop:enable Rails/FindEach def agency @agency ||= begin From 40d7b051b4aace6dd26c9baf99006ccb0f85ae37 Mon Sep 17 00:00:00 2001 From: Jonathan Hooper Date: Tue, 28 Jan 2025 14:49:11 -0500 Subject: [PATCH 9/9] fix a CW syntax error --- lib/reporting/sp_proofing_events_by_uuid.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/reporting/sp_proofing_events_by_uuid.rb b/lib/reporting/sp_proofing_events_by_uuid.rb index f851fa406e8..d9cc5eca312 100644 --- a/lib/reporting/sp_proofing_events_by_uuid.rb +++ b/lib/reporting/sp_proofing_events_by_uuid.rb @@ -74,7 +74,7 @@ def query(after_row:) sum(name = "Fraud: Profile review passed" and properties.event_properties.success) > 0 as verified_fraud_review, max(profile_age_in_seconds) as out_of_band_verification_pending_seconds, sum(name = "User registration: agency handoff visited" and properties.event_properties.ial2) > 0 as agency_handoff, - sum(name = "SP redirect initiated" and properties.event_properties.ial == 2) > 0 as sp_redirect + sum(name = "SP redirect initiated" and properties.event_properties.ial == 2) > 0 as sp_redirect, toMillis(min(@timestamp)) as first_event by properties.user_id as login_uuid | filter 
workflow_started > 0 or verified_by_mail > 0 or verified_fraud_review > 0