diff --git a/app/jobs/reports/sp_proofing_events_by_uuid.rb b/app/jobs/reports/sp_proofing_events_by_uuid.rb
new file mode 100644
index 00000000000..03a91bb63cf
--- /dev/null
+++ b/app/jobs/reports/sp_proofing_events_by_uuid.rb
@@ -0,0 +1,74 @@
+# frozen_string_literal: true
+
+require 'reporting/sp_proofing_events_by_uuid'
+
+module Reports
+  # Scheduled job that builds a proofing-events-by-UUID CSV for each entry in
+  # `sp_proofing_events_by_uuid_report_configs`, stores it with `save_report`,
+  # and emails it to the recipients configured for that entry.
+  class SpProofingEventsByUuid < BaseReport
+    attr_accessor :report_date
+
+    # @param report_date [Date, Time] anchor date; each report covers the
+    #   Sunday-to-Saturday week containing the day one week before it
+    def perform(report_date)
+      return unless IdentityConfig.store.s3_reports_enabled
+
+      self.report_date = report_date
+
+      IdentityConfig.store.sp_proofing_events_by_uuid_report_configs.each do |report_config|
+        send_report(report_config)
+      end
+    end
+
+    # Builds, saves, and emails the report for one config entry.
+    #
+    # @param report_config [Hash] read via the 'issuers', 'agency_abbreviation',
+    #   and 'emails' string keys
+    # @return [false, Object] false when no recipients are configured (the CSV
+    #   has already been saved at that point)
+    def send_report(report_config)
+      return unless IdentityConfig.store.s3_reports_enabled
+      issuers = report_config['issuers']
+      agency_abbreviation = report_config['agency_abbreviation']
+      emails = report_config['emails']
+
+      agency_report_name = "#{agency_abbreviation.downcase}_proofing_events_by_uuid"
+      agency_report_title = "#{agency_abbreviation} Proofing Events By UUID"
+
+      report_maker = build_report_maker(
+        issuers:,
+        agency_abbreviation:,
+        time_range: report_date.to_date.weeks_ago(1).all_week(:sunday),
+      )
+
+      csv = report_maker.to_csv
+
+      save_report(agency_report_name, csv, extension: 'csv')
+
+      if emails.blank?
+        Rails.logger.warn "No email addresses received - #{agency_report_title} NOT SENT"
+        return false
+      end
+
+      email_message = <<~HTML.html_safe # rubocop:disable Rails/OutputSafety
+        #{agency_report_title}
+      HTML
+
+      emails.each do |email|
+        ReportMailer.tables_report(
+          email:,
+          subject: "#{agency_report_title} - #{report_date.to_date}",
+          reports: report_maker.as_emailable_reports,
+          message: email_message,
+          attachment_format: :csv,
+        ).deliver_now
+      end
+    end
+
+    # Kept as its own method so the report maker can be swapped out in tests.
+    def build_report_maker(issuers:, agency_abbreviation:, time_range:)
+      Reporting::SpProofingEventsByUuid.new(issuers:, agency_abbreviation:, time_range:)
+    end
+  end
+end
diff --git a/config/application.yml.default b/config/application.yml.default
index 743581627b4..fc77926b7ba 100644
--- a/config/application.yml.default
+++ b/config/application.yml.default
@@ -403,6 +403,7 @@ socure_reason_code_base_url: ''
 socure_reason_code_timeout_in_seconds: 5
 sp_handoff_bounce_max_seconds: 2
 sp_issuer_user_counts_report_configs: '[]'
+sp_proofing_events_by_uuid_report_configs: '[]'
 state_tracking_enabled: true
 team_ada_email: ''
 team_all_login_emails: '[]'
diff --git a/config/initializers/job_configurations.rb b/config/initializers/job_configurations.rb
index 572c450ce86..d8f87610a8b 100644
--- a/config/initializers/job_configurations.rb
+++ b/config/initializers/job_configurations.rb
@@ -119,6 +119,12 @@
         cron: cron_24h,
         args: -> { [Time.zone.yesterday] },
       },
+      # Send the weekly SP Proofing Events by UUID report
+      sp_proofing_events_by_uuid: {
+        class: 'Reports::SpProofingEventsByUuid',
+        cron: cron_every_monday_2am,
+        args: -> { [Time.zone.today] },
+      },
       # Sync opted out phone numbers from AWS
       phone_number_opt_out_sync_job: {
         class: 'PhoneNumberOptOutSyncJob',
diff --git a/lib/identity_config.rb b/lib/identity_config.rb
index 4a6557cac45..b09a7ff326a 100644
--- a/lib/identity_config.rb
+++ b/lib/identity_config.rb
@@ -439,6 +439,7 @@ def self.store
     config.add(:socure_reason_code_timeout_in_seconds, type: :integer)
     config.add(:sp_handoff_bounce_max_seconds, type: :integer)
     config.add(:sp_issuer_user_counts_report_configs, type: :json)
+    config.add(:sp_proofing_events_by_uuid_report_configs, type: :json)
     config.add(:state_tracking_enabled, type: :boolean)
     config.add(:team_ada_email, type: :string)
     config.add(:team_all_login_emails, type: :json)
diff --git a/lib/reporting/sp_proofing_events_by_uuid.rb b/lib/reporting/sp_proofing_events_by_uuid.rb
new file mode 100644
index 00000000000..d9cc5eca312
--- /dev/null
+++ b/lib/reporting/sp_proofing_events_by_uuid.rb
@@ -0,0 +1,238 @@
+# frozen_string_literal: true
+
+require 'csv'
+require 'reporting/cloudwatch_client'
+require 'reporting/cloudwatch_query_quoting'
+
+module Reporting
+  # Builds a table of identity-proofing milestones, one row per user, from
+  # CloudWatch events for the given issuers. The login UUIDs found in the logs
+  # are swapped for each user's UUID at the configured agency.
+  class SpProofingEventsByUuid
+    # Row cap for one CloudWatch query. A page that comes back with this many
+    # rows is assumed to be truncated, so another page is requested.
+    QUERY_ROW_LIMIT = 10_000
+
+    attr_reader :issuers, :agency_abbreviation, :time_range
+
+    def initialize(
+      issuers:,
+      agency_abbreviation:,
+      time_range:,
+      verbose: false,
+      progress: false,
+      cloudwatch_client: nil
+    )
+      @issuers = issuers
+      @agency_abbreviation = agency_abbreviation
+      @time_range = time_range
+      @verbose = verbose
+      @progress = progress
+      @cloudwatch_client = cloudwatch_client
+    end
+
+    def verbose?
+      @verbose
+    end
+
+    def progress?
+      @progress
+    end
+
+    # Builds the CloudWatch Logs Insights query for one page of results.
+    #
+    # @param after_row [Hash, nil] last row of the previous page (nil for the
+    #   first page); only rows with a later first_event are requested
+    # @return [String]
+    def query(after_row:)
+      # Paginate ahead of sort/limit so every page holds the earliest remaining
+      # rows instead of a slice of an already-limited result set.
+      pagination_filter =
+        after_row.nil? ? '' : "| filter first_event > #{after_row['first_event']}"
+
+      <<~QUERY
+        filter properties.service_provider in #{issuers.inspect} or
+               (name = "IdV: enter verify by mail code submitted" and properties.event_properties.initiating_service_provider in #{issuers.inspect})
+        | filter name in [
+            "IdV: doc auth welcome visited",
+            "IdV: doc auth document_capture visited",
+            "Frontend: IdV: front image added",
+            "Frontend: IdV: back image added",
+            "idv_selfie_image_added",
+            "IdV: doc auth image upload vendor submitted",
+            "IdV: doc auth ssn submitted",
+            "IdV: doc auth verify proofing results",
+            "IdV: phone confirmation form",
+            "IdV: phone confirmation vendor",
+            "IdV: final resolution",
+            "IdV: enter verify by mail code submitted",
+            "Fraud: Profile review passed",
+            "Fraud: Profile review rejected",
+            "User registration: agency handoff visited",
+            "SP redirect initiated"
+          ]
+
+        | fields coalesce(name = "Fraud: Profile review passed" and properties.event_properties.success, 0) * properties.event_properties.profile_age_in_seconds as fraud_review_profile_age_in_seconds,
+                 coalesce(name = "IdV: enter verify by mail code submitted" and properties.event_properties.success and !properties.event_properties.pending_in_person_enrollment and !properties.event_properties.fraud_check_failed, 0) * properties.event_properties.profile_age_in_seconds as gpo_profile_age_in_seconds,
+                 fraud_review_profile_age_in_seconds + gpo_profile_age_in_seconds as profile_age_in_seconds
+
+        | stats sum(name = "IdV: doc auth welcome visited") > 0 as workflow_started,
+                sum(name = "IdV: doc auth document_capture visited") > 0 as doc_auth_started,
+                sum(name = "Frontend: IdV: front image added") > 0 and sum(name = "Frontend: IdV: back image added") > 0 as document_captured,
+                sum(name = "idv_selfie_image_added") > 0 as selfie_captured,
+                sum(name = "IdV: doc auth image upload vendor submitted" and properties.event_properties.success) > 0 as doc_auth_passed,
+                sum(name = "IdV: doc auth ssn submitted") > 0 as ssn_submitted,
+                sum(name = "IdV: doc auth verify proofing results") > 0 as personal_info_submitted,
+                sum(name = "IdV: doc auth verify proofing results" and properties.event_properties.success) > 0 as personal_info_verified,
+                sum(name = "IdV: phone confirmation form") > 0 as phone_submitted,
+                sum(name = "IdV: phone confirmation vendor" and properties.event_properties.success) > 0 as phone_verified,
+                sum(name = "IdV: final resolution") > 0 as online_workflow_completed,
+                sum(name = "IdV: final resolution" and !properties.event_properties.gpo_verification_pending and !properties.event_properties.in_person_verification_pending and !coalesce(properties.event_properties.fraud_pending_reason, 0)) > 0 as verified_in_band,
+                sum(name = "IdV: enter verify by mail code submitted" and properties.event_properties.success and !properties.event_properties.pending_in_person_enrollment and !properties.event_properties.fraud_check_failed) > 0 as verified_by_mail,
+                sum(name = "Fraud: Profile review passed" and properties.event_properties.success) > 0 as verified_fraud_review,
+                max(profile_age_in_seconds) as out_of_band_verification_pending_seconds,
+                sum(name = "User registration: agency handoff visited" and properties.event_properties.ial2) > 0 as agency_handoff,
+                sum(name = "SP redirect initiated" and properties.event_properties.ial == 2) > 0 as sp_redirect,
+                toMillis(min(@timestamp)) as first_event
+          by properties.user_id as login_uuid
+        | filter workflow_started > 0 or verified_by_mail > 0 or verified_fraud_review > 0
+        #{pagination_filter}
+        | sort first_event asc
+        | limit #{QUERY_ROW_LIMIT}
+      QUERY
+    end
+
+    # @return [Array<Array>] date-range row, header row, then one row per user
+    def as_csv
+      [
+        ['Date Range', "#{time_range.begin.to_date} - #{time_range.end.to_date}"],
+        csv_header,
+        *data,
+      ]
+    end
+
+    # @return [String] the #as_csv rows serialized as CSV text
+    def to_csv
+      CSV.generate do |csv|
+        as_csv.each { |row| csv << row }
+      end
+    end
+
+    def as_emailable_reports
+      [
+        EmailableReport.new(
+          title: "#{agency_abbreviation} Proofing Events By UUID",
+          table: as_csv,
+          filename: "#{agency_abbreviation.downcase}_proofing_events_by_uuid",
+        ),
+      ]
+    end
+
+    # Column titles, in the same order as the values from #process_result_row.
+    def csv_header
+      [
+        'UUID',
+        'Workflow Started',
+        'Document Capture Started',
+        'Document Captured',
+        'Selfie Captured',
+        'Document Authentication Passed',
+        'SSN Submitted',
+        'Personal Information Submitted',
+        'Personal Information Verified',
+        'Phone Submitted',
+        'Phone Verified',
+        'Verification Workflow Complete',
+        'Identity Verified for In-Band Users',
+        'Identity Verified for Verify-By-Mail Users',
+        'Identity Verified for Fraud Review Users',
+        'Out-of-Band Verification Pending Seconds',
+        'Agency Handoff Visited',
+        'Agency Handoff Submitted',
+      ]
+    end
+
+    # @return [Array<Array>] one row per user that has an identity at the agency;
+    #   users without one (for example deleted users) are left out
+    def data
+      return @data if defined? @data
+
+      login_uuid_data = fetch_results.map { |result_row| process_result_row(result_row) }
+      login_uuid_to_agency_uuid_map = build_uuid_map(login_uuid_data.map(&:first))
+
+      @data = login_uuid_data.filter_map do |login_uuid, *row_data|
+        agency_uuid = login_uuid_to_agency_uuid_map[login_uuid]
+        [agency_uuid, *row_data] if agency_uuid
+      end
+    end
+
+    # Converts one CloudWatch result row into report values. Flag columns are
+    # true only when CloudWatch returned the string '1' for them.
+    def process_result_row(result_row)
+      [
+        result_row['login_uuid'],
+        result_row['workflow_started'] == '1',
+        result_row['doc_auth_started'] == '1',
+        result_row['document_captured'] == '1',
+        result_row['selfie_captured'] == '1',
+        result_row['doc_auth_passed'] == '1',
+        result_row['ssn_submitted'] == '1',
+        result_row['personal_info_submitted'] == '1',
+        result_row['personal_info_verified'] == '1',
+        result_row['phone_submitted'] == '1',
+        result_row['phone_verified'] == '1',
+        result_row['online_workflow_completed'] == '1',
+        result_row['verified_in_band'] == '1',
+        result_row['verified_by_mail'] == '1',
+        result_row['verified_fraud_review'] == '1',
+        result_row['out_of_band_verification_pending_seconds'].to_i,
+        result_row['agency_handoff'] == '1',
+        result_row['sp_redirect'] == '1',
+      ]
+    end
+
+    # Maps login UUIDs to agency UUIDs. UUIDs are looked up 1000 at a time, and
+    # both UUID columns are plucked in one query per batch rather than loading
+    # a user record for every agency identity.
+    def build_uuid_map(uuids)
+      uuids.each_slice(1000).each_with_object({}) do |uuid_slice, uuid_map|
+        pairs = AgencyIdentity.joins(:user).where(
+          agency:,
+          users: { uuid: uuid_slice },
+        ).pluck('users.uuid', :uuid)
+        uuid_map.merge!(pairs.to_h)
+      end
+    end
+
+    # @raise [RuntimeError] when no agency matches agency_abbreviation
+    def agency
+      @agency ||= begin
+        record = Agency.find_by(abbreviation: agency_abbreviation)
+        raise "Unable to find agency with abbreviation: #{agency_abbreviation}" if record.nil?
+        record
+      end
+    end
+
+    # Fetches every page of results, following first_event from page to page.
+    def fetch_results(after_row: nil)
+      results = cloudwatch_client.fetch(
+        query: query(after_row:),
+        from: time_range.begin.beginning_of_day,
+        to: time_range.end.end_of_day,
+      )
+      return results if results.count < QUERY_ROW_LIMIT
+
+      results + fetch_results(after_row: results.last)
+    end
+
+    def cloudwatch_client
+      @cloudwatch_client ||= Reporting::CloudwatchClient.new(
+        num_threads: 1,
+        ensure_complete_logs: false,
+        slice_interval: 100.years,
+        progress: progress?,
+        logger: verbose? ? Logger.new(STDERR) : nil,
+      )
+    end
+  end
+end
diff --git a/spec/jobs/reports/sp_proofing_events_by_uuid_spec.rb b/spec/jobs/reports/sp_proofing_events_by_uuid_spec.rb
new file mode 100644
index 00000000000..a2fe2bff220
--- /dev/null
+++ b/spec/jobs/reports/sp_proofing_events_by_uuid_spec.rb
@@ -0,0 +1,112 @@
+require 'rails_helper'
+
+RSpec.describe Reports::SpProofingEventsByUuid do
+  let(:report_date) { Date.new(2024, 12, 9) }
+  let(:agency_abbreviation) { 'ABC' }
+  let(:report_emails) { ['test@example.com'] }
+  let(:issuers) { ['super:cool:test:issuer'] }
+  let(:sp_proofing_events_by_uuid_report_configs) do
+    [
+      {
+        'issuers' => issuers,
+        'agency_abbreviation' => 'ABC',
+        'emails' => report_emails,
+      },
+    ]
+  end
+
+  before do
+    allow(IdentityConfig.store).to receive(:s3_reports_enabled).and_return(true)
+    allow(IdentityConfig.store).to receive(
+      :sp_proofing_events_by_uuid_report_configs,
+    ).and_return(
+      sp_proofing_events_by_uuid_report_configs,
+    )
+  end
+
+  describe '#perform' do
+    it 'gets a CSV from the report maker, saves it to S3, and sends email to team' do
+      report = [
+        ['UUID', 'Welcome Visited', 'Welcome Submitted'],
+        ['123abc', true, true],
+        ['456def', true, false],
+      ]
+      csv_report = CSV.generate do |csv|
+        report.each { |row| csv << row }
+      end
+      emailable_reports = [
+        Reporting::EmailableReport.new(
+          title: 'ABC Proofing Events By UUID',
+          table: report,
+          filename: 'abc_proofing_events_by_uuid',
+        ),
+      ]
+
+      report_maker = double(
+        Reporting::SpProofingEventsByUuid,
+        to_csv: csv_report,
+        as_emailable_reports: emailable_reports,
+      )
+
+      allow(subject).to receive(:build_report_maker).with(
+        issuers: issuers,
+        agency_abbreviation: 'ABC',
+        time_range: Date.new(2024, 12, 1)..Date.new(2024, 12, 7),
+      ).and_return(report_maker)
+      expect(subject).to receive(:save_report).with(
+        'abc_proofing_events_by_uuid',
+        csv_report,
+        extension: 'csv',
+      )
+
+      expect(ReportMailer).to receive(:tables_report).once.with(
+        email: 'test@example.com',
+        subject: 'ABC Proofing Events By UUID - 2024-12-09',
+        reports: emailable_reports,
+        message: anything,
+        attachment_format: :csv,
+      ).and_call_original
+
+      subject.perform(report_date)
+    end
+
+    context 'with no emails configured' do
+      let(:report_emails) { [] }
+
+      it 'does not send the report in email' do
+        report_maker = double(
+          Reporting::SpProofingEventsByUuid,
+          to_csv: 'I am a CSV, see',
+        )
+        allow(subject).to receive(:build_report_maker).with(
+          issuers: issuers,
+          agency_abbreviation: 'ABC',
+          time_range: Date.new(2024, 12, 1)..Date.new(2024, 12, 7),
+        ).and_return(report_maker)
+        expect(subject).to receive(:save_report).with(
+          'abc_proofing_events_by_uuid',
+          'I am a CSV, see',
+          extension: 'csv',
+        )
+
+        expect(ReportMailer).to_not receive(:tables_report)
+
+        subject.perform(report_date)
+      end
+    end
+  end
+
+  describe '#build_report_maker' do
+    it 'is a proofing events by UUID report maker with the correct attributes' do
+      report_maker = subject.build_report_maker(
+        issuers: ['super:cool:test:issuer'],
+        agency_abbreviation: 'ABC',
+        time_range: Date.new(2024, 12, 1)..Date.new(2024, 12, 7),
+      )
+
+      expect(report_maker.issuers).to eq(['super:cool:test:issuer'])
+      expect(report_maker.agency_abbreviation).to eq('ABC')
+      expect(report_maker.time_range).to eq(Date.new(2024, 12, 1)..Date.new(2024, 12, 7))
+    end
+  end
+end
diff --git a/spec/lib/reporting/sp_proofing_events_by_uuid_spec.rb b/spec/lib/reporting/sp_proofing_events_by_uuid_spec.rb
new file mode 100644
index 00000000000..674a8f83dc6
--- /dev/null
+++ b/spec/lib/reporting/sp_proofing_events_by_uuid_spec.rb
@@ -0,0 +1,154 @@
+require 'rails_helper'
+require 'reporting/sp_proofing_events_by_uuid'
+
+RSpec.describe Reporting::SpProofingEventsByUuid do
+  let(:issuer) { 'super:cool:test:issuer' }
+  let(:agency_abbreviation) { 'DOL' }
+  let(:agency) { Agency.find_by(abbreviation: agency_abbreviation) }
+
+  let(:time_range) { Date.new(2024, 12, 1).all_week(:sunday) }
+
+  let(:deleted_user_uuid) { 'deleted_user_test' }
+  let(:non_agency_user_uuid) { 'non_agency_user_test' }
+  let(:agency_user_login_uuid) { 'agency_user_login_uuid_test' }
+  let(:agency_user_agency_uuid) { 'agency_user_agency_uuid_test' }
+
+  let(:cloudwatch_logs) do
+    [
+      {
+        'login_uuid' => deleted_user_uuid,
+        'workflow_started' => '1',
+        'first_event' => '1.735275676123E12',
+      },
+      {
+        'login_uuid' => non_agency_user_uuid,
+        'workflow_started' => '1',
+        'first_event' => '1.735275676456E12',
+      },
+      {
+        'login_uuid' => agency_user_login_uuid,
+        'workflow_started' => '1',
+        'first_event' => '1.735275676789E12',
+      },
+    ]
+  end
+
+  let(:expect_csv_result) do
+    [
+      ['Date Range', '2024-12-01 - 2024-12-07'],
+      [
+        'UUID',
+        'Workflow Started',
+        'Document Capture Started',
+        'Document Captured',
+        'Selfie Captured',
+        'Document Authentication Passed',
+        'SSN Submitted',
+        'Personal Information Submitted',
+        'Personal Information Verified',
+        'Phone Submitted',
+        'Phone Verified',
+        'Verification Workflow Complete',
+        'Identity Verified for In-Band Users',
+        'Identity Verified for Verify-By-Mail Users',
+        'Identity Verified for Fraud Review Users',
+        'Out-of-Band Verification Pending Seconds',
+        'Agency Handoff Visited',
+        'Agency Handoff Submitted',
+      ],
+      [
+        agency_user_agency_uuid,
+        true,
+        false,
+        false,
+        false,
+        false,
+        false,
+        false,
+        false,
+        false,
+        false,
+        false,
+        false,
+        false,
+        false,
+        0,
+        false,
+        false,
+      ],
+    ]
+  end
+
+  before do
+    create(:user, uuid: non_agency_user_uuid)
+    agency_user = create(:user, uuid: agency_user_login_uuid)
+    create(:agency_identity, user: agency_user, agency:, uuid: agency_user_agency_uuid)
+
+    stub_cloudwatch_logs(cloudwatch_logs)
+  end
+
+  subject(:report) do
+    Reporting::SpProofingEventsByUuid.new(
+      issuers: Array(issuer), agency_abbreviation:, time_range:,
+    )
+  end
+
+  describe '#as_csv' do
+    it 'renders a CSV report with converted UUIDs' do
+      expect(report.as_csv).to eq(expect_csv_result)
+    end
+  end
+
+  describe '#to_csv' do
+    it 'generates a csv' do
+      csv = CSV.parse(report.to_csv, headers: false)
+
+      stringified_csv = expect_csv_result.map { |row| row.map(&:to_s) }
+
+      expect(csv).to eq(stringified_csv)
+    end
+  end
+
+  describe '#as_emailable_reports' do
+    it 'returns an array with an emailable report' do
+      expect(report.as_emailable_reports).to eq(
+        [
+          Reporting::EmailableReport.new(
+            title: 'DOL Proofing Events By UUID',
+            table: expect_csv_result,
+            filename: 'dol_proofing_events_by_uuid',
+          ),
+        ],
+      )
+    end
+  end
+
+  describe '#data' do
+    it 'fetches additional results if 10k results are returned' do
+      cloudwatch_client = double(Reporting::CloudwatchClient)
+      expect(cloudwatch_client).to receive(:fetch).ordered do |args|
+        expect(args[:query]).to_not include('| filter first_event')
+        [
+          {
+            'login_uuid' => agency_user_login_uuid,
+            'workflow_started' => '1',
+            'first_event' => '1.123456E12',
+          },
+        ] * 10000
+      end
+      expect(cloudwatch_client).to receive(:fetch).ordered do |args|
+        expect(args[:query]).to include('| filter first_event > 1.123456E12')
+        [
+          {
+            'login_uuid' => agency_user_login_uuid,
+            'workflow_started' => '1',
+            'first_event' => '1.123456E12',
+          },
+        ]
+      end
+      allow(report).to receive(:cloudwatch_client).and_return(cloudwatch_client)
+
+      expect(report.data.count).to eq(10_001)
+    end
+  end
+end