diff --git a/app/jobs/reports/monthly_account_reuse_report.rb b/app/jobs/reports/monthly_account_reuse_report.rb
new file mode 100644
index 00000000000..82be2eda6c4
--- /dev/null
+++ b/app/jobs/reports/monthly_account_reuse_report.rb
@@ -0,0 +1,173 @@
+require 'csv'
+
+module Reports
+  # Monthly report of how many users have verified identities at more than one
+  # agency, uploaded to S3 as JSON.
+  class MonthlyAccountReuseReport < BaseReport
+    REPORT_NAME = 'monthly-account-reuse-report'
+
+    attr_reader :report_date
+
+    # Builds the report for +report_date+ and uploads it when bucket_name is present.
+    #
+    # @param report_date [Date] a day in the report month; only records dated
+    #   before the first day of that month are counted
+    def perform(report_date)
+      @report_date = report_date
+
+      _latest, path = generate_s3_paths(REPORT_NAME, 'json', now: report_date)
+      body = report_body.to_json
+
+      if bucket_name.present?
+        upload_file_to_s3_bucket(
+          path: path,
+          body: body,
+          content_type: 'application/json',
+          bucket: bucket_name,
+        )
+      end
+    end
+
+    # @return [String] first day of the report month as YYYY-MM-DD
+    def first_day_of_report_month
+      report_date.beginning_of_month.strftime('%Y-%m-%d')
+    end
+
+    # @return [Hash] SQL-quoted values substituted into the report queries
+    def params
+      {
+        query_date: first_day_of_report_month,
+      }.transform_values { |v| ActiveRecord::Base.connection.quote(v) }
+    end
+
+    # Groups users by the number of distinct agencies they have identities with,
+    # keeping only groups with more than one agency.
+    #
+    # @return [Array<Hash>] rows keyed by 'num_users' and 'num_agencies'
+    def agency_reuse_results
+      agency_sql = format(<<-SQL, params)
+        SELECT
+          COUNT(*) AS num_users
+          , agencies_per_user.num_agencies
+        FROM (
+          SELECT
+            COUNT(DISTINCT agencies.id) AS num_agencies
+            , identities.user_id
+          FROM
+            identities
+          JOIN
+            service_providers sp ON identities.service_provider = sp.issuer
+          JOIN
+            agencies ON sp.agency_id = agencies.id
+          WHERE
+            identities.last_ial2_authenticated_at IS NOT NULL
+          AND
+            identities.verified_at < %{query_date}
+          GROUP BY
+            identities.user_id
+        ) agencies_per_user
+        GROUP BY
+          agencies_per_user.num_agencies
+        HAVING agencies_per_user.num_agencies > 1
+        ORDER BY
+          num_agencies ASC
+      SQL
+
+      agency_results = transaction_with_timeout do
+        ActiveRecord::Base.connection.execute(agency_sql)
+      end
+
+      agency_results.as_json
+    end
+
+    # @return [Integer] active profiles activated before the report month
+    def num_active_profiles
+      proofed_sql = format(<<-SQL, params)
+        SELECT
+          COUNT(*) AS num_proofed
+        FROM
+          profiles
+        WHERE
+          profiles.active = TRUE
+        AND
+          profiles.activated_at < %{query_date}
+      SQL
+
+      proofed_results = transaction_with_timeout do
+        ActiveRecord::Base.connection.execute(proofed_sql)
+      end
+
+      proofed_results.first['num_proofed']
+    end
+
+    # @return [String] month before the report date, e.g. "Feb-2021"
+    def stats_month
+      report_date.prev_month(1).strftime('%b-%Y')
+    end
+
+    # Adds a 'percentage' of all proofed profiles to each reuse row and totals them.
+    #
+    # @return [Hash] :reuse_stats, :total_users, :total_percentage, :total_proofed
+    def total_reuse_report
+      reuse_stats = agency_reuse_results
+      total_proofed = num_active_profiles
+
+      reuse_total_users = reuse_stats.sum { |entry| entry['num_users'] }
+      reuse_total_percentage = 0
+
+      # Percentages are skipped when nothing is proofed to avoid dividing by zero
+      if total_proofed > 0
+        reuse_stats.each do |entry|
+          entry['percentage'] = entry['num_users'] / total_proofed.to_f * 100
+          reuse_total_percentage += entry['percentage']
+        end
+      end
+
+      { reuse_stats: reuse_stats,
+        total_users: reuse_total_users,
+        total_percentage: reuse_total_percentage,
+        total_proofed: total_proofed }
+    end
+
+    # @return [Array<Array>] report rows: title, header, one row per agency count,
+    #   the totals row, and the proofed-identities summary
+    def report_csv
+      monthly_reuse_report = total_reuse_report
+
+      csv_array = []
+      csv_array << ["IDV app reuse rate #{stats_month}"]
+      csv_array << ['Num. SPs', 'Num. users', 'Percentage']
+
+      monthly_reuse_report[:reuse_stats].each do |result_entry|
+        csv_array << [
+          result_entry['num_agencies'],
+          result_entry['num_users'],
+          result_entry['percentage'],
+        ]
+      end
+      csv_array << [
+        'Total (all >1)',
+        monthly_reuse_report[:total_users],
+        monthly_reuse_report[:total_percentage],
+      ]
+
+      csv_array << []
+      csv_array << ['Total proofed identities']
+      csv_array << [
+        "Total proofed identities (#{stats_month})",
+        monthly_reuse_report[:total_proofed],
+      ]
+
+      csv_array
+    end
+
+    # @return [Hash] payload serialized to JSON by #perform
+    def report_body
+      {
+        report_date: first_day_of_report_month,
+        month: stats_month,
+        results: [report_csv],
+      }
+    end
+  end
+end
diff --git a/config/initializers/job_configurations.rb b/config/initializers/job_configurations.rb
index 8c1d45db7b9..8d62b722c2b 100644
--- a/config/initializers/job_configurations.rb
+++ b/config/initializers/job_configurations.rb
@@ -4,6 +4,7 @@
 cron_24h = '0 0 * * *'
 gpo_cron_24h = '0 10 * * *' # 10am UTC is 5am EST/6am EDT
 cron_1w = '0 0 * * 0'
+cron_1st_of_mo = '0 0 1 * *'
 
 if defined?(Rails::Console)
   Rails.logger.info 'job_configurations: console detected, skipping schedule'
@@ -188,6 +189,11 @@
         cron: cron_24h,
         args: -> { [Time.zone.today] },
       },
+      monthly_account_reuse_report: {
+        class: 'Reports::MonthlyAccountReuseReport',
+        cron: cron_1st_of_mo,
+        args: -> { [Time.zone.today] },
+      },
     }.compact
   end
   # rubocop:enable Metrics/BlockLength
diff --git a/spec/jobs/reports/monthly_account_reuse_report_spec.rb b/spec/jobs/reports/monthly_account_reuse_report_spec.rb
new file mode 100644
index 00000000000..62cd578a58e
--- /dev/null
+++ b/spec/jobs/reports/monthly_account_reuse_report_spec.rb
@@ -0,0 +1,147 @@
+require 'rails_helper'
+require 'csv'
+
+RSpec.describe Reports::MonthlyAccountReuseReport do
+  subject(:report) { Reports::MonthlyAccountReuseReport.new }
+
+  let(:report_date) { Date.new(2021, 3, 1) }
+  let(:s3_report_bucket_prefix) { 'reports-bucket' }
+  let(:s3_report_path) do
+    'int/monthly-account-reuse-report/2021/2021-03-01.monthly-account-reuse-report.json'
+  end
+
+  before do
+    allow(Identity::Hostdata).to receive(:env).and_return('int')
+    allow(Identity::Hostdata).to receive(:aws_account_id).and_return('1234')
+    allow(Identity::Hostdata).to receive(:aws_region).and_return('us-west-1')
+    allow(IdentityConfig.store).to receive(:s3_report_bucket_prefix).
+      and_return(s3_report_bucket_prefix)
+
+    Aws.config[:s3] = {
+      stub_responses: {
+        put_object: {},
+      },
+    }
+  end
+
+  describe '#perform' do
+    it 'uploads a file to S3 based on the report date' do
+      expect(report).to receive(:upload_file_to_s3_bucket).with(
+        path: s3_report_path,
+        body: kind_of(String),
+        content_type: 'application/json',
+        bucket: 'reports-bucket.1234-us-west-1',
+      ).exactly(1).time.and_call_original
+
+      expect(report).to receive(:report_body).and_call_original.once
+
+      report.perform(report_date)
+    end
+
+    context 'with data' do
+      let(:in_query) { report_date - 12.days }
+      let(:out_of_query) { report_date + 12.days }
+
+      let(:agency) { create(:agency, name: 'The Agency') }
+      let(:agency2) { create(:agency, name: 'The Other Agency') }
+      let(:agency3) { create(:agency, name: 'The Other Other Agency') }
+      let(:sp_a) { 'a' }
+      let(:sp_b) { 'b' }
+      let(:sp_c) { 'c' }
+
+      def create_identity(id, provider, verified_time)
+        ServiceProviderIdentity.create(
+          user_id: id, service_provider: provider,
+          last_ial2_authenticated_at: in_query, verified_at: verified_time
+        )
+      end
+
+      before do
+        create(
+          :service_provider,
+          issuer: sp_a,
+          iaa: 'iaa123',
+          friendly_name: 'The App',
+          agency: agency,
+        )
+        create(
+          :service_provider,
+          issuer: sp_b,
+          iaa: 'iaa456',
+          friendly_name: 'The Other App',
+          agency: agency2,
+        )
+        create(
+          :service_provider,
+          issuer: sp_c,
+          iaa: 'iaa789',
+          friendly_name: 'The Other Other App',
+          agency: agency3,
+        )
+
+        # Seed the database with data to be queried
+        #
+        # User 1 has 3 SPs and 3 show up in the query
+        # User 2 has 3 SPs and 3 show up in the query
+        # User 3 has 3 SPs and only 2 show up in the query
+        # User 4 has 2 SPs and 2 show up in the query
+        # User 5 has 2 SPs and 2 show up in the query
+        # User 6 has 2 SPs and only 1 shows up in the query
+        # User 7 has 1 SP and 1 shows up in the query
+        # User 8 has 1 SP and 0 show up in the query
+        #
+        # This will give 2 users with 3 SPs and 3 users with 2 SPs for the report
+
+        users_to_query = [
+          { id: 1, sp: [sp_a, sp_b, sp_c], timestamp: [in_query, in_query, in_query] },
+          { id: 2, sp: [sp_a, sp_b, sp_c], timestamp: [in_query, in_query, in_query] },
+          { id: 3, sp: [sp_a, sp_b, sp_c], timestamp: [in_query, in_query, out_of_query] },
+          { id: 4, sp: [sp_a, sp_b], timestamp: [in_query, in_query] },
+          { id: 5, sp: [sp_a, sp_b], timestamp: [in_query, in_query] },
+          { id: 6, sp: [sp_a, sp_b], timestamp: [in_query, out_of_query] },
+          { id: 7, sp: [sp_a], timestamp: [in_query] },
+          { id: 8, sp: [sp_a], timestamp: [out_of_query] },
+        ]
+
+        users_to_query.each do |user|
+          user[:sp].each_with_index do |sp, i|
+            create_identity(user[:id], sp, user[:timestamp][i])
+          end
+        end
+
+        # Create active profiles for total_proofed_identities
+        # These 13 profiles will yield 10 active profiles in the results
+        (1..10).each do |_|
+          create(:profile, :active, activated_at: in_query)
+        end
+        (1..3).each do |_|
+          create(:profile, :active, activated_at: out_of_query)
+        end
+      end
+
+      it 'aggregates by issuer' do
+        expect(report).to receive(:upload_file_to_s3_bucket).
+          exactly(1).times do |path:, body:, content_type:, bucket:|
+          parsed = JSON.parse(body, symbolize_names: true)
+
+          expect(parsed[:report_date]).to eq(report_date.strftime('%Y-%m-01'))
+          expect(parsed[:month]).to eq(report_date.prev_month(1).strftime('%b-%Y'))
+          actual_csv = parsed[:results]
+          expected_csv = [
+            ['IDV app reuse rate Feb-2021'],
+            ['Num. SPs', 'Num. users', 'Percentage'],
+            [2, 3, 30.0],
+            [3, 2, 20.0],
+            ['Total (all >1)', 5, 50.0],
+            [],
+            ['Total proofed identities'],
+            ['Total proofed identities (Feb-2021)', 10],
+          ]
+          expect(actual_csv.first).to eq(expected_csv)
+        end
+
+        report.perform(report_date)
+      end
+    end
+  end
+end