Skip to content
162 changes: 162 additions & 0 deletions app/jobs/reports/monthly_account_reuse_report.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
require 'csv'

module Reports
class MonthlyAccountReuseReport < BaseReport
REPORT_NAME = 'monthly-account-reuse-report'

attr_reader :report_date

# Builds the monthly account reuse report for the given date and, when a
# report bucket is configured, uploads it to S3 as a JSON document.
#
# @param report_date [Date] date the report runs for (the visible callers pass a Date);
#   it is stored on the instance and also passed to generate_s3_paths as `now:`
# @return the result of upload_file_to_s3_bucket, or nil when no bucket is configured
def perform(report_date)
  @report_date = report_date

  # Only the dated path is used; the "latest" path is intentionally discarded.
  _latest_path, dated_path = generate_s3_paths(REPORT_NAME, 'json', now: report_date)
  json_body = report_body.to_json

  return unless bucket_name.present?

  upload_file_to_s3_bucket(
    path: dated_path,
    body: json_body,
    content_type: 'application/json',
    bucket: bucket_name,
  )
end

# First day of the month that contains report_date, formatted as YYYY-MM-DD.
# Serves as both the SQL cutoff date and the report's date stamp.
#
# @return [String]
def first_day_of_report_month
  month_start = report_date.beginning_of_month
  month_start.strftime('%Y-%m-%d')
end

# Values substituted into the report SQL templates. Each value is quoted by
# the database connection before it is interpolated into the query text.
#
# @return [Hash{Symbol => String}] currently only :query_date
def params
  cutoff = first_day_of_report_month
  { query_date: ActiveRecord::Base.connection.quote(cutoff) }
end

# Counts how many users have identities at each number of distinct agencies.
#
# The inner query counts, per user, the distinct agencies reached through
# identities -> service_providers -> agencies, considering only identities
# with a non-null last_ial2_authenticated_at that were verified before the
# first day of the report month. The outer query groups users by that agency
# count, keeps only counts greater than one, and orders them ascending.
#
# @return the query result converted with as_json; each row carries the
#   num_users and num_agencies columns
def agency_reuse_results
  # The heredoc must contain SQL only: any stray text inside it is sent to the
  # database verbatim and breaks the query.
  agency_sql = format(<<~SQL, params)
    SELECT
    COUNT(*) AS num_users
    , agencies_per_user.num_agencies
    FROM (
    SELECT
    COUNT(DISTINCT agencies.id) AS num_agencies
    , identities.user_id
    FROM
    identities
    JOIN
    service_providers sp ON identities.service_provider = sp.issuer
    JOIN
    agencies ON sp.agency_id = agencies.id
    WHERE
    identities.last_ial2_authenticated_at IS NOT NULL
    AND
    identities.verified_at < %{query_date}
    GROUP BY
    identities.user_id
    ) agencies_per_user
    GROUP BY
    agencies_per_user.num_agencies
    HAVING agencies_per_user.num_agencies > 1
    ORDER BY
    num_agencies ASC
  SQL

  agency_results = transaction_with_timeout do
    ActiveRecord::Base.connection.execute(agency_sql)
  end

  agency_results.as_json
end

# Number of profiles that are active and were activated before the first day
# of the report month.
#
# @return the num_proofed column of the first (only) result row
def num_active_profiles
  sql_template = <<~SQL
    SELECT
    COUNT(*) AS num_proofed
    FROM
    profiles
    WHERE
    profiles.active = TRUE
    AND
    profiles.activated_at < %{query_date}
  SQL
  count_sql = format(sql_template, params)

  rows = transaction_with_timeout { ActiveRecord::Base.connection.execute(count_sql) }

  rows.first['num_proofed']
end

# Label for the month the statistics describe: the month before report_date,
# formatted as abbreviated month name and year (e.g. "Feb-2021").
#
# @return [String]
def stats_month
  previous_month = report_date.prev_month(1)
  previous_month.strftime('%b-%Y')
end

# Combines the per-agency-count reuse rows with the total number of active
# profiles, adding a percentage to each row and computing overall totals.
#
# A 'percentage' key (share of total proofed profiles, 0-100) is written onto
# each row only when there is at least one active profile; otherwise rows are
# left untouched and the total percentage stays 0.
#
# @return [Hash] with :reuse_stats (rows), :total_users, :total_percentage
#   and :total_proofed
def total_reuse_report
  reuse_stats = agency_reuse_results
  total_proofed = num_active_profiles

  reuse_total_users = reuse_stats.sum { |row| row['num_users'] }
  reuse_total_percentage = 0

  # The emptiness check comes first so total_proofed is only compared when
  # there are rows to annotate.
  if !reuse_stats.empty? && total_proofed > 0
    reuse_stats.each do |row|
      row['percentage'] = row['num_users'] / total_proofed.to_f * 100
      reuse_total_percentage += row['percentage']
    end
  end

  {
    reuse_stats: reuse_stats,
    total_users: reuse_total_users,
    total_percentage: reuse_total_percentage,
    total_proofed: total_proofed,
  }
end

# Lays the reuse report out as an array of CSV-style rows: a title row, a
# column header row, one row per agency-count bucket, a totals row, a blank
# row, and finally the total proofed identities section.
#
# @return [Array<Array>] rows in display order
def report_csv
  report = total_reuse_report

  heading_rows = [
    ["IDV app reuse rate #{stats_month}"],
    ['Num. SPs', 'Num. users', 'Percentage'],
  ]

  bucket_rows = report[:reuse_stats].map do |entry|
    [entry['num_agencies'], entry['num_users'], entry['percentage']]
  end

  summary_rows = [
    ['Total (all >1)', report[:total_users], report[:total_percentage]],
    [],
    ['Total proofed identities'],
    ["Total proofed identities (#{stats_month})", report[:total_proofed]],
  ]

  heading_rows + bucket_rows + summary_rows
end

# Top-level report payload (serialized to JSON by the caller): the report
# date, the month label for the statistics, and the CSV rows wrapped in a
# single-element results array.
#
# @return [Hash] with :report_date, :month and :results
def report_body
  payload = {}
  payload[:report_date] = first_day_of_report_month
  payload[:month] = stats_month
  payload[:results] = [report_csv]
  payload
end
end
end
6 changes: 6 additions & 0 deletions config/initializers/job_configurations.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
# Cron expressions use the five-field form: minute hour day-of-month month day-of-week.
cron_24h = '0 0 * * *' # every day at 00:00
gpo_cron_24h = '0 10 * * *' # 10am UTC is 5am EST/6am EDT
cron_1w = '0 0 * * 0' # every Sunday at 00:00
cron_1st_of_mo = '0 0 1 * *' # 00:00 on the 1st day of every month

if defined?(Rails::Console)
Rails.logger.info 'job_configurations: console detected, skipping schedule'
Expand Down Expand Up @@ -188,6 +189,11 @@
cron: cron_24h,
args: -> { [Time.zone.today] },
},
# Monthly account reuse report: scheduled with cron_1st_of_mo and invoked with
# today's date (per Time.zone) as its single argument.
monthly_account_reuse_report: {
class: 'Reports::MonthlyAccountReuseReport',
cron: cron_1st_of_mo,
args: -> { [Time.zone.today] },
},
}.compact
end
# rubocop:enable Metrics/BlockLength
Expand Down
147 changes: 147 additions & 0 deletions spec/jobs/reports/monthly_account_reuse_report_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
require 'rails_helper'
require 'csv'

RSpec.describe Reports::MonthlyAccountReuseReport do
subject(:report) { Reports::MonthlyAccountReuseReport.new }

let(:report_date) { Date.new(2021, 3, 1) }
let(:s3_report_bucket_prefix) { 'reports-bucket' }
let(:s3_report_path) do
'int/monthly-account-reuse-report/2021/2021-03-01.monthly-account-reuse-report.json'
end

before do
  # Pin the host environment so the S3 path and bucket name are predictable
  allow(Identity::Hostdata).to receive_messages(
    env: 'int',
    aws_account_id: '1234',
    aws_region: 'us-west-1',
  )
  allow(IdentityConfig.store).to receive_messages(
    s3_report_bucket_prefix: s3_report_bucket_prefix,
  )

  # Have the AWS SDK answer put_object with a stubbed empty response
  Aws.config[:s3] = { stub_responses: { put_object: {} } }
end

describe '#perform' do
it 'uploads a file to S3 based on the report date' do
  expected_upload = {
    path: s3_report_path,
    body: kind_of(String),
    content_type: 'application/json',
    bucket: 'reports-bucket.1234-us-west-1',
  }

  # Both the upload and the report generation must happen exactly once
  expect(report).to receive(:upload_file_to_s3_bucket).
    with(**expected_upload).once.and_call_original
  expect(report).to receive(:report_body).once.and_call_original

  report.perform(report_date)
end

context 'with data' do
let(:in_query) { report_date - 12.days }
let(:out_of_query) { report_date + 12.days }

let(:agency) { create(:agency, name: 'The Agency') }
let(:agency2) { create(:agency, name: 'The Other Agency') }
let(:agency3) { create(:agency, name: 'The Other Other Agency') }
let(:sp_a) { 'a' }
let(:sp_b) { 'b' }
let(:sp_c) { 'c' }

# Persists a ServiceProviderIdentity linking a user to a service provider.
#
# Uses create! so that a record which fails validation raises immediately
# instead of being silently dropped and skewing the counts under test.
#
# @param id [Integer] user id the identity belongs to
# @param provider [String] service provider issuer
# @param verified_time verified_at value; controls whether the identity falls
#   before the report cutoff
# @return [ServiceProviderIdentity] the saved record
def create_identity(id, provider, verified_time)
  ServiceProviderIdentity.create!(
    user_id: id,
    service_provider: provider,
    last_ial2_authenticated_at: in_query,
    verified_at: verified_time,
  )
end

before do
  # One service provider per agency, so each distinct SP a user visits counts
  # as a distinct agency in the report.
  add_provider = lambda do |issuer, iaa, friendly_name, owning_agency|
    create(
      :service_provider,
      issuer: issuer,
      iaa: iaa,
      friendly_name: friendly_name,
      agency: owning_agency,
    )
  end
  add_provider.call(sp_a, 'iaa123', 'The App', agency)
  add_provider.call(sp_b, 'iaa456', 'The Other App', agency2)
  add_provider.call(sp_c, 'iaa789', 'The Other Other App', agency3)

  # Identities to seed, keyed by user id; each pair is [issuer, verified_at].
  # Only identities verified at in_query fall before the report cutoff.
  #
  # Users 1-2: 3 SPs, all 3 counted
  # User 3:    3 SPs, only 2 counted
  # Users 4-5: 2 SPs, both counted
  # User 6:    2 SPs, only 1 counted
  # User 7:    1 SP, counted
  # User 8:    1 SP, not counted
  #
  # Expected report: 2 users with 3 SPs and 3 users with 2 SPs
  identities_by_user = {
    1 => [[sp_a, in_query], [sp_b, in_query], [sp_c, in_query]],
    2 => [[sp_a, in_query], [sp_b, in_query], [sp_c, in_query]],
    3 => [[sp_a, in_query], [sp_b, in_query], [sp_c, out_of_query]],
    4 => [[sp_a, in_query], [sp_b, in_query]],
    5 => [[sp_a, in_query], [sp_b, in_query]],
    6 => [[sp_a, in_query], [sp_b, out_of_query]],
    7 => [[sp_a, in_query]],
    8 => [[sp_a, out_of_query]],
  }

  identities_by_user.each do |user_id, identities|
    identities.each do |issuer, verified_time|
      create_identity(user_id, issuer, verified_time)
    end
  end

  # 13 active profiles in total; only the 10 activated before the cutoff
  # should be reported as proofed identities.
  10.times { create(:profile, :active, activated_at: in_query) }
  3.times { create(:profile, :active, activated_at: out_of_query) }
end

it 'aggregates by issuer' do
  expected_rows = [
    ['IDV app reuse rate Feb-2021'],
    ['Num. SPs', 'Num. users', 'Percentage'],
    [2, 3, 30.0],
    [3, 2, 20.0],
    ['Total (all >1)', 5, 50.0],
    [],
    ['Total proofed identities'],
    ['Total proofed identities (Feb-2021)', 10],
  ]

  # Intercept the upload and inspect the JSON body that would be sent to S3
  expect(report).to receive(:upload_file_to_s3_bucket).
    once do |path:, body:, content_type:, bucket:|
      payload = JSON.parse(body, symbolize_names: true)

      expect(payload[:report_date]).to eq(report_date.strftime('%Y-%m-01'))
      expect(payload[:month]).to eq(report_date.prev_month(1).strftime('%b-%Y'))
      expect(payload[:results].first).to eq(expected_rows)
    end

  report.perform(report_date)
end
end
end
end