Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
90 changes: 90 additions & 0 deletions app/jobs/reports/daily_registrations_report.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
module Reports
class DailyRegistrationsReport < BaseReport
REPORT_NAME = 'daily-registrations-report'

attr_reader :report_date

def perform(report_date)
@report_date = report_date

_latest, path = generate_s3_paths(REPORT_NAME, 'json', now: report_date)
body = report_body.to_json

[
bucket_name, # default reporting bucket
IdentityConfig.store.s3_public_reports_enabled && public_bucket_name,
].select(&:present?).
each do |bucket_name|
upload_file_to_s3_bucket(
path: path,
body: body,
content_type: 'application/json',
bucket: bucket_name,
)
end
end

def finish
report_date.end_of_day
end

def report_body
results = (total_users.to_a + fully_registered_users.to_a).
group_by { |row| row['date'] }.
map do |date, rows|
{
date: date,
total_users: rows.map { |r| r['total_users'] }.compact.first || 0,
fully_registered_users: rows.map { |r| r['fully_registered_users'] }.compact.first || 0,
}
end.sort_by { |elem| elem[:date] }

{
finish: finish,
results: results.as_json,
}
Comment on lines +42 to +45
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

one note: other reports collect a single day at a time (to keep calculations relatively cheap), and the frontend will make one request per day it's querying.

However, for this report, we always want to know all of time. So rather than have clients make requests going back each day to 2017 (1000+ requests), I opted to save client time so the clients only need to query one "day" to get all time.

I think it's a worthwhile tradeoff, but open to feedback!

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we want to write over the same file in S3 then? It could get quite large.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

no this will make a new file every day, just like the current reports. A report from last week is about 152kb, so not that big IMO

end

def total_users
params = {
finish: finish,
}.transform_values { |v| ActiveRecord::Base.connection.quote(v) }

sql = format(<<-SQL, params)
SELECT
COUNT(*) AS total_users
, created_at::date AS date
FROM users
WHERE
created_at <= %{finish}
GROUP BY
created_at::date
SQL
Comment on lines +53 to +62
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a table scan, ~100,000ms in prod. We could likely speed it up if we added an index on users.created_at, unsure how others feel about the cost/benefit of that

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Doesn't feel worthwhile if it's only for a report


transaction_with_timeout do
ActiveRecord::Base.connection.execute(sql)
end
end

def fully_registered_users
params = {
finish: finish,
}.transform_values { |v| ActiveRecord::Base.connection.quote(v) }

sql = format(<<-SQL, params)
SELECT
COUNT(*) AS fully_registered_users
, registered_at::date AS date
FROM registration_logs
WHERE
registered_at <= %{finish}
GROUP BY
registered_at::date
SQL
Comment on lines +74 to +83
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is indexed already, relatively fast query


transaction_with_timeout do
ActiveRecord::Base.connection.execute(sql)
end
end
end
end
6 changes: 6 additions & 0 deletions config/initializers/job_configurations.rb
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,12 @@
cron: cron_24h,
args: -> { [Time.zone.yesterday] },
},
# Send daily registrations report to S3
daily_registrations: {
class: 'Reports::DailyRegistrationsReport',
cron: cron_24h,
args: -> { [Time.zone.yesterday] },
},
# Sync opted out phone numbers from AWS
phone_number_opt_out_sync_job: {
class: 'PhoneNumberOptOutSyncJob',
Expand Down
104 changes: 104 additions & 0 deletions spec/jobs/reports/daily_registration_report_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
require 'rails_helper'

RSpec.describe Reports::DailyRegistrationsReport do
subject(:report) { Reports::DailyRegistrationsReport.new }

let(:report_date) { Date.new(2021, 3, 1) }
let(:s3_public_reports_enabled) { true }
let(:s3_report_bucket_prefix) { 'reports-bucket' }
let(:s3_report_public_bucket_prefix) { 'public-reports-bucket' }

before do
allow(Identity::Hostdata).to receive(:env).and_return('int')
allow(Identity::Hostdata).to receive(:aws_account_id).and_return('1234')
allow(Identity::Hostdata).to receive(:aws_region).and_return('us-west-1')
allow(IdentityConfig.store).to receive(:s3_public_reports_enabled).
and_return(s3_public_reports_enabled)
allow(IdentityConfig.store).to receive(:s3_report_bucket_prefix).
and_return(s3_report_bucket_prefix)
allow(IdentityConfig.store).to receive(:s3_report_public_bucket_prefix).
and_return(s3_report_public_bucket_prefix)

Aws.config[:s3] = {
stub_responses: {
put_object: {},
},
}
end

describe '#perform' do
it 'uploads a file to S3 based on the report date' do
['reports-bucket.1234-us-west-1', 'public-reports-bucket-int.1234-us-west-1'].each do |bucket|
expect(report).to receive(:upload_file_to_s3_bucket).with(
path: 'int/daily-registrations-report/2021/2021-03-01.daily-registrations-report.json',
body: kind_of(String),
content_type: 'application/json',
bucket: bucket,
).exactly(1).time.and_call_original
end

expect(report).to receive(:report_body).and_call_original.once

report.perform(report_date)
end

context 'when s3 public reports are disabled' do
let(:s3_public_reports_enabled) { false }

it 'only uploads to one bucket' do
expect(report).to receive(:upload_file_to_s3_bucket).with(
hash_including(
path: 'int/daily-registrations-report/2021/2021-03-01.daily-registrations-report.json',
bucket: 'reports-bucket.1234-us-west-1',
),
).exactly(1).time.and_call_original

report.perform(report_date)
end
end

context 'with data' do
let(:yesterday) { (report_date - 1).in_time_zone('UTC') }
let(:two_days_ago) { (report_date - 2).in_time_zone('UTC') }

before do
# not fully registered
create_list(:user, 2, created_at: yesterday)
create_list(:user, 1, created_at: two_days_ago)

# fully registered
create_list(:user, 2, created_at: yesterday).each do |user|
RegistrationLog.create(user: user, registered_at: user.created_at)
end
create_list(:user, 1, created_at: two_days_ago).each do |user|
RegistrationLog.create(user: user, registered_at: user.created_at)
end
end

it 'calculates users and fully registered users by day' do
expect(report).to receive(:upload_file_to_s3_bucket).
exactly(2).times do |path:, body:, content_type:, bucket:|
parsed = JSON.parse(body, symbolize_names: true)

expect(parsed[:finish]).to eq(report_date.end_of_day.as_json)
expect(parsed[:results]).to match_array(
[
{
date: two_days_ago.to_date.as_json,
total_users: 2,
fully_registered_users: 1,
},
{
date: yesterday.to_date.as_json,
total_users: 4,
fully_registered_users: 2,
},
],
)
end

report.perform(report_date)
end
end
end
end