-
Notifications
You must be signed in to change notification settings - Fork 166
Add cumulative registrations report (LG-8679) #7674
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,90 @@ | ||
| module Reports | ||
| class DailyRegistrationsReport < BaseReport | ||
| REPORT_NAME = 'daily-registrations-report' | ||
|
|
||
| attr_reader :report_date | ||
|
|
||
| def perform(report_date) | ||
| @report_date = report_date | ||
|
|
||
| _latest, path = generate_s3_paths(REPORT_NAME, 'json', now: report_date) | ||
| body = report_body.to_json | ||
|
|
||
| [ | ||
| bucket_name, # default reporting bucket | ||
| IdentityConfig.store.s3_public_reports_enabled && public_bucket_name, | ||
| ].select(&:present?). | ||
| each do |bucket_name| | ||
| upload_file_to_s3_bucket( | ||
| path: path, | ||
| body: body, | ||
| content_type: 'application/json', | ||
| bucket: bucket_name, | ||
| ) | ||
| end | ||
| end | ||
|
|
||
| def finish | ||
| report_date.end_of_day | ||
| end | ||
|
|
||
| def report_body | ||
| results = (total_users.to_a + fully_registered_users.to_a). | ||
| group_by { |row| row['date'] }. | ||
| map do |date, rows| | ||
| { | ||
| date: date, | ||
| total_users: rows.map { |r| r['total_users'] }.compact.first || 0, | ||
| fully_registered_users: rows.map { |r| r['fully_registered_users'] }.compact.first || 0, | ||
| } | ||
| end.sort_by { |elem| elem[:date] } | ||
|
|
||
| { | ||
| finish: finish, | ||
| results: results.as_json, | ||
| } | ||
| end | ||
|
|
||
| def total_users | ||
| params = { | ||
| finish: finish, | ||
| }.transform_values { |v| ActiveRecord::Base.connection.quote(v) } | ||
|
|
||
| sql = format(<<-SQL, params) | ||
| SELECT | ||
| COUNT(*) AS total_users | ||
| , created_at::date AS date | ||
| FROM users | ||
| WHERE | ||
| created_at <= %{finish} | ||
| GROUP BY | ||
| created_at::date | ||
| SQL | ||
|
Comment on lines
+53
to
+62
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is a table scan, ~100,000ms in prod. We could likely speed it up if we added an index on
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Doesn't feel worthwhile if it's only for a report |
||
|
|
||
| transaction_with_timeout do | ||
| ActiveRecord::Base.connection.execute(sql) | ||
| end | ||
| end | ||
|
|
||
| def fully_registered_users | ||
| params = { | ||
| finish: finish, | ||
| }.transform_values { |v| ActiveRecord::Base.connection.quote(v) } | ||
|
|
||
| sql = format(<<-SQL, params) | ||
| SELECT | ||
| COUNT(*) AS fully_registered_users | ||
| , registered_at::date AS date | ||
| FROM registration_logs | ||
| WHERE | ||
| registered_at <= %{finish} | ||
| GROUP BY | ||
| registered_at::date | ||
| SQL | ||
|
Comment on lines
+74
to
+83
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this is indexed already, relatively fast query |
||
|
|
||
| transaction_with_timeout do | ||
| ActiveRecord::Base.connection.execute(sql) | ||
| end | ||
| end | ||
| end | ||
| end | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,104 @@ | ||
| require 'rails_helper' | ||
|
|
||
| RSpec.describe Reports::DailyRegistrationsReport do | ||
| subject(:report) { Reports::DailyRegistrationsReport.new } | ||
|
|
||
| let(:report_date) { Date.new(2021, 3, 1) } | ||
| let(:s3_public_reports_enabled) { true } | ||
| let(:s3_report_bucket_prefix) { 'reports-bucket' } | ||
| let(:s3_report_public_bucket_prefix) { 'public-reports-bucket' } | ||
|
|
||
| before do | ||
| allow(Identity::Hostdata).to receive(:env).and_return('int') | ||
| allow(Identity::Hostdata).to receive(:aws_account_id).and_return('1234') | ||
| allow(Identity::Hostdata).to receive(:aws_region).and_return('us-west-1') | ||
| allow(IdentityConfig.store).to receive(:s3_public_reports_enabled). | ||
| and_return(s3_public_reports_enabled) | ||
| allow(IdentityConfig.store).to receive(:s3_report_bucket_prefix). | ||
| and_return(s3_report_bucket_prefix) | ||
| allow(IdentityConfig.store).to receive(:s3_report_public_bucket_prefix). | ||
| and_return(s3_report_public_bucket_prefix) | ||
|
|
||
| Aws.config[:s3] = { | ||
| stub_responses: { | ||
| put_object: {}, | ||
| }, | ||
| } | ||
| end | ||
|
|
||
| describe '#perform' do | ||
| it 'uploads a file to S3 based on the report date' do | ||
| ['reports-bucket.1234-us-west-1', 'public-reports-bucket-int.1234-us-west-1'].each do |bucket| | ||
| expect(report).to receive(:upload_file_to_s3_bucket).with( | ||
| path: 'int/daily-registrations-report/2021/2021-03-01.daily-registrations-report.json', | ||
| body: kind_of(String), | ||
| content_type: 'application/json', | ||
| bucket: bucket, | ||
| ).exactly(1).time.and_call_original | ||
| end | ||
|
|
||
| expect(report).to receive(:report_body).and_call_original.once | ||
|
|
||
| report.perform(report_date) | ||
| end | ||
|
|
||
| context 'when s3 public reports are disabled' do | ||
| let(:s3_public_reports_enabled) { false } | ||
|
|
||
| it 'only uploads to one bucket' do | ||
| expect(report).to receive(:upload_file_to_s3_bucket).with( | ||
| hash_including( | ||
| path: 'int/daily-registrations-report/2021/2021-03-01.daily-registrations-report.json', | ||
| bucket: 'reports-bucket.1234-us-west-1', | ||
| ), | ||
| ).exactly(1).time.and_call_original | ||
|
|
||
| report.perform(report_date) | ||
| end | ||
| end | ||
|
|
||
| context 'with data' do | ||
| let(:yesterday) { (report_date - 1).in_time_zone('UTC') } | ||
| let(:two_days_ago) { (report_date - 2).in_time_zone('UTC') } | ||
|
|
||
| before do | ||
| # not fully registered | ||
| create_list(:user, 2, created_at: yesterday) | ||
| create_list(:user, 1, created_at: two_days_ago) | ||
|
|
||
| # fully registered | ||
| create_list(:user, 2, created_at: yesterday).each do |user| | ||
| RegistrationLog.create(user: user, registered_at: user.created_at) | ||
| end | ||
| create_list(:user, 1, created_at: two_days_ago).each do |user| | ||
| RegistrationLog.create(user: user, registered_at: user.created_at) | ||
| end | ||
| end | ||
|
|
||
| it 'calculates users and fully registered users by day' do | ||
| expect(report).to receive(:upload_file_to_s3_bucket). | ||
| exactly(2).times do |path:, body:, content_type:, bucket:| | ||
| parsed = JSON.parse(body, symbolize_names: true) | ||
|
|
||
| expect(parsed[:finish]).to eq(report_date.end_of_day.as_json) | ||
| expect(parsed[:results]).to match_array( | ||
| [ | ||
| { | ||
| date: two_days_ago.to_date.as_json, | ||
| total_users: 2, | ||
| fully_registered_users: 1, | ||
| }, | ||
| { | ||
| date: yesterday.to_date.as_json, | ||
| total_users: 4, | ||
| fully_registered_users: 2, | ||
| }, | ||
| ], | ||
| ) | ||
| end | ||
|
|
||
| report.perform(report_date) | ||
| end | ||
| end | ||
| end | ||
| end |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
one note: other reports collect a single day at a time (to keep calculations relatively cheap), and the frontend will make one request per day it's querying.
However, for this report, we always want to know all of time. So rather than have clients make requests going back each day to 2017 (1000+ requests), I opted to save client time so the clients only need to query one "day" to get all time.
I think it's a worthwhile tradeoff, but open to feedback!
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do we want to write over the same file in S3 then? It could get quite large.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
no this will make a new file every day, just like the current reports. A report from last week is about 152kb, so not that big IMO