Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 24 additions & 5 deletions app/jobs/data_warehouse/table_summary_stats_export_job.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,15 @@ module DataWarehouse
class TableSummaryStatsExportJob < BaseJob
REPORT_NAME = 'table_summary_stats'

# Table names that are skipped entirely while collecting per-table stats:
# any table listed here is passed over before its id column is even checked.
TABLE_EXCLUSION_LIST = %w[
agency_identities
].freeze

# Maps a table name to the timestamp column to filter on when the default
# 'created_at' should not be used for that table. The chosen column name is
# also reported back in the stats row under the 'timestamp_column' key.
TIMESTAMP_OVERRIDE = {
'sp_return_logs' => 'returned_at',
'registration_logs' => 'registered_at',
}.freeze

def perform(timestamp)
return if data_warehouse_disabled?

Expand All @@ -22,6 +31,8 @@ def fetch_table_max_ids_and_counts(timestamp)
def max_ids_and_counts(timestamp)
active_tables = {}
ActiveRecord::Base.connection.tables.each do |table|
next if TABLE_EXCLUSION_LIST.include?(table)

if table_has_id_column?(table)
active_tables[table] = fetch_max_id_and_count(table, timestamp)
end
Expand All @@ -39,15 +50,23 @@ def table_has_id_column?(table)
# Runs the summary query for a single table and returns its stats row.
#
# The row is the first record returned by the connection, containing
# 'max_id' (0 when the table is empty) and 'row_count', with an added
# 'timestamp_column' entry naming the column used to bound the query.
#
# The timestamp column defaults to 'created_at' unless the table has an
# entry in TIMESTAMP_OVERRIDE. When the table does not actually have that
# column, no time filter is applied and 'timestamp_column' is nil, so the
# reported column always matches the filter that was really used.
#
# @param table [String] unquoted table name
# @param timestamp [Object] upper bound (inclusive) for the timestamp column;
#   quoted through the connection before being placed in the SQL
# @return [Hash] stats row with 'max_id', 'row_count' and 'timestamp_column'
def fetch_max_id_and_count(table, timestamp)
  connection = ActiveRecord::Base.connection
  quoted_table = connection.quote_table_name(table)

  # Resolve the filter column once so the query and the reported value
  # cannot drift apart.
  timestamp_column = TIMESTAMP_OVERRIDE.fetch(table, 'created_at')
  timestamp_column = nil unless table_has_column?(table, timestamp_column)

  query = <<-SQL
    SELECT COALESCE(MAX(id), 0) AS max_id, COUNT(*) AS row_count
    FROM #{quoted_table}
  SQL
  if timestamp_column
    # Quote the identifier the same way the table name is quoted.
    quoted_column = connection.quote_column_name(timestamp_column)
    quoted_timestamp = connection.quote(timestamp)
    query += " WHERE #{quoted_column} <= #{quoted_timestamp}"
  end

  result = connection.execute(query).first
  result['timestamp_column'] = timestamp_column
  result
end

def table_has_column?(table, column_name)
Expand Down
80 changes: 78 additions & 2 deletions spec/jobs/data_warehouse/table_summary_stats_export_job_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,21 @@
# Cut-off passed to the job: end of day on 2024-10-10, UTC.
let(:timestamp) { Date.new(2024, 10, 10).in_time_zone('UTC').end_of_day }
let(:job) { described_class.new }
let(:expected_bucket) { 'login-gov-analytics-export-test-1234-us-west-2' }
# Single definition only: an earlier `let(:test_on_tables) { ['users'] }`
# was immediately overridden by this one and has been dropped.
let(:test_on_tables) { ['agencies', 'users'] }
let(:s3_data_warehouse_bucket_prefix) { 'login-gov-analytics-export' }
let(:data_warehouse_enabled) { true }

# Expected stats serialized as JSON; key order follows test_on_tables.
# NOTE(review): 'agencies' is expected to report a nil timestamp column,
# presumably because it has no created_at column -- confirm against schema.
let(:expected_json) do
  {
    'agencies' => {
      'max_id' => 19,
      'row_count' => 19,
      'timestamp_column' => nil,
    },
    'users' => {
      'max_id' => 2,
      'row_count' => 2,
      'timestamp_column' => 'created_at',
    },
  }.to_json
end
Expand Down Expand Up @@ -66,7 +72,12 @@
end

context 'when tables are empty' do
let(:expected_empty_json) { { 'users' => { 'max_id' => 0, 'row_count' => 0 } }.to_json }
let(:test_on_tables) { ['users'] }
let(:expected_empty_json) do
{ 'users' => { 'max_id' => 0,
'row_count' => 0,
'timestamp_column' => 'created_at' } }.to_json
end

before do
User.delete_all # Clear the User table to simulate emptiness
Expand Down Expand Up @@ -97,6 +108,65 @@
end
end

context 'pulls correct timestamp column value' do
  # Expected stats per table, listed in the same order as the stubbed table
  # list below because the comparison is done on the serialized JSON string.
  let(:expected_json) do
    users_stats = { 'max_id' => 2, 'row_count' => 2, 'timestamp_column' => 'created_at' }
    sp_return_logs_stats = { 'max_id' => 1, 'row_count' => 1, 'timestamp_column' => 'returned_at' }
    agencies_stats = { 'max_id' => 19, 'row_count' => 19, 'timestamp_column' => nil }

    {
      'users' => users_stats,
      'sp_return_logs' => sp_return_logs_stats,
      'agencies' => agencies_stats,
    }.to_json
  end

  before do
    # Restrict the job to three tables: default column, overridden column,
    # and one expected to report no timestamp column.
    stubbed_tables = %w[users sp_return_logs agencies]
    allow(ActiveRecord::Base.connection).to receive(:tables).and_return(stubbed_tables)
  end

  it 'generates correct values without timestamp column' do
    actual_json = job.fetch_table_max_ids_and_counts(timestamp).to_json

    expect(actual_json).to eq(expected_json)
  end
end

context 'when tables should be excluded' do
  # 'agency_identities' is on the job's exclusion list, so only 'users'
  # is expected in the output.
  let(:test_on_tables) { %w[agency_identities users] }
  let(:expected_json) do
    users_stats = { 'max_id' => 2, 'row_count' => 2, 'timestamp_column' => 'created_at' }

    { 'users' => users_stats }.to_json
  end

  before do
    connection = ActiveRecord::Base.connection
    allow(connection).to receive(:tables).and_return(test_on_tables)
  end

  it 'excludes tables in the exclusion list' do
    stats = job.fetch_table_max_ids_and_counts(timestamp)

    expect(stats.to_json).to eq(expected_json)
    expect(stats.keys).not_to include('agency_identities')
  end
end

context 'when uploading to S3' do
it 'uploads a file to S3 based on the report date' do
expect(job).to receive(:upload_file_to_s3_bucket).with(
Expand All @@ -112,5 +182,11 @@
# Seeds the rows the examples count: two users and one SP return log, all
# timestamped before the `timestamp` cut-off.
def add_data_to_tables
  an_hour_before = timestamp - 1.hour
  a_day_before = timestamp - 1.day

  User.create!(id: 1, created_at: an_hour_before)
  User.create!(id: 2, created_at: a_day_before)

  SpReturnLog.create!(
    id: 1,
    requested_at: a_day_before,
    returned_at: a_day_before,
    request_id: 1,
    ial: 1,
    issuer: 'foo',
  )
end
end