Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
7fc68fd
Initial cloudwatch query script to summarize events
mdiarra3 Dec 10, 2024
f0ed646
Query cloudwatch and get user events
mdiarra3 Dec 10, 2024
f0a3390
add timestamp remove limit
mdiarra3 Dec 10, 2024
4bc5435
[Hackathon] Allow sourcing events from stdin (#11619)
matthinz Dec 11, 2024
19e6afe
[Hackathon] Add ExampleMatcher (#11622)
matthinz Dec 11, 2024
9894989
use optparse to allow command options/defaults (#11627)
solipet Dec 12, 2024
9dca95a
[HACKATHON] Initial crack at an IdV matcher (#11624)
matthinz Dec 12, 2024
87223ae
[HACKATHON] Output formatting tweaks (#11635)
matthinz Dec 12, 2024
3e5859b
[HACKATHON] Minor tweaks (#11637)
matthinz Dec 12, 2024
c8555bc
report on TrueID success/failure (#11638)
solipet Dec 12, 2024
d60a9f4
Try to identify IDV abandonment (#11639)
matthinz Dec 12, 2024
afa3fd7
Login hackathon 2024 user narrative account deletion (#11629)
mdiarra3 Dec 16, 2024
d5d3bc7
read events from file without changing stdin
eileen-nava Dec 16, 2024
23fa124
remove ipp from gpo code submission event
eileen-nava Dec 16, 2024
32a68c8
update example documentation in script
eileen-nava Dec 16, 2024
fb395d7
Merge pull request #11651 from 18F/em/fork-hackathon-user-narrative
eileen-nava Dec 20, 2024
536623a
Merge branch 'main' into login-hackathon-2024-user-narrative
matthinz Dec 30, 2024
295a676
Update lib/event_summarizer/vendor_result_evaluators/aamva.rb
matthinz Dec 30, 2024
dd95b76
Start writing a spec
matthinz Dec 30, 2024
1a78307
Merge branch 'main' into login-hackathon-2024-user-narrative
matthinz Dec 31, 2024
4d07f37
Tidy up logic in IV result evaluator
matthinz Dec 31, 2024
e03cc73
Set event['name'] if not already set
matthinz Dec 31, 2024
95e069c
Fix typo
matthinz Dec 31, 2024
3e3b4c9
Use Eastern time zone by default
matthinz Dec 31, 2024
e6308fd
Update pluralization code + add spec
matthinz Dec 31, 2024
47b95ca
Start on spec for summarize-user-events command
matthinz Dec 31, 2024
06f9ddf
Protect rubocop's delicate sensibilities
matthinz Dec 31, 2024
93b9b3e
Add more specs
matthinz Jan 7, 2025
362e1ce
Look at banner michael
matthinz Jan 7, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
286 changes: 286 additions & 0 deletions bin/summarize-user-events
Original file line number Diff line number Diff line change
@@ -0,0 +1,286 @@
#!/usr/bin/env ruby
# frozen_string_literal: true

Dir.chdir(__dir__) { require 'bundler/setup' }

require 'active_support'
require 'active_support/core_ext/integer/time'
require 'active_support/core_ext/object/blank'
require 'active_support/time'
require 'aws-sdk-cloudwatchlogs'
require 'concurrent-ruby'
require 'optparse'

$LOAD_PATH.unshift(File.expand_path(File.join(__dir__, '../lib')))
require 'reporting/cloudwatch_client'
require 'reporting/cloudwatch_query_quoting'

# Require all *_matcher.rb files in lib/event_summarizer
Dir[File.expand_path(
File.join(__dir__, '../lib/event_summarizer', '**', '*_matcher.rb'),
)].sort.each do |f|
require f
end

class SummarizeUserEvents
attr_reader :file_name, :uuid, :from_date, :stderr, :stdout, :to_date, :zone

NICE_DATE_AND_TIME_FORMAT = '%B %d, %Y at %I:%M %p %Z'
TIME_ONLY_FORMAT = '%I:%M %p'

def initialize(
file_name: nil,
user_uuid: nil,
start_time: nil,
end_time: nil,
zone: 'UTC',
stdout: STDOUT,
stderr: STDERR
)
@file_name = file_name
@zone = zone
@uuid = user_uuid
@from_date = parse_time(start_time) || 1.week.ago
@to_date = parse_time(end_time) || (
start_time.present? ?
from_date + 1.week :
ActiveSupport::TimeZone[zone].now
)
@stdout = stdout
@stderr = stderr
end

def parse_time(time_str)
return nil if time_str.nil?

parsed = ActiveSupport::TimeZone['UTC'].parse(time_str)
parsed = parsed.in_time_zone(zone) if zone && parsed

parsed
end

def matchers
@matchers ||= [
EventSummarizer::ExampleMatcher.new,
EventSummarizer::AccountDeletionMatcher.new,
EventSummarizer::IdvMatcher.new,
]
end

def self.parse_command_line_options(argv)
options = {
zone: 'America/New_York'
}
basename = File.basename($0)

# rubocop:disable Metrics/LineLength
optparse = OptionParser.new do |opts|
opts.banner = <<~EOM

Summarize user events in a human-readable format

Cloudwatch logs can be read from a file as newline-delimited JSON (ndjson),
or fetched directly via aws-vault.

Usage: #{basename} [OPTIONS]

Examples:
#{basename} -f events.ndjson
aws-vault exec prod-power -- #{basename} -u 1234-5678-90ab-cdef -s 2024-12-09T10:00:00 -e 2024-12-09T14:30:00 -z America/New_York

EOM

opts.on('-f', '--file_name FILE_NAME', 'filename from which to read the events') do |val|
options[:file_name] = val
end

opts.on('-h', '--help', 'Display this message') do
warn opts
exit
end

opts.on('-u', '--user-uuid USER_UUID', 'UUID of the protagonist of the story') do |val|
options[:user_uuid] = val
end

opts.on('-s', '--start-time START_TIME', 'Time of the start of the query period (e.g. 2024-12-09T10:00:00Z), default: 1 week ago') do |val|
options[:start_time] = val
end

opts.on('-e', '--end-time END_TIME', 'Time of the end of the query period (e.g. 2024-12-09T14:30:00Z), default: 1 week from start') do |val|
options[:end_time] = val
end

opts.on('-z', '--timezone TIMEZONE', 'Timezone to use (e.g. America/New_York), default: UTC') do |val|
options[:zone] = val
end
end
# rubocop:enable Metrics/LineLength

optparse.parse!(argv)

options
end


def run
in_correct_time_zone do
find_cloudwatch_events do |event|
# Time.zone is thread-local, and CloudwatchClient may use multiple
# threads to make requests. So we have to make double-sure we're
# in the right Timezone.
in_correct_time_zone do
normalize_event!(event)

matchers.each do |matcher|
matcher.handle_cloudwatch_event(event)
end
end
end

overall_results = []

matchers.each do |matcher|
results_for_matcher = matcher.finish
overall_results.append(*results_for_matcher)
end

stdout.puts format_results(overall_results)
end
end

def format_results(results)
# Each Hash in results should have _at least_ a :title key defined

results.
sort_by { |r| r[:timestamp] || r[:started_at] || Time.zone.at(0) }.
map do |r|
timestamp = r[:timestamp] || r[:started_at]

heading = r[:title]

if timestamp
heading = "#{heading} (#{format_time(timestamp)})"
end

prev_timestamp = timestamp

list_items = r[:attributes]
&.sort_by { |attr| attr[:timestamp] || Time.zone.at(0) }
&.map do |attr|
text = attr[:description]

formatted_timestamp = format_time(attr[:timestamp], prev_timestamp)
prev_timestamp = attr[:timestamp]

text = "(#{formatted_timestamp}) #{text}" if formatted_timestamp

"* #{text}"
end

[
"## #{heading}",
*list_items,
'',
]
end.join("\n")
end

def format_time(timestamp, prev_timestamp = nil)
return if timestamp.blank?

timestamp = timestamp.in_time_zone(zone)
prev_timestamp = prev_timestamp&.in_time_zone(zone)

same_date = timestamp.to_date == prev_timestamp&.to_date

if same_date
timestamp.strftime(TIME_ONLY_FORMAT)
else
timestamp.strftime(NICE_DATE_AND_TIME_FORMAT)
end
end

def query
format(<<~QUERY)
fields
name
, properties.event_properties.success as success
, @message
, @timestamp
| filter properties.user_id = '#{uuid}'
| sort @timestamp asc
| limit 10000
QUERY
end

def cloudwatch_client
@cloudwatch_client ||= Reporting::CloudwatchClient.new(
num_threads: 5,
ensure_complete_logs: true,
log_group_name: 'prod_/srv/idp/shared/log/events.log',
)
end

def find_cloudwatch_events(&block)
unless file_name.nil?
warn 'Reading Cloudwatch events as newline-delimited JSON (ndjson) file'
file_source(&block)
else
cloudwatch_source(&block)
end
end

def file_source(&block)
events = []

File.read(file_name).each_line do |line|
next if line.blank?
events << JSON.parse(line)
end

events.sort_by! { |e| e['@timestamp'] }

events.each do |e|
block.call(e)
end
end

def cloudwatch_source(&block)
cloudwatch_client.fetch(
query: query,
from: from_date,
to: to_date,
&block
)
end

def in_correct_time_zone
old_time_zone = Time.zone
Time.zone = zone
yield
ensure
Time.zone = old_time_zone
end

def normalize_event!(event)
if event['@timestamp'].is_a?(String)
event['@timestamp'] = ActiveSupport::TimeZone['UTC'].parse(event['@timestamp'])
end

if event['@message'].is_a?(String)
event['@message'] = JSON.parse(event['@message'])
end

event['name'] ||= event['@message']['name']
end
end

def main
options = SummarizeUserEvents.parse_command_line_options(ARGV)
SummarizeUserEvents.new(**options).run
end

if $PROGRAM_NAME == __FILE__
main
end
74 changes: 74 additions & 0 deletions lib/event_summarizer/account_deletion_matcher.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
# frozen_string_literal: true

module EventSummarizer
class AccountDeletionMatcher
ACCOUNT_DELETION_STARTED_EVENT = 'Account Reset: request'
ACCOUNT_DELETION_SUBMITTED_EVENT = 'Account Reset: delete'
ACCOUNT_DELETION_CANCELED_EVENT = 'Account Reset: cancel'

EVENT_PROPERTIES = ['@message', 'properties', 'event_properties'].freeze

attr_accessor :account_deletion_events, :event_summaries

def initialize
@account_deletion_events = Array.new
@event_summaries = Array.new
account_deletion_events
end

def handle_cloudwatch_event(event)
case event['name']
when ACCOUNT_DELETION_STARTED_EVENT
process_account_reset_request(event)
when ACCOUNT_DELETION_SUBMITTED_EVENT
process_account_reset_delete(event)
when ACCOUNT_DELETION_CANCELED_EVENT
process_account_reset_cancel(event)
end
end

def finish
event_summaries
end

private

def process_account_reset_request(event)
event_message = {
title: 'Account deletion Request',
attributes: [
{ type: :account_deletion_request,
description: "On #{event["@timestamp"]} user initiated account deletion" },
],
}
event_summaries.push(event_message)
end

def process_account_reset_cancel(event)
event_message = {
title: 'Account deletion cancelled',
attributes: [
{ type: :account_deletion_cancelled,
description: "On #{event["@timestamp"]} user canceled account deletion" },
],
}
event_summaries.push(event_message)
end

def process_account_reset_delete(event)
message = event['@message']
age = message['properties']['event_properties']['account_age_in_days']
date = event['@timestamp']
event_message = {
title: 'Account deleted',
attributes: [
{
type: :account_deleted,
description: "On #{date} user deleted their account which was #{age} days old",
},
],
}
event_summaries.push(event_message)
end
end
end
28 changes: 28 additions & 0 deletions lib/event_summarizer/example_matcher.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# frozen_string_literal: true

module EventSummarizer
class ExampleMatcher
attr_reader :event_count

def initialize
@event_count = 0
end

def handle_cloudwatch_event(_event)
@event_count += 1
end

def finish
[
{
title: 'Processed some events',
attributes: [
{ type: :event_count, description: "Processed #{event_count} event(s)" },
],
}.tap do
@event_count = 0
end,
]
end
end
end
Loading