From 7fc68fd0a74306f6633997588cf39ecb82d9596e Mon Sep 17 00:00:00 2001 From: Malick Diarra Date: Tue, 10 Dec 2024 11:00:13 -0500 Subject: [PATCH 01/26] Initial cloudwatch query script to summarize events --- bin/summarize-user-events | 55 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) create mode 100644 bin/summarize-user-events diff --git a/bin/summarize-user-events b/bin/summarize-user-events new file mode 100644 index 00000000000..4b6cf481e6e --- /dev/null +++ b/bin/summarize-user-events @@ -0,0 +1,55 @@ +#!/usr/bin/env ruby +# frozen_string_literal: true + + +$LOAD_PATH.unshift(File.expand_path(File.join(__dir__, '../../lib'))) +require 'reporting/cloudwatch_client' +require 'reporting/cloudwatch_query_quoting' + + +class SummarizeUserEvents + attr_reader :uuid, :from_date, to_date: + def initialize(argv:, stdin:, stdout:) + # argv[0] == uuid + # argv[1] == From date + # argv[2] == to date + + uuid = argv[0] + from_date = argv[1].present? ? DateTime.parse(argv[1]) : 1.week.ago + to_date = argv[2].present? ? DateTime.parse(argv[2]) : DateTime.now + end + + def run + cloudwatch_client.fetch( + query: query, + from: from_date, + to: to_date, + ) + end + + + def query + format(<<~QUERY, params) + fields + name + , properties.event_properties.success as success + , @message + | filter user_id = #{uuid} + | limit 10000 + QUERY + end + + + def cloudwatch_client + @cloudwatch_client ||= Reporting::CloudwatchClient.new( + num_threads: 5, + ensure_complete_logs: true, + slice_interval: 3.hours, + ) + end +end + + +if $PROGRAM_NAME == __FILE__ + SummarizeUserEvents.new(argv: ARGV, stdin: STDIN, stdout: STDOUT).run +end \ No newline at end of file From f0ed64696d4bfe876cdd62fc40e876da51d677d3 Mon Sep 17 00:00:00 2001 From: Malick Diarra Date: Tue, 10 Dec 2024 11:44:22 -0500 Subject: [PATCH 02/26] Query cloudwatch and get user events --- bin/summarize-user-events | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) mode change 100644 => 100755 bin/summarize-user-events diff --git a/bin/summarize-user-events b/bin/summarize-user-events old mode 100644 new mode 100755 index 4b6cf481e6e..ce23b86c709 --- a/bin/summarize-user-events +++ b/bin/summarize-user-events @@ -1,40 +1,49 @@ #!/usr/bin/env ruby # frozen_string_literal: true +Dir.chdir(__dir__) { require 'bundler/setup' } +require 'active_support' +require 'active_support/core_ext/integer/time' +require 'active_support/core_ext/object/blank' +require 'active_support/time' +require 'aws-sdk-cloudwatchlogs' +require 'concurrent-ruby' -$LOAD_PATH.unshift(File.expand_path(File.join(__dir__, '../../lib'))) +$LOAD_PATH.unshift(File.expand_path(File.join(__dir__, '../lib'))) require 'reporting/cloudwatch_client' require 'reporting/cloudwatch_query_quoting' class SummarizeUserEvents - attr_reader :uuid, :from_date, to_date: + attr_reader :uuid, :from_date, :to_date + def initialize(argv:, stdin:, stdout:) # argv[0] == uuid # argv[1] == From date # argv[2] == to date - uuid = argv[0] - from_date = argv[1].present? ? DateTime.parse(argv[1]) : 1.week.ago - to_date = argv[2].present? ? DateTime.parse(argv[2]) : DateTime.now + @uuid = argv[0] + @from_date = argv[1].present? ? Time.strptime(argv[1], '%m/%d/%Y') : 1.week.ago + @to_date = argv[2].present? ? Time.strptime(argv[2], '%m/%d/%Y') : DateTime.now end def run - cloudwatch_client.fetch( + results = cloudwatch_client.fetch( query: query, from: from_date, to: to_date, ) + pp results end def query - format(<<~QUERY, params) + format(<<~QUERY) fields name , properties.event_properties.success as success , @message - | filter user_id = #{uuid} + | filter properties.user_id = '#{uuid}' | limit 10000 QUERY end @@ -44,7 +53,7 @@ class SummarizeUserEvents @cloudwatch_client ||= Reporting::CloudwatchClient.new( num_threads: 5, ensure_complete_logs: true, - slice_interval: 3.hours, + log_group_name: 'prod_/srv/idp/shared/log/events.log', ) end end From f0a3390b213a63f9646621f868bc2efa1568ae2c Mon Sep 17 00:00:00 2001 From: Malick Diarra Date: Tue, 10 Dec 2024 13:47:49 -0500 Subject: [PATCH 03/26] add timestamp remove limit --- bin/summarize-user-events | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/bin/summarize-user-events b/bin/summarize-user-events index ce23b86c709..b1fa463ec9d 100755 --- a/bin/summarize-user-events +++ b/bin/summarize-user-events @@ -43,8 +43,9 @@ class SummarizeUserEvents name , properties.event_properties.success as success , @message + , @timestamp | filter properties.user_id = '#{uuid}' - | limit 10000 + | sort @timestamp asc QUERY end From 4bc5435e82bb36643e047f6d99609db4e0eeb37c Mon Sep 17 00:00:00 2001 From: Matt Hinz Date: Wed, 11 Dec 2024 10:20:13 -0800 Subject: [PATCH 04/26] [Hackathon] Allow sourcing events from stdin (#11619) * Allow sourcing events from stdin It may be useful sometimes to take a local cache of cloudwatch events and pipe them into this command. [skip changelog] * Add 'limit: 10000' to CW query This is required for `complete` to work --- bin/summarize-user-events | 36 ++++++++++++++++++++++++++++++------ 1 file changed, 30 insertions(+), 6 deletions(-) diff --git a/bin/summarize-user-events b/bin/summarize-user-events index b1fa463ec9d..7a1e0ba123d 100755 --- a/bin/summarize-user-events +++ b/bin/summarize-user-events @@ -28,12 +28,9 @@ class SummarizeUserEvents end def run - results = cloudwatch_client.fetch( - query: query, - from: from_date, - to: to_date, - ) - pp results + find_cloudwatch_events do |cloudwatch_event| + pp cloudwatch_event + end end @@ -46,6 +43,7 @@ class SummarizeUserEvents , @timestamp | filter properties.user_id = '#{uuid}' | sort @timestamp asc + | limit 10000 QUERY end @@ -57,6 +55,32 @@ class SummarizeUserEvents log_group_name: 'prod_/srv/idp/shared/log/events.log', ) end + + def find_cloudwatch_events(&block) + if $stdin.tty? + cloudwatch_source(&block) + else + warn "Reading Cloudwatch events as newline-delimited JSON (ndjson) from stdin" + stdin_source(&block) + end + end + + def stdin_source(&block) + $stdin.each_line do |line| + next if line.blank? + event = JSON.parse(line) + block.call(event) + end + end + + def cloudwatch_source(&block) + cloudwatch_client.fetch( + query: query, + from: from_date, + to: to_date, + &block + ) + end end From 19e6afed33a5aeb8f30c2761a0865bd1b74a49bf Mon Sep 17 00:00:00 2001 From: Matt Hinz Date: Wed, 11 Dec 2024 12:50:35 -0800 Subject: [PATCH 05/26] [Hackathon] Add ExampleMatcher (#11622) * Add ExampleMatcher Add an example matcher that just counts events and outputs how many it saw. [skip changelog] * Remove excess whitespace * Add frozen_string_literal: true --- bin/summarize-user-events | 63 ++++++++++++++++++++++++- lib/event_summarizer/example_matcher.rb | 28 +++++++++++ 2 files changed, 89 insertions(+), 2 deletions(-) create mode 100644 lib/event_summarizer/example_matcher.rb diff --git a/bin/summarize-user-events b/bin/summarize-user-events index 7a1e0ba123d..05c059ff737 100755 --- a/bin/summarize-user-events +++ b/bin/summarize-user-events @@ -13,6 +13,7 @@ $LOAD_PATH.unshift(File.expand_path(File.join(__dir__, '../lib'))) require 'reporting/cloudwatch_client' require 'reporting/cloudwatch_query_quoting' +require 'event_summarizer/example_matcher' class SummarizeUserEvents attr_reader :uuid, :from_date, :to_date @@ -27,12 +28,44 @@ class SummarizeUserEvents @to_date = argv[2].present? ? Time.strptime(argv[2], '%m/%d/%Y') : DateTime.now end + def matchers + @matchers ||= [ + EventSummarizer::ExampleMatcher.new + ] + end + def run - find_cloudwatch_events do |cloudwatch_event| - pp cloudwatch_event + find_cloudwatch_events do |event| + event['@message'] = JSON.parse(event['@message']) if event['@message'].is_a?(String) + + matchers.each do |matcher| + matcher.handle_cloudwatch_event(event) + end + end + + overall_results = [] + + matchers.each do |matcher| + results_for_matcher = matcher.finish + overall_results.append(*results_for_matcher) end + + puts format_results(overall_results) end + def format_results(results) + results.map do |r| + title = r[:title] + + [ + "## #{title}", + *r[:attributes]&.map do |attr| + "* #{attr[:description]}" + end, + "" + ] + end.join("\n") + end def query format(<<~QUERY) @@ -47,6 +80,32 @@ class SummarizeUserEvents QUERY end + def find_events(&block) + warn "$stdin.tty? = #{$stdin.tty?}" + if $stdin.tty? + cloudwatch_source(&block) + else + stdin_source(&block) + end + end + + def stdin_source(&block) + $stdin.each_line do |line| + next if line.blank? + event = JSON.parse(line) + block.call(event) + end + end + + def cloudwatch_source(&block) + cloudwatch_client.fetch( + query: query, + from: from_date, + to: to_date, + &block + ) + end + def cloudwatch_client @cloudwatch_client ||= Reporting::CloudwatchClient.new( diff --git a/lib/event_summarizer/example_matcher.rb b/lib/event_summarizer/example_matcher.rb new file mode 100644 index 00000000000..b7807ec47f0 --- /dev/null +++ b/lib/event_summarizer/example_matcher.rb @@ -0,0 +1,28 @@ +# frozen_string_literal: true + +module EventSummarizer + class ExampleMatcher + attr_reader :event_count + + def initialize + @event_count = 0 + end + + def handle_cloudwatch_event(_event) + @event_count += 1 + end + + def finish + [ + { + title: 'Processed some events', + attributes: [ + { type: :event_count, description: "Processed #{event_count} event(s)" }, + ], + }.tap do + @event_count = 0 + end, + ] + end + end +end From 98949895254f4a0dda76eb3dffd48d36f7f153ae Mon Sep 17 00:00:00 2001 From: Doug Price Date: Wed, 11 Dec 2024 22:32:27 -0500 Subject: [PATCH 06/26] use optparse to allow command options/defaults (#11627) --- bin/summarize-user-events | 114 +++++++++++++++++++++++++------------- 1 file changed, 77 insertions(+), 37 deletions(-) diff --git a/bin/summarize-user-events b/bin/summarize-user-events index 05c059ff737..7b780276bfe 100755 --- a/bin/summarize-user-events +++ b/bin/summarize-user-events @@ -1,5 +1,6 @@ #!/usr/bin/env ruby # frozen_string_literal: true + Dir.chdir(__dir__) { require 'bundler/setup' } require 'active_support' @@ -8,29 +9,38 @@ require 'active_support/core_ext/object/blank' require 'active_support/time' require 'aws-sdk-cloudwatchlogs' require 'concurrent-ruby' +require 'optparse' $LOAD_PATH.unshift(File.expand_path(File.join(__dir__, '../lib'))) require 'reporting/cloudwatch_client' require 'reporting/cloudwatch_query_quoting' -require 'event_summarizer/example_matcher' +# Require all *_matcher.rb files in lib/event_summarizer +Dir[File.expand_path( + File.join(__dir__, '../lib/event_summarizer', '**', '*_matcher.rb'), +)].sort.each do |f| + require f +end class SummarizeUserEvents attr_reader :uuid, :from_date, :to_date - - def initialize(argv:, stdin:, stdout:) - # argv[0] == uuid - # argv[1] == From date - # argv[2] == to date - - @uuid = argv[0] - @from_date = argv[1].present? ? Time.strptime(argv[1], '%m/%d/%Y') : 1.week.ago - @to_date = argv[2].present? ? Time.strptime(argv[2], '%m/%d/%Y') : DateTime.now + + def initialize(user_uuid: nil, start_time: nil, end_time: nil, zone: 'UTC') + Time.zone = zone + @uuid = user_uuid + @from_date = parse_time(start_time) || 1.week.ago + @to_date = parse_time(end_time) || start_time.present? ? from_date + 1.week : Time.zone.now + end + + def parse_time(time_str) + Time.zone.parse(time_str) + rescue StandardError + nil end def matchers @matchers ||= [ - EventSummarizer::ExampleMatcher.new + EventSummarizer::ExampleMatcher.new, ] end @@ -44,12 +54,12 @@ class SummarizeUserEvents end overall_results = [] - + matchers.each do |matcher| results_for_matcher = matcher.finish overall_results.append(*results_for_matcher) end - + puts format_results(overall_results) end @@ -62,7 +72,7 @@ class SummarizeUserEvents *r[:attributes]&.map do |attr| "* #{attr[:description]}" end, - "" + '', ] end.join("\n") end @@ -89,24 +99,6 @@ class SummarizeUserEvents end end - def stdin_source(&block) - $stdin.each_line do |line| - next if line.blank? - event = JSON.parse(line) - block.call(event) - end - end - - def cloudwatch_source(&block) - cloudwatch_client.fetch( - query: query, - from: from_date, - to: to_date, - &block - ) - end - - def cloudwatch_client @cloudwatch_client ||= Reporting::CloudwatchClient.new( num_threads: 5, @@ -119,7 +111,7 @@ class SummarizeUserEvents if $stdin.tty? cloudwatch_source(&block) else - warn "Reading Cloudwatch events as newline-delimited JSON (ndjson) from stdin" + warn 'Reading Cloudwatch events as newline-delimited JSON (ndjson) from stdin' stdin_source(&block) end end @@ -138,11 +130,59 @@ class SummarizeUserEvents from: from_date, to: to_date, &block - ) - end + ) + end end +def main + options = {} + basename = File.basename($0) + + # rubocop:disable Metrics/BlockLength, Metrics/LineLength + optparse = OptionParser.new do |opts| + opts.banner = <<-EOM + + Summarize user events in a human-readable format + + Cloudwatch logs can be read from stdin as newline-delimited JSON (ndjson), + or fetched directly via aws-vault. + + usage: #{basename} [OPTIONS] + + Examples: + #{basename} << events.ndjson + aws-vault exec prod-power -- #{basename} -u 1234-5678-90ab-cdef -s 2024-12-09T10:00:00 -e 2024-12-09T14:30:00 -z America/New_York + + EOM + + opts.on('-h', '--help', 'Display this message') do + warn opts + exit + end + + opts.on('-u', '--user-uuid USER_UUID', 'UUID of the protagonist of the story') do |val| + options[:user_uuid] = val + end + + opts.on('-s', '--start-time START_TIME', 'Time of the start of the query period (e.g. 2024-12-09T10:00:00Z), default: 1 week ago') do |val| + options[:start_time] = val + end + + opts.on('-e', '--end_time END_TIME', 'Time of the end of the query period (e.g. 2024-12-09T14:30:00Z), default: 1 week from start') do |val| + options[:end_time] = val + end + + opts.on('-z', '--timezone TIMEZONE', 'Timezone to use (e.g. America/New_York), default: UTC') do |val| + options[:zone] = val + end + end + # rubocop:enable Metrics/BlockLength, Metrics/LineLength + + optparse.parse! + + SummarizeUserEvents.new(**options).run +end if $PROGRAM_NAME == __FILE__ - SummarizeUserEvents.new(argv: ARGV, stdin: STDIN, stdout: STDOUT).run -end \ No newline at end of file + main +end From 9dca95ad651407159f71ab655a8b645ed672c26b Mon Sep 17 00:00:00 2001 From: Matt Hinz Date: Thu, 12 Dec 2024 09:06:53 -0800 Subject: [PATCH 07/26] [HACKATHON] Initial crack at an IdV matcher (#11624) * Initial crack at an IdV matcher Matcher is a state machine that collects IDV "attempts" as they happen and tries to suss out interesting things about them. [skip changelog] * removed unused method --------- Co-authored-by: Douglas Price --- bin/summarize-user-events | 15 +- lib/event_summarizer/idv_matcher.rb | 436 ++++++++++++++++++ .../vendor_result_evaluators/aamva.rb | 129 ++++++ .../instant_verify.rb | 83 ++++ 4 files changed, 653 insertions(+), 10 deletions(-) create mode 100644 lib/event_summarizer/idv_matcher.rb create mode 100644 lib/event_summarizer/vendor_result_evaluators/aamva.rb create mode 100644 lib/event_summarizer/vendor_result_evaluators/instant_verify.rb diff --git a/bin/summarize-user-events b/bin/summarize-user-events index 7b780276bfe..0b4443cfb68 100755 --- a/bin/summarize-user-events +++ b/bin/summarize-user-events @@ -23,9 +23,10 @@ Dir[File.expand_path( end class SummarizeUserEvents - attr_reader :uuid, :from_date, :to_date + attr_reader :uuid, :from_date, :to_date, :zone def initialize(user_uuid: nil, start_time: nil, end_time: nil, zone: 'UTC') + @zone = zone Time.zone = zone @uuid = user_uuid @from_date = parse_time(start_time) || 1.week.ago @@ -41,11 +42,14 @@ class SummarizeUserEvents def matchers @matchers ||= [ EventSummarizer::ExampleMatcher.new, + EventSummarizer::IdvMatcher.new ] end def run find_cloudwatch_events do |event| + Time.zone ||= zone + event['@message'] = JSON.parse(event['@message']) if event['@message'].is_a?(String) matchers.each do |matcher| @@ -90,15 +94,6 @@ class SummarizeUserEvents QUERY end - def find_events(&block) - warn "$stdin.tty? = #{$stdin.tty?}" - if $stdin.tty? - cloudwatch_source(&block) - else - stdin_source(&block) - end - end - def cloudwatch_client @cloudwatch_client ||= Reporting::CloudwatchClient.new( num_threads: 5, diff --git a/lib/event_summarizer/idv_matcher.rb b/lib/event_summarizer/idv_matcher.rb new file mode 100644 index 00000000000..e547ffdf8be --- /dev/null +++ b/lib/event_summarizer/idv_matcher.rb @@ -0,0 +1,436 @@ +# frozen_string_literal: true + +require 'active_support' +require 'active_support/time' + +require 'event_summarizer/vendor_result_evaluators/aamva' +require 'event_summarizer/vendor_result_evaluators/instant_verify' + +module EventSummarizer + class IdvMatcher + IDV_WELCOME_SUBMITTED_EVENT = 'IdV: doc auth welcome submitted' + IDV_GPO_CODE_SUBMITTED_EVENT = 'IdV: enter verify by mail code submitted' + IDV_FINAL_RESOLUTION_EVENT = 'IdV: final resolution' + IDV_VERIFY_PROOFING_RESULTS_EVENT = 'IdV: doc auth verify proofing results' + IPP_ENROLLMENT_STATUS_UPDATED_EVENT = 'GetUspsProofingResultsJob: Enrollment status updated' + PROFILE_ENCRYPTION_INVALID_EVENT = 'Profile Encryption: Invalid' + RATE_LIMIT_REACHED_EVENT = 'Rate Limit Reached' + + EVENT_PROPERTIES = ['@message', 'properties', 'event_properties'].freeze + + VENDORS = { + 'lexisnexis:instant_verify' => { + id: :instant_verify, + name: 'Instant Verify', + evaluator_module: EventSummarizer::VendorResultEvaluators::InstantVerify, + }, + 'aamva:state_id' => { + id: :aamva, + name: 'AAMVA', + evaluator_module: EventSummarizer::VendorResultEvaluators::Aamva, + }, + }.freeze + + UNKNOWN_VENDOR = { + id: :unknown, + name: 'Unknown vendor', + }.freeze + + IdvAttempt = Data.define( + :started_at, + :significant_events, + ) do + def initialize(started_at:, significant_events: []) + super(started_at:, significant_events:) + end + + def gpo? + self.significant_events.any? { |e| e.type == :start_gpo } + end + + def ipp? + self.significant_events.any? { |e| e.type == :start_ipp } + end + + def successful? + self.significant_events.any? do |e| + e.type == :identity_verified + end + end + end.freeze + + SignificantIdvEvent = Data.define( + :timestamp, + :type, + :description, + ).freeze + + attr_reader :current_idv_attempt + attr_reader :idv_attempts + + # @return {Hash,nil} + def handle_cloudwatch_event(event) + @idv_attempts ||= [] + + case event['name'] + when IDV_WELCOME_SUBMITTED_EVENT + start_new_idv_attempt(event:) + + when IDV_FINAL_RESOLUTION_EVENT + for_current_idv_attempt(event:) do + handle_final_resolution_event(event:) + end + + when IDV_GPO_CODE_SUBMITTED_EVENT + for_current_idv_attempt(event:) do + handle_gpo_code_submission(event:) + end + + when IPP_ENROLLMENT_STATUS_UPDATED_EVENT + for_current_idv_attempt(event:) do + handle_ipp_enrollment_status_update(event:) + end + + when IDV_VERIFY_PROOFING_RESULTS_EVENT + for_current_idv_attempt(event:) do + handle_verify_proofing_results_event(event:) + end + + when PROFILE_ENCRYPTION_INVALID_EVENT + for_current_idv_attempt(event:) do + handle_profile_encryption_error(event:) + end + + when RATE_LIMIT_REACHED_EVENT + handle_rate_limit_reached(event:) + + else + warn event['name'] if ENV['LOG_UNHANDLED_EVENTS'] + end + end + + def finish + finish_current_idv_attempt + + self.idv_attempts.map { |a| summarize_idv_attempt(a) }.tap do + idv_attempts.clear + end + end + + private + + def add_significant_event( + timestamp:, + type:, + description: + ) + current_idv_attempt.significant_events << SignificantIdvEvent.new( + timestamp:, + type:, + description:, + ) + end + + def for_current_idv_attempt(event:, &block) + if !current_idv_attempt + warn <<~WARNING + Encountered #{event['name']} without seeing a '#{IDV_WELCOME_SUBMITTED_EVENT}' event first. + This could indicate you need to include earlier events in your request. + WARNING + return + end + + block.call(event) + end + + def finish_current_idv_attempt + idv_attempts << current_idv_attempt if current_idv_attempt + @current_idv_attempt = nil + end + + # @return {Hash,nil} + def handle_final_resolution_event(event:) + timestamp = Time.zone.parse(event['@timestamp']) + + gpo_pending = !!event.dig( + *EVENT_PROPERTIES, + 'gpo_verification_pending', + ) + + if gpo_pending + add_significant_event( + type: :start_gpo, + timestamp:, + description: 'User requested a letter to verfy by mail', + ) + end + + ipp_pending = !!event.dig( + *EVENT_PROPERTIES, + 'in_person_verification_pending', + ) + + if ipp_pending + add_significant_event( + type: :start_ipp, + timestamp:, + descirption: 'User entered the in-person proofing flow', + ) + end + + fraud_review_pending = !!event.dig( + *EVENT_PROPERTIES, + 'fraud_review_pending', + ) + + if fraud_review_pending + add_significant_event( + type: :flagged_for_fraud, + timestamp:, + description: 'User was flagged for fraud', + ) + end + + pending = + gpo_pending || + ipp_pending || + fraud_review_pending + + if !pending + add_significant_event( + type: :verified, + timestamp:, + description: 'User completed identity verification (remote unsupervised flow)', + ) + + finish_current_idv_attempt + end + end + + def handle_gpo_code_submission(event:) + timestamp = Time.zone.parse(event['@timestamp']) + success = event.dig(*EVENT_PROPERTIES, 'success') + + if !success + add_significant_event( + type: :gpo_code_failure, + timestamp:, + description: 'The user entered an invalid GPO code', + ) + return + end + + # User successfully entered GPO code. If nothing else is pending, + # then they are fully verified + + ipp_pending = !!event.dig( + *EVENT_PROPERTIES, + 'pending_in_person_enrollment', + ) + + fraud_review_pending = !!event.dig( + *EVENT_PROPERTIES, + 'fraud_check_failed', + ) + + fully_verified = !(ipp_pending || fraud_review_pending) + + description = ipp_pending ? + 'User successfully entered a GPO code, but is still pending in-person proofing' + : 'User successfully entered a GPO code' + + add_significant_event( + type: :gpo_code_success, + timestamp:, + description:, + ) + + if fully_verified + add_significant_event( + type: :verified, + timestamp:, + description: 'User completed identity verification', + ) + + finish_current_idv_attempt + end + end + + def handle_ipp_enrollment_status_update(event:) + timestamp = Time.zone.parse(event['@timestamp']) + passed = event.dig(*EVENT_PROPERTIES, 'passed') + tmx_status = event.dig(*EVENT_PROPERTIES, 'tmx_status') + + return if !passed + + add_significant_event( + type: :ipp_enrollment_complete, + timestamp:, + description: 'User visited the post office and completed IPP enrollment', + ) + + verified = tmx_status != 'review' && tmx_status != 'reject' + + if verified + current_idv_attempt.event << SignificantIdvEvent.new( + type: :verified, + timestamp:, + description: 'User is fully verified', + ) + end + end + + def handle_profile_encryption_error(event:) + caveats = [ + # TODO these need to check if GPO/IPP were still pending at time of the event + current_idv_attempt.gpo? ? 'The user will not be able to enter a GPO code' : nil, + current_idv_attempt.ipp? ? 'the user will not be able to verify in-person' : nil, + ].compact + + add_significant_event( + type: :password_reset, + timestamp: Time.zone.parse(event['@timestamp']), + description: [ + 'The user reset their password and did not provide their personal key.', + caveats.length > 0 ? + "The user will not be able to #{caveats.join(' or ')}" : + nil, + ].compact.join(' '), + ) + end + + def handle_rate_limit_reached(event:) + limiters = { + 'idv_doc_auth' => 'Doc Auth', + } + + limiter_type = event.dig(*EVENT_PROPERTIES, 'limiter_type') + + limit_name = limiters[limiter_type] + + return if limit_name.blank? + + timestamp = Time.zone.parse(event['@timestamp']) + + for_current_idv_attempt(event:) do + add_significant_event( + type: :rate_limited, + timestamp:, + description: "Rate limited for #{limit_name}", + ) + end + end + + def handle_verify_proofing_results_event(event:) + timestamp = Time.zone.parse(event['@timestamp']) + success = event.dig(*EVENT_PROPERTIES, 'success') + + if success + # We only really care about passing identity resolution if the + # user previously failed in this attempt + + prior_failures = current_idv_attempt.significant_events.count do |e| + e[:type] == :failed_identity_resolution + end + + if prior_failures > 0 + # TODO: What changed that made them be able to pass? + + add_significant_event( + timestamp:, + type: :passed_identity_resolution, + description: "User passed identity resolution after #{prior_failures + 1} tries", + ) + end + + return + end + + # Failing identity resolution is where it gets interesting + + prev_count = current_idv_attempt.significant_events.count + + add_events_for_failed_vendor_result( + event.dig( + *EVENT_PROPERTIES, 'proofing_results', 'context', 'stages', 'resolution' + ), + timestamp:, + ) + + add_events_for_failed_vendor_result( + event.dig( + *EVENT_PROPERTIES, 'proofing_results', 'context', 'stages', 'residential_address' + ), + timestamp:, + ) + + add_events_for_failed_vendor_result( + event.dig( + *EVENT_PROPERTIES, 'proofing_results', 'context', 'stages', 'state_id' + ), + timestamp:, + ) + + any_events_added = current_idv_attempt.significant_events.count > prev_count + + if !any_events_added + add_significant_event( + timestamp:, + type: :failed_identity_resolution, + description: 'User failed identity resolution (check logs for reason)', + ) + + end + end + + def add_events_for_failed_vendor_result(result, timestamp:) + return if result['success'] + + vendor = VENDORS[result['vendor_name']] || UNKNOWN_VENDOR + evaluator = vendor[:evaluator_module] + + if !evaluator.present? + add_significant_event( + type: :"#{vendor[:id]}_request_failed", + timestamp:, + description: "Request to #{vendor[:name]} failed.", + ) + return + end + + evaluation = evaluator.evaluate_result(result) + add_significant_event(**evaluation, timestamp:) if evaluation + end + + # @return {IdvAttempt,nil} The previous IdvAttempt (if any) + def start_new_idv_attempt(event:) + finish_current_idv_attempt if current_idv_attempt + + @current_idv_attempt = IdvAttempt.new( + started_at: Time.zone.parse(event['@timestamp']), + ) + end + + def summarize_idv_attempt(attempt) + type = :idv + title = 'Identity verification started' + attributes = attempt.significant_events.map do |e| + { + type: e.type, + description: e.description, + } + end + + if attempt.successful? + title = 'Identity verified' + end + + { + started_at: attempt.started_at, + title:, + type:, + attributes:, + } + end + end +end diff --git a/lib/event_summarizer/vendor_result_evaluators/aamva.rb b/lib/event_summarizer/vendor_result_evaluators/aamva.rb new file mode 100644 index 00000000000..2c1ac9ffd2f --- /dev/null +++ b/lib/event_summarizer/vendor_result_evaluators/aamva.rb @@ -0,0 +1,129 @@ +# frozen_string_literal: true + +module EventSummarizer + module VendorResultEvaluators + module Aamva + ID_TYPES = { + 'state_id_card' => 'non-driving ID card', + 'drivers_license' => 'drivers\' license', + }.freeze + + # TODO: Load these from the AAMVA proofer or put them somewhere common + + REQUIRED_VERIFICATION_ATTRIBUTES = %i[ + state_id_number + dob + last_name + first_name + ].freeze + + REQUIRED_IF_PRESENT_ATTRIBUTES = [:state_id_expiration].freeze + + # @param result {Hash} The result structure logged to Cloudwatch + # @return [Hash] A Hash with a type, timestamp, and description key. + def self.evaluate_result(result) + if result['success'] + return { + type: :aamva_success, + description: 'AAMVA call succeeded', + } + end + + if result['timed_out'] + return { + type: :aamva_timed_out, + description: 'AAMVA request timed out.', + } + end + + if result['mva_exception'] + state = result['state_id_jurisdiction'] + return { + type: :aamva_mva_exception, + # rubocop:disable Layout/LineLength + description: "AAMVA request failed because the MVA in #{state} failed to return a response.", + # rubocop:enable Layout/LineLength + } + end + + if result['exception'] + + description = 'AAMVA request resulted in an exception' + + m = /ExceptionText: (.+?),/.match(result['exception']) + if m.present? + description = "#{description} (#{m[1]})" + end + + return { + type: :aamva_exception, + description:, + } + end + + # The API call failed because of actual errors in the user's data. + # Try to come up with an explanation + + explanation = explain_errors(result) || 'Check logs for more info.' + + return { + type: :aamva_error, + description: "AAMVA request failed. #{explanation}", + } + end + + def self.explain_errors(result) + # The values in the errors object are arrays + attributes = {} + result['errors'].each do |key, values| + attributes[key] = values.first + end + + id_type = ID_TYPES[result['state_id_type']] || 'id card' + state = result['state_id_jurisdiction'] + + if mva_says_invalid_id_number?(attributes) + # rubocop:disable Layout/LineLength + return "The ID # from the user's #{id_type} was invalid according to the state of #{state}" + # rubocop:enable Layout/LineLength + end + + failed_attributes = relevant_failed_attributes(attributes) + + if !failed_attributes.empty? + plural = failed_attributes.length == 1 ? '' : 's' + + # rubocop:disable Layout/LineLength + "#{failed_attributes.length} attribute#{plural} failed to validate: #{failed_attributes.join(', ')}" + # rubocop:enable Layout/LineLength + end + end + + def self.mva_says_invalid_id_number?(attributes) + # When all attributes are marked "MISSING", except ID number, + # which is marked "UNVERIFIED", that indicates the MVA could not + # find the ID number to compare PII + + missing_count = attributes.count do |_attr, status| + status == 'MISSING' + end + + attributes['state_id_number'] == 'UNVERIFIED' && missing_count == attributes.count - 1 + end + + def self.relevant_failed_attributes(attributes) + failed_attributes = Set.new + + REQUIRED_VERIFICATION_ATTRIBUTES.each do |attr| + failed_attributes << attr if attributes[attr] != 'VERIFIED' + end + + REQUIRED_IF_PRESENT_ATTRIBUTES.each do |attr| + failed_attributes << attr if attributes[attr].present? && attributes[attr] != 'VERIFIED' + end + + failed_attributes + end + end + end +end diff --git a/lib/event_summarizer/vendor_result_evaluators/instant_verify.rb b/lib/event_summarizer/vendor_result_evaluators/instant_verify.rb new file mode 100644 index 00000000000..81dc58ba54f --- /dev/null +++ b/lib/event_summarizer/vendor_result_evaluators/instant_verify.rb @@ -0,0 +1,83 @@ +# frozen_string_literal: true + +module EventSummarizer + module VendorResultEvaluators + module InstantVerify + # @param result {Hash} The result structure logged to Cloudwatch + # @return [Hash] A Hash with a type and description keys. + def self.evaluate_result(result) + if result['success'] + return { + type: :instant_verify_success, + description: 'Instant Verify call succeeded', + } + end + + if result['timed_out'] + return { + type: :instant_verify_timed_out, + description: 'Instant Verify request timed out.', + } + end + + if result['exception'] + return { + type: :instant_verify_exception, + description: 'Instant Verify request resulted in an exception', + } + end + + # The API call failed because of actual errors in the user's data. + # Try to come up with an explanation + + explanation = explain_errors(result) || 'Check logs for more info.' + + return { + type: :instant_verify_error, + description: "Instant Verify request failed. #{explanation}", + } + end + + # Attempts to render a legible explanation of what went wrong in a + # LexisNexis Instant Verify request. + # @param result {Hash} The result structure logged to Cloudwatch + # @return {String} + def self.explain_errors(result) + # (The structure of the 'errors' key for Instant Verify is kind of weird) + + failed_items = [] + + result.dig('errors', 'InstantVerify')&.each do |iv_instance| + next if iv_instance['ProductStatus'] != 'fail' + iv_instance['Items'].each do |item| + if item['ItemStatus'] == 'fail' + failed_items << item + end + end + end + + if failed_items.empty? + return 'Check the full logs for more info.' + end + + checks = failed_items.map do |item| + name = item['ItemName'] + reason = item['ItemReason'] + reason_code = reason ? reason['Code'] : nil + + if reason_code + # TODO: Translate these reason codes to plain language + # TODO: Add suggestions for how the user could remedy + "#{name} (#{reason_code})" + else + name + end + end + + plural = checks.length == 1 ? '' : 's' + + "#{checks.length} check#{plural} failed: #{checks.join(", ")}" + end + end + end +end From 87223ae08da132883c787c2c8424505165c6947c Mon Sep 17 00:00:00 2001 From: Matt Hinz Date: Thu, 12 Dec 2024 10:11:11 -0800 Subject: [PATCH 08/26] [HACKATHON] Output formatting tweaks (#11635) * Normalize @timestamp to UTC for each event Pre-parse it in the script so that matchers don't have to worry about it * Slightly improve output - Include timestamps where possible [skip changelog] --- bin/summarize-user-events | 71 +++++++++++++++++++++++------ lib/event_summarizer/idv_matcher.rb | 15 +++--- 2 files changed, 65 insertions(+), 21 deletions(-) diff --git a/bin/summarize-user-events b/bin/summarize-user-events index 0b4443cfb68..330067a866e 100755 --- a/bin/summarize-user-events +++ b/bin/summarize-user-events @@ -25,6 +25,9 @@ end class SummarizeUserEvents attr_reader :uuid, :from_date, :to_date, :zone + NICE_DATE_AND_TIME_FORMAT = '%B %d, %Y at %I:%M %p %Z' + TIME_ONLY_FORMAT = '%I:%M %p' + def initialize(user_uuid: nil, start_time: nil, end_time: nil, zone: 'UTC') @zone = zone Time.zone = zone @@ -42,7 +45,7 @@ class SummarizeUserEvents def matchers @matchers ||= [ EventSummarizer::ExampleMatcher.new, - EventSummarizer::IdvMatcher.new + EventSummarizer::IdvMatcher.new, ] end @@ -50,6 +53,11 @@ class SummarizeUserEvents find_cloudwatch_events do |event| Time.zone ||= zone + # Timestamps coming from cloudwatch are in UTC but don't include any zone + # information. Here we parse them as UTC. + if event['@timestamp'].is_a?(String) + event['@timestamp'] = Time.zone.parse("#{event['@timestamp']} UTC") + end event['@message'] = JSON.parse(event['@message']) if event['@message'].is_a?(String) matchers.each do |matcher| @@ -68,17 +76,52 @@ class SummarizeUserEvents end def format_results(results) - results.map do |r| - title = r[:title] - - [ - "## #{title}", - *r[:attributes]&.map do |attr| - "* #{attr[:description]}" - end, - '', - ] - end.join("\n") + # Each Hash in results should have _at least_ a :title key defined + + results. + sort_by { |r| r[:timestamp] || r[:started_at] || Time.zone.at(0) }. + map do |r| + timestamp = r[:timestamp] || r[:started_at] + + heading = r[:title] + + if timestamp + heading = "#{heading} (#{timestamp.strftime(NICE_DATE_AND_TIME_FORMAT)})" + end + + prev_timestamp = timestamp + + list_items = r[:attributes]&. + sort_by { |attr| attr[:timestamp] || Time.zone.at(0) }&. + map do |attr| + text = attr[:description] + + formatted_timestamp = format_time(attr[:timestamp], prev_timestamp) + prev_timestamp = attr[:timestamp] + + text = "(#{formatted_timestamp}) #{text}" if formatted_timestamp + + "* #{text}" + end + + [ + "## #{heading}", + *list_items, + '', + ] + end.join("\n") + end + + def format_time(timestamp, prev_timestamp = nil) + return if timestamp.blank? + + same_date = timestamp.to_date == prev_timestamp&.to_date + + if same_date + timestamp.strftime(TIME_ONLY_FORMAT) + else + timestamp.strftime(NICE_DATE_AND_TIME_FORMAT) + end end def query @@ -133,7 +176,7 @@ def main options = {} basename = File.basename($0) - # rubocop:disable Metrics/BlockLength, Metrics/LineLength + # rubocop:disable Metrics/LineLength optparse = OptionParser.new do |opts| opts.banner = <<-EOM @@ -171,7 +214,7 @@ def main options[:zone] = val end end - # rubocop:enable Metrics/BlockLength, Metrics/LineLength + # rubocop:enable Metrics/LineLength optparse.parse! diff --git a/lib/event_summarizer/idv_matcher.rb b/lib/event_summarizer/idv_matcher.rb index e547ffdf8be..8b251312d05 100644 --- a/lib/event_summarizer/idv_matcher.rb +++ b/lib/event_summarizer/idv_matcher.rb @@ -150,7 +150,7 @@ def finish_current_idv_attempt # @return {Hash,nil} def handle_final_resolution_event(event:) - timestamp = Time.zone.parse(event['@timestamp']) + timestamp = event['@timestamp'] gpo_pending = !!event.dig( *EVENT_PROPERTIES, @@ -208,7 +208,7 @@ def handle_final_resolution_event(event:) end def handle_gpo_code_submission(event:) - timestamp = Time.zone.parse(event['@timestamp']) + timestamp = event['@timestamp'] success = event.dig(*EVENT_PROPERTIES, 'success') if !success @@ -257,7 +257,7 @@ def handle_gpo_code_submission(event:) end def handle_ipp_enrollment_status_update(event:) - timestamp = Time.zone.parse(event['@timestamp']) + timestamp = event['@timestamp'] passed = event.dig(*EVENT_PROPERTIES, 'passed') tmx_status = event.dig(*EVENT_PROPERTIES, 'tmx_status') @@ -289,7 +289,7 @@ def handle_profile_encryption_error(event:) add_significant_event( type: :password_reset, - timestamp: Time.zone.parse(event['@timestamp']), + timestamp: event['@timestamp'], description: [ 'The user reset their password and did not provide their personal key.', caveats.length > 0 ? @@ -310,7 +310,7 @@ def handle_rate_limit_reached(event:) return if limit_name.blank? - timestamp = Time.zone.parse(event['@timestamp']) + timestamp = event['@timestamp'] for_current_idv_attempt(event:) do add_significant_event( @@ -322,7 +322,7 @@ def handle_rate_limit_reached(event:) end def handle_verify_proofing_results_event(event:) - timestamp = Time.zone.parse(event['@timestamp']) + timestamp = event['@timestamp'] success = event.dig(*EVENT_PROPERTIES, 'success') if success @@ -407,7 +407,7 @@ def start_new_idv_attempt(event:) finish_current_idv_attempt if current_idv_attempt @current_idv_attempt = IdvAttempt.new( - started_at: Time.zone.parse(event['@timestamp']), + started_at: event['@timestamp'], ) end @@ -417,6 +417,7 @@ def summarize_idv_attempt(attempt) attributes = attempt.significant_events.map do |e| { type: e.type, + timestamp: e.timestamp, description: e.description, } end From 3e5859bb09c50b5c61e289c9f52c016a0a7a57d5 Mon Sep 17 00:00:00 2001 From: Matt Hinz Date: Thu, 12 Dec 2024 10:57:56 -0800 Subject: [PATCH 09/26] [HACKATHON] Minor tweaks (#11637) * Don't crash if no events found * Tweak handling of --end-date - Use a dash rather than underscore - Make sure we respect it if it's passed in * Sort events on stdin before processing Events from Cloudwatch queries will be sorted, but stdin is not guaranteed. Processing unsorted events can lead to weird, weird, outcomes --- bin/summarize-user-events | 15 +++++++++++---- lib/event_summarizer/idv_matcher.rb | 6 ++++-- 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/bin/summarize-user-events b/bin/summarize-user-events index 330067a866e..9d2b72dfc66 100755 --- a/bin/summarize-user-events +++ b/bin/summarize-user-events @@ -33,7 +33,7 @@ class SummarizeUserEvents Time.zone = zone @uuid = user_uuid @from_date = parse_time(start_time) || 1.week.ago - @to_date = parse_time(end_time) || start_time.present? ? from_date + 1.week : Time.zone.now + @to_date = parse_time(end_time) || (start_time.present? ? from_date + 1.week : Time.zone.now) end def parse_time(time_str) @@ -155,10 +155,17 @@ class SummarizeUserEvents end def stdin_source(&block) + events = [] + $stdin.each_line do |line| next if line.blank? - event = JSON.parse(line) - block.call(event) + events << JSON.parse(line) + end + + events.sort_by! { |e| e['@timestamp'] } + + events.each do |e| + block.call(e) end end @@ -206,7 +213,7 @@ def main options[:start_time] = val end - opts.on('-e', '--end_time END_TIME', 'Time of the end of the query period (e.g. 2024-12-09T14:30:00Z), default: 1 week from start') do |val| + opts.on('-e', '--end-time END_TIME', 'Time of the end of the query period (e.g. 2024-12-09T14:30:00Z), default: 1 week from start') do |val| options[:end_time] = val end diff --git a/lib/event_summarizer/idv_matcher.rb b/lib/event_summarizer/idv_matcher.rb index 8b251312d05..59688da3e96 100644 --- a/lib/event_summarizer/idv_matcher.rb +++ b/lib/event_summarizer/idv_matcher.rb @@ -68,10 +68,12 @@ def successful? attr_reader :current_idv_attempt attr_reader :idv_attempts + def initialize + @idv_attempts = [] + end + # @return {Hash,nil} def handle_cloudwatch_event(event) - @idv_attempts ||= [] - case event['name'] when IDV_WELCOME_SUBMITTED_EVENT start_new_idv_attempt(event:) From c8555bc41e9515b756d0fa92fcbc1ba38f95ad52 Mon Sep 17 00:00:00 2001 From: Doug Price Date: Thu, 12 Dec 2024 13:58:10 -0500 Subject: [PATCH 10/26] report on TrueID success/failure (#11638) --- lib/event_summarizer/idv_matcher.rb | 56 ++++++++++++++++++- .../vendor_result_evaluators/true_id.rb | 29 ++++++++++ 2 files changed, 83 insertions(+), 2 deletions(-) create mode 100644 lib/event_summarizer/vendor_result_evaluators/true_id.rb diff --git a/lib/event_summarizer/idv_matcher.rb b/lib/event_summarizer/idv_matcher.rb index 59688da3e96..4cf336fbb70 100644 --- a/lib/event_summarizer/idv_matcher.rb +++ b/lib/event_summarizer/idv_matcher.rb @@ -5,12 +5,14 @@ require 'event_summarizer/vendor_result_evaluators/aamva' require 'event_summarizer/vendor_result_evaluators/instant_verify' +require 'event_summarizer/vendor_result_evaluators/true_id' module EventSummarizer class IdvMatcher IDV_WELCOME_SUBMITTED_EVENT = 'IdV: doc auth welcome submitted' IDV_GPO_CODE_SUBMITTED_EVENT = 'IdV: enter verify by mail code submitted' IDV_FINAL_RESOLUTION_EVENT = 'IdV: final resolution' + IDV_IMAGE_UPLOAD_VENDOR_SUBMITTED_EVENT = 'IdV: doc auth image upload vendor submitted' IDV_VERIFY_PROOFING_RESULTS_EVENT = 'IdV: doc auth verify proofing results' IPP_ENROLLMENT_STATUS_UPDATED_EVENT = 'GetUspsProofingResultsJob: Enrollment status updated' PROFILE_ENCRYPTION_INVALID_EVENT = 'Profile Encryption: Invalid' @@ -19,6 +21,11 @@ class IdvMatcher EVENT_PROPERTIES = ['@message', 'properties', 'event_properties'].freeze VENDORS = { + 'TrueID' => { + id: :trueid, + name: 'True ID', + evaluator_module: EventSummarizer::VendorResultEvaluators::TrueId, + }, 'lexisnexis:instant_verify' => { id: :instant_verify, name: 'Instant Verify', @@ -93,6 +100,11 @@ def handle_cloudwatch_event(event) handle_ipp_enrollment_status_update(event:) end + when IDV_IMAGE_UPLOAD_VENDOR_SUBMITTED_EVENT + for_current_idv_attempt(event:) do + handle_image_upload_vendor_submitted(event:) + end + when IDV_VERIFY_PROOFING_RESULTS_EVENT for_current_idv_attempt(event:) do handle_verify_proofing_results_event(event:) @@ -323,6 +335,47 @@ def handle_rate_limit_reached(event:) end end + def handle_image_upload_vendor_submitted(event:) + timestamp = event['@timestamp'] + success = event.dig(*EVENT_PROPERTIES, 'success') + doc_type = event.dig(*EVENT_PROPERTIES, 'DocClassName') + + if success + prior_failures = current_idv_attempt.significant_events.count do |e| + e.type == :failed_document_capture + end + attempts = prior_failures > 0 ? "after #{prior_failures} tries" : 'on the first attempt' + + add_significant_event( + timestamp:, + type: :passed_document_capture, + description: "User successfully verified their #{doc_type.downcase} #{attempts}", + ) + return + end + + prev_count = current_idv_attempt.significant_events.count + + alerts = event.dig(*EVENT_PROPERTIES, 'processed_alerts') + alerts['success'] = false + alerts['vendor_name'] = event.dig(*EVENT_PROPERTIES, 'vendor') + + add_events_for_failed_vendor_result( + alerts, + timestamp:, + ) + + any_events_added = current_idv_attempt.significant_events.count > prev_count + + if !any_events_added + add_significant_event( + timestamp:, + type: :failed_document_capture, + description: "User failed to verify their #{doc_type.downcase} (check logs for reason)", + ) + end + end + def handle_verify_proofing_results_event(event:) timestamp = event['@timestamp'] success = event.dig(*EVENT_PROPERTIES, 'success') @@ -332,7 +385,7 @@ def handle_verify_proofing_results_event(event:) # user previously failed in this attempt prior_failures = current_idv_attempt.significant_events.count do |e| - e[:type] == :failed_identity_resolution + e.type == :failed_identity_resolution end if prior_failures > 0 @@ -381,7 +434,6 @@ def handle_verify_proofing_results_event(event:) type: :failed_identity_resolution, description: 'User failed identity resolution (check logs for reason)', ) - end end diff --git a/lib/event_summarizer/vendor_result_evaluators/true_id.rb b/lib/event_summarizer/vendor_result_evaluators/true_id.rb new file mode 100644 index 00000000000..f7b59694974 --- /dev/null +++ b/lib/event_summarizer/vendor_result_evaluators/true_id.rb @@ -0,0 +1,29 @@ +# frozen_string_literal: true + +module EventSummarizer + module VendorResultEvaluators + module TrueId + # @param result {Hash} The array of processed_alerts.failed logged to Cloudwatch + # @return [Hash] A Hash with a type and description keys. + def self.evaluate_result(result) + alerts = [] + result['failed'].each do |alert| + if alert['result'] == 'Failed' + alerts << { + type: :"trueid_#{alert['name'].parameterize(separator: '_')}", + description: alert['disposition'], + } + end + end + + if alerts.present? + alerts.uniq! { |a| a[:description] } + return { + type: :trueid_failures, + description: "TrueID request failed. #{alerts.map { |a| a[:description] }.join(' ')}", + } + end + end + end + end +end From d60a9f420bef19d572df01bb102b7cf67eb4abe5 Mon Sep 17 00:00:00 2001 From: Matt Hinz Date: Thu, 12 Dec 2024 11:53:01 -0800 Subject: [PATCH 11/26] Try to identify IDV abandonment (#11639) If the user: - Has not completed the initial workflow and - Does not have an idv-related event new that 1 hour Call their attempt abandoned --- lib/event_summarizer/idv_matcher.rb | 60 ++++++++++++++++++++++++++++- 1 file changed, 58 insertions(+), 2 deletions(-) diff --git a/lib/event_summarizer/idv_matcher.rb b/lib/event_summarizer/idv_matcher.rb index 4cf336fbb70..162f41e6370 100644 --- a/lib/event_summarizer/idv_matcher.rb +++ b/lib/event_summarizer/idv_matcher.rb @@ -51,19 +51,35 @@ def initialize(started_at:, significant_events: []) super(started_at:, significant_events:) end + def flagged_for_fraud? + self.significant_events.any? { |e| e.type == :flagged_for_fraud } + end + def gpo? self.significant_events.any? { |e| e.type == :start_gpo } end + def gpo_pending? + gpo? && !self.significant_events.any? { |e| e.type == :gpo_code_success } + end + def ipp? self.significant_events.any? { |e| e.type == :start_ipp } end + def ipp_pending? + ipp? && !self.significant_events.any? { |e| e.type == :ipp_enrollment_complete } + end + def successful? self.significant_events.any? do |e| - e.type == :identity_verified + e.type == :verified end end + + def workflow_complete? + gpo? || ipp? || flagged_for_fraud? || successful? + end end.freeze SignificantIdvEvent = Data.define( @@ -74,6 +90,7 @@ def successful? attr_reader :current_idv_attempt attr_reader :idv_attempts + attr_reader :idv_abandoned_event def initialize @idv_attempts = [] @@ -86,6 +103,7 @@ def handle_cloudwatch_event(event) start_new_idv_attempt(event:) when IDV_FINAL_RESOLUTION_EVENT + for_current_idv_attempt(event:) do handle_final_resolution_event(event:) end @@ -119,8 +137,12 @@ def handle_cloudwatch_event(event) handle_rate_limit_reached(event:) else - warn event['name'] if ENV['LOG_UNHANDLED_EVENTS'] + if ENV['LOG_UNHANDLED_EVENTS'] + warn "#{event['@timestamp']} #{event['name']}" + end end + + check_for_idv_abandonment(event) end def finish @@ -145,6 +167,24 @@ def add_significant_event( ) end + def check_for_idv_abandonment(event) + return if !current_idv_attempt.present? + + looks_like_idv = /^idv/i.match(event['name']) + return if !looks_like_idv + + if idv_abandoned_event.blank? + @idv_abandoned_event = event + return + end + + is_a_little_bit_newer = event['@timestamp'] - idv_abandoned_event['@timestamp'] < 1.hour + + if is_a_little_bit_newer + @idv_abandoned_event = event + end + end + def for_current_idv_attempt(event:, &block) if !current_idv_attempt warn <<~WARNING @@ -158,8 +198,24 @@ def for_current_idv_attempt(event:, &block) end def finish_current_idv_attempt + if current_idv_attempt.present? + looks_like_abandonment = + !current_idv_attempt.workflow_complete? && + idv_abandoned_event.present? && + idv_abandoned_event['@timestamp'] < 1.hour.ago + + if looks_like_abandonment + add_significant_event( + type: :idv_abandoned, + timestamp: idv_abandoned_event['@timestamp'], + description: 'User abandoned identity verification', + ) + end + end + idv_attempts << current_idv_attempt if current_idv_attempt @current_idv_attempt = nil + @idv_abandoned_event = nil end # @return {Hash,nil} From afa3fd76ee6107dc1ff0439c7774f07041e76f6b Mon Sep 17 00:00:00 2001 From: Malick Diarra Date: Mon, 16 Dec 2024 10:07:58 -0500 Subject: [PATCH 12/26] Login hackathon 2024 user narrative account deletion (#11629) * include timestamp * add account deletion narrative matcher * remove unneeded matcher requirement * add deletion matcher * lint * rename account deletion --- bin/summarize-user-events | 1 + .../account_deletion_matcher.rb | 74 +++++++++++++++++++ 2 files changed, 75 insertions(+) create mode 100644 lib/event_summarizer/account_deletion_matcher.rb diff --git a/bin/summarize-user-events b/bin/summarize-user-events index 9d2b72dfc66..056e330f6dc 100755 --- a/bin/summarize-user-events +++ b/bin/summarize-user-events @@ -45,6 +45,7 @@ class SummarizeUserEvents def matchers @matchers ||= [ EventSummarizer::ExampleMatcher.new, + EventSummarizer::AccountDeletionMatcher.new, EventSummarizer::IdvMatcher.new, ] end diff --git a/lib/event_summarizer/account_deletion_matcher.rb b/lib/event_summarizer/account_deletion_matcher.rb new file mode 100644 index 00000000000..ec21f689cce --- /dev/null +++ b/lib/event_summarizer/account_deletion_matcher.rb @@ -0,0 +1,74 @@ +# frozen_string_literal: true + +module EventSummarizer + class AccountDeletionMatcher + ACCOUNT_DELETION_STARTED_EVENT = 'Account Reset: request' + ACCOUNT_DELETION_SUBMITTED_EVENT = 'Account Reset: delete' + ACCOUNT_DELETION_CANCELED_EVENT = 'Account Reset: cancel' + + EVENT_PROPERTIES = ['@message', 'properties', 'event_properties'].freeze + + attr_accessor :account_deletion_events, :event_summaries + + def initialize + @account_deletion_events = Array.new + @event_summaries = Array.new + account_deletion_events + end + + def handle_cloudwatch_event(event) + case event['name'] + when ACCOUNT_DELETION_STARTED_EVENT + process_account_reset_request(event) + when ACCOUNT_DELETION_SUBMITTED_EVENT + process_account_reset_delete(event) + when ACCOUNT_DELETION_CANCELED_EVENT + process_account_reset_cancel(event) + end + end + + def finish + event_summaries + end + + private + + def process_account_reset_request(event) + event_message = { + title: 'Account deletion Request', + attributes: [ + { type: :account_deletion_request, + description: "On #{event["@timestamp"]} user initiated account deletion" }, + ], + } + event_summaries.push(event_message) + end + + def process_account_reset_cancel(event) + event_message = { + title: 'Account deletion cancelled', + attributes: [ + { type: :account_deletion_cancelled, + description: "On #{event["@timestamp"]} user canceled account deletion" }, + ], + } + event_summaries.push(event_message) + end + + def process_account_reset_delete(event) + message = event['@message'] + age = message['properties']['event_properties']['account_age_in_days'] + date = event['@timestamp'] + event_message = { + title: 'Account deleted', + attributes: [ + { + type: :account_deleted, + description: "On #{date} user deleted their account which was #{age} days old", + }, + ], + } + event_summaries.push(event_message) + end + end +end From d5d3bc7c25d425d5c1e19b73a6bf60a5a1a4a99b Mon Sep 17 00:00:00 2001 From: Eileen Date: Mon, 16 Dec 2024 13:57:24 -0500 Subject: [PATCH 13/26] read events from file without changing stdin --- bin/summarize-user-events | 24 ++++++++++++++---------- lib/event_summarizer/idv_matcher.rb | 2 +- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/bin/summarize-user-events b/bin/summarize-user-events index 9d2b72dfc66..4d7e858f8e2 100755 --- a/bin/summarize-user-events +++ b/bin/summarize-user-events @@ -23,13 +23,13 @@ Dir[File.expand_path( end class SummarizeUserEvents - attr_reader :uuid, :from_date, :to_date, :zone + attr_reader :file_name, :uuid, :from_date, :to_date, :zone NICE_DATE_AND_TIME_FORMAT = '%B %d, %Y at %I:%M %p %Z' TIME_ONLY_FORMAT = '%I:%M %p' - def initialize(user_uuid: nil, start_time: nil, end_time: nil, zone: 'UTC') - @zone = zone + def initialize(file_name: nil, user_uuid: nil, start_time: nil, end_time: nil, zone: 'UTC') + @file_name = file_name Time.zone = zone @uuid = user_uuid @from_date = parse_time(start_time) || 1.week.ago @@ -146,18 +146,18 @@ class SummarizeUserEvents end def find_cloudwatch_events(&block) - if $stdin.tty? + unless file_name.nil? + warn 'Reading Cloudwatch events as newline-delimited JSON (ndjson) a file' + file_source(&block) + else cloudwatch_source(&block) - else - warn 'Reading Cloudwatch events as newline-delimited JSON (ndjson) from stdin' - stdin_source(&block) end end - def stdin_source(&block) + def file_source(&block) events = [] - $stdin.each_line do |line| + File.read(file_name).each_line do |line| next if line.blank? events << JSON.parse(line) end @@ -200,6 +200,10 @@ def main EOM + opts.on('-f', '--file_name FILE_NAME', 'filename from which to read the events') do |val| + options[:file_name] = val + end + opts.on('-h', '--help', 'Display this message') do warn opts exit @@ -224,7 +228,7 @@ def main # rubocop:enable Metrics/LineLength optparse.parse! - + SummarizeUserEvents.new(**options).run end diff --git a/lib/event_summarizer/idv_matcher.rb b/lib/event_summarizer/idv_matcher.rb index 162f41e6370..f53b356c95a 100644 --- a/lib/event_summarizer/idv_matcher.rb +++ b/lib/event_summarizer/idv_matcher.rb @@ -231,7 +231,7 @@ def handle_final_resolution_event(event:) add_significant_event( type: :start_gpo, timestamp:, - description: 'User requested a letter to verfy by mail', + description: 'User requested a letter to verify by mail', ) end From 23fa124478169bebd7d6b8f6ae6b2790a1cdaf1a Mon Sep 17 00:00:00 2001 From: Eileen Date: Mon, 16 Dec 2024 14:19:02 -0500 Subject: [PATCH 14/26] remove ipp from gpo code submission event --- bin/summarize-user-events | 2 +- lib/event_summarizer/idv_matcher.rb | 14 ++------------ 2 files changed, 3 insertions(+), 13 deletions(-) diff --git a/bin/summarize-user-events b/bin/summarize-user-events index 4d7e858f8e2..a0ab2b2560a 100755 --- a/bin/summarize-user-events +++ b/bin/summarize-user-events @@ -228,7 +228,7 @@ def main # rubocop:enable Metrics/LineLength optparse.parse! - + SummarizeUserEvents.new(**options).run end diff --git a/lib/event_summarizer/idv_matcher.rb b/lib/event_summarizer/idv_matcher.rb index f53b356c95a..347ba4cb031 100644 --- a/lib/event_summarizer/idv_matcher.rb +++ b/lib/event_summarizer/idv_matcher.rb @@ -290,24 +290,14 @@ def handle_gpo_code_submission(event:) return end - # User successfully entered GPO code. If nothing else is pending, + # User successfully entered GPO code. If fraud review is not pending, # then they are fully verified - - ipp_pending = !!event.dig( - *EVENT_PROPERTIES, - 'pending_in_person_enrollment', - ) - fraud_review_pending = !!event.dig( *EVENT_PROPERTIES, 'fraud_check_failed', ) - fully_verified = !(ipp_pending || fraud_review_pending) - - description = ipp_pending ? - 'User successfully entered a GPO code, but is still pending in-person proofing' - : 'User successfully entered a GPO code' + fully_verified = !fraud_review_pending add_significant_event( type: :gpo_code_success, From 32a68c8b784193613e67c6b4dd27eb768f52a3ab Mon Sep 17 00:00:00 2001 From: Eileen Date: Mon, 16 Dec 2024 15:28:03 -0500 Subject: [PATCH 15/26] update example documentation in script --- bin/summarize-user-events | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/bin/summarize-user-events b/bin/summarize-user-events index a0ab2b2560a..d9c898ad88a 100755 --- a/bin/summarize-user-events +++ b/bin/summarize-user-events @@ -30,6 +30,7 @@ class SummarizeUserEvents def initialize(file_name: nil, user_uuid: nil, start_time: nil, end_time: nil, zone: 'UTC') @file_name = file_name + @zone = zone Time.zone = zone @uuid = user_uuid @from_date = parse_time(start_time) || 1.week.ago @@ -189,13 +190,13 @@ def main Summarize user events in a human-readable format - Cloudwatch logs can be read from stdin as newline-delimited JSON (ndjson), + Cloudwatch logs can be read from a file as newline-delimited JSON (ndjson), or fetched directly via aws-vault. usage: #{basename} [OPTIONS] Examples: - #{basename} << events.ndjson + #{basename} -f events.ndjson aws-vault exec prod-power -- #{basename} -u 1234-5678-90ab-cdef -s 2024-12-09T10:00:00 -e 2024-12-09T14:30:00 -z America/New_York EOM From 295a676aa766dae5cbf3e491782e069baa46a243 Mon Sep 17 00:00:00 2001 From: Matt Hinz Date: Mon, 30 Dec 2024 15:21:14 -0800 Subject: [PATCH 16/26] Update lib/event_summarizer/vendor_result_evaluators/aamva.rb Co-authored-by: Zach Margolis --- lib/event_summarizer/vendor_result_evaluators/aamva.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/event_summarizer/vendor_result_evaluators/aamva.rb b/lib/event_summarizer/vendor_result_evaluators/aamva.rb index 2c1ac9ffd2f..1929b8acd9f 100644 --- a/lib/event_summarizer/vendor_result_evaluators/aamva.rb +++ b/lib/event_summarizer/vendor_result_evaluators/aamva.rb @@ -5,7 +5,7 @@ module VendorResultEvaluators module Aamva ID_TYPES = { 'state_id_card' => 'non-driving ID card', - 'drivers_license' => 'drivers\' license', + 'drivers_license' => "drivers' license", }.freeze # TODO: Load these from the AAMVA proofer or put them somewhere common From dd95b768e2c396326cefcf58dc66dde71ef918e1 Mon Sep 17 00:00:00 2001 From: Matt Hinz Date: Mon, 30 Dec 2024 15:41:38 -0800 Subject: [PATCH 17/26] Start writing a spec --- lib/event_summarizer/idv_matcher.rb | 9 +-- spec/lib/event_summarizer/idv_matcher_spec.rb | 56 +++++++++++++++++++ 2 files changed, 61 insertions(+), 4 deletions(-) create mode 100644 spec/lib/event_summarizer/idv_matcher_spec.rb diff --git a/lib/event_summarizer/idv_matcher.rb b/lib/event_summarizer/idv_matcher.rb index 347ba4cb031..fdade426166 100644 --- a/lib/event_summarizer/idv_matcher.rb +++ b/lib/event_summarizer/idv_matcher.rb @@ -94,6 +94,7 @@ def workflow_complete? def initialize @idv_attempts = [] + @current_idv_attempt = nil end # @return {Hash,nil} @@ -168,12 +169,12 @@ def add_significant_event( end def check_for_idv_abandonment(event) - return if !current_idv_attempt.present? + return if current_idv_attempt.nil? looks_like_idv = /^idv/i.match(event['name']) return if !looks_like_idv - if idv_abandoned_event.blank? + if idv_abandoned_event.nil? @idv_abandoned_event = event return end @@ -198,10 +199,10 @@ def for_current_idv_attempt(event:, &block) end def finish_current_idv_attempt - if current_idv_attempt.present? + if !current_idv_attempt.nil? looks_like_abandonment = !current_idv_attempt.workflow_complete? && - idv_abandoned_event.present? && + !idv_abandoned_event.nil? && idv_abandoned_event['@timestamp'] < 1.hour.ago if looks_like_abandonment diff --git a/spec/lib/event_summarizer/idv_matcher_spec.rb b/spec/lib/event_summarizer/idv_matcher_spec.rb new file mode 100644 index 00000000000..8c90dd3e9c4 --- /dev/null +++ b/spec/lib/event_summarizer/idv_matcher_spec.rb @@ -0,0 +1,56 @@ +require 'active_support' +require 'active_support/time' + +require 'event_summarizer/idv_matcher' + +RSpec.describe EventSummarizer::IdvMatcher do + describe '#handle_cloudwatch_event' do + let(:event) do + { + '@timestamp': '2024-01-02T03:04:05Z', + } + end + + subject(:matcher) do + described_class.new + end + + around do |example| + Time.use_zone('UTC') do + example.run + end + end + + context 'On unknown event' do + let(:event) { super().merge('name' => 'Some random event') } + it 'does not throw' do + matcher.handle_cloudwatch_event(event) + end + end + + context "On 'IdV: doc auth welcome submitted' event" do + let(:event) { super().merge('name' => 'IdV: doc auth welcome submitted') } + + it 'starts a new IdV attempt' do + matcher.handle_cloudwatch_event(event) + expect(matcher.current_idv_attempt).not_to eql(nil) + end + + context 'with an IdV attempt already started' do + before do + allow(matcher).to receive(:current_idv_attempt).and_return( + EventSummarizer::IdvMatcher::IdvAttempt.new( + started_at: Time.zone.now, + ), + ) + end + + it 'finishes it' do + expect(matcher.idv_attempts.length).to eql(0) + matcher.handle_cloudwatch_event(event) + expect(matcher.idv_attempts.length).to eql(1) + end + end + end + end +end From 4d07f3762f9b7bcab0c7b8ef4a4954e675b31513 Mon Sep 17 00:00:00 2001 From: Matt Hinz Date: Tue, 31 Dec 2024 13:21:50 -0800 Subject: [PATCH 18/26] Tidy up logic in IV result evaluator --- .../instant_verify.rb | 32 ++++++++----------- 1 file changed, 14 insertions(+), 18 deletions(-) diff --git a/lib/event_summarizer/vendor_result_evaluators/instant_verify.rb b/lib/event_summarizer/vendor_result_evaluators/instant_verify.rb index 81dc58ba54f..3f83ad59d57 100644 --- a/lib/event_summarizer/vendor_result_evaluators/instant_verify.rb +++ b/lib/event_summarizer/vendor_result_evaluators/instant_verify.rb @@ -7,35 +7,31 @@ module InstantVerify # @return [Hash] A Hash with a type and description keys. def self.evaluate_result(result) if result['success'] - return { + { type: :instant_verify_success, description: 'Instant Verify call succeeded', } - end - - if result['timed_out'] - return { + elsif result['timed_out'] + { type: :instant_verify_timed_out, description: 'Instant Verify request timed out.', } - end - - if result['exception'] - return { + elsif result['exception'] + { type: :instant_verify_exception, description: 'Instant Verify request resulted in an exception', } - end - - # The API call failed because of actual errors in the user's data. - # Try to come up with an explanation + else + # The API call failed because of actual errors in the user's data. + # Try to come up with an explanation - explanation = explain_errors(result) || 'Check logs for more info.' + explanation = explain_errors(result) || 'Check logs for more info.' - return { - type: :instant_verify_error, - description: "Instant Verify request failed. #{explanation}", - } + { + type: :instant_verify_error, + description: "Instant Verify request failed. #{explanation}", + } + end end # Attempts to render a legible explanation of what went wrong in a From e03cc7300a6abb4ee336eb3a52946bd40effcf3a Mon Sep 17 00:00:00 2001 From: Matt Hinz Date: Tue, 31 Dec 2024 13:37:51 -0800 Subject: [PATCH 19/26] Set event['name'] if not already set --- bin/summarize-user-events | 2 ++ 1 file changed, 2 insertions(+) diff --git a/bin/summarize-user-events b/bin/summarize-user-events index 0fc1bdaa233..1c13aed9897 100755 --- a/bin/summarize-user-events +++ b/bin/summarize-user-events @@ -62,6 +62,8 @@ class SummarizeUserEvents end event['@message'] = JSON.parse(event['@message']) if event['@message'].is_a?(String) + event['name'] ||= event['@message']['name'] + matchers.each do |matcher| matcher.handle_cloudwatch_event(event) end From 95e069c59f56c13216ac8ceef890ec14d7ad4c9c Mon Sep 17 00:00:00 2001 From: Matt Hinz Date: Tue, 31 Dec 2024 13:38:04 -0800 Subject: [PATCH 20/26] Fix typo --- bin/summarize-user-events | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/summarize-user-events b/bin/summarize-user-events index 1c13aed9897..088de2086bb 100755 --- a/bin/summarize-user-events +++ b/bin/summarize-user-events @@ -151,7 +151,7 @@ class SummarizeUserEvents def find_cloudwatch_events(&block) unless file_name.nil? - warn 'Reading Cloudwatch events as newline-delimited JSON (ndjson) a file' + warn 'Reading Cloudwatch events as newline-delimited JSON (ndjson) file' file_source(&block) else cloudwatch_source(&block) From 3e3b4c91036c1098ecb904f980e30bb467596bf7 Mon Sep 17 00:00:00 2001 From: Matt Hinz Date: Tue, 31 Dec 2024 13:39:30 -0800 Subject: [PATCH 21/26] Use Eastern time zone by default --- bin/summarize-user-events | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/bin/summarize-user-events b/bin/summarize-user-events index 088de2086bb..af40b50c3da 100755 --- a/bin/summarize-user-events +++ b/bin/summarize-user-events @@ -184,7 +184,9 @@ class SummarizeUserEvents end def main - options = {} + options = { + zone: 'America/New_York' + } basename = File.basename($0) # rubocop:disable Metrics/LineLength From e6308fd32ab44cc841b2070384c46ccad3382516 Mon Sep 17 00:00:00 2001 From: Matt Hinz Date: Tue, 31 Dec 2024 13:48:52 -0800 Subject: [PATCH 22/26] Update pluralization code + add spec --- .../instant_verify.rb | 6 +- .../instant_verify_spec.rb | 91 +++++++++++++++++++ 2 files changed, 94 insertions(+), 3 deletions(-) create mode 100644 spec/lib/event_summarizer/vendor_result_evaluators/instant_verify_spec.rb diff --git a/lib/event_summarizer/vendor_result_evaluators/instant_verify.rb b/lib/event_summarizer/vendor_result_evaluators/instant_verify.rb index 3f83ad59d57..e8d7941ee0e 100644 --- a/lib/event_summarizer/vendor_result_evaluators/instant_verify.rb +++ b/lib/event_summarizer/vendor_result_evaluators/instant_verify.rb @@ -1,5 +1,7 @@ # frozen_string_literal: true +require 'active_support/core_ext/string/inflections' + module EventSummarizer module VendorResultEvaluators module InstantVerify @@ -70,9 +72,7 @@ def self.explain_errors(result) end end - plural = checks.length == 1 ? '' : 's' - - "#{checks.length} check#{plural} failed: #{checks.join(", ")}" + "#{checks.length} #{'check'.pluralize(checks.length)} failed: #{checks.join(", ")}" end end end diff --git a/spec/lib/event_summarizer/vendor_result_evaluators/instant_verify_spec.rb b/spec/lib/event_summarizer/vendor_result_evaluators/instant_verify_spec.rb new file mode 100644 index 00000000000..9c8884e9290 --- /dev/null +++ b/spec/lib/event_summarizer/vendor_result_evaluators/instant_verify_spec.rb @@ -0,0 +1,91 @@ +require 'json' +require 'event_summarizer/vendor_result_evaluators/instant_verify' + +RSpec.describe EventSummarizer::VendorResultEvaluators::InstantVerify do + let(:instant_verify_result) do + { + success: true, + errors: {}, + exception: nil, + timed_out: false, + } + end + + subject(:evaluation) do + described_class.evaluate_result( + JSON.parse(JSON.generate(instant_verify_result)), + ) + end + + context 'successful result' do + it 'looks correct' do + expect(evaluation).to eql( + { + type: :instant_verify_success, + description: 'Instant Verify call succeeded', + }, + ) + end + end + + context 'request timed out' do + let(:instant_verify_result) do + super().merge( + success: false, + errors: {}, + timed_out: true, + ) + end + + it 'reports the error appropriately' do + expect(evaluation).to eql( + { + type: :instant_verify_timed_out, + description: 'Instant Verify request timed out.', + }, + ) + end + end + + context 'failed result' do + let(:instant_verify_result) do + { + success: false, + errors: { + base: ["Verification failed with code: 'priority.scoring.model.verification.fail'"], + InstantVerify: [ + { + ProductType: 'InstantVerify', + ExecutedStepName: 'InstantVerify', + ProductConfigurationName: 'blah.config', + ProductStatus: 'fail', + ProductReason: { + Code: 'priority.scoring.model.verification.fail', + }, + Items: [ + { ItemName: 'Check1', ItemStatus: 'pass' }, + { ItemName: 'Check2', ItemStatus: 'fail' }, + { + ItemName: 'CheckWithCode', + ItemStatus: 'fail', + ItemReason: { Code: 'some_obscure_code ' }, + }, + ], + }, + ], + }, + exception: nil, + timed_out: false, + } + end + + it 'returns the correct result' do + expect(evaluation).to eql( + { + description: 'Instant Verify request failed. 2 checks failed: Check2, CheckWithCode (some_obscure_code )', + type: :instant_verify_error, + }, + ) + end + end +end From 47b95ca51916285b665bc13598926776d52791cf Mon Sep 17 00:00:00 2001 From: Matt Hinz Date: Tue, 31 Dec 2024 14:06:24 -0800 Subject: [PATCH 23/26] Start on spec for summarize-user-events command --- bin/summarize-user-events | 21 +++++---- spec/bin/summarize-user-events_spec.rb | 62 ++++++++++++++++++++++++++ 2 files changed, 75 insertions(+), 8 deletions(-) create mode 100644 spec/bin/summarize-user-events_spec.rb diff --git a/bin/summarize-user-events b/bin/summarize-user-events index af40b50c3da..3c3672d5f61 100755 --- a/bin/summarize-user-events +++ b/bin/summarize-user-events @@ -55,14 +55,7 @@ class SummarizeUserEvents find_cloudwatch_events do |event| Time.zone ||= zone - # Timestamps coming from cloudwatch are in UTC but don't include any zone - # information. Here we parse them as UTC. - if event['@timestamp'].is_a?(String) - event['@timestamp'] = Time.zone.parse("#{event['@timestamp']} UTC") - end - event['@message'] = JSON.parse(event['@message']) if event['@message'].is_a?(String) - - event['name'] ||= event['@message']['name'] + normalize_event!(event) matchers.each do |matcher| matcher.handle_cloudwatch_event(event) @@ -181,6 +174,18 @@ class SummarizeUserEvents &block ) end + + def normalize_event!(event) + if event['@timestamp'].is_a?(String) + event['@timestamp'] = ActiveSupport::TimeZone['UTC'].parse(event['@timestamp']) + end + + if event['@message'].is_a?(String) + event['@message'] = JSON.parse(event['@message']) + end + + event['name'] ||= event['@message']['name'] + end end def main diff --git a/spec/bin/summarize-user-events_spec.rb b/spec/bin/summarize-user-events_spec.rb new file mode 100644 index 00000000000..bb07162cb00 --- /dev/null +++ b/spec/bin/summarize-user-events_spec.rb @@ -0,0 +1,62 @@ +require 'rails_helper' +load Rails.root.join('bin/summarize-user-events') + +RSpec.describe SummarizeUserEvents do + let(:user_uuid) { nil } + let(:start_time) { nil } + let(:end_time) { nil } + let(:zone) { 'UTC' } + + subject(:instance) do + described_class.new( + file_name: nil, + user_uuid:, + start_time:, + end_time:, + zone: 'UTC', + ) + end + + describe '#normalize_event!' do + let(:event) do + { + 'name' => 'test event', + '@timestamp' => '2024-12-31 21:21:47.374', + '@message' => { + '@timestamp' => '2024-12-31 21:21:47.374', + 'name' => 'test event', + }, + } + end + + subject(:normalized_event) do + event.dup.tap do |event| + instance.normalize_event!(event) + end + end + + context 'when @message is a string' do + let(:event) do + super().tap do |event| + event['@message'] = JSON.generate(event['@message']) + end + end + it 'parses @message as JSON' do + expect(event['@message']).to be_a(String) + expect(normalized_event['@message']).to eql( + '@timestamp' => '2024-12-31 21:21:47.374', + 'name' => 'test event', + ) + end + end + + context 'when @timestamp is a string' do + it 'parses it in UTC' do + expected = Time.zone.parse('2024-12-31 21:21:47.374 UTC') + Time.use_zone('America/Los_Angeles') do + expect(normalized_event['@timestamp']).to eql(expected) + end + end + end + end +end From 06f9ddf8b166a37238b4a0f145df54bc8e2d0486 Mon Sep 17 00:00:00 2001 From: Matt Hinz Date: Tue, 31 Dec 2024 14:08:38 -0800 Subject: [PATCH 24/26] Protect rubocop's delicate sensibilities --- .../vendor_result_evaluators/instant_verify_spec.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spec/lib/event_summarizer/vendor_result_evaluators/instant_verify_spec.rb b/spec/lib/event_summarizer/vendor_result_evaluators/instant_verify_spec.rb index 9c8884e9290..c22291b53f5 100644 --- a/spec/lib/event_summarizer/vendor_result_evaluators/instant_verify_spec.rb +++ b/spec/lib/event_summarizer/vendor_result_evaluators/instant_verify_spec.rb @@ -82,7 +82,7 @@ it 'returns the correct result' do expect(evaluation).to eql( { - description: 'Instant Verify request failed. 2 checks failed: Check2, CheckWithCode (some_obscure_code )', + description: 'Instant Verify request failed. 2 checks failed: Check2, CheckWithCode (some_obscure_code )', # rubocop:disable Layout/LineLength type: :instant_verify_error, }, ) From 93b9b3efae2ce810a827f8375f88fe7735afb6a2 Mon Sep 17 00:00:00 2001 From: Matt Hinz Date: Tue, 7 Jan 2025 13:46:37 -0800 Subject: [PATCH 25/26] Add more specs Add some specs around option parsing, time parsing, and actually running the program --- bin/summarize-user-events | 186 +++++++++++++++---------- spec/bin/summarize-user-events_spec.rb | 129 ++++++++++++++++- 2 files changed, 239 insertions(+), 76 deletions(-) diff --git a/bin/summarize-user-events b/bin/summarize-user-events index 3c3672d5f61..49375440131 100755 --- a/bin/summarize-user-events +++ b/bin/summarize-user-events @@ -23,24 +23,40 @@ Dir[File.expand_path( end class SummarizeUserEvents - attr_reader :file_name, :uuid, :from_date, :to_date, :zone + attr_reader :file_name, :uuid, :from_date, :stderr, :stdout, :to_date, :zone NICE_DATE_AND_TIME_FORMAT = '%B %d, %Y at %I:%M %p %Z' TIME_ONLY_FORMAT = '%I:%M %p' - def initialize(file_name: nil, user_uuid: nil, start_time: nil, end_time: nil, zone: 'UTC') + def initialize( + file_name: nil, + user_uuid: nil, + start_time: nil, + end_time: nil, + zone: 'UTC', + stdout: STDOUT, + stderr: STDERR + ) @file_name = file_name @zone = zone - Time.zone = zone @uuid = user_uuid @from_date = parse_time(start_time) || 1.week.ago - @to_date = parse_time(end_time) || (start_time.present? ? from_date + 1.week : Time.zone.now) + @to_date = parse_time(end_time) || ( + start_time.present? ? + from_date + 1.week : + ActiveSupport::TimeZone[zone].now + ) + @stdout = stdout + @stderr = stderr end def parse_time(time_str) - Time.zone.parse(time_str) - rescue StandardError - nil + return nil if time_str.nil? + + parsed = ActiveSupport::TimeZone['UTC'].parse(time_str) + parsed = parsed.in_time_zone(zone) if zone && parsed + + parsed end def matchers @@ -51,25 +67,86 @@ class SummarizeUserEvents ] end + def self.parse_command_line_options(argv) + options = { + zone: 'America/New_York' + } + basename = File.basename($0) + + # rubocop:disable Metrics/LineLength + optparse = OptionParser.new do |opts| + opts.banner = <<-EOM + + Summarize user events in a human-readable format + + Cloudwatch logs can be read from a file as newline-delimited JSON (ndjson), + or fetched directly via aws-vault. + + usage: #{basename} [OPTIONS] + + Examples: + #{basename} -f events.ndjson + aws-vault exec prod-power -- #{basename} -u 1234-5678-90ab-cdef -s 2024-12-09T10:00:00 -e 2024-12-09T14:30:00 -z America/New_York + + EOM + + opts.on('-f', '--file_name FILE_NAME', 'filename from which to read the events') do |val| + options[:file_name] = val + end + + opts.on('-h', '--help', 'Display this message') do + warn opts + exit + end + + opts.on('-u', '--user-uuid USER_UUID', 'UUID of the protagonist of the story') do |val| + options[:user_uuid] = val + end + + opts.on('-s', '--start-time START_TIME', 'Time of the start of the query period (e.g. 2024-12-09T10:00:00Z), default: 1 week ago') do |val| + options[:start_time] = val + end + + opts.on('-e', '--end-time END_TIME', 'Time of the end of the query period (e.g. 2024-12-09T14:30:00Z), default: 1 week from start') do |val| + options[:end_time] = val + end + + opts.on('-z', '--timezone TIMEZONE', 'Timezone to use (e.g. America/New_York), default: UTC') do |val| + options[:zone] = val + end + end + # rubocop:enable Metrics/LineLength + + optparse.parse!(argv) + + options + end + + def run - find_cloudwatch_events do |event| - Time.zone ||= zone + in_correct_time_zone do + find_cloudwatch_events do |event| + # Time.zone is thread-local, and CloudwatchClient may use multiple + # threads to make requests. So we have to make double-sure we're + # in the right Timezone. + in_correct_time_zone do + normalize_event!(event) + + matchers.each do |matcher| + matcher.handle_cloudwatch_event(event) + end + end + end - normalize_event!(event) + overall_results = [] matchers.each do |matcher| - matcher.handle_cloudwatch_event(event) + results_for_matcher = matcher.finish + overall_results.append(*results_for_matcher) end - end - - overall_results = [] - matchers.each do |matcher| - results_for_matcher = matcher.finish - overall_results.append(*results_for_matcher) + stdout.puts format_results(overall_results) end - - puts format_results(overall_results) end def format_results(results) @@ -83,14 +160,14 @@ class SummarizeUserEvents heading = r[:title] if timestamp - heading = "#{heading} (#{timestamp.strftime(NICE_DATE_AND_TIME_FORMAT)})" + heading = "#{heading} (#{format_time(timestamp)})" end prev_timestamp = timestamp - list_items = r[:attributes]&. - sort_by { |attr| attr[:timestamp] || Time.zone.at(0) }&. - map do |attr| + list_items = r[:attributes] + &.sort_by { |attr| attr[:timestamp] || Time.zone.at(0) } + &.map do |attr| text = attr[:description] formatted_timestamp = format_time(attr[:timestamp], prev_timestamp) @@ -112,6 +189,9 @@ class SummarizeUserEvents def format_time(timestamp, prev_timestamp = nil) return if timestamp.blank? + timestamp = timestamp.in_time_zone(zone) + prev_timestamp = prev_timestamp&.in_time_zone(zone) + same_date = timestamp.to_date == prev_timestamp&.to_date if same_date @@ -175,6 +255,14 @@ class SummarizeUserEvents ) end + def in_correct_time_zone + old_time_zone = Time.zone + Time.zone = zone + yield + ensure + Time.zone = old_time_zone + end + def normalize_event!(event) if event['@timestamp'].is_a?(String) event['@timestamp'] = ActiveSupport::TimeZone['UTC'].parse(event['@timestamp']) @@ -189,57 +277,7 @@ class SummarizeUserEvents end def main - options = { - zone: 'America/New_York' - } - basename = File.basename($0) - - # rubocop:disable Metrics/LineLength - optparse = OptionParser.new do |opts| - opts.banner = <<-EOM - - Summarize user events in a human-readable format - - Cloudwatch logs can be read from a file as newline-delimited JSON (ndjson), - or fetched directly via aws-vault. - - usage: #{basename} [OPTIONS] - - Examples: - #{basename} -f events.ndjson - aws-vault exec prod-power -- #{basename} -u 1234-5678-90ab-cdef -s 2024-12-09T10:00:00 -e 2024-12-09T14:30:00 -z America/New_York - - EOM - - opts.on('-f', '--file_name FILE_NAME', 'filename from which to read the events') do |val| - options[:file_name] = val - end - - opts.on('-h', '--help', 'Display this message') do - warn opts - exit - end - - opts.on('-u', '--user-uuid USER_UUID', 'UUID of the protagonist of the story') do |val| - options[:user_uuid] = val - end - - opts.on('-s', '--start-time START_TIME', 'Time of the start of the query period (e.g. 2024-12-09T10:00:00Z), default: 1 week ago') do |val| - options[:start_time] = val - end - - opts.on('-e', '--end-time END_TIME', 'Time of the end of the query period (e.g. 2024-12-09T14:30:00Z), default: 1 week from start') do |val| - options[:end_time] = val - end - - opts.on('-z', '--timezone TIMEZONE', 'Timezone to use (e.g. America/New_York), default: UTC') do |val| - options[:zone] = val - end - end - # rubocop:enable Metrics/LineLength - - optparse.parse! - + options = SummarizeUserEvents.parse_command_line_options(ARGV) SummarizeUserEvents.new(**options).run end diff --git a/spec/bin/summarize-user-events_spec.rb b/spec/bin/summarize-user-events_spec.rb index bb07162cb00..9923123c849 100644 --- a/spec/bin/summarize-user-events_spec.rb +++ b/spec/bin/summarize-user-events_spec.rb @@ -5,7 +5,9 @@ let(:user_uuid) { nil } let(:start_time) { nil } let(:end_time) { nil } - let(:zone) { 'UTC' } + let(:zone) { 'America/New_York' } + let(:stdout) { StringIO.new } + let(:stderr) { StringIO.new } subject(:instance) do described_class.new( @@ -13,7 +15,9 @@ user_uuid:, start_time:, end_time:, - zone: 'UTC', + zone:, + stdout:, + stderr:, ) end @@ -59,4 +63,125 @@ end end end + + describe '#parse_command_line_options' do + let(:argv) do + [] + end + + subject(:parsed) do + described_class.parse_command_line_options(argv) + end + + it 'parses default options' do + expect(parsed).to eql( + { zone: 'America/New_York' }, + ) + end + + context '-z' do + let(:argv) { ['-z', 'America/Los_Angeles'] } + + it 'parses zone' do + expect(parsed).to eql( + { + zone: 'America/Los_Angeles', + }, + ) + end + end + end + + describe '#parse_time' do + let(:input) { nil } + + subject(:actual) do + instance.parse_time(input) + end + + context 'with valid UTC timestamp' do + let(:input) do + '2025-01-07T19:56:03Z' + end + it 'parses it as UTC, then converts to configured zone' do + expect(actual.to_s).to eql( + '2025-01-07 14:56:03 -0500', + ) + end + end + + context 'with a timestamp with no zone specified' do + let(:input) do + '2025-01-07T19:56:03' + end + it 'parses it as UTC, then converts to configured zone' do + expect(actual.to_s).to eql( + '2025-01-07 14:56:03 -0500', + ) + end + end + + context 'with a timestamp with a different zone specified' do + let(:input) do + '2025-01-07T19:56:03 -0600' + end + it 'parses it as UTC, then converts to configured zone' do + expect(actual.to_s).to eql( + '2025-01-07 20:56:03 -0500', + ) + end + end + + context 'with an invalid time value' do + let(:input) { 'not even a time' } + it 'returns nil' do + expect(actual).to eql(nil) + end + end + + context 'with blank string' do + let(:input) { '' } + it 'returns nil' do + expect(actual).to eql(nil) + end + end + end + + describe '#run' do + subject(:command_output) do + instance.run + stdout.string + end + + let(:cloudwatch_events) do + [ + { + '@timestamp' => '2024-12-30 15:42:51.336', + '@message' => JSON.generate( + { + name: 'IdV: doc auth welcome submitted', + }, + ), + }, + ] + end + + before do + allow(instance).to receive(:cloudwatch_source) do |&block| + cloudwatch_events.each do |raw_event| + block.call(raw_event) + end + end + end + + it 'matches expected output' do + expect(command_output).to eql(<<~END) + ## Processed some events + * Processed 1 event(s) + + ## Identity verification started (December 30, 2024 at 10:42 AM EST) + * (10:42 AM) User abandoned identity verification + END + end + end end From 362e1ced62c226296c6571852c3eecb5af87ce14 Mon Sep 17 00:00:00 2001 From: Matt Hinz Date: Tue, 7 Jan 2025 14:11:54 -0800 Subject: [PATCH 26/26] Look at banner michael --- bin/summarize-user-events | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/bin/summarize-user-events b/bin/summarize-user-events index 49375440131..0650f35c840 100755 --- a/bin/summarize-user-events +++ b/bin/summarize-user-events @@ -75,19 +75,19 @@ class SummarizeUserEvents # rubocop:disable Metrics/LineLength optparse = OptionParser.new do |opts| - opts.banner = <<-EOM - - Summarize user events in a human-readable format - - Cloudwatch logs can be read from a file as newline-delimited JSON (ndjson), - or fetched directly via aws-vault. - - usage: #{basename} [OPTIONS] - - Examples: - #{basename} -f events.ndjson - aws-vault exec prod-power -- #{basename} -u 1234-5678-90ab-cdef -s 2024-12-09T10:00:00 -e 2024-12-09T14:30:00 -z America/New_York + opts.banner = <<~EOM + Summarize user events in a human-readable format + + Cloudwatch logs can be read from a file as newline-delimited JSON (ndjson), + or fetched directly via aws-vault. + + Usage: #{basename} [OPTIONS] + + Examples: + #{basename} -f events.ndjson + aws-vault exec prod-power -- #{basename} -u 1234-5678-90ab-cdef -s 2024-12-09T10:00:00 -e 2024-12-09T14:30:00 -z America/New_York + EOM opts.on('-f', '--file_name FILE_NAME', 'filename from which to read the events') do |val|