From eab2fb2f21bee34b075adcc205495541b6c66a84 Mon Sep 17 00:00:00 2001 From: Rwolfe-Nava Date: Thu, 16 Feb 2023 10:10:39 -0500 Subject: [PATCH 1/8] WIP - optimize s3 by streaming data changelog: Internal, Attempts API, Scalability Optimization --- .../api/irs_attempts_api_controller.rb | 29 +++++++++++++++++-- .../api/irs_attempts_api_controller_spec.rb | 5 +++- 2 files changed, 31 insertions(+), 3 deletions(-) diff --git a/app/controllers/api/irs_attempts_api_controller.rb b/app/controllers/api/irs_attempts_api_controller.rb index e6719e600bf..4fc265fa823 100644 --- a/app/controllers/api/irs_attempts_api_controller.rb +++ b/app/controllers/api/irs_attempts_api_controller.rb @@ -7,6 +7,7 @@ module Api class IrsAttemptsApiController < ApplicationController include RenderConditionConcern + include ActionController::Live check_or_render_not_found -> { IdentityConfig.store.irs_attempt_api_enabled } @@ -29,12 +30,31 @@ def create headers['X-Payload-Key'] = log_file_record.encrypted_key headers['X-Payload-IV'] = log_file_record.iv - requested_data = s3_helper.s3_client.get_object( + response = s3_helper.s3_client.head_object( bucket: s3_helper.attempts_bucket_name, key: log_file_record.filename, ) - send_data requested_data.body.read, disposition: "filename=#{log_file_record.filename}" + requested_data_size = response.content_length + + # Maybe this is variable based on the config? + buffer_index = 0 + buffer_size = 100000 + + send_stream(type: response.content_type, filename: log_file_record.filename) do |stream| + while buffer_index < requested_data_size + requested_data = s3_helper.s3_client.get_object( + bucket: s3_helper.attempts_bucket_name, + key: log_file_record.filename, + range: get_buffer_range( + current_buffer_index: buffer_index, + buffer_size: buffer_size, file_size: requested_data_size + ), + ) + buffer_index += buffer_size + 1 + stream.write(requested_data.body.read) + end + end else render json: { status: :not_found, description: 'File not found for Timestamp' }, status: :not_found @@ -62,6 +82,11 @@ def create private + def get_buffer_range(current_buffer_index:, buffer_size:, file_size:) + buffer_end = [current_buffer_index + buffer_size, file_size].min + return "bytes=#{current_buffer_index}-#{buffer_end}" + end + def authenticate_client bearer, csp_id, token = request.authorization&.split(' ', 3) if bearer != 'Bearer' || !valid_auth_tokens.include?(token) || diff --git a/spec/controllers/api/irs_attempts_api_controller_spec.rb b/spec/controllers/api/irs_attempts_api_controller_spec.rb index a3bcfc8db2c..e0a7041dc53 100644 --- a/spec/controllers/api/irs_attempts_api_controller_spec.rb +++ b/spec/controllers/api/irs_attempts_api_controller_spec.rb @@ -45,6 +45,7 @@ before do Aws.config[:s3] = { stub_responses: { + head_object: { content_length: 200000 }, get_object: { body: test_object }, }, } @@ -105,7 +106,9 @@ expect(response).to be_ok expect(Base64.strict_decode64(response.headers['X-Payload-IV'])).to be_present expect(Base64.strict_decode64(response.headers['X-Payload-Key'])).to be_present - expect(response.body).to eq(test_object) + #expect(response.body).to eq(test_object) + expect(response.content_type).to eq("application/octet-stream") + expect(response["Content-Disposition"]).to eq("attachment; filename=\"test_filename\"; filename*=UTF-8''test_filename") end end From ff19033df9e7f7df4a0cf332a401533ad5cd5926 Mon Sep 17 00:00:00 2001 From: Rwolfe-Nava Date: Thu, 16 Feb 2023 10:19:24 -0500 Subject: [PATCH 2/8] shifted a line - rubocop apparently wasn't picky about this --- app/controllers/api/irs_attempts_api_controller.rb | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/app/controllers/api/irs_attempts_api_controller.rb b/app/controllers/api/irs_attempts_api_controller.rb index 4fc265fa823..e465f30d63d 100644 --- a/app/controllers/api/irs_attempts_api_controller.rb +++ b/app/controllers/api/irs_attempts_api_controller.rb @@ -48,7 +48,8 @@ def create key: log_file_record.filename, range: get_buffer_range( current_buffer_index: buffer_index, - buffer_size: buffer_size, file_size: requested_data_size + buffer_size: buffer_size, + file_size: requested_data_size, ), ) buffer_index += buffer_size + 1 From 4b13602550c9e6d493d319c3a2ba86688c7d7ec4 Mon Sep 17 00:00:00 2001 From: Rwolfe-Nava Date: Tue, 21 Feb 2023 10:34:12 -0500 Subject: [PATCH 3/8] added feature flag. updated spec to reflect nested conditionals --- .../api/irs_attempts_api_controller.rb | 65 ++++++----- config/application.yml.default | 2 + lib/identity_config.rb | 2 + .../api/irs_attempts_api_controller_spec.rb | 101 ++++++++++++------ 4 files changed, 111 insertions(+), 59 deletions(-) diff --git a/app/controllers/api/irs_attempts_api_controller.rb b/app/controllers/api/irs_attempts_api_controller.rb index e465f30d63d..ea14938b2b2 100644 --- a/app/controllers/api/irs_attempts_api_controller.rb +++ b/app/controllers/api/irs_attempts_api_controller.rb @@ -26,35 +26,46 @@ def create log_file_record = IrsAttemptApiLogFile.find_by( requested_time: timestamp_key(key: timestamp), ) - headers['X-Payload-Key'] = log_file_record.encrypted_key headers['X-Payload-IV'] = log_file_record.iv - response = s3_helper.s3_client.head_object( - bucket: s3_helper.attempts_bucket_name, - key: log_file_record.filename, - ) - - requested_data_size = response.content_length - - # Maybe this is variable based on the config? - buffer_index = 0 - buffer_size = 100000 - - send_stream(type: response.content_type, filename: log_file_record.filename) do |stream| - while buffer_index < requested_data_size - requested_data = s3_helper.s3_client.get_object( - bucket: s3_helper.attempts_bucket_name, - key: log_file_record.filename, - range: get_buffer_range( - current_buffer_index: buffer_index, - buffer_size: buffer_size, - file_size: requested_data_size, - ), - ) - buffer_index += buffer_size + 1 - stream.write(requested_data.body.read) + if IdentityConfig.store.irs_attempt_api_aws_s3_stream_enabled + response = s3_helper.s3_client.head_object( + bucket: s3_helper.attempts_bucket_name, + key: log_file_record.filename, + ) + + requested_data_size = response.content_length + + buffer_index = 0 + buffer_size = IdentityConfig.store.irs_attempt_api_aws_s3_stream_buffer_size + + send_stream( + type: response.content_type, + filename: log_file_record.filename, + ) do |stream| + while buffer_index < requested_data_size + requested_data = s3_helper.s3_client.get_object( + bucket: s3_helper.attempts_bucket_name, + key: log_file_record.filename, + range: buffer_range( + current_buffer_index: buffer_index, + buffer_size: buffer_size, + file_size: requested_data_size, + ), + ) + buffer_index += buffer_size + 1 + stream.write(requested_data.body.read) + end end + else + requested_data = s3_helper.s3_client.get_object( + bucket: s3_helper.attempts_bucket_name, + key: log_file_record.filename, + ) + + send_data requested_data.body.read, + disposition: "filename=#{log_file_record.filename}" end else render json: { status: :not_found, description: 'File not found for Timestamp' }, @@ -83,9 +94,9 @@ def create private - def get_buffer_range(current_buffer_index:, buffer_size:, file_size:) + def buffer_range(current_buffer_index:, buffer_size:, file_size:) buffer_end = [current_buffer_index + buffer_size, file_size].min - return "bytes=#{current_buffer_index}-#{buffer_end}" + "bytes=#{current_buffer_index}-#{buffer_end}" end def authenticate_client diff --git a/config/application.yml.default b/config/application.yml.default index 90352964ee5..a1ae6de8820 100644 --- a/config/application.yml.default +++ b/config/application.yml.default @@ -151,6 +151,8 @@ irs_attempt_api_auth_tokens: '' irs_attempt_api_csp_id: 'LOGIN.gov' irs_attempt_api_enabled: false irs_attempt_api_aws_s3_enabled: false +irs_attempt_api_aws_s3_stream_enabled: false +irs_attempt_api_aws_s3_stream_buffer_size: 100_000 irs_attempt_api_event_ttl_seconds: 86400 irs_attempt_api_event_count_default: 1000 irs_attempt_api_event_count_max: 10000 diff --git a/lib/identity_config.rb b/lib/identity_config.rb index ebd4f974ec1..d0a5d406737 100644 --- a/lib/identity_config.rb +++ b/lib/identity_config.rb @@ -231,6 +231,8 @@ def self.build_store(config_map) config.add(:irs_attempt_api_csp_id) config.add(:irs_attempt_api_enabled, type: :boolean) config.add(:irs_attempt_api_aws_s3_enabled, type: :boolean) + config.add(:irs_attempt_api_aws_s3_stream_enabled, type: :boolean) + config.add(:irs_attempt_api_aws_s3_stream_buffer_size, type: :integer) config.add(:irs_attempt_api_event_ttl_seconds, type: :integer) config.add(:irs_attempt_api_event_count_default, type: :integer) config.add(:irs_attempt_api_event_count_max, type: :integer) diff --git a/spec/controllers/api/irs_attempts_api_controller_spec.rb b/spec/controllers/api/irs_attempts_api_controller_spec.rb index e0a7041dc53..19e4ddcf443 100644 --- a/spec/controllers/api/irs_attempts_api_controller_spec.rb +++ b/spec/controllers/api/irs_attempts_api_controller_spec.rb @@ -41,44 +41,17 @@ let(:existing_event_jtis) { existing_events.map(&:first) } describe '#create' do + # NEST inside AWS S3 Enabled Context! Stub different reponses for non-s3 context + # TODO let(:test_object) { '{test: "test"}' } before do Aws.config[:s3] = { stub_responses: { - head_object: { content_length: 200000 }, get_object: { body: test_object }, }, } end - context 'with timestamp problems' do - it 'returns unprocessable_entity with no timestamp' do - post :create, params: { timestamp: nil } - - expect(response.status).to eq(422) - end - - it 'returns unprocessable_entity with invalid timestamp' do - post :create, params: { timestamp: 'INVALID*TIME' } - - expect(response.status).to eq(422) - end - end - - context 'with aws_s3 disabled' do - let(:timestamp) { '2022-11-08T18:00:00.000Z' } - it 'should bypass s3 retrieval' do - expect_any_instance_of(Aws::S3::Client).not_to receive(:get_object) - - post :create, params: { timestamp: timestamp } - - expect(response).to be_ok - expect(Base64.strict_decode64(response.headers['X-Payload-IV'])).to be_present - expect(Base64.strict_decode64(response.headers['X-Payload-Key'])).to be_present - expect(Base64.strict_decode64(response.body)).to be_present - end - end - context 'with aws_s3 enabled' do let(:wrong_time) { time - 1.year } let(:timestamp) { '2022-11-08T18:00:00.000Z' } @@ -106,9 +79,73 @@ expect(response).to be_ok expect(Base64.strict_decode64(response.headers['X-Payload-IV'])).to be_present expect(Base64.strict_decode64(response.headers['X-Payload-Key'])).to be_present - #expect(response.body).to eq(test_object) - expect(response.content_type).to eq("application/octet-stream") - expect(response["Content-Disposition"]).to eq("attachment; filename=\"test_filename\"; filename*=UTF-8''test_filename") + expect(response.body).to eq(test_object) + end + + context 'with aws_s3_stream enabled' do + let(:test_object) { '{test: "1234567890 12345"}' } + before do + allow(IdentityConfig.store).to receive(:irs_attempt_api_aws_s3_stream_enabled). + and_return(true) + allow(IdentityConfig.store).to receive(:irs_attempt_api_aws_s3_stream_buffer_size). + and_return(10) + + Aws.config[:s3] = { + stub_responses: { + head_object: { content_length: test_object.bytesize }, + get_object: proc do |context| + range_string = context.params[:range] + byte_string = range_string.split('=')[1] + byte_range_array = byte_string.split('-') + { body: test_object.byteslice( + byte_range_array[0].to_i, + IdentityConfig.store.irs_attempt_api_aws_s3_stream_buffer_size + 1, + ) } + end, + }, + } + end + + it 'should render data streamed from s3 correctly' do + post :create, params: { timestamp: time.iso8601 } + + expect(response).to be_ok + expect(Base64.strict_decode64(response.headers['X-Payload-IV'])).to be_present + expect(Base64.strict_decode64(response.headers['X-Payload-Key'])).to be_present + expect(response.content_type).to eq('application/octet-stream') + expect(response['Content-Disposition']). + to eq("attachment; filename=\"test_filename\"; filename*=UTF-8''test_filename") + + expect(response.stream.body).to eq(test_object) + end + end + end + + context 'with timestamp problems' do + it 'returns unprocessable_entity with no timestamp' do + post :create, params: { timestamp: nil } + + expect(response.status).to eq(422) + end + + it 'returns unprocessable_entity with invalid timestamp' do + post :create, params: { timestamp: 'INVALID*TIME' } + + expect(response.status).to eq(422) + end + end + + context 'with aws_s3 disabled' do + let(:timestamp) { '2022-11-08T18:00:00.000Z' } + it 'should bypass s3 retrieval' do + expect_any_instance_of(Aws::S3::Client).not_to receive(:get_object) + + post :create, params: { timestamp: timestamp } + + expect(response).to be_ok + expect(Base64.strict_decode64(response.headers['X-Payload-IV'])).to be_present + expect(Base64.strict_decode64(response.headers['X-Payload-Key'])).to be_present + expect(Base64.strict_decode64(response.body)).to be_present end end From 4978e00e152348513ffe209eb7a6c102612bc7cb Mon Sep 17 00:00:00 2001 From: Rwolfe-Nava Date: Tue, 21 Feb 2023 10:45:42 -0500 Subject: [PATCH 4/8] cleaned up some comments. --- spec/controllers/api/irs_attempts_api_controller_spec.rb | 2 -- 1 file changed, 2 deletions(-) diff --git a/spec/controllers/api/irs_attempts_api_controller_spec.rb b/spec/controllers/api/irs_attempts_api_controller_spec.rb index 19e4ddcf443..f9d894d5b0e 100644 --- a/spec/controllers/api/irs_attempts_api_controller_spec.rb +++ b/spec/controllers/api/irs_attempts_api_controller_spec.rb @@ -41,8 +41,6 @@ let(:existing_event_jtis) { existing_events.map(&:first) } describe '#create' do - # NEST inside AWS S3 Enabled Context! Stub different reponses for non-s3 context - # TODO let(:test_object) { '{test: "test"}' } before do Aws.config[:s3] = { From dba99d52036325bd9db6a714828328409d324a16 Mon Sep 17 00:00:00 2001 From: Rwolfe-Nava Date: Tue, 21 Feb 2023 11:05:37 -0500 Subject: [PATCH 5/8] broke out s3 response code into its own method. --- .../api/irs_attempts_api_controller.rb | 81 ++++++++++--------- 1 file changed, 43 insertions(+), 38 deletions(-) diff --git a/app/controllers/api/irs_attempts_api_controller.rb b/app/controllers/api/irs_attempts_api_controller.rb index ea14938b2b2..6f1e1be0611 100644 --- a/app/controllers/api/irs_attempts_api_controller.rb +++ b/app/controllers/api/irs_attempts_api_controller.rb @@ -29,44 +29,8 @@ def create headers['X-Payload-Key'] = log_file_record.encrypted_key headers['X-Payload-IV'] = log_file_record.iv - if IdentityConfig.store.irs_attempt_api_aws_s3_stream_enabled - response = s3_helper.s3_client.head_object( - bucket: s3_helper.attempts_bucket_name, - key: log_file_record.filename, - ) - - requested_data_size = response.content_length - - buffer_index = 0 - buffer_size = IdentityConfig.store.irs_attempt_api_aws_s3_stream_buffer_size - - send_stream( - type: response.content_type, - filename: log_file_record.filename, - ) do |stream| - while buffer_index < requested_data_size - requested_data = s3_helper.s3_client.get_object( - bucket: s3_helper.attempts_bucket_name, - key: log_file_record.filename, - range: buffer_range( - current_buffer_index: buffer_index, - buffer_size: buffer_size, - file_size: requested_data_size, - ), - ) - buffer_index += buffer_size + 1 - stream.write(requested_data.body.read) - end - end - else - requested_data = s3_helper.s3_client.get_object( - bucket: s3_helper.attempts_bucket_name, - key: log_file_record.filename, - ) - - send_data requested_data.body.read, - disposition: "filename=#{log_file_record.filename}" - end + serve_s3_response(log_file_record: log_file_record) + else render json: { status: :not_found, description: 'File not found for Timestamp' }, status: :not_found @@ -99,6 +63,47 @@ def buffer_range(current_buffer_index:, buffer_size:, file_size:) "bytes=#{current_buffer_index}-#{buffer_end}" end + def serve_s3_response(log_file_record:) + if IdentityConfig.store.irs_attempt_api_aws_s3_stream_enabled + response = s3_helper.s3_client.head_object( + bucket: s3_helper.attempts_bucket_name, + key: log_file_record.filename, + ) + + requested_data_size = response.content_length + + buffer_index = 0 + buffer_size = IdentityConfig.store.irs_attempt_api_aws_s3_stream_buffer_size + + send_stream( + type: response.content_type, + filename: log_file_record.filename, + ) do |stream| + while buffer_index < requested_data_size + requested_data = s3_helper.s3_client.get_object( + bucket: s3_helper.attempts_bucket_name, + key: log_file_record.filename, + range: buffer_range( + current_buffer_index: buffer_index, + buffer_size: buffer_size, + file_size: requested_data_size, + ), + ) + buffer_index += buffer_size + 1 + stream.write(requested_data.body.read) + end + end + else + requested_data = s3_helper.s3_client.get_object( + bucket: s3_helper.attempts_bucket_name, + key: log_file_record.filename, + ) + + send_data requested_data.body.read, + disposition: "filename=#{log_file_record.filename}" + end + end + def authenticate_client bearer, csp_id, token = request.authorization&.split(' ', 3) if bearer != 'Bearer' || !valid_auth_tokens.include?(token) || From 5401b3029afc89c9ca5b0788a555cc9dd1c0d099 Mon Sep 17 00:00:00 2001 From: Rwolfe-Nava Date: Tue, 21 Feb 2023 11:34:20 -0500 Subject: [PATCH 6/8] cleaned up code based on feedback --- spec/controllers/api/irs_attempts_api_controller_spec.rb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/spec/controllers/api/irs_attempts_api_controller_spec.rb b/spec/controllers/api/irs_attempts_api_controller_spec.rb index f9d894d5b0e..4a3118cdd87 100644 --- a/spec/controllers/api/irs_attempts_api_controller_spec.rb +++ b/spec/controllers/api/irs_attempts_api_controller_spec.rb @@ -93,10 +93,10 @@ head_object: { content_length: test_object.bytesize }, get_object: proc do |context| range_string = context.params[:range] - byte_string = range_string.split('=')[1] - byte_range_array = byte_string.split('-') + _, byte_string = range_string.split('=') + start_byte, _ = byte_string.split('-') { body: test_object.byteslice( - byte_range_array[0].to_i, + start_byte.to_i, IdentityConfig.store.irs_attempt_api_aws_s3_stream_buffer_size + 1, ) } end, From a27fc0b51913b4ecf0e2c3ff5441a85c5037b4b7 Mon Sep 17 00:00:00 2001 From: Rwolfe-Nava Date: Tue, 21 Feb 2023 12:12:49 -0500 Subject: [PATCH 7/8] updated default buffer size and made some clarifying changes based on feedback changelog: Internal, Attempts Api, Optimization: S3 Streaming --- config/application.yml.default | 2 +- spec/controllers/api/irs_attempts_api_controller_spec.rb | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/config/application.yml.default b/config/application.yml.default index a1ae6de8820..1de283d8af7 100644 --- a/config/application.yml.default +++ b/config/application.yml.default @@ -152,7 +152,7 @@ irs_attempt_api_csp_id: 'LOGIN.gov' irs_attempt_api_enabled: false irs_attempt_api_aws_s3_enabled: false irs_attempt_api_aws_s3_stream_enabled: false -irs_attempt_api_aws_s3_stream_buffer_size: 100_000 +irs_attempt_api_aws_s3_stream_buffer_size: 16_777_216 irs_attempt_api_event_ttl_seconds: 86400 irs_attempt_api_event_count_default: 1000 irs_attempt_api_event_count_max: 10000 diff --git a/spec/controllers/api/irs_attempts_api_controller_spec.rb b/spec/controllers/api/irs_attempts_api_controller_spec.rb index 4a3118cdd87..ec8616b05f2 100644 --- a/spec/controllers/api/irs_attempts_api_controller_spec.rb +++ b/spec/controllers/api/irs_attempts_api_controller_spec.rb @@ -120,13 +120,13 @@ end context 'with timestamp problems' do - it 'returns unprocessable_entity with no timestamp' do + it 'returns unprocessable_entity when given no timestamp' do post :create, params: { timestamp: nil } expect(response.status).to eq(422) end - it 'returns unprocessable_entity with invalid timestamp' do + it 'returns unprocessable_entity when timestamp is invalid' do post :create, params: { timestamp: 'INVALID*TIME' } expect(response.status).to eq(422) From 25e7b3cca384ba7126309fe277b2fb8f3212d16a Mon Sep 17 00:00:00 2001 From: Rwolfe-Nava Date: Tue, 21 Feb 2023 12:28:30 -0500 Subject: [PATCH 8/8] cleaned up more test descriptions. changelog: Internal, Attempts Api, Feature: S3 Streaming --- spec/controllers/api/irs_attempts_api_controller_spec.rb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/spec/controllers/api/irs_attempts_api_controller_spec.rb b/spec/controllers/api/irs_attempts_api_controller_spec.rb index ec8616b05f2..498a7bf0494 100644 --- a/spec/controllers/api/irs_attempts_api_controller_spec.rb +++ b/spec/controllers/api/irs_attempts_api_controller_spec.rb @@ -182,17 +182,17 @@ expect(response.status).to eq(404) end - it 'returns an error without required timestamp parameter' do + it 'returns an error when required timestamp parameter is missing' do post :create, params: {} expect(response.status).to eq 422 end - it 'returns an error with empty timestamp parameter' do + it 'returns an error when timestamp parameter is empty' do post :create, params: { timestamp: '' } expect(response.status).to eq 422 end - it 'returns an error with invalid timestamp parameter' do + it 'returns an error when timestamp parameter is invalid' do post :create, params: { timestamp: 'abc' } expect(response.status).to eq 422