diff --git a/Makefile b/Makefile index e6f310be8a4..bce9ab58f6c 100644 --- a/Makefile +++ b/Makefile @@ -168,8 +168,10 @@ run-https: tmp/$(HOST)-$(PORT).key tmp/$(HOST)-$(PORT).crt ## Runs the developme normalize_yaml: ## Normalizes YAML files (alphabetizes keys, fixes line length, smart quotes) yarn normalize-yaml .rubocop.yml --disable-sort-keys --disable-smart-punctuation + find ./config/locales/transliterate -type f -name '*.yml' -exec yarn normalize-yaml --disable-sort-keys --disable-smart-punctuation {} \; find ./config/locales/telephony -type f -name '*.yml' | xargs yarn normalize-yaml --disable-smart-punctuation - find ./config/locales -not -path "./config/locales/telephony*" -type f -name '*.yml' | xargs yarn normalize-yaml \ + find ./config/locales -not \( -path "./config/locales/telephony*" -o -path "./config/locales/transliterate/*" \) -type f -name '*.yml' | \ + xargs yarn normalize-yaml \ config/pinpoint_supported_countries.yml \ config/pinpoint_overrides.yml \ config/country_dialing_codes.yml diff --git a/app/services/usps_in_person_proofing/enrollment_helper.rb b/app/services/usps_in_person_proofing/enrollment_helper.rb index 861fbcb31ba..01f9cb99b37 100644 --- a/app/services/usps_in_person_proofing/enrollment_helper.rb +++ b/app/services/usps_in_person_proofing/enrollment_helper.rb @@ -50,10 +50,10 @@ def create_usps_enrollment(enrollment, pii) applicant = UspsInPersonProofing::Applicant.new( { unique_id: unique_id, - first_name: pii['first_name'], - last_name: pii['last_name'], - address: address, - city: pii['city'], + first_name: transliterate(pii['first_name']), + last_name: transliterate(pii['last_name']), + address: transliterate(address), + city: transliterate(pii['city']), state: pii['state'], zip_code: pii['zipcode'], email: 'no-reply@login.gov', @@ -98,6 +98,23 @@ def handle_standard_error(err, enrollment) def analytics(user: AnonymousUser.new) Analytics.new(user: user, request: nil, session: {}, sp: nil) end + + # Transliterates a value for USPS when the feature flag is enabled. Falls back to the + # original value when it contains characters that cannot be transliterated. + def transliterate(value)
+ return value unless IdentityConfig.store.usps_ipp_transliteration_enabled + + result = transliterator.transliterate(value) + if result.unsupported_chars.present? + result.original + else + result.transliterated + end + end + + def transliterator + Transliterator.new + end end end end diff --git a/app/services/usps_in_person_proofing/transliterator.rb b/app/services/usps_in_person_proofing/transliterator.rb new file mode 100644 index 00000000000..dfe7315758e --- /dev/null +++ b/app/services/usps_in_person_proofing/transliterator.rb @@ -0,0 +1,55 @@ +module UspsInPersonProofing + class Transliterator + # This is the default. May not be able to override this in current version. + REPLACEMENT = '?'.freeze + + # Separator placed between characters before transliteration so that every output + # segment maps back to exactly one input character. Whitespace runs are collapsed to a + # single space first, so a newline can never appear in the value itself. + SEGMENT_SEPARATOR = "\n".freeze + + # Container to hold the results of transliteration + TransliterationResult = Struct.new( + # Was the value different after transliteration? + :changed?, + # Original value submitted for transliteration + :original, + # Transliterated value + :transliterated, + # Characters from the original that could not be transliterated + :unsupported_chars, + keyword_init: true, + ) + + # Transliterate values for usage in the USPS API. This will additionally strip/reduce + # excess whitespace and check for special characters that are unsupported by transliteration. + # Additional validation may be necessary depending on the specific field being transliterated.
+ # + # @param [String,#to_s] value The value to transliterate for USPS + # @return [TransliterationResult] The transliterated value + def transliterate(value) + stripped = value.to_s.gsub(/\s+/, ' ').strip + source_chars = stripped.chars + + # A single character may transliterate to several (e.g. "ß" => "ss"), so positions in the + # output cannot be compared with positions in the input. Transliterate one character per + # segment instead, which keeps each result aligned with the character that produced it. + segments = I18n.transliterate(source_chars.join(SEGMENT_SEPARATOR), locale: :en). + split(SEGMENT_SEPARATOR, -1) + transliterated = segments.join + + unsupported_chars = [] + source_chars.zip(segments) do |char, segment| + unsupported_chars << char if segment == REPLACEMENT && char != REPLACEMENT + end + + # Using struct instead of exception here to reduce likelihood of logging PII + TransliterationResult.new( + changed?: value != transliterated, + original: value, + transliterated: transliterated, + unsupported_chars: unsupported_chars, + ) + end + end +end diff --git a/config/application.yml.default b/config/application.yml.default index ff8d8d92061..b9e70990540 100644 --- a/config/application.yml.default +++ b/config/application.yml.default @@ -329,6 +329,7 @@ usps_ipp_request_timeout: 10 usps_ipp_sponsor_id: '' usps_ipp_username: '' usps_mock_fallback: true +usps_ipp_transliteration_enabled: false get_usps_proofing_results_job_cron: '0/10 * * * *' get_usps_proofing_results_job_reprocess_delay_minutes: 5 get_usps_proofing_results_job_request_delay_milliseconds: 1000 @@ -398,6 +399,7 @@ development: state_tracking_enabled: true telephony_adapter: test use_dashboard_service_providers: true + usps_ipp_transliteration_enabled: true usps_upload_sftp_directory: "/gsa_order" usps_upload_sftp_host: localhost usps_upload_sftp_password: test diff --git a/config/i18n-tasks.yml b/config/i18n-tasks.yml index 7a0c2bee4c8..e1fce56b1e7 100644 --- a/config/i18n-tasks.yml +++ b/config/i18n-tasks.yml @@ -113,7 +113,8 @@ ignore_unused: # - common.brand ## Ignore these keys completely: -# ignore: +ignore: + - 'i18n.transliterate.rule.*' # - kaminari.* ## Sometimes, it isn't possible for i18n-tasks to match the key correctly, diff --git
a/config/locales/transliterate/en.yml b/config/locales/transliterate/en.yml new file mode 100644 index 00000000000..ff055805825 --- /dev/null +++ b/config/locales/transliterate/en.yml @@ -0,0 +1,23 @@ +en: + i18n: + transliterate: + rule: + # Convert okina to apostrophe + ʻ: "'" + # Convert quotation marks + ’: "'" + ‘: "'" + ‛: "'" + “: '"' + ‟: '"' + ”: '"' + # Convert hyphens + ‐: '-' + ‑: '-' + ‒: '-' + –: '-' + —: '-' + ﹘: '-' + # Convert number signs + ﹟: '#' + ＃: '#' diff --git a/lib/identity_config.rb b/lib/identity_config.rb index cb03a5414f4..55c1f72410f 100644 --- a/lib/identity_config.rb +++ b/lib/identity_config.rb @@ -427,6 +427,7 @@ def self.build_store(config_map) config.add(:usps_ipp_username, type: :string) config.add(:usps_ipp_request_timeout, type: :integer) config.add(:usps_upload_enabled, type: :boolean) + config.add(:usps_ipp_transliteration_enabled, type: :boolean) config.add(:get_usps_proofing_results_job_cron, type: :string) config.add(:get_usps_proofing_results_job_reprocess_delay_minutes, type: :integer) config.add(:get_usps_proofing_results_job_request_delay_milliseconds, type: :integer) diff --git a/spec/i18n_spec.rb b/spec/i18n_spec.rb index fe11f2cec91..ef3f2601abe 100644 --- a/spec/i18n_spec.rb +++ b/spec/i18n_spec.rb @@ -16,6 +16,7 @@ class BaseTask { key: 'account.navigation.menu', locales: %i[fr] }, # "Menu" is "Menu" in French { key: 'doc_auth.headings.photo', locales: %i[fr] }, # "Photo" is "Photo" in French { key: /^i18n\.locale\./ }, # Show locale options translated as that language + { key: /^i18n\.transliterate\./ }, # Approximate non-ASCII characters in ASCII { key: /^countries/ }, # Some countries have the same name across languages { key: 'links.contact', locales: %i[fr] }, # "Contact" is "Contact" in French { key: 'simple_form.no', locales: %i[es] }, # "No" is "No" in Spanish @@ -89,7 +90,9 @@ def allowed_untranslated_key?(locale, key) missing_interpolation_argument_keys = [] i18n.data[i18n.base_locale].select_keys do
|key, _node| - next if i18n.t(key).is_a?(Array) || i18n.t(key).nil? + if key.start_with?('i18n.transliterate.rule.') || i18n.t(key).is_a?(Array) || i18n.t(key).nil? + next + end interpolation_arguments = i18n.locales.map do |locale| extract_interpolation_arguments i18n.t(key, locale) @@ -109,20 +112,23 @@ def allowed_untranslated_key?(locale, key) i18n_file = full_path.sub("#{root_dir}/", '') describe i18n_file do - it 'has only lower_snake_case keys' do - keys = flatten_hash(YAML.load_file(full_path)).keys + # Transliteration includes special characters by definition, so it could fail the below checks + unless full_path.include?('/config/locales/transliterate/') + it 'has only lower_snake_case keys' do + keys = flatten_hash(YAML.load_file(full_path)).keys - bad_keys = keys.reject { |key| key =~ /^[a-z0-9_.]+$/ } + bad_keys = keys.reject { |key| key =~ /^[a-z0-9_.]+$/ } - expect(bad_keys).to be_empty - end + expect(bad_keys).to be_empty + end - it 'has only has XML-safe identifiers (keys start with a letter)' do - keys = flatten_hash(YAML.load_file(full_path)).keys + it 'has only has XML-safe identifiers (keys start with a letter)' do + keys = flatten_hash(YAML.load_file(full_path)).keys - bad_keys = keys.select { |key| key.split('.').any? { |part| part =~ /^[0-9]/ } } + bad_keys = keys.select { |key| key.split('.').any? { |part| part =~ /^[0-9]/ } } - expect(bad_keys).to be_empty + expect(bad_keys).to be_empty + end end it 'has correctly-formatted interpolation values' do diff --git a/spec/services/usps_in_person_proofing/enrollment_helper_spec.rb b/spec/services/usps_in_person_proofing/enrollment_helper_spec.rb index 5ca6594595d..6386f38098f 100644 --- a/spec/services/usps_in_person_proofing/enrollment_helper_spec.rb +++ b/spec/services/usps_in_person_proofing/enrollment_helper_spec.rb @@ -11,15 +11,24 @@ merge(same_address_as_id: current_address_matches_id).
transform_keys(&:to_s) end - let(:subject) { described_class } + subject { described_class } let(:subject_analytics) { FakeAnalytics.new } + let(:transliterator) { UspsInPersonProofing::Transliterator.new } let(:service_provider) { nil } + let(:usps_ipp_transliteration_enabled) { true } before(:each) do stub_request_token stub_request_enroll allow(IdentityConfig.store).to receive(:usps_mock_fallback).and_return(usps_mock_fallback) + allow(subject).to receive(:transliterator).and_return(transliterator) + allow(transliterator).to receive(:transliterate). + with(anything) do |val| + transliterated_without_change(val) + end allow(subject).to receive(:analytics).and_return(subject_analytics) + allow(IdentityConfig.store).to receive(:usps_ipp_transliteration_enabled). + and_return(usps_ipp_transliteration_enabled) end describe '#schedule_in_person_enrollment' do @@ -58,38 +67,79 @@ expect(enrollment.current_address_matches_id).to eq(current_address_matches_id) end - it 'creates usps enrollment' do - proofer = UspsInPersonProofing::Mock::Proofer.new - mock = double - - expect(UspsInPersonProofing::Proofer).to receive(:new).and_return(mock) - expect(mock).to receive(:request_enroll) do |applicant| - expect(applicant.first_name).to eq(Idp::Constants::MOCK_IDV_APPLICANT[:first_name]) - expect(applicant.last_name).to eq(Idp::Constants::MOCK_IDV_APPLICANT[:last_name]) - expect(applicant.address).to eq(Idp::Constants::MOCK_IDV_APPLICANT[:address1]) - expect(applicant.city).to eq(Idp::Constants::MOCK_IDV_APPLICANT[:city]) - expect(applicant.state).to eq(Idp::Constants::MOCK_IDV_APPLICANT[:state]) - expect(applicant.zip_code).to eq(Idp::Constants::MOCK_IDV_APPLICANT[:zipcode]) - expect(applicant.email).to eq('no-reply@login.gov') - expect(applicant.unique_id).to eq(enrollment.unique_id) - - proofer.request_enroll(applicant) + context 'transliteration disabled' do + let(:usps_ipp_transliteration_enabled) { false } + + it 'creates usps enrollment without using
transliteration' do + mock_proofer = double(UspsInPersonProofing::Mock::Proofer) + expect(subject).to receive(:usps_proofer).and_return(mock_proofer) + + expect(transliterator).not_to receive(:transliterate) + expect(mock_proofer).to receive(:request_enroll) do |applicant| + expect(applicant.first_name).to eq(Idp::Constants::MOCK_IDV_APPLICANT[:first_name]) + expect(applicant.last_name).to eq(Idp::Constants::MOCK_IDV_APPLICANT[:last_name]) + expect(applicant.address).to eq(Idp::Constants::MOCK_IDV_APPLICANT[:address1]) + expect(applicant.city).to eq(Idp::Constants::MOCK_IDV_APPLICANT[:city]) + expect(applicant.state).to eq(Idp::Constants::MOCK_IDV_APPLICANT[:state]) + expect(applicant.zip_code).to eq(Idp::Constants::MOCK_IDV_APPLICANT[:zipcode]) + expect(applicant.email).to eq('no-reply@login.gov') + expect(applicant.unique_id).to eq(enrollment.unique_id) + + UspsInPersonProofing::Mock::Proofer.new.request_enroll(applicant) + end + + subject.schedule_in_person_enrollment(user, pii) end + end - subject.schedule_in_person_enrollment(user, pii) + context 'transliteration enabled' do + let(:usps_ipp_transliteration_enabled) { true } + + it 'creates usps enrollment while using transliteration' do + mock_proofer = double(UspsInPersonProofing::Mock::Proofer) + expect(subject).to receive(:usps_proofer).and_return(mock_proofer) + + first_name = Idp::Constants::MOCK_IDV_APPLICANT[:first_name] + last_name = Idp::Constants::MOCK_IDV_APPLICANT[:last_name] + address = Idp::Constants::MOCK_IDV_APPLICANT[:address1] + city = Idp::Constants::MOCK_IDV_APPLICANT[:city] + + expect(transliterator).to receive(:transliterate). + with(first_name).and_return(transliterated_without_change(first_name)) + expect(transliterator).to receive(:transliterate). + with(last_name).and_return(transliterated(last_name)) + expect(transliterator).to receive(:transliterate). + with(address).and_return(transliterated_with_failure(address)) + expect(transliterator).to receive(:transliterate).
+ with(city).and_return(transliterated(city)) + + expect(mock_proofer).to receive(:request_enroll) do |applicant| + expect(applicant.first_name).to eq(first_name) + expect(applicant.last_name).to eq("transliterated_#{last_name}") + expect(applicant.address).to eq(address) + expect(applicant.city).to eq("transliterated_#{city}") + expect(applicant.state).to eq(Idp::Constants::MOCK_IDV_APPLICANT[:state]) + expect(applicant.zip_code).to eq(Idp::Constants::MOCK_IDV_APPLICANT[:zipcode]) + expect(applicant.email).to eq('no-reply@login.gov') + expect(applicant.unique_id).to eq(enrollment.unique_id) + + UspsInPersonProofing::Mock::Proofer.new.request_enroll(applicant) + end + + subject.schedule_in_person_enrollment(user, pii) + end end context 'when the enrollment does not have a unique ID' do it 'uses the deprecated InPersonEnrollment#usps_unique_id value to create the enrollment' do enrollment.update(unique_id: nil) - proofer = UspsInPersonProofing::Mock::Proofer.new - mock = double + mock_proofer = double(UspsInPersonProofing::Mock::Proofer) + expect(subject).to receive(:usps_proofer).and_return(mock_proofer) - expect(UspsInPersonProofing::Proofer).to receive(:new).and_return(mock) - expect(mock).to receive(:request_enroll) do |applicant| + expect(mock_proofer).to receive(:request_enroll) do |applicant| expect(applicant.unique_id).to eq(enrollment.usps_unique_id) - proofer.request_enroll(applicant) + UspsInPersonProofing::Mock::Proofer.new.request_enroll(applicant) end subject.schedule_in_person_enrollment(user, pii) @@ -146,4 +196,31 @@ end end end + + def transliterated_without_change(value) + UspsInPersonProofing::Transliterator::TransliterationResult.new( + changed?: false, + original: value, + transliterated: value, + unsupported_chars: [], + ) + end + + def transliterated(value) + UspsInPersonProofing::Transliterator::TransliterationResult.new( + changed?: true, + original: value, + transliterated: "transliterated_#{value}", + unsupported_chars: [], + ) + end + +
def transliterated_with_failure(value) + UspsInPersonProofing::Transliterator::TransliterationResult.new( + changed?: true, + original: value, + transliterated: "transliterated_failed_#{value}", + unsupported_chars: [':'], + ) + end end diff --git a/spec/services/usps_in_person_proofing/transliterator_spec.rb b/spec/services/usps_in_person_proofing/transliterator_spec.rb new file mode 100644 index 00000000000..bbc0ac03dff --- /dev/null +++ b/spec/services/usps_in_person_proofing/transliterator_spec.rb @@ -0,0 +1,93 @@ +require 'rails_helper' + +RSpec.describe UspsInPersonProofing::Transliterator do + describe '#transliterate' do + subject(:transliterator) { UspsInPersonProofing::Transliterator.new } + context 'baseline functionality' do + context 'with an input that requires transliteration' do + let(:input_value) { "\t\n BobИy \t TЉble?s\r\n" } + let(:result) { transliterator.transliterate(input_value) } + let(:transliterated_result) { 'Bob?y T?ble?s' } + + it 'returns the original value that was requested to be transliterated' do + expect(result.original).to eq(input_value) + end + it 'includes a "changed?" key indicating that transliteration did change the value' do + expect(result.changed?).to be(true) + end + it 'strips whitespace from the ends' do + expect(result.transliterated).not_to match(/\A\s+/) + expect(result.transliterated).not_to match(/\s+\z/) + end + it 'replaces consecutive whitespaces with regular spaces' do + expect(result.transliterated).not_to match(/\s\s/) + expect(result.transliterated).not_to match(/[^\S ]+/) + end + it 'returns a list of the characters that transliteration does not support' do + expect(result.unsupported_chars).to include('И', 'Љ') + end + it 'transliterates using English locale when default does not match' do + expect(I18n).to receive(:transliterate). + with(duck_type(:to_s), locale: :en).
+ and_call_original + result + end + it 'does not count question marks as unsupported characters by default' do + expect(result.unsupported_chars).not_to include('?') + expect(result.transliterated).to include('?') + end + it 'returns the transliterated value' do + expect(result.transliterated).to eq(transliterated_result) + end + end + context 'with an input that does not require transliteration' do + let(:input_value) { 'Abc Is My Fav Number' } + let(:result) { transliterator.transliterate(input_value) } + + it 'returns the original value that was requested to be transliterated' do + expect(result.original).to eq(input_value) + end + it 'includes a "changed?" key indicating that transliteration did not change the value' do + expect(result.changed?).to be(false) + end + + it 'transliterated value is identical to the original value' do + expect(result.transliterated).to eq(input_value) + end + end + end + + context 'for additional values not supported for transliteration by default' do + { + # Convert okina to apostrophe + "ʻ": "'", + # Convert quotation marks + "’": "'", + "‘": "'", + "‛": "'", + "“": '"', + "‟": '"', + "”": '"', + # Convert hyphens + "‐": '-', + "‑": '-', + "‒": '-', + "–": '-', + "—": '-', + "﹘": '-', + # Convert number signs + "﹟": '#', + "#": '#', + }.each do |key, value| + it "converts \"\\u#{key.to_s.ord.to_s(16).rjust( + 4, + '0', + )}\" to \"\\u#{value.ord.to_s(16).rjust( + 4, '0' + )}\"" do + expect(transliterator.transliterate(key).transliterated).to eq(value) + end + end + end + end +end