diff --git a/Gemfile b/Gemfile index 23e71a926..c4f4dbfb5 100644 --- a/Gemfile +++ b/Gemfile @@ -15,6 +15,7 @@ gem "govuk_personalisation" gem "govuk_publishing_components" gem "htmlentities" gem "plek" +gem "rack-utf8_sanitizer" gem "rails-controller-testing" gem "rails-i18n" gem "rails_translation_manager" diff --git a/Gemfile.lock b/Gemfile.lock index d7e040fe9..821c8017f 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -482,6 +482,8 @@ GEM rack (>= 3.0.0) rack-test (2.1.0) rack (>= 1.3) + rack-utf8_sanitizer (1.9.1) + rack (>= 1.0, < 4.0) rackup (2.1.0) rack (>= 3) webrick (~> 1.8) @@ -670,6 +672,7 @@ DEPENDENCIES mocha plek pry-byebug + rack-utf8_sanitizer rails (= 7.2.1.2) rails-controller-testing rails-i18n diff --git a/config/application.rb b/config/application.rb index dfa0fed8b..f13f7a67d 100644 --- a/config/application.rb +++ b/config/application.rb @@ -13,6 +13,7 @@ require "action_view/railtie" # require "action_cable/engine" require "rails/test_unit/railtie" +require_relative "../lib/sanitiser/strategy" # Require the gems listed in Gemfile, including any gems # you've limited to :test, :development, or :production. @@ -26,7 +27,7 @@ class Application < Rails::Application # Please, add to the `ignore` list any other `lib` subdirectories that do # not contain `.rb` files, or that should not be reloaded or eager loaded. # Common ones are `templates`, `generators`, or `middleware`, for example. - config.autoload_lib(ignore: %w[assets tasks]) + config.autoload_lib(ignore: %w[assets tasks sanitiser]) # Settings in config/environments/* take precedence over those specified here. # Application configuration can go into files in config/initializers @@ -128,5 +129,15 @@ class Application < Rails::Application # Do not swallow errors in after_commit/after_rollback callbacks. # config.active_record.raise_in_transactional_callbacks = true + + # Protect from "invalid byte sequence in UTF-8" errors, + # when a query or a cookie is a string with incorrect UTF-8 encoding. + config.middleware.insert_before( + 0, + Rack::UTF8Sanitizer, + sanitizable_content_types: [], + only: %w[QUERY_STRING], + strategy: Sanitiser::Strategy, + ) end end diff --git a/lib/sanitiser/strategy.rb b/lib/sanitiser/strategy.rb new file mode 100644 index 000000000..fc2414343 --- /dev/null +++ b/lib/sanitiser/strategy.rb @@ -0,0 +1,18 @@ +module Sanitiser + class Strategy + class SanitisingError < StandardError; end + + class << self + def call(input, sanitize_null_bytes: false) + input + .force_encoding(Encoding::ASCII_8BIT) + .encode!(Encoding::UTF_8) + if sanitize_null_bytes && Rack::UTF8Sanitizer::NULL_BYTE_REGEX.match?(input) + raise NullByteInString + end + rescue StandardError + raise SanitisingError, "Non-UTF-8 (or null) character in the query or in the cookie" + end + end + end +end diff --git a/test/integration/sanitiser_test.rb b/test/integration/sanitiser_test.rb new file mode 100644 index 000000000..c75433726 --- /dev/null +++ b/test/integration/sanitiser_test.rb @@ -0,0 +1,84 @@ +require "test_helper" + +class SanitiserTest < ActiveSupport::TestCase + include Rack::Test::Methods + + def app + Rails.application + end + + test "with query being correct percent-encoded UTF-8 string does not raise exception" do + get "/?%41" + assert last_response.successful? + end + + test "with query being incorrect percent-encoded UTF-8 string raises SanitisingError" do + assert_raises(Sanitiser::Strategy::SanitisingError) do + get "/?%AD" + end + end + + test "with cookie key being correct UTF-8 does not raise exception" do + header("Cookie", "\x41=value") + get "/" + assert last_response.successful? + end + + test "with cookie key being incorrect UTF-8 raises exception" do + header("Cookie", "\xAD=value") + assert_raises(ArgumentError, match: "invalid byte sequence in UTF-8") do + get "/" + end + end + + test "with cookie value being correct UTF-8 does not raise exception" do + header("Cookie", "key=\x41") + get "/" + assert last_response.successful? + end + + test "with cookie value being incorrect UTF-8 raises exception" do + header("Cookie", "key=\xAD") + assert_raises(ArgumentError, match: "invalid byte sequence in UTF-8") do + get "/" + end + end + + test "with cookie path being correct UTF-8 does not raise exception" do + header("Cookie", "key=value; Path=/\x41") + get "/" + assert last_response.successful? + end + + test "with cookie path being incorrect UTF-8 raises exception" do + header("Cookie", "key=value; Path=/\xAD") + assert_raises(ArgumentError, match: "invalid byte sequence in UTF-8") do + get "/" + end + end + + test "with cookie path being correct percent-encoded UTF-8 does not raise exception" do + header("Cookie", "key=value; Path=/%41") + get "/" + assert last_response.successful? + end + + test "with cookie path being incorrect percent-encoded UTF-8 raises SanitisingError" do + header("Cookie", "key=value; Path=/%AD") + assert_raises(Sanitiser::Strategy::SanitisingError) do + get "/" + end + end + + test "with referrer headerbeing correct percent-encoded UTF-8 does not raise exception" do + header("Referer", "http://example.com/?%41") + get "/" + assert last_response.successful? + end + + test "with referrer header being incorrect percent-encoded UTF-8 does not raise exception" do + header("Referer", "http://example.com/?%AD") + get "/" + assert last_response.successful? + end +end