diff --git a/envoy/common/interval_set.h b/envoy/common/interval_set.h
index 51ca2068f3091..8cfe303cf20f8 100644
--- a/envoy/common/interval_set.h
+++ b/envoy/common/interval_set.h
@@ -37,6 +37,14 @@ template <typename Value> class IntervalSet {
    * Clears the contents of the interval set.
    */
   virtual void clear() PURE;
+
+  /**
+   * Determines whether the specified Value is in any of the intervals.
+   *
+   * @param value the value
+   * @return true if value is covered in the inteval set.
+   */
+  virtual bool test(Value value) const PURE;
 };
 
 } // namespace Envoy
diff --git a/source/common/common/utility.h b/source/common/common/utility.h
index 5084dbff92733..54ce19af415bf 100644
--- a/source/common/common/utility.h
+++ b/source/common/common/utility.h
@@ -561,6 +561,11 @@ template <typename Value> class IntervalSetImpl : public IntervalSet<Value> {
     intervals_.insert(Interval(left, right));
   }
 
+  bool test(Value value) const override {
+    const auto left_pos = intervals_.lower_bound(Interval(value, value + 1));
+    return left_pos != intervals_.end() && value >= left_pos->first && value < left_pos->second;
+  }
+
   std::vector<Interval> toVector() const override {
     return std::vector<Interval>(intervals_.begin(), intervals_.end());
   }
diff --git a/source/common/json/BUILD b/source/common/json/BUILD
index 1a42cd0004385..fe750b28c3824 100644
--- a/source/common/json/BUILD
+++ b/source/common/json/BUILD
@@ -34,3 +34,10 @@ envoy_cc_library(
         "//source/common/runtime:runtime_features_lib",
     ],
 )
+
+envoy_cc_library(
+    name = "json_sanitizer_lib",
+    srcs = ["json_sanitizer.cc"],
+    hdrs = ["json_sanitizer.h"],
+    deps = ["//source/common/common:assert_lib"],
+)
diff --git a/source/common/json/json_internal.cc b/source/common/json/json_internal.cc
index 4b480831c6c19..7980edb14a644 100644
--- a/source/common/json/json_internal.cc
+++ b/source/common/json/json_internal.cc
@@ -686,6 +686,11 @@ ObjectSharedPtr Factory::loadFromString(const std::string& json) {
   return handler.getRoot();
 }
 
+std::string Factory::serialize(absl::string_view str) {
+  nlohmann::json j(str);
+  return j.dump();
+}
+
 } // namespace Nlohmann
 } // namespace Json
 } // namespace Envoy
diff --git a/source/common/json/json_internal.h b/source/common/json/json_internal.h
index de665333a1c09..686430826a7d4 100644
--- a/source/common/json/json_internal.h
+++ b/source/common/json/json_internal.h
@@ -5,6 +5,8 @@
 
 #include "envoy/json/json_object.h"
 
+#include "absl/strings/string_view.h"
+
 namespace Envoy {
 namespace Json {
 namespace Nlohmann {
@@ -15,6 +17,8 @@ class Factory {
    * Constructs a Json Object from a string.
    */
   static ObjectSharedPtr loadFromString(const std::string& json);
+
+  static std::string serialize(absl::string_view str);
 };
 
 } // namespace Nlohmann
diff --git a/source/common/json/json_sanitizer.cc b/source/common/json/json_sanitizer.cc
new file mode 100644
index 0000000000000..5ca322b2993d5
--- /dev/null
+++ b/source/common/json/json_sanitizer.cc
@@ -0,0 +1,255 @@
+#include "source/common/json/json_sanitizer.h"
+
+#include <utility>
+
+#include "source/common/common/assert.h"
+
+#include "absl/strings/str_cat.h"
+#include "absl/strings/str_format.h"
+
+namespace Envoy {
+namespace Json {
+
+namespace {
+
+const uint8_t Literal = 0;
+const uint8_t ControlEscapeSize = 2; // e.g. \b
+const uint8_t UnicodeEscapeSize = 6; // e.g. \u1234
+const uint8_t Utf8DecodeSentinel = 0xff;
+
+} // namespace
+
+JsonSanitizer::JsonSanitizer() {
+  // Single-char escape sequences for common control characters.
+  auto symbolic_escape = [this](char control_char, char symbolic) {
+    Escape& escape = char_escapes_[char2uint32(control_char)];
+    escape.size_ = ControlEscapeSize;
+    escape.chars_[0] = '\\';
+    escape.chars_[1] = symbolic;
+  };
+  symbolic_escape('\b', 'b');
+  symbolic_escape('\f', 'f');
+  symbolic_escape('\n', 'n');
+  symbolic_escape('\r', 'r');
+  symbolic_escape('\t', 't');
+  symbolic_escape('\\', '\\');
+  symbolic_escape('"', '"');
+
+  // Low characters (0-31) not listed above are encoded as unicode 4-digit hex.
+  auto unicode_escape = [this](uint32_t index) {
+    // We capture unicode Escapes both in a char-indexed array, for direct
+    // substitutions on literal inputs, and in a unicode-indexed hash-map,
+    // for lookup after utf8 decode.
+    std::string escape_str = absl::StrFormat("\\u%04x", index);
+    ASSERT(escape_str.size() == UnicodeEscapeSize);
+    Escape& escape = unicode_escapes_[index];
+    escape.size_ = escape_str.size();
+    RELEASE_ASSERT(escape.size_ <= sizeof(escape.chars_), "escaped string too large");
+    memcpy(escape.chars_, escape_str.data(), escape_str.size()); // NOLINT(safe-memcpy)*/
+    if (index < NumEscapes) {
+      char_escapes_[index] = escape;
+    }
+  };
+
+  // Add unicode escapes for control-characters below 32 that don't have symbolic escapes.
+  for (uint32_t i = 0; i < ' '; ++i) {
+    if (char_escapes_[i].size_ == 0) {
+      unicode_escape(i);
+    }
+  }
+
+  // Unicode-escaped ascii constants above SPACE (32).
+  for (char ch : {'<', '>', '\177'}) {
+    unicode_escape(char2uint32(ch));
+  }
+
+  // There's a range of 8-bit characters that are unicode escaped by the
+  // protobuf library, so we match behavior.
+  for (uint32_t i = 0x0080; i < 0x00a0; ++i) {
+    unicode_escape(i);
+  }
+
+  // The remaining unicode characters are mostly passed through literally. We'll
+  // initialize all of them and then override some below.
+  for (uint32_t i = 0x00a0; i < NumEscapes; ++i) {
+    char_escapes_[i].size_ = Literal;
+  }
+
+  // All the bytes matching pattern 11xxxxxx will be evaluated as utf-8.
+  for (uint32_t i = Utf8_2BytePattern; i <= 0xff; ++i) {
+    char_escapes_[i].size_ = Utf8DecodeSentinel;
+  }
+
+  // There are an assortment of unicode characters that protobufs quote, so we
+  // do likewise here to make differential testing/fuzzing feasible.
+  for (uint32_t i : {0x00ad, 0x0600, 0x0601, 0x0602, 0x0603, 0x06dd, 0x070f}) {
+    unicode_escape(i);
+  }
+}
+
+absl::string_view JsonSanitizer::sanitize(std::string& buffer, absl::string_view str) const {
+  // Fast-path to see whether any escapes or utf-encoding are needed. If str has
+  // only unescaped ascii characters, we can simply return it. So before doing
+  // anything too fancy, do a lookup in char_escapes_ for each character, and
+  // simply OR in the return sizes. We use 0 for the return-size when we are
+  // simply leaving the character as is, so anything non-zero means we need to
+  // initiate the slow path.
+  //
+  // Benchmarks show it's faster to just rip through the string with no
+  // conditionals, so we only check the ORed sizes after the loop. This avoids
+  // branches and allows simpler loop unrolling by the compiler.
+  uint32_t sizes_ored_together = 0;
+  for (char c : str) {
+    sizes_ored_together |= char_escapes_[char2uint32(c)].size_;
+  }
+  if (sizes_ored_together == 0) {
+    return str; // Happy path, should be executed most of the time.
+  }
+  return slowSanitize(buffer, str);
+}
+
+absl::string_view JsonSanitizer::slowSanitize(std::string& buffer, absl::string_view str) const {
+  std::string oct_escape_buf;
+  size_t past_escape = absl::string_view::npos;
+  const uint8_t* first = reinterpret_cast<const uint8_t*>(str.data());
+  const uint8_t* data = first;
+  absl::string_view escape_view;
+  for (uint32_t n = str.size(); n != 0; ++data, --n) {
+    const Escape& escape = char_escapes_[*data];
+    if (escape.size_ != Literal) {
+      uint32_t start_of_escape = data - first;
+      switch (escape.size_) {
+      case ControlEscapeSize:
+      case UnicodeEscapeSize:
+        escape_view = absl::string_view(escape.chars_, escape.size_);
+        break;
+      case Utf8DecodeSentinel: {
+        auto [unicode, consumed] = decodeUtf8(data, n);
+        if (consumed != 0) {
+          --consumed;
+          data += consumed;
+          n -= consumed;
+
+          // Having validated and constructed the unicode for the utf-8
+          // sequence we must determine whether to render it literally by
+          // simply leaving it alone, or whether we ought to render it
+          // as a unicode escape. We do this using a hash-map set up during
+          // the constructor with all desired unicode escapes, to mimic the
+          // behavior of the protobuf json serializer.
+          auto iter = unicode_escapes_.find(unicode);
+          if (iter == unicode_escapes_.end()) {
+            continue;
+          }
+          escape_view = absl::string_view(iter->second.chars_, iter->second.size_);
+        } else {
+          // Using StrFormat during decode seems slow, but this case should be
+          // rare.
+          oct_escape_buf = absl::StrFormat("\\%03o", *data);
+          escape_view = absl::string_view(oct_escape_buf);
+        }
+        break;
+      }
+      default:
+        ASSERT(false);
+      }
+
+      if (past_escape == absl::string_view::npos) {
+        // We only initialize buffer when we first learn we need to add an
+        // escape-sequence to the sanitized string.
+        if (start_of_escape == 0) {
+          // The first character is an escape, and 'buffer' has not been cleared yet,
+          // so we need to assign it rather than append to it.
+          buffer.assign(escape_view.data(), escape_view.size());
+        } else {
+          // We found our first escape, but this is not the first character in the
+          // string, so we combine the unescaped characters in the string we already
+          // looped over with the new escaped character.
+          buffer = absl::StrCat(str.substr(0, start_of_escape), escape_view);
+        }
+      } else if (start_of_escape == past_escape) {
+        // We are adding an escape immediately after another escaped character.
+        absl::StrAppend(&buffer, escape_view);
+      } else {
+        // We are adding a new escape but must first cover the characters
+        // encountered since the previous escape.
+        absl::StrAppend(&buffer, str.substr(past_escape, start_of_escape - past_escape),
+                        escape_view);
+      }
+      past_escape = data - first + 1;
+    }
+  }
+
+  // If no escape-sequence was needed, we just return the input.
+  if (past_escape == absl::string_view::npos) {
+    return str;
+  }
+
+  // Otherwise we append on any unescaped chunk at the end of the input, and
+  // return buffer as the result.
+  if (past_escape < str.size()) {
+    absl::StrAppend(&buffer, str.substr(past_escape, str.size() - past_escape));
+  }
+  return buffer;
+}
+
+std::pair<uint32_t, uint32_t> JsonSanitizer::decodeUtf8(const uint8_t* bytes, uint32_t size) {
+  uint32_t unicode = 0;
+  uint32_t consumed = 0;
+
+  // See table in https://en.wikipedia.org/wiki/UTF-8, "Encoding" section.
+  //
+  // See also https://en.cppreference.com/w/cpp/locale/codecvt_utf8 which is
+  // marked as deprecated. There is also support in Windows libraries and Boost,
+  // which can be discovered on StackOverflow. I could not find a usable OSS
+  // implementation. However it's easily derived from the spec on Wikipedia.
+  //
+  // Note that the code below could be optimized a bit, e.g. by factoring out
+  // repeated lookups of the same index in the bytes array and using SSE
+  // instructions for the multi-word bit hacking.
+  //
+  // See also http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ which might be a lot
+  // faster, though less readable. As coded, though, it looks like it would read
+  // past the end of the input if the input is malformed.
+  if (size >= 2 && (bytes[0] & Utf8_2ByteMask) == Utf8_2BytePattern &&
+      (bytes[1] & Utf8_ContinueMask) == Utf8_ContinuePattern) {
+    unicode = bytes[0] & ~Utf8_2ByteMask;
+    unicode = (unicode << Utf8_Shift) | (bytes[1] & ~Utf8_ContinueMask);
+    if (unicode < 0x80) {
+      return UnicodeSizePair(0, 0);
+    }
+    consumed = 2;
+  } else if (size >= 3 && (bytes[0] & Utf8_3ByteMask) == Utf8_3BytePattern &&
+             (bytes[1] & Utf8_ContinueMask) == Utf8_ContinuePattern &&
+             (bytes[2] & Utf8_ContinueMask) == Utf8_ContinuePattern) {
+    unicode = bytes[0] & ~Utf8_3ByteMask;
+    unicode = (unicode << Utf8_Shift) | (bytes[1] & ~Utf8_ContinueMask);
+    unicode = (unicode << Utf8_Shift) | (bytes[2] & ~Utf8_ContinueMask);
+    if (unicode < 0x800) { // 3-byte starts at 0x800
+      return UnicodeSizePair(0, 0);
+    }
+    consumed = 3;
+  } else if (size >= 4 && (bytes[0] & Utf8_4ByteMask) == Utf8_4BytePattern &&
+             (bytes[1] & Utf8_ContinueMask) == Utf8_ContinuePattern &&
+             (bytes[2] & Utf8_ContinueMask) == Utf8_ContinuePattern &&
+             (bytes[3] & Utf8_ContinueMask) == Utf8_ContinuePattern) {
+    unicode = bytes[0] & ~Utf8_4ByteMask;
+    unicode = (unicode << Utf8_Shift) | (bytes[1] & ~Utf8_ContinueMask);
+    unicode = (unicode << Utf8_Shift) | (bytes[2] & ~Utf8_ContinueMask);
+    unicode = (unicode << Utf8_Shift) | (bytes[3] & ~Utf8_ContinueMask);
+
+    // 4-byte starts at 0x10000
+    //
+    // Note from https://en.wikipedia.org/wiki/UTF-8:
+    // The earlier RFC2279 allowed UTF-8 encoding through code point U+7FFFFFF.
+    // But the current RFC3629 section 3 limits UTF-8 encoding through code
+    // point U+10FFFF, to match the limits of UTF-16.
+    if (unicode < 0x10000 || unicode > 0x10ffff) {
+      return UnicodeSizePair(0, 0);
+    }
+    consumed = 4;
+  }
+  return UnicodeSizePair(unicode, consumed);
+}
+
+} // namespace Json
+} // namespace Envoy
diff --git a/source/common/json/json_sanitizer.h b/source/common/json/json_sanitizer.h
new file mode 100644
index 0000000000000..7a175ce86d741
--- /dev/null
+++ b/source/common/json/json_sanitizer.h
@@ -0,0 +1,93 @@
+#pragma once
+
+#include <string>
+
+#include "absl/container/flat_hash_map.h"
+#include "absl/strings/string_view.h"
+
+namespace Envoy {
+namespace Json {
+
+// Hand-rolled JSON sanitizer that has exactly the same behavior as serializing
+// through protobufs, but is more than 10x faster. From
+// test/common/json/json_sanitizer_speed_test.cc:
+//
+// ---------------------------------------------------------------------------
+// Benchmark                                 Time             CPU   Iterations
+// ---------------------------------------------------------------------------
+// BM_ProtoEncoderNoEscape                1123 ns         1123 ns       545345
+// BM_JsonSanitizerNoEscape               8.77 ns         8.77 ns     79517538
+// BM_StaticJsonSanitizerNoEscape         9.52 ns         9.52 ns     73570603
+// BM_ProtoEncoderWithEscape              1326 ns         1326 ns       528576
+// BM_JsonSanitizerWithEscape             96.3 ns         96.3 ns      7289627
+// BM_StaticJsonSanitizerWithEscape       97.5 ns         97.5 ns      7157098
+//
+class JsonSanitizer {
+public:
+  static constexpr uint32_t Utf8_2ByteMask = 0b11100000;
+  static constexpr uint32_t Utf8_3ByteMask = 0b11110000;
+  static constexpr uint32_t Utf8_4ByteMask = 0b11111000;
+
+  static constexpr uint32_t Utf8_2BytePattern = 0b11000000;
+  static constexpr uint32_t Utf8_3BytePattern = 0b11100000;
+  static constexpr uint32_t Utf8_4BytePattern = 0b11110000;
+
+  static constexpr uint32_t Utf8_ContinueMask = 0b11000000;
+  static constexpr uint32_t Utf8_ContinuePattern = 0b10000000;
+
+  static constexpr uint32_t Utf8_Shift = 6;
+
+  // Constructing the sanitizer fills in a table with all escape-sequences,
+  // indexed by character. To make this perform well, you should instantiate the
+  // sanitizer in a context that lives across a large number of sanitizations.
+  JsonSanitizer();
+
+  /**
+   * Sanitizes a string so it is suitable for JSON. The buffer is
+   * used if any of the characters in str need to be escaped.
+   *
+   * @param buffer a string in which an escaped string can be written, if needed. It
+   *   is not necessary for callers to clear the buffer first; it be cleared
+   *   by this method if the input needs to be escaped.
+   * @param str the string to be translated
+   * @return the translated string_view.
+   */
+  absl::string_view sanitize(std::string& buffer, absl::string_view str) const;
+
+  /** The Unicode code-point and the number of utf8-bytes consumed */
+  using UnicodeSizePair = std::pair<uint32_t, uint32_t>;
+
+  /**
+   * Decodes a byte-stream of UTF8, returning the resulting unicode and the
+   * number of bytes consumed as a pair.
+   *
+   * @param bytes The data with utf8 bytes.
+   * @param size The number of bytes available in data
+   * @return UnicodeSizePair(unicode, consumed) -- if the decode fails consumed will be 0.
+   */
+  static UnicodeSizePair decodeUtf8(const uint8_t* bytes, uint32_t size);
+
+private:
+  // static constexpr uint32_t NumEscapes = 1 << 11; // 2^11=2048 codes possible in 2-byte utf8.
+  static constexpr uint32_t NumEscapes = 256;
+
+  // Character-indexed array of translation strings. If an entry is nullptr then
+  // the character does not require substitution. This strategy is dependent on
+  // the property of UTF-8 where all two-byte characters have the high-order bit
+  // set for both bytes, and don't require escaping for JSON. Thus we can
+  // consider each character in isolation for escaping. Reference:
+  // https://en.wikipedia.org/wiki/UTF-8.
+  struct Escape {
+    uint8_t size_{0};
+    char chars_[7]; // No need to initialize char data, as we are not null-terminating.
+  };
+
+  static uint32_t char2uint32(char c) { return static_cast<uint32_t>(static_cast<uint8_t>(c)); }
+  absl::string_view slowSanitize(std::string& buffer, absl::string_view str) const;
+
+  Escape char_escapes_[NumEscapes];
+  absl::flat_hash_map<uint32_t, Escape> unicode_escapes_;
+};
+
+} // namespace Json
+} // namespace Envoy
diff --git a/test/common/common/utility_test.cc b/test/common/common/utility_test.cc
index 274ab220dd388..6f679d8dbc458 100644
--- a/test/common/common/utility_test.cc
+++ b/test/common/common/utility_test.cc
@@ -931,6 +931,21 @@ TEST(IntervalSet, testIntervalTargeted) {
   EXPECT_EQ("[15, 20), [25, 30), [35, 40), [41, 43)", test(41, 43));
 }
 
+TEST(IntervalSet, testTest) {
+  IntervalSetImpl<uint32_t> set;
+  set.insert(4, 6);
+  EXPECT_FALSE(set.test(0));
+  set.insert(0, 2);
+  EXPECT_TRUE(set.test(0));
+  EXPECT_TRUE(set.test(1));
+  EXPECT_FALSE(set.test(2));
+  EXPECT_FALSE(set.test(3));
+  EXPECT_TRUE(set.test(4));
+  EXPECT_TRUE(set.test(5));
+  EXPECT_FALSE(set.test(6));
+  EXPECT_FALSE(set.test(7));
+}
+
 TEST(WelfordStandardDeviation, AllEntriesTheSame) {
   WelfordStandardDeviation wsd;
   wsd.update(10);
diff --git a/test/common/json/BUILD b/test/common/json/BUILD
index ebb6d6aeb63a4..7eafb0f631b70 100644
--- a/test/common/json/BUILD
+++ b/test/common/json/BUILD
@@ -1,7 +1,10 @@
 load(
     "//bazel:envoy_build_system.bzl",
+    "envoy_cc_benchmark_binary",
+    "envoy_cc_binary",
     "envoy_cc_fuzz_test",
     "envoy_cc_test",
+    "envoy_cc_test_library",
     "envoy_package",
 )
 
@@ -36,3 +39,98 @@ envoy_cc_test(
         "//test/test_common:utility_lib",
     ],
 )
+
+envoy_cc_test(
+    name = "json_sanitizer_test",
+    srcs = ["json_sanitizer_test.cc"],
+    deps = [
+        ":json_sanitizer_test_util_lib",
+        "//source/common/json:json_internal_lib",
+        "//source/common/json:json_sanitizer_lib",
+        "//source/common/protobuf:utility_lib",
+    ],
+)
+
+envoy_cc_benchmark_binary(
+    name = "json_sanitizer_speed_test",
+    srcs = ["json_sanitizer_speed_test.cc"],
+    deps = [
+        "//source/common/json:json_internal_lib",
+        "//source/common/json:json_sanitizer_lib",
+        "//source/common/protobuf:utility_lib",
+    ],
+)
+
+envoy_cc_fuzz_test(
+    name = "json_sanitizer_fuzz_test",
+    srcs = ["json_sanitizer_fuzz_test.cc"],
+    corpus = "json_sanitizer_corpus",
+    deps = [
+        ":json_sanitizer_test_util_lib",
+        "//source/common/json:json_sanitizer_lib",
+        "//source/common/protobuf:utility_lib",
+        "//test/fuzz:utility_lib",
+    ],
+)
+
+envoy_cc_binary(
+    name = "gen_excluded_unicodes",
+    srcs = ["gen_excluded_unicodes.cc"],
+    deps = [
+        "//source/common/json:json_sanitizer_lib",
+        "//source/common/protobuf:utility_lib",
+    ],
+)
+
+#genrule(
+#    name = "extensions_security_rst",
+#    srcs = [
+#        "//source/extensions:extensions_metadata.yaml",
+#        "//contrib:extensions_metadata.yaml",
+#    ],
+#    outs = ["extensions_security_rst.tar"],
+#    cmd = """
+#    $(location //tools/docs:generate_extensions_security_rst) \\
+#        $(location //source/extensions:extensions_metadata.yaml) \\
+#        $(location //contrib:extensions_metadata.yaml) $@
+#    """,
+#    tools = ["//tools/docs:generate_extensions_security_rst"],
+#)
+#
+#envoy_directory_genrule(
+#    name = "corpus_from_config_impl",
+#    testonly = 1,
+#    srcs = [
+#        # This is deliberately in srcs, since we run into host/target confusion
+#        # otherwise in oss-fuzz builds.
+#        ":config_impl_test_static",
+#    ],
+#    cmd = " ".join([
+#        "$(location corpus_from_config_impl_sh)",
+#        "$(location //test/common/router:config_impl_test_static)",
+#    ]),
+#    tools = [":corpus_from_config_impl_sh"],
+#)
+#
+#genrule(
+#    name = "generate_excluded_unicodes",
+#    srcs = [
+#        "admin_head_start.html",
+#        "admin.css",
+#    ],
+#    outs = ["admin_html_gen.h"],
+#    cmd = "./$(location :generate_admin_html.sh) \
+#        $(location admin_head_start.html) $(location admin.css) > $@",
+#    visibility = ["//visibility:private"],
+#    deps = [":generate_excluded_unicodes"],
+#)
+
+envoy_cc_test_library(
+    name = "json_sanitizer_test_util_lib",
+    srcs = ["json_sanitizer_test_util.cc"],
+    hdrs = ["json_sanitizer_test_util.h"],
+    deps = [
+        "//source/common/common:utility_lib",
+        "//source/common/json:json_sanitizer_lib",
+    ],
+)
diff --git a/test/common/json/gen_excluded_unicodes.cc b/test/common/json/gen_excluded_unicodes.cc
new file mode 100644
index 0000000000000..fbd68237f6aaf
--- /dev/null
+++ b/test/common/json/gen_excluded_unicodes.cc
@@ -0,0 +1,130 @@
+#include "source/common/json/json_sanitizer.h"
+#include "source/common/protobuf/utility.h"
+
+#include "absl/strings/str_format.h"
+
+namespace Envoy {
+namespace Json {
+
+// Collects unicode values that cannot be handled by the protobuf json encoder.
+// This is not needed for correct operation of the json sanitizer, but it is
+// needed for comparing sanitization results against the proto serializer, and
+// for differential fuzzing. We need to avoid comparing sanitization results for
+// strings containing utf-8 sequences that protobufs cannot serialize.
+//
+// Normally when running tests, nothing will be passed to collect(), and emit()
+// will return false. But if the protobuf library changes and different unicode
+// sets become invalid, we can re-run the collector with:
+//
+// bazel build -c opt test/common/json:json_sanitizer_test
+// GENERATE_INVALID_UTF8_RANGES=1
+//   ./bazel-bin/test/common/json/json_sanitizer_test |&
+//   grep -v 'contains invalid UTF-8'
+//
+// The grep pipe is essential as otherwise you will be buried in thousands of
+// messages from the protobuf library that cannot otherwise be trapped. The
+// "-c opt" is essential because JsonSanitizerTest.AllFourByteUtf8 iterates over
+// all 4-byte sequences which takes almost 20 seconds without optimization, so
+// it is conditionally compiled on NDEBUG.
+//
+// Running in this mode causes two tests to fail, but prints two initialization
+// blocks for invalid byte code ranges, which can then be pasted into the
+// InvalidUnicodeSet constructor in json_sanitizer_test_util.cc.
+class InvalidUnicodeCollector {
+public:
+  /**
+   * Collects a unicode value that cannot be parsed as utf8 by the protobuf serializer.
+   *
+   * @param unicode the unicode value
+   */
+  void collect(uint32_t unicode) { invalid_.insert(unicode, unicode + 1); }
+
+  /**
+   * Emits the collection of invalid unicode ranges to stdout.
+   *
+   * @return true if any invalid ranges were found.
+   */
+  bool emit(absl::string_view variable_name) {
+    bool has_invalid = false;
+    for (IntervalSet<uint32_t>::Interval& interval : invalid_.toVector()) {
+      has_invalid = true;
+      std::cout << absl::StrFormat("    %s.insert(0x%x, 0x%x);\n", variable_name, interval.first,
+                                   interval.second);
+    }
+    return has_invalid;
+  }
+
+private:
+  IntervalSetImpl<uint32_t> invalid_;
+};
+
+bool isInvalidProtobufSerialization(const std::string& str) {
+  return str.size() == 2 && str[0] == '"' && str[1] == '"';
+}
+
+void AllThreeByteUtf8() {
+  std::string utf8("abc");
+  InvalidUnicodeCollector invalid;
+
+  for (uint32_t byte1 = 0; byte1 < 16; ++byte1) {
+    utf8[0] = byte1 | JsonSanitizer::Utf8_3BytePattern;
+    for (uint32_t byte2 = 0; byte2 < 64; ++byte2) {
+      utf8[1] = byte2 | JsonSanitizer::Utf8_ContinuePattern;
+      for (uint32_t byte3 = 0; byte3 < 64; ++byte3) {
+        utf8[2] = byte3 | JsonSanitizer::Utf8_ContinuePattern;
+        auto [unicode, consumed] = Envoy::Json::JsonSanitizer::decodeUtf8(
+            reinterpret_cast<const uint8_t*>(utf8.data()), 3);
+        if (consumed == 3) {
+          std::string proto_sanitized =
+              MessageUtil::getJsonStringFromMessageOrDie(ValueUtil::stringValue(utf8), false, true);
+          if (isInvalidProtobufSerialization(proto_sanitized)) {
+            invalid.collect(unicode);
+          }
+        } else {
+          ASSERT(consumed == 0);
+        }
+      }
+    }
+  }
+
+  invalid.emit("invalid_3byte_intervals_");
+}
+
+void AllFourByteUtf8() {
+  std::string utf8("abcd");
+  InvalidUnicodeCollector invalid;
+
+  for (uint32_t byte1 = 0; byte1 < 16; ++byte1) {
+    utf8[0] = byte1 | JsonSanitizer::Utf8_4BytePattern;
+    for (uint32_t byte2 = 0; byte2 < 64; ++byte2) {
+      utf8[1] = byte2 | JsonSanitizer::Utf8_ContinuePattern;
+      for (uint32_t byte3 = 0; byte3 < 64; ++byte3) {
+        utf8[2] = byte3 | JsonSanitizer::Utf8_ContinuePattern;
+        for (uint32_t byte4 = 0; byte4 < 64; ++byte4) {
+          utf8[3] = byte4 | JsonSanitizer::Utf8_ContinuePattern;
+          auto [unicode, consumed] = Envoy::Json::JsonSanitizer::decodeUtf8(
+              reinterpret_cast<const uint8_t*>(utf8.data()), 4);
+          if (consumed == 4) {
+            std::string proto_sanitized = MessageUtil::getJsonStringFromMessageOrDie(
+                ValueUtil::stringValue(utf8), false, true);
+            if (isInvalidProtobufSerialization(proto_sanitized)) {
+              invalid.collect(unicode);
+            }
+          } else {
+            ASSERT(consumed == 0);
+          }
+        }
+      }
+    }
+  }
+  invalid.emit("invalid_4byte_intervals_");
+}
+
+} // namespace Json
+} // namespace Envoy
+
+int main() {
+  Envoy::Json::AllThreeByteUtf8();
+  Envoy::Json::AllFourByteUtf8();
+  return 0;
+}
diff --git a/test/common/json/json_sanitizer_corpus/binary_file b/test/common/json/json_sanitizer_corpus/binary_file
new file mode 100644
index 0000000000000..eb70d74b0caaf
Binary files /dev/null and b/test/common/json/json_sanitizer_corpus/binary_file differ
diff --git a/test/common/json/json_sanitizer_corpus/hello_world_multi_language b/test/common/json/json_sanitizer_corpus/hello_world_multi_language
new file mode 100644
index 0000000000000..33a23898ed30c
--- /dev/null
+++ b/test/common/json/json_sanitizer_corpus/hello_world_multi_language
@@ -0,0 +1 @@
+Hello world, Καλημέρα κόσμε, コンニチハ
\ No newline at end of file
diff --git a/test/common/json/json_sanitizer_corpus/lower_case b/test/common/json/json_sanitizer_corpus/lower_case
new file mode 100644
index 0000000000000..e85d5b45283ac
--- /dev/null
+++ b/test/common/json/json_sanitizer_corpus/lower_case
@@ -0,0 +1 @@
+abcdefghijklmnopqrstuvwxyz
\ No newline at end of file
diff --git a/test/common/json/json_sanitizer_corpus/one_quote_begin b/test/common/json/json_sanitizer_corpus/one_quote_begin
new file mode 100644
index 0000000000000..4977cad19bb50
--- /dev/null
+++ b/test/common/json/json_sanitizer_corpus/one_quote_begin
@@ -0,0 +1 @@
+"ab
\ No newline at end of file
diff --git a/test/common/json/json_sanitizer_corpus/one_quote_end b/test/common/json/json_sanitizer_corpus/one_quote_end
new file mode 100644
index 0000000000000..d897f58692ae6
--- /dev/null
+++ b/test/common/json/json_sanitizer_corpus/one_quote_end
@@ -0,0 +1 @@
+ab"
\ No newline at end of file
diff --git a/test/common/json/json_sanitizer_corpus/one_quote_middle b/test/common/json/json_sanitizer_corpus/one_quote_middle
new file mode 100644
index 0000000000000..68329fbda325e
--- /dev/null
+++ b/test/common/json/json_sanitizer_corpus/one_quote_middle
@@ -0,0 +1 @@
+a"b
\ No newline at end of file
diff --git a/test/common/json/json_sanitizer_corpus/punctuation b/test/common/json/json_sanitizer_corpus/punctuation
new file mode 100644
index 0000000000000..00a387e30ea7f
--- /dev/null
+++ b/test/common/json/json_sanitizer_corpus/punctuation
@@ -0,0 +1 @@
+" `~!@#$%^&*()_+-={}|[]"
\ No newline at end of file
diff --git a/test/common/json/json_sanitizer_corpus/quotes_both_ends b/test/common/json/json_sanitizer_corpus/quotes_both_ends
new file mode 100644
index 0000000000000..075c842d1b8cd
--- /dev/null
+++ b/test/common/json/json_sanitizer_corpus/quotes_both_ends
@@ -0,0 +1 @@
+"a"
\ No newline at end of file
diff --git a/test/common/json/json_sanitizer_corpus/two_quotes_begin b/test/common/json/json_sanitizer_corpus/two_quotes_begin
new file mode 100644
index 0000000000000..a261b18bf58be
--- /dev/null
+++ b/test/common/json/json_sanitizer_corpus/two_quotes_begin
@@ -0,0 +1 @@
+""ab
\ No newline at end of file
diff --git a/test/common/json/json_sanitizer_corpus/two_quotes_end b/test/common/json/json_sanitizer_corpus/two_quotes_end
new file mode 100644
index 0000000000000..2e95ab1bcedf9
--- /dev/null
+++ b/test/common/json/json_sanitizer_corpus/two_quotes_end
@@ -0,0 +1 @@
+ab""
\ No newline at end of file
diff --git a/test/common/json/json_sanitizer_corpus/two_quotes_middle b/test/common/json/json_sanitizer_corpus/two_quotes_middle
new file mode 100644
index 0000000000000..a45fcdef3dd6d
--- /dev/null
+++ b/test/common/json/json_sanitizer_corpus/two_quotes_middle
@@ -0,0 +1 @@
+a""b
\ No newline at end of file
diff --git a/test/common/json/json_sanitizer_corpus/upper_case b/test/common/json/json_sanitizer_corpus/upper_case
new file mode 100644
index 0000000000000..a6860d918dfcb
--- /dev/null
+++ b/test/common/json/json_sanitizer_corpus/upper_case
@@ -0,0 +1 @@
+ABCDEFGHIJKLMNOPQRSTUVWXYZ
\ No newline at end of file
diff --git a/test/common/json/json_sanitizer_fuzz_test.cc b/test/common/json/json_sanitizer_fuzz_test.cc
new file mode 100644
index 0000000000000..93df5a03aba6e
--- /dev/null
+++ b/test/common/json/json_sanitizer_fuzz_test.cc
@@ -0,0 +1,52 @@
+#include "source/common/json/json_sanitizer.h"
+#include "source/common/protobuf/utility.h"
+
+#include "test/common/json/json_sanitizer_test_util.h"
+#include "test/fuzz/fuzz_runner.h"
+#include "test/fuzz/utility.h"
+#include "test/test_common/utility.h"
+
+#include "absl/strings/str_format.h"
+
+namespace Envoy {
+namespace Fuzz {
+
+const Envoy::Json::JsonSanitizer& staticSanitizer() {
+  CONSTRUCT_ON_FIRST_USE(Envoy::Json::JsonSanitizer);
+}
+
+DEFINE_FUZZER(const uint8_t* buf, size_t len) {
+  const Envoy::Json::JsonSanitizer& sanitizer = staticSanitizer();
+  FuzzedDataProvider provider(buf, len);
+  std::string buffer1, buffer2;
+  while (provider.remaining_bytes() != 0) {
+    std::string input = provider.ConsumeRandomLengthString(provider.remaining_bytes());
+    absl::string_view hand_sanitized = sanitizer.sanitize(buffer1, input);
+
+    // If the input is valid UTF-8 we can do a differential test against the
+    // Protobuf JSON sanitizer. Otherwise we are simply ensuring that the
+    // sanitizer does not crash.
+    if (Envoy::Json::isProtoSerializableUtf8(input)) {
+      buffer2 =
+          MessageUtil::getJsonStringFromMessageOrDie(ValueUtil::stringValue(input), false, true);
+      absl::string_view proto_sanitized = Envoy::Json::stripDoubleQuotes(buffer2);
+      if (hand_sanitized != proto_sanitized) {
+        std::cerr << "ERROR on input = ";
+        for (char c : input) {
+          if (c == '\\' || c == '"') {
+            std::cerr << "\\" << c;
+          } else if (c < ' ' || c > 126) {
+            std::cerr << "\\" << absl::StrFormat("%03o", static_cast<uint8_t>(c));
+          } else {
+            std::cerr << c;
+          }
+        }
+        std::cerr << std::endl;
+      }
+      FUZZ_ASSERT_EQ(hand_sanitized, proto_sanitized, input);
+    }
+  }
+}
+
+} // namespace Fuzz
+} // namespace Envoy
diff --git a/test/common/json/json_sanitizer_speed_test.cc b/test/common/json/json_sanitizer_speed_test.cc
new file mode 100644
index 0000000000000..873dee4f02844
--- /dev/null
+++ b/test/common/json/json_sanitizer_speed_test.cc
@@ -0,0 +1,96 @@
+#include "source/common/json/json_internal.h"
+#include "source/common/json/json_sanitizer.h"
+#include "source/common/protobuf/utility.h"
+
+#include "benchmark/benchmark.h"
+
+// NOLINT(namespace-envoy)
+
+constexpr absl::string_view pass_through_encoding = "Now is the time for all good men";
+constexpr absl::string_view escaped_encoding = "Now <is the \"time\"> for all good men";
+
+const Envoy::Json::JsonSanitizer& staticSanitizer() {
+  CONSTRUCT_ON_FIRST_USE(Envoy::Json::JsonSanitizer);
+}
+
+// NOLINTNEXTLINE(readability-identifier-naming)
+static void BM_ProtoEncoderNoEscape(benchmark::State& state) {
+  const std::string str = std::string(pass_through_encoding);
+
+  for (auto _ : state) { // NOLINT
+    Envoy::MessageUtil::getJsonStringFromMessageOrDie(Envoy::ValueUtil::stringValue(str), false,
+                                                      true);
+  }
+}
+BENCHMARK(BM_ProtoEncoderNoEscape);
+
+// NOLINTNEXTLINE(readability-identifier-naming)
+static void BM_JsonSanitizerNoEscape(benchmark::State& state) {
+  std::string buffer;
+  Envoy::Json::JsonSanitizer sanitizer;
+
+  for (auto _ : state) { // NOLINT
+    sanitizer.sanitize(buffer, pass_through_encoding);
+  }
+}
+BENCHMARK(BM_JsonSanitizerNoEscape);
+
+// NOLINTNEXTLINE(readability-identifier-naming)
+static void BM_NlohmannNoEscape(benchmark::State& state) {
+  for (auto _ : state) { // NOLINT
+    Envoy::Json::Nlohmann::Factory::serialize(pass_through_encoding);
+  }
+}
+BENCHMARK(BM_NlohmannNoEscape);
+
+// NOLINTNEXTLINE(readability-identifier-naming)
+static void BM_StaticJsonSanitizerNoEscape(benchmark::State& state) {
+  std::string buffer;
+
+  for (auto _ : state) { // NOLINT
+    staticSanitizer().sanitize(buffer, pass_through_encoding);
+  }
+}
+BENCHMARK(BM_StaticJsonSanitizerNoEscape);
+
+// NOLINTNEXTLINE(readability-identifier-naming)
+static void BM_ProtoEncoderWithEscape(benchmark::State& state) {
+  const std::string str = std::string(escaped_encoding);
+
+  for (auto _ : state) { // NOLINT
+    Envoy::MessageUtil::getJsonStringFromMessageOrDie(Envoy::ValueUtil::stringValue(str), false,
+                                                      true);
+  }
+}
+BENCHMARK(BM_ProtoEncoderWithEscape);
+
+// NOLINTNEXTLINE(readability-identifier-naming)
+static void BM_NlohmannWithEscape(benchmark::State& state) {
+  const std::string str = std::string(escaped_encoding);
+
+  for (auto _ : state) { // NOLINT
+    Envoy::Json::Nlohmann::Factory::serialize(str);
+  }
+}
+BENCHMARK(BM_NlohmannWithEscape);
+
+// NOLINTNEXTLINE(readability-identifier-naming)
+static void BM_JsonSanitizerWithEscape(benchmark::State& state) {
+  Envoy::Json::JsonSanitizer sanitizer;
+  std::string buffer;
+
+  for (auto _ : state) { // NOLINT
+    sanitizer.sanitize(buffer, escaped_encoding);
+  }
+}
+BENCHMARK(BM_JsonSanitizerWithEscape);
+
+// NOLINTNEXTLINE(readability-identifier-naming)
+static void BM_StaticJsonSanitizerWithEscape(benchmark::State& state) {
+  std::string buffer;
+
+  for (auto _ : state) { // NOLINT
+    staticSanitizer().sanitize(buffer, escaped_encoding);
+  }
+}
+BENCHMARK(BM_StaticJsonSanitizerWithEscape);
diff --git a/test/common/json/json_sanitizer_test.cc b/test/common/json/json_sanitizer_test.cc
new file mode 100644
index 0000000000000..162a60b964fed
--- /dev/null
+++ b/test/common/json/json_sanitizer_test.cc
@@ -0,0 +1,365 @@
+#include <ostream>
+
+#include "source/common/json/json_internal.h"
+#include "source/common/json/json_sanitizer.h"
+#include "source/common/protobuf/utility.h"
+
+#include "test/common/json/json_sanitizer_test_util.h"
+
+#include "absl/strings/str_format.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+using testing::StartsWith;
+
+namespace Envoy {
+namespace Json {
+namespace {
+
+constexpr absl::string_view Lambda{"λ"};
+constexpr absl::string_view LambdaUtf8{"\316\273"};
+constexpr absl::string_view Omicron{"ό"};
+constexpr absl::string_view OmicronUtf8{"\341\275\271"};
+constexpr absl::string_view TrebleClefUtf8{"\360\235\204\236"};
+
+class JsonSanitizerTest : public testing::Test {
+protected:
+  using UnicodeSizePair = JsonSanitizer::UnicodeSizePair;
+
+  JsonSanitizerTest() {
+    if (::getenv("GENERATE_INVALID_UTF8_RANGES") != nullptr) {
+      generate_invalid_utf8_ranges_ = true;
+      static bool message_emitted = false;
+      if (!message_emitted) {
+        std::cout << "Runs full sweep of 3-byte and 4-byte utf8 to find unicodes that protobufs "
+                     "cannot serialize, to collect them in ranges. The range initialization can "
+                     "then be pasted into json_sanitizer_test_util.cc so that future fuzz tests "
+                     "and unit tests can avoid doing differentials against protobuf ranges that "
+                     "cannot be support. This likely needs to be re-run when the protobufs "
+                     "dependency is updated. Be sure to run this piping the output through "
+                     " |& grep -v 'contains invalid UTF-8' as the protobuf library will generate "
+                     " that message thousands of times and there is no way to disable it."
+                  << std::endl;
+        message_emitted = true;
+      }
+    }
+  }
+
+  absl::string_view sanitizeAndCheckAgainstProtobufJson(absl::string_view str) {
+    EXPECT_TRUE(isProtoSerializableUtf8(str)) << "str=" << str;
+    absl::string_view hand_sanitized = sanitizer_.sanitize(buffer_, str);
+    if (isProtoSerializableUtf8(str)) {
+      std::string proto_sanitized = MessageUtil::getJsonStringFromMessageOrDie(
+          ValueUtil::stringValue(std::string(str)), false, true);
+      EXPECT_EQ(stripDoubleQuotes(proto_sanitized), hand_sanitized) << "str=" << str;
+    }
+    EXPECT_EQ(hand_sanitized, stripDoubleQuotes(Nlohmann::Factory::serialize(str)));
+    return hand_sanitized;
+  }
+
+  void expectUnchanged(absl::string_view str) {
+    EXPECT_EQ(str, sanitizeAndCheckAgainstProtobufJson(str));
+  }
+
+  absl::string_view truncate(absl::string_view str) { return str.substr(0, str.size() - 1); }
+
+  std::string corruptByte2(absl::string_view str) {
+    std::string corrupt_second_byte = std::string(str);
+    ASSERT(str.size() >= 2);
+    corrupt_second_byte[1] |= '\xf0';
+    return corrupt_second_byte;
+  }
+
+  absl::string_view sanitizeInvalid(absl::string_view str) {
+    EXPECT_EQ(UnicodeSizePair(0, 0), decode(str));
+    return sanitizer_.sanitize(buffer_, str);
+  }
+
+  std::pair<uint32_t, uint32_t> decode(absl::string_view str) {
+    return JsonSanitizer::decodeUtf8(reinterpret_cast<const uint8_t*>(str.data()), str.size());
+  }
+
+  JsonSanitizer sanitizer_;
+  std::string buffer_;
+  bool generate_invalid_utf8_ranges_{false};
+};
+
+TEST_F(JsonSanitizerTest, Empty) { expectUnchanged(""); }
+
+TEST_F(JsonSanitizerTest, NoEscape) {
+  expectUnchanged("abcdefghijklmnopqrstuvwxyz");
+  expectUnchanged("ABCDEFGHIJKLMNOPQRSTUVWXYZ");
+  expectUnchanged("1234567890");
+  expectUnchanged(" `~!@#$%^&*()_+-={}|[]");
+  expectUnchanged("Hello world, Καλημέρα κόσμε, コンニチハ");
+}
+
+TEST_F(JsonSanitizerTest, SlashChars) {
+  EXPECT_EQ("\\b", sanitizeAndCheckAgainstProtobufJson("\b"));
+  EXPECT_EQ("\\f", sanitizeAndCheckAgainstProtobufJson("\f"));
+  EXPECT_EQ("\\n", sanitizeAndCheckAgainstProtobufJson("\n"));
+  EXPECT_EQ("\\r", sanitizeAndCheckAgainstProtobufJson("\r"));
+  EXPECT_EQ("\\t", sanitizeAndCheckAgainstProtobufJson("\t"));
+  EXPECT_EQ("\\\\", sanitizeAndCheckAgainstProtobufJson("\\"));
+  EXPECT_EQ("\\\"", sanitizeAndCheckAgainstProtobufJson("\""));
+}
+
+TEST_F(JsonSanitizerTest, ControlChars) {
+  EXPECT_EQ("\\u0001", sanitizeAndCheckAgainstProtobufJson("\001"));
+  EXPECT_EQ("\\u0002", sanitizeAndCheckAgainstProtobufJson("\002"));
+  EXPECT_EQ("\\b", sanitizeAndCheckAgainstProtobufJson("\010"));
+  EXPECT_EQ("\\t", sanitizeAndCheckAgainstProtobufJson("\011"));
+  EXPECT_EQ("\\n", sanitizeAndCheckAgainstProtobufJson("\012"));
+  EXPECT_EQ("\\u000b", sanitizeAndCheckAgainstProtobufJson("\013"));
+  EXPECT_EQ("\\f", sanitizeAndCheckAgainstProtobufJson("\014"));
+  EXPECT_EQ("\\r", sanitizeAndCheckAgainstProtobufJson("\015"));
+  EXPECT_EQ("\\u000e", sanitizeAndCheckAgainstProtobufJson("\016"));
+  EXPECT_EQ("\\u000f", sanitizeAndCheckAgainstProtobufJson("\017"));
+  EXPECT_EQ("\\u0010", sanitizeAndCheckAgainstProtobufJson("\020"));
+  EXPECT_EQ("\\u003c", sanitizeAndCheckAgainstProtobufJson("<"));
+  EXPECT_EQ("\\u003e", sanitizeAndCheckAgainstProtobufJson(">"));
+}
+
+TEST_F(JsonSanitizerTest, SevenBitAscii) {
+  // Cover all the 7-bit ascii values, calling sanitize so that it checks
+  // our hand-rolled sanitizer vs protobuf. We ignore the return-value of
+  // sanitize(); we are just calling for it to test against protobuf.
+  for (uint32_t i = 0; i < 128; ++i) {
+    char c = i;
+    sanitizeAndCheckAgainstProtobufJson(absl::string_view(&c, 1));
+  }
+}
+
+TEST_F(JsonSanitizerTest, Utf8) {
+  // reference; https://www.charset.org/utf-8
+  auto unicode = [](std::vector<uint8_t> chars) -> std::string {
+    return std::string(reinterpret_cast<const char*>(&chars[0]), chars.size());
+  };
+
+  sanitizeAndCheckAgainstProtobufJson(unicode({0xc2, 0xa2})); // Cent.
+  sanitizeAndCheckAgainstProtobufJson(unicode({0xc2, 0xa9})); // Copyright.
+  sanitizeAndCheckAgainstProtobufJson(unicode({0xc3, 0xa0})); // 'a' with accent grave.
+}
+
+TEST_F(JsonSanitizerTest, Interspersed) {
+  EXPECT_EQ("a\\bc", sanitizeAndCheckAgainstProtobufJson("a\bc"));
+  EXPECT_EQ("a\\b\\fc", sanitizeAndCheckAgainstProtobufJson("a\b\fc"));
+  EXPECT_EQ("\\bac", sanitizeAndCheckAgainstProtobufJson("\bac"));
+  EXPECT_EQ("\\b\\fac", sanitizeAndCheckAgainstProtobufJson("\b\fac"));
+  EXPECT_EQ("ac\\b", sanitizeAndCheckAgainstProtobufJson("ac\b"));
+  EXPECT_EQ("ac\\b", sanitizeAndCheckAgainstProtobufJson("ac\b"));
+  EXPECT_EQ("\\ra\\f", sanitizeAndCheckAgainstProtobufJson("\ra\f"));
+}
+
+TEST_F(JsonSanitizerTest, AllTwoByteUtf8) {
+  char buf[2];
+  absl::string_view utf8(buf, 2);
+  for (uint32_t byte1 = 2; byte1 < 32; ++byte1) {
+    buf[0] = byte1 | JsonSanitizer::Utf8_2BytePattern;
+    for (uint32_t byte2 = 0; byte2 < 64; ++byte2) {
+      buf[1] = byte2 | JsonSanitizer::Utf8_ContinuePattern;
+      auto [unicode, consumed] =
+          Envoy::Json::JsonSanitizer::decodeUtf8(reinterpret_cast<const uint8_t*>(buf), 2);
+      ASSERT_EQ(2, consumed);
+      sanitizeAndCheckAgainstProtobufJson(utf8);
+    }
+  }
+}
+
+TEST_F(JsonSanitizerTest, AllThreeByteUtf8) {
+  std::string utf8("abc");
+  uint32_t num_excluded = 0, num_included = 0;
+  uint32_t num_matches = 0, num_mismatches = 0;
+  for (uint32_t byte1 = 0; byte1 < 16; ++byte1) {
+    utf8[0] = byte1 | JsonSanitizer::Utf8_3BytePattern;
+    for (uint32_t byte2 = 0; byte2 < 64; ++byte2) {
+      utf8[1] = byte2 | JsonSanitizer::Utf8_ContinuePattern;
+      for (uint32_t byte3 = 0; byte3 < 64; ++byte3) {
+        utf8[2] = byte3 | JsonSanitizer::Utf8_ContinuePattern;
+        absl::string_view hand_sanitized = sanitizer_.sanitize(buffer_, utf8);
+        if (isProtoSerializableUtf8(utf8)) {
+          ++num_included;
+          auto [unicode, consumed] = Envoy::Json::JsonSanitizer::decodeUtf8(
+              reinterpret_cast<const uint8_t*>(utf8.data()), 3);
+          EXPECT_EQ(3, consumed);
+          std::string proto_sanitized =
+              MessageUtil::getJsonStringFromMessageOrDie(ValueUtil::stringValue(utf8), false, true);
+          EXPECT_TRUE(utf8Equivalent(stripDoubleQuotes(proto_sanitized), hand_sanitized))
+              << "(" << byte1 << "," << byte2 << "," << byte3 << ")";
+          if (utf8Equivalent(stripDoubleQuotes(proto_sanitized), hand_sanitized)) {
+            ++num_matches;
+          } else {
+            ENVOY_LOG_MISC(error, "unicode=0x{}, proto_sanitized={}",
+                           absl::StrFormat("%x", unicode), proto_sanitized);
+            ++num_mismatches;
+          }
+        } else {
+          ++num_excluded;
+        }
+      }
+    }
+  }
+  EXPECT_EQ(61440, num_included);
+  EXPECT_EQ(4096, num_excluded);
+  EXPECT_EQ(16 * 64 * 64, num_included + num_excluded);
+  EXPECT_EQ(61440, num_matches);
+  EXPECT_EQ(0, num_mismatches);
+}
+
+// This test takes 17 seconds without optimization.
+//#ifdef NDEBUG
+TEST_F(JsonSanitizerTest, AllFourByteUtf8) {
+  std::string utf8("abcd");
+  uint32_t num_excluded = 0, num_included = 0;
+  uint32_t num_matches = 0, num_mismatches = 0;
+
+  for (uint32_t byte1 = 0; byte1 < 16; ++byte1) {
+    utf8[0] = byte1 | JsonSanitizer::Utf8_4BytePattern;
+    for (uint32_t byte2 = 0; byte2 < 64; ++byte2) {
+      utf8[1] = byte2 | JsonSanitizer::Utf8_ContinuePattern;
+      for (uint32_t byte3 = 0; byte3 < 64; ++byte3) {
+        utf8[2] = byte3 | JsonSanitizer::Utf8_ContinuePattern;
+        for (uint32_t byte4 = 0; byte4 < 64; ++byte4) {
+          utf8[3] = byte4 | JsonSanitizer::Utf8_ContinuePattern;
+          absl::string_view hand_sanitized = sanitizer_.sanitize(buffer_, utf8);
+          if (isProtoSerializableUtf8(utf8)) {
+            ++num_included;
+            auto [unicode, consumed] = Envoy::Json::JsonSanitizer::decodeUtf8(
+                reinterpret_cast<const uint8_t*>(utf8.data()), 4);
+            EXPECT_EQ(4, consumed);
+            std::string proto_sanitized = MessageUtil::getJsonStringFromMessageOrDie(
+                ValueUtil::stringValue(utf8), false, true);
+            EXPECT_TRUE(utf8Equivalent(stripDoubleQuotes(proto_sanitized), hand_sanitized))
+                << "(" << byte1 << "," << byte2 << "," << byte3 << "," << byte4 << ")";
+            if (utf8Equivalent(stripDoubleQuotes(proto_sanitized), hand_sanitized)) {
+              ++num_matches;
+            } else {
+              ENVOY_LOG_MISC(error, "unicode=0x{}, proto_sanitized={}",
+                             absl::StrFormat("%x", unicode), proto_sanitized);
+              ++num_mismatches;
+            }
+          } else {
+            ++num_excluded;
+          }
+        }
+      }
+    }
+  }
+  /*
+  EXPECT_EQ(1048576, num_included);
+  EXPECT_EQ(3145728, num_excluded);
+  EXPECT_EQ(1048471, num_matches);
+  EXPECT_EQ(105, num_mismatches);
+  */
+}
+//#endif
+
+TEST_F(JsonSanitizerTest, MultiByteUtf8) {
+  EXPECT_EQ(UnicodeSizePair(0x3bb, 2), decode(Lambda));
+  EXPECT_EQ(UnicodeSizePair(0x3bb, 2), decode(LambdaUtf8));
+  EXPECT_EQ(UnicodeSizePair(0x1f79, 3), decode(Omicron));
+  EXPECT_EQ(UnicodeSizePair(0x1f79, 3), decode(OmicronUtf8));
+
+  // It's hard to find large unicode characters, but to test the utf8 decoder
+  // there are some in https://unicode-table.com/en/blocks/musical-symbols/
+  // with reference utf8 encoding from https://unicode-table.com/en/1D11E/
+  EXPECT_EQ(UnicodeSizePair(0x1d11e, 4), decode(TrebleClefUtf8));
+}
+
+TEST_F(JsonSanitizerTest, Low8Bit) {
+  // The characters from 0 to 0xBF (191) inclusive are all rendered identically
+  // to the protobuf json encoder.
+  std::string x0_7f;
+  for (uint32_t i = 0; i <= 0x7f; ++i) {
+    char ch = i;
+    x0_7f.push_back(ch);
+  }
+  EXPECT_EQ(
+      // Control-characters 0-31
+      "\\u0000\\u0001\\u0002\\u0003\\u0004\\u0005\\u0006\\u0007\\b\\t\\n"
+      "\\u000b\\f\\r\\u000e\\u000f\\u0010\\u0011\\u0012\\u0013\\u0014\\u0015"
+      "\\u0016\\u0017\\u0018\\u0019\\u001a\\u001b\\u001c\\u001d\\u001e\\u001f"
+
+      // Printable characters starting with space. Double-quote is back-slashed.
+      " !\\\"#$%&'()*+,-./0123456789:;"
+
+      // < and > are serialized by json as unicode.
+      "\\u003c=\\u003e?@ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+
+      // Remaining 7-bit codes ending with 127, which is rendered as a unicode escape.
+      "[\\\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\\u007f",
+
+      sanitizeAndCheckAgainstProtobufJson(x0_7f));
+}
+
+TEST_F(JsonSanitizerTest, High8Bit) {
+  std::string x80_ff;
+  for (uint32_t i = 0x80; i <= 0xff; ++i) {
+    char ch = i;
+    x80_ff.push_back(ch);
+  }
+  // The characters from 0x80 (192) to 255 all start out like they are
+  // multi-byte utf-8 sequences, but in this context are not followed by the
+  // right continuation pattern. The protobuf json serializer generates
+  // lots of error messages for these and yields empty strings, but we
+  // just escape them as single bytes.
+  EXPECT_EQ(
+      // The codes from 128-159 (0x9f) are rendered as several ways: unicode
+      // escapes or literal 8-bit characters.
+      "\\u0080\\u0081\\u0082\\u0083\\u0084\\u0085\\u0086\\u0087\\u0088\\u0089"
+      "\\u008a\\u008b\\u008c\\u008d\\u008e\\u008f\\u0090\\u0091\\u0092\\u0093"
+      "\\u0094\\u0095\\u0096\\u0097\\u0098\\u0099\\u009a\\u009b\\u009c\\u009d"
+      "\\u009e\\u009f"
+
+      // Then a sequence of literal 8-bit characters.
+      "\xA0\xA1\xA2\xA3\xA4\xA5\xA6\xA7\xA8\xA9\xAA\xAB\xAC"
+
+      // Weird special-case behavior to match json sanitizer
+      "\\u00ad"
+
+      // More literal 8-bit characters.
+      "\xAE\xAF\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7\xB8\xB9\xBA\xBB\xBC"
+      "\xBD\xBE\xBF"
+
+      // Codes with a utf8 introductory byte pattern that lack the correct
+      // pattern for the remaining codes. These get OCT-escaped by the json
+      // sanitizer, whereas the protobuf serializer generates an error message
+      // and returns an empty string.
+      "\\300\\301\\302\\303\\304\\305\\306\\307\\310\\311\\312\\313\\314\\315\\316\\317"
+      "\\320\\321\\322\\323\\324\\325\\326\\327\\330\\331\\332\\333\\334\\335\\336\\337"
+      "\\340\\341\\342\\343\\344\\345\\346\\347\\350\\351\\352\\353\\354\\355\\356\\357"
+      "\\360\\361\\362\\363\\364\\365\\366\\367\\370\\371\\372\\373\\374\\375\\376\\377",
+      sanitizer_.sanitize(buffer_, x80_ff));
+}
+
+TEST_F(JsonSanitizerTest, InvalidUtf8) {
+  // 2 byte
+  EXPECT_EQ("\\316", sanitizeInvalid(truncate(LambdaUtf8)));
+  EXPECT_EQ("\\316\\373", sanitizeInvalid(corruptByte2(LambdaUtf8)));
+
+  // 3 byte
+  absl::string_view out = sanitizeInvalid(truncate(OmicronUtf8));
+  EXPECT_THAT(out, StartsWith("\\341"));
+  EXPECT_EQ(5, out.size());
+  EXPECT_EQ('\275', out[4]);
+  EXPECT_EQ("\\341\\375\271", sanitizeInvalid(corruptByte2(OmicronUtf8)));
+
+  // 4 byte
+  EXPECT_EQ("\\360\\u009d\\u0084", sanitizeInvalid(truncate(TrebleClefUtf8)));
+  EXPECT_EQ("\\360\\375\\u0084\\u009e", sanitizeInvalid(corruptByte2(TrebleClefUtf8)));
+
+  // Invalid input embedded in normal text.
+  EXPECT_EQ(
+      "Hello, \\360\\u009d\\u0084, World!",
+      sanitizer_.sanitize(buffer_, absl::StrCat("Hello, ", truncate(TrebleClefUtf8), ", World!")));
+
+  // Replicate a few other cases that were discovered during initial fuzzing,
+  // to ensure we see these as invalid utf8 and avoid them in comparisons.
+  EXPECT_FALSE(isProtoSerializableUtf8("_K\301\234K"));
+  EXPECT_FALSE(isProtoSerializableUtf8("\xF7\xA6\x8A\x8A"));
+  EXPECT_FALSE(isProtoSerializableUtf8("\020\377\377\376\000"));
+}
+
+} // namespace
+} // namespace Json
+} // namespace Envoy
diff --git a/test/common/json/json_sanitizer_test_util.cc b/test/common/json/json_sanitizer_test_util.cc
new file mode 100644
index 0000000000000..f746253ef9907
--- /dev/null
+++ b/test/common/json/json_sanitizer_test_util.cc
@@ -0,0 +1,185 @@
+#include "test/common/json/json_sanitizer_test_util.h"
+
+#include <string>
+
+#include "source/common/common/utility.h"
+#include "source/common/json/json_sanitizer.h"
+
+#include "absl/strings/match.h"
+#include "absl/strings/numbers.h"
+
+namespace Envoy {
+namespace Json {
+
+absl::string_view stripDoubleQuotes(absl::string_view str) {
+  if (str.size() >= 2 && str[0] == '"' && str[str.size() - 1] == '"') {
+    return str.substr(1, str.size() - 2);
+  }
+  return str;
+}
+
+namespace {
+
+class InvalidUnicodeSet {
+public:
+  InvalidUnicodeSet() {
+    // Generated with
+    //   bazel build -c opt test/common/json:json_sanitizer_test
+    //   GENERATE_INVALID_UTF8_RANGES=1
+    //     ./bazel-bin/test/common/json/json_sanitizer_test |&
+    //     grep -v 'contains invalid UTF-8'
+
+    // Avoid ranges where the protobuf serialization fails, returning
+    // an empty string.
+    invalid_3byte_intervals_.insert(0xd800, 0xe000);
+
+    // Avoid unicode ranges generated from 4-byte utf-8 where protobuf
+    // serialization generates two small unicode values instead of the correct one.
+    // This must be a protobuf serialization issue.
+    invalid_4byte_intervals_.insert(0x1d173, 0x1d17b);
+    invalid_4byte_intervals_.insert(0xe0001, 0xe0002);
+    invalid_4byte_intervals_.insert(0xe0020, 0xe0080);
+  }
+
+  // Helper functions to see if the specified unicode is in the 3-byte utf-8
+  // exclusion set or the 4-byte utf-8 exclusion-set.
+  bool isInvalid3Byte(uint32_t unicode) const { return invalid_3byte_intervals_.test(unicode); }
+  bool isInvalid4Byte(uint32_t unicode) const { return invalid_4byte_intervals_.test(unicode); }
+
+private:
+  IntervalSetImpl<uint32_t> invalid_3byte_intervals_;
+  IntervalSetImpl<uint32_t> invalid_4byte_intervals_;
+};
+
+const InvalidUnicodeSet& invalidUnicodeSet() { CONSTRUCT_ON_FIRST_USE(InvalidUnicodeSet); }
+
+} // namespace
+
+bool isProtoSerializableUtf8(absl::string_view in) {
+  const uint8_t* data = reinterpret_cast<const uint8_t*>(in.data());
+  uint32_t size = in.size();
+  while (size != 0) {
+    if ((*data & 0x80) == 0) {
+      ++data;
+      --size;
+    } else {
+      auto [unicode, consumed] = Envoy::Json::JsonSanitizer::decodeUtf8(data, size);
+      data += consumed;
+      size -= consumed;
+
+      switch (consumed) {
+      case 2:
+        break;
+      case 3:
+        if (invalidUnicodeSet().isInvalid3Byte(unicode)) {
+          return false;
+        }
+        break;
+      case 4:
+        if (invalidUnicodeSet().isInvalid4Byte(unicode)) {
+          return false;
+        }
+        break;
+      default:
+        return false;
+      }
+    }
+  }
+  return true;
+}
+
+// Implements strtol for hex, but accepting a non-nul-terminated string_view,
+// and with one branch per character. This can be done with only one branch
+// per string if we use a table instead of a switch statement, and have all
+// the non-hex character inputs map to 0x80, and accumulate the OR of all
+// mapped values to test after the loop, but that would be harder to read.
+//
+// It is good for this code to be somewhat faster (ie not create a temp string)
+// so that fuzzers can run faster and cover more cases.
+//
+// If a string-view based hex decoder is useful in production code, this
+// could be factored into a decode() variant in source/common/common.hex.cc.
+bool parseUnicode(absl::string_view str, uint32_t& hex_value) {
+  if (absl::StartsWith(str, "\\u") && str.size() >= 6) {
+    hex_value = 0;
+    for (char c : str.substr(2, 4)) {
+      uint32_t val = 0;
+      switch (c) {
+      case '0':
+      case '1':
+      case '2':
+      case '3':
+      case '4':
+      case '5':
+      case '6':
+      case '7':
+      case '8':
+      case '9':
+        val = c - '0';
+        break;
+      case 'A':
+      case 'B':
+      case 'C':
+      case 'D':
+      case 'E':
+      case 'F':
+        val = c - 'A' + 10;
+        break;
+      case 'a':
+      case 'b':
+      case 'c':
+      case 'd':
+      case 'e':
+      case 'f':
+        val = c - 'a' + 10;
+        break;
+      default:
+        return false;
+      }
+      hex_value = 16 * hex_value + val;
+    }
+    return true;
+  }
+  return false;
+}
+
+// Compares a string that's possibly an escaped unicode, e.g. \u1234, to
+// one that is utf8-encoded.
+bool compareUnicodeEscapeAgainstUtf8(absl::string_view& escaped, absl::string_view& utf8) {
+  uint32_t escaped_unicode;
+  if (utf8.size() >= 3 && parseUnicode(escaped, escaped_unicode)) {
+    // If one side of the comparison is a unicode escape,
+    auto [unicode, consumed] = Envoy::Json::JsonSanitizer::decodeUtf8(
+        reinterpret_cast<const uint8_t*>(utf8.data()), utf8.size());
+    if (consumed == 3 && unicode == escaped_unicode) {
+      utf8 = utf8.substr(3, utf8.size() - 3);
+      escaped = escaped.substr(6, escaped.size() - 6);
+      return true;
+    }
+  }
+  return false;
+}
+
+// Determines whether two strings differ only in whether they have
+// literal utf-8 or escaped 3-byte unicode. We do this equivalence
+// comparison to enable differential fuzzing between JsonSanitizer and
+// protobuf json serialization. The protobuf implementation has made
+// some hard-to-understand decisions about what to encode via unicode
+// escapes versus what to pass through as utf-8.
+bool utf8Equivalent(absl::string_view a, absl::string_view b) {
+  while (true) {
+    if (a.empty() && b.empty()) {
+      return true;
+    } else if (a.empty() || b.empty()) {
+      return false;
+    } else if (a[0] == b[0]) {
+      a = a.substr(1, a.size() - 1);
+      b = b.substr(1, b.size() - 1);
+    } else if (!compareUnicodeEscapeAgainstUtf8(a, b) && !compareUnicodeEscapeAgainstUtf8(b, a)) {
+      return false;
+    }
+  }
+}
+
+} // namespace Json
+} // namespace Envoy
diff --git a/test/common/json/json_sanitizer_test_util.h b/test/common/json/json_sanitizer_test_util.h
new file mode 100644
index 0000000000000..e978cc86598dd
--- /dev/null
+++ b/test/common/json/json_sanitizer_test_util.h
@@ -0,0 +1,29 @@
+#pragma once
+
+#include "absl/strings/string_view.h"
+
+namespace Envoy {
+namespace Json {
+
+/**
+ * Strips double-quotes on first and last characters of str.
+ *
+ * @param str The string to strip double-quotes from.
+ * @return The string without its surrounding double-quotes.
+ */
+absl::string_view stripDoubleQuotes(absl::string_view str);
+
+/**
+ * Determines whether the input string can be serialized by protobufs. This is
+ * used for testing, to avoid trying to do differentials against Protobuf json
+ * sanitization, which produces noisy error messages and empty strings when
+ * presented with some utf8 sequences that are valid according to spec.
+ *
+ * @param in the string to validate as utf-8.
+ */
+bool isProtoSerializableUtf8(absl::string_view in);
+
+bool utf8Equivalent(absl::string_view a, absl::string_view b);
+
+} // namespace Json
+} // namespace Envoy
diff --git a/test/fuzz/BUILD b/test/fuzz/BUILD
index 115801409b376..9370f55376bf5 100644
--- a/test/fuzz/BUILD
+++ b/test/fuzz/BUILD
@@ -61,10 +61,12 @@ envoy_cc_test_library(
 
 envoy_cc_test_library(
     name = "utility_lib",
+    srcs = ["utility.cc"],
     hdrs = ["utility.h"],
     deps = [
         ":common_proto_cc_proto",
         "//source/common/common:empty_string",
+        "//source/common/common:logger_lib",
         "//source/common/network:resolver_lib",
         "//source/common/network:utility_lib",
         "//test/common/stream_info:test_util",
diff --git a/test/fuzz/utility.cc b/test/fuzz/utility.cc
new file mode 100644
index 0000000000000..810007e000c07
--- /dev/null
+++ b/test/fuzz/utility.cc
@@ -0,0 +1,27 @@
+#include "test/fuzz/utility.h"
+
+#include "source/common/common/logger.h"
+
+#include "absl/strings/str_format.h"
+
+namespace Envoy {
+namespace Fuzz {
+
+std::vector<std::string> fuzzFindDiffs(absl::string_view expected, absl::string_view actual) {
+  std::vector<std::string> diffs;
+  const uint32_t max_diffs = 5;
+  if (expected.size() != actual.size()) {
+    diffs.push_back(absl::StrCat("Size mismatch: ", expected.size(), " != ", actual.size()));
+  }
+  uint32_t min_size = std::min(expected.size(), actual.size());
+  for (uint32_t i = 0; i < min_size && diffs.size() < max_diffs; ++i) {
+    if (expected[i] != actual[i]) {
+      diffs.push_back(absl::StrFormat("[%d]: %c(%u) != %c(%u)", i, expected[i], expected[i],
+                                      actual[i], actual[i]));
+    }
+  }
+  return diffs;
+}
+
+} // namespace Fuzz
+} // namespace Envoy
diff --git a/test/fuzz/utility.h b/test/fuzz/utility.h
index 55b4772e15639..7593f24f11477 100644
--- a/test/fuzz/utility.h
+++ b/test/fuzz/utility.h
@@ -195,5 +195,16 @@ inline std::vector<std::string> parseHttpData(const test::fuzz::HttpData& data)
   return data_chunks;
 }
 
+// Returns a vector of differences between expected and actual. An empty array indicates
+// expected==actual
+std::vector<std::string> fuzzFindDiffs(absl::string_view expected, absl::string_view actual);
+
+#define FUZZ_ASSERT_EQ(expected, actual, annotation)                                               \
+  {                                                                                                \
+    std::vector<std::string> diffs = fuzzFindDiffs(expected, actual);                              \
+    RELEASE_ASSERT(expected == actual, absl::StrCat(annotation, ": ", expected, " != ", actual,    \
+                                                    "\n  ", absl::StrJoin(diffs, "\n  ")));        \
+  }
+
 } // namespace Fuzz
 } // namespace Envoy
diff --git a/tools/spelling/spelling_dictionary.txt b/tools/spelling/spelling_dictionary.txt
index 9a2d46e4841c5..7d0c29bf0432f 100644
--- a/tools/spelling/spelling_dictionary.txt
+++ b/tools/spelling/spelling_dictionary.txt
@@ -135,6 +135,7 @@ FREEBIND
 FUZZER
 FUZZERS
 dereferencing
+differentially
 dnsresolvers
 guarddog
 GC
@@ -1103,6 +1104,7 @@ rver
 rxhash
 sandboxed
 sanitization
+sanitizations
 sanitizer
 satisfiable
 scalability