From 1a1da8d27910fab094060ef8558fc9681aaefa63 Mon Sep 17 00:00:00 2001
From: Ketan Verma <ketan9495@gmail.com>
Date: Wed, 19 Jul 2023 18:17:12 -0400
Subject: [PATCH 1/9] Performance improvements for BytesRefHash

Signed-off-by: Ketan Verma <ketan9495@gmail.com>
---
 .../common/util/BytesRefHashBenchmark.java    | 260 +++++++++++++++
 buildSrc/version.properties                   |   2 +
 server/build.gradle                           |  16 +-
 .../zero-allocation-hashing-0.16.jar.sha1     |   1 +
 .../zero-allocation-hashing-LICENSE.txt       | 201 ++++++++++++
 .../zero-allocation-hashing-NOTICE.txt        |   0
 .../common/util/CompactBytesRefHash.java      | 286 +++++++++++++++++
 .../common/util/ReorganizingBytesRefHash.java | 301 ++++++++++++++++++
 .../bucket/terms/BytesKeyedBucketOrds.java    |   5 +-
 .../org/opensearch/bootstrap/security.policy  |  10 +
 .../bootstrap/test-framework.policy           |   1 +
 .../common/util/CompactBytesRefHashTests.java |  58 ++++
 .../util/ReorganizingBytesRefHashTests.java   |  70 ++++
 13 files changed, 1208 insertions(+), 3 deletions(-)
 create mode 100644 benchmarks/src/main/java/org/opensearch/common/util/BytesRefHashBenchmark.java
 create mode 100644 server/licenses/zero-allocation-hashing-0.16.jar.sha1
 create mode 100644 server/licenses/zero-allocation-hashing-LICENSE.txt
 create mode 100644 server/licenses/zero-allocation-hashing-NOTICE.txt
 create mode 100644 server/src/main/java/org/opensearch/common/util/CompactBytesRefHash.java
 create mode 100644 server/src/main/java/org/opensearch/common/util/ReorganizingBytesRefHash.java
 create mode 100644 server/src/test/java/org/opensearch/common/util/CompactBytesRefHashTests.java
 create mode 100644 server/src/test/java/org/opensearch/common/util/ReorganizingBytesRefHashTests.java

diff --git a/benchmarks/src/main/java/org/opensearch/common/util/BytesRefHashBenchmark.java b/benchmarks/src/main/java/org/opensearch/common/util/BytesRefHashBenchmark.java
new file mode 100644
index 0000000000000..8ad04a42b190f
--- /dev/null
+++ b/benchmarks/src/main/java/org/opensearch/common/util/BytesRefHashBenchmark.java
@@ -0,0 +1,260 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.common.util;
+
+import net.openhft.hashing.LongHashFunction;
+import org.apache.lucene.util.BytesRef;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Param;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.TearDown;
+import org.openjdk.jmh.annotations.Warmup;
+import org.openjdk.jmh.infra.Blackhole;
+import org.opensearch.common.lease.Releasable;
+import org.opensearch.common.lease.Releasables;
+
+import java.util.HashSet;
+import java.util.Random;
+import java.util.Set;
+import java.util.concurrent.TimeUnit;
+import java.util.stream.Stream;
+
+@Fork(value = 5)
+@Warmup(iterations = 1, time = 2)
+@Measurement(iterations = 3, time = 5)
+@BenchmarkMode(Mode.AverageTime)
+@OutputTimeUnit(TimeUnit.MILLISECONDS)
+public class BytesRefHashBenchmark {  // JMH benchmark of add() across the BytesRef hash table variants
+    private static final int NUM_TABLES = 20;  // run across many tables so that caches aren't effective
+    private static final int NUM_HITS = 1_000_000;  // number of add() calls issued to each table per benchmark invocation
+
+    @Benchmark
+    public void add(Blackhole bh, Options opts) {  // cycles through the keys, adding each one to every table
+        for (int hit = 0; hit < NUM_HITS; hit++) {
+            BytesRef key = opts.keys[hit % opts.keys.length];  // wraps around when there are fewer keys than hits
+            for (HashTable table : opts.tables) {
+                bh.consume(table.add(key));  // hand the result to the blackhole so the call can't be eliminated
+            }
+        }
+    }
+
+    @State(Scope.Benchmark)
+    public static class Options {  // benchmark parameters, plus the tables and keys built from them
+        @Param({ "baseline", "compact", "reorganizing" })
+        public String type;  // hash table implementation under test; resolved in newHashTable()
+
+        @Param({
+            "1",
+            "2",
+            "3",
+            "4",
+            "5",
+            "6",
+            "7",
+            "8",
+            "10",
+            "12",
+            "14",
+            "16",
+            "19",
+            "22",
+            "25",
+            "29",
+            "33",
+            "38",
+            "43",
+            "50",
+            "57",
+            "65",
+            "75",
+            "86",
+            "97",
+            "109",
+            "124",
+            "141",
+            "161",
+            "182",
+            "204",
+            "229",
+            "262",
+            "297",
+            "336",
+            "380",
+            "430",
+            "482",
+            "550",
+            "610",
+            "704",
+            "801",
+            "914",
+            "1042",
+            "1178",
+            "1343",
+            "1532",
+            "1716",
+            "1940",
+            "2173",
+            "2456",
+            "2751",
+            "3082",
+            "3514",
+            "4006",
+            "4487",
+            "5026",
+            "5730",
+            "6418",
+            "7317",
+            "8196",
+            "9180",
+            "10374",
+            "11723",
+            "13247",
+            "14837",
+            "16915",
+            "19114",
+            "21599",
+            "24623",
+            "28071",
+            "32001",
+            "36482",
+            "41590",
+            "46581",
+            "52637",
+            "58954",
+            "67208",
+            "76618",
+            "86579",
+            "97835",
+            "109576",
+            "122726",
+            "138681",
+            "156710",
+            "175516",
+            "198334",
+            "222135",
+            "248792",
+            "281135",
+            "320494",
+            "365364",
+            "409208",
+            "466498",
+            "527143",
+            "595672",
+            "667153",
+            "753883",
+            "851888",
+            "971153" })
+
+        public Integer size;  // number of distinct keys generated in setup()
+
+        @Param({ "8", "32", "128" })
+        public Integer length;  // number of characters in each generated key
+
+        private HashTable[] tables;  // NUM_TABLES independent tables, all of the configured type
+
+        private BytesRef[] keys;  // the distinct keys that the benchmark cycles through
+
+        @Setup
+        public void setup() {  // builds the tables and a reproducible set of unique random keys
+            assert size <= Math.pow(26, length) : "key length too small to generate the required number of keys";
+            tables = Stream.generate(this::newHashTable).limit(NUM_TABLES).toArray(HashTable[]::new);
+            Random random = new Random(0);  // fixed seed, so the same key sequence is generated every time
+            Set<BytesRef> seen = new HashSet<>();  // keys generated so far, used to reject duplicates
+            keys = new BytesRef[size];
+            for (int i = 0; i < size; i++) {
+                BytesRef key;
+                do {
+                    key = new BytesRef(
+                        random.ints(97, 123)  // code points 97 ('a') through 122 ('z'); the upper bound is exclusive
+                            .limit(length)
+                            .collect(StringBuilder::new, StringBuilder::appendCodePoint, StringBuilder::append)
+                            .toString()
+                    );
+                } while (seen.contains(key));  // regenerate until the key is unique
+                keys[i] = key;
+                seen.add(key);
+            }
+        }
+
+        @TearDown
+        public void tearDown() {  // closes every table created in setup()
+            Releasables.close(tables);
+        }
+
+        private HashTable newHashTable() {  // wraps the implementation selected by 'type' in a HashTable adapter
+            switch (type) {
+                case "baseline":
+                    return new HashTable() {
+                        private final BytesRefHash table = new BytesRefHash(1, 0.6f, BigArrays.NON_RECYCLING_INSTANCE);
+
+                        @Override
+                        public long add(BytesRef key) {
+                            return table.add(key);
+                        }
+
+                        @Override
+                        public void close() {
+                            table.close();
+                        }
+                    };
+                case "compact":
+                    return new HashTable() {
+                        private final CompactBytesRefHash table = new CompactBytesRefHash(
+                            1,  // initial capacity
+                            0.6f,  // load factor
+                            key -> LongHashFunction.xx3().hashBytes(key.bytes, key.offset, key.length),  // XX3 without a seed
+                            BigArrays.NON_RECYCLING_INSTANCE
+                        );
+
+                        @Override
+                        public long add(BytesRef key) {
+                            return table.add(key);
+                        }
+
+                        @Override
+                        public void close() {
+                            table.close();
+                        }
+                    };
+                case "reorganizing":
+                    return new HashTable() {
+                        private final ReorganizingBytesRefHash table = new ReorganizingBytesRefHash(
+                            1,
+                            0.6f,
+                            key -> LongHashFunction.xx3().hashBytes(key.bytes, key.offset, key.length),  // XX3 without a seed
+                            BigArrays.NON_RECYCLING_INSTANCE
+                        );
+
+                        @Override
+                        public long add(BytesRef key) {
+                            return table.add(key);
+                        }
+
+                        @Override
+                        public void close() {
+                            table.close();
+                        }
+                    };
+                default:
+                    throw new IllegalArgumentException("invalid hash table type: " + type);
+            }
+        }
+    }
+
+    private interface HashTable extends Releasable {  // common facade so the benchmark loop doesn't depend on the table type
+        long add(BytesRef key);
+    }
+}
diff --git a/buildSrc/version.properties b/buildSrc/version.properties
index ff962309cf084..2bb21dfca4b14 100644
--- a/buildSrc/version.properties
+++ b/buildSrc/version.properties
@@ -69,3 +69,5 @@ resteasy          = 6.2.4.Final
 # opentelemetry dependencies
 opentelemetry    = 1.26.0
 
+# hashing dependencies
+zero_allocation_hashing = 0.16
diff --git a/server/build.gradle b/server/build.gradle
index f6db3d53a0dcc..3b1fe9554a309 100644
--- a/server/build.gradle
+++ b/server/build.gradle
@@ -158,6 +158,9 @@ dependencies {
   api "com.google.protobuf:protobuf-java:${versions.protobuf}"
   api "jakarta.annotation:jakarta.annotation-api:${versions.jakarta_annotation}"
 
+  // hashing
+  api "net.openhft:zero-allocation-hashing:${versions.zero_allocation_hashing}"
+
   testImplementation(project(":test:framework")) {
     // tests use the locally compiled version of server
     exclude group: 'org.opensearch', module: 'server'
@@ -364,7 +367,18 @@ tasks.named("thirdPartyAudit").configure {
             'com.google.protobuf.UnsafeUtil$Android32MemoryAccessor',
             'com.google.protobuf.UnsafeUtil$Android64MemoryAccessor',
             'com.google.protobuf.UnsafeUtil$JvmMemoryAccessor',
-            'com.google.protobuf.UnsafeUtil$MemoryAccessor'
+            'com.google.protobuf.UnsafeUtil$MemoryAccessor',
+
+            // from zero-allocation-hashing
+            'net.openhft.hashing.HotSpotPrior7u6StringHash',
+            'net.openhft.hashing.LongHashFunction',
+            'net.openhft.hashing.LongTupleHashFunction',
+            'net.openhft.hashing.ModernCompactStringHash',
+            'net.openhft.hashing.ModernHotSpotStringHash',
+            'net.openhft.hashing.UnsafeAccess',
+            'net.openhft.hashing.UnsafeAccess$OldUnsafeAccessBigEndian',
+            'net.openhft.hashing.UnsafeAccess$OldUnsafeAccessLittleEndian',
+            'net.openhft.hashing.Util'
     )
 }
 
diff --git a/server/licenses/zero-allocation-hashing-0.16.jar.sha1 b/server/licenses/zero-allocation-hashing-0.16.jar.sha1
new file mode 100644
index 0000000000000..e82e885f269ce
--- /dev/null
+++ b/server/licenses/zero-allocation-hashing-0.16.jar.sha1
@@ -0,0 +1 @@
+0ca252f328160ed5d027f100a4fe525d6d21daaf
\ No newline at end of file
diff --git a/server/licenses/zero-allocation-hashing-LICENSE.txt b/server/licenses/zero-allocation-hashing-LICENSE.txt
new file mode 100644
index 0000000000000..261eeb9e9f8b2
--- /dev/null
+++ b/server/licenses/zero-allocation-hashing-LICENSE.txt
@@ -0,0 +1,201 @@
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
diff --git a/server/licenses/zero-allocation-hashing-NOTICE.txt b/server/licenses/zero-allocation-hashing-NOTICE.txt
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/server/src/main/java/org/opensearch/common/util/CompactBytesRefHash.java b/server/src/main/java/org/opensearch/common/util/CompactBytesRefHash.java
new file mode 100644
index 0000000000000..d8d4690d14e90
--- /dev/null
+++ b/server/src/main/java/org/opensearch/common/util/CompactBytesRefHash.java
@@ -0,0 +1,286 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.common.util;
+
+import net.openhft.hashing.LongHashFunction;
+import org.apache.lucene.util.BytesRef;
+import org.opensearch.common.lease.Releasable;
+import org.opensearch.common.lease.Releasables;
+import org.opensearch.core.common.util.ByteArray;
+
+import java.security.AccessController;
+import java.security.PrivilegedAction;
+
+/**
+ * Specialized hash table implementation that maps a {@link BytesRef} key to a long ordinal.
+ *
+ * <p>
+ * It uses a compact byte-packing strategy to encode the ordinal and fingerprint information
+ * in the hash table value. It makes lookups faster by short-circuiting expensive equality checks
+ * for keys that collide onto the same hash table slot.
+ *
+ * <p>
+ * This class is not thread-safe; among other things, all lookups share a single scratch {@link BytesRef}.
+ *
+ * @opensearch.internal
+ */
+public class CompactBytesRefHash implements Releasable {
+    private static final LongHashFunction XX3 = AccessController.doPrivileged(
+        (PrivilegedAction<LongHashFunction>) () -> LongHashFunction.xx3(System.nanoTime())
+    );  // seeded once, at class initialization
+
+    private static final long MAX_CAPACITY = 1L << 32;  // largest permitted number of slots (asserted in growAndInsert)
+    private static final long DEFAULT_INITIAL_CAPACITY = 32;
+    private static final float DEFAULT_LOAD_FACTOR = 0.6f;
+    private static final Hasher DEFAULT_HASHER = key -> XX3.hashBytes(key.bytes, key.offset, key.length);
+
+    private static final long MASK_ORDINAL = 0x00000000FFFFFFFFL;  // low 32 bits of a table value: the ordinal
+    private static final long MASK_FINGERPRINT = 0xFFFFFFFF00000000L;  // high 32 bits of a table value: the fingerprint
+
+    /**
+     * Maximum load factor after which the capacity is doubled.
+     */
+    private final float loadFactor;
+
+    /**
+     * Calculates the hash of a {@link BytesRef} key.
+     */
+    private final Hasher hasher;
+
+    /**
+     * Utility class to allocate recyclable arrays.
+     */
+    private final BigArrays bigArrays;
+
+    /**
+     * Reusable BytesRef to read keys. It is overwritten by every lookup, insertion, and rehash.
+     */
+    private final BytesRef scratch = new BytesRef();
+
+    /**
+     * Current capacity of the hash table. This must be a power of two so that the hash table slot
+     * can be identified quickly using bitmasks, thus avoiding expensive modulo or integer division.
+     */
+    private long capacity;
+
+    /**
+     * Bitmask to identify the hash table slot from a key's hash. Always equal to (capacity - 1).
+     */
+    private long mask;
+
+    /**
+     * Size threshold after which the hash table needs to be doubled in capacity.
+     */
+    private long grow;
+
+    /**
+     * Current size of the hash table. This is also the ordinal that the next new key will receive.
+     */
+    private long size;
+
+    /**
+     * Underlying array to store the hash table values. An empty slot holds the value -1.
+     *
+     * <p>
+     * Each hash table value (64-bit) uses the following byte packing strategy:
+     * <pre>
+     * |================================|================================|
+     * | Fingerprint                    | Ordinal                        |
+     * |--------------------------------|--------------------------------|
+     * | 32 bits                        | 32 bits                        |
+     * |================================|================================|
+     * </pre>
+     *
+     * <p>
+     * This allows us to encode and manipulate additional information in the hash table
+     * itself without having to look elsewhere in the memory, which is much slower.
+     *
+     * <p>
+     * Terminology: <code>table[index] = value = (fingerprint | ordinal)</code>
+     */
+    private LongArray table;
+
+    /**
+     * Underlying array to store the starting offsets of keys.
+     *
+     * <p>
+     * Terminology:
+     * <pre>
+     *   offsets[ordinal] = starting offset (inclusive)
+     *   offsets[ordinal + 1] = ending offset (exclusive)
+     * </pre>
+     */
+    private LongArray offsets;
+
+    /**
+     * Underlying byte array to store the keys, concatenated in ordinal order.
+     *
+     * <p>
+     * Terminology: <code>keys[start...end] = key</code>
+     */
+    private ByteArray keys;
+
+    public CompactBytesRefHash(final BigArrays bigArrays) {  // uses the default capacity, load factor, and seeded XX3 hasher
+        this(DEFAULT_INITIAL_CAPACITY, DEFAULT_LOAD_FACTOR, DEFAULT_HASHER, bigArrays);
+    }
+
+    public CompactBytesRefHash(final long initialCapacity, final float loadFactor, final Hasher hasher, final BigArrays bigArrays) {
+        assert initialCapacity > 0 : "initial capacity must be greater than 0";
+        assert loadFactor > 0 && loadFactor < 1 : "load factor must be between 0 and 1";
+
+        this.loadFactor = loadFactor;
+        this.hasher = hasher;
+        this.bigArrays = bigArrays;
+
+        capacity = Math.max(1, Long.highestOneBit((long) (initialCapacity / loadFactor)) << 1);  // next power of two above the quotient
+        mask = capacity - 1;
+        size = 0;
+        grow = (long) (capacity * loadFactor);
+
+        table = bigArrays.newLongArray(capacity, false);
+        table.fill(0, capacity, -1);  // -1 marks an empty slot
+        offsets = bigArrays.newLongArray(initialCapacity + 1, false);  // one extra entry for the end offset of the last key
+        offsets.set(0, 0);  // the first key starts at byte 0
+        keys = bigArrays.newByteArray(initialCapacity * 3, false);  // starts at 3 bytes per expected key; append() grows it on demand
+    }
+
+    /**
+     * Adds the given key to the hash table and returns its ordinal.
+     * If the key exists already, it returns (-1 - ordinal), which is always negative.
+     */
+    public long add(final BytesRef key) {
+        final long hash = hasher.hash(key);
+        final long fingerprint = hash & MASK_FINGERPRINT;
+
+        for (long idx = hash & mask, value, ordinal;; idx = (idx + 1) & mask) {  // linear probing, wrapping around at the end
+            if ((value = table.get(idx)) == -1) {  // empty slot: the key is not present yet
+                final long val = fingerprint | size;  // the current size is the ordinal of the new key
+                if (size >= grow) {  // threshold reached: double the capacity and rehash
+                    growAndInsert(hash, val);
+                } else {
+                    table.set(idx, val);
+                }
+                return append(key);  // stores the key bytes and returns the new ordinal
+            } else if (((value & MASK_FINGERPRINT) == fingerprint) && key.bytesEquals(get(ordinal = (value & MASK_ORDINAL), scratch))) {
+                return -1 - ordinal;  // the key already exists
+            }
+        }
+    }
+
+    /**
+     * Returns the ordinal associated with the given key, or -1 if the key doesn't exist.
+     *
+     * <p>
+     * Using the 64-bit hash value, up to 32 least significant bits (LSB) are used to identify the
+     * home slot in the hash table, and an additional 32 bits are used to identify the fingerprint.
+     * The fingerprint further increases the entropy and reduces the number of false lookups in the
+     * keys' table during equality checks, which is expensive.
+     *
+     * <p>
+     * Total entropy bits = 32 + log2(capacity)
+     *
+     * <p>
+     * Linear probing starts from the home slot, until a match or an empty slot is found.
+     * Values are first checked using their fingerprint (to reduce false positives), then verified
+     * in the keys' table using an equality check.
+     */
+    public long find(final BytesRef key) {
+        final long hash = hasher.hash(key);
+        final long fingerprint = hash & MASK_FINGERPRINT;
+
+        for (long idx = hash & mask, value, ordinal;; idx = (idx + 1) & mask) {  // linear probing, wrapping around at the end
+            if ((value = table.get(idx)) == -1) {  // an empty slot ends the probe sequence
+                return -1;
+            } else if (((value & MASK_FINGERPRINT) == fingerprint) && key.bytesEquals(get(ordinal = (value & MASK_ORDINAL), scratch))) {
+                return ordinal;
+            }
+        }
+    }
+
+    /**
+     * Returns the key associated with the given ordinal. The returned object is the provided {@code dest}.
+     * The result is undefined for an unused ordinal.
+     *
+     * <p>
+     * Beware that the content of the {@link BytesRef} may become invalid as soon as {@link #close()} is called.
+     */
+    public BytesRef get(final long ordinal, final BytesRef dest) {
+        final long start = offsets.get(ordinal);
+        final int length = (int) (offsets.get(ordinal + 1) - start);  // the next offset is this key's exclusive end
+        keys.get(start, length, dest);  // loads the key bytes into dest
+        return dest;
+    }
+
+    /**
+     * Returns the number of mappings in this hash table.
+     */
+    public long size() {
+        return size;
+    }
+
+    /**
+     * Appends the key in the keys' and offsets' tables, and returns the ordinal assigned to it.
+     */
+    private long append(final BytesRef key) {
+        final long start = offsets.get(size);  // end of the previous key (0 for the first key)
+        final long end = start + key.length;
+        offsets = bigArrays.grow(offsets, size + 2);  // make room for offsets[size + 1]
+        offsets.set(size + 1, end);
+        keys = bigArrays.grow(keys, end);  // make room for the bytes in [start, end)
+        keys.set(start, key.bytes, key.offset, key.length);
+        return size++;  // the ordinal is the size before the increment
+    }
+
+    /**
+     * Grows the hash table by doubling its capacity, inserting the provided value,
+     * and reinserting the previous values at their updated slots.
+     */
+    private void growAndInsert(final long hash, final long value) {
+        // Ensure that the hash table doesn't grow too large.
+        // This implicitly also ensures that the ordinals stay smaller than 2^32, thus,
+        // preventing them from polluting the fingerprint bits in the hash table values.
+        assert capacity < MAX_CAPACITY : "hash table already at the max capacity";
+
+        capacity <<= 1;
+        mask = capacity - 1;
+        grow = (long) (capacity * loadFactor);
+        table = bigArrays.grow(table, capacity);
+        table.fill(0, capacity, -1);  // clear every slot; all existing entries are re-added below
+        table.set(hash & mask, value);  // the new entry takes its home slot in the cleared table
+
+        for (long ordinal = 0; ordinal < size; ordinal++) {
+            reinsert(ordinal, hasher.hash(get(ordinal, scratch)));  // full hashes aren't stored, so recompute from the key bytes
+        }
+    }
+
+    /**
+     * Reinserts the hash table value for an existing key stored at the given ordinal.
+     */
+    private void reinsert(final long ordinal, final long hash) {
+        for (long idx = hash & mask;; idx = (idx + 1) & mask) {
+            if (table.get(idx) == -1) {  // first empty slot at or after the home slot
+                table.set(idx, (hash & MASK_FINGERPRINT) | ordinal);
+                return;
+            }
+        }
+    }
+
+    @Override
+    public void close() {
+        Releasables.close(table, offsets, keys);  // releases the three backing arrays
+    }
+
+    /**
+     * Hasher calculates the hash of a {@link BytesRef} key.
+     */
+    @FunctionalInterface
+    public interface Hasher {
+        long hash(BytesRef key);
+    }
+}
diff --git a/server/src/main/java/org/opensearch/common/util/ReorganizingBytesRefHash.java b/server/src/main/java/org/opensearch/common/util/ReorganizingBytesRefHash.java
new file mode 100644
index 0000000000000..1806733cc1567
--- /dev/null
+++ b/server/src/main/java/org/opensearch/common/util/ReorganizingBytesRefHash.java
@@ -0,0 +1,301 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.common.util;
+
+import net.openhft.hashing.LongHashFunction;
+import org.apache.lucene.util.BytesRef;
+import org.opensearch.common.lease.Releasable;
+import org.opensearch.common.lease.Releasables;
+import org.opensearch.core.common.util.ByteArray;
+
+import java.security.AccessController;
+import java.security.PrivilegedAction;
+
+/**
+ * Specialized hash table implementation that maps a {@link BytesRef} key to a long ordinal.
+ *
+ * <p>
+ * It organizes itself by moving keys around dynamically in order to reduce the
+ * longest probe sequence length (PSL), which makes lookups faster as keys are likely to
+ * be found in the same CPU cache line. It also uses fingerprints to short-circuit expensive
+ * equality checks for keys that collide onto the same hash table slot.
+ *
+ * <p>
+ * This class is not thread-safe.
+ *
+ * @opensearch.internal
+ */
+public class ReorganizingBytesRefHash implements Releasable {
+    private static final LongHashFunction XX3 = AccessController.doPrivileged(
+        (PrivilegedAction<LongHashFunction>) () -> LongHashFunction.xx3(System.nanoTime())
+    );
+
+    private static final long MAX_CAPACITY = 1L << 32;
+    private static final long DEFAULT_INITIAL_CAPACITY = 32;
+    private static final float DEFAULT_LOAD_FACTOR = 0.6f;
+    private static final Hasher DEFAULT_HASHER = key -> XX3.hashBytes(key.bytes, key.offset, key.length);
+
+    private static final long MASK_ORDINAL = 0x00000000FFFFFFFFL;  // extract ordinal
+    private static final long MASK_FINGERPRINT = 0x0000FFFF00000000L;  // extract fingerprint
+    private static final long MASK_PSL = 0x7FFF000000000000L;  // extract PSL
+    private static final long INCR_PSL = 0x0001000000000000L;  // increment PSL by one
+
+    /**
+     * Maximum load factor after which the capacity is doubled.
+     */
+    private final float loadFactor;
+
+    /**
+     * Calculates the hash of a {@link BytesRef} key.
+     */
+    private final Hasher hasher;
+
+    /**
+     * Utility class to allocate recyclable arrays.
+     */
+    private final BigArrays bigArrays;
+
+    /**
+     * Reusable BytesRef to read keys.
+     */
+    private final BytesRef scratch = new BytesRef();
+
+    /**
+     * Current capacity of the hash table. This must be a power of two so that the hash table slot
+     * can be identified quickly using bitmasks, thus avoiding expensive modulo or integer division.
+     */
+    private long capacity;
+
+    /**
+     * Bitmask to identify the hash table slot from a key's hash.
+     */
+    private long mask;
+
+    /**
+     * Size threshold after which the hash table needs to be doubled in capacity.
+     */
+    private long grow;
+
+    /**
+     * Current size of the hash table.
+     */
+    private long size;
+
+    /**
+     * Underlying array to store the hash table values.
+     *
+     * <p>
+     * Each hash table value (64-bit) uses the following byte packing strategy:
+     * <pre>
+     * |=========|===============|================|================================|
+     * | Discard | PSL           | Fingerprint    | Ordinal                        |
+     * |    -    |---------------|----------------|--------------------------------|
+     * | 1 bit   | 15 bits       | 16 bits        | 32 bits                        |
+     * |=========|===============|================|================================|
+     * </pre>
+     *
+     * <p>
+     * This allows us to encode and manipulate additional information in the hash table
+     * itself without having to look elsewhere in the memory, which is much slower.
+     *
+     * <p>
+     * Terminology: <code>table[index] = value = (discard | psl | fingerprint | ordinal)</code>
+     */
+    private LongArray table;
+
+    /**
+     * Underlying array to store the starting offsets of keys.
+     *
+     * <p>
+     * Terminology:
+     * <pre>
+     *   offsets[ordinal] = starting offset (inclusive)
+     *   offsets[ordinal + 1] = ending offset (exclusive)
+     * </pre>
+     */
+    private LongArray offsets;
+
+    /**
+     * Underlying byte array to store the keys.
+     *
+     * <p>
+     * Terminology: <code>keys[start...end] = key</code>
+     */
+    private ByteArray keys;
+
+    public ReorganizingBytesRefHash(final BigArrays bigArrays) {
+        this(DEFAULT_INITIAL_CAPACITY, DEFAULT_LOAD_FACTOR, DEFAULT_HASHER, bigArrays);
+    }
+
+    public ReorganizingBytesRefHash(final long initialCapacity, final float loadFactor, final Hasher hasher, final BigArrays bigArrays) {
+        assert initialCapacity > 0 : "initial capacity must be greater than 0";
+        assert loadFactor > 0 && loadFactor < 1 : "load factor must be between 0 and 1";
+
+        this.loadFactor = loadFactor;
+        this.hasher = hasher;
+        this.bigArrays = bigArrays;
+
+        capacity = Math.max(1, Long.highestOneBit((long) (initialCapacity / loadFactor)) << 1);
+        mask = capacity - 1;
+        size = 0;
+        grow = (long) (capacity * loadFactor);
+
+        table = bigArrays.newLongArray(capacity, false);
+        table.fill(0, capacity, -1);
+        offsets = bigArrays.newLongArray(initialCapacity + 1, false);
+        offsets.set(0, 0);
+        keys = bigArrays.newByteArray(initialCapacity * 3, false);
+    }
+
+    /**
+     * Adds the given key and returns its new ordinal, i.e. the number of keys stored before the call.
+     * If the key exists already, nothing is stored and it returns (-1 - ordinal) of the existing key.
+     */
+    public long add(final BytesRef key) {
+        final long hash = hasher.hash(key);
+        final long fingerprint = hash & MASK_FINGERPRINT;
+
+        for (long idx = hash & mask, value, ordinal;; idx = (idx + 1) & mask) {
+            if ((value = table.get(idx)) == -1) {  // empty slot reached: the key is not in the table
+                final long val = (fingerprint | size);  // PSL bits are zero; size becomes the ordinal
+                if (size >= grow) {  // growth threshold reached: double the table first
+                    growAndInsert(hash, val);
+                } else {
+                    insert(hash, val);
+                }
+                return append(key);
+            } else if (((value & MASK_FINGERPRINT) == fingerprint) && key.bytesEquals(get(ordinal = (value & MASK_ORDINAL), scratch))) {
+                return -(1 + ordinal);
+            }
+        }
+    }
+
+    /**
+     * Returns the ordinal associated with the given key, or -1 if the key doesn't exist.
+     *
+     * <p>
+     * Using the 64-bit hash value, up to 32 least significant bits (LSB) are used to identify the
+     * home slot in the hash table, and an additional 16 bits are used to identify the fingerprint.
+     * The fingerprint further increases the entropy and reduces the number of false lookups in the
+     * keys' table during equality checks, which is expensive.
+     *
+     * <p>
+     * Total entropy bits = 16 + log2(capacity)
+     *
+     * <p>
+     * Linear probing starts from the home slot, until a match or an empty slot is found.
+     * Values are first checked using their fingerprint (to reduce false positives), then verified
+     * in the keys' table using an equality check.
+     */
+    public long find(final BytesRef key) {
+        final long hash = hasher.hash(key);
+        final long fingerprint = hash & MASK_FINGERPRINT;
+
+        for (long idx = hash & mask, value, ordinal;; idx = (idx + 1) & mask) {
+            if ((value = table.get(idx)) == -1) {
+                return -1;
+            } else if (((value & MASK_FINGERPRINT) == fingerprint) && key.bytesEquals(get(ordinal = (value & MASK_ORDINAL), scratch))) {
+                return ordinal;
+            }
+        }
+    }
+
+    /**
+     * Reads the key stored for the given ordinal into {@code dest} and returns {@code dest}.
+     * The result is undefined for an unused ordinal.
+     *
+     * <p>
+     * Beware that the content of the {@link BytesRef} may become invalid as soon as {@link #close()} is called.
+     */
+    public BytesRef get(final long ordinal, final BytesRef dest) {
+        final long start = offsets.get(ordinal);
+        final int length = (int) (offsets.get(ordinal + 1) - start);
+        keys.get(start, length, dest);
+        return dest;
+    }
+
+    /**
+     * Returns the number of mappings in this hash table.
+     */
+    public long size() {
+        return size;
+    }
+
+    /**
+     * Appends the key's bytes and its end offset to the backing arrays and returns the ordinal assigned to it.
+     */
+    private long append(final BytesRef key) {
+        final long start = offsets.get(size);
+        final long end = start + key.length;
+        offsets = bigArrays.grow(offsets, size + 2);  // make room for index (size + 1)
+        offsets.set(size + 1, end);
+        keys = bigArrays.grow(keys, end);
+        keys.set(start, key.bytes, key.offset, key.length);
+        return size++;  // the ordinal is the size before the increment
+    }
+
+    /**
+     * Doubles the capacity, fills the table with -1 (the empty marker), stores the provided value
+     * at the home slot of the given hash, and reinserts the value of every key stored so far.
+     */
+    private void growAndInsert(final long hash, final long value) {
+        // Ensure that the hash table doesn't grow too large.
+        // This implicitly also ensures that the ordinals stay strictly below 2^32, thus,
+        // preventing them from polluting the fingerprint bits in the hash table values.
+        assert capacity < MAX_CAPACITY : "hash table already at the max capacity";
+
+        capacity <<= 1;
+        mask = capacity - 1;
+        grow = (long) (capacity * loadFactor);
+        table = bigArrays.grow(table, capacity);
+        table.fill(0, capacity, -1);
+        table.set(hash & mask, value);  // home slot is free: the table was just refilled with -1
+
+        for (long ordinal = 0; ordinal < size; ordinal++) {
+            final long h = hasher.hash(get(ordinal, scratch));  // rehash the stored key
+            insert(h, (h & MASK_FINGERPRINT) | ordinal);  // PSL bits start at zero again
+        }
+    }
+
+    /**
+     * Inserts the value for a missing key, swapping with any resident of smaller PSL and carrying the evicted value onward.
+     */
+    private void insert(final long hash, final long value) {
+        for (long idx = hash & mask, current = value, existing;; idx = (idx + 1) & mask) {
+            if ((existing = table.get(idx)) == -1) {
+                table.set(idx, current);
+                return;
+            } else if ((existing & MASK_PSL) < (current & MASK_PSL)) {
+                current = table.set(idx, current);  // NOTE(review): relies on set() returning the previous value - confirm
+            }
+            current += INCR_PSL;  // the carried value moves one slot further from its home slot
+        }
+    }
+
+    @Override
+    public void close() {
+        Releasables.close(table, offsets, keys);
+    }
+
+    /**
+     * Returns the underlying hash table.
+     * Visible for unit-tests.
+     */
+    LongArray getTable() {
+        return table;
+    }
+
+    /**
+     * Hasher calculates the hash of a {@link BytesRef} key.
+     */
+    @FunctionalInterface
+    public interface Hasher {
+        long hash(BytesRef key);
+    }
+}
diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/BytesKeyedBucketOrds.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/BytesKeyedBucketOrds.java
index 0eb23013d1e47..2c804166eed78 100644
--- a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/BytesKeyedBucketOrds.java
+++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/BytesKeyedBucketOrds.java
@@ -37,6 +37,7 @@
 import org.opensearch.common.lease.Releasables;
 import org.opensearch.common.util.BigArrays;
 import org.opensearch.common.util.BytesRefHash;
+import org.opensearch.common.util.CompactBytesRefHash;
 import org.opensearch.search.aggregations.CardinalityUpperBound;
 
 /**
@@ -128,10 +129,10 @@ public void readValue(BytesRef dest) {}
      * @opensearch.internal
      */
     private static class FromSingle extends BytesKeyedBucketOrds {
-        private final BytesRefHash ords;
+        private final CompactBytesRefHash ords;
 
         private FromSingle(BigArrays bigArrays) {
-            ords = new BytesRefHash(1, bigArrays);
+            ords = new CompactBytesRefHash(bigArrays);
         }
 
         @Override
diff --git a/server/src/main/resources/org/opensearch/bootstrap/security.policy b/server/src/main/resources/org/opensearch/bootstrap/security.policy
index 77cd0ab05278e..2fde31cb1d648 100644
--- a/server/src/main/resources/org/opensearch/bootstrap/security.policy
+++ b/server/src/main/resources/org/opensearch/bootstrap/security.policy
@@ -48,6 +48,10 @@ grant codeBase "${codebase.opensearch}" {
   permission java.lang.RuntimePermission "setContextClassLoader";
   // needed for SPI class loading
   permission java.lang.RuntimePermission "accessDeclaredMembers";
+
+  // needed for zero-allocation-hashing
+  permission java.lang.RuntimePermission "accessClassInPackage.sun.misc";
+  permission java.lang.reflect.ReflectPermission "suppressAccessChecks";
 };
 
 //// Very special jar permissions:
@@ -85,6 +89,12 @@ grant codeBase "${codebase.zstd-jni}" {
   permission java.lang.RuntimePermission "loadLibrary.*";
 };
 
+grant codeBase "${codebase.zero-allocation-hashing}" {
+  permission java.lang.RuntimePermission "accessClassInPackage.sun.misc";
+  permission java.lang.RuntimePermission "accessDeclaredMembers";
+  permission java.lang.reflect.ReflectPermission "suppressAccessChecks";
+};
+
 //// Everything else:
 
 grant {
diff --git a/server/src/main/resources/org/opensearch/bootstrap/test-framework.policy b/server/src/main/resources/org/opensearch/bootstrap/test-framework.policy
index 0abfd7ef22ae7..7d35d439bd373 100644
--- a/server/src/main/resources/org/opensearch/bootstrap/test-framework.policy
+++ b/server/src/main/resources/org/opensearch/bootstrap/test-framework.policy
@@ -156,5 +156,6 @@ grant {
   permission java.lang.RuntimePermission "accessDeclaredMembers";
   permission java.lang.RuntimePermission "reflectionFactoryAccess";
   permission java.lang.RuntimePermission "accessClassInPackage.sun.reflect";
+  permission java.lang.RuntimePermission "accessClassInPackage.sun.misc";
   permission java.lang.reflect.ReflectPermission "suppressAccessChecks";
 };
diff --git a/server/src/test/java/org/opensearch/common/util/CompactBytesRefHashTests.java b/server/src/test/java/org/opensearch/common/util/CompactBytesRefHashTests.java
new file mode 100644
index 0000000000000..297fe82a2a505
--- /dev/null
+++ b/server/src/test/java/org/opensearch/common/util/CompactBytesRefHashTests.java
@@ -0,0 +1,58 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.common.util;
+
+import net.openhft.hashing.LongHashFunction;
+import org.apache.lucene.util.BytesRef;
+import org.opensearch.test.OpenSearchTestCase;
+
+import java.util.HashMap;
+import java.util.Map;
+import java.util.stream.Stream;
+
+public class CompactBytesRefHashTests extends OpenSearchTestCase {
+
+    public void testFuzzy() {
+        LongHashFunction hasher = LongHashFunction.xx3(randomLong());
+        Map<BytesRef, Long> reference = new HashMap<>();
+        BytesRef[] keys = Stream.generate(() -> new BytesRef(randomAlphaOfLength(20))).limit(1000).toArray(BytesRef[]::new);
+
+        try (
+            CompactBytesRefHash h = new CompactBytesRefHash(
+                randomIntBetween(1, 100),      // random capacity
+                0.6f + randomFloat() * 0.39f,  // random load factor to verify collision resolution
+                key -> hasher.hashBytes(key.bytes, key.offset, key.length),
+                BigArrays.NON_RECYCLING_INSTANCE
+            )
+        ) {
+            // Verify the behaviour of "add" and "find".
+            for (int i = 0; i < keys.length * 10; i++) {
+                BytesRef key = keys[i % keys.length];
+                if (reference.containsKey(key)) {
+                    long expectedOrdinal = reference.get(key);
+                    assertEquals(-1 - expectedOrdinal, h.add(key));
+                    assertEquals(expectedOrdinal, h.find(key));
+                } else {
+                    assertEquals(-1, h.find(key));
+                    reference.put(key, (long) reference.size());
+                    assertEquals((long) reference.get(key), h.add(key));
+                }
+            }
+
+            // Verify the behaviour of "get".
+            BytesRef scratch = new BytesRef();
+            for (Map.Entry<BytesRef, Long> entry : reference.entrySet()) {
+                assertEquals(entry.getKey(), h.get(entry.getValue(), scratch));
+            }
+
+            // Verify the behaviour of "size".
+            assertEquals(reference.size(), h.size());
+        }
+    }
+}
diff --git a/server/src/test/java/org/opensearch/common/util/ReorganizingBytesRefHashTests.java b/server/src/test/java/org/opensearch/common/util/ReorganizingBytesRefHashTests.java
new file mode 100644
index 0000000000000..e7ab6d1fa21a6
--- /dev/null
+++ b/server/src/test/java/org/opensearch/common/util/ReorganizingBytesRefHashTests.java
@@ -0,0 +1,70 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.common.util;
+
+import net.openhft.hashing.LongHashFunction;
+import org.apache.lucene.util.BytesRef;
+import org.opensearch.test.OpenSearchTestCase;
+
+import java.util.HashMap;
+import java.util.Map;
+import java.util.stream.Stream;
+
+public class ReorganizingBytesRefHashTests extends OpenSearchTestCase {
+
+    public void testFuzzy() {
+        LongHashFunction hasher = LongHashFunction.xx3(randomLong());
+        Map<BytesRef, Long> reference = new HashMap<>();
+        BytesRef[] keys = Stream.generate(() -> new BytesRef(randomAlphaOfLength(20))).limit(1000).toArray(BytesRef[]::new);
+
+        try (
+            ReorganizingBytesRefHash h = new ReorganizingBytesRefHash(
+                randomIntBetween(1, 100),      // random capacity
+                0.6f + randomFloat() * 0.39f,  // random load factor to verify collision resolution
+                key -> hasher.hashBytes(key.bytes, key.offset, key.length),
+                BigArrays.NON_RECYCLING_INSTANCE
+            )
+        ) {
+            // Verify the behaviour of "add" and "find".
+            for (int i = 0; i < keys.length * 10; i++) {
+                BytesRef key = keys[i % keys.length];
+                if (reference.containsKey(key)) {
+                    long expectedOrdinal = reference.get(key);
+                    assertEquals(-1 - expectedOrdinal, h.add(key));
+                    assertEquals(expectedOrdinal, h.find(key));
+                } else {
+                    assertEquals(-1, h.find(key));
+                    reference.put(key, (long) reference.size());
+                    assertEquals((long) reference.get(key), h.add(key));
+                }
+            }
+
+            // Verify the behaviour of "get".
+            BytesRef scratch = new BytesRef();
+            for (Map.Entry<BytesRef, Long> entry : reference.entrySet()) {
+                assertEquals(entry.getKey(), h.get(entry.getValue(), scratch));
+            }
+
+            // Verify the behaviour of "size".
+            assertEquals(reference.size(), h.size());
+
+            // Verify the calculation of PSLs.
+            long capacity = h.getTable().size();
+            long mask = capacity - 1;
+            for (long idx = 0; idx < h.getTable().size(); idx++) {
+                long value = h.getTable().get(idx);
+                if (value != -1) {
+                    BytesRef key = h.get((int) value, scratch);
+                    long homeIdx = hasher.hashBytes(key.bytes, key.offset, key.length) & mask;
+                    assertEquals((capacity + idx - homeIdx) & mask, value >>> 48);
+                }
+            }
+        }
+    }
+}

From f8052d165227d1d50a3133055f742b78a1dbb4ad Mon Sep 17 00:00:00 2001
From: Ketan Verma <ketan9495@gmail.com>
Date: Sun, 30 Jul 2023 23:11:52 +0530
Subject: [PATCH 2/9] Replace BytesRefHash and clean up alternative
 implementations

Signed-off-by: Ketan Verma <ketan9495@gmail.com>
---
 CHANGELOG.md                                  |   1 +
 .../common/util/BytesRefHashBenchmark.java    |  78 +---
 .../java/org/opensearch/common/Numbers.java   |   8 +
 .../opensearch/common/util/BytesRefHash.java  | 366 ++++++++++++------
 .../common/util/CompactBytesRefHash.java      | 286 --------------
 .../common/util/ReorganizingBytesRefHash.java | 301 --------------
 .../common/util/ReorganizingLongHash.java     |  12 +-
 .../bucket/terms/BytesKeyedBucketOrds.java    |   7 +-
 .../bucket/terms/SignificanceLookup.java      |   2 +-
 .../terms/StringRareTermsAggregator.java      |   2 +-
 .../org/opensearch/common/NumbersTests.java   |  20 +
 .../common/util/BytesRefHashTests.java        |  61 +--
 .../common/util/CompactBytesRefHashTests.java |  58 ---
 .../util/ReorganizingBytesRefHashTests.java   |  70 ----
 14 files changed, 315 insertions(+), 957 deletions(-)
 delete mode 100644 server/src/main/java/org/opensearch/common/util/CompactBytesRefHash.java
 delete mode 100644 server/src/main/java/org/opensearch/common/util/ReorganizingBytesRefHash.java
 delete mode 100644 server/src/test/java/org/opensearch/common/util/CompactBytesRefHashTests.java
 delete mode 100644 server/src/test/java/org/opensearch/common/util/ReorganizingBytesRefHashTests.java

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 7af3c171e8c6a..9d81d24e15b9b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -130,6 +130,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 - Replace the deprecated IndexReader APIs with new storedFields() & termVectors() ([#7792](https://github.com/opensearch-project/OpenSearch/pull/7792))
 - [Remote Store] Add support to restore only unassigned shards of an index ([#8792](https://github.com/opensearch-project/OpenSearch/pull/8792))
 - Add safeguard limits for file cache during node level allocation ([#8208](https://github.com/opensearch-project/OpenSearch/pull/8208))
+- Performance improvements for BytesRefHash ([#8788](https://github.com/opensearch-project/OpenSearch/pull/8788))
 - Add support for aggregation profiler with concurrent aggregation ([#8801](https://github.com/opensearch-project/OpenSearch/pull/8801))
 - [Remove] Deprecated Fractional ByteSizeValue support #9005 ([#9005](https://github.com/opensearch-project/OpenSearch/pull/9005))
 - Add support for aggregation profiler with concurrent aggregation ([#8801](https://github.com/opensearch-project/OpenSearch/pull/8801))
diff --git a/benchmarks/src/main/java/org/opensearch/common/util/BytesRefHashBenchmark.java b/benchmarks/src/main/java/org/opensearch/common/util/BytesRefHashBenchmark.java
index 8ad04a42b190f..2e2a2399e9c0d 100644
--- a/benchmarks/src/main/java/org/opensearch/common/util/BytesRefHashBenchmark.java
+++ b/benchmarks/src/main/java/org/opensearch/common/util/BytesRefHashBenchmark.java
@@ -8,7 +8,6 @@
 
 package org.opensearch.common.util;
 
-import net.openhft.hashing.LongHashFunction;
 import org.apache.lucene.util.BytesRef;
 import org.openjdk.jmh.annotations.Benchmark;
 import org.openjdk.jmh.annotations.BenchmarkMode;
@@ -23,7 +22,6 @@
 import org.openjdk.jmh.annotations.TearDown;
 import org.openjdk.jmh.annotations.Warmup;
 import org.openjdk.jmh.infra.Blackhole;
-import org.opensearch.common.lease.Releasable;
 import org.opensearch.common.lease.Releasables;
 
 import java.util.HashSet;
@@ -32,7 +30,7 @@
 import java.util.concurrent.TimeUnit;
 import java.util.stream.Stream;
 
-@Fork(value = 5)
+@Fork(value = 3)
 @Warmup(iterations = 1, time = 2)
 @Measurement(iterations = 3, time = 5)
 @BenchmarkMode(Mode.AverageTime)
@@ -45,7 +43,7 @@ public class BytesRefHashBenchmark {
     public void add(Blackhole bh, Options opts) {
         for (int hit = 0; hit < NUM_HITS; hit++) {
             BytesRef key = opts.keys[hit % opts.keys.length];
-            for (HashTable table : opts.tables) {
+            for (BytesRefHash table : opts.tables) {
                 bh.consume(table.add(key));
             }
         }
@@ -53,9 +51,6 @@ public void add(Blackhole bh, Options opts) {
 
     @State(Scope.Benchmark)
     public static class Options {
-        @Param({ "baseline", "compact", "reorganizing" })
-        public String type;
-
         @Param({
             "1",
             "2",
@@ -163,14 +158,16 @@ public static class Options {
         @Param({ "8", "32", "128" })
         public Integer length;
 
-        private HashTable[] tables;
+        private BytesRefHash[] tables;
 
         private BytesRef[] keys;
 
         @Setup
         public void setup() {
             assert size <= Math.pow(26, length) : "key length too small to generate the required number of keys";
-            tables = Stream.generate(this::newHashTable).limit(NUM_TABLES).toArray(HashTable[]::new);
+            tables = Stream.generate(() -> new BytesRefHash(BigArrays.NON_RECYCLING_INSTANCE))
+                .limit(NUM_TABLES)
+                .toArray(BytesRefHash[]::new);
             Random random = new Random(0);
             Set<BytesRef> seen = new HashSet<>();
             keys = new BytesRef[size];
@@ -193,68 +190,5 @@ public void setup() {
         public void tearDown() {
             Releasables.close(tables);
         }
-
-        private HashTable newHashTable() {
-            switch (type) {
-                case "baseline":
-                    return new HashTable() {
-                        private final BytesRefHash table = new BytesRefHash(1, 0.6f, BigArrays.NON_RECYCLING_INSTANCE);
-
-                        @Override
-                        public long add(BytesRef key) {
-                            return table.add(key);
-                        }
-
-                        @Override
-                        public void close() {
-                            table.close();
-                        }
-                    };
-                case "compact":
-                    return new HashTable() {
-                        private final CompactBytesRefHash table = new CompactBytesRefHash(
-                            1,
-                            0.6f,
-                            key -> LongHashFunction.xx3().hashBytes(key.bytes, key.offset, key.length),
-                            BigArrays.NON_RECYCLING_INSTANCE
-                        );
-
-                        @Override
-                        public long add(BytesRef key) {
-                            return table.add(key);
-                        }
-
-                        @Override
-                        public void close() {
-                            table.close();
-                        }
-                    };
-                case "reorganizing":
-                    return new HashTable() {
-                        private final ReorganizingBytesRefHash table = new ReorganizingBytesRefHash(
-                            1,
-                            0.6f,
-                            key -> LongHashFunction.xx3().hashBytes(key.bytes, key.offset, key.length),
-                            BigArrays.NON_RECYCLING_INSTANCE
-                        );
-
-                        @Override
-                        public long add(BytesRef key) {
-                            return table.add(key);
-                        }
-
-                        @Override
-                        public void close() {
-                            table.close();
-                        }
-                    };
-                default:
-                    throw new IllegalArgumentException("invalid hash table type: " + type);
-            }
-        }
-    }
-
-    private interface HashTable extends Releasable {
-        long add(BytesRef key);
     }
 }
diff --git a/libs/common/src/main/java/org/opensearch/common/Numbers.java b/libs/common/src/main/java/org/opensearch/common/Numbers.java
index 084e52a41f8b1..d5a364a4a934e 100644
--- a/libs/common/src/main/java/org/opensearch/common/Numbers.java
+++ b/libs/common/src/main/java/org/opensearch/common/Numbers.java
@@ -260,4 +260,12 @@ public static double unsignedLongToDouble(long value) {
         // want to replace that with 1 in the shifted value for correct rounding.
         return (double) ((value >>> 1) | (value & 1)) * 2.0;
     }
+
+    /**
+     * Returns the smallest power of two strictly greater than the given value (e.g. 4 yields 8).
+     * Zero and negative values yield 1; values of 2^62 or more overflow to Long.MIN_VALUE.
+     */
+    public static long nextPowerOfTwo(long value) {
+        return 1L << (Long.SIZE - Long.numberOfLeadingZeros(value));
+    }
 }
diff --git a/server/src/main/java/org/opensearch/common/util/BytesRefHash.java b/server/src/main/java/org/opensearch/common/util/BytesRefHash.java
index ecc93d017beaf..c2f394c627806 100644
--- a/server/src/main/java/org/opensearch/common/util/BytesRefHash.java
+++ b/server/src/main/java/org/opensearch/common/util/BytesRefHash.java
@@ -6,179 +6,291 @@
  * compatible open source license.
  */
 
-/*
- * Licensed to Elasticsearch under one or more contributor
- * license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright
- * ownership. Elasticsearch licenses this file to you under
- * the Apache License, Version 2.0 (the "License"); you may
- * not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*
- * Modifications Copyright OpenSearch Contributors. See
- * GitHub history for details.
- */
-
 package org.opensearch.common.util;
 
+import net.openhft.hashing.LongHashFunction;
 import org.apache.lucene.util.BytesRef;
+import org.opensearch.common.Numbers;
 import org.opensearch.common.lease.Releasable;
 import org.opensearch.common.lease.Releasables;
 import org.opensearch.core.common.util.ByteArray;
 
+import java.security.AccessController;
+import java.security.PrivilegedAction;
+
 /**
- *  Specialized hash table implementation similar to Lucene's BytesRefHash that maps
- *  BytesRef values to ids. Collisions are resolved with open addressing and linear
- *  probing, growth is smooth thanks to {@link BigArrays}, hashes are cached for faster
- *  re-hashing and capacity is always a multiple of 2 for faster identification of buckets.
- *  This class is not thread-safe.
+ * Specialized hash table implementation that maps a {@link BytesRef} key to a long ordinal.
+ *
+ * <p>
+ * It uses a compact byte-packing strategy to encode the ordinal and fingerprint information
+ * in the hash table value. It makes lookups faster by short-circuiting expensive equality checks
+ * for keys that collide onto the same hash table slot.
  *
- *  @opensearch.internal
+ * <p>
+ * This class is not thread-safe.
+ *
+ * @opensearch.internal
  */
-public final class BytesRefHash extends AbstractHash {
+public class BytesRefHash implements Releasable {
+    private static final LongHashFunction XX3 = AccessController.doPrivileged(
+        (PrivilegedAction<LongHashFunction>) () -> LongHashFunction.xx3(System.nanoTime())
+    );
+
+    private static final long MAX_CAPACITY = 1L << 32;
+    private static final long DEFAULT_INITIAL_CAPACITY = 32;
+    private static final float DEFAULT_LOAD_FACTOR = 0.6f;
+    private static final Hasher DEFAULT_HASHER = key -> XX3.hashBytes(key.bytes, key.offset, key.length);
+
+    private static final long MASK_ORDINAL = 0x00000000FFFFFFFFL;  // extract ordinal
+    private static final long MASK_FINGERPRINT = 0xFFFFFFFF00000000L;  // extract fingerprint
+
+    /**
+     * Maximum load factor after which the capacity is doubled.
+     */
+    private final float loadFactor;
 
-    private LongArray startOffsets;
-    private ByteArray bytes;
-    private IntArray hashes; // we cache hashes for faster re-hashing
-    private final BytesRef spare;
+    /**
+     * Calculates the hash of a {@link BytesRef} key.
+     */
+    private final Hasher hasher;
+
+    /**
+     * Utility class to allocate recyclable arrays.
+     */
+    private final BigArrays bigArrays;
+
+    /**
+     * Reusable BytesRef to read keys.
+     */
+    private final BytesRef scratch = new BytesRef();
+
+    /**
+     * Current capacity of the hash table. This must be a power of two so that the hash table slot
+     * can be identified quickly using bitmasks, thus avoiding expensive modulo or integer division.
+     */
+    private long capacity;
+
+    /**
+     * Bitmask to identify the hash table slot from a key's hash.
+     */
+    private long mask;
+
+    /**
+     * Size threshold after which the hash table needs to be doubled in capacity.
+     */
+    private long grow;
+
+    /**
+     * Current size of the hash table.
+     */
+    private long size;
+
+    /**
+     * Underlying array to store the hash table values.
+     *
+     * <p>
+     * Each hash table value (64-bit) uses the following byte packing strategy:
+     * <pre>
+     * |================================|================================|
+     * | Fingerprint                    | Ordinal                        |
+     * |--------------------------------|--------------------------------|
+     * | 32 bits                        | 32 bits                        |
+     * |================================|================================|
+     * </pre>
+     *
+     * <p>
+     * This allows us to encode and manipulate additional information in the hash table
+     * itself without having to look elsewhere in the memory, which is much slower.
+     *
+     * <p>
+     * Terminology: <code>table[index] = value = (fingerprint | ordinal)</code>
+     */
+    private LongArray table;
 
-    // Constructor with configurable capacity and default maximum load factor.
-    public BytesRefHash(long capacity, BigArrays bigArrays) {
-        this(capacity, DEFAULT_MAX_LOAD_FACTOR, bigArrays);
+    /**
+     * Underlying array to store the starting offsets of keys.
+     *
+     * <p>
+     * Terminology:
+     * <pre>
+     *   offsets[ordinal] = starting offset (inclusive)
+     *   offsets[ordinal + 1] = ending offset (exclusive)
+     * </pre>
+     */
+    private LongArray offsets;
+
+    /**
+     * Underlying byte array to store the keys.
+     *
+     * <p>
+     * Terminology: <code>keys[start...end] = key</code>
+     */
+    private ByteArray keys;
+
+    public BytesRefHash(final BigArrays bigArrays) {
+        this(DEFAULT_INITIAL_CAPACITY, DEFAULT_LOAD_FACTOR, DEFAULT_HASHER, bigArrays);
     }
 
-    // Constructor with configurable capacity and load factor.
-    public BytesRefHash(long capacity, float maxLoadFactor, BigArrays bigArrays) {
-        super(capacity, maxLoadFactor, bigArrays);
-        startOffsets = bigArrays.newLongArray(capacity + 1, false);
-        startOffsets.set(0, 0);
-        bytes = bigArrays.newByteArray(capacity * 3, false);
-        hashes = bigArrays.newIntArray(capacity, false);
-        spare = new BytesRef();
+    public BytesRefHash(final long initialCapacity, final BigArrays bigArrays) {
+        this(initialCapacity, DEFAULT_LOAD_FACTOR, DEFAULT_HASHER, bigArrays);
     }
 
-    // BytesRef has a weak hashCode function so we try to improve it by rehashing using Murmur3
-    // Feel free to remove rehashing if BytesRef gets a better hash function
-    private static int rehash(int hash) {
-        return BitMixer.mix32(hash);
+    public BytesRefHash(final long initialCapacity, final float loadFactor, final BigArrays bigArrays) {
+        this(initialCapacity, loadFactor, DEFAULT_HASHER, bigArrays);
     }
 
-    /**
-     * Return the key at <code>0 &lt;= index &lt;= capacity()</code>. The result is undefined if the slot is unused.
-     * <p>Beware that the content of the {@link BytesRef} may become invalid as soon as {@link #close()} is called</p>
-     */
-    public BytesRef get(long id, BytesRef dest) {
-        final long startOffset = startOffsets.get(id);
-        final int length = (int) (startOffsets.get(id + 1) - startOffset);
-        bytes.get(startOffset, length, dest);
-        return dest;
+    public BytesRefHash(final long initialCapacity, final float loadFactor, final Hasher hasher, final BigArrays bigArrays) {
+        assert initialCapacity > 0 : "initial capacity must be greater than 0";
+        assert loadFactor > 0 && loadFactor < 1 : "load factor must be between 0 and 1";
+
+        this.loadFactor = loadFactor;
+        this.hasher = hasher;
+        this.bigArrays = bigArrays;
+
+        capacity = Numbers.nextPowerOfTwo((long) (initialCapacity / loadFactor));
+        assert capacity <= MAX_CAPACITY : "required capacity too large";
+        mask = capacity - 1;
+        size = 0;
+        grow = (long) (capacity * loadFactor);
+
+        table = bigArrays.newLongArray(capacity, false);
+        table.fill(0, capacity, -1);
+        offsets = bigArrays.newLongArray(initialCapacity + 1, false);
+        offsets.set(0, 0);
+        keys = bigArrays.newByteArray(initialCapacity * 3, false);
     }
 
     /**
-     * Get the id associated with <code>key</code>
+     * Adds the given key to the hash table and returns its ordinal.
+     * If the key exists already, it returns (-1 - ordinal).
      */
-    public long find(BytesRef key, int code) {
-        final long slot = slot(rehash(code), mask);
-        for (long index = slot;; index = nextSlot(index, mask)) {
-            final long id = id(index);
-            if (id == -1L || key.bytesEquals(get(id, spare))) {
-                return id;
+    public long add(final BytesRef key) {
+        final long hash = hasher.hash(key);
+        final long fingerprint = hash & MASK_FINGERPRINT;
+
+        for (long idx = hash & mask, value, ordinal;; idx = (idx + 1) & mask) {
+            if ((value = table.get(idx)) == -1) {
+                final long val = fingerprint | size;
+                if (size >= grow) {
+                    growAndInsert(hash, val);
+                } else {
+                    table.set(idx, val);
+                }
+                return append(key);
+            } else if (((value & MASK_FINGERPRINT) == fingerprint) && key.bytesEquals(get(ordinal = (value & MASK_ORDINAL), scratch))) {
+                return -1 - ordinal;
             }
         }
     }
 
-    /** Sugar for {@link #find(BytesRef, int) find(key, key.hashCode()} */
-    public long find(BytesRef key) {
-        return find(key, key.hashCode());
-    }
+    /**
+     * Returns the ordinal associated with the given key, or -1 if the key doesn't exist.
+     *
+     * <p>
+     * Using the 64-bit hash value, up to 32 least significant bits (LSB) are used to identify the
+     * home slot in the hash table, and the 32 most significant bits (MSB) are used as the fingerprint.
+     * The fingerprint further increases the entropy and reduces the number of false lookups in the
+     * keys' table during equality checks, which are expensive.
+     *
+     * <p>
+     * Total entropy bits = 32 + log2(capacity)
+     *
+     * <p>
+     * Linear probing starts from the home slot, until a match or an empty slot is found.
+     * Values are first checked using their fingerprint (to reduce false positives), then verified
+     * in the keys' table using an equality check.
+     */
+    public long find(final BytesRef key) {
+        final long hash = hasher.hash(key);
+        final long fingerprint = hash & MASK_FINGERPRINT;
 
-    private long set(BytesRef key, int code, long id) {
-        assert rehash(key.hashCode()) == code;
-        assert size < maxSize;
-        final long slot = slot(code, mask);
-        for (long index = slot;; index = nextSlot(index, mask)) {
-            final long curId = id(index);
-            if (curId == -1) { // means unset
-                id(index, id);
-                append(id, key, code);
-                ++size;
-                return id;
-            } else if (key.bytesEquals(get(curId, spare))) {
-                return -1 - curId;
+        for (long idx = hash & mask, value, ordinal;; idx = (idx + 1) & mask) {
+            if ((value = table.get(idx)) == -1) {
+                return -1;
+            } else if (((value & MASK_FINGERPRINT) == fingerprint) && key.bytesEquals(get(ordinal = (value & MASK_ORDINAL), scratch))) {
+                return ordinal;
             }
         }
     }
 
-    private void append(long id, BytesRef key, int code) {
-        assert size == id;
-        final long startOffset = startOffsets.get(size);
-        bytes = bigArrays.grow(bytes, startOffset + key.length);
-        bytes.set(startOffset, key.bytes, key.offset, key.length);
-        startOffsets = bigArrays.grow(startOffsets, size + 2);
-        startOffsets.set(size + 1, startOffset + key.length);
-        hashes = bigArrays.grow(hashes, id + 1);
-        hashes.set(id, code);
+    /**
+     * Returns the key associated with the given ordinal.
+     * The result is undefined for an unused ordinal.
+     *
+     * <p>
+     * Beware that the content of the {@link BytesRef} may become invalid as soon as {@link #close()} is called
+     */
+    public BytesRef get(final long ordinal, final BytesRef dest) {
+        final long start = offsets.get(ordinal);
+        final int length = (int) (offsets.get(ordinal + 1) - start);
+        keys.get(start, length, dest);
+        return dest;
     }
 
-    private boolean assertConsistent(long id, int code) {
-        get(id, spare);
-        return rehash(spare.hashCode()) == code;
+    /**
+     * Returns the number of mappings in this hash table.
+     */
+    public long size() {
+        return size;
     }
 
-    private void reset(int code, long id) {
-        assert assertConsistent(id, code);
-        final long slot = slot(code, mask);
-        for (long index = slot;; index = nextSlot(index, mask)) {
-            final long curId = id(index);
-            if (curId == -1) { // means unset
-                id(index, id);
-                break;
-            }
-        }
+    /**
+     * Appends the key to the keys' and offsets' tables and returns the ordinal assigned to it.
+     */
+    private long append(final BytesRef key) {
+        final long start = offsets.get(size);
+        final long end = start + key.length;
+        offsets = bigArrays.grow(offsets, size + 2);
+        offsets.set(size + 1, end);
+        keys = bigArrays.grow(keys, end);
+        keys.set(start, key.bytes, key.offset, key.length);
+        return size++;
     }
 
     /**
-     * Try to add <code>key</code>. Return its newly allocated id if it wasn't in the hash table yet, or <code>-1-id</code>
-     * if it was already present in the hash table.
+     * Grows the hash table by doubling its capacity, inserting the provided value,
+     * and reinserting the previous values at their updated slots.
      */
-    public long add(BytesRef key, int code) {
-        if (size >= maxSize) {
-            assert size == maxSize;
-            grow();
-        }
-        assert size < maxSize;
-        return set(key, rehash(code), size);
-    }
+    private void growAndInsert(final long hash, final long value) {
+        // Ensure that the hash table doesn't grow too large (only checked when assertions are enabled).
+        // This implicitly also ensures that the ordinals stay below 2^32, thus,
+        // preventing them from polluting the fingerprint bits in the hash table values.
+        assert capacity < MAX_CAPACITY : "hash table already at the max capacity";
 
-    /** Sugar to {@link #add(BytesRef, int) add(key, key.hashCode()}. */
-    public long add(BytesRef key) {
-        return add(key, key.hashCode());
+        capacity <<= 1;
+        mask = capacity - 1;
+        grow = (long) (capacity * loadFactor);
+        table = bigArrays.grow(table, capacity);
+        table.fill(0, capacity, -1);
+        table.set(hash & mask, value);
+
+        for (long ordinal = 0; ordinal < size; ordinal++) {
+            reinsert(ordinal, hasher.hash(get(ordinal, scratch)));
+        }
     }
 
-    @Override
-    protected void removeAndAdd(long index) {
-        final long id = id(index, -1);
-        assert id >= 0;
-        final int code = hashes.get(id);
-        reset(code, id);
+    /**
+     * Reinserts the hash table value for an existing key stored at the given ordinal.
+     */
+    private void reinsert(final long ordinal, final long hash) {
+        for (long idx = hash & mask;; idx = (idx + 1) & mask) {
+            if (table.get(idx) == -1) {
+                table.set(idx, (hash & MASK_FINGERPRINT) | ordinal);
+                return;
+            }
+        }
     }
 
     @Override
     public void close() {
-        try (Releasable releasable = Releasables.wrap(bytes, hashes, startOffsets)) {
-            super.close();
-        }
+        Releasables.close(table, offsets, keys);
     }
 
+    /**
+     * Hasher calculates the hash of a {@link BytesRef} key.
+     */
+    @FunctionalInterface
+    public interface Hasher {
+        long hash(BytesRef key);
+    }
 }
diff --git a/server/src/main/java/org/opensearch/common/util/CompactBytesRefHash.java b/server/src/main/java/org/opensearch/common/util/CompactBytesRefHash.java
deleted file mode 100644
index d8d4690d14e90..0000000000000
--- a/server/src/main/java/org/opensearch/common/util/CompactBytesRefHash.java
+++ /dev/null
@@ -1,286 +0,0 @@
-/*
- * SPDX-License-Identifier: Apache-2.0
- *
- * The OpenSearch Contributors require contributions made to
- * this file be licensed under the Apache-2.0 license or a
- * compatible open source license.
- */
-
-package org.opensearch.common.util;
-
-import net.openhft.hashing.LongHashFunction;
-import org.apache.lucene.util.BytesRef;
-import org.opensearch.common.lease.Releasable;
-import org.opensearch.common.lease.Releasables;
-import org.opensearch.core.common.util.ByteArray;
-
-import java.security.AccessController;
-import java.security.PrivilegedAction;
-
-/**
- * Specialized hash table implementation that maps a {@link BytesRef} key to a long ordinal.
- *
- * <p>
- * It uses a compact byte-packing strategy to encode the ordinal and fingerprint information
- * in the hash table value. It makes lookups faster by short-circuiting expensive equality checks
- * for keys that collide onto the same hash table slot.
- *
- * <p>
- * This class is not thread-safe.
- *
- * @opensearch.internal
- */
-public class CompactBytesRefHash implements Releasable {
-    private static final LongHashFunction XX3 = AccessController.doPrivileged(
-        (PrivilegedAction<LongHashFunction>) () -> LongHashFunction.xx3(System.nanoTime())
-    );
-
-    private static final long MAX_CAPACITY = 1L << 32;
-    private static final long DEFAULT_INITIAL_CAPACITY = 32;
-    private static final float DEFAULT_LOAD_FACTOR = 0.6f;
-    private static final Hasher DEFAULT_HASHER = key -> XX3.hashBytes(key.bytes, key.offset, key.length);
-
-    private static final long MASK_ORDINAL = 0x00000000FFFFFFFFL;  // extract ordinal
-    private static final long MASK_FINGERPRINT = 0xFFFFFFFF00000000L;  // extract fingerprint
-
-    /**
-     * Maximum load factor after which the capacity is doubled.
-     */
-    private final float loadFactor;
-
-    /**
-     * Calculates the hash of a {@link BytesRef} key.
-     */
-    private final Hasher hasher;
-
-    /**
-     * Utility class to allocate recyclable arrays.
-     */
-    private final BigArrays bigArrays;
-
-    /**
-     * Reusable BytesRef to read keys.
-     */
-    private final BytesRef scratch = new BytesRef();
-
-    /**
-     * Current capacity of the hash table. This must be a power of two so that the hash table slot
-     * can be identified quickly using bitmasks, thus avoiding expensive modulo or integer division.
-     */
-    private long capacity;
-
-    /**
-     * Bitmask to identify the hash table slot from a key's hash.
-     */
-    private long mask;
-
-    /**
-     * Size threshold after which the hash table needs to be doubled in capacity.
-     */
-    private long grow;
-
-    /**
-     * Current size of the hash table.
-     */
-    private long size;
-
-    /**
-     * Underlying array to store the hash table values.
-     *
-     * <p>
-     * Each hash table value (64-bit) uses the following byte packing strategy:
-     * <pre>
-     * |================================|================================|
-     * | Fingerprint                    | Ordinal                        |
-     * |--------------------------------|--------------------------------|
-     * | 32 bits                        | 32 bits                        |
-     * |================================|================================|
-     * </pre>
-     *
-     * <p>
-     * This allows us to encode and manipulate additional information in the hash table
-     * itself without having to look elsewhere in the memory, which is much slower.
-     *
-     * <p>
-     * Terminology: <code>table[index] = value = (fingerprint | ordinal)</code>
-     */
-    private LongArray table;
-
-    /**
-     * Underlying array to store the starting offsets of keys.
-     *
-     * <p>
-     * Terminology:
-     * <pre>
-     *   offsets[ordinal] = starting offset (inclusive)
-     *   offsets[ordinal + 1] = ending offset (exclusive)
-     * </pre>
-     */
-    private LongArray offsets;
-
-    /**
-     * Underlying byte array to store the keys.
-     *
-     * <p>
-     * Terminology: <code>keys[start...end] = key</code>
-     */
-    private ByteArray keys;
-
-    public CompactBytesRefHash(final BigArrays bigArrays) {
-        this(DEFAULT_INITIAL_CAPACITY, DEFAULT_LOAD_FACTOR, DEFAULT_HASHER, bigArrays);
-    }
-
-    public CompactBytesRefHash(final long initialCapacity, final float loadFactor, final Hasher hasher, final BigArrays bigArrays) {
-        assert initialCapacity > 0 : "initial capacity must be greater than 0";
-        assert loadFactor > 0 && loadFactor < 1 : "load factor must be between 0 and 1";
-
-        this.loadFactor = loadFactor;
-        this.hasher = hasher;
-        this.bigArrays = bigArrays;
-
-        capacity = Math.max(1, Long.highestOneBit((long) (initialCapacity / loadFactor)) << 1);
-        mask = capacity - 1;
-        size = 0;
-        grow = (long) (capacity * loadFactor);
-
-        table = bigArrays.newLongArray(capacity, false);
-        table.fill(0, capacity, -1);
-        offsets = bigArrays.newLongArray(initialCapacity + 1, false);
-        offsets.set(0, 0);
-        keys = bigArrays.newByteArray(initialCapacity * 3, false);
-    }
-
-    /**
-     * Adds the given key to the hash table and returns its ordinal.
-     * If the key exists already, it returns (-1 - ordinal).
-     */
-    public long add(final BytesRef key) {
-        final long hash = hasher.hash(key);
-        final long fingerprint = hash & MASK_FINGERPRINT;
-
-        for (long idx = hash & mask, value, ordinal;; idx = (idx + 1) & mask) {
-            if ((value = table.get(idx)) == -1) {
-                final long val = fingerprint | size;
-                if (size >= grow) {
-                    growAndInsert(hash, val);
-                } else {
-                    table.set(idx, val);
-                }
-                return append(key);
-            } else if (((value & MASK_FINGERPRINT) == fingerprint) && key.bytesEquals(get(ordinal = (value & MASK_ORDINAL), scratch))) {
-                return -1 - ordinal;
-            }
-        }
-    }
-
-    /**
-     * Returns the ordinal associated with the given key, or -1 if the key doesn't exist.
-     *
-     * <p>
-     * Using the 64-bit hash value, up to 32 least significant bits (LSB) are used to identify the
-     * home slot in the hash table, and an additional 32 bits are used to identify the fingerprint.
-     * The fingerprint further increases the entropy and reduces the number of false lookups in the
-     * keys' table during equality checks, which is expensive.
-     *
-     * <p>
-     * Total entropy bits = 32 + log2(capacity)
-     *
-     * <p>
-     * Linear probing starts from the home slot, until a match or an empty slot is found.
-     * Values are first checked using their fingerprint (to reduce false positives), then verified
-     * in the keys' table using an equality check.
-     */
-    public long find(final BytesRef key) {
-        final long hash = hasher.hash(key);
-        final long fingerprint = hash & MASK_FINGERPRINT;
-
-        for (long idx = hash & mask, value, ordinal;; idx = (idx + 1) & mask) {
-            if ((value = table.get(idx)) == -1) {
-                return -1;
-            } else if (((value & MASK_FINGERPRINT) == fingerprint) && key.bytesEquals(get(ordinal = (value & MASK_ORDINAL), scratch))) {
-                return ordinal;
-            }
-        }
-    }
-
-    /**
-     * Returns the key associated with the given ordinal.
-     * The result is undefined for an unused ordinal.
-     *
-     * <p>
-     * Beware that the content of the {@link BytesRef} may become invalid as soon as {@link #close()} is called
-     */
-    public BytesRef get(final long ordinal, final BytesRef dest) {
-        final long start = offsets.get(ordinal);
-        final int length = (int) (offsets.get(ordinal + 1) - start);
-        keys.get(start, length, dest);
-        return dest;
-    }
-
-    /**
-     * Returns the number of mappings in this hash table.
-     */
-    public long size() {
-        return size;
-    }
-
-    /**
-     * Appends the key in the keys' and offsets' tables.
-     */
-    private long append(final BytesRef key) {
-        final long start = offsets.get(size);
-        final long end = start + key.length;
-        offsets = bigArrays.grow(offsets, size + 2);
-        offsets.set(size + 1, end);
-        keys = bigArrays.grow(keys, end);
-        keys.set(start, key.bytes, key.offset, key.length);
-        return size++;
-    }
-
-    /**
-     * Grows the hash table by doubling its capacity, inserting the provided value,
-     * and reinserting the previous values at their updated slots.
-     */
-    private void growAndInsert(final long hash, final long value) {
-        // Ensure that the hash table doesn't grow too large.
-        // This implicitly also ensures that the ordinals are no larger than 2^32, thus,
-        // preventing them from polluting the fingerprint bits in the hash table values.
-        assert capacity < MAX_CAPACITY : "hash table already at the max capacity";
-
-        capacity <<= 1;
-        mask = capacity - 1;
-        grow = (long) (capacity * loadFactor);
-        table = bigArrays.grow(table, capacity);
-        table.fill(0, capacity, -1);
-        table.set(hash & mask, value);
-
-        for (long ordinal = 0; ordinal < size; ordinal++) {
-            reinsert(ordinal, hasher.hash(get(ordinal, scratch)));
-        }
-    }
-
-    /**
-     * Reinserts the hash table value for an existing key stored at the given ordinal.
-     */
-    private void reinsert(final long ordinal, final long hash) {
-        for (long idx = hash & mask;; idx = (idx + 1) & mask) {
-            if (table.get(idx) == -1) {
-                table.set(idx, (hash & MASK_FINGERPRINT) | ordinal);
-                return;
-            }
-        }
-    }
-
-    @Override
-    public void close() {
-        Releasables.close(table, offsets, keys);
-    }
-
-    /**
-     * Hasher calculates the hash of a {@link BytesRef} key.
-     */
-    @FunctionalInterface
-    public interface Hasher {
-        long hash(BytesRef key);
-    }
-}
diff --git a/server/src/main/java/org/opensearch/common/util/ReorganizingBytesRefHash.java b/server/src/main/java/org/opensearch/common/util/ReorganizingBytesRefHash.java
deleted file mode 100644
index 1806733cc1567..0000000000000
--- a/server/src/main/java/org/opensearch/common/util/ReorganizingBytesRefHash.java
+++ /dev/null
@@ -1,301 +0,0 @@
-/*
- * SPDX-License-Identifier: Apache-2.0
- *
- * The OpenSearch Contributors require contributions made to
- * this file be licensed under the Apache-2.0 license or a
- * compatible open source license.
- */
-
-package org.opensearch.common.util;
-
-import net.openhft.hashing.LongHashFunction;
-import org.apache.lucene.util.BytesRef;
-import org.opensearch.common.lease.Releasable;
-import org.opensearch.common.lease.Releasables;
-import org.opensearch.core.common.util.ByteArray;
-
-import java.security.AccessController;
-import java.security.PrivilegedAction;
-
-/**
- * Specialized hash table implementation that maps a {@link BytesRef} key to a long ordinal.
- *
- * <p>
- * It organizes itself by moving keys around dynamically in order to reduce the
- * longest probe sequence length (PSL), which makes lookups faster as keys are likely to
- * be found in the same CPU cache line. It also uses fingerprints to short-circuit expensive
- * equality checks for keys that collide onto the same hash table slot.
- *
- * <p>
- * This class is not thread-safe.
- *
- * @opensearch.internal
- */
-public class ReorganizingBytesRefHash implements Releasable {
-    private static final LongHashFunction XX3 = AccessController.doPrivileged(
-        (PrivilegedAction<LongHashFunction>) () -> LongHashFunction.xx3(System.nanoTime())
-    );
-
-    private static final long MAX_CAPACITY = 1L << 32;
-    private static final long DEFAULT_INITIAL_CAPACITY = 32;
-    private static final float DEFAULT_LOAD_FACTOR = 0.6f;
-    private static final Hasher DEFAULT_HASHER = key -> XX3.hashBytes(key.bytes, key.offset, key.length);
-
-    private static final long MASK_ORDINAL = 0x00000000FFFFFFFFL;  // extract ordinal
-    private static final long MASK_FINGERPRINT = 0x0000FFFF00000000L;  // extract fingerprint
-    private static final long MASK_PSL = 0x7FFF000000000000L;  // extract PSL
-    private static final long INCR_PSL = 0x0001000000000000L;  // increment PSL by one
-
-    /**
-     * Maximum load factor after which the capacity is doubled.
-     */
-    private final float loadFactor;
-
-    /**
-     * Calculates the hash of a {@link BytesRef} key.
-     */
-    private final Hasher hasher;
-
-    /**
-     * Utility class to allocate recyclable arrays.
-     */
-    private final BigArrays bigArrays;
-
-    /**
-     * Reusable BytesRef to read keys.
-     */
-    private final BytesRef scratch = new BytesRef();
-
-    /**
-     * Current capacity of the hash table. This must be a power of two so that the hash table slot
-     * can be identified quickly using bitmasks, thus avoiding expensive modulo or integer division.
-     */
-    private long capacity;
-
-    /**
-     * Bitmask to identify the hash table slot from a key's hash.
-     */
-    private long mask;
-
-    /**
-     * Size threshold after which the hash table needs to be doubled in capacity.
-     */
-    private long grow;
-
-    /**
-     * Current size of the hash table.
-     */
-    private long size;
-
-    /**
-     * Underlying array to store the hash table values.
-     *
-     * <p>
-     * Each hash table value (64-bit) uses the following byte packing strategy:
-     * <pre>
-     * |=========|===============|================|================================|
-     * | Discard | PSL           | Fingerprint    | Ordinal                        |
-     * |    -    |---------------|----------------|--------------------------------|
-     * | 1 bit   | 15 bits       | 16 bits        | 32 bits                        |
-     * |=========|===============|================|================================|
-     * </pre>
-     *
-     * <p>
-     * This allows us to encode and manipulate additional information in the hash table
-     * itself without having to look elsewhere in the memory, which is much slower.
-     *
-     * <p>
-     * Terminology: <code>table[index] = value = (discard | psl | fingerprint | ordinal)</code>
-     */
-    private LongArray table;
-
-    /**
-     * Underlying array to store the starting offsets of keys.
-     *
-     * <p>
-     * Terminology:
-     * <pre>
-     *   offsets[ordinal] = starting offset (inclusive)
-     *   offsets[ordinal + 1] = ending offset (exclusive)
-     * </pre>
-     */
-    private LongArray offsets;
-
-    /**
-     * Underlying byte array to store the keys.
-     *
-     * <p>
-     * Terminology: <code>keys[start...end] = key</code>
-     */
-    private ByteArray keys;
-
-    public ReorganizingBytesRefHash(final BigArrays bigArrays) {
-        this(DEFAULT_INITIAL_CAPACITY, DEFAULT_LOAD_FACTOR, DEFAULT_HASHER, bigArrays);
-    }
-
-    public ReorganizingBytesRefHash(final long initialCapacity, final float loadFactor, final Hasher hasher, final BigArrays bigArrays) {
-        assert initialCapacity > 0 : "initial capacity must be greater than 0";
-        assert loadFactor > 0 && loadFactor < 1 : "load factor must be between 0 and 1";
-
-        this.loadFactor = loadFactor;
-        this.hasher = hasher;
-        this.bigArrays = bigArrays;
-
-        capacity = Math.max(1, Long.highestOneBit((long) (initialCapacity / loadFactor)) << 1);
-        mask = capacity - 1;
-        size = 0;
-        grow = (long) (capacity * loadFactor);
-
-        table = bigArrays.newLongArray(capacity, false);
-        table.fill(0, capacity, -1);
-        offsets = bigArrays.newLongArray(initialCapacity + 1, false);
-        offsets.set(0, 0);
-        keys = bigArrays.newByteArray(initialCapacity * 3, false);
-    }
-
-    /**
-     * Adds the given key to the hash table and returns its ordinal.
-     * If the key exists already, it returns (-1 - ordinal).
-     */
-    public long add(final BytesRef key) {
-        final long hash = hasher.hash(key);
-        final long fingerprint = hash & MASK_FINGERPRINT;
-
-        for (long idx = hash & mask, value, ordinal;; idx = (idx + 1) & mask) {
-            if ((value = table.get(idx)) == -1) {
-                final long val = (fingerprint | size);
-                if (size >= grow) {
-                    growAndInsert(hash, val);
-                } else {
-                    insert(hash, val);
-                }
-                return append(key);
-            } else if (((value & MASK_FINGERPRINT) == fingerprint) && key.bytesEquals(get(ordinal = (value & MASK_ORDINAL), scratch))) {
-                return -(1 + ordinal);
-            }
-        }
-    }
-
-    /**
-     * Returns the ordinal associated with the given key, or -1 if the key doesn't exist.
-     *
-     * <p>
-     * Using the 64-bit hash value, up to 32 least significant bits (LSB) are used to identify the
-     * home slot in the hash table, and an additional 16 bits are used to identify the fingerprint.
-     * The fingerprint further increases the entropy and reduces the number of false lookups in the
-     * keys' table during equality checks, which is expensive.
-     *
-     * <p>
-     * Total entropy bits = 16 + log2(capacity)
-     *
-     * <p>
-     * Linear probing starts from the home slot, until a match or an empty slot is found.
-     * Values are first checked using their fingerprint (to reduce false positives), then verified
-     * in the keys' table using an equality check.
-     */
-    public long find(final BytesRef key) {
-        final long hash = hasher.hash(key);
-        final long fingerprint = hash & MASK_FINGERPRINT;
-
-        for (long idx = hash & mask, value, ordinal;; idx = (idx + 1) & mask) {
-            if ((value = table.get(idx)) == -1) {
-                return -1;
-            } else if (((value & MASK_FINGERPRINT) == fingerprint) && key.bytesEquals(get(ordinal = (value & MASK_ORDINAL), scratch))) {
-                return ordinal;
-            }
-        }
-    }
-
-    /**
-     * Returns the key associated with the given ordinal.
-     * The result is undefined for an unused ordinal.
-     *
-     * <p>
-     * Beware that the content of the {@link BytesRef} may become invalid as soon as {@link #close()} is called
-     */
-    public BytesRef get(final long ordinal, final BytesRef dest) {
-        final long start = offsets.get(ordinal);
-        final int length = (int) (offsets.get(ordinal + 1) - start);
-        keys.get(start, length, dest);
-        return dest;
-    }
-
-    /**
-     * Returns the number of mappings in this hash table.
-     */
-    public long size() {
-        return size;
-    }
-
-    /**
-     * Appends the key in the keys' and offsets' tables.
-     */
-    private long append(final BytesRef key) {
-        final long start = offsets.get(size);
-        final long end = start + key.length;
-        offsets = bigArrays.grow(offsets, size + 2);
-        offsets.set(size + 1, end);
-        keys = bigArrays.grow(keys, end);
-        keys.set(start, key.bytes, key.offset, key.length);
-        return size++;
-    }
-
-    /**
-     * Grows the hash table by doubling its capacity, inserting the provided value,
-     * and reinserting the previous values at their updated slots.
-     */
-    private void growAndInsert(final long hash, final long value) {
-        // Ensure that the hash table doesn't grow too large.
-        // This implicitly also ensures that the ordinals are no larger than 2^32, thus,
-        // preventing them from polluting the fingerprint bits in the hash table values.
-        assert capacity < MAX_CAPACITY : "hash table already at the max capacity";
-
-        capacity <<= 1;
-        mask = capacity - 1;
-        grow = (long) (capacity * loadFactor);
-        table = bigArrays.grow(table, capacity);
-        table.fill(0, capacity, -1);
-        table.set(hash & mask, value);
-
-        for (long ordinal = 0; ordinal < size; ordinal++) {
-            final long h = hasher.hash(get(ordinal, scratch));
-            insert(h, (h & MASK_FINGERPRINT) | ordinal);
-        }
-    }
-
-    /**
-     * Inserts the hash table value for a missing key.
-     */
-    private void insert(final long hash, final long value) {
-        for (long idx = hash & mask, current = value, existing;; idx = (idx + 1) & mask) {
-            if ((existing = table.get(idx)) == -1) {
-                table.set(idx, current);
-                return;
-            } else if ((existing & MASK_PSL) < (current & MASK_PSL)) {
-                current = table.set(idx, current);
-            }
-            current += INCR_PSL;
-        }
-    }
-
-    @Override
-    public void close() {
-        Releasables.close(table, offsets, keys);
-    }
-
-    /**
-     * Returns the underlying hash table.
-     * Visible for unit-tests.
-     */
-    LongArray getTable() {
-        return table;
-    }
-
-    /**
-     * Hasher calculates the hash of a {@link BytesRef} key.
-     */
-    @FunctionalInterface
-    public interface Hasher {
-        long hash(BytesRef key);
-    }
-}
diff --git a/server/src/main/java/org/opensearch/common/util/ReorganizingLongHash.java b/server/src/main/java/org/opensearch/common/util/ReorganizingLongHash.java
index 417eb6a316d86..d6c29fcae3a94 100644
--- a/server/src/main/java/org/opensearch/common/util/ReorganizingLongHash.java
+++ b/server/src/main/java/org/opensearch/common/util/ReorganizingLongHash.java
@@ -8,7 +8,9 @@
 
 package org.opensearch.common.util;
 
+import org.opensearch.common.Numbers;
 import org.opensearch.common.lease.Releasable;
+import org.opensearch.common.lease.Releasables;
 
 /**
  * Specialized hash table implementation that maps a (primitive) long to long.
@@ -109,7 +111,8 @@ public ReorganizingLongHash(final long initialCapacity, final float loadFactor,
         this.bigArrays = bigArrays;
         this.loadFactor = loadFactor;
 
-        capacity = nextPowerOfTwo((long) (initialCapacity / loadFactor));
+        capacity = Numbers.nextPowerOfTwo((long) (initialCapacity / loadFactor));
+        assert capacity <= MAX_CAPACITY : "required capacity too large";
         mask = capacity - 1;
         grow = (long) (capacity * loadFactor);
         size = 0;
@@ -296,11 +299,6 @@ private void grow() {
 
     @Override
     public void close() {
-        table.close();
-        keys.close();
-    }
-
-    private static long nextPowerOfTwo(final long value) {
-        return Math.max(1, Long.highestOneBit(value - 1) << 1);
+        Releasables.close(table, keys);
     }
 }
diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/BytesKeyedBucketOrds.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/BytesKeyedBucketOrds.java
index 2c804166eed78..5d7c5c2976169 100644
--- a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/BytesKeyedBucketOrds.java
+++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/BytesKeyedBucketOrds.java
@@ -37,7 +37,6 @@
 import org.opensearch.common.lease.Releasables;
 import org.opensearch.common.util.BigArrays;
 import org.opensearch.common.util.BytesRefHash;
-import org.opensearch.common.util.CompactBytesRefHash;
 import org.opensearch.search.aggregations.CardinalityUpperBound;
 
 /**
@@ -129,10 +128,10 @@ public void readValue(BytesRef dest) {}
      * @opensearch.internal
      */
     private static class FromSingle extends BytesKeyedBucketOrds {
-        private final CompactBytesRefHash ords;
+        private final BytesRefHash ords;
 
         private FromSingle(BigArrays bigArrays) {
-            ords = new CompactBytesRefHash(bigArrays);
+            ords = new BytesRefHash(bigArrays);
         }
 
         @Override
@@ -191,7 +190,7 @@ private static class FromMany extends BytesKeyedBucketOrds {
         private final LongKeyedBucketOrds longToBucketOrds;
 
         private FromMany(BigArrays bigArrays) {
-            bytesToLong = new BytesRefHash(1, bigArrays);
+            bytesToLong = new BytesRefHash(bigArrays);
             longToBucketOrds = LongKeyedBucketOrds.build(bigArrays, CardinalityUpperBound.MANY);
         }
 
diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/SignificanceLookup.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/SignificanceLookup.java
index aee4caa67afa1..34bbac55900a8 100644
--- a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/SignificanceLookup.java
+++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/SignificanceLookup.java
@@ -123,7 +123,7 @@ public void close() {}
             };
         }
         return new BackgroundFrequencyForBytes() {
-            private final BytesRefHash termToPosition = new BytesRefHash(1, bigArrays);
+            private final BytesRefHash termToPosition = new BytesRefHash(bigArrays);
             private LongArray positionToFreq = bigArrays.newLongArray(1, false);
 
             @Override
diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/StringRareTermsAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/StringRareTermsAggregator.java
index c796faa6a8b76..cc35fe75e5e92 100644
--- a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/StringRareTermsAggregator.java
+++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/StringRareTermsAggregator.java
@@ -135,7 +135,7 @@ public InternalAggregation[] buildAggregations(long[] owningBucketOrds) throws I
         Arrays.fill(mergeMap, -1);
         long offset = 0;
         for (int owningOrdIdx = 0; owningOrdIdx < owningBucketOrds.length; owningOrdIdx++) {
-            try (BytesRefHash bucketsInThisOwningBucketToCollect = new BytesRefHash(1, context.bigArrays())) {
+            try (BytesRefHash bucketsInThisOwningBucketToCollect = new BytesRefHash(context.bigArrays())) {
                 filters[owningOrdIdx] = newFilter();
                 List<StringRareTerms.Bucket> builtBuckets = new ArrayList<>();
                 BytesKeyedBucketOrds.BucketOrdsEnum collectedBuckets = bucketOrds.ordsEnum(owningBucketOrds[owningOrdIdx]);
diff --git a/server/src/test/java/org/opensearch/common/NumbersTests.java b/server/src/test/java/org/opensearch/common/NumbersTests.java
index 5fb85d815ded2..ff12b3bc4cc96 100644
--- a/server/src/test/java/org/opensearch/common/NumbersTests.java
+++ b/server/src/test/java/org/opensearch/common/NumbersTests.java
@@ -221,4 +221,24 @@ public void testToUnsignedBigInteger() {
         assertEquals(random, Numbers.toUnsignedBigInteger(random.longValue()));
         assertEquals(Numbers.MAX_UNSIGNED_LONG_VALUE, Numbers.toUnsignedBigInteger(Numbers.MAX_UNSIGNED_LONG_VALUE.longValue()));
     }
+
+    public void testNextPowerOfTwo() {
+        // Negative values:
+        for (int i = 0; i < 1000; i++) {
+            long value = randomLongBetween(-500, -1);
+            assertEquals(1, Numbers.nextPowerOfTwo(value));
+        }
+
+        // Zero value:
+        assertEquals(1, Numbers.nextPowerOfTwo(0L));
+
+        // Positive values:
+        for (int i = 0; i < 1000; i++) {
+            long value = randomLongBetween(1, 500);
+            long nextPowerOfTwo = Numbers.nextPowerOfTwo(value);
+
+            assertTrue(nextPowerOfTwo > value); // must be strictly greater
+            assertEquals(0, nextPowerOfTwo & (nextPowerOfTwo - 1)); // must be a power of two
+        }
+    }
 }
diff --git a/server/src/test/java/org/opensearch/common/util/BytesRefHashTests.java b/server/src/test/java/org/opensearch/common/util/BytesRefHashTests.java
index a78a35e5a2412..d40012accbb7a 100644
--- a/server/src/test/java/org/opensearch/common/util/BytesRefHashTests.java
+++ b/server/src/test/java/org/opensearch/common/util/BytesRefHashTests.java
@@ -33,6 +33,7 @@
 package org.opensearch.common.util;
 
 import org.apache.lucene.tests.util.TestUtil;
+import net.openhft.hashing.LongHashFunction;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.BytesRefBuilder;
 import org.opensearch.common.settings.Settings;
@@ -44,6 +45,7 @@
 import java.util.Map;
 import java.util.Map.Entry;
 import java.util.Set;
+import java.util.stream.Stream;
 
 public class BytesRefHashTests extends OpenSearchTestCase {
 
@@ -57,9 +59,13 @@ private void newHash() {
         if (hash != null) {
             hash.close();
         }
-        // Test high load factors to make sure that collision resolution works fine
-        final float maxLoadFactor = 0.6f + randomFloat() * 0.39f;
-        hash = new BytesRefHash(randomIntBetween(0, 100), maxLoadFactor, randomBigArrays());
+        LongHashFunction hasher = LongHashFunction.xx3(randomLong());
+        hash = new BytesRefHash(
+            randomIntBetween(1, 100),      // random capacity
+            0.6f + randomFloat() * 0.39f,  // random load factor to verify collision resolution
+            key -> hasher.hashBytes(key.bytes, key.offset, key.length),
+            randomBigArrays()
+        );
     }
 
     @Override
@@ -68,39 +74,34 @@ public void setUp() throws Exception {
         newHash();
     }
 
-    public void testDuel() {
-        final int len = randomIntBetween(1, 100000);
-        final BytesRef[] values = new BytesRef[len];
-        for (int i = 0; i < values.length; ++i) {
-            values[i] = new BytesRef(randomAlphaOfLength(5));
-        }
-        final Map<BytesRef, Integer> valueToId = new HashMap<>();
-        final BytesRef[] idToValue = new BytesRef[values.length];
-        final int iters = randomInt(1000000);
-        for (int i = 0; i < iters; ++i) {
-            final BytesRef value = randomFrom(values);
-            if (valueToId.containsKey(value)) {
-                assertEquals(-1 - valueToId.get(value), hash.add(value, value.hashCode()));
+    public void testFuzzy() {
+        Map<BytesRef, Long> reference = new HashMap<>();
+        BytesRef[] keys = Stream.generate(() -> new BytesRef(randomAlphaOfLength(20)))
+            .limit(randomIntBetween(1000, 2000))
+            .toArray(BytesRef[]::new);
+
+        // Verify the behaviour of "add" and "find".
+        for (int i = 0; i < keys.length * 10; i++) {
+            BytesRef key = keys[i % keys.length];
+            if (reference.containsKey(key)) {
+                long expectedOrdinal = reference.get(key);
+                assertEquals(-1 - expectedOrdinal, hash.add(key));
+                assertEquals(expectedOrdinal, hash.find(key));
             } else {
-                assertEquals(valueToId.size(), hash.add(value, value.hashCode()));
-                idToValue[valueToId.size()] = value;
-                valueToId.put(value, valueToId.size());
+                assertEquals(-1, hash.find(key));
+                reference.put(key, (long) reference.size());
+                assertEquals((long) reference.get(key), hash.add(key));
             }
         }
 
-        assertEquals(valueToId.size(), hash.size());
-        for (final var next : valueToId.entrySet()) {
-            assertEquals(next.getValue().longValue(), hash.find(next.getKey(), next.getKey().hashCode()));
+        // Verify the behaviour of "get".
+        BytesRef scratch = new BytesRef();
+        for (Map.Entry<BytesRef, Long> entry : reference.entrySet()) {
+            assertEquals(entry.getKey(), hash.get(entry.getValue(), scratch));
         }
 
-        for (long i = 0; i < hash.capacity(); ++i) {
-            final long id = hash.id(i);
-            BytesRef spare = new BytesRef();
-            if (id >= 0) {
-                hash.get(id, spare);
-                assertEquals(idToValue[(int) id], spare);
-            }
-        }
+        // Verify the behaviour of "size".
+        assertEquals(reference.size(), hash.size());
         hash.close();
     }
 
diff --git a/server/src/test/java/org/opensearch/common/util/CompactBytesRefHashTests.java b/server/src/test/java/org/opensearch/common/util/CompactBytesRefHashTests.java
deleted file mode 100644
index 297fe82a2a505..0000000000000
--- a/server/src/test/java/org/opensearch/common/util/CompactBytesRefHashTests.java
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * SPDX-License-Identifier: Apache-2.0
- *
- * The OpenSearch Contributors require contributions made to
- * this file be licensed under the Apache-2.0 license or a
- * compatible open source license.
- */
-
-package org.opensearch.common.util;
-
-import net.openhft.hashing.LongHashFunction;
-import org.apache.lucene.util.BytesRef;
-import org.opensearch.test.OpenSearchTestCase;
-
-import java.util.HashMap;
-import java.util.Map;
-import java.util.stream.Stream;
-
-public class CompactBytesRefHashTests extends OpenSearchTestCase {
-
-    public void testFuzzy() {
-        LongHashFunction hasher = LongHashFunction.xx3(randomLong());
-        Map<BytesRef, Long> reference = new HashMap<>();
-        BytesRef[] keys = Stream.generate(() -> new BytesRef(randomAlphaOfLength(20))).limit(1000).toArray(BytesRef[]::new);
-
-        try (
-            CompactBytesRefHash h = new CompactBytesRefHash(
-                randomIntBetween(1, 100),      // random capacity
-                0.6f + randomFloat() * 0.39f,  // random load factor to verify collision resolution
-                key -> hasher.hashBytes(key.bytes, key.offset, key.length),
-                BigArrays.NON_RECYCLING_INSTANCE
-            )
-        ) {
-            // Verify the behaviour of "add" and "find".
-            for (int i = 0; i < keys.length * 10; i++) {
-                BytesRef key = keys[i % keys.length];
-                if (reference.containsKey(key)) {
-                    long expectedOrdinal = reference.get(key);
-                    assertEquals(-1 - expectedOrdinal, h.add(key));
-                    assertEquals(expectedOrdinal, h.find(key));
-                } else {
-                    assertEquals(-1, h.find(key));
-                    reference.put(key, (long) reference.size());
-                    assertEquals((long) reference.get(key), h.add(key));
-                }
-            }
-
-            // Verify the behaviour of "get".
-            BytesRef scratch = new BytesRef();
-            for (Map.Entry<BytesRef, Long> entry : reference.entrySet()) {
-                assertEquals(entry.getKey(), h.get(entry.getValue(), scratch));
-            }
-
-            // Verify the behaviour of "size".
-            assertEquals(reference.size(), h.size());
-        }
-    }
-}
diff --git a/server/src/test/java/org/opensearch/common/util/ReorganizingBytesRefHashTests.java b/server/src/test/java/org/opensearch/common/util/ReorganizingBytesRefHashTests.java
deleted file mode 100644
index e7ab6d1fa21a6..0000000000000
--- a/server/src/test/java/org/opensearch/common/util/ReorganizingBytesRefHashTests.java
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * SPDX-License-Identifier: Apache-2.0
- *
- * The OpenSearch Contributors require contributions made to
- * this file be licensed under the Apache-2.0 license or a
- * compatible open source license.
- */
-
-package org.opensearch.common.util;
-
-import net.openhft.hashing.LongHashFunction;
-import org.apache.lucene.util.BytesRef;
-import org.opensearch.test.OpenSearchTestCase;
-
-import java.util.HashMap;
-import java.util.Map;
-import java.util.stream.Stream;
-
-public class ReorganizingBytesRefHashTests extends OpenSearchTestCase {
-
-    public void testFuzzy() {
-        LongHashFunction hasher = LongHashFunction.xx3(randomLong());
-        Map<BytesRef, Long> reference = new HashMap<>();
-        BytesRef[] keys = Stream.generate(() -> new BytesRef(randomAlphaOfLength(20))).limit(1000).toArray(BytesRef[]::new);
-
-        try (
-            ReorganizingBytesRefHash h = new ReorganizingBytesRefHash(
-                randomIntBetween(1, 100),      // random capacity
-                0.6f + randomFloat() * 0.39f,  // random load factor to verify collision resolution
-                key -> hasher.hashBytes(key.bytes, key.offset, key.length),
-                BigArrays.NON_RECYCLING_INSTANCE
-            )
-        ) {
-            // Verify the behaviour of "add" and "find".
-            for (int i = 0; i < keys.length * 10; i++) {
-                BytesRef key = keys[i % keys.length];
-                if (reference.containsKey(key)) {
-                    long expectedOrdinal = reference.get(key);
-                    assertEquals(-1 - expectedOrdinal, h.add(key));
-                    assertEquals(expectedOrdinal, h.find(key));
-                } else {
-                    assertEquals(-1, h.find(key));
-                    reference.put(key, (long) reference.size());
-                    assertEquals((long) reference.get(key), h.add(key));
-                }
-            }
-
-            // Verify the behaviour of "get".
-            BytesRef scratch = new BytesRef();
-            for (Map.Entry<BytesRef, Long> entry : reference.entrySet()) {
-                assertEquals(entry.getKey(), h.get(entry.getValue(), scratch));
-            }
-
-            // Verify the behaviour of "size".
-            assertEquals(reference.size(), h.size());
-
-            // Verify the calculation of PSLs.
-            long capacity = h.getTable().size();
-            long mask = capacity - 1;
-            for (long idx = 0; idx < h.getTable().size(); idx++) {
-                long value = h.getTable().get(idx);
-                if (value != -1) {
-                    BytesRef key = h.get((int) value, scratch);
-                    long homeIdx = hasher.hashBytes(key.bytes, key.offset, key.length) & mask;
-                    assertEquals((capacity + idx - homeIdx) & mask, value >>> 48);
-                }
-            }
-        }
-    }
-}

From b4e27a4efe2592320ce41689f553fcaf97d2fefe Mon Sep 17 00:00:00 2001
From: Ketan Verma <ketan9495@gmail.com>
Date: Mon, 14 Aug 2023 21:00:36 +0530
Subject: [PATCH 3/9] Added t1ha1 to replace xxh3 hash function

Signed-off-by: Ketan Verma <ketan9495@gmail.com>
---
 .../common/hash/HashFunctionBenchmark.java    | 172 +++++++++++++
 .../common/util/BytesRefHashBenchmark.java    |  81 ++++++-
 buildSrc/version.properties                   |   2 -
 .../java/org/opensearch/common/hash/T1ha.java | 226 +++++++++++++++++
 .../opensearch/common/hash/package-info.java  |  12 +
 .../common/hash/HashFunctionTestCase.java     | 137 +++++++++++
 .../org/opensearch/common/hash/T1haTests.java | 229 ++++++++++++++++++
 server/build.gradle                           |  16 +-
 .../zero-allocation-hashing-0.16.jar.sha1     |   1 -
 .../zero-allocation-hashing-LICENSE.txt       | 201 ---------------
 .../zero-allocation-hashing-NOTICE.txt        |   0
 .../opensearch/common/util/BytesRefHash.java  |  37 ++-
 .../org/opensearch/bootstrap/security.policy  |  10 -
 .../bootstrap/test-framework.policy           |   1 -
 .../org/opensearch/common/NumbersTests.java   |   1 +
 .../common/util/BytesRefHashTests.java        |   6 +-
 16 files changed, 876 insertions(+), 256 deletions(-)
 create mode 100644 benchmarks/src/main/java/org/opensearch/common/hash/HashFunctionBenchmark.java
 create mode 100644 libs/common/src/main/java/org/opensearch/common/hash/T1ha.java
 create mode 100644 libs/common/src/main/java/org/opensearch/common/hash/package-info.java
 create mode 100644 libs/common/src/test/java/org/opensearch/common/hash/HashFunctionTestCase.java
 create mode 100644 libs/common/src/test/java/org/opensearch/common/hash/T1haTests.java
 delete mode 100644 server/licenses/zero-allocation-hashing-0.16.jar.sha1
 delete mode 100644 server/licenses/zero-allocation-hashing-LICENSE.txt
 delete mode 100644 server/licenses/zero-allocation-hashing-NOTICE.txt

diff --git a/benchmarks/src/main/java/org/opensearch/common/hash/HashFunctionBenchmark.java b/benchmarks/src/main/java/org/opensearch/common/hash/HashFunctionBenchmark.java
new file mode 100644
index 0000000000000..bae6a813fc5a1
--- /dev/null
+++ b/benchmarks/src/main/java/org/opensearch/common/hash/HashFunctionBenchmark.java
@@ -0,0 +1,172 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.common.hash;
+
+import org.apache.lucene.util.StringHelper;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.Param;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.Warmup;
+import org.openjdk.jmh.infra.Blackhole;
+
+import java.util.Random;
+
+@Fork(value = 3)
+@Warmup(iterations = 1, time = 1)
+@Measurement(iterations = 3, time = 3)
+@BenchmarkMode(Mode.Throughput)
+public class HashFunctionBenchmark {
+
+    @Benchmark
+    public void hash(Blackhole bh, Options opts) {
+        bh.consume(opts.type.hash(opts.data));
+    }
+
+    @State(Scope.Benchmark)
+    public static class Options {
+        @Param({ "MURMUR3", "T1HA" })
+        public Type type;
+
+        @Param({
+            "1",
+            "2",
+            "3",
+            "4",
+            "5",
+            "6",
+            "7",
+            "8",
+            "9",
+            "10",
+            "12",
+            "14",
+            "16",
+            "18",
+            "21",
+            "24",
+            "28",
+            "32",
+            "36",
+            "41",
+            "47",
+            "54",
+            "62",
+            "71",
+            "81",
+            "90",
+            "100",
+            "112",
+            "125",
+            "139",
+            "156",
+            "174",
+            "194",
+            "220",
+            "245",
+            "272",
+            "302",
+            "339",
+            "384",
+            "431",
+            "488",
+            "547",
+            "608",
+            "675",
+            "763",
+            "863",
+            "967",
+            "1084",
+            "1225",
+            "1372",
+            "1537",
+            "1737",
+            "1929",
+            "2142",
+            "2378",
+            "2664",
+            "3011",
+            "3343",
+            "3778",
+            "4232",
+            "4783",
+            "5310",
+            "5895",
+            "6662",
+            "7529",
+            "8508",
+            "9444",
+            "10483",
+            "11741",
+            "13150",
+            "14597",
+            "16495",
+            "18475",
+            "20877",
+            "23383",
+            "25956",
+            "29071",
+            "32560",
+            "36142",
+            "40841",
+            "46151",
+            "52151",
+            "57888",
+            "65414",
+            "72610",
+            "82050",
+            "91076",
+            "102006",
+            "114247",
+            "127957",
+            "143312",
+            "159077",
+            "176576",
+            "199531",
+            "223475",
+            "250292",
+            "277825",
+            "313943",
+            "351617",
+            "393812" })
+        public Integer length;
+        public byte[] data;
+
+        @Setup
+        public void setup() {
+            data = new byte[length];
+            new Random(0).nextBytes(data);
+        }
+    }
+
+    public enum Type {
+        MURMUR3((data, offset, length) -> StringHelper.murmurhash3_x86_32(data, offset, length, 0)),
+        T1HA((data, offset, length) -> T1ha.hash(data, offset, length, 0));
+
+        private final Hasher hasher;
+
+        Type(Hasher hasher) {
+            this.hasher = hasher;
+        }
+
+        public long hash(byte[] data) {
+            return hasher.hash(data, 0, data.length);
+        }
+    }
+
+    @FunctionalInterface
+    interface Hasher {
+        long hash(byte[] data, int offset, int length);
+    }
+}
diff --git a/benchmarks/src/main/java/org/opensearch/common/util/BytesRefHashBenchmark.java b/benchmarks/src/main/java/org/opensearch/common/util/BytesRefHashBenchmark.java
index 2e2a2399e9c0d..379653a53bfd2 100644
--- a/benchmarks/src/main/java/org/opensearch/common/util/BytesRefHashBenchmark.java
+++ b/benchmarks/src/main/java/org/opensearch/common/util/BytesRefHashBenchmark.java
@@ -9,6 +9,7 @@
 package org.opensearch.common.util;
 
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.StringHelper;
 import org.openjdk.jmh.annotations.Benchmark;
 import org.openjdk.jmh.annotations.BenchmarkMode;
 import org.openjdk.jmh.annotations.Fork;
@@ -19,15 +20,17 @@
 import org.openjdk.jmh.annotations.Scope;
 import org.openjdk.jmh.annotations.Setup;
 import org.openjdk.jmh.annotations.State;
-import org.openjdk.jmh.annotations.TearDown;
 import org.openjdk.jmh.annotations.Warmup;
 import org.openjdk.jmh.infra.Blackhole;
+import org.opensearch.common.hash.T1ha;
+import org.opensearch.common.lease.Releasable;
 import org.opensearch.common.lease.Releasables;
 
 import java.util.HashSet;
 import java.util.Random;
 import java.util.Set;
 import java.util.concurrent.TimeUnit;
+import java.util.function.Supplier;
 import java.util.stream.Stream;
 
 @Fork(value = 3)
@@ -41,16 +44,23 @@ public class BytesRefHashBenchmark {
 
     @Benchmark
     public void add(Blackhole bh, Options opts) {
+        HashTable[] tables = Stream.generate(opts.type::create).limit(NUM_TABLES).toArray(HashTable[]::new);
+
         for (int hit = 0; hit < NUM_HITS; hit++) {
             BytesRef key = opts.keys[hit % opts.keys.length];
-            for (BytesRefHash table : opts.tables) {
+            for (HashTable table : tables) { // every key is added to each of the NUM_TABLES tables
                 bh.consume(table.add(key));
             }
         }
+
+        Releasables.close(tables); // the tables are created inside this method, so they are released here as well
     }
 
     @State(Scope.Benchmark)
     public static class Options {
+        @Param({ "MURMUR3", "T1HA" })
+        public Type type;
+
         @Param({
             "1",
             "2",
@@ -152,23 +162,19 @@ public static class Options {
             "753883",
             "851888",
             "971153" })
-
         public Integer size;
 
-        @Param({ "8", "32", "128" })
+        @Param({ "5", "28", "59", "105" })
         public Integer length;
 
-        private BytesRefHash[] tables;
-
         private BytesRef[] keys;
 
         @Setup
         public void setup() {
             assert size <= Math.pow(26, length) : "key length too small to generate the required number of keys";
-            tables = Stream.generate(() -> new BytesRefHash(BigArrays.NON_RECYCLING_INSTANCE))
-                .limit(NUM_TABLES)
-                .toArray(BytesRefHash[]::new);
-            Random random = new Random(0);
+            // Seeding with size will help produce deterministic results for the same size, and avoid similar
+            // looking clusters for different sizes, in case one hash function got unlucky.
+            Random random = new Random(size);
             Set<BytesRef> seen = new HashSet<>();
             keys = new BytesRef[size];
             for (int i = 0; i < size; i++) {
@@ -185,10 +191,59 @@ public void setup() {
                 seen.add(key);
             }
         }
+    }
+
+    public enum Type {
+        MURMUR3(() -> new HashTable() { // 32-bit murmur3 stretched to 64 bits (see the comment in the hasher below)
+            private final BytesRefHash table = new BytesRefHash(1, 0.6f, key -> {
+                // Repeating the lower bits into upper bits to make the fingerprint work.
+                // Alternatively, use a 64-bit murmur3 hash, but that won't represent the baseline.
+                long h = StringHelper.murmurhash3_x86_32(key.bytes, key.offset, key.length, 0) & 0xFFFFFFFFL;
+                return h | (h << 32);
+            }, BigArrays.NON_RECYCLING_INSTANCE);
+
+            @Override
+            public long add(BytesRef key) {
+                return table.add(key);
+            }
+
+            @Override
+            public void close() {
+                table.close();
+            }
+        }),
 
-        @TearDown
-        public void tearDown() {
-            Releasables.close(tables);
+        T1HA(() -> new HashTable() { // 64-bit T1ha hash with an explicit seed of 0
+            private final BytesRefHash table = new BytesRefHash(
+                1,
+                0.6f,
+                key -> T1ha.hash(key.bytes, key.offset, key.length, 0),
+                BigArrays.NON_RECYCLING_INSTANCE
+            );
+
+            @Override
+            public long add(BytesRef key) {
+                return table.add(key);
+            }
+
+            @Override
+            public void close() {
+                table.close();
+            }
+        });
+
+        private final Supplier<HashTable> supplier; // builds a new table on every create() call
+
+        Type(Supplier<HashTable> supplier) {
+            this.supplier = supplier;
+        }
+
+        public HashTable create() {
+            return supplier.get();
         }
     }
+
+    interface HashTable extends Releasable { // the only operations the benchmark needs: add keys, then close
+        long add(BytesRef key); // the implementations in Type forward this to BytesRefHash#add
+    }
 }
diff --git a/buildSrc/version.properties b/buildSrc/version.properties
index 2bb21dfca4b14..ff962309cf084 100644
--- a/buildSrc/version.properties
+++ b/buildSrc/version.properties
@@ -69,5 +69,3 @@ resteasy          = 6.2.4.Final
 # opentelemetry dependencies
 opentelemetry    = 1.26.0
 
-# hashing dependencies
-zero_allocation_hashing = 0.16
diff --git a/libs/common/src/main/java/org/opensearch/common/hash/T1ha.java b/libs/common/src/main/java/org/opensearch/common/hash/T1ha.java
new file mode 100644
index 0000000000000..185875f4a2d68
--- /dev/null
+++ b/libs/common/src/main/java/org/opensearch/common/hash/T1ha.java
@@ -0,0 +1,226 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.common.hash;
+
+import java.lang.invoke.MethodHandles;
+import java.lang.invoke.VarHandle;
+import java.nio.ByteOrder;
+
+import static java.lang.Long.rotateRight;
+
+/**
+ * t1ha: Fast Positive Hash
+ *
+ * <p>
+ * Implements <a href="https://github.com/erthink/t1ha#t1ha1--64-bits-baseline-fast-portable-hash">t1ha1</a>;
+ * a fast portable hash function with reasonable quality for checksums, hash tables, and thin fingerprinting.
+ *
+ * <p>
+ * To overcome language and performance limitations, this implementation differs slightly from the reference
+ * implementation in C++, so the returned values will vary.
+ *
+ * <p>
+ * Intended for little-endian systems but returns the same result on big-endian, albeit marginally slower.
+ */
+public class T1ha {
+    private static final long SEED = System.nanoTime();
+
+    private static final VarHandle LONG_HANDLE = MethodHandles.byteArrayViewVarHandle(long[].class, ByteOrder.LITTLE_ENDIAN);
+    private static final VarHandle INT_HANDLE = MethodHandles.byteArrayViewVarHandle(int[].class, ByteOrder.LITTLE_ENDIAN);
+    private static final VarHandle SHORT_HANDLE = MethodHandles.byteArrayViewVarHandle(short[].class, ByteOrder.LITTLE_ENDIAN);
+
+    // "Magic" primes:
+    private static final long p0 = 0xEC99BF0D8372CAABL;
+    private static final long p1 = 0x82434FE90EDCEF39L;
+    private static final long p2 = 0xD4F06DB99D67BE4BL;
+    private static final long p3 = 0xBD9CACC22C6E9571L;
+    private static final long p4 = 0x9C06FAF4D023E3ABL;
+    private static final long p5 = 0xC060724A8424F345L;
+    private static final long p6 = 0xCB5AF53AE3AAAC31L;
+
+    // Rotations:
+    private static final int s0 = 41;
+    private static final int s1 = 17;
+    private static final int s2 = 31;
+
+    /**
+     * Returns the hash code for the specified range of the given {@code byte} array, using the class-wide {@code SEED}.
+     * @param input the input byte array
+     * @param offset the starting offset (index of the first byte to hash)
+     * @param length the length of the range, in bytes
+     * @return 64-bit hash code; {@code SEED} is taken from {@code System.nanoTime()}, so values are not stable across JVM runs
+     */
+    public static long hash(byte[] input, int offset, int length) {
+        return hash(input, offset, length, SEED);
+    }
+
+    /**
+     * Returns the hash code for the specified range of the given {@code byte} array, computed with the given seed.
+     * @param input the input byte array; it is not read when {@code length} is 0
+     * @param offset the starting offset (index of the first byte to hash)
+     * @param length the length of the range, in bytes
+     * @param seed customized seed, used to initialize the mixing state
+     * @return 64-bit hash code
+     */
+    public static long hash(byte[] input, int offset, int length, long seed) {
+        long a = seed;
+        long b = length;
+
+        if (length > 32) { // bulk phase: mix 32-byte blocks until fewer than 32 bytes remain
+            long c = rotateRight(length, s1) + seed;
+            long d = length ^ rotateRight(seed, s1);
+
+            do { // each iteration consumes four little-endian 64-bit words
+                long w0 = fetch64(input, offset);
+                long w1 = fetch64(input, offset + 8);
+                long w2 = fetch64(input, offset + 16);
+                long w3 = fetch64(input, offset + 24);
+
+                long d02 = w0 ^ rotateRight(w2 + d, s1);
+                long c13 = w1 ^ rotateRight(w3 + c, s1);
+                c += a ^ rotateRight(w0, s0);
+                d -= b ^ rotateRight(w1, s2);
+                a ^= p1 * (d02 + w3);
+                b ^= p0 * (c13 + w2);
+
+                offset += 32;
+                length -= 32;
+            } while (length >= 32);
+
+            a ^= p6 * (rotateRight(c, s1) + d); // fold the two extra accumulators back into a and b
+            b ^= p5 * (rotateRight(d, s1) + c);
+        }
+
+        return h32(input, offset, length, a, b); // finishes with the remaining bytes (at most 32)
+    }
+
+    /**
+     * Computes the hash of up to 32 bytes. Each case group consumes up to 8 bytes and deliberately falls through to the next.
+     * Constants in the switch expression are dense; JVM will use them as indices into a table of
+     * instruction pointers (tableswitch instruction), making lookups really fast.
+     */
+    @SuppressWarnings("fallthrough")
+    private static long h32(byte[] input, int offset, int length, long a, long b) {
+        switch (length) {
+            default: // lengths 25..32 (any value outside 0..24 also lands here)
+                b += mux64(fetch64(input, offset), p4);
+                offset += 8;
+                length -= 8;
+            case 24:
+            case 23:
+            case 22:
+            case 21:
+            case 20:
+            case 19:
+            case 18:
+            case 17:
+                a += mux64(fetch64(input, offset), p3);
+                offset += 8;
+                length -= 8;
+            case 16:
+            case 15:
+            case 14:
+            case 13:
+            case 12:
+            case 11:
+            case 10:
+            case 9:
+                b += mux64(fetch64(input, offset), p2);
+                offset += 8;
+                length -= 8;
+            case 8:
+            case 7:
+            case 6:
+            case 5:
+            case 4:
+            case 3:
+            case 2:
+            case 1:
+                a += mux64(tail64(input, offset, length), p1); // tail64 reads only the 1..8 bytes that are left
+            case 0:
+                // Final weak avalanche
+                return mux64(rotateRight(a + b, s1), p4) + mix64(a ^ b, p0);
+        }
+    }
+
+    /**
+     * XORs the high and low 64-bit halves of the full 128-bit product of {@code a} and {@code b} (signed high half, see below).
+     */
+    private static long mux64(long a, long b) {
+        // Ideally, the following should be used to match the reference implementation:
+        // return Math.unsignedMultiplyHigh(a, b) ^ (a * b);
+        // Since unsignedMultiplyHigh isn't available before JDK 18, and calculating it without intrinsics is quite slow,
+        // the multiplyHigh method is used instead. Slight loss in quality is imperceptible for our use-case: a hash table.
+        return Math.multiplyHigh(a, b) ^ (a * b);
+    }
+
+    /**
+     * Bit-mixer: multiplies the operands, then XORs the product with itself rotated right by {@code s0} bits.
+     */
+    private static long mix64(long a, long b) {
+        final long product = a * b;
+        return product ^ rotateRight(product, s0);
+    }
+
+    /**
+     * Reads "length" bytes starting at "offset" in little-endian order; returned as long.
+     * Callers must pass a length between 1 and 8 (inclusive); no check is made and any other value reads a full 8 bytes.
+     */
+    private static long tail64(byte[] input, int offset, int length) {
+        switch (length) {
+            case 1:
+                return fetch8(input, offset);
+            case 2:
+                return fetch16(input, offset);
+            case 3:
+                return fetch16(input, offset) | (fetch8(input, offset + 2) << 16);
+            case 4:
+                return fetch32(input, offset);
+            case 5:
+                return fetch32(input, offset) | (fetch8(input, offset + 4) << 32);
+            case 6:
+                return fetch32(input, offset) | (fetch16(input, offset + 4) << 32);
+            case 7:
+                // This is equivalent to:
+                // return fetch32(input, offset) | (fetch16(input, offset + 4) << 32) | (fetch8(input, offset + 6) << 48);
+                // But reading two ints that overlap by one byte is faster because it takes fewer instructions.
+                return fetch32(input, offset) | (fetch32(input, offset + 3) << 24);
+            default:
+                return fetch64(input, offset);
+        }
+    }
+
+    /**
+     * Reads a 64-bit long in little-endian order, starting at byte index {@code offset}.
+     */
+    private static long fetch64(byte[] input, int offset) {
+        return (long) LONG_HANDLE.get(input, offset);
+    }
+
+    /**
+     * Reads a little-endian 32-bit value at {@code offset} and zero-extends it to a long.
+     */
+    private static long fetch32(byte[] input, int offset) {
+        return Integer.toUnsignedLong((int) INT_HANDLE.get(input, offset));
+    }
+
+    /**
+     * Reads a little-endian 16-bit value at {@code offset} and zero-extends it to a long.
+     */
+    private static long fetch16(byte[] input, int offset) {
+        return Short.toUnsignedLong((short) SHORT_HANDLE.get(input, offset));
+    }
+
+    /**
+     * Reads the byte at {@code offset} and zero-extends it to a long.
+     */
+    private static long fetch8(byte[] input, int offset) {
+        return Byte.toUnsignedLong(input[offset]);
+    }
+}
diff --git a/libs/common/src/main/java/org/opensearch/common/hash/package-info.java b/libs/common/src/main/java/org/opensearch/common/hash/package-info.java
new file mode 100644
index 0000000000000..bd393b8b921ed
--- /dev/null
+++ b/libs/common/src/main/java/org/opensearch/common/hash/package-info.java
@@ -0,0 +1,12 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+/**
+ * Common hashing utilities.
+ */
+package org.opensearch.common.hash;
diff --git a/libs/common/src/test/java/org/opensearch/common/hash/HashFunctionTestCase.java b/libs/common/src/test/java/org/opensearch/common/hash/HashFunctionTestCase.java
new file mode 100644
index 0000000000000..6f7f813bffa24
--- /dev/null
+++ b/libs/common/src/test/java/org/opensearch/common/hash/HashFunctionTestCase.java
@@ -0,0 +1,137 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.common.hash;
+
+import org.opensearch.common.Randomness;
+import org.opensearch.test.OpenSearchTestCase;
+
+import java.util.Locale;
+import java.util.Random;
+
+public abstract class HashFunctionTestCase extends OpenSearchTestCase {
+    private static final int[] INPUT_BITS = new int[] { 24, 32, 40, 48, 56, 64, 72, 80, 96, 112, 128, 160, 512, 1024 };
+    private static final int OUTPUT_BITS = 64;
+    private static final int ITERATIONS = 1000;
+    private static final double BIAS_THRESHOLD = 0.01; // 1%
+
+    public abstract long hash(byte[] input);
+
+    /**
+     * Tests if the hash function shows an avalanche effect, i.e., flipping a single input bit should flip about half the
+     * output bits. NOTE(review): inputs are allocated as inputBits bytes, yet only their first inputBits bits get flipped.
+     */
+    public final void testAvalanche() {
+        for (int inputBits : INPUT_BITS) {
+            AvalancheStats stats = simulate(inputBits, OUTPUT_BITS, new RandomInputGenerator(inputBits));
+            if (stats.bias() >= BIAS_THRESHOLD) {
+                fail("bias exceeds threshold: " + stats);
+            }
+        }
+    }
+
+    private AvalancheStats simulate(int inputBits, int outputBits, InputGenerator inputGenerator) {
+        int[][] flips = new int[inputBits][outputBits]; // flips[i][o]: how often flipping input bit i changed output bit o
+
+        for (int iter = 0; iter < ITERATIONS; iter++) {
+            byte[] input = inputGenerator.next();
+            long hash = hash(input);
+
+            for (int i = 0; i < inputBits; i++) {
+                flip(input, i); // flip one bit
+                long newHash = hash(input); // recompute the hash; half the bits should have flipped
+                flip(input, i); // return to original
+
+                long diff = hash ^ newHash;
+                for (int o = 0; o < outputBits; o++) { // bounded by the parameter that sized flips, not the class constant
+                    if ((diff & 1) == 1) {
+                        flips[i][o] += 1;
+                    }
+                    diff >>>= 1;
+                }
+            }
+        }
+
+        return new AvalancheStats(flips);
+    }
+
+    private static void flip(byte[] input, int position) { // toggles bit `position`, counting from the LSB of input[0]
+        int byteIndex = position / Byte.SIZE;
+        int mask = 1 << (position & 7);
+        input[byteIndex] ^= mask;
+    }
+
+    @FunctionalInterface
+    interface InputGenerator { // source of the byte arrays fed to the hash function under test
+        byte[] next(); // returns the next input; simulate() flips bits in the returned array in place and restores them
+    }
+
+    private static class RandomInputGenerator implements InputGenerator {
+        private final Random rng = Randomness.get();
+        private final byte[] buffer; // reused and overwritten on every call to next()
+
+        public RandomInputGenerator(int size) { // size is the buffer length in bytes
+            this.buffer = new byte[size];
+        }
+
+        @Override
+        public byte[] next() {
+            rng.nextBytes(buffer);
+            return buffer;
+        }
+    }
+
+    private static class AvalancheStats { // summary of a flips[inputBit][outputBit] count matrix
+        private final int inputBits;
+        private final int outputBits;
+        private final double bias; // absolute value of the mean signed deviation from a 50% flip rate
+        private final double sumOfSquaredErrors;
+
+        public AvalancheStats(int[][] flips) {
+            this.inputBits = flips.length;
+            this.outputBits = flips[0].length;
+            double biasTotal = 0;
+            double squaredErrorTotal = 0;
+
+            for (int in = 0; in < inputBits; in++) {
+                for (int out = 0; out < outputBits; out++) {
+                    double rate = (double) flips[in][out] / ITERATIONS; // share of iterations in which this output bit flipped
+                    squaredErrorTotal += Math.pow(0.5 - rate, 2);
+                    biasTotal += 2 * rate - 1;
+                }
+            }
+            this.bias = Math.abs(biasTotal / (inputBits * outputBits));
+            this.sumOfSquaredErrors = squaredErrorTotal;
+        }
+
+        public double bias() {
+            return bias;
+        }
+
+        public double diffusion() { // complement of the bias
+            return 1 - bias;
+        }
+
+        public double sumOfSquaredErrors() {
+            return sumOfSquaredErrors;
+        }
+
+        @Override
+        public String toString() {
+            return String.format(
+                Locale.ROOT,
+                "AvalancheStats{inputBits=%d, outputBits=%d, bias=%.4f%%, diffusion=%.4f%%, sumOfSquaredErrors=%.2f}",
+                inputBits,
+                outputBits,
+                bias * 100,
+                (1 - bias) * 100,
+                sumOfSquaredErrors
+            );
+        }
+    }
+}
diff --git a/libs/common/src/test/java/org/opensearch/common/hash/T1haTests.java b/libs/common/src/test/java/org/opensearch/common/hash/T1haTests.java
new file mode 100644
index 0000000000000..d6132e235d89b
--- /dev/null
+++ b/libs/common/src/test/java/org/opensearch/common/hash/T1haTests.java
@@ -0,0 +1,229 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.common.hash;
+
+public class T1haTests extends HashFunctionTestCase {
+
+    /**
+     * Modeled on the self-check tests defined in the reference implementation:
+     * <a href="https://github.com/erthink/t1ha/blob/master/src/t1ha_selfcheck.c">t1ha_selfcheck.c</a>
+     */
+    public void testSelfCheck() {
+        byte[] testPattern = {
+            0,
+            1,
+            2,
+            3,
+            4,
+            5,
+            6,
+            7,
+            (byte) 0xFF,
+            0x7F,
+            0x3F,
+            0x1F,
+            0xF,
+            8,
+            16,
+            32,
+            64,
+            (byte) 0x80,
+            (byte) 0xFE,
+            (byte) 0xFC,
+            (byte) 0xF8,
+            (byte) 0xF0,
+            (byte) 0xE0,
+            (byte) 0xC0,
+            (byte) 0xFD,
+            (byte) 0xFB,
+            (byte) 0xF7,
+            (byte) 0xEF,
+            (byte) 0xDF,
+            (byte) 0xBF,
+            0x55,
+            (byte) 0xAA,
+            11,
+            17,
+            19,
+            23,
+            29,
+            37,
+            42,
+            43,
+            'a',
+            'b',
+            'c',
+            'd',
+            'e',
+            'f',
+            'g',
+            'h',
+            'i',
+            'j',
+            'k',
+            'l',
+            'm',
+            'n',
+            'o',
+            'p',
+            'q',
+            'r',
+            's',
+            't',
+            'u',
+            'v',
+            'w',
+            'x' };
+
+        /* Reference hashes when using {@code Math.unsignedMultiplyHigh} in the mux64 step.
+        These values match the ones defined in the reference implementation:
+        https://github.com/erthink/t1ha/blob/master/src/t1ha1_selfcheck.c#L51-L72
+
+        We don't expect our implementation to return these values as we use {@code Math.multiplyHigh} instead.
+        They are kept here for context only.
+
+        long[] reference = {
+                0L,
+                0x6A580668D6048674L, 0xA2FE904AFF0D0879L, 0xE3AB9C06FAF4D023L, 0x6AF1C60874C95442L,
+                0xB3557E561A6C5D82L, 0x0AE73C696F3D37C0L, 0x5EF25F7062324941L, 0x9B784F3B4CE6AF33L,
+                0x6993BB206A74F070L, 0xF1E95DF109076C4CL, 0x4E1EB70C58E48540L, 0x5FDD7649D8EC44E4L,
+                0x559122C706343421L, 0x380133D58665E93DL, 0x9CE74296C8C55AE4L, 0x3556F9A5757AB6D0L,
+                0xF62751F7F25C469EL, 0x851EEC67F6516D94L, 0xED463EE3848A8695L, 0xDC8791FEFF8ED3ACL,
+                0x2569C744E1A282CFL, 0xF90EB7C1D70A80B9L, 0x68DFA6A1B8050A4CL, 0x94CCA5E8210D2134L,
+                0xF5CC0BEABC259F52L, 0x40DBC1F51618FDA7L, 0x0807945BF0FB52C6L, 0xE5EF7E09DE70848DL,
+                0x63E1DF35FEBE994AL, 0x2025E73769720D5AL, 0xAD6120B2B8A152E1L, 0x2A71D9F13959F2B7L,
+                0x8A20849A27C32548L, 0x0BCBC9FE3B57884EL, 0x0E028D255667AEADL, 0xBE66DAD3043AB694L,
+                0xB00E4C1238F9E2D4L, 0x5C54BDE5AE280E82L, 0x0E22B86754BC3BC4L, 0x016707EBF858B84DL,
+                0x990015FBC9E095EEL, 0x8B9AF0A3E71F042FL, 0x6AA56E88BD380564L, 0xAACE57113E681A0FL,
+                0x19F81514AFA9A22DL, 0x80DABA3D62BEAC79L, 0x715210412CABBF46L, 0xD8FA0B9E9D6AA93FL,
+                0x6C2FC5A4109FD3A2L, 0x5B3E60EEB51DDCD8L, 0x0A7C717017756FE7L, 0xA73773805CA31934L,
+                0x4DBD6BB7A31E85FDL, 0x24F619D3D5BC2DB4L, 0x3E4AF35A1678D636L, 0x84A1A8DF8D609239L,
+                0x359C862CD3BE4FCDL, 0xCF3A39F5C27DC125L, 0xC0FF62F8FD5F4C77L, 0x5E9F2493DDAA166CL,
+                0x17424152BE1CA266L, 0xA78AFA5AB4BBE0CDL, 0x7BFB2E2CEF118346L, 0x647C3E0FF3E3D241L,
+                0x0352E4055C13242EL, 0x6F42FC70EB660E38L, 0x0BEBAD4FABF523BAL, 0x9269F4214414D61DL,
+                0x1CA8760277E6006CL, 0x7BAD25A859D87B5DL, 0xAD645ADCF7414F1DL, 0xB07F517E88D7AFB3L,
+                0xB321C06FB5FFAB5CL, 0xD50F162A1EFDD844L, 0x1DFD3D1924FBE319L, 0xDFAEAB2F09EF7E78L,
+                0xA7603B5AF07A0B1EL, 0x41CD044C0E5A4EE3L, 0xF64D2F86E813BF33L, 0xFF9FDB99305EB06AL
+        }; */
+
+        // Reference hashes when using Math.multiplyHigh in the mux64 step; 81 values, consumed in order by the assertions below.
+        long[] reference = {
+            0L,
+            0xCE510B7405E0A2CAL,
+            0xC0A2DA74A8271FCBL,
+            0x1C549C06FAF4D023L,
+            0x084CDA0ED41CD2D4L,
+            0xD05BA7AA9FEECE5BL,
+            0x7D6128AB2CCC4EB1L,
+            0x62332FA6EC1B50AAL,
+            0x1B66C81767870EF2L,
+            0xEC6B92A37AED73B8L,
+            0x1712987232EF4ED3L,
+            0xAA503A04AE2450B5L,
+            0x15D25DE445730A6CL,
+            0xAB87E38AA8D21746L,
+            0x18CAE735BBF62D15L,
+            0x0D56DFF9914CA656L,
+            0xCB4F5859A9AE5B52L,
+            0xEE97003F7B1283E1L,
+            0x50CFB2AF0F54BA6DL,
+            0x570B4D6AE4C67814L,
+            0x1ED59274A97497EBL,
+            0x8608D03D165C59BFL,
+            0x6CBE0E537BE04C02L,
+            0xD4C8FCFD4179A874L,
+            0xFB4E677D876118A1L,
+            0x6B1A96F1B4765D79L,
+            0x1075B9B89BDFE5F8L,
+            0x02771D08F2891CB1L,
+            0x4BB8E16FF410F19EL,
+            0x3EB7849C0DFAF566L,
+            0x173B09359DE422CFL,
+            0xFE212C6DB7474306L,
+            0xA74E7C2D632664EFL,
+            0x56ECDED6546F0914L,
+            0x08DEF866EF20A94BL,
+            0x7D0BAC64606521F1L,
+            0xCA6BA9817A357FA9L,
+            0x0873B834A6E2AAE4L,
+            0x45EE02D6DCF8992EL,
+            0x3EA060225B3E1C1FL,
+            0x24DBB6D02D5CC531L,
+            0xE5E91A7340BF9382L,
+            0x28975F86E2E2177FL,
+            0x80E48374A6B42E85L,
+            0xDF40392265BB4A66L,
+            0x43750475A48C7023L,
+            0x5648BD3E391C01D3L,
+            0x9BE9E11AD1A6C369L,
+            0x2E079CB8C1A11F50L,
+            0xB2D538403F1020F1L,
+            0x297518A4EF6AF5F1L,
+            0xA8CE1B90167A6F8BL,
+            0xB926B2FA50541BA9L,
+            0xC46A2D3BD6925A35L,
+            0x3071BC8E6C400487L,
+            0x300D3885894BA47FL,
+            0x840BFF3BEB7EEADDL,
+            0xDC9E04DF744BDC0CL,
+            0xBE01CF6841412C77L,
+            0x6C55B2DC74B816A1L,
+            0x4D4C63128A344F82L,
+            0xC6227497E100B463L,
+            0x53C9987705EA71C0L,
+            0x3E355394668C3559L,
+            0x05984B7D358B107AL,
+            0x4D32FA1D79002A57L,
+            0x910B0DAD1440EC24L,
+            0x025BDE6A7BEBF320L,
+            0x0D33817EF345D999L,
+            0xBA0DE64B3F4DB34AL,
+            0x54666461D0EB4FD7L,
+            0x746ECFA92D1CAF81L,
+            0x6E6A774ACD266DF2L,
+            0x1A86161AE8E82A85L,
+            0xFFF7C351A4CEC13DL,
+            0xFFF05844F57498B8L,
+            0x8DB71789127C6C13L,
+            0x4A52ACF805F370ABL,
+            0xFE13F90A1ACFBD58L,
+            0x615730E301ED12E2L,
+            0x1A2D4AA43B6C0103L };
+
+        int offset = 0;
+        assertEquals(reference[offset++], T1ha.hash(null, 0, 0, 0L)); // empty-zero
+        assertEquals(reference[offset++], T1ha.hash(null, 0, 0, ~0L)); // empty-all1
+        assertEquals(reference[offset++], T1ha.hash(testPattern, 0, 64, 0L)); // bin64-zero
+
+        long seed = 1;
+        for (int i = 1; i < 64; i++) {
+            assertEquals(reference[offset++], T1ha.hash(testPattern, 0, i, seed)); // bin%i-1p%i
+            seed <<= 1;
+        }
+
+        seed = ~0L;
+        for (int i = 1; i <= 7; i++) {
+            seed <<= 1;
+            assertEquals(reference[offset++], T1ha.hash(testPattern, i, 64 - i, seed)); // align%i_F%i
+        }
+
+        byte[] testPatternLong = new byte[512];
+        for (int i = 0; i < testPatternLong.length; i++) {
+            testPatternLong[i] = (byte) i;
+        }
+        for (int i = 0; i <= 7; i++) {
+            assertEquals(reference[offset++], T1ha.hash(testPatternLong, i, 128 + i * 17, seed)); // long-%05i
+        }
+    }
+
+    @Override
+    public long hash(byte[] input) {
+        return T1ha.hash(input, 0, input.length); // three-argument overload, which hashes with T1ha's class-wide SEED
+    }
+}
diff --git a/server/build.gradle b/server/build.gradle
index 3b1fe9554a309..f6db3d53a0dcc 100644
--- a/server/build.gradle
+++ b/server/build.gradle
@@ -158,9 +158,6 @@ dependencies {
   api "com.google.protobuf:protobuf-java:${versions.protobuf}"
   api "jakarta.annotation:jakarta.annotation-api:${versions.jakarta_annotation}"
 
-  // hashing
-  api "net.openhft:zero-allocation-hashing:${versions.zero_allocation_hashing}"
-
   testImplementation(project(":test:framework")) {
     // tests use the locally compiled version of server
     exclude group: 'org.opensearch', module: 'server'
@@ -367,18 +364,7 @@ tasks.named("thirdPartyAudit").configure {
             'com.google.protobuf.UnsafeUtil$Android32MemoryAccessor',
             'com.google.protobuf.UnsafeUtil$Android64MemoryAccessor',
             'com.google.protobuf.UnsafeUtil$JvmMemoryAccessor',
-            'com.google.protobuf.UnsafeUtil$MemoryAccessor',
-
-            // from zero-allocation-hashing
-            'net.openhft.hashing.HotSpotPrior7u6StringHash',
-            'net.openhft.hashing.LongHashFunction',
-            'net.openhft.hashing.LongTupleHashFunction',
-            'net.openhft.hashing.ModernCompactStringHash',
-            'net.openhft.hashing.ModernHotSpotStringHash',
-            'net.openhft.hashing.UnsafeAccess',
-            'net.openhft.hashing.UnsafeAccess$OldUnsafeAccessBigEndian',
-            'net.openhft.hashing.UnsafeAccess$OldUnsafeAccessLittleEndian',
-            'net.openhft.hashing.Util'
+            'com.google.protobuf.UnsafeUtil$MemoryAccessor'
     )
 }
 
diff --git a/server/licenses/zero-allocation-hashing-0.16.jar.sha1 b/server/licenses/zero-allocation-hashing-0.16.jar.sha1
deleted file mode 100644
index e82e885f269ce..0000000000000
--- a/server/licenses/zero-allocation-hashing-0.16.jar.sha1
+++ /dev/null
@@ -1 +0,0 @@
-0ca252f328160ed5d027f100a4fe525d6d21daaf
\ No newline at end of file
diff --git a/server/licenses/zero-allocation-hashing-LICENSE.txt b/server/licenses/zero-allocation-hashing-LICENSE.txt
deleted file mode 100644
index 261eeb9e9f8b2..0000000000000
--- a/server/licenses/zero-allocation-hashing-LICENSE.txt
+++ /dev/null
@@ -1,201 +0,0 @@
-                                 Apache License
-                           Version 2.0, January 2004
-                        http://www.apache.org/licenses/
-
-   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
-
-   1. Definitions.
-
-      "License" shall mean the terms and conditions for use, reproduction,
-      and distribution as defined by Sections 1 through 9 of this document.
-
-      "Licensor" shall mean the copyright owner or entity authorized by
-      the copyright owner that is granting the License.
-
-      "Legal Entity" shall mean the union of the acting entity and all
-      other entities that control, are controlled by, or are under common
-      control with that entity. For the purposes of this definition,
-      "control" means (i) the power, direct or indirect, to cause the
-      direction or management of such entity, whether by contract or
-      otherwise, or (ii) ownership of fifty percent (50%) or more of the
-      outstanding shares, or (iii) beneficial ownership of such entity.
-
-      "You" (or "Your") shall mean an individual or Legal Entity
-      exercising permissions granted by this License.
-
-      "Source" form shall mean the preferred form for making modifications,
-      including but not limited to software source code, documentation
-      source, and configuration files.
-
-      "Object" form shall mean any form resulting from mechanical
-      transformation or translation of a Source form, including but
-      not limited to compiled object code, generated documentation,
-      and conversions to other media types.
-
-      "Work" shall mean the work of authorship, whether in Source or
-      Object form, made available under the License, as indicated by a
-      copyright notice that is included in or attached to the work
-      (an example is provided in the Appendix below).
-
-      "Derivative Works" shall mean any work, whether in Source or Object
-      form, that is based on (or derived from) the Work and for which the
-      editorial revisions, annotations, elaborations, or other modifications
-      represent, as a whole, an original work of authorship. For the purposes
-      of this License, Derivative Works shall not include works that remain
-      separable from, or merely link (or bind by name) to the interfaces of,
-      the Work and Derivative Works thereof.
-
-      "Contribution" shall mean any work of authorship, including
-      the original version of the Work and any modifications or additions
-      to that Work or Derivative Works thereof, that is intentionally
-      submitted to Licensor for inclusion in the Work by the copyright owner
-      or by an individual or Legal Entity authorized to submit on behalf of
-      the copyright owner. For the purposes of this definition, "submitted"
-      means any form of electronic, verbal, or written communication sent
-      to the Licensor or its representatives, including but not limited to
-      communication on electronic mailing lists, source code control systems,
-      and issue tracking systems that are managed by, or on behalf of, the
-      Licensor for the purpose of discussing and improving the Work, but
-      excluding communication that is conspicuously marked or otherwise
-      designated in writing by the copyright owner as "Not a Contribution."
-
-      "Contributor" shall mean Licensor and any individual or Legal Entity
-      on behalf of whom a Contribution has been received by Licensor and
-      subsequently incorporated within the Work.
-
-   2. Grant of Copyright License. Subject to the terms and conditions of
-      this License, each Contributor hereby grants to You a perpetual,
-      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
-      copyright license to reproduce, prepare Derivative Works of,
-      publicly display, publicly perform, sublicense, and distribute the
-      Work and such Derivative Works in Source or Object form.
-
-   3. Grant of Patent License. Subject to the terms and conditions of
-      this License, each Contributor hereby grants to You a perpetual,
-      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
-      (except as stated in this section) patent license to make, have made,
-      use, offer to sell, sell, import, and otherwise transfer the Work,
-      where such license applies only to those patent claims licensable
-      by such Contributor that are necessarily infringed by their
-      Contribution(s) alone or by combination of their Contribution(s)
-      with the Work to which such Contribution(s) was submitted. If You
-      institute patent litigation against any entity (including a
-      cross-claim or counterclaim in a lawsuit) alleging that the Work
-      or a Contribution incorporated within the Work constitutes direct
-      or contributory patent infringement, then any patent licenses
-      granted to You under this License for that Work shall terminate
-      as of the date such litigation is filed.
-
-   4. Redistribution. You may reproduce and distribute copies of the
-      Work or Derivative Works thereof in any medium, with or without
-      modifications, and in Source or Object form, provided that You
-      meet the following conditions:
-
-      (a) You must give any other recipients of the Work or
-          Derivative Works a copy of this License; and
-
-      (b) You must cause any modified files to carry prominent notices
-          stating that You changed the files; and
-
-      (c) You must retain, in the Source form of any Derivative Works
-          that You distribute, all copyright, patent, trademark, and
-          attribution notices from the Source form of the Work,
-          excluding those notices that do not pertain to any part of
-          the Derivative Works; and
-
-      (d) If the Work includes a "NOTICE" text file as part of its
-          distribution, then any Derivative Works that You distribute must
-          include a readable copy of the attribution notices contained
-          within such NOTICE file, excluding those notices that do not
-          pertain to any part of the Derivative Works, in at least one
-          of the following places: within a NOTICE text file distributed
-          as part of the Derivative Works; within the Source form or
-          documentation, if provided along with the Derivative Works; or,
-          within a display generated by the Derivative Works, if and
-          wherever such third-party notices normally appear. The contents
-          of the NOTICE file are for informational purposes only and
-          do not modify the License. You may add Your own attribution
-          notices within Derivative Works that You distribute, alongside
-          or as an addendum to the NOTICE text from the Work, provided
-          that such additional attribution notices cannot be construed
-          as modifying the License.
-
-      You may add Your own copyright statement to Your modifications and
-      may provide additional or different license terms and conditions
-      for use, reproduction, or distribution of Your modifications, or
-      for any such Derivative Works as a whole, provided Your use,
-      reproduction, and distribution of the Work otherwise complies with
-      the conditions stated in this License.
-
-   5. Submission of Contributions. Unless You explicitly state otherwise,
-      any Contribution intentionally submitted for inclusion in the Work
-      by You to the Licensor shall be under the terms and conditions of
-      this License, without any additional terms or conditions.
-      Notwithstanding the above, nothing herein shall supersede or modify
-      the terms of any separate license agreement you may have executed
-      with Licensor regarding such Contributions.
-
-   6. Trademarks. This License does not grant permission to use the trade
-      names, trademarks, service marks, or product names of the Licensor,
-      except as required for reasonable and customary use in describing the
-      origin of the Work and reproducing the content of the NOTICE file.
-
-   7. Disclaimer of Warranty. Unless required by applicable law or
-      agreed to in writing, Licensor provides the Work (and each
-      Contributor provides its Contributions) on an "AS IS" BASIS,
-      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-      implied, including, without limitation, any warranties or conditions
-      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
-      PARTICULAR PURPOSE. You are solely responsible for determining the
-      appropriateness of using or redistributing the Work and assume any
-      risks associated with Your exercise of permissions under this License.
-
-   8. Limitation of Liability. In no event and under no legal theory,
-      whether in tort (including negligence), contract, or otherwise,
-      unless required by applicable law (such as deliberate and grossly
-      negligent acts) or agreed to in writing, shall any Contributor be
-      liable to You for damages, including any direct, indirect, special,
-      incidental, or consequential damages of any character arising as a
-      result of this License or out of the use or inability to use the
-      Work (including but not limited to damages for loss of goodwill,
-      work stoppage, computer failure or malfunction, or any and all
-      other commercial damages or losses), even if such Contributor
-      has been advised of the possibility of such damages.
-
-   9. Accepting Warranty or Additional Liability. While redistributing
-      the Work or Derivative Works thereof, You may choose to offer,
-      and charge a fee for, acceptance of support, warranty, indemnity,
-      or other liability obligations and/or rights consistent with this
-      License. However, in accepting such obligations, You may act only
-      on Your own behalf and on Your sole responsibility, not on behalf
-      of any other Contributor, and only if You agree to indemnify,
-      defend, and hold each Contributor harmless for any liability
-      incurred by, or claims asserted against, such Contributor by reason
-      of your accepting any such warranty or additional liability.
-
-   END OF TERMS AND CONDITIONS
-
-   APPENDIX: How to apply the Apache License to your work.
-
-      To apply the Apache License to your work, attach the following
-      boilerplate notice, with the fields enclosed by brackets "[]"
-      replaced with your own identifying information. (Don't include
-      the brackets!)  The text should be enclosed in the appropriate
-      comment syntax for the file format. We also recommend that a
-      file or class name and description of purpose be included on the
-      same "printed page" as the copyright notice for easier
-      identification within third-party archives.
-
-   Copyright [yyyy] [name of copyright owner]
-
-   Licensed under the Apache License, Version 2.0 (the "License");
-   you may not use this file except in compliance with the License.
-   You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
diff --git a/server/licenses/zero-allocation-hashing-NOTICE.txt b/server/licenses/zero-allocation-hashing-NOTICE.txt
deleted file mode 100644
index e69de29bb2d1d..0000000000000
diff --git a/server/src/main/java/org/opensearch/common/util/BytesRefHash.java b/server/src/main/java/org/opensearch/common/util/BytesRefHash.java
index c2f394c627806..5107feb6b3a26 100644
--- a/server/src/main/java/org/opensearch/common/util/BytesRefHash.java
+++ b/server/src/main/java/org/opensearch/common/util/BytesRefHash.java
@@ -6,18 +6,39 @@
  * compatible open source license.
  */
 
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*
+ * Modifications Copyright OpenSearch Contributors. See
+ * GitHub history for details.
+ */
+
 package org.opensearch.common.util;
 
-import net.openhft.hashing.LongHashFunction;
 import org.apache.lucene.util.BytesRef;
 import org.opensearch.common.Numbers;
+import org.opensearch.common.hash.T1ha;
 import org.opensearch.common.lease.Releasable;
 import org.opensearch.common.lease.Releasables;
 import org.opensearch.core.common.util.ByteArray;
 
-import java.security.AccessController;
-import java.security.PrivilegedAction;
-
 /**
  * Specialized hash table implementation that maps a {@link BytesRef} key to a long ordinal.
  *
@@ -31,15 +52,11 @@
  *
  * @opensearch.internal
  */
-public class BytesRefHash implements Releasable {
-    private static final LongHashFunction XX3 = AccessController.doPrivileged(
-        (PrivilegedAction<LongHashFunction>) () -> LongHashFunction.xx3(System.nanoTime())
-    );
-
+public final class BytesRefHash implements Releasable {
     private static final long MAX_CAPACITY = 1L << 32;
     private static final long DEFAULT_INITIAL_CAPACITY = 32;
     private static final float DEFAULT_LOAD_FACTOR = 0.6f;
-    private static final Hasher DEFAULT_HASHER = key -> XX3.hashBytes(key.bytes, key.offset, key.length);
+    private static final Hasher DEFAULT_HASHER = key -> T1ha.hash(key.bytes, key.offset, key.length);
 
     private static final long MASK_ORDINAL = 0x00000000FFFFFFFFL;  // extract ordinal
     private static final long MASK_FINGERPRINT = 0xFFFFFFFF00000000L;  // extract fingerprint
diff --git a/server/src/main/resources/org/opensearch/bootstrap/security.policy b/server/src/main/resources/org/opensearch/bootstrap/security.policy
index 2fde31cb1d648..77cd0ab05278e 100644
--- a/server/src/main/resources/org/opensearch/bootstrap/security.policy
+++ b/server/src/main/resources/org/opensearch/bootstrap/security.policy
@@ -48,10 +48,6 @@ grant codeBase "${codebase.opensearch}" {
   permission java.lang.RuntimePermission "setContextClassLoader";
   // needed for SPI class loading
   permission java.lang.RuntimePermission "accessDeclaredMembers";
-
-  // needed for zero-allocation-hashing
-  permission java.lang.RuntimePermission "accessClassInPackage.sun.misc";
-  permission java.lang.reflect.ReflectPermission "suppressAccessChecks";
 };
 
 //// Very special jar permissions:
@@ -89,12 +85,6 @@ grant codeBase "${codebase.zstd-jni}" {
   permission java.lang.RuntimePermission "loadLibrary.*";
 };
 
-grant codeBase "${codebase.zero-allocation-hashing}" {
-  permission java.lang.RuntimePermission "accessClassInPackage.sun.misc";
-  permission java.lang.RuntimePermission "accessDeclaredMembers";
-  permission java.lang.reflect.ReflectPermission "suppressAccessChecks";
-};
-
 //// Everything else:
 
 grant {
diff --git a/server/src/main/resources/org/opensearch/bootstrap/test-framework.policy b/server/src/main/resources/org/opensearch/bootstrap/test-framework.policy
index 7d35d439bd373..0abfd7ef22ae7 100644
--- a/server/src/main/resources/org/opensearch/bootstrap/test-framework.policy
+++ b/server/src/main/resources/org/opensearch/bootstrap/test-framework.policy
@@ -156,6 +156,5 @@ grant {
   permission java.lang.RuntimePermission "accessDeclaredMembers";
   permission java.lang.RuntimePermission "reflectionFactoryAccess";
   permission java.lang.RuntimePermission "accessClassInPackage.sun.reflect";
-  permission java.lang.RuntimePermission "accessClassInPackage.sun.misc";
   permission java.lang.reflect.ReflectPermission "suppressAccessChecks";
 };
diff --git a/server/src/test/java/org/opensearch/common/NumbersTests.java b/server/src/test/java/org/opensearch/common/NumbersTests.java
index ff12b3bc4cc96..d6745f53f3569 100644
--- a/server/src/test/java/org/opensearch/common/NumbersTests.java
+++ b/server/src/test/java/org/opensearch/common/NumbersTests.java
@@ -238,6 +238,7 @@ public void testNextPowerOfTwo() {
             long nextPowerOfTwo = Numbers.nextPowerOfTwo(value);
 
             assertTrue(nextPowerOfTwo > value); // must be strictly greater
+            assertTrue((nextPowerOfTwo >>> 1) <= value); // must be the smallest power of two greater than value
             assertEquals(0, nextPowerOfTwo & (nextPowerOfTwo - 1)); // must be a power of two
         }
     }
diff --git a/server/src/test/java/org/opensearch/common/util/BytesRefHashTests.java b/server/src/test/java/org/opensearch/common/util/BytesRefHashTests.java
index d40012accbb7a..b182ba11198be 100644
--- a/server/src/test/java/org/opensearch/common/util/BytesRefHashTests.java
+++ b/server/src/test/java/org/opensearch/common/util/BytesRefHashTests.java
@@ -33,9 +33,9 @@
 package org.opensearch.common.util;
 
 import org.apache.lucene.tests.util.TestUtil;
-import net.openhft.hashing.LongHashFunction;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.BytesRefBuilder;
+import org.opensearch.common.hash.T1ha;
 import org.opensearch.common.settings.Settings;
 import org.opensearch.core.indices.breaker.NoneCircuitBreakerService;
 import org.opensearch.test.OpenSearchTestCase;
@@ -59,11 +59,11 @@ private void newHash() {
         if (hash != null) {
             hash.close();
         }
-        LongHashFunction hasher = LongHashFunction.xx3(randomLong());
+        long seed = randomLong();
         hash = new BytesRefHash(
             randomIntBetween(1, 100),      // random capacity
             0.6f + randomFloat() * 0.39f,  // random load factor to verify collision resolution
-            key -> hasher.hashBytes(key.bytes, key.offset, key.length),
+            key -> T1ha.hash(key.bytes, key.offset, key.length, seed),
             randomBigArrays()
         );
     }

From 1c76d9f420c665df08d024cad1bc5706ac4f9606 Mon Sep 17 00:00:00 2001
From: Ketan Verma <ketan9495@gmail.com>
Date: Wed, 16 Aug 2023 11:59:20 +0530
Subject: [PATCH 4/9] Update t1ha1 to use unsignedMultiplyHigh on JDK 18 and
 above

Signed-off-by: Ketan Verma <ketan9495@gmail.com>
---
 .../common/hash/HashFunctionBenchmark.java    |   4 +-
 .../common/util/BytesRefHashBenchmark.java    |   8 +-
 .../common/hash/{T1ha.java => T1ha1.java}     |  54 ++++++-
 .../hash/{T1haTests.java => T1Ha1Tests.java}  | 150 +++++++++++++-----
 .../opensearch/common/util/BytesRefHash.java  |   4 +-
 .../org/opensearch/common/NumbersTests.java   |   4 +-
 .../common/util/BytesRefHashTests.java        |   4 +-
 7 files changed, 170 insertions(+), 58 deletions(-)
 rename libs/common/src/main/java/org/opensearch/common/hash/{T1ha.java => T1ha1.java} (80%)
 rename libs/common/src/test/java/org/opensearch/common/hash/{T1haTests.java => T1Ha1Tests.java} (53%)

diff --git a/benchmarks/src/main/java/org/opensearch/common/hash/HashFunctionBenchmark.java b/benchmarks/src/main/java/org/opensearch/common/hash/HashFunctionBenchmark.java
index bae6a813fc5a1..8842337a468a1 100644
--- a/benchmarks/src/main/java/org/opensearch/common/hash/HashFunctionBenchmark.java
+++ b/benchmarks/src/main/java/org/opensearch/common/hash/HashFunctionBenchmark.java
@@ -36,7 +36,7 @@ public void hash(Blackhole bh, Options opts) {
 
     @State(Scope.Benchmark)
     public static class Options {
-        @Param({ "MURMUR3", "T1HA" })
+        @Param({ "MURMUR3", "T1HA1" })
         public Type type;
 
         @Param({
@@ -152,7 +152,7 @@ public void setup() {
 
     public enum Type {
         MURMUR3((data, offset, length) -> StringHelper.murmurhash3_x86_32(data, offset, length, 0)),
-        T1HA((data, offset, length) -> T1ha.hash(data, offset, length, 0));
+        T1HA1((data, offset, length) -> T1ha1.hash(data, offset, length, 0));
 
         private final Hasher hasher;
 
diff --git a/benchmarks/src/main/java/org/opensearch/common/util/BytesRefHashBenchmark.java b/benchmarks/src/main/java/org/opensearch/common/util/BytesRefHashBenchmark.java
index 379653a53bfd2..3071d1f4c7d7e 100644
--- a/benchmarks/src/main/java/org/opensearch/common/util/BytesRefHashBenchmark.java
+++ b/benchmarks/src/main/java/org/opensearch/common/util/BytesRefHashBenchmark.java
@@ -22,7 +22,7 @@
 import org.openjdk.jmh.annotations.State;
 import org.openjdk.jmh.annotations.Warmup;
 import org.openjdk.jmh.infra.Blackhole;
-import org.opensearch.common.hash.T1ha;
+import org.opensearch.common.hash.T1ha1;
 import org.opensearch.common.lease.Releasable;
 import org.opensearch.common.lease.Releasables;
 
@@ -58,7 +58,7 @@ public void add(Blackhole bh, Options opts) {
 
     @State(Scope.Benchmark)
     public static class Options {
-        @Param({ "MURMUR3", "T1HA" })
+        @Param({ "MURMUR3", "T1HA1" })
         public Type type;
 
         @Param({
@@ -213,11 +213,11 @@ public void close() {
             }
         }),
 
-        T1HA(() -> new HashTable() {
+        T1HA1(() -> new HashTable() {
             private final BytesRefHash table = new BytesRefHash(
                 1,
                 0.6f,
-                key -> T1ha.hash(key.bytes, key.offset, key.length, 0),
+                key -> T1ha1.hash(key.bytes, key.offset, key.length, 0),
                 BigArrays.NON_RECYCLING_INSTANCE
             );
 
diff --git a/libs/common/src/main/java/org/opensearch/common/hash/T1ha.java b/libs/common/src/main/java/org/opensearch/common/hash/T1ha1.java
similarity index 80%
rename from libs/common/src/main/java/org/opensearch/common/hash/T1ha.java
rename to libs/common/src/main/java/org/opensearch/common/hash/T1ha1.java
index 185875f4a2d68..3a640257af32b 100644
--- a/libs/common/src/main/java/org/opensearch/common/hash/T1ha.java
+++ b/libs/common/src/main/java/org/opensearch/common/hash/T1ha1.java
@@ -8,7 +8,9 @@
 
 package org.opensearch.common.hash;
 
+import java.lang.invoke.MethodHandle;
 import java.lang.invoke.MethodHandles;
+import java.lang.invoke.MethodType;
 import java.lang.invoke.VarHandle;
 import java.nio.ByteOrder;
 
@@ -23,13 +25,14 @@
  *
  * <p>
  * To overcome language and performance limitations, this implementation differs slightly from the reference
- * implementation in C++, so the returned values will vary.
+ * implementation in C++, so the returned values may differ from the reference on JDKs older than 18.
  *
  * <p>
  * Intended for little-endian systems but returns the same result on big-endian, albeit marginally slower.
  */
-public class T1ha {
+public class T1ha1 {
     private static final long SEED = System.nanoTime();
+    private static final Mux64 MUX_64_IMPL = fastestMux64Impl();
 
     private static final VarHandle LONG_HANDLE = MethodHandles.byteArrayViewVarHandle(long[].class, ByteOrder.LITTLE_ENDIAN);
     private static final VarHandle INT_HANDLE = MethodHandles.byteArrayViewVarHandle(int[].class, ByteOrder.LITTLE_ENDIAN);
@@ -153,11 +156,7 @@ private static long h32(byte[] input, int offset, int length, long a, long b) {
      * XOR the high and low parts of the full 128-bit product.
      */
     private static long mux64(long a, long b) {
-        // Ideally, the following should be used to match the reference implementation:
-        // return Math.unsignedMultiplyHigh(a, b) ^ (a * b);
-        // Since unsignedMultiplyHigh isn't available before JDK 18, and calculating it without intrinsics is quite slow,
-        // the multiplyHigh method is used instead. Slight loss in quality is imperceptible for our use-case: a hash table.
-        return Math.multiplyHigh(a, b) ^ (a * b);
+        return MUX_64_IMPL.mux64(a, b);
     }
 
     /**
@@ -223,4 +222,45 @@ private static long fetch16(byte[] input, int offset) {
     private static long fetch8(byte[] input, int offset) {
         return input[offset] & 0xFFL;
     }
+
+    /**
+     * A pluggable implementation of the mux64 step, which folds two 64-bit inputs into a single 64-bit value.
+     */
+    @FunctionalInterface
+    private interface Mux64 {
+        long mux64(long a, long b);
+    }
+
+    /**
+     * Selects the mux64 implementation to use, based on what the running JDK provides.
+     *
+     * <p>
+     * When {@code Math.unsignedMultiplyHigh} can be looked up (JDK 18 and above), the returned implementation
+     * matches the reference implementation: {@code Math.unsignedMultiplyHigh(a, b) ^ (a * b)}
+     *
+     * <p>
+     * When the lookup fails with NoSuchMethodException, calculating the unsigned high part without intrinsics
+     * is quite slow, so the fallback is {@code Math.multiplyHigh(a, b) ^ (a * b)}. The slight loss in quality is
+     * imperceptible for our use-case: a hash table.
+     *
+     * <p>
+     * This indirection can be removed once we stop supporting older JDKs.
+     */
+    private static Mux64 fastestMux64Impl() {
+        try {
+            final MethodHandle unsignedMultiplyHigh = MethodHandles.publicLookup()
+                .findStatic(Math.class, "unsignedMultiplyHigh", MethodType.methodType(long.class, long.class, long.class));
+            return (a, b) -> {
+                try {
+                    return (long) unsignedMultiplyHigh.invokeExact(a, b) ^ (a * b);
+                } catch (Throwable e) { // invokeExact is declared to throw Throwable
+                    throw new RuntimeException(e);
+                }
+            };
+        } catch (NoSuchMethodException e) { // lookup failed: fall back to the signed multiplyHigh variant
+            return (a, b) -> Math.multiplyHigh(a, b) ^ (a * b);
+        } catch (IllegalAccessException e) {
+            throw new RuntimeException(e);
+        }
+    }
 }
diff --git a/libs/common/src/test/java/org/opensearch/common/hash/T1haTests.java b/libs/common/src/test/java/org/opensearch/common/hash/T1Ha1Tests.java
similarity index 53%
rename from libs/common/src/test/java/org/opensearch/common/hash/T1haTests.java
rename to libs/common/src/test/java/org/opensearch/common/hash/T1Ha1Tests.java
index d6132e235d89b..bd72a5dcac703 100644
--- a/libs/common/src/test/java/org/opensearch/common/hash/T1haTests.java
+++ b/libs/common/src/test/java/org/opensearch/common/hash/T1Ha1Tests.java
@@ -8,7 +8,10 @@
 
 package org.opensearch.common.hash;
 
-public class T1haTests extends HashFunctionTestCase {
+import java.lang.invoke.MethodHandles;
+import java.lang.invoke.MethodType;
+
+public class T1Ha1Tests extends HashFunctionTestCase {
 
     /**
      * Inspired from the tests defined in the reference implementation:
@@ -81,39 +84,94 @@ public void testSelfCheck() {
             'w',
             'x' };
 
-        /* Reference hashes when using {@link Math::unsignedMultiplyHigh} in the mux64 step.
-        These values match the ones defined in the reference implementation:
-        https://github.com/erthink/t1ha/blob/master/src/t1ha1_selfcheck.c#L51-L72
-
-        We don't expect our implementation to return these values as we use {@link Math::multiplyHigh} instead.
-        Keeping it here for context.
-
-        long[] reference = {
-                0L,
-                0x6A580668D6048674L, 0xA2FE904AFF0D0879L, 0xE3AB9C06FAF4D023L, 0x6AF1C60874C95442L,
-                0xB3557E561A6C5D82L, 0x0AE73C696F3D37C0L, 0x5EF25F7062324941L, 0x9B784F3B4CE6AF33L,
-                0x6993BB206A74F070L, 0xF1E95DF109076C4CL, 0x4E1EB70C58E48540L, 0x5FDD7649D8EC44E4L,
-                0x559122C706343421L, 0x380133D58665E93DL, 0x9CE74296C8C55AE4L, 0x3556F9A5757AB6D0L,
-                0xF62751F7F25C469EL, 0x851EEC67F6516D94L, 0xED463EE3848A8695L, 0xDC8791FEFF8ED3ACL,
-                0x2569C744E1A282CFL, 0xF90EB7C1D70A80B9L, 0x68DFA6A1B8050A4CL, 0x94CCA5E8210D2134L,
-                0xF5CC0BEABC259F52L, 0x40DBC1F51618FDA7L, 0x0807945BF0FB52C6L, 0xE5EF7E09DE70848DL,
-                0x63E1DF35FEBE994AL, 0x2025E73769720D5AL, 0xAD6120B2B8A152E1L, 0x2A71D9F13959F2B7L,
-                0x8A20849A27C32548L, 0x0BCBC9FE3B57884EL, 0x0E028D255667AEADL, 0xBE66DAD3043AB694L,
-                0xB00E4C1238F9E2D4L, 0x5C54BDE5AE280E82L, 0x0E22B86754BC3BC4L, 0x016707EBF858B84DL,
-                0x990015FBC9E095EEL, 0x8B9AF0A3E71F042FL, 0x6AA56E88BD380564L, 0xAACE57113E681A0FL,
-                0x19F81514AFA9A22DL, 0x80DABA3D62BEAC79L, 0x715210412CABBF46L, 0xD8FA0B9E9D6AA93FL,
-                0x6C2FC5A4109FD3A2L, 0x5B3E60EEB51DDCD8L, 0x0A7C717017756FE7L, 0xA73773805CA31934L,
-                0x4DBD6BB7A31E85FDL, 0x24F619D3D5BC2DB4L, 0x3E4AF35A1678D636L, 0x84A1A8DF8D609239L,
-                0x359C862CD3BE4FCDL, 0xCF3A39F5C27DC125L, 0xC0FF62F8FD5F4C77L, 0x5E9F2493DDAA166CL,
-                0x17424152BE1CA266L, 0xA78AFA5AB4BBE0CDL, 0x7BFB2E2CEF118346L, 0x647C3E0FF3E3D241L,
-                0x0352E4055C13242EL, 0x6F42FC70EB660E38L, 0x0BEBAD4FABF523BAL, 0x9269F4214414D61DL,
-                0x1CA8760277E6006CL, 0x7BAD25A859D87B5DL, 0xAD645ADCF7414F1DL, 0xB07F517E88D7AFB3L,
-                0xB321C06FB5FFAB5CL, 0xD50F162A1EFDD844L, 0x1DFD3D1924FBE319L, 0xDFAEAB2F09EF7E78L,
-                0xA7603B5AF07A0B1EL, 0x41CD044C0E5A4EE3L, 0xF64D2F86E813BF33L, 0xFF9FDB99305EB06AL
-        }; */
+        // Reference hashes when using {@link Math::unsignedMultiplyHigh} in the mux64 step.
+        // These values match the ones defined in the reference implementation:
+        // https://github.com/erthink/t1ha/blob/master/src/t1ha1_selfcheck.c#L51-L72
+        long[] referenceUnsignedMultiplyHigh = {
+            0L,
+            0x6A580668D6048674L,
+            0xA2FE904AFF0D0879L,
+            0xE3AB9C06FAF4D023L,
+            0x6AF1C60874C95442L,
+            0xB3557E561A6C5D82L,
+            0x0AE73C696F3D37C0L,
+            0x5EF25F7062324941L,
+            0x9B784F3B4CE6AF33L,
+            0x6993BB206A74F070L,
+            0xF1E95DF109076C4CL,
+            0x4E1EB70C58E48540L,
+            0x5FDD7649D8EC44E4L,
+            0x559122C706343421L,
+            0x380133D58665E93DL,
+            0x9CE74296C8C55AE4L,
+            0x3556F9A5757AB6D0L,
+            0xF62751F7F25C469EL,
+            0x851EEC67F6516D94L,
+            0xED463EE3848A8695L,
+            0xDC8791FEFF8ED3ACL,
+            0x2569C744E1A282CFL,
+            0xF90EB7C1D70A80B9L,
+            0x68DFA6A1B8050A4CL,
+            0x94CCA5E8210D2134L,
+            0xF5CC0BEABC259F52L,
+            0x40DBC1F51618FDA7L,
+            0x0807945BF0FB52C6L,
+            0xE5EF7E09DE70848DL,
+            0x63E1DF35FEBE994AL,
+            0x2025E73769720D5AL,
+            0xAD6120B2B8A152E1L,
+            0x2A71D9F13959F2B7L,
+            0x8A20849A27C32548L,
+            0x0BCBC9FE3B57884EL,
+            0x0E028D255667AEADL,
+            0xBE66DAD3043AB694L,
+            0xB00E4C1238F9E2D4L,
+            0x5C54BDE5AE280E82L,
+            0x0E22B86754BC3BC4L,
+            0x016707EBF858B84DL,
+            0x990015FBC9E095EEL,
+            0x8B9AF0A3E71F042FL,
+            0x6AA56E88BD380564L,
+            0xAACE57113E681A0FL,
+            0x19F81514AFA9A22DL,
+            0x80DABA3D62BEAC79L,
+            0x715210412CABBF46L,
+            0xD8FA0B9E9D6AA93FL,
+            0x6C2FC5A4109FD3A2L,
+            0x5B3E60EEB51DDCD8L,
+            0x0A7C717017756FE7L,
+            0xA73773805CA31934L,
+            0x4DBD6BB7A31E85FDL,
+            0x24F619D3D5BC2DB4L,
+            0x3E4AF35A1678D636L,
+            0x84A1A8DF8D609239L,
+            0x359C862CD3BE4FCDL,
+            0xCF3A39F5C27DC125L,
+            0xC0FF62F8FD5F4C77L,
+            0x5E9F2493DDAA166CL,
+            0x17424152BE1CA266L,
+            0xA78AFA5AB4BBE0CDL,
+            0x7BFB2E2CEF118346L,
+            0x647C3E0FF3E3D241L,
+            0x0352E4055C13242EL,
+            0x6F42FC70EB660E38L,
+            0x0BEBAD4FABF523BAL,
+            0x9269F4214414D61DL,
+            0x1CA8760277E6006CL,
+            0x7BAD25A859D87B5DL,
+            0xAD645ADCF7414F1DL,
+            0xB07F517E88D7AFB3L,
+            0xB321C06FB5FFAB5CL,
+            0xD50F162A1EFDD844L,
+            0x1DFD3D1924FBE319L,
+            0xDFAEAB2F09EF7E78L,
+            0xA7603B5AF07A0B1EL,
+            0x41CD044C0E5A4EE3L,
+            0xF64D2F86E813BF33L,
+            0xFF9FDB99305EB06AL };
 
         // Reference hashes when using {@link Math::multiplyHigh} in the mux64 step.
-        long[] reference = {
+        long[] referenceMultiplyHigh = {
             0L,
             0xCE510B7405E0A2CAL,
             0xC0A2DA74A8271FCBL,
@@ -196,21 +254,23 @@ public void testSelfCheck() {
             0x615730E301ED12E2L,
             0x1A2D4AA43B6C0103L };
 
+        long[] reference = hasUnsignedMultiplyHigh() ? referenceUnsignedMultiplyHigh : referenceMultiplyHigh;
+
         int offset = 0;
-        assertEquals(reference[offset++], T1ha.hash(null, 0, 0, 0L)); // empty-zero
-        assertEquals(reference[offset++], T1ha.hash(null, 0, 0, ~0L)); // empty-all1
-        assertEquals(reference[offset++], T1ha.hash(testPattern, 0, 64, 0L)); // bin64-zero
+        assertEquals(reference[offset++], T1ha1.hash(null, 0, 0, 0L)); // empty-zero
+        assertEquals(reference[offset++], T1ha1.hash(null, 0, 0, ~0L)); // empty-all1
+        assertEquals(reference[offset++], T1ha1.hash(testPattern, 0, 64, 0L)); // bin64-zero
 
         long seed = 1;
         for (int i = 1; i < 64; i++) {
-            assertEquals(reference[offset++], T1ha.hash(testPattern, 0, i, seed)); // bin%i-1p%i
+            assertEquals(reference[offset++], T1ha1.hash(testPattern, 0, i, seed)); // bin%i-1p%i
             seed <<= 1;
         }
 
         seed = ~0L;
         for (int i = 1; i <= 7; i++) {
             seed <<= 1;
-            assertEquals(reference[offset++], T1ha.hash(testPattern, i, 64 - i, seed)); // align%i_F%i
+            assertEquals(reference[offset++], T1ha1.hash(testPattern, i, 64 - i, seed)); // align%i_F%i
         }
 
         byte[] testPatternLong = new byte[512];
@@ -218,12 +278,24 @@ public void testSelfCheck() {
             testPatternLong[i] = (byte) i;
         }
         for (int i = 0; i <= 7; i++) {
-            assertEquals(reference[offset++], T1ha.hash(testPatternLong, i, 128 + i * 17, seed)); // long-%05i
+            assertEquals(reference[offset++], T1ha1.hash(testPatternLong, i, 128 + i * 17, seed)); // long-%05i
+        }
+    }
+
+    private static boolean hasUnsignedMultiplyHigh() {
+        try {
+            MethodHandles.publicLookup()
+                .findStatic(Math.class, "unsignedMultiplyHigh", MethodType.methodType(long.class, long.class, long.class));
+            return true;
+        } catch (NoSuchMethodException e) {
+            return false;
+        } catch (IllegalAccessException e) {
+            throw new RuntimeException(e);
         }
     }
 
     @Override
     public long hash(byte[] input) {
-        return T1ha.hash(input, 0, input.length);
+        return T1ha1.hash(input, 0, input.length);
     }
 }
diff --git a/server/src/main/java/org/opensearch/common/util/BytesRefHash.java b/server/src/main/java/org/opensearch/common/util/BytesRefHash.java
index 5107feb6b3a26..2985a21a5100e 100644
--- a/server/src/main/java/org/opensearch/common/util/BytesRefHash.java
+++ b/server/src/main/java/org/opensearch/common/util/BytesRefHash.java
@@ -34,7 +34,7 @@
 
 import org.apache.lucene.util.BytesRef;
 import org.opensearch.common.Numbers;
-import org.opensearch.common.hash.T1ha;
+import org.opensearch.common.hash.T1ha1;
 import org.opensearch.common.lease.Releasable;
 import org.opensearch.common.lease.Releasables;
 import org.opensearch.core.common.util.ByteArray;
@@ -56,7 +56,7 @@ public final class BytesRefHash implements Releasable {
     private static final long MAX_CAPACITY = 1L << 32;
     private static final long DEFAULT_INITIAL_CAPACITY = 32;
     private static final float DEFAULT_LOAD_FACTOR = 0.6f;
-    private static final Hasher DEFAULT_HASHER = key -> T1ha.hash(key.bytes, key.offset, key.length);
+    private static final Hasher DEFAULT_HASHER = key -> T1ha1.hash(key.bytes, key.offset, key.length);
 
     private static final long MASK_ORDINAL = 0x00000000FFFFFFFFL;  // extract ordinal
     private static final long MASK_FINGERPRINT = 0xFFFFFFFF00000000L;  // extract fingerprint
diff --git a/server/src/test/java/org/opensearch/common/NumbersTests.java b/server/src/test/java/org/opensearch/common/NumbersTests.java
index d6745f53f3569..7990ba74f162a 100644
--- a/server/src/test/java/org/opensearch/common/NumbersTests.java
+++ b/server/src/test/java/org/opensearch/common/NumbersTests.java
@@ -225,7 +225,7 @@ public void testToUnsignedBigInteger() {
     public void testNextPowerOfTwo() {
         // Negative values:
         for (int i = 0; i < 1000; i++) {
-            long value = randomLongBetween(-500, -1);
+            long value = randomLongBetween(-500000, -1);
             assertEquals(1, Numbers.nextPowerOfTwo(value));
         }
 
@@ -234,7 +234,7 @@ public void testNextPowerOfTwo() {
 
         // Positive values:
         for (int i = 0; i < 1000; i++) {
-            long value = randomLongBetween(1, 500);
+            long value = randomLongBetween(1, 500000);
             long nextPowerOfTwo = Numbers.nextPowerOfTwo(value);
 
             assertTrue(nextPowerOfTwo > value); // must be strictly greater
diff --git a/server/src/test/java/org/opensearch/common/util/BytesRefHashTests.java b/server/src/test/java/org/opensearch/common/util/BytesRefHashTests.java
index b182ba11198be..adcec8f07f702 100644
--- a/server/src/test/java/org/opensearch/common/util/BytesRefHashTests.java
+++ b/server/src/test/java/org/opensearch/common/util/BytesRefHashTests.java
@@ -35,7 +35,7 @@
 import org.apache.lucene.tests.util.TestUtil;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.BytesRefBuilder;
-import org.opensearch.common.hash.T1ha;
+import org.opensearch.common.hash.T1ha1;
 import org.opensearch.common.settings.Settings;
 import org.opensearch.core.indices.breaker.NoneCircuitBreakerService;
 import org.opensearch.test.OpenSearchTestCase;
@@ -63,7 +63,7 @@ private void newHash() {
         hash = new BytesRefHash(
             randomIntBetween(1, 100),      // random capacity
             0.6f + randomFloat() * 0.39f,  // random load factor to verify collision resolution
-            key -> T1ha.hash(key.bytes, key.offset, key.length, seed),
+            key -> T1ha1.hash(key.bytes, key.offset, key.length, seed),
             randomBigArrays()
         );
     }

From 374db9772460b96859ab46c4dfb53b3cc8e67f2f Mon Sep 17 00:00:00 2001
From: Ketan Verma <ketan9495@gmail.com>
Date: Wed, 16 Aug 2023 19:17:03 +0530
Subject: [PATCH 5/9] Add link to the reference implementation for t1ha1

Signed-off-by: Ketan Verma <ketan9495@gmail.com>
---
 .../src/main/java/org/opensearch/common/hash/T1ha1.java      | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/libs/common/src/main/java/org/opensearch/common/hash/T1ha1.java b/libs/common/src/main/java/org/opensearch/common/hash/T1ha1.java
index 3a640257af32b..92be34a64fb13 100644
--- a/libs/common/src/main/java/org/opensearch/common/hash/T1ha1.java
+++ b/libs/common/src/main/java/org/opensearch/common/hash/T1ha1.java
@@ -24,8 +24,9 @@
  * a fast portable hash function with reasonable quality for checksums, hash tables, and thin fingerprinting.
  *
  * <p>
- * To overcome language and performance limitations, this implementation differs slightly from the reference
- * implementation in C++, so the returned values may vary before JDK 18.
+ * To overcome language and performance limitations, this implementation differs slightly from the
+ * <a href="https://github.com/erthink/t1ha/blob/master/src/t1ha1.c">reference implementation</a> in C,
+ * so the returned values may vary before JDK 18.
  *
  * <p>
  * Intended for little-endian systems but returns the same result on big-endian, albeit marginally slower.

From 832f61a24e61c7a12f3a7295c96f69c4616fa7c7 Mon Sep 17 00:00:00 2001
From: Ketan Verma <ketan9495@gmail.com>
Date: Thu, 17 Aug 2023 21:36:40 +0530
Subject: [PATCH 6/9] Annotate t1ha1 with @opensearch.internal

Signed-off-by: Ketan Verma <ketan9495@gmail.com>
---
 .../src/main/java/org/opensearch/common/hash/T1ha1.java  | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/libs/common/src/main/java/org/opensearch/common/hash/T1ha1.java b/libs/common/src/main/java/org/opensearch/common/hash/T1ha1.java
index 92be34a64fb13..b02e3d777364b 100644
--- a/libs/common/src/main/java/org/opensearch/common/hash/T1ha1.java
+++ b/libs/common/src/main/java/org/opensearch/common/hash/T1ha1.java
@@ -30,8 +30,10 @@
  *
  * <p>
  * Intended for little-endian systems but returns the same result on big-endian, albeit marginally slower.
+ *
+ * @opensearch.internal
  */
-public class T1ha1 {
+public final class T1ha1 {
     private static final long SEED = System.nanoTime();
     private static final Mux64 MUX_64_IMPL = fastestMux64Impl();
 
@@ -53,6 +55,11 @@ public class T1ha1 {
     private static final int s1 = 17;
     private static final int s2 = 31;
 
+    /**
+     * No public constructor.
+     */
+    private T1ha1() {}
+
     /**
      * Returns the hash code for the specified range of the given {@code byte} array.
      * @param input the input byte array

From 7b53727213e2b239645f9810f3597b54a94ffacf Mon Sep 17 00:00:00 2001
From: Ketan Verma <ketan9495@gmail.com>
Date: Thu, 17 Aug 2023 22:33:21 +0530
Subject: [PATCH 7/9] Run spotless

Signed-off-by: Ketan Verma <ketan9495@gmail.com>
---
 .../org/opensearch/common/util/BytesRefHashBenchmark.java   | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/benchmarks/src/main/java/org/opensearch/common/util/BytesRefHashBenchmark.java b/benchmarks/src/main/java/org/opensearch/common/util/BytesRefHashBenchmark.java
index 3071d1f4c7d7e..fef12b6d9f84a 100644
--- a/benchmarks/src/main/java/org/opensearch/common/util/BytesRefHashBenchmark.java
+++ b/benchmarks/src/main/java/org/opensearch/common/util/BytesRefHashBenchmark.java
@@ -10,6 +10,9 @@
 
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.StringHelper;
+import org.opensearch.common.hash.T1ha1;
+import org.opensearch.common.lease.Releasable;
+import org.opensearch.common.lease.Releasables;
 import org.openjdk.jmh.annotations.Benchmark;
 import org.openjdk.jmh.annotations.BenchmarkMode;
 import org.openjdk.jmh.annotations.Fork;
@@ -22,9 +25,6 @@
 import org.openjdk.jmh.annotations.State;
 import org.openjdk.jmh.annotations.Warmup;
 import org.openjdk.jmh.infra.Blackhole;
-import org.opensearch.common.hash.T1ha1;
-import org.opensearch.common.lease.Releasable;
-import org.opensearch.common.lease.Releasables;
 
 import java.util.HashSet;
 import java.util.Random;

From f9c874d3a27c24e38165c0907d82cf73e51682b6 Mon Sep 17 00:00:00 2001
From: Ketan Verma <ketan9495@gmail.com>
Date: Wed, 23 Aug 2023 17:39:39 +0530
Subject: [PATCH 8/9] Add pre-computed hashes to speed up reinserts

Signed-off-by: Ketan Verma <ketan9495@gmail.com>
---
 .../opensearch/common/util/BytesRefHash.java    | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/server/src/main/java/org/opensearch/common/util/BytesRefHash.java b/server/src/main/java/org/opensearch/common/util/BytesRefHash.java
index 2985a21a5100e..efa3c470265bb 100644
--- a/server/src/main/java/org/opensearch/common/util/BytesRefHash.java
+++ b/server/src/main/java/org/opensearch/common/util/BytesRefHash.java
@@ -144,6 +144,12 @@ public final class BytesRefHash implements Releasable {
      */
     private ByteArray keys;
 
+    /**
+     * Pre-computed hashes of the stored keys.
+     * It is used to speed up reinserts when doubling the capacity.
+     */
+    private LongArray hashes;
+
     public BytesRefHash(final BigArrays bigArrays) {
         this(DEFAULT_INITIAL_CAPACITY, DEFAULT_LOAD_FACTOR, DEFAULT_HASHER, bigArrays);
     }
@@ -175,6 +181,7 @@ public BytesRefHash(final long initialCapacity, final float loadFactor, final Ha
         offsets = bigArrays.newLongArray(initialCapacity + 1, false);
         offsets.set(0, 0);
         keys = bigArrays.newByteArray(initialCapacity * 3, false);
+        hashes = bigArrays.newLongArray(initialCapacity, false);
     }
 
     /**
@@ -193,7 +200,7 @@ public long add(final BytesRef key) {
                 } else {
                     table.set(idx, val);
                 }
-                return append(key);
+                return append(key, hash);
             } else if (((value & MASK_FINGERPRINT) == fingerprint) && key.bytesEquals(get(ordinal = (value & MASK_ORDINAL), scratch))) {
                 return -1 - ordinal;
             }
@@ -254,13 +261,15 @@ public long size() {
     /**
      * Appends the key in the keys' and offsets' tables.
      */
-    private long append(final BytesRef key) {
+    private long append(final BytesRef key, final long hash) {
         final long start = offsets.get(size);
         final long end = start + key.length;
         offsets = bigArrays.grow(offsets, size + 2);
         offsets.set(size + 1, end);
         keys = bigArrays.grow(keys, end);
         keys.set(start, key.bytes, key.offset, key.length);
+        hashes = bigArrays.grow(hashes, size + 1);
+        hashes.set(size, hash);
         return size++;
     }
 
@@ -282,7 +291,7 @@ private void growAndInsert(final long hash, final long value) {
         table.set(hash & mask, value);
 
         for (long ordinal = 0; ordinal < size; ordinal++) {
-            reinsert(ordinal, hasher.hash(get(ordinal, scratch)));
+            reinsert(ordinal, hashes.get(ordinal));
         }
     }
 
@@ -300,7 +309,7 @@ private void reinsert(final long ordinal, final long hash) {
 
     @Override
     public void close() {
-        Releasables.close(table, offsets, keys);
+        Releasables.close(table, offsets, keys, hashes);
     }
 
     /**

From 58d3394a70622365902d55e500d64dbe6523fef0 Mon Sep 17 00:00:00 2001
From: Ketan Verma <ketan9495@gmail.com>
Date: Fri, 25 Aug 2023 09:11:57 +0530
Subject: [PATCH 9/9] Refactor HashFunctionTestCase

Signed-off-by: Ketan Verma <ketan9495@gmail.com>
---
 .../org/opensearch/common/hash/T1ha1.java     |   3 +
 .../common/hash/HashFunctionTestCase.java     | 137 ------------------
 .../opensearch/common/hash/T1Ha1Tests.java    |  21 ++-
 .../opensearch/common/util/BytesRefHash.java  |   2 +
 .../common/util/ReorganizingLongHash.java     |   2 +
 .../common/hash/AvalancheStats.java           |  63 ++++++++
 .../common/hash/HashFunctionTestCase.java     |  79 ++++++++++
 .../hash/HashFunctionTestCaseTests.java       |  68 +++++++++
 8 files changed, 233 insertions(+), 142 deletions(-)
 delete mode 100644 libs/common/src/test/java/org/opensearch/common/hash/HashFunctionTestCase.java
 create mode 100644 test/framework/src/main/java/org/opensearch/common/hash/AvalancheStats.java
 create mode 100644 test/framework/src/main/java/org/opensearch/common/hash/HashFunctionTestCase.java
 create mode 100644 test/framework/src/test/java/org/opensearch/common/hash/HashFunctionTestCaseTests.java

diff --git a/libs/common/src/main/java/org/opensearch/common/hash/T1ha1.java b/libs/common/src/main/java/org/opensearch/common/hash/T1ha1.java
index b02e3d777364b..07b2306eda4e5 100644
--- a/libs/common/src/main/java/org/opensearch/common/hash/T1ha1.java
+++ b/libs/common/src/main/java/org/opensearch/common/hash/T1ha1.java
@@ -8,6 +8,8 @@
 
 package org.opensearch.common.hash;
 
+import org.opensearch.common.annotation.InternalApi;
+
 import java.lang.invoke.MethodHandle;
 import java.lang.invoke.MethodHandles;
 import java.lang.invoke.MethodType;
@@ -33,6 +35,7 @@
  *
  * @opensearch.internal
  */
+@InternalApi
 public final class T1ha1 {
     private static final long SEED = System.nanoTime();
     private static final Mux64 MUX_64_IMPL = fastestMux64Impl();
diff --git a/libs/common/src/test/java/org/opensearch/common/hash/HashFunctionTestCase.java b/libs/common/src/test/java/org/opensearch/common/hash/HashFunctionTestCase.java
deleted file mode 100644
index 6f7f813bffa24..0000000000000
--- a/libs/common/src/test/java/org/opensearch/common/hash/HashFunctionTestCase.java
+++ /dev/null
@@ -1,137 +0,0 @@
-/*
- * SPDX-License-Identifier: Apache-2.0
- *
- * The OpenSearch Contributors require contributions made to
- * this file be licensed under the Apache-2.0 license or a
- * compatible open source license.
- */
-
-package org.opensearch.common.hash;
-
-import org.opensearch.common.Randomness;
-import org.opensearch.test.OpenSearchTestCase;
-
-import java.util.Locale;
-import java.util.Random;
-
-public abstract class HashFunctionTestCase extends OpenSearchTestCase {
-    private static final int[] INPUT_BITS = new int[] { 24, 32, 40, 48, 56, 64, 72, 80, 96, 112, 128, 160, 512, 1024 };
-    private static final int OUTPUT_BITS = 64;
-    private static final int ITERATIONS = 1000;
-    private static final double BIAS_THRESHOLD = 0.01; // 1%
-
-    public abstract long hash(byte[] input);
-
-    /**
-     * Tests if the hash function shows an avalanche effect, i.e, flipping a single input bit
-     * should flip half the output bits.
-     */
-    public final void testAvalanche() {
-        for (int inputBits : INPUT_BITS) {
-            AvalancheStats stats = simulate(inputBits, OUTPUT_BITS, new RandomInputGenerator(inputBits));
-            if (stats.bias() >= BIAS_THRESHOLD) {
-                fail("bias exceeds threshold: " + stats);
-            }
-        }
-    }
-
-    private AvalancheStats simulate(int inputBits, int outputBits, InputGenerator inputGenerator) {
-        int[][] flips = new int[inputBits][outputBits];
-
-        for (int iter = 0; iter < ITERATIONS; iter++) {
-            byte[] input = inputGenerator.next();
-            long hash = hash(input);
-
-            for (int i = 0; i < inputBits; i++) {
-                flip(input, i); // flip one bit
-                long newHash = hash(input); // recompute the hash; half the bits should have flipped
-                flip(input, i); // return to original
-
-                long diff = hash ^ newHash;
-                for (int o = 0; o < OUTPUT_BITS; o++) {
-                    if ((diff & 1) == 1) {
-                        flips[i][o] += 1;
-                    }
-                    diff >>>= 1;
-                }
-            }
-        }
-
-        return new AvalancheStats(flips);
-    }
-
-    private static void flip(byte[] input, int position) {
-        int offset = position / 8;
-        int bit = position & 7;
-        input[offset] ^= (1 << bit);
-    }
-
-    @FunctionalInterface
-    interface InputGenerator {
-        byte[] next();
-    }
-
-    private static class RandomInputGenerator implements InputGenerator {
-        private final Random random = Randomness.get();
-        private final byte[] input;
-
-        public RandomInputGenerator(int size) {
-            input = new byte[size];
-        }
-
-        @Override
-        public byte[] next() {
-            random.nextBytes(input);
-            return input;
-        }
-    }
-
-    private static class AvalancheStats {
-        private final int inputBits;
-        private final int outputBits;
-        private final double bias;
-        private final double sumOfSquaredErrors;
-
-        public AvalancheStats(int[][] flips) {
-            this.inputBits = flips.length;
-            this.outputBits = flips[0].length;
-            double sumOfBiases = 0;
-            double sumOfSquaredErrors = 0;
-
-            for (int i = 0; i < inputBits; i++) {
-                for (int o = 0; o < outputBits; o++) {
-                    sumOfSquaredErrors += Math.pow(0.5 - ((double) flips[i][o] / ITERATIONS), 2);
-                    sumOfBiases += 2 * ((double) flips[i][o] / ITERATIONS) - 1;
-                }
-            }
-
-            this.bias = Math.abs(sumOfBiases / (inputBits * outputBits));
-            this.sumOfSquaredErrors = sumOfSquaredErrors;
-        }
-
-        public double bias() {
-            return bias;
-        }
-
-        public double diffusion() {
-            return 1 - bias;
-        }
-
-        public double sumOfSquaredErrors() {
-            return sumOfSquaredErrors;
-        }
-
-        @Override
-        public String toString() {
-            return String.format(
-                Locale.ROOT,
-                "AvalancheStats{inputBits=%d, outputBits=%d, bias=%.4f%%, diffusion=%.4f%%, sumOfSquaredErrors=%.2f}",
-                inputBits,
-                outputBits,
-                bias() * 100,
-                diffusion() * 100,
-                sumOfSquaredErrors()
-            );
-        }
-    }
-}
diff --git a/libs/common/src/test/java/org/opensearch/common/hash/T1Ha1Tests.java b/libs/common/src/test/java/org/opensearch/common/hash/T1Ha1Tests.java
index bd72a5dcac703..e348fbf759bdd 100644
--- a/libs/common/src/test/java/org/opensearch/common/hash/T1Ha1Tests.java
+++ b/libs/common/src/test/java/org/opensearch/common/hash/T1Ha1Tests.java
@@ -10,8 +10,12 @@
 
 import java.lang.invoke.MethodHandles;
 import java.lang.invoke.MethodType;
+import java.lang.invoke.VarHandle;
+import java.nio.ByteOrder;
 
 public class T1Ha1Tests extends HashFunctionTestCase {
+    private static final VarHandle LONG_HANDLE = MethodHandles.byteArrayViewVarHandle(long[].class, ByteOrder.LITTLE_ENDIAN);
+    private final byte[] scratch = new byte[8];
 
     /**
      * Inspired from the tests defined in the reference implementation:
@@ -282,6 +286,18 @@ public void testSelfCheck() {
         }
     }
 
+    @Override
+    public byte[] hash(byte[] input) {
+        long hash = T1ha1.hash(input, 0, input.length);
+        LONG_HANDLE.set(scratch, 0, hash);
+        return scratch;
+    }
+
+    @Override
+    public int outputBits() {
+        return 64;
+    }
+
     private static boolean hasUnsignedMultiplyHigh() {
         try {
             MethodHandles.publicLookup()
@@ -293,9 +309,4 @@ private static boolean hasUnsignedMultiplyHigh() {
             throw new RuntimeException(e);
         }
     }
-
-    @Override
-    public long hash(byte[] input) {
-        return T1ha1.hash(input, 0, input.length);
-    }
 }
diff --git a/server/src/main/java/org/opensearch/common/util/BytesRefHash.java b/server/src/main/java/org/opensearch/common/util/BytesRefHash.java
index efa3c470265bb..4afba2905019a 100644
--- a/server/src/main/java/org/opensearch/common/util/BytesRefHash.java
+++ b/server/src/main/java/org/opensearch/common/util/BytesRefHash.java
@@ -34,6 +34,7 @@
 
 import org.apache.lucene.util.BytesRef;
 import org.opensearch.common.Numbers;
+import org.opensearch.common.annotation.InternalApi;
 import org.opensearch.common.hash.T1ha1;
 import org.opensearch.common.lease.Releasable;
 import org.opensearch.common.lease.Releasables;
@@ -52,6 +53,7 @@
  *
  * @opensearch.internal
  */
+@InternalApi
 public final class BytesRefHash implements Releasable {
     private static final long MAX_CAPACITY = 1L << 32;
     private static final long DEFAULT_INITIAL_CAPACITY = 32;
diff --git a/server/src/main/java/org/opensearch/common/util/ReorganizingLongHash.java b/server/src/main/java/org/opensearch/common/util/ReorganizingLongHash.java
index d6c29fcae3a94..86e7227cb6c85 100644
--- a/server/src/main/java/org/opensearch/common/util/ReorganizingLongHash.java
+++ b/server/src/main/java/org/opensearch/common/util/ReorganizingLongHash.java
@@ -9,6 +9,7 @@
 package org.opensearch.common.util;
 
 import org.opensearch.common.Numbers;
+import org.opensearch.common.annotation.InternalApi;
 import org.opensearch.common.lease.Releasable;
 import org.opensearch.common.lease.Releasables;
 
@@ -26,6 +27,7 @@
  *
  * @opensearch.internal
  */
+@InternalApi
 public class ReorganizingLongHash implements Releasable {
     private static final long MAX_CAPACITY = 1L << 32;
     private static final long DEFAULT_INITIAL_CAPACITY = 32;
diff --git a/test/framework/src/main/java/org/opensearch/common/hash/AvalancheStats.java b/test/framework/src/main/java/org/opensearch/common/hash/AvalancheStats.java
new file mode 100644
index 0000000000000..c1600abcacd3e
--- /dev/null
+++ b/test/framework/src/main/java/org/opensearch/common/hash/AvalancheStats.java
@@ -0,0 +1,63 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.common.hash;
+
+import java.util.Locale;
+
+/**
+ * Represents the avalanche statistics of a hash function.
+ */
+public class AvalancheStats {
+    private final int inputBits;
+    private final int outputBits;
+    private final double bias;
+    private final double sumOfSquaredErrors;
+
+    public AvalancheStats(int[][] flips, int iterations) {
+        this.inputBits = flips.length;
+        this.outputBits = flips[0].length;
+        double sumOfBiases = 0;
+        double sumOfSquaredErrors = 0;
+
+        for (int i = 0; i < inputBits; i++) {
+            for (int o = 0; o < outputBits; o++) {
+                sumOfSquaredErrors += Math.pow(0.5 - ((double) flips[i][o] / iterations), 2);
+                sumOfBiases += 2 * ((double) flips[i][o] / iterations) - 1;
+            }
+        }
+
+        this.bias = Math.abs(sumOfBiases / (inputBits * outputBits));
+        this.sumOfSquaredErrors = sumOfSquaredErrors;
+    }
+
+    public double bias() {
+        return bias;
+    }
+
+    public double diffusion() {
+        return 1 - bias;
+    }
+
+    public double sumOfSquaredErrors() {
+        return sumOfSquaredErrors;
+    }
+
+    @Override
+    public String toString() {
+        return String.format(
+            Locale.ROOT,
+            "AvalancheStats{inputBits=%d, outputBits=%d, bias=%.4f%%, diffusion=%.4f%%, sumOfSquaredErrors=%.2f}",
+            inputBits,
+            outputBits,
+            bias() * 100,
+            diffusion() * 100,
+            sumOfSquaredErrors()
+        );
+    }
+}
diff --git a/test/framework/src/main/java/org/opensearch/common/hash/HashFunctionTestCase.java b/test/framework/src/main/java/org/opensearch/common/hash/HashFunctionTestCase.java
new file mode 100644
index 0000000000000..e272fe0962047
--- /dev/null
+++ b/test/framework/src/main/java/org/opensearch/common/hash/HashFunctionTestCase.java
@@ -0,0 +1,79 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.common.hash;
+
+import org.opensearch.common.Randomness;
+import org.opensearch.test.OpenSearchTestCase;
+
+import java.util.Arrays;
+import java.util.Random;
+
+/**
+ * Base class for testing the quality of hash functions.
+ */
+public abstract class HashFunctionTestCase extends OpenSearchTestCase {
+    private static final int[] INPUT_BITS = new int[] { 24, 32, 40, 48, 56, 64, 72, 80, 96, 112, 128, 160, 512, 1024 };
+    private static final int ITERATIONS = 1000;
+    private static final double BIAS_THRESHOLD = 0.01; // 1%
+
+    public abstract byte[] hash(byte[] input);
+
+    public abstract int outputBits();
+
+    /**
+     * Tests if the hash function shows an avalanche effect, i.e., flipping a single input bit
+     * should flip about half of the output bits on average.
+     */
+    public void testAvalanche() {
+        for (int inputBits : INPUT_BITS) {
+            AvalancheStats stats = simulate(inputBits);
+            if (stats.bias() >= BIAS_THRESHOLD) {
+                fail("bias exceeds threshold: " + stats);
+            }
+        }
+    }
+
+    private AvalancheStats simulate(int inputBits) {
+        int outputBits = outputBits();
+        assert inputBits % 8 == 0; // using full bytes for simplicity
+        assert outputBits % 8 == 0; // using full bytes for simplicity
+        byte[] input = new byte[inputBits >>> 3];
+        Random random = Randomness.get();
+        int[][] flips = new int[inputBits][outputBits];
+
+        for (int iter = 0; iter < ITERATIONS; iter++) {
+            random.nextBytes(input);
+            byte[] hash = Arrays.copyOf(hash(input), outputBits >>> 3); // copying since the underlying byte-array is reused
+
+            for (int i = 0; i < inputBits; i++) {
+                flipBit(input, i); // flip one bit
+                byte[] newHash = hash(input); // recompute the hash; half the bits should have flipped
+                flipBit(input, i); // return to original
+
+                for (int o = 0; o < outputBits; o++) {
+                    flips[i][o] += getBit(hash, o) ^ getBit(newHash, o);
+                }
+            }
+        }
+
+        return new AvalancheStats(flips, ITERATIONS);
+    }
+
+    private static void flipBit(byte[] input, int position) {
+        int offset = position / 8;
+        int bit = position & 7;
+        input[offset] ^= (1 << bit);
+    }
+
+    private static int getBit(byte[] input, int position) {
+        int offset = position / 8;
+        int bit = position & 7;
+        return (input[offset] >>> bit) & 1;
+    }
+}
diff --git a/test/framework/src/test/java/org/opensearch/common/hash/HashFunctionTestCaseTests.java b/test/framework/src/test/java/org/opensearch/common/hash/HashFunctionTestCaseTests.java
new file mode 100644
index 0000000000000..d5fdaf10999fc
--- /dev/null
+++ b/test/framework/src/test/java/org/opensearch/common/hash/HashFunctionTestCaseTests.java
@@ -0,0 +1,68 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.common.hash;
+
+import org.apache.lucene.util.StringHelper;
+import org.opensearch.test.OpenSearchTestCase;
+
+import java.lang.invoke.MethodHandles;
+import java.lang.invoke.VarHandle;
+import java.nio.ByteOrder;
+import java.util.Arrays;
+
+public class HashFunctionTestCaseTests extends OpenSearchTestCase {
+    private static final VarHandle INT_HANDLE = MethodHandles.byteArrayViewVarHandle(int[].class, ByteOrder.LITTLE_ENDIAN);
+
+    /**
+     * Asserts the positive case where a hash function passes the avalanche test.
+     */
+    public void testStrongHashFunction() {
+        HashFunctionTestCase murmur3 = new HashFunctionTestCase() {
+            private final byte[] scratch = new byte[4];
+
+            @Override
+            public byte[] hash(byte[] input) {
+                int hash = StringHelper.murmurhash3_x86_32(input, 0, input.length, StringHelper.GOOD_FAST_HASH_SEED);
+                INT_HANDLE.set(scratch, 0, hash);
+                return scratch;
+            }
+
+            @Override
+            public int outputBits() {
+                return 32;
+            }
+        };
+
+        murmur3.testAvalanche();
+    }
+
+    /**
+     * Asserts the negative case where a hash function fails the avalanche test.
+     */
+    public void testWeakHashFunction() {
+        HashFunctionTestCase arraysHashCode = new HashFunctionTestCase() {
+            private final byte[] scratch = new byte[4];
+
+            @Override
+            public byte[] hash(byte[] input) {
+                int hash = Arrays.hashCode(input);
+                INT_HANDLE.set(scratch, 0, hash);
+                return scratch;
+            }
+
+            @Override
+            public int outputBits() {
+                return 32;
+            }
+        };
+
+        AssertionError ex = expectThrows(AssertionError.class, arraysHashCode::testAvalanche);
+        assertTrue(ex.getMessage().contains("bias exceeds threshold"));
+    }
+}