diff --git a/android/guava-tests/test/com/google/common/hash/HashFunctionEnum.java b/android/guava-tests/test/com/google/common/hash/HashFunctionEnum.java index 3a98fede9540..34717477e645 100644 --- a/android/guava-tests/test/com/google/common/hash/HashFunctionEnum.java +++ b/android/guava-tests/test/com/google/common/hash/HashFunctionEnum.java @@ -32,6 +32,7 @@ enum HashFunctionEnum { MD5(Hashing.md5()), MURMUR3_128(Hashing.murmur3_128()), MURMUR3_32(Hashing.murmur3_32()), + MURMUR3_32_FIXED(Hashing.murmur3_32_fixed()), SHA1(Hashing.sha1()), SHA256(Hashing.sha256()), SHA384(Hashing.sha384()), diff --git a/android/guava-tests/test/com/google/common/hash/HashingTest.java b/android/guava-tests/test/com/google/common/hash/HashingTest.java index 58c93b41c360..bc3db34f050a 100644 --- a/android/guava-tests/test/com/google/common/hash/HashingTest.java +++ b/android/guava-tests/test/com/google/common/hash/HashingTest.java @@ -432,6 +432,9 @@ public void testHashIntVsForLoop() { .put(Hashing.murmur3_32(), EMPTY_STRING, "00000000") .put(Hashing.murmur3_32(), TQBFJOTLD, "23f74f2e") .put(Hashing.murmur3_32(), TQBFJOTLDP, "fc8bc4d5") + .put(Hashing.murmur3_32_fixed(), EMPTY_STRING, "00000000") + .put(Hashing.murmur3_32_fixed(), TQBFJOTLD, "23f74f2e") + .put(Hashing.murmur3_32_fixed(), TQBFJOTLDP, "fc8bc4d5") .put(Hashing.sha1(), EMPTY_STRING, "da39a3ee5e6b4b0d3255bfef95601890afd80709") .put(Hashing.sha1(), TQBFJOTLD, "2fd4e1c67a2d28fced849ee1bb76e7391b93eb12") .put(Hashing.sha1(), TQBFJOTLDP, "408d94384216f890ff7a0c3528e8bed1e0b01621") diff --git a/android/guava-tests/test/com/google/common/hash/Murmur3Hash32Test.java b/android/guava-tests/test/com/google/common/hash/Murmur3Hash32Test.java index bda6a9457527..181b2a7bb071 100644 --- a/android/guava-tests/test/com/google/common/hash/Murmur3Hash32Test.java +++ b/android/guava-tests/test/com/google/common/hash/Murmur3Hash32Test.java @@ -17,9 +17,11 @@ package com.google.common.hash; import static com.google.common.hash.Hashing.murmur3_32; +import static com.google.common.hash.Hashing.murmur3_32_fixed; import com.google.common.base.Charsets; import com.google.common.hash.HashTestUtils.HashFn; +import java.nio.charset.Charset; import java.util.Random; import junit.framework.TestCase; @@ -51,28 +53,53 @@ public void testKnownStringInputs() { -528633700, murmur3_32().hashUnencodedChars("The quick brown fox jumps over the lazy dog")); } - public void testKnownUtf8StringInputs() { - assertHash(0, murmur3_32().hashString("", Charsets.UTF_8)); - assertHash(0xcfbda5d1, murmur3_32().hashString("k", Charsets.UTF_8)); - assertHash(0xa167dbf3, murmur3_32().hashString("hell", Charsets.UTF_8)); - assertHash(0x248bfa47, murmur3_32().hashString("hello", Charsets.UTF_8)); - assertHash(0x3d41b97c, murmur3_32().hashString("http://www.google.com/", Charsets.UTF_8)); - assertHash( - 0x2e4ff723, - murmur3_32().hashString("The quick brown fox jumps over the lazy dog", Charsets.UTF_8)); - assertHash(0xfc5ba834, murmur3_32().hashString("毎月1日,毎週月曜日", Charsets.UTF_8)); - assertHash( - 0x3f4aff5c, - murmur3_32().hashString(Character.toString(Character.MAX_VALUE), Charsets.UTF_8)); - assertHash( - 0x81db5903, - murmur3_32() - .hashString(new String(Character.toChars(Character.MAX_VALUE + 1)), Charsets.UTF_8)); - // Note (https://github.com/google/guava/issues/5648) the hash expected here is not correct - assertHash( - 0x256068c8, - murmur3_32() - .hashString(new String(Character.toChars(Character.MAX_CODE_POINT)), Charsets.UTF_8)); + @SuppressWarnings("deprecation") + public void testKnownEncodedStringInputs() { + assertStringHash(0, "", Charsets.UTF_8); + assertStringHash(0xcfbda5d1, "k", Charsets.UTF_8); + assertStringHash(0xa167dbf3, "hell", Charsets.UTF_8); + assertStringHash(0x248bfa47, "hello", Charsets.UTF_8); + assertStringHash(0x3d41b97c, "http://www.google.com/", Charsets.UTF_8); + assertStringHash(0x2e4ff723, "The quick brown fox jumps over the lazy dog", Charsets.UTF_8); + assertStringHash(0xb5a4be05, "ABCDefGHI\u0799", Charsets.UTF_8); + assertStringHash(0xfc5ba834, "毎月1日,毎週月曜日", Charsets.UTF_8); + assertStringHash(0x8a5c3699, "surrogate pair: \uD83D\uDCB0", Charsets.UTF_8); + + assertStringHash(0, "", Charsets.UTF_16LE); + assertStringHash(0x288418e4, "k", Charsets.UTF_16LE); + assertStringHash(0x5a0cb7c3, "hell", Charsets.UTF_16LE); + assertStringHash(0xd7c31989, "hello", Charsets.UTF_16LE); + assertStringHash(0x73564d8c, "http://www.google.com/", Charsets.UTF_16LE); + assertStringHash(0xe07db09c, "The quick brown fox jumps over the lazy dog", Charsets.UTF_16LE); + assertStringHash(0xfefa3e76, "ABCDefGHI\u0799", Charsets.UTF_16LE); + assertStringHash(0x6a7be132, "毎月1日,毎週月曜日", Charsets.UTF_16LE); + assertStringHash(0x5a2d41c7, "surrogate pair: \uD83D\uDCB0", Charsets.UTF_16LE); + } + + @SuppressWarnings("deprecation") + private void assertStringHash(int expected, String string, Charset charset) { + if (allBmp(string)) { + assertHash(expected, murmur3_32().hashString(string, charset)); + } + assertHash(expected, murmur3_32_fixed().hashString(string, charset)); + assertHash(expected, murmur3_32().newHasher().putString(string, charset).hash()); + assertHash(expected, murmur3_32_fixed().newHasher().putString(string, charset).hash()); + assertHash(expected, murmur3_32().hashBytes(string.getBytes(charset))); + assertHash(expected, murmur3_32_fixed().hashBytes(string.getBytes(charset))); + assertHash(expected, murmur3_32().newHasher().putBytes(string.getBytes(charset)).hash()); + assertHash(expected, murmur3_32_fixed().newHasher().putBytes(string.getBytes(charset)).hash()); + } + + private boolean allBmp(String string) { + // Ordinarily we'd use something like i += Character.charCount(string.codePointAt(i)) here. But + // we can get away with i++ because the whole point of this method is to return false if we find + // a code point that doesn't fit in a char. + for (int i = 0; i < string.length(); i++) { + if (string.codePointAt(i) > 0xffff) { + return false; + } + } + return true; } @SuppressWarnings("deprecation") @@ -83,7 +110,7 @@ public void testSimpleStringUtf8() { } @SuppressWarnings("deprecation") - public void testStringInputsUtf8() { + public void testEncodedStringInputs() { Random rng = new Random(0); for (int z = 0; z < 100; z++) { String str; @@ -100,9 +127,16 @@ public void testStringInputsUtf8() { builder.appendCodePoint(codePoints[i]); } str = builder.toString(); + HashCode hashUtf8 = murmur3_32().hashBytes(str.getBytes(Charsets.UTF_8)); + assertEquals( + hashUtf8, murmur3_32().newHasher().putBytes(str.getBytes(Charsets.UTF_8)).hash()); + assertEquals(hashUtf8, murmur3_32().hashString(str, Charsets.UTF_8)); + assertEquals(hashUtf8, murmur3_32().newHasher().putString(str, Charsets.UTF_8).hash()); + HashCode hashUtf16 = murmur3_32().hashBytes(str.getBytes(Charsets.UTF_16)); assertEquals( - murmur3_32().hashBytes(str.getBytes(Charsets.UTF_8)), - murmur3_32().hashString(str, Charsets.UTF_8)); + hashUtf16, murmur3_32().newHasher().putBytes(str.getBytes(Charsets.UTF_16)).hash()); + assertEquals(hashUtf16, murmur3_32().hashString(str, Charsets.UTF_16)); + assertEquals(hashUtf16, murmur3_32().newHasher().putString(str, Charsets.UTF_16).hash()); } } @@ -150,8 +184,12 @@ public void testInvalidUnicodeHashString() { assertEquals( murmur3_32().hashBytes(str.getBytes(Charsets.UTF_8)), murmur3_32().hashString(str, Charsets.UTF_8)); + assertEquals( + murmur3_32_fixed().hashBytes(str.getBytes(Charsets.UTF_8)), + murmur3_32().hashString(str, Charsets.UTF_8)); } + @SuppressWarnings("deprecation") public void testInvalidUnicodeHasherPutString() { String str = new String( @@ -159,5 +197,8 @@ public void testInvalidUnicodeHasherPutString() { assertEquals( murmur3_32().hashBytes(str.getBytes(Charsets.UTF_8)), murmur3_32().newHasher().putString(str, Charsets.UTF_8).hash()); + assertEquals( + murmur3_32_fixed().hashBytes(str.getBytes(Charsets.UTF_8)), + murmur3_32_fixed().newHasher().putString(str, Charsets.UTF_8).hash()); } } diff --git a/android/guava/src/com/google/common/hash/Hashing.java b/android/guava/src/com/google/common/hash/Hashing.java index f53c9d3a7b0f..e8c7445e04d4 100644 --- a/android/guava/src/com/google/common/hash/Hashing.java +++ b/android/guava/src/com/google/common/hash/Hashing.java @@ -91,15 +91,56 @@ public static HashFunction goodFastHash(int minimumBits) { @SuppressWarnings("GoodTime") // reading system time without TimeSource static final int GOOD_FAST_HASH_SEED = (int) System.currentTimeMillis(); + /** + * Returns a hash function implementing the 32-bit murmur3 + * algorithm, x86 variant (little-endian variant), using the given seed value, with a known + * bug as described in the deprecation text. + * + *
The C++ equivalent is the MurmurHash3_x86_32 function (Murmur3A), which however does not + * have the bug. + * + * @deprecated This implementation produces incorrect hash values from the {@link + * HashFunction#hashString} method if the string contains non-BMP characters. Use {@link + * #murmur3_32_fixed(int)} instead. + */ + @Deprecated + public static HashFunction murmur3_32(int seed) { + return new Murmur3_32HashFunction(seed, /* supplementaryPlaneFix= */ false); + } + + /** + * Returns a hash function implementing the 32-bit murmur3 + * algorithm, x86 variant (little-endian variant), using the given seed value, with a known + * bug as described in the deprecation text. + * + *
The C++ equivalent is the MurmurHash3_x86_32 function (Murmur3A), which however does not + * have the bug. + * + * @deprecated This implementation produces incorrect hash values from the {@link + * HashFunction#hashString} method if the string contains non-BMP characters. Use {@link + * #murmur3_32_fixed()} instead. + */ + @Deprecated + public static HashFunction murmur3_32() { + return Murmur3_32HashFunction.MURMUR3_32; + } + /** * Returns a hash function implementing the 32-bit murmur3 * algorithm, x86 variant (little-endian variant), using the given seed value. * *
The exact C++ equivalent is the MurmurHash3_x86_32 function (Murmur3A). + * + *
This method is called {@code murmur3_32_fixed} because it fixes a bug in the {@code + * HashFunction} returned by the original {@code murmur3_32} method. + * + * @since NEXT */ - public static HashFunction murmur3_32(int seed) { - return new Murmur3_32HashFunction(seed); + public static HashFunction murmur3_32_fixed(int seed) { + return new Murmur3_32HashFunction(seed, /* supplementaryPlaneFix= */ true); } /** @@ -108,9 +149,14 @@ public static HashFunction murmur3_32(int seed) { * algorithm, x86 variant (little-endian variant), using a seed value of zero. * *
The exact C++ equivalent is the MurmurHash3_x86_32 function (Murmur3A). + * + *
This method is called {@code murmur3_32_fixed} because it fixes a bug in the {@code + * HashFunction} returned by the original {@code murmur3_32} method. + * + * @since NEXT */ - public static HashFunction murmur3_32() { - return Murmur3_32HashFunction.MURMUR3_32; + public static HashFunction murmur3_32_fixed() { + return Murmur3_32HashFunction.MURMUR3_32_FIXED; } /** diff --git a/android/guava/src/com/google/common/hash/Murmur3_32HashFunction.java b/android/guava/src/com/google/common/hash/Murmur3_32HashFunction.java index 5e26ec404884..a47184be58b9 100644 --- a/android/guava/src/com/google/common/hash/Murmur3_32HashFunction.java +++ b/android/guava/src/com/google/common/hash/Murmur3_32HashFunction.java @@ -53,10 +53,15 @@ @Immutable @ElementTypesAreNonnullByDefault final class Murmur3_32HashFunction extends AbstractHashFunction implements Serializable { - static final HashFunction MURMUR3_32 = new Murmur3_32HashFunction(0); + static final HashFunction MURMUR3_32 = + new Murmur3_32HashFunction(0, /* supplementaryPlaneFix= */ false); + static final HashFunction MURMUR3_32_FIXED = + new Murmur3_32HashFunction(0, /* supplementaryPlaneFix= */ true); + // We can include the non-BMP fix here because Hashing.goodFastHash stresses that the hash is a + // temporary-use one. Therefore it shouldn't be persisted. static final HashFunction GOOD_FAST_HASH_32 = - new Murmur3_32HashFunction(Hashing.GOOD_FAST_HASH_SEED); + new Murmur3_32HashFunction(Hashing.GOOD_FAST_HASH_SEED, /* supplementaryPlaneFix= */ true); private static final int CHUNK_SIZE = 4; @@ -64,9 +69,11 @@ final class Murmur3_32HashFunction extends AbstractHashFunction implements Seria private static final int C2 = 0x1b873593; private final int seed; + private final boolean supplementaryPlaneFix; - Murmur3_32HashFunction(int seed) { + Murmur3_32HashFunction(int seed, boolean supplementaryPlaneFix) { this.seed = seed; + this.supplementaryPlaneFix = supplementaryPlaneFix; } @Override @@ -88,7 +95,7 @@ public String toString() { public boolean equals(@CheckForNull Object object) { if (object instanceof Murmur3_32HashFunction) { Murmur3_32HashFunction other = (Murmur3_32HashFunction) object; - return seed == other.seed; + return seed == other.seed && supplementaryPlaneFix == other.supplementaryPlaneFix; } return false; } @@ -191,6 +198,9 @@ public HashCode hashString(CharSequence input, Charset charset) { } i++; buffer |= codePointToFourUtf8Bytes(codePoint) << shift; + if (supplementaryPlaneFix) { // bug compatibility: earlier versions did not have this add + shift += 32; + } len += 4; } diff --git a/guava-tests/test/com/google/common/hash/HashFunctionEnum.java b/guava-tests/test/com/google/common/hash/HashFunctionEnum.java index 3a98fede9540..34717477e645 100644 --- a/guava-tests/test/com/google/common/hash/HashFunctionEnum.java +++ b/guava-tests/test/com/google/common/hash/HashFunctionEnum.java @@ -32,6 +32,7 @@ enum HashFunctionEnum { MD5(Hashing.md5()), MURMUR3_128(Hashing.murmur3_128()), MURMUR3_32(Hashing.murmur3_32()), + MURMUR3_32_FIXED(Hashing.murmur3_32_fixed()), SHA1(Hashing.sha1()), SHA256(Hashing.sha256()), SHA384(Hashing.sha384()), diff --git a/guava-tests/test/com/google/common/hash/HashingTest.java b/guava-tests/test/com/google/common/hash/HashingTest.java index 58c93b41c360..bc3db34f050a 100644 --- a/guava-tests/test/com/google/common/hash/HashingTest.java +++ b/guava-tests/test/com/google/common/hash/HashingTest.java @@ -432,6 +432,9 @@ public void testHashIntVsForLoop() { .put(Hashing.murmur3_32(), EMPTY_STRING, "00000000") .put(Hashing.murmur3_32(), TQBFJOTLD, "23f74f2e") .put(Hashing.murmur3_32(), TQBFJOTLDP, "fc8bc4d5") + .put(Hashing.murmur3_32_fixed(), EMPTY_STRING, "00000000") + .put(Hashing.murmur3_32_fixed(), TQBFJOTLD, "23f74f2e") + .put(Hashing.murmur3_32_fixed(), TQBFJOTLDP, "fc8bc4d5") .put(Hashing.sha1(), EMPTY_STRING, "da39a3ee5e6b4b0d3255bfef95601890afd80709") .put(Hashing.sha1(), TQBFJOTLD, "2fd4e1c67a2d28fced849ee1bb76e7391b93eb12") .put(Hashing.sha1(), TQBFJOTLDP, "408d94384216f890ff7a0c3528e8bed1e0b01621") diff --git a/guava-tests/test/com/google/common/hash/Murmur3Hash32Test.java b/guava-tests/test/com/google/common/hash/Murmur3Hash32Test.java index bda6a9457527..181b2a7bb071 100644 --- a/guava-tests/test/com/google/common/hash/Murmur3Hash32Test.java +++ b/guava-tests/test/com/google/common/hash/Murmur3Hash32Test.java @@ -17,9 +17,11 @@ package com.google.common.hash; import static com.google.common.hash.Hashing.murmur3_32; +import static com.google.common.hash.Hashing.murmur3_32_fixed; import com.google.common.base.Charsets; import com.google.common.hash.HashTestUtils.HashFn; +import java.nio.charset.Charset; import java.util.Random; import junit.framework.TestCase; @@ -51,28 +53,53 @@ public void testKnownStringInputs() { -528633700, murmur3_32().hashUnencodedChars("The quick brown fox jumps over the lazy dog")); } - public void testKnownUtf8StringInputs() { - assertHash(0, murmur3_32().hashString("", Charsets.UTF_8)); - assertHash(0xcfbda5d1, murmur3_32().hashString("k", Charsets.UTF_8)); - assertHash(0xa167dbf3, murmur3_32().hashString("hell", Charsets.UTF_8)); - assertHash(0x248bfa47, murmur3_32().hashString("hello", Charsets.UTF_8)); - assertHash(0x3d41b97c, murmur3_32().hashString("http://www.google.com/", Charsets.UTF_8)); - assertHash( - 0x2e4ff723, - murmur3_32().hashString("The quick brown fox jumps over the lazy dog", Charsets.UTF_8)); - assertHash(0xfc5ba834, murmur3_32().hashString("毎月1日,毎週月曜日", Charsets.UTF_8)); - assertHash( - 0x3f4aff5c, - murmur3_32().hashString(Character.toString(Character.MAX_VALUE), Charsets.UTF_8)); - assertHash( - 0x81db5903, - murmur3_32() - .hashString(new String(Character.toChars(Character.MAX_VALUE + 1)), Charsets.UTF_8)); - // Note (https://github.com/google/guava/issues/5648) the hash expected here is not correct - assertHash( - 0x256068c8, - murmur3_32() - .hashString(new String(Character.toChars(Character.MAX_CODE_POINT)), Charsets.UTF_8)); + @SuppressWarnings("deprecation") + public void testKnownEncodedStringInputs() { + assertStringHash(0, "", Charsets.UTF_8); + assertStringHash(0xcfbda5d1, "k", Charsets.UTF_8); + assertStringHash(0xa167dbf3, "hell", Charsets.UTF_8); + assertStringHash(0x248bfa47, "hello", Charsets.UTF_8); + assertStringHash(0x3d41b97c, "http://www.google.com/", Charsets.UTF_8); + assertStringHash(0x2e4ff723, "The quick brown fox jumps over the lazy dog", Charsets.UTF_8); + assertStringHash(0xb5a4be05, "ABCDefGHI\u0799", Charsets.UTF_8); + assertStringHash(0xfc5ba834, "毎月1日,毎週月曜日", Charsets.UTF_8); + assertStringHash(0x8a5c3699, "surrogate pair: \uD83D\uDCB0", Charsets.UTF_8); + + assertStringHash(0, "", Charsets.UTF_16LE); + assertStringHash(0x288418e4, "k", Charsets.UTF_16LE); + assertStringHash(0x5a0cb7c3, "hell", Charsets.UTF_16LE); + assertStringHash(0xd7c31989, "hello", Charsets.UTF_16LE); + assertStringHash(0x73564d8c, "http://www.google.com/", Charsets.UTF_16LE); + assertStringHash(0xe07db09c, "The quick brown fox jumps over the lazy dog", Charsets.UTF_16LE); + assertStringHash(0xfefa3e76, "ABCDefGHI\u0799", Charsets.UTF_16LE); + assertStringHash(0x6a7be132, "毎月1日,毎週月曜日", Charsets.UTF_16LE); + assertStringHash(0x5a2d41c7, "surrogate pair: \uD83D\uDCB0", Charsets.UTF_16LE); + } + + @SuppressWarnings("deprecation") + private void assertStringHash(int expected, String string, Charset charset) { + if (allBmp(string)) { + assertHash(expected, murmur3_32().hashString(string, charset)); + } + assertHash(expected, murmur3_32_fixed().hashString(string, charset)); + assertHash(expected, murmur3_32().newHasher().putString(string, charset).hash()); + assertHash(expected, murmur3_32_fixed().newHasher().putString(string, charset).hash()); + assertHash(expected, murmur3_32().hashBytes(string.getBytes(charset))); + assertHash(expected, murmur3_32_fixed().hashBytes(string.getBytes(charset))); + assertHash(expected, murmur3_32().newHasher().putBytes(string.getBytes(charset)).hash()); + assertHash(expected, murmur3_32_fixed().newHasher().putBytes(string.getBytes(charset)).hash()); + } + + private boolean allBmp(String string) { + // Ordinarily we'd use something like i += Character.charCount(string.codePointAt(i)) here. But + // we can get away with i++ because the whole point of this method is to return false if we find + // a code point that doesn't fit in a char. + for (int i = 0; i < string.length(); i++) { + if (string.codePointAt(i) > 0xffff) { + return false; + } + } + return true; } @SuppressWarnings("deprecation") @@ -83,7 +110,7 @@ public void testSimpleStringUtf8() { } @SuppressWarnings("deprecation") - public void testStringInputsUtf8() { + public void testEncodedStringInputs() { Random rng = new Random(0); for (int z = 0; z < 100; z++) { String str; @@ -100,9 +127,16 @@ public void testStringInputsUtf8() { builder.appendCodePoint(codePoints[i]); } str = builder.toString(); + HashCode hashUtf8 = murmur3_32().hashBytes(str.getBytes(Charsets.UTF_8)); + assertEquals( + hashUtf8, murmur3_32().newHasher().putBytes(str.getBytes(Charsets.UTF_8)).hash()); + assertEquals(hashUtf8, murmur3_32().hashString(str, Charsets.UTF_8)); + assertEquals(hashUtf8, murmur3_32().newHasher().putString(str, Charsets.UTF_8).hash()); + HashCode hashUtf16 = murmur3_32().hashBytes(str.getBytes(Charsets.UTF_16)); assertEquals( - murmur3_32().hashBytes(str.getBytes(Charsets.UTF_8)), - murmur3_32().hashString(str, Charsets.UTF_8)); + hashUtf16, murmur3_32().newHasher().putBytes(str.getBytes(Charsets.UTF_16)).hash()); + assertEquals(hashUtf16, murmur3_32().hashString(str, Charsets.UTF_16)); + assertEquals(hashUtf16, murmur3_32().newHasher().putString(str, Charsets.UTF_16).hash()); } } @@ -150,8 +184,12 @@ public void testInvalidUnicodeHashString() { assertEquals( murmur3_32().hashBytes(str.getBytes(Charsets.UTF_8)), murmur3_32().hashString(str, Charsets.UTF_8)); + assertEquals( + murmur3_32_fixed().hashBytes(str.getBytes(Charsets.UTF_8)), + murmur3_32().hashString(str, Charsets.UTF_8)); } + @SuppressWarnings("deprecation") public void testInvalidUnicodeHasherPutString() { String str = new String( @@ -159,5 +197,8 @@ public void testInvalidUnicodeHasherPutString() { assertEquals( murmur3_32().hashBytes(str.getBytes(Charsets.UTF_8)), murmur3_32().newHasher().putString(str, Charsets.UTF_8).hash()); + assertEquals( + murmur3_32_fixed().hashBytes(str.getBytes(Charsets.UTF_8)), + murmur3_32_fixed().newHasher().putString(str, Charsets.UTF_8).hash()); } } diff --git a/guava/src/com/google/common/hash/Hashing.java b/guava/src/com/google/common/hash/Hashing.java index f53c9d3a7b0f..e8c7445e04d4 100644 --- a/guava/src/com/google/common/hash/Hashing.java +++ b/guava/src/com/google/common/hash/Hashing.java @@ -91,15 +91,56 @@ public static HashFunction goodFastHash(int minimumBits) { @SuppressWarnings("GoodTime") // reading system time without TimeSource static final int GOOD_FAST_HASH_SEED = (int) System.currentTimeMillis(); + /** + * Returns a hash function implementing the 32-bit murmur3 + * algorithm, x86 variant (little-endian variant), using the given seed value, with a known + * bug as described in the deprecation text. + * + *
The C++ equivalent is the MurmurHash3_x86_32 function (Murmur3A), which however does not + * have the bug. + * + * @deprecated This implementation produces incorrect hash values from the {@link + * HashFunction#hashString} method if the string contains non-BMP characters. Use {@link + * #murmur3_32_fixed(int)} instead. + */ + @Deprecated + public static HashFunction murmur3_32(int seed) { + return new Murmur3_32HashFunction(seed, /* supplementaryPlaneFix= */ false); + } + + /** + * Returns a hash function implementing the 32-bit murmur3 + * algorithm, x86 variant (little-endian variant), using the given seed value, with a known + * bug as described in the deprecation text. + * + *
The C++ equivalent is the MurmurHash3_x86_32 function (Murmur3A), which however does not + * have the bug. + * + * @deprecated This implementation produces incorrect hash values from the {@link + * HashFunction#hashString} method if the string contains non-BMP characters. Use {@link + * #murmur3_32_fixed()} instead. + */ + @Deprecated + public static HashFunction murmur3_32() { + return Murmur3_32HashFunction.MURMUR3_32; + } + /** * Returns a hash function implementing the 32-bit murmur3 * algorithm, x86 variant (little-endian variant), using the given seed value. * *
The exact C++ equivalent is the MurmurHash3_x86_32 function (Murmur3A). + * + *
This method is called {@code murmur3_32_fixed} because it fixes a bug in the {@code + * HashFunction} returned by the original {@code murmur3_32} method. + * + * @since NEXT */ - public static HashFunction murmur3_32(int seed) { - return new Murmur3_32HashFunction(seed); + public static HashFunction murmur3_32_fixed(int seed) { + return new Murmur3_32HashFunction(seed, /* supplementaryPlaneFix= */ true); } /** @@ -108,9 +149,14 @@ public static HashFunction murmur3_32(int seed) { * algorithm, x86 variant (little-endian variant), using a seed value of zero. * *
The exact C++ equivalent is the MurmurHash3_x86_32 function (Murmur3A). + * + *
This method is called {@code murmur3_32_fixed} because it fixes a bug in the {@code + * HashFunction} returned by the original {@code murmur3_32} method. + * + * @since NEXT */ - public static HashFunction murmur3_32() { - return Murmur3_32HashFunction.MURMUR3_32; + public static HashFunction murmur3_32_fixed() { + return Murmur3_32HashFunction.MURMUR3_32_FIXED; } /** diff --git a/guava/src/com/google/common/hash/Murmur3_32HashFunction.java b/guava/src/com/google/common/hash/Murmur3_32HashFunction.java index 5e26ec404884..a47184be58b9 100644 --- a/guava/src/com/google/common/hash/Murmur3_32HashFunction.java +++ b/guava/src/com/google/common/hash/Murmur3_32HashFunction.java @@ -53,10 +53,15 @@ @Immutable @ElementTypesAreNonnullByDefault final class Murmur3_32HashFunction extends AbstractHashFunction implements Serializable { - static final HashFunction MURMUR3_32 = new Murmur3_32HashFunction(0); + static final HashFunction MURMUR3_32 = + new Murmur3_32HashFunction(0, /* supplementaryPlaneFix= */ false); + static final HashFunction MURMUR3_32_FIXED = + new Murmur3_32HashFunction(0, /* supplementaryPlaneFix= */ true); + // We can include the non-BMP fix here because Hashing.goodFastHash stresses that the hash is a + // temporary-use one. Therefore it shouldn't be persisted. static final HashFunction GOOD_FAST_HASH_32 = - new Murmur3_32HashFunction(Hashing.GOOD_FAST_HASH_SEED); + new Murmur3_32HashFunction(Hashing.GOOD_FAST_HASH_SEED, /* supplementaryPlaneFix= */ true); private static final int CHUNK_SIZE = 4; @@ -64,9 +69,11 @@ final class Murmur3_32HashFunction extends AbstractHashFunction implements Seria private static final int C2 = 0x1b873593; private final int seed; + private final boolean supplementaryPlaneFix; - Murmur3_32HashFunction(int seed) { + Murmur3_32HashFunction(int seed, boolean supplementaryPlaneFix) { this.seed = seed; + this.supplementaryPlaneFix = supplementaryPlaneFix; } @Override @@ -88,7 +95,7 @@ public String toString() { public boolean equals(@CheckForNull Object object) { if (object instanceof Murmur3_32HashFunction) { Murmur3_32HashFunction other = (Murmur3_32HashFunction) object; - return seed == other.seed; + return seed == other.seed && supplementaryPlaneFix == other.supplementaryPlaneFix; } return false; } @@ -191,6 +198,9 @@ public HashCode hashString(CharSequence input, Charset charset) { } i++; buffer |= codePointToFourUtf8Bytes(codePoint) << shift; + if (supplementaryPlaneFix) { // bug compatibility: earlier versions did not have this add + shift += 32; + } len += 4; }