diff --git a/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationAwareUTF8String.java b/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationAwareUTF8String.java index b57f172428ac..b4d03a542578 100644 --- a/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationAwareUTF8String.java +++ b/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationAwareUTF8String.java @@ -302,114 +302,82 @@ private static int compareLowerCaseSlow(final UTF8String left, final UTF8String return lowerCaseCodePoints(left).binaryCompare(lowerCaseCodePoints(right)); } - /* + /** * Performs string replacement for ICU collations by searching for instances of the search - * string in the `src` string, with respect to the specified collation, and then replacing + * string in the `target` string, with respect to the specified collation, and then replacing * them with the replace string. The method returns a new UTF8String with all instances of the - * search string replaced using the replace string. Similar to UTF8String.findInSet behavior - * used for UTF8_BINARY, the method returns the `src` string if the `search` string is empty. + * search string replaced using the replace string. Similar to UTF8String.replace behavior + * used for UTF8_BINARY, the method returns the `target` string if the `search` string is empty. 
* - * @param src the string to be searched in + * @param target the string to be searched in * @param search the string to be searched for * @param replace the string to be used as replacement * @param collationId the collation ID to use for string search - * @return the position of the first occurrence of `match` in `set` + * @return the resulting string after all matches of `search` are replaced with `replace` */ - public static UTF8String replace(final UTF8String src, final UTF8String search, + public static UTF8String replace(final UTF8String target, final UTF8String search, final UTF8String replace, final int collationId) { // This collation aware implementation is based on existing implementation on UTF8String - if (src.numBytes() == 0 || search.numBytes() == 0) { - return src; + if (target.numBytes() == 0 || search.numBytes() == 0) { + return target; } - StringSearch stringSearch = CollationFactory.getStringSearch(src, search, collationId); - - // Find the first occurrence of the search string. - int end = stringSearch.next(); - if (end == StringSearch.DONE) { - // Search string was not found, so string is unchanged. - return src; - } - - // Initialize byte positions - int c = 0; - int byteStart = 0; // position in byte - int byteEnd = 0; // position in byte - while (byteEnd < src.numBytes() && c < end) { - byteEnd += UTF8String.numBytesForFirstByte(src.getByte(byteEnd)); - c += 1; - } - - // At least one match was found. Estimate space needed for result. - // The 16x multiplier here is chosen to match commons-lang3's implementation. 
- int increase = Math.max(0, Math.abs(replace.numBytes() - search.numBytes())) * 16; - final UTF8StringBuilder buf = new UTF8StringBuilder(src.numBytes() + increase); - while (end != StringSearch.DONE) { - buf.appendBytes(src.getBaseObject(), src.getBaseOffset() + byteStart, byteEnd - byteStart); - buf.append(replace); + String targetStr = target.toValidString(); + String searchStr = search.toValidString(); + StringSearch stringSearch = CollationFactory.getStringSearch(targetStr, searchStr, collationId); - // Move byteStart to the beginning of the current match - byteStart = byteEnd; - int cs = c; - // Move cs to the end of the current match - // This is necessary because the search string may contain 'multi-character' characters - while (byteStart < src.numBytes() && cs < c + stringSearch.getMatchLength()) { - byteStart += UTF8String.numBytesForFirstByte(src.getByte(byteStart)); - cs += 1; - } - // Go to next match - end = stringSearch.next(); - // Update byte positions - while (byteEnd < src.numBytes() && c < end) { - byteEnd += UTF8String.numBytesForFirstByte(src.getByte(byteEnd)); - c += 1; - } + StringBuilder sb = new StringBuilder(); + int start = 0; + int matchStart = stringSearch.first(); + while (matchStart != StringSearch.DONE) { + sb.append(targetStr, start, matchStart); + sb.append(replace.toValidString()); + start = matchStart + stringSearch.getMatchLength(); + matchStart = stringSearch.next(); } - buf.appendBytes(src.getBaseObject(), src.getBaseOffset() + byteStart, - src.numBytes() - byteStart); - return buf.build(); + sb.append(targetStr, start, targetStr.length()); + return UTF8String.fromString(sb.toString()); } - /* + /** * Performs string replacement for UTF8_LCASE collation by searching for instances of the search - * string in the src string, with respect to lowercased string versions, and then replacing + * string in the target string, with respect to lowercased string versions, and then replacing * them with the replace string. 
The method returns a new UTF8String with all instances of the - * search string replaced using the replace string. Similar to UTF8String.findInSet behavior - * used for UTF8_BINARY, the method returns the `src` string if the `search` string is empty. + * search string replaced using the replace string. Similar to UTF8String.replace behavior + * used for UTF8_BINARY, the method returns the `target` string if the `search` string is empty. * - * @param src the string to be searched in + * @param target the string to be searched in * @param search the string to be searched for * @param replace the string to be used as replacement - * @param collationId the collation ID to use for string search - * @return the position of the first occurrence of `match` in `set` + * @return the resulting string after all matches of `search` are replaced with `replace` */ - public static UTF8String lowercaseReplace(final UTF8String src, final UTF8String search, + public static UTF8String lowercaseReplace(final UTF8String target, final UTF8String search, final UTF8String replace) { - if (src.numBytes() == 0 || search.numBytes() == 0) { - return src; + if (target.numBytes() == 0 || search.numBytes() == 0) { + return target; } UTF8String lowercaseSearch = lowerCaseCodePoints(search); int start = 0; - int end = lowercaseFind(src, lowercaseSearch, start); + int end = lowercaseFind(target, lowercaseSearch, start); if (end == -1) { // Search string was not found, so string is unchanged. - return src; + return target; } // At least one match was found. Estimate space needed for result. // The 16x multiplier here is chosen to match commons-lang3's implementation. 
int increase = Math.max(0, replace.numBytes() - search.numBytes()) * 16; - final UTF8StringBuilder buf = new UTF8StringBuilder(src.numBytes() + increase); + final UTF8StringBuilder buf = new UTF8StringBuilder(target.numBytes() + increase); while (end != -1) { - buf.append(src.substring(start, end)); + buf.append(target.substring(start, end)); buf.append(replace); // Update character positions - start = end + lowercaseMatchLengthFrom(src, lowercaseSearch, end); - end = lowercaseFind(src, lowercaseSearch, start); + start = end + lowercaseMatchLengthFrom(target, lowercaseSearch, end); + end = lowercaseFind(target, lowercaseSearch, start); } - buf.append(src.substring(start, src.numChars())); + buf.append(target.substring(start, target.numChars())); return buf.build(); } diff --git a/common/unsafe/src/test/java/org/apache/spark/unsafe/types/CollationSupportSuite.java b/common/unsafe/src/test/java/org/apache/spark/unsafe/types/CollationSupportSuite.java index 4301bf56b6d5..9b098c12c3e9 100644 --- a/common/unsafe/src/test/java/org/apache/spark/unsafe/types/CollationSupportSuite.java +++ b/common/unsafe/src/test/java/org/apache/spark/unsafe/types/CollationSupportSuite.java @@ -1094,95 +1094,161 @@ public void testFindInSet() throws SparkException { assertFindInSet("Σ", UTF8String.fromString("Σ"), "UNICODE_CI", 1); } - private void assertReplace(String source, String search, String replace, String collationName, - String expected) throws SparkException { + /** + * Verify the behaviour of the `StringReplace` collation support class. 
+ */ + + private void assertStringReplace(String source, String search, String replace, + String collationName, String expected) throws SparkException { UTF8String src = UTF8String.fromString(source); UTF8String sear = UTF8String.fromString(search); UTF8String repl = UTF8String.fromString(replace); int collationId = CollationFactory.collationNameToId(collationName); - assertEquals(expected, CollationSupport.StringReplace - .exec(src, sear, repl, collationId).toString()); + UTF8String result = CollationSupport.StringReplace.exec(src, sear, repl, collationId); + assertEquals(UTF8String.fromString(expected), result); } @Test - public void testReplace() throws SparkException { - assertReplace("r世eplace", "pl", "123", "UTF8_BINARY", "r世e123ace"); - assertReplace("replace", "pl", "", "UTF8_BINARY", "reace"); - assertReplace("repl世ace", "Pl", "", "UTF8_BINARY", "repl世ace"); - assertReplace("replace", "", "123", "UTF8_BINARY", "replace"); - assertReplace("abcabc", "b", "12", "UTF8_BINARY", "a12ca12c"); - assertReplace("abcdabcd", "bc", "", "UTF8_BINARY", "adad"); - assertReplace("r世eplace", "pl", "xx", "UTF8_LCASE", "r世exxace"); - assertReplace("repl世ace", "PL", "AB", "UTF8_LCASE", "reAB世ace"); - assertReplace("Replace", "", "123", "UTF8_LCASE", "Replace"); - assertReplace("re世place", "世", "x", "UTF8_LCASE", "rexplace"); - assertReplace("abcaBc", "B", "12", "UTF8_LCASE", "a12ca12c"); - assertReplace("AbcdabCd", "Bc", "", "UTF8_LCASE", "Adad"); - assertReplace("re世place", "plx", "123", "UNICODE", "re世place"); - assertReplace("世Replace", "re", "", "UNICODE", "世Replace"); - assertReplace("replace世", "", "123", "UNICODE", "replace世"); - assertReplace("aBc世abc", "b", "12", "UNICODE", "aBc世a12c"); - assertReplace("abcdabcd", "bc", "", "UNICODE", "adad"); - assertReplace("replace", "plx", "123", "UNICODE_CI", "replace"); - assertReplace("Replace", "re", "", "UNICODE_CI", "place"); - assertReplace("replace", "", "123", "UNICODE_CI", "replace"); - assertReplace("aBc世abc", "b", 
"12", "UNICODE_CI", "a12c世a12c"); - assertReplace("a世Bcdabcd", "bC", "", "UNICODE_CI", "a世dad"); - assertReplace("abi̇12", "i", "X", "UNICODE_CI", "abi̇12"); - assertReplace("abi̇12", "\u0307", "X", "UNICODE_CI", "abi̇12"); - assertReplace("abi̇12", "İ", "X", "UNICODE_CI", "abX12"); - assertReplace("abİ12", "i", "X", "UNICODE_CI", "abİ12"); - assertReplace("İi̇İi̇İi̇", "i̇", "x", "UNICODE_CI", "xxxxxx"); - assertReplace("İi̇İi̇İi̇", "i", "x", "UNICODE_CI", "İi̇İi̇İi̇"); - assertReplace("abİo12i̇o", "i̇o", "xx", "UNICODE_CI", "abxx12xx"); - assertReplace("abi̇o12i̇o", "İo", "yy", "UNICODE_CI", "abyy12yy"); - assertReplace("abi̇12", "i", "X", "UTF8_LCASE", "abX\u030712"); // != UNICODE_CI - assertReplace("abi̇12", "\u0307", "X", "UTF8_LCASE", "abiX12"); // != UNICODE_CI - assertReplace("abi̇12", "İ", "X", "UTF8_LCASE", "abX12"); - assertReplace("abİ12", "i", "X", "UTF8_LCASE", "abİ12"); - assertReplace("İi̇İi̇İi̇", "i̇", "x", "UTF8_LCASE", "xxxxxx"); - assertReplace("İi̇İi̇İi̇", "i", "x", "UTF8_LCASE", + public void testStringReplace() throws SparkException { + // Empty strings. 
+ assertStringReplace("", "", "", "UTF8_BINARY", ""); + assertStringReplace("", "", "", "UTF8_LCASE", ""); + assertStringReplace("", "", "", "UNICODE", ""); + assertStringReplace("", "", "", "UNICODE_CI", ""); + assertStringReplace("abc", "", "", "UTF8_BINARY", "abc"); + assertStringReplace("abc", "", "", "UTF8_LCASE", "abc"); + assertStringReplace("abc", "", "", "UNICODE", "abc"); + assertStringReplace("abc", "", "", "UNICODE_CI", "abc"); + assertStringReplace("", "x", "", "UTF8_BINARY", ""); + assertStringReplace("", "x", "", "UTF8_LCASE", ""); + assertStringReplace("", "x", "", "UNICODE", ""); + assertStringReplace("", "x", "", "UNICODE_CI", ""); + assertStringReplace("", "", "x", "UTF8_BINARY", ""); + assertStringReplace("", "", "x", "UTF8_LCASE", ""); + assertStringReplace("", "", "x", "UNICODE", ""); + assertStringReplace("", "", "x", "UNICODE_CI", ""); + assertStringReplace("", "b", "x", "UTF8_BINARY", ""); + assertStringReplace("", "b", "x", "UTF8_LCASE", ""); + assertStringReplace("", "b", "x", "UNICODE", ""); + assertStringReplace("", "b", "x", "UNICODE_CI", ""); + assertStringReplace("abc", "b", "", "UTF8_BINARY", "ac"); + assertStringReplace("abc", "b", "", "UTF8_LCASE", "ac"); + assertStringReplace("abc", "b", "", "UNICODE", "ac"); + assertStringReplace("abc", "b", "", "UNICODE_CI", "ac"); + assertStringReplace("abc", "", "x", "UTF8_BINARY", "abc"); + assertStringReplace("abc", "", "x", "UTF8_LCASE", "abc"); + assertStringReplace("abc", "", "x", "UNICODE", "abc"); + assertStringReplace("abc", "", "x", "UNICODE_CI", "abc"); + // Basic tests. 
+ assertStringReplace("replace", "pl", "", "UTF8_BINARY", "reace"); + assertStringReplace("replace", "pl", "", "UTF8_LCASE", "reace"); + assertStringReplace("replace", "pl", "", "UNICODE", "reace"); + assertStringReplace("replace", "pl", "", "UNICODE_CI", "reace"); + assertStringReplace("replace", "", "123", "UTF8_BINARY", "replace"); + assertStringReplace("replace", "", "123", "UTF8_LCASE", "replace"); + assertStringReplace("replace", "", "123", "UNICODE", "replace"); + assertStringReplace("replace", "", "123", "UNICODE_CI", "replace"); + assertStringReplace("abcabc", "b", "12", "UTF8_BINARY", "a12ca12c"); + assertStringReplace("abcabc", "b", "12", "UTF8_LCASE", "a12ca12c"); + assertStringReplace("abcabc", "b", "12", "UNICODE", "a12ca12c"); + assertStringReplace("abcabc", "b", "12", "UNICODE_CI", "a12ca12c"); + assertStringReplace("replace", "plx", "123", "UTF8_BINARY", "replace"); + assertStringReplace("replace", "plx", "123", "UTF8_LCASE", "replace"); + assertStringReplace("replace", "plx", "123", "UNICODE", "replace"); + assertStringReplace("replace", "plx", "123", "UNICODE_CI", "replace"); + assertStringReplace("Replace", "re", "", "UTF8_BINARY", "Replace"); + assertStringReplace("Replace", "re", "", "UTF8_LCASE", "place"); + assertStringReplace("Replace", "re", "", "UNICODE", "Replace"); + assertStringReplace("Replace", "re", "", "UNICODE_CI", "place"); + assertStringReplace("abcdabcd", "Bc", "", "UTF8_BINARY", "abcdabcd"); + assertStringReplace("abcdabcd", "Bc", "", "UTF8_LCASE", "adad"); + assertStringReplace("abcdabcd", "Bc", "", "UNICODE", "abcdabcd"); + assertStringReplace("abcdabcd", "Bc", "", "UNICODE_CI", "adad"); + assertStringReplace("AbcdabCd", "Bc", "", "UTF8_BINARY", "AbcdabCd"); + assertStringReplace("AbcdabCd", "Bc", "", "UTF8_LCASE", "Adad"); + assertStringReplace("AbcdabCd", "Bc", "", "UNICODE", "AbcdabCd"); + assertStringReplace("AbcdabCd", "Bc", "", "UNICODE_CI", "Adad"); + // Advanced tests. 
+ assertStringReplace("abcdabcd", "bc", "", "UTF8_BINARY", "adad"); + assertStringReplace("r世eplace", "pl", "123", "UTF8_BINARY", "r世e123ace"); + assertStringReplace("世Replace", "re", "", "UTF8_BINARY", "世Replace"); + assertStringReplace("r世eplace", "pl", "xx", "UTF8_LCASE", "r世exxace"); + assertStringReplace("repl世ace", "PL", "AB", "UTF8_LCASE", "reAB世ace"); + assertStringReplace("re世place", "世", "x", "UTF8_LCASE", "rexplace"); + assertStringReplace("re世place", "plx", "123", "UNICODE", "re世place"); + assertStringReplace("replace世", "", "123", "UNICODE", "replace世"); + assertStringReplace("aBc世abc", "b", "12", "UNICODE", "aBc世a12c"); + assertStringReplace("aBc世abc", "b", "12", "UNICODE_CI", "a12c世a12c"); + assertStringReplace("a世Bcdabcd", "bC", "", "UNICODE_CI", "a世dad"); + assertStringReplace("repl世ace", "Pl", "", "UNICODE_CI", "re世ace"); + // One-to-many case mapping (e.g. Turkish dotted I). + assertStringReplace("abi̇12", "i", "X", "UNICODE_CI", "abi̇12"); + assertStringReplace("abi̇12", "\u0307", "X", "UNICODE_CI", "abi̇12"); + assertStringReplace("abi̇12", "İ", "X", "UNICODE_CI", "abX12"); + assertStringReplace("abİ12", "i", "X", "UNICODE_CI", "abİ12"); + assertStringReplace("İi̇İi̇İi̇", "i\u0307", "x", "UNICODE_CI", "xxxxxx"); + assertStringReplace("İi̇İi̇İi̇", "i", "x", "UNICODE_CI", "İi̇İi̇İi̇"); + assertStringReplace("abİo12i̇o", "i\u0307o", "xx", "UNICODE_CI", "abxx12xx"); + assertStringReplace("abi̇o12i̇o", "İo", "yy", "UNICODE_CI", "abyy12yy"); + assertStringReplace("abi̇12", "i", "X", "UTF8_LCASE", "abX\u030712"); // != UNICODE_CI + assertStringReplace("abi̇12", "\u0307", "X", "UTF8_LCASE", "abiX12"); // != UNICODE_CI + assertStringReplace("abi̇12", "İ", "X", "UTF8_LCASE", "abX12"); + assertStringReplace("abİ12", "i", "X", "UTF8_LCASE", "abİ12"); + assertStringReplace("İi̇İi̇İi̇", "i\u0307", "x", "UTF8_LCASE", "xxxxxx"); + assertStringReplace("İi̇İi̇İi̇", "i", "x", "UTF8_LCASE", "İx\u0307İx\u0307İx\u0307"); // != UNICODE_CI - assertReplace("abİo12i̇o", 
"i̇o", "xx", "UTF8_LCASE", "abxx12xx"); - assertReplace("abi̇o12i̇o", "İo", "yy", "UTF8_LCASE", "abyy12yy"); - // Greek sigmas. - assertReplace("σ", "σ", "x", "UTF8_BINARY", "x"); - assertReplace("σ", "ς", "x", "UTF8_BINARY", "σ"); - assertReplace("σ", "Σ", "x", "UTF8_BINARY", "σ"); - assertReplace("ς", "σ", "x", "UTF8_BINARY", "ς"); - assertReplace("ς", "ς", "x", "UTF8_BINARY", "x"); - assertReplace("ς", "Σ", "x", "UTF8_BINARY", "ς"); - assertReplace("Σ", "σ", "x", "UTF8_BINARY", "Σ"); - assertReplace("Σ", "ς", "x", "UTF8_BINARY", "Σ"); - assertReplace("Σ", "Σ", "x", "UTF8_BINARY", "x"); - assertReplace("σ", "σ", "x", "UTF8_LCASE", "x"); - assertReplace("σ", "ς", "x", "UTF8_LCASE", "x"); - assertReplace("σ", "Σ", "x", "UTF8_LCASE", "x"); - assertReplace("ς", "σ", "x", "UTF8_LCASE", "x"); - assertReplace("ς", "ς", "x", "UTF8_LCASE", "x"); - assertReplace("ς", "Σ", "x", "UTF8_LCASE", "x"); - assertReplace("Σ", "σ", "x", "UTF8_LCASE", "x"); - assertReplace("Σ", "ς", "x", "UTF8_LCASE", "x"); - assertReplace("Σ", "Σ", "x", "UTF8_LCASE", "x"); - assertReplace("σ", "σ", "x", "UNICODE", "x"); - assertReplace("σ", "ς", "x", "UNICODE", "σ"); - assertReplace("σ", "Σ", "x", "UNICODE", "σ"); - assertReplace("ς", "σ", "x", "UNICODE", "ς"); - assertReplace("ς", "ς", "x", "UNICODE", "x"); - assertReplace("ς", "Σ", "x", "UNICODE", "ς"); - assertReplace("Σ", "σ", "x", "UNICODE", "Σ"); - assertReplace("Σ", "ς", "x", "UNICODE", "Σ"); - assertReplace("Σ", "Σ", "x", "UNICODE", "x"); - assertReplace("σ", "σ", "x", "UNICODE_CI", "x"); - assertReplace("σ", "ς", "x", "UNICODE_CI", "x"); - assertReplace("σ", "Σ", "x", "UNICODE_CI", "x"); - assertReplace("ς", "σ", "x", "UNICODE_CI", "x"); - assertReplace("ς", "ς", "x", "UNICODE_CI", "x"); - assertReplace("ς", "Σ", "x", "UNICODE_CI", "x"); - assertReplace("Σ", "σ", "x", "UNICODE_CI", "x"); - assertReplace("Σ", "ς", "x", "UNICODE_CI", "x"); - assertReplace("Σ", "Σ", "x", "UNICODE_CI", "x"); - + assertStringReplace("abİo12i̇o", "i\u0307o", 
"xx", "UTF8_LCASE", "abxx12xx"); + assertStringReplace("abi̇o12i̇o", "İo", "yy", "UTF8_LCASE", "abyy12yy"); + // Conditional case mapping (e.g. Greek sigmas). + assertStringReplace("σ", "σ", "x", "UTF8_BINARY", "x"); + assertStringReplace("σ", "ς", "x", "UTF8_BINARY", "σ"); + assertStringReplace("σ", "Σ", "x", "UTF8_BINARY", "σ"); + assertStringReplace("ς", "σ", "x", "UTF8_BINARY", "ς"); + assertStringReplace("ς", "ς", "x", "UTF8_BINARY", "x"); + assertStringReplace("ς", "Σ", "x", "UTF8_BINARY", "ς"); + assertStringReplace("Σ", "σ", "x", "UTF8_BINARY", "Σ"); + assertStringReplace("Σ", "ς", "x", "UTF8_BINARY", "Σ"); + assertStringReplace("Σ", "Σ", "x", "UTF8_BINARY", "x"); + assertStringReplace("σ", "σ", "x", "UTF8_LCASE", "x"); + assertStringReplace("σ", "ς", "x", "UTF8_LCASE", "x"); + assertStringReplace("σ", "Σ", "x", "UTF8_LCASE", "x"); + assertStringReplace("ς", "σ", "x", "UTF8_LCASE", "x"); + assertStringReplace("ς", "ς", "x", "UTF8_LCASE", "x"); + assertStringReplace("ς", "Σ", "x", "UTF8_LCASE", "x"); + assertStringReplace("Σ", "σ", "x", "UTF8_LCASE", "x"); + assertStringReplace("Σ", "ς", "x", "UTF8_LCASE", "x"); + assertStringReplace("Σ", "Σ", "x", "UTF8_LCASE", "x"); + assertStringReplace("σ", "σ", "x", "UNICODE", "x"); + assertStringReplace("σ", "ς", "x", "UNICODE", "σ"); + assertStringReplace("σ", "Σ", "x", "UNICODE", "σ"); + assertStringReplace("ς", "σ", "x", "UNICODE", "ς"); + assertStringReplace("ς", "ς", "x", "UNICODE", "x"); + assertStringReplace("ς", "Σ", "x", "UNICODE", "ς"); + assertStringReplace("Σ", "σ", "x", "UNICODE", "Σ"); + assertStringReplace("Σ", "ς", "x", "UNICODE", "Σ"); + assertStringReplace("Σ", "Σ", "x", "UNICODE", "x"); + assertStringReplace("σ", "σ", "x", "UNICODE_CI", "x"); + assertStringReplace("σ", "ς", "x", "UNICODE_CI", "x"); + assertStringReplace("σ", "Σ", "x", "UNICODE_CI", "x"); + assertStringReplace("ς", "σ", "x", "UNICODE_CI", "x"); + assertStringReplace("ς", "ς", "x", "UNICODE_CI", "x"); + assertStringReplace("ς", "Σ", 
"x", "UNICODE_CI", "x"); + assertStringReplace("Σ", "σ", "x", "UNICODE_CI", "x"); + assertStringReplace("Σ", "ς", "x", "UNICODE_CI", "x"); + assertStringReplace("Σ", "Σ", "x", "UNICODE_CI", "x"); + // Surrogate pairs. + assertStringReplace("a🙃b", "a", "x", "UTF8_BINARY", "x🙃b"); + assertStringReplace("a🙃b", "b", "x", "UTF8_BINARY", "a🙃x"); + assertStringReplace("a🙃b", "🙃", "x", "UTF8_BINARY", "axb"); + assertStringReplace("a🙃b", "b", "c", "UTF8_LCASE", "a🙃c"); + assertStringReplace("a🙃b", "b", "x", "UTF8_LCASE", "a🙃x"); + assertStringReplace("a🙃b", "🙃", "x", "UTF8_LCASE", "axb"); + assertStringReplace("a🙃b", "b", "c", "UNICODE", "a🙃c"); + assertStringReplace("a🙃b", "b", "x", "UNICODE", "a🙃x"); + assertStringReplace("a🙃b", "🙃", "x", "UNICODE", "axb"); + assertStringReplace("a🙃b", "b", "c", "UNICODE_CI", "a🙃c"); + assertStringReplace("a🙃b", "b", "x", "UNICODE_CI", "a🙃x"); + assertStringReplace("a🙃b", "🙃", "x", "UNICODE_CI", "axb"); } private void assertLocate(String substring, String string, Integer start, String collationName,