Skip to content

Commit 05b0e7a

Browse files
committed
ICU-22517 Limit the closure expansion loop and return error
To avoid very slow return from the constructor, we return error while the Collation rule expand too big. Add a soft limit to limit to the number of loop needed for 8 Hanguls Necessary number of loop: H(0)=0; H(i)=3H(i-1)+2. Where i is the length of Hangul in the rule. H(1) = 2, H(2) = 8, H(3)=26, H(4)=80, H(5) = 242 ...
1 parent f6d09d5 commit 05b0e7a

File tree

5 files changed

+68
-0
lines changed

5 files changed

+68
-0
lines changed

icu4c/source/i18n/collationbuilder.cpp

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1113,12 +1113,23 @@ CollationBuilder::addWithClosure(const UnicodeString &nfdPrefix, const UnicodeSt
11131113
return ce32;
11141114
}
11151115

1116+
// ICU-22517
1117+
// This constant defines a limit for the addOnlyClosure to return
1118+
// error, to avoid taking a long time for canonical closure expansion.
1119+
// Please let us know if you have a reasonable use case that needed
1120+
// for a practical Collation rule that needs to increase this limit.
1121+
// This value is needed for compiling a rule with eight Hangul syllables such as
1122+
// "&a=b쫊쫊쫊쫊쫊쫊쫊쫊" without error, which should be more than realistic
1123+
// usage.
1124+
static constexpr int32_t kClosureLoopLimit = 6560;
1125+
11161126
uint32_t
11171127
CollationBuilder::addOnlyClosure(const UnicodeString &nfdPrefix, const UnicodeString &nfdString,
11181128
const int64_t newCEs[], int32_t newCEsLength, uint32_t ce32,
11191129
UErrorCode &errorCode) {
11201130
if(U_FAILURE(errorCode)) { return ce32; }
11211131

1132+
int32_t loop = 0;
11221133
// Map from canonically equivalent input to the CEs. (But not from the all-NFD input.)
11231134
if(nfdPrefix.isEmpty()) {
11241135
CanonicalIterator stringIter(nfdString, errorCode);
@@ -1128,6 +1139,11 @@ CollationBuilder::addOnlyClosure(const UnicodeString &nfdPrefix, const UnicodeSt
11281139
UnicodeString str = stringIter.next();
11291140
if(str.isBogus()) { break; }
11301141
if(ignoreString(str, errorCode) || str == nfdString) { continue; }
1142+
if (loop++ > kClosureLoopLimit) {
1143+
// To avoid hang as in ICU-22517, return with error.
1144+
errorCode = U_INPUT_TOO_LONG_ERROR;
1145+
return ce32;
1146+
}
11311147
ce32 = addIfDifferent(prefix, str, newCEs, newCEsLength, ce32, errorCode);
11321148
if(U_FAILURE(errorCode)) { return ce32; }
11331149
}
@@ -1144,6 +1160,11 @@ CollationBuilder::addOnlyClosure(const UnicodeString &nfdPrefix, const UnicodeSt
11441160
UnicodeString str = stringIter.next();
11451161
if(str.isBogus()) { break; }
11461162
if(ignoreString(str, errorCode) || (samePrefix && str == nfdString)) { continue; }
1163+
if (loop++ > kClosureLoopLimit) {
1164+
// To avoid hang as in ICU-22517, return with error.
1165+
errorCode = U_INPUT_TOO_LONG_ERROR;
1166+
return ce32;
1167+
}
11471168
ce32 = addIfDifferent(prefix, str, newCEs, newCEsLength, ce32, errorCode);
11481169
if(U_FAILURE(errorCode)) { return ce32; }
11491170
}

icu4c/source/test/intltest/regcoll.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1249,6 +1249,18 @@ void CollationRegressionTest::TestBeforeWithTooStrongAfter() {
12491249
}
12501250
}
12511251

1252+
void CollationRegressionTest::TestICU22517() {
1253+
IcuTestErrorCode errorCode(*this, "TestICU22517");
1254+
char16_t data[] = u"&a=b쫊쫊쫊쫊쫊쫊쫊쫊";
1255+
icu::UnicodeString rule(true, data, -1);
1256+
int length = quick ? rule.length()-2 : rule.length();
1257+
for (int i = 4; i <= length; i++) {
1258+
UErrorCode status = U_ZERO_ERROR;
1259+
icu::LocalPointer<icu::RuleBasedCollator> col1(
1260+
new icu::RuleBasedCollator(rule.tempSubString(0, i), status));
1261+
}
1262+
}
1263+
12521264
void CollationRegressionTest::TestICU22277() {
12531265
IcuTestErrorCode errorCode(*this, "TestICU22277");
12541266
UErrorCode status = U_ZERO_ERROR;
@@ -1408,6 +1420,7 @@ void CollationRegressionTest::runIndexedTest(int32_t index, UBool exec, const ch
14081420
TESTCASE_AUTO(TestTrailingComment);
14091421
TESTCASE_AUTO(TestBeforeWithTooStrongAfter);
14101422
TESTCASE_AUTO(TestICU22277);
1423+
TESTCASE_AUTO(TestICU22517);
14111424
TESTCASE_AUTO_END;
14121425
}
14131426

icu4c/source/test/intltest/regcoll.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -240,6 +240,8 @@ class CollationRegressionTest: public IntlTestCollator
240240
// Test use-of-uninitialized-value
241241
void TestICU22277();
242242

243+
void TestICU22517();
244+
243245
private:
244246
//------------------------------------------------------------------------
245247
// Internal utilities

icu4j/main/collate/src/main/java/com/ibm/icu/impl/coll/CollationBuilder.java

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
import com.ibm.icu.text.Normalizer2;
2525
import com.ibm.icu.text.UnicodeSet;
2626
import com.ibm.icu.text.UnicodeSetIterator;
27+
import com.ibm.icu.util.ICUInputTooLongException;
2728
import com.ibm.icu.util.ULocale;
2829

2930
public final class CollationBuilder extends CollationRuleParser.Sink {
@@ -862,17 +863,31 @@ private int addWithClosure(CharSequence nfdPrefix, CharSequence nfdString,
862863
return ce32;
863864
}
864865

866+
// ICU-22517
867+
// This constant defines a limit for the addOnlyClosure to return
868+
// error, to avoid taking a long time for canonical closure expansion.
869+
// Please let us know if you have a reasonable use case that needed
870+
// for a practical Collation rule that needs to increase this limit.
871+
// This value is needed for compiling a rule with eight Hangul syllables such as
872+
// "&a=b쫊쫊쫊쫊쫊쫊쫊쫊" without error, which should be more than realistic
873+
// usage.
874+
static private int kClosureLoopLimit = 6560;
875+
865876
private int addOnlyClosure(CharSequence nfdPrefix, CharSequence nfdString,
866877
long[] newCEs, int newCEsLength, int ce32) {
867878
// Map from canonically equivalent input to the CEs. (But not from the all-NFD input.)
868879
// TODO: make CanonicalIterator work with CharSequence, or maybe change arguments here to String
880+
int loop = 0;
869881
if(nfdPrefix.length() == 0) {
870882
CanonicalIterator stringIter = new CanonicalIterator(nfdString.toString());
871883
String prefix = "";
872884
for(;;) {
873885
String str = stringIter.next();
874886
if(str == null) { break; }
875887
if(ignoreString(str) || str.contentEquals(nfdString)) { continue; }
888+
if (loop++ > kClosureLoopLimit) {
889+
throw new ICUInputTooLongException("Too many closure");
890+
}
876891
ce32 = addIfDifferent(prefix, str, newCEs, newCEsLength, ce32);
877892
}
878893
} else {
@@ -887,6 +902,9 @@ private int addOnlyClosure(CharSequence nfdPrefix, CharSequence nfdString,
887902
String str = stringIter.next();
888903
if(str == null) { break; }
889904
if(ignoreString(str) || (samePrefix && str.contentEquals(nfdString))) { continue; }
905+
if (loop++ > kClosureLoopLimit) {
906+
throw new ICUInputTooLongException("Too many closure");
907+
}
890908
ce32 = addIfDifferent(prefix, str, newCEs, newCEsLength, ce32);
891909
}
892910
stringIter.reset();

icu4j/main/collate/src/test/java/com/ibm/icu/dev/test/collator/CollationRegressionTest.java

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1229,6 +1229,20 @@ public void TestTrailingComment() throws Exception {
12291229
assertTrue("b<a", coll.compare("b", "a") < 0);
12301230
}
12311231

1232+
@Test
1233+
public void TestICU22517() {
1234+
boolean quick = TestFmwk.getExhaustiveness() <= 5;
1235+
String rule = "&a=b쫊쫊쫊쫊쫊쫊쫊쫊";
1236+
int length = quick ? (rule.length()-2) : rule.length();
1237+
for (int i = 4; i <= length; i++) {
1238+
try {
1239+
RuleBasedCollator coll = new RuleBasedCollator(rule.substring(0, i));
1240+
} catch (Exception e) {
1241+
// silence ignore.
1242+
}
1243+
}
1244+
}
1245+
12321246
@Test
12331247
public void TestBeforeWithTooStrongAfter() {
12341248
// ICU ticket #9959:

0 commit comments

Comments
 (0)