Skip to content

Commit 6441f29

Browse files
committed
ICU-22834 Update tests to reflect MF2 schema in conformance repo
This also updates the spec tests from the current version of the MFWG repository and removes some duplicate tests. Spec tests now reflect the message-format-wg repo as of unicode-org/message-format-wg@5612f3b It also updates both the ICU4C and ICU4J parsers to follow the current test schema in the conformance repository. This includes adding code to both parsers to allow `src` to be either a single string or an array of strings (per unicode-org/conformance#255 ), and eliminating `srcs` in tests. It also includes other changes to make updated spec tests pass: ICU4C: Allow trailing whitespace for complex messages, due to spec change ICU4C: Parse number literals correctly in Number::format ICU4J: Allow trailing whitespace after complex body, per spec change ICU4C: Fix bug that was assuming an .input variable can't have a reserved annotation ICU4C: Fix bug where unsupported '.i' was parsed as an '.input' ICU4C/ICU4J: Handle markup with space after the initial left curly brace ICU4C: Check for duplicate variant errors ICU4C/ICU4J: Handle leading whitespace in complex messages ICU4J: Treat whitespace after .input keyword as optional ICU4J: Don't format unannotated number literals as numbers
1 parent 263c735 commit 6441f29

File tree

66 files changed

+3682
-2317
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

66 files changed

+3682
-2317
lines changed

Diff for: icu4c/source/common/unicode/utypes.h

+2-1
Original file line numberDiff line numberDiff line change
@@ -584,12 +584,13 @@ typedef enum UErrorCode {
584584
U_MF_OPERAND_MISMATCH_ERROR, /**< An operand provided to a function does not have the required form for that function @internal ICU 75 technology preview @deprecated This API is for technology preview only. */
585585
U_MF_UNSUPPORTED_STATEMENT_ERROR, /**< A message includes a reserved statement. @internal ICU 75 technology preview @deprecated This API is for technology preview only. */
586586
U_MF_UNSUPPORTED_EXPRESSION_ERROR, /**< A message includes syntax reserved for future standardization or private implementation use. @internal ICU 75 technology preview @deprecated This API is for technology preview only. */
587+
U_MF_DUPLICATE_VARIANT_ERROR, /**< A message includes a variant with the same key list as another variant. @internal ICU 76 technology preview @deprecated This API is for technology preview only. */
587588
#ifndef U_HIDE_DEPRECATED_API
588589
/**
589590
* One more than the highest normal formatting API error code.
590591
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
591592
*/
592-
U_FMT_PARSE_ERROR_LIMIT = 0x10121,
593+
U_FMT_PARSE_ERROR_LIMIT = 0x10122,
593594
#endif // U_HIDE_DEPRECATED_API
594595

595596
/*

Diff for: icu4c/source/common/utypes.cpp

+2-1
Original file line numberDiff line numberDiff line change
@@ -141,7 +141,8 @@ _uFmtErrorName[U_FMT_PARSE_ERROR_LIMIT - U_FMT_PARSE_ERROR_START] = {
141141
"U_MF_DUPLICATE_DECLARATION_ERROR",
142142
"U_MF_OPERAND_MISMATCH_ERROR",
143143
"U_MF_UNSUPPORTED_STATEMENT_ERROR",
144-
"U_MF_UNSUPPORTED_EXPRESSION_ERROR"
144+
"U_MF_UNSUPPORTED_EXPRESSION_ERROR",
145+
"U_MF_DUPLICATE_VARIANT_ERROR"
145146
};
146147

147148
static const char * const

Diff for: icu4c/source/i18n/messageformat2_checker.cpp

+28-1
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ Checks data model errors
2222
2323
The following are checked here:
2424
Variant Key Mismatch
25+
Duplicate Variant
2526
Missing Fallback Variant (called NonexhaustivePattern here)
2627
Missing Selector Annotation
2728
Duplicate Declaration
@@ -162,6 +163,7 @@ void Checker::checkVariants(UErrorCode& status) {
162163

163164
// Check that one variant includes only wildcards
164165
bool defaultExists = false;
166+
bool duplicatesExist = false;
165167

166168
for (int32_t i = 0; i < dataModel.numVariants(); i++) {
167169
const SelectorKeys& k = variants[i].getKeys();
@@ -173,10 +175,35 @@ void Checker::checkVariants(UErrorCode& status) {
173175
return;
174176
}
175177
defaultExists |= areDefaultKeys(keys, len);
178+
179+
// Check if this variant's keys are duplicated by any other variant's keys
180+
if (!duplicatesExist) {
181+
// This check takes quadratic time, but it can be optimized if checking
182+
// this property turns out to be a bottleneck.
183+
for (int32_t j = 0; j < i; j++) {
184+
const SelectorKeys& k1 = variants[j].getKeys();
185+
const Key* keys1 = k1.getKeysInternal();
186+
bool allEqual = true;
187+
// This variant was already checked,
188+
// so we know keys1.len == len
189+
for (int32_t kk = 0; kk < len; kk++) {
190+
if (!(keys[kk] == keys1[kk])) {
191+
allEqual = false;
192+
break;
193+
}
194+
}
195+
if (allEqual) {
196+
duplicatesExist = true;
197+
}
198+
}
199+
}
200+
}
201+
202+
if (duplicatesExist) {
203+
errors.addError(StaticErrorType::DuplicateVariant, status);
176204
}
177205
if (!defaultExists) {
178206
errors.addError(StaticErrorType::NonexhaustivePattern, status);
179-
return;
180207
}
181208
}
182209

Diff for: icu4c/source/i18n/messageformat2_data_model.cpp

+17-17
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,9 @@ bool Key::operator==(const Key& other) const {
186186
if (isWildcard()) {
187187
return other.isWildcard();
188188
}
189+
if (other.isWildcard()) {
190+
return false;
191+
}
189192
return (asLiteral() == other.asLiteral());
190193
}
191194

@@ -829,31 +832,28 @@ const Expression& Binding::getValue() const {
829832
} else {
830833
const Operator* rator = rhs.getOperator(errorCode);
831834
bool hasOperator = U_SUCCESS(errorCode);
832-
if (hasOperator && rator->isReserved()) {
833-
errorCode = U_INVALID_STATE_ERROR;
835+
// Clear error code -- the "error" from the absent operator
836+
// is handled
837+
errorCode = U_ZERO_ERROR;
838+
b = Binding(variableName, std::move(rhs));
839+
b.local = false;
840+
if (hasOperator) {
841+
rator = b.getValue().getOperator(errorCode);
842+
U_ASSERT(U_SUCCESS(errorCode));
843+
b.annotation = &rator->contents;
834844
} else {
835-
// Clear error code -- the "error" from the absent operator
836-
// is handled
837-
errorCode = U_ZERO_ERROR;
838-
b = Binding(variableName, std::move(rhs));
839-
b.local = false;
840-
if (hasOperator) {
841-
rator = b.getValue().getOperator(errorCode);
842-
U_ASSERT(U_SUCCESS(errorCode));
843-
b.annotation = std::get_if<Callable>(&(rator->contents));
844-
} else {
845-
b.annotation = nullptr;
846-
}
847-
U_ASSERT(!hasOperator || b.annotation != nullptr);
845+
b.annotation = nullptr;
848846
}
847+
U_ASSERT(!hasOperator || b.annotation != nullptr);
849848
}
850849
}
851850
return b;
852851
}
853852

854853
const OptionMap& Binding::getOptionsInternal() const {
855854
U_ASSERT(annotation != nullptr);
856-
return annotation->getOptions();
855+
U_ASSERT(std::holds_alternative<Callable>(*annotation));
856+
return std::get_if<Callable>(annotation)->getOptions();
857857
}
858858

859859
void Binding::updateAnnotation() {
@@ -863,7 +863,7 @@ void Binding::updateAnnotation() {
863863
return;
864864
}
865865
U_ASSERT(U_SUCCESS(localErrorCode) && !rator->isReserved());
866-
annotation = std::get_if<Callable>(&(rator->contents));
866+
annotation = &rator->contents;
867867
}
868868

869869
Binding::Binding(const Binding& other) : var(other.var), expr(other.expr), local(other.local) {

Diff for: icu4c/source/i18n/messageformat2_errors.cpp

+8
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,10 @@ namespace message2 {
135135
status = U_MF_VARIANT_KEY_MISMATCH_ERROR;
136136
break;
137137
}
138+
case StaticErrorType::DuplicateVariant: {
139+
status = U_MF_DUPLICATE_VARIANT_ERROR;
140+
break;
141+
}
138142
case StaticErrorType::NonexhaustivePattern: {
139143
status = U_MF_NONEXHAUSTIVE_PATTERN_ERROR;
140144
break;
@@ -211,6 +215,10 @@ namespace message2 {
211215
dataModelError = true;
212216
break;
213217
}
218+
case StaticErrorType::DuplicateVariant: {
219+
dataModelError = true;
220+
break;
221+
}
214222
case StaticErrorType::NonexhaustivePattern: {
215223
dataModelError = true;
216224
break;

Diff for: icu4c/source/i18n/messageformat2_errors.h

+1
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ namespace message2 {
5454
enum StaticErrorType {
5555
DuplicateDeclarationError,
5656
DuplicateOptionName,
57+
DuplicateVariant,
5758
MissingSelectorAnnotation,
5859
NonexhaustivePattern,
5960
SyntaxError,

Diff for: icu4c/source/i18n/messageformat2_function_registry.cpp

+35-6
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,14 @@
77

88
#if !UCONFIG_NO_MF2
99

10+
#include <math.h>
11+
1012
#include "unicode/dtptngen.h"
1113
#include "unicode/messageformat2_data_model_names.h"
1214
#include "unicode/messageformat2_function_registry.h"
1315
#include "unicode/smpdtfmt.h"
1416
#include "charstr.h"
17+
#include "double-conversion.h"
1518
#include "messageformat2_allocation.h"
1619
#include "messageformat2_function_registry_internal.h"
1720
#include "messageformat2_macros.h"
@@ -421,25 +424,51 @@ static FormattedPlaceholder notANumber(const FormattedPlaceholder& input) {
421424
return FormattedPlaceholder(input, FormattedValue(UnicodeString("NaN")));
422425
}
423426

424-
static FormattedPlaceholder stringAsNumber(const number::LocalizedNumberFormatter& nf, const FormattedPlaceholder& input, UErrorCode& errorCode) {
427+
static double parseNumberLiteral(const FormattedPlaceholder& input, UErrorCode& errorCode) {
425428
if (U_FAILURE(errorCode)) {
426429
return {};
427430
}
428431

429-
double numberValue;
430432
// Copying string to avoid GCC dangling-reference warning
431433
// (although the reference is safe)
432434
UnicodeString inputStr = input.asFormattable().getString(errorCode);
433435
// Precondition: `input`'s source Formattable has type string
434436
if (U_FAILURE(errorCode)) {
435437
return {};
436438
}
437-
UErrorCode localErrorCode = U_ZERO_ERROR;
438-
strToDouble(inputStr, numberValue, localErrorCode);
439-
if (U_FAILURE(localErrorCode)) {
439+
440+
// Hack: Check for cases that are forbidden by the MF2 grammar
441+
// but allowed by StringToDouble
442+
int32_t len = inputStr.length();
443+
444+
if (len > 0 && ((inputStr[0] == '+')
445+
|| (inputStr[0] == '0' && len > 1 && inputStr[1] != '.')
446+
|| (inputStr[len - 1] == '.')
447+
|| (inputStr[0] == '.'))) {
440448
errorCode = U_MF_OPERAND_MISMATCH_ERROR;
449+
return 0;
450+
}
451+
452+
// Otherwise, convert to double using double_conversion::StringToDoubleConverter
453+
using namespace double_conversion;
454+
int processedCharactersCount = 0;
455+
StringToDoubleConverter converter(0, 0, 0, "", "");
456+
double result =
457+
converter.StringToDouble(reinterpret_cast<const uint16_t*>(inputStr.getBuffer()),
458+
len,
459+
&processedCharactersCount);
460+
if (processedCharactersCount != len) {
461+
errorCode = U_MF_OPERAND_MISMATCH_ERROR;
462+
}
463+
return result;
464+
}
465+
466+
static FormattedPlaceholder tryParsingNumberLiteral(const number::LocalizedNumberFormatter& nf, const FormattedPlaceholder& input, UErrorCode& errorCode) {
467+
double numberValue = parseNumberLiteral(input, errorCode);
468+
if (U_FAILURE(errorCode)) {
441469
return notANumber(input);
442470
}
471+
443472
UErrorCode savedStatus = errorCode;
444473
number::FormattedNumber result = nf.formatDouble(numberValue, errorCode);
445474
// Ignore U_USING_DEFAULT_WARNING
@@ -590,7 +619,7 @@ FormattedPlaceholder StandardFunctions::Number::format(FormattedPlaceholder&& ar
590619
}
591620
case UFMT_STRING: {
592621
// Try to parse the string as a number
593-
return stringAsNumber(realFormatter, arg, errorCode);
622+
return tryParsingNumberLiteral(realFormatter, arg, errorCode);
594623
}
595624
default: {
596625
// Other types can't be parsed as a number

Diff for: icu4c/source/i18n/messageformat2_macros.h

+4-12
Original file line numberDiff line numberDiff line change
@@ -60,19 +60,11 @@ using namespace pluralimpl;
6060
// Fallback
6161
#define REPLACEMENT ((UChar32) 0xFFFD)
6262

63-
// MessageFormat2 uses four keywords: `.input`, `.local`, `.when`, and `.match`.
63+
// MessageFormat2 uses three keywords: `.input`, `.local`, and `.match`.
6464

65-
static constexpr UChar32 ID_INPUT[] = {
66-
0x2E, 0x69, 0x6E, 0x70, 0x75, 0x74, 0 /* ".input" */
67-
};
68-
69-
static constexpr UChar32 ID_LOCAL[] = {
70-
0x2E, 0x6C, 0x6F, 0x63, 0x61, 0x6C, 0 /* ".local" */
71-
};
72-
73-
static constexpr UChar32 ID_MATCH[] = {
74-
0x2E, 0x6D, 0x61, 0x74, 0x63, 0x68, 0 /* ".match" */
75-
};
65+
static constexpr std::u16string_view ID_INPUT = u".input";
66+
static constexpr std::u16string_view ID_LOCAL = u".local";
67+
static constexpr std::u16string_view ID_MATCH = u".match";
7668

7769
// Returns immediately if `errorCode` indicates failure
7870
#define CHECK_ERROR(errorCode) \

0 commit comments

Comments
 (0)