Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement Intl.NumberFormat.prototype.formatToParts #5105

Merged
merged 2 commits into from
May 9, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 27 additions & 5 deletions lib/Runtime/Library/InJavascript/Intl.js
Original file line number Diff line number Diff line change
Expand Up @@ -980,11 +980,10 @@
};

let localeCompareStateCache;
platform.registerBuiltInFunction(tagPublicFunction("String.prototype.localeCompare", function (that, locales, options) {
// Make arguments undefined to ensure that localeCompare.length === 1
platform.registerBuiltInFunction(tagPublicFunction("String.prototype.localeCompare", function (that, locales = undefined, options = undefined) {
if (this === undefined || this === null) {
platform.raiseThis_NullOrUndefined("String.prototype.localeCompare");
} else if (that === null) {
platform.raiseNeedObject();
}

const thisStr = String(this);
Expand Down Expand Up @@ -1220,7 +1219,7 @@

const n = Internal.ToNumber(this);
// Need to special case the "-0" case to format as 0 instead of -0.
return String(platform.formatNumber(n === -0 ? 0 : n, stateObject));
return platform.formatNumber(n === -0 ? 0 : n, stateObject, /* toParts */ false);
}), IntlBuiltInFunctionID.NumberToLocaleString);

if (InitType === "Number") {
Expand Down Expand Up @@ -1266,7 +1265,23 @@
}

// Need to special case the '-0' case to format as 0 instead of -0.
return String(platform.formatNumber(n === -0 ? 0 : n, hiddenObject));
return platform.formatNumber(n === -0 ? 0 : n, hiddenObject, /* toParts */ false);
});

const formatToParts = tagPublicFunction("Intl.NumberFormat.prototype.formatToParts", function formatToParts(n) {
n = Internal.ToNumber(n);

if (typeof this !== "object") {
platform.raiseNeedObjectOfType("NumberFormat.prototype.formatToParts", "NumberFormat");
}

const hiddenObject = platform.getHiddenObject(this);
if (hiddenObject === undefined || !hiddenObject.initializedNumberFormat) {
platform.raiseNeedObjectOfType("NumberFormat.prototype.formatToParts", "NumberFormat");
}

// Need to special case the '-0' case to format as 0 instead of -0.
return platform.formatNumber(n === -0 ? 0 : n, hiddenObject, /* toParts */ true);
});

// See explanation of `getCanonicalLocales`
Expand Down Expand Up @@ -1348,6 +1363,13 @@
configurable: true,
});

_.defineProperty(NumberFormat.prototype, "formatToParts", {
value: formatToParts,
enumerable: false,
configurable: true,
writable: true,
});

return NumberFormat;
})();

Expand Down
15,137 changes: 7,602 additions & 7,535 deletions lib/Runtime/Library/InJavascript/Intl.js.bc.32b.h

Large diffs are not rendered by default.

15,136 changes: 7,602 additions & 7,534 deletions lib/Runtime/Library/InJavascript/Intl.js.bc.64b.h

Large diffs are not rendered by default.

13,782 changes: 6,923 additions & 6,859 deletions lib/Runtime/Library/InJavascript/Intl.js.nojit.bc.32b.h

Large diffs are not rendered by default.

13,780 changes: 6,922 additions & 6,858 deletions lib/Runtime/Library/InJavascript/Intl.js.nojit.bc.64b.h

Large diffs are not rendered by default.

232 changes: 209 additions & 23 deletions lib/Runtime/Library/IntlEngineInterfaceExtensionObject.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2154,51 +2154,237 @@ DEFINE_ISXLOCALEAVAILABLE(PR, uloc)
}
#endif

/*
* This function has the following options:
* - Format as Percent.
* - Format as Number.
* - If significant digits are present, format using the significant digts;
* - Otherwise format using minimumFractionDigits, maximumFractionDigits, minimumIntegerDigits
*/
#ifdef INTL_ICU
// Rationale for this data structure: ICU reports back a tree of parts where each node in the tree
// has a type and a width, and there must be at least one node corresponding to each character in the tree
// (nodes can be wider than one character). Nodes can have children, and the parent-child relationship is
// that child node represents a more specific type for a given character range than the parent. Since ICU
// doesn't ever report "literal" parts of strings (like spaces or other extra characters), the root node in
// the tree will always be the entire width of the string with the type UnsetField, and we later map UnsetField
// to the "literal" part. Then, for a string like "US$ 1,000", there will be two child nodes, one of type
// currency with width [0, 3) and one of type integer with width [4, 9). The integer node will have a child
// of type group and width [6, 7). So the most specific type for characters 0 to 3 is currency, 3 to 4 is unset
// (literal), 4 to 5 is integer, 5 to 6 is group, and 6 to 9 is integer. This linear scan across the string,
// determining the part of consecutive spans of characters, is what the NumberFormat.prototype.formatToParts
// API needs to return to the user.
//
// I thought about this a bunch and I didn't like the idea of traversing an actual tree structure to get that
// information because it felt awkward to encode the "width" of nodes with specific meaning during traveral.
// So, I came up with an array structure where basically when we are told a part exists from position x to y,
// we can figure out what type used to apply to that span and update that section of the array with the new type.
// We skip over sections of the span [x, y) that have a type that doesn't match the start and end because that
// means we have already gotten a more specific part for that sub-span (for instance if we got a grouping
// separator before it's parent integer)
class NumberFormatPartsBuilder
{
private:
double num;
Field(const char16 *) formatted;
const charcount_t formattedLength;
Field(ScriptContext *) sc;
Field(UNumberFormatFields *) fields;

static const UNumberFormatFields UnsetField = static_cast<UNumberFormatFields>(0xFFFFFFFF);

JavascriptString *GetPartTypeString(UNumberFormatFields field)
{
JavascriptLibrary *library = sc->GetLibrary();

// this is outside the switch because MSVC doesn't like that UnsetField is not a valid enum value
if (field == UnsetField)
{
return library->GetIntlLiteralPartString();
}

switch (field)
{
case UNUM_INTEGER_FIELD:
{
if (JavascriptNumber::IsNan(num))
{
return library->GetIntlNanPartString();
}
else if (!NumberUtilities::IsFinite(num))
{
return library->GetIntlInfinityPartString();
}
else
{
return library->GetIntlIntegerPartString();
}
}
case UNUM_FRACTION_FIELD: return library->GetIntlFractionPartString();
case UNUM_DECIMAL_SEPARATOR_FIELD: return library->GetIntlDecimalPartString();

// The following three should only show up if UNUM_SCIENTIFIC is used, which Intl.NumberFormat doesn't currently use
case UNUM_EXPONENT_SYMBOL_FIELD: AssertOrFailFastMsg(false, "Unexpected exponent symbol field");
case UNUM_EXPONENT_SIGN_FIELD: AssertOrFailFastMsg(false, "Unexpected exponent sign field");
case UNUM_EXPONENT_FIELD: AssertOrFailFastMsg(false, "Unexpected exponent field");

case UNUM_GROUPING_SEPARATOR_FIELD: return library->GetIntlGroupPartString();
case UNUM_CURRENCY_FIELD: return library->GetIntlCurrencyPartString();
case UNUM_PERCENT_FIELD: return library->GetIntlPercentPartString();

// TODO(jahorto): Determine if this would ever be returned and what it would map to
case UNUM_PERMILL_FIELD: AssertOrFailFastMsg(false, "Unexpected permill field");

case UNUM_SIGN_FIELD: num < 0 ? library->GetIntlMinusSignPartString() : library->GetIntlPlusSignPartString();
default: AssertOrFailFastMsg(false, "Unexpected unknown part"); return nullptr;
}
}

public:
NumberFormatPartsBuilder(double num, const char16 *formatted, charcount_t formattedLength, ScriptContext *scriptContext)
: num(num)
, formatted(formatted)
, formattedLength(formattedLength)
, sc(scriptContext)
, fields(RecyclerNewArrayLeaf(sc->GetRecycler(), UNumberFormatFields, formattedLength))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm surprised the Linux build didn't complain at you for not having fields marked as a Field, since it can hold a recycler pointer. Any idea why that would be?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, looks like the checker only applies that rule to classes that have ever been recycler-allocated themselves, and NumberFormatPartsBuilder is only created on the stack. Carry on.


In reply to: 186459626 [](ancestors = 186459626)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I should probably still mark it as a Field in case this ever needs to be created in the heap in the future.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I though the pointer itself would be a Field, not the thing being pointed at, like Field(UNumberFormatFields *). Do I have this backwards?


In reply to: 186466305 [](ancestors = 186466305)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe you're correct, all the Field specifiers on this class should contain the whole type

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nope, you're right.

{
// this will allow InsertPart to tell what fields have been initialized or not, and will
// be the value of resulting { type: "literal" } fields in ToPartsArray
memset(fields, UnsetField, sizeof(UNumberFormatFields) * formattedLength);
}

// Note -- ICU reports fields as inclusive start, exclusive end, so thats how we handle the parameters here
void InsertPart(UNumberFormatFields field, int start, int end)
{
AssertOrFailFast(start >= 0);
AssertOrFailFast(start < end);

// the asserts above mean the cast to charcount_t is safe
charcount_t ccStart = static_cast<charcount_t>(start);
charcount_t ccEnd = static_cast<charcount_t>(end);

AssertOrFailFast(ccEnd <= formattedLength);

// Make sure that the part does not overlap/is a strict subset or superset of other fields
// The part builder could probably be built and tested to handle this structure not being so rigid,
// but its safer for now to make sure this is true.
UNumberFormatFields parentStartField = fields[ccStart];
UNumberFormatFields parentEndField = fields[ccEnd - 1];
AssertOrFailFast(parentStartField == parentEndField);

// Actually insert the part now
// Only overwrite fields in the span that match start and end, since fields that don't match are sub-parts that were already inserted
for (charcount_t i = ccStart; i < ccEnd; ++i)
{
if (fields[i] == parentStartField)
{
fields[i] = field;
}
}
}

JavascriptArray *ToPartsArray()
{
JavascriptArray *ret = sc->GetLibrary()->CreateArray(0);
int retIndex = 0;

// With the structure of the fields array, creating the resulting parts list to return to JS is simple
// Map set parts to the corresponding `type` value (see ECMA-402 #sec-partitionnumberpattern), and unset parts to the `literal` type
for (charcount_t start = 0; start < formattedLength; ++retIndex)
{
// At the top of the loop, fields[start] should always be different from fields[start - 1] (we should be at the start of a new part),
// since start is adjusted later in the loop to the end of the part
AssertOrFailFast(start == 0 || fields[start - 1] != fields[start]);

charcount_t end = start + 1;
while (end < formattedLength && fields[end] == fields[start])
{
++end;
}

JavascriptString *partType = GetPartTypeString(fields[start]);
JavascriptString *partValue = JavascriptString::NewCopyBuffer(formatted + start, end - start, sc);

DynamicObject* part = sc->GetLibrary()->CreateObject();
JavascriptOperators::InitProperty(part, PropertyIds::type, partType);
JavascriptOperators::InitProperty(part, PropertyIds::value, partValue);

ret->SetItem(retIndex, part, PropertyOperationFlags::PropertyOperation_None);

start = end;
}

return ret;
}
};
#endif

Var IntlEngineInterfaceExtensionObject::EntryIntl_FormatNumber(RecyclableObject* function, CallInfo callInfo, ...)
{
EngineInterfaceObject_CommonFunctionProlog(function, callInfo);

#if defined(INTL_ICU)
INTL_CHECK_ARGS(
args.Info.Count == 3 &&
(TaggedInt::Is(args.Values[1]) || JavascriptNumber::Is(args.Values[1])) &&
DynamicObject::Is(args.Values[2])
args.Info.Count == 4 &&
(TaggedInt::Is(args[1]) || JavascriptNumber::Is(args[1])) &&
DynamicObject::Is(args[2]) &&
JavascriptBoolean::Is(args[3])
);

#if defined(INTL_ICU)
DynamicObject *state = DynamicObject::UnsafeFromVar(args.Values[2]);
double num = JavascriptConversion::ToNumber(args[1], scriptContext);
DynamicObject *state = DynamicObject::UnsafeFromVar(args[2]);
bool toParts = JavascriptBoolean::FromVar(args[3])->GetValue();
Var cachedFormatter = nullptr; // cached by EntryIntl_CacheNumberFormat
AssertOrFailFast(state->GetInternalProperty(state, Js::InternalPropertyIds::HiddenObject, &cachedFormatter, NULL, scriptContext));

auto fmt = static_cast<FinalizableUNumberFormat *>(cachedFormatter);
char16 *formatted = nullptr;
int formattedLen = 0;

if (TaggedInt::Is(args.Values[1]))
if (!toParts)
{
int num = TaggedInt::ToInt32(args.Values[1]);
EnsureBuffer([&](UChar *buf, int bufLen, UErrorCode *status)
{
return unum_format(*fmt, num, buf, bufLen, nullptr, status);
return unum_formatDouble(*fmt, num, buf, bufLen, nullptr, status);
}, scriptContext->GetRecycler(), &formatted, &formattedLen);

return JavascriptString::NewWithBuffer(formatted, formattedLen, scriptContext);
}
else

#if defined(ICU_VERSION) && ICU_VERSION >= 61
UErrorCode status = U_ZERO_ERROR;
ScopedUFieldPositionIterator fpi(ufieldpositer_open(&status));

EnsureBuffer([&](UChar *buf, int bufLen, UErrorCode *status)
{
double num = JavascriptNumber::GetValue(args.Values[1]);
EnsureBuffer([&](UChar *buf, int bufLen, UErrorCode *status)
{
return unum_formatDouble(*fmt, num, buf, bufLen, nullptr, status);
}, scriptContext->GetRecycler(), &formatted, &formattedLen);
return unum_formatDoubleForFields(*fmt, num, buf, bufLen, fpi, status);
}, scriptContext->GetRecycler(), &formatted, &formattedLen);

NumberFormatPartsBuilder nfpb(num, formatted, formattedLen, scriptContext);

int partStart = 0;
int partEnd = 0;
int i = 0;
for (int kind = ufieldpositer_next(fpi, &partStart, &partEnd); kind >= 0; kind = ufieldpositer_next(fpi, &partStart, &partEnd), ++i)
{
nfpb.InsertPart(static_cast<UNumberFormatFields>(kind), partStart, partEnd);
}

return JavascriptString::NewWithBuffer(formatted, formattedLen, scriptContext);;
return nfpb.ToPartsArray();
#else
JavascriptLibrary *library = scriptContext->GetLibrary();
JavascriptArray *ret = library->CreateArray(1);
DynamicObject* part = library->CreateObject();
EnsureBuffer([&](UChar *buf, int bufLen, UErrorCode *status)
{
return unum_formatDouble(*fmt, num, buf, bufLen, nullptr, status);
}, scriptContext->GetRecycler(), &formatted, &formattedLen);
JavascriptOperators::InitProperty(part, PropertyIds::type, library->GetIntlLiteralPartString());
JavascriptOperators::InitProperty(part, PropertyIds::value, JavascriptString::NewWithBuffer(formatted, formattedLen, scriptContext));

ret->SetItem(0, part, PropertyOperationFlags::PropertyOperation_None);
return ret;
#endif // #if ICU_VERSION >= 61 ... #else
#else
INTL_CHECK_ARGS(
args.Info.Count == 3 &&
(TaggedInt::Is(args.Values[1]) || JavascriptNumber::Is(args.Values[1])) &&
DynamicObject::Is(args.Values[2])
);

DynamicObject *options = DynamicObject::FromVar(args.Values[2]);
Var hiddenObject = nullptr;
AssertOrFailFastMsg(options->GetInternalProperty(options, Js::InternalPropertyIds::HiddenObject, &hiddenObject, NULL, scriptContext),
Expand Down Expand Up @@ -2450,7 +2636,7 @@ DEFINE_ISXLOCALEAVAILABLE(PR, uloc)
int i = 0;
for (
int kind = ufieldpositer_next(fpi, &partStart, &partEnd);
kind > 0;
kind >= 0;
kind = ufieldpositer_next(fpi, &partStart, &partEnd), ++i
)
{
Expand Down
10 changes: 10 additions & 0 deletions lib/Runtime/Library/StringCacheList.h
Original file line number Diff line number Diff line change
Expand Up @@ -97,4 +97,14 @@ STRING(IntlTimeZoneNamePart, _u("timeZoneName"))
STRING(IntlLiteralPart, _u("literal"))
STRING(IntlUnknownPart, _u("unknown"))
STRING(IntlPluralRulesOther, _u("other"))
STRING(IntlNanPart, _u("nan"))
STRING(IntlInfinityPart, _u("infinity"))
STRING(IntlIntegerPart, _u("integer"))
STRING(IntlGroupPart, _u("group"))
STRING(IntlDecimalPart, _u("decimal"))
STRING(IntlFractionPart, _u("fraction"))
STRING(IntlCurrencyPart, _u("currency"))
STRING(IntlPercentPart, _u("percent"))
STRING(IntlPlusSignPart, _u("plusSign"))
STRING(IntlMinusSignPart, _u("minusSign"))
#endif
12 changes: 12 additions & 0 deletions test/Intl/Collator.js
Original file line number Diff line number Diff line change
Expand Up @@ -177,4 +177,16 @@ testRunner.runTests([
pairs.forEach((pair) => test(pair[0], pair[1]));
}
},
{
name: "https://github.com/Microsoft/ChakraCore/issues/5097",
body() {
const cases = [0, 1, true, false, null, undefined, { toString() { return "hello!" }}, [1, 2, 3, 4], {}, new (class ToStringTag { get [Symbol.toStringTag]() { return "MyClass" } })];

const coll = new Intl.Collator();
cases.forEach((test) => {
assert.areEqual(0, ("" + test).localeCompare(test), `${test} did not compare equal to itself using String.prototype.localeCompare`);
assert.areEqual(0, coll.compare("" + test, test), `${test} did not compare equal to itself using Collator.prototype.compare`);
});
}
}
], { verbose: !WScript.Arguments.includes("summary") });
Loading