Skip to content

Commit

Permalink
deps: update icu to 75.1
Browse files Browse the repository at this point in the history
PR-URL: #52573
Reviewed-By: Richard Lau <[email protected]>
Reviewed-By: Michaël Zasso <[email protected]>
Reviewed-By: Mohammed Keyvanzadeh <[email protected]>
Reviewed-By: Luigi Pinca <[email protected]>
  • Loading branch information
nodejs-github-bot authored and marco-ippolito committed May 3, 2024
1 parent b49464b commit a135027
Show file tree
Hide file tree
Showing 408 changed files with 25,369 additions and 12,777 deletions.
4 changes: 3 additions & 1 deletion deps/icu-small/LICENSE
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ UNICODE LICENSE V3

COPYRIGHT AND PERMISSION NOTICE

Copyright © 2016-2023 Unicode, Inc.
Copyright © 2016-2024 Unicode, Inc.

NOTICE TO USER: Carefully read the following legal agreement. BY
DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING DATA FILES, AND/OR
Expand Down Expand Up @@ -38,6 +38,8 @@ not be used in advertising or otherwise to promote the sale, use or other
dealings in these Data Files or Software without prior written
authorization of the copyright holder.

SPDX-License-Identifier: Unicode-3.0

----------------------------------------------------------------------

Third-Party Software Licenses
Expand Down
4 changes: 2 additions & 2 deletions deps/icu-small/README-FULL-ICU.txt
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
ICU sources - auto generated by shrink-icu-src.py

This directory contains the ICU subset used by --with-intl=full-icu
It is a strict subset of ICU 74 source files with the following exception(s):
* deps/icu-small/source/data/in/icudt74l.dat.bz2 : compressed data file
It is a strict subset of ICU 75 source files with the following exception(s):
* deps/icu-small/source/data/in/icudt75l.dat.bz2 : compressed data file


To rebuild this directory, see ../../tools/icu/README.md
Expand Down
6 changes: 2 additions & 4 deletions deps/icu-small/source/common/brkeng.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -114,13 +114,11 @@ UnhandledEngine::handleCharacter(UChar32 c) {
*/

ICULanguageBreakFactory::ICULanguageBreakFactory(UErrorCode &/*status*/) {
fEngines = 0;
fEngines = nullptr;
}

ICULanguageBreakFactory::~ICULanguageBreakFactory() {
if (fEngines != 0) {
delete fEngines;
}
delete fEngines;
}

void ICULanguageBreakFactory::ensureEngines(UErrorCode& status) {
Expand Down
9 changes: 3 additions & 6 deletions deps/icu-small/source/common/brkiter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -438,17 +438,14 @@ BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status)
UTRACE_ENTRY(UTRACE_UBRK_CREATE_LINE);
uprv_strcpy(lb_lw, "line");
UErrorCode kvStatus = U_ZERO_ERROR;
CharString value;
CharStringByteSink valueSink(&value);
loc.getKeywordValue("lb", valueSink, kvStatus);
auto value = loc.getKeywordValue<CharString>("lb", kvStatus);
if (U_SUCCESS(kvStatus) && (value == "strict" || value == "normal" || value == "loose")) {
uprv_strcat(lb_lw, "_");
uprv_strcat(lb_lw, value.data());
}
// lw=phrase is only supported in Japanese and Korean
if (uprv_strcmp(loc.getLanguage(), "ja") == 0 || uprv_strcmp(loc.getLanguage(), "ko") == 0) {
value.clear();
loc.getKeywordValue("lw", valueSink, kvStatus);
value = loc.getKeywordValue<CharString>("lw", kvStatus);
if (U_SUCCESS(kvStatus) && value == "phrase") {
uprv_strcat(lb_lw, "_");
uprv_strcat(lb_lw, value.data());
Expand Down Expand Up @@ -500,7 +497,7 @@ BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status)
Locale
BreakIterator::getLocale(ULocDataLocaleType type, UErrorCode& status) const {
if (type == ULOC_REQUESTED_LOCALE) {
return Locale(requestLocale);
return {requestLocale};
}
U_LOCALE_BASED(locBased, *this);
return locBased.getLocale(type, status);
Expand Down
112 changes: 90 additions & 22 deletions deps/icu-small/source/common/bytesinkutil.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,18 +7,52 @@
#ifndef BYTESINKUTIL_H
#define BYTESINKUTIL_H

#include <type_traits>

#include "unicode/utypes.h"
#include "unicode/bytestream.h"
#include "unicode/edits.h"
#include "charstr.h"
#include "cmemory.h"
#include "uassert.h"
#include "ustr_imp.h"

U_NAMESPACE_BEGIN

class ByteSink;
class CharString;
class Edits;

class U_COMMON_API CharStringByteSink : public ByteSink {
public:
CharStringByteSink(CharString* dest);
~CharStringByteSink() override;

CharStringByteSink() = delete;
CharStringByteSink(const CharStringByteSink&) = delete;
CharStringByteSink& operator=(const CharStringByteSink&) = delete;

void Append(const char* bytes, int32_t n) override;

char* GetAppendBuffer(int32_t min_capacity,
int32_t desired_capacity_hint,
char* scratch,
int32_t scratch_capacity,
int32_t* result_capacity) override;

private:
CharString& dest_;
};

// CharString doesn't provide the public API that StringByteSink requires a
// string class to have so this template specialization replaces the default
// implementation of StringByteSink<CharString> with CharStringByteSink.
template<>
class StringByteSink<CharString> : public CharStringByteSink {
public:
StringByteSink(CharString* dest) : CharStringByteSink(dest) { }
StringByteSink(CharString* dest, int32_t /*initialAppendCapacity*/) : CharStringByteSink(dest) { }
};

class U_COMMON_API ByteSinkUtil {
public:
ByteSinkUtil() = delete; // all static
Expand Down Expand Up @@ -57,30 +91,64 @@ class U_COMMON_API ByteSinkUtil {
ByteSink &sink, uint32_t options, Edits *edits,
UErrorCode &errorCode);

private:
static void appendNonEmptyUnchanged(const uint8_t *s, int32_t length,
ByteSink &sink, uint32_t options, Edits *edits);
};

class U_COMMON_API CharStringByteSink : public ByteSink {
public:
CharStringByteSink(CharString* dest);
~CharStringByteSink() override;

CharStringByteSink() = delete;
CharStringByteSink(const CharStringByteSink&) = delete;
CharStringByteSink& operator=(const CharStringByteSink&) = delete;

void Append(const char* bytes, int32_t n) override;
/**
* Calls a lambda that writes to a ByteSink with a CheckedArrayByteSink
* and then returns through u_terminateChars(), in order to implement
* the classic ICU4C C API writing to a fix sized buffer on top of a
* contemporary C++ API.
*
* @param buffer receiving buffer
* @param capacity capacity of receiving buffer
* @param lambda that gets called with the sink as an argument
* @param status set to U_BUFFER_OVERFLOW_ERROR on overflow
* @return number of bytes written, or needed (in case of overflow)
* @internal
*/
template <typename F,
typename = std::enable_if_t<
std::is_invocable_r_v<void, F, ByteSink&, UErrorCode&>>>
static int32_t viaByteSinkToTerminatedChars(char* buffer, int32_t capacity,
F&& lambda,
UErrorCode& status) {
if (U_FAILURE(status)) { return 0; }
CheckedArrayByteSink sink(buffer, capacity);
lambda(sink, status);
if (U_FAILURE(status)) { return 0; }

int32_t reslen = sink.NumberOfBytesAppended();

if (sink.Overflowed()) {
status = U_BUFFER_OVERFLOW_ERROR;
return reslen;
}

return u_terminateChars(buffer, capacity, reslen, &status);
}

char* GetAppendBuffer(int32_t min_capacity,
int32_t desired_capacity_hint,
char* scratch,
int32_t scratch_capacity,
int32_t* result_capacity) override;
/**
* Calls a lambda that writes to a ByteSink with a CharStringByteSink and
* then returns a CharString, in order to implement a contemporary C++ API
* on top of a C/C++ compatibility ByteSink API.
*
* @param lambda that gets called with the sink as an argument
* @param status to check and report
* @return the resulting string, or an empty string (in case of error)
* @internal
*/
template <typename F,
typename = std::enable_if_t<
std::is_invocable_r_v<void, F, ByteSink&, UErrorCode&>>>
static CharString viaByteSinkToCharString(F&& lambda, UErrorCode& status) {
if (U_FAILURE(status)) { return {}; }
CharString result;
CharStringByteSink sink(&result);
lambda(sink, status);
return result;
}

private:
CharString& dest_;
static void appendNonEmptyUnchanged(const uint8_t *s, int32_t length,
ByteSink &sink, uint32_t options, Edits *edits);
};

U_NAMESPACE_END
Expand Down
50 changes: 29 additions & 21 deletions deps/icu-small/source/common/caniter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ U_NAMESPACE_BEGIN

UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CanonicalIterator)


/**
*@param source string to get results for
*/
Expand All @@ -73,10 +74,10 @@ CanonicalIterator::CanonicalIterator(const UnicodeString &sourceStr, UErrorCode
pieces_lengths(nullptr),
current(nullptr),
current_length(0),
nfd(*Normalizer2::getNFDInstance(status)),
nfcImpl(*Normalizer2Factory::getNFCImpl(status))
nfd(Normalizer2::getNFDInstance(status)),
nfcImpl(Normalizer2Factory::getNFCImpl(status))
{
if(U_SUCCESS(status) && nfcImpl.ensureCanonIterData(status)) {
if(U_SUCCESS(status) && nfcImpl->ensureCanonIterData(status)) {
setSource(sourceStr, status);
}
}
Expand Down Expand Up @@ -172,7 +173,7 @@ void CanonicalIterator::setSource(const UnicodeString &newSource, UErrorCode &st
int32_t i = 0;
UnicodeString *list = nullptr;

nfd.normalize(newSource, source, status);
nfd->normalize(newSource, source, status);
if(U_FAILURE(status)) {
return;
}
Expand All @@ -194,7 +195,7 @@ void CanonicalIterator::setSource(const UnicodeString &newSource, UErrorCode &st
current[0] = 0;
pieces[0] = new UnicodeString[1];
pieces_lengths[0] = 1;
if (pieces[0] == 0) {
if (pieces[0] == nullptr) {
status = U_MEMORY_ALLOCATION_ERROR;
goto CleanPartialInitialization;
}
Expand All @@ -203,7 +204,7 @@ void CanonicalIterator::setSource(const UnicodeString &newSource, UErrorCode &st


list = new UnicodeString[source.length()];
if (list == 0) {
if (list == nullptr) {
status = U_MEMORY_ALLOCATION_ERROR;
goto CleanPartialInitialization;
}
Expand All @@ -219,7 +220,7 @@ void CanonicalIterator::setSource(const UnicodeString &newSource, UErrorCode &st
// on the NFD form - see above).
for (; i < source.length(); i += U16_LENGTH(cp)) {
cp = source.char32At(i);
if (nfcImpl.isCanonSegmentStarter(cp)) {
if (nfcImpl->isCanonSegmentStarter(cp)) {
source.extract(start, i-start, list[list_length++]); // add up to i
start = i;
}
Expand Down Expand Up @@ -252,9 +253,7 @@ void CanonicalIterator::setSource(const UnicodeString &newSource, UErrorCode &st
return;
// Common section to cleanup all local variables and reset object variables.
CleanPartialInitialization:
if (list != nullptr) {
delete[] list;
}
delete[] list;
cleanPieces();
}

Expand All @@ -264,10 +263,19 @@ void CanonicalIterator::setSource(const UnicodeString &newSource, UErrorCode &st
* @param source the string to find permutations for
* @return the results in a set.
*/
void U_EXPORT2 CanonicalIterator::permute(UnicodeString &source, UBool skipZeros, Hashtable *result, UErrorCode &status) {
void U_EXPORT2 CanonicalIterator::permute(UnicodeString &source, UBool skipZeros, Hashtable *result, UErrorCode &status, int32_t depth) {
if(U_FAILURE(status)) {
return;
}
// To avoid infinity loop caused by permute, we limit the depth of recursive
// call to permute and return U_UNSUPPORTED_ERROR.
// We know in some unit test we need at least 4. Set to 8 just in case some
// unforseen use cases.
constexpr int32_t kPermuteDepthLimit = 8;
if (depth > kPermuteDepthLimit) {
status = U_UNSUPPORTED_ERROR;
return;
}
//if (PROGRESS) printf("Permute: %s\n", UToS(Tr(source)));
int32_t i = 0;

Expand All @@ -277,7 +285,7 @@ void U_EXPORT2 CanonicalIterator::permute(UnicodeString &source, UBool skipZeros
if (source.length() <= 2 && source.countChar32() <= 1) {
UnicodeString *toPut = new UnicodeString(source);
/* test for nullptr */
if (toPut == 0) {
if (toPut == nullptr) {
status = U_MEMORY_ALLOCATION_ERROR;
return;
}
Expand Down Expand Up @@ -311,7 +319,7 @@ void U_EXPORT2 CanonicalIterator::permute(UnicodeString &source, UBool skipZeros

// see what the permutations of the characters before and after this one are
//Hashtable *subpermute = permute(source.substring(0,i) + source.substring(i + UTF16.getCharCount(cp)));
permute(subPermuteString.remove(i, U16_LENGTH(cp)), skipZeros, &subpermute, status);
permute(subPermuteString.remove(i, U16_LENGTH(cp)), skipZeros, &subpermute, status, depth+1);
/* Test for buffer overflows */
if(U_FAILURE(status)) {
return;
Expand Down Expand Up @@ -346,7 +354,7 @@ UnicodeString* CanonicalIterator::getEquivalents(const UnicodeString &segment, i
Hashtable permutations(status);
Hashtable basic(status);
if (U_FAILURE(status)) {
return 0;
return nullptr;
}
result.setValueDeleter(uprv_deleteUObject);
permutations.setValueDeleter(uprv_deleteUObject);
Expand Down Expand Up @@ -381,7 +389,7 @@ UnicodeString* CanonicalIterator::getEquivalents(const UnicodeString &segment, i
//UnicodeString *possible = new UnicodeString(*((UnicodeString *)(ne2->value.pointer)));
UnicodeString possible(*((UnicodeString *)(ne2->value.pointer)));
UnicodeString attempt;
nfd.normalize(possible, attempt, status);
nfd->normalize(possible, attempt, status);

// TODO: check if operator == is semanticaly the same as attempt.equals(segment)
if (attempt==segment) {
Expand All @@ -399,15 +407,15 @@ UnicodeString* CanonicalIterator::getEquivalents(const UnicodeString &segment, i

/* Test for buffer overflows */
if(U_FAILURE(status)) {
return 0;
return nullptr;
}
// convert into a String[] to clean up storage
//String[] finalResult = new String[result.size()];
UnicodeString *finalResult = nullptr;
int32_t resultCount;
if((resultCount = result.count()) != 0) {
finalResult = new UnicodeString[resultCount];
if (finalResult == 0) {
if (finalResult == nullptr) {
status = U_MEMORY_ALLOCATION_ERROR;
return nullptr;
}
Expand Down Expand Up @@ -448,7 +456,7 @@ Hashtable *CanonicalIterator::getEquivalents2(Hashtable *fillinResult, const cha
for (int32_t i = 0; i < segLen; i += U16_LENGTH(cp)) {
// see if any character is at the start of some decomposition
U16_GET(segment, 0, i, segLen, cp);
if (!nfcImpl.getCanonStartSet(cp, starts)) {
if (!nfcImpl->getCanonStartSet(cp, starts)) {
continue;
}
// if so, see which decompositions match
Expand All @@ -471,7 +479,7 @@ Hashtable *CanonicalIterator::getEquivalents2(Hashtable *fillinResult, const cha
UnicodeString item = *((UnicodeString *)(ne->value.pointer));
UnicodeString *toAdd = new UnicodeString(prefix);
/* test for nullptr */
if (toAdd == 0) {
if (toAdd == nullptr) {
status = U_MEMORY_ALLOCATION_ERROR;
return nullptr;
}
Expand Down Expand Up @@ -509,7 +517,7 @@ Hashtable *CanonicalIterator::extract(Hashtable *fillinResult, UChar32 comp, con
UnicodeString temp(comp);
int32_t inputLen=temp.length();
UnicodeString decompString;
nfd.normalize(temp, decompString, status);
nfd->normalize(temp, decompString, status);
if (U_FAILURE(status)) {
return nullptr;
}
Expand Down Expand Up @@ -573,7 +581,7 @@ Hashtable *CanonicalIterator::extract(Hashtable *fillinResult, UChar32 comp, con
// brute force approach
// check to make sure result is canonically equivalent
UnicodeString trial;
nfd.normalize(temp, trial, status);
nfd->normalize(temp, trial, status);
if(U_FAILURE(status) || trial.compare(segment+segmentPos, segLen - segmentPos) != 0) {
return nullptr;
}
Expand Down
Loading

0 comments on commit a135027

Please sign in to comment.