-
Notifications
You must be signed in to change notification settings - Fork 29.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
net: use icu's punycode implementation #7355
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
'use strict'; | ||
|
||
const common = require('../common.js'); | ||
const icu = process.binding('icu'); | ||
const punycode = require('punycode'); | ||
|
||
// Internationalized host names used as the benchmark inputs.
const hostnames = [
  'افغانستا.icom.museum',
  'الجزائر.icom.museum',
  'österreich.icom.museum',
  'বাংলাদেশ.icom.museum',
  'беларусь.icom.museum',
  'belgië.icom.museum',
  'българия.icom.museum',
  'تشادر.icom.museum',
  '中国.icom.museum',
  'القمر.icom.museum',
  'κυπρος.icom.museum',
  'českárepublika.icom.museum',
  'مصر.icom.museum',
  'ελλάδα.icom.museum',
  'magyarország.icom.museum',
  'ísland.icom.museum',
  'भारत.icom.museum',
  'ايران.icom.museum',
  'éire.icom.museum',
  'איקו״ם.ישראל.museum',
  '日本.icom.museum',
  'الأردن.icom.museum'
];

// Benchmark configuration consumed by main(): `method` selects the
// implementation to exercise, `n` is the loop count and `val` the input name.
const bench = common.createBenchmark(main, {
  method: ['punycode', 'icu'],
  n: [1024],
  val: hostnames
});
|
||
// Round-trips `val` through the `punycode` module: first to its ASCII form,
// then back to Unicode. The result is discarded; only the work matters here.
function usingPunycode(val) {
  const ascii = punycode.toASCII(val);
  punycode.toUnicode(ascii);
}
|
||
// Round-trips `val` through the `icu` binding: first to its ASCII form, then
// back to Unicode. The result is discarded; only the work matters here.
function usingICU(val) {
  const ascii = icu.toASCII(val);
  icu.toUnicode(ascii);
}
|
||
// Times `n` round trips of `val` through the `punycode` module.
function runPunycode(n, val) {
  // Ask the benchmark helper to optimize the function before timing starts.
  common.v8ForceOptimization(usingPunycode, val);

  bench.start();
  for (var iteration = 0; iteration < n; iteration++) {
    usingPunycode(val);
  }
  bench.end(n);
}
|
||
// Times `n` round trips of `val` through the `icu` binding.
function runICU(n, val) {
  // Ask the benchmark helper to optimize the function before timing starts.
  common.v8ForceOptimization(usingICU, val);

  bench.start();
  for (var iteration = 0; iteration < n; iteration++) {
    usingICU(val);
  }
  bench.end(n);
}
|
||
// Benchmark entry point: reads the loop count and input from `conf`, then
// dispatches to the runner named by `conf.method`. Throws on an unknown method.
function main(conf) {
  const n = +conf.n;
  const val = conf.val;
  const method = conf.method;

  if (method === 'punycode') {
    runPunycode(n, val);
  } else if (method === 'icu') {
    runICU(n, val);
  } else {
    throw new Error('Unexpected method');
  }
}
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -23,8 +23,16 @@ | |
|
||
#if defined(NODE_HAVE_I18N_SUPPORT) | ||
|
||
#include "node.h" | ||
#include "env.h" | ||
#include "env-inl.h" | ||
#include "util.h" | ||
#include "util-inl.h" | ||
#include "v8.h" | ||
|
||
#include <unicode/putil.h> | ||
#include <unicode/udata.h> | ||
#include <unicode/uidna.h> | ||
|
||
#ifdef NODE_HAVE_SMALL_ICU | ||
/* if this is defined, we have a 'secondary' entry point. | ||
|
@@ -43,6 +51,13 @@ extern "C" const char U_DATA_API SMALL_ICUDATA_ENTRY_POINT[]; | |
|
||
namespace node { | ||
|
||
using v8::Context; | ||
using v8::FunctionCallbackInfo; | ||
using v8::Local; | ||
using v8::Object; | ||
using v8::String; | ||
using v8::Value; | ||
|
||
bool flag_icu_data_dir = false; | ||
|
||
namespace i18n { | ||
|
@@ -64,7 +79,124 @@ bool InitializeICUDirectory(const char* icu_data_path) { | |
} | ||
} | ||
|
||
// Converts the domain name `input` (`length` bytes of UTF-8) to its Unicode
// form with ICU's UTS #46 API, writing the result into `buf`.
//
// Returns the value reported by uidna_nameToUnicodeUTF8(), or -1 when the
// IDNA instance cannot be opened or ICU leaves a failure status behind.
static int32_t ToUnicode(MaybeStackBuffer<char>* buf,
                         const char* input,
                         size_t length) {
  const uint32_t options = UIDNA_DEFAULT | UIDNA_NONTRANSITIONAL_TO_UNICODE;
  UErrorCode status = U_ZERO_ERROR;
  UIDNA* uidna = uidna_openUTS46(options, &status);

  // Check the open status right away rather than relying on later ICU calls
  // to notice the earlier failure.
  if (U_FAILURE(status))
    return -1;

  UIDNAInfo info = UIDNA_INFO_INITIALIZER;

  // The conversion may have to run twice, so keep the call in one place. The
  // buffer pointer and size are re-read on every invocation.
  auto convert = [&]() {
    return uidna_nameToUnicodeUTF8(uidna,
                                   input, length,
                                   **buf, buf->length(),
                                   &info,
                                   &status);
  };

  int32_t len = convert();

  if (status == U_BUFFER_OVERFLOW_ERROR) {
    // The first attempt did not fit: grow the buffer to the reported length
    // and try again with a clean status.
    status = U_ZERO_ERROR;
    buf->AllocateSufficientStorage(len);
    len = convert();
  }

  uidna_close(uidna);
  return U_FAILURE(status) ? -1 : len;
}
|
||
// Converts the domain name `input` (`length` bytes of UTF-8) to its ASCII
// (Punycode) form with ICU's UTS #46 API, writing the result into `buf`.
//
// Returns the value reported by uidna_nameToASCII_UTF8(), or -1 when
//   * the IDNA instance cannot be opened,
//   * ICU leaves a failure status behind, or
//   * UTS #46 processing recorded any error for the name in `info.errors`.
static int32_t ToASCII(MaybeStackBuffer<char>* buf,
                       const char* input,
                       size_t length) {
  UErrorCode status = U_ZERO_ERROR;
  uint32_t options = UIDNA_DEFAULT;
  options |= UIDNA_NONTRANSITIONAL_TO_ASCII;
  UIDNA* uidna = uidna_openUTS46(options, &status);
  if (U_FAILURE(status))
    return -1;
  UIDNAInfo info = UIDNA_INFO_INITIALIZER;

  int32_t len = uidna_nameToASCII_UTF8(uidna,
                                       input, length,
                                       **buf, buf->length(),
                                       &info,
                                       &status);

  if (status == U_BUFFER_OVERFLOW_ERROR) {
    // The first attempt did not fit: grow the buffer to the reported length
    // and try again with a clean status.
    status = U_ZERO_ERROR;
    buf->AllocateSufficientStorage(len);
    len = uidna_nameToASCII_UTF8(uidna,
                                 input, length,
                                 **buf, buf->length(),
                                 &info,
                                 &status);
  }

  // ICU uses `status` only for exceptional failures (e.g. allocation
  // errors). Problems with the name itself are reported through
  // `info.errors`, and per UTS #46 ToASCII fails if any error was recorded,
  // so both have to be checked before the output can be trusted.
  if (U_FAILURE(status) || info.errors != 0)
    len = -1;

  uidna_close(uidna);
  return len;
}
|
||
static void ToUnicode(const FunctionCallbackInfo<Value>& args) { | ||
Environment* env = Environment::GetCurrent(args); | ||
CHECK_GE(args.Length(), 1); | ||
CHECK(args[0]->IsString()); | ||
Utf8Value val(env->isolate(), args[0]); | ||
MaybeStackBuffer<char> buf; | ||
int32_t len = ToUnicode(&buf, *val, val.length()); | ||
|
||
if (len < 0) { | ||
return env->ThrowError("Cannot convert name to Unicode"); | ||
} | ||
|
||
args.GetReturnValue().Set( | ||
String::NewFromUtf8(env->isolate(), | ||
*buf, | ||
v8::NewStringType::kNormal, | ||
len).ToLocalChecked()); | ||
} | ||
|
||
static void ToASCII(const FunctionCallbackInfo<Value>& args) { | ||
Environment* env = Environment::GetCurrent(args); | ||
CHECK_GE(args.Length(), 1); | ||
CHECK(args[0]->IsString()); | ||
Utf8Value val(env->isolate(), args[0]); | ||
MaybeStackBuffer<char> buf; | ||
int32_t len = ToASCII(&buf, *val, val.length()); | ||
|
||
if (len < 0) { | ||
return env->ThrowError("Cannot convert name to ASCII"); | ||
} | ||
|
||
args.GetReturnValue().Set( | ||
String::NewFromUtf8(env->isolate(), | ||
*buf, | ||
v8::NewStringType::kNormal, | ||
len).ToLocalChecked()); | ||
} | ||
|
||
// Module initializer for the `icu` binding: installs the `toUnicode` and
// `toASCII` methods on `target`. `unused` and `priv` are not read.
void Init(Local<Object> target,
          Local<Value> unused,
          Local<Context> context,
          void* priv) {
  Environment* const environment = Environment::GetCurrent(context);

  environment->SetMethod(target, "toUnicode", ToUnicode);
  environment->SetMethod(target, "toASCII", ToASCII);
}
|
||
} // namespace i18n | ||
} // namespace node | ||
|
||
NODE_MODULE_CONTEXT_AWARE_BUILTIN(icu, node::i18n::Init) | ||
|
||
#endif // NODE_HAVE_I18N_SUPPORT |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
'use strict'; | ||
|
||
const common = require('../common'); | ||
const icu = getPunycode(); | ||
const assert = require('assert'); | ||
|
||
// Loads the internal `icu` binding. Yields `undefined` instead of throwing
// when the binding cannot be loaded.
function getPunycode() {
  let binding;
  try {
    binding = process.binding('icu');
  } catch (err) {
    binding = undefined;
  }
  return binding;
}
|
||
if (!icu) { | ||
common.skip('icu punycode tests because ICU is not present.'); | ||
return; | ||
} | ||
|
||
// Credit for list: http://www.i18nguy.com/markup/idna-examples.html | ||
const tests = [
  'افغانستا.icom.museum',
  'الجزائر.icom.museum',
  'österreich.icom.museum',
  'বাংলাদেশ.icom.museum',
  'беларусь.icom.museum',
  'belgië.icom.museum',
  'българия.icom.museum',
  'تشادر.icom.museum',
  '中国.icom.museum',
  'القمر.icom.museum',
  'κυπρος.icom.museum',
  'českárepublika.icom.museum',
  'مصر.icom.museum',
  'ελλάδα.icom.museum',
  'magyarország.icom.museum',
  'ísland.icom.museum',
  'भारत.icom.museum',
  'ايران.icom.museum',
  'éire.icom.museum',
  'איקו״ם.ישראל.museum',
  '日本.icom.museum',
  'الأردن.icom.museum',
  'қазақстан.icom.museum',
  '한국.icom.museum',
  'кыргызстан.icom.museum',
  'ລາວ.icom.museum',
  'لبنان.icom.museum',
  'македонија.icom.museum',
  'موريتانيا.icom.museum',
  'méxico.icom.museum',
  'монголулс.icom.museum',
  'المغرب.icom.museum',
  'नेपाल.icom.museum',
  'عمان.icom.museum',
  'قطر.icom.museum',
  'românia.icom.museum',
  'россия.иком.museum',
  'србијаицрнагора.иком.museum',
  'இலங்கை.icom.museum',
  'españa.icom.museum',
  'ไทย.icom.museum',
  'تونس.icom.museum',
  'türkiye.icom.museum',
  'украина.icom.museum',
  'việtnam.icom.museum'
];

// Testing the roundtrip: converting a name to ASCII and back must yield the
// original name. The computed value is passed as `actual` and the input as
// `expected`, and the message names the hostname that failed.
tests.forEach((hostname) => {
  const roundTripped = icu.toUnicode(icu.toASCII(hostname));
  assert.strictEqual(
    roundTripped,
    hostname,
    `round trip of '${hostname}' produced '${roundTripped}'`);
});
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
When would this fail?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
./configure --without-intl
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Note: it may be worth looking at that flag. I swear it still builds ICU even though it isn't linked.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It shouldn't. It should be the same as --with-intl=none