Skip to content

Commit dc74f17

Browse files
authored
buffer: re-enable Fast API for Buffer.write
Re-enables the Fast API for Buffer.write after fixing UTF-8 handling. Fixes: nodejs#54521 PR-URL: nodejs#54526 Reviewed-By: Daniel Lemire <[email protected]> Reviewed-By: Benjamin Gruenbaum <[email protected]> Reviewed-By: James M Snell <[email protected]> Reviewed-By: Anna Henningsen <[email protected]> Reviewed-By: Yagiz Nizipli <[email protected]> Reviewed-By: Paolo Insogna <[email protected]>
1 parent 2bd6a57 commit dc74f17

File tree

2 files changed

+165
-14
lines changed

2 files changed

+165
-14
lines changed

src/node_buffer.cc

+120-14
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
#include "node_buffer.h"
2323
#include "node.h"
2424
#include "node_blob.h"
25+
#include "node_debug.h"
2526
#include "node_errors.h"
2627
#include "node_external_reference.h"
2728
#include "node_i18n.h"
@@ -1442,6 +1443,79 @@ void CopyArrayBuffer(const FunctionCallbackInfo<Value>& args) {
14421443
memcpy(dest, src, bytes_to_copy);
14431444
}
14441445

1446+
size_t convert_latin1_to_utf8_s(const char* src,
1447+
size_t src_len,
1448+
char* dst,
1449+
size_t dst_len) noexcept {
1450+
size_t src_pos = 0;
1451+
size_t dst_pos = 0;
1452+
1453+
const auto safe_len = std::min(src_len, dst_len >> 1);
1454+
if (safe_len > 16) {
1455+
// convert_latin1_to_utf8 will never write more than input length * 2.
1456+
dst_pos += simdutf::convert_latin1_to_utf8(src, safe_len, dst);
1457+
src_pos += safe_len;
1458+
}
1459+
1460+
// Based on:
1461+
// https://github.com/simdutf/simdutf/blob/master/src/scalar/latin1_to_utf8/latin1_to_utf8.h
1462+
// with an upper limit on the number of bytes to write.
1463+
1464+
const auto src_ptr = reinterpret_cast<const uint8_t*>(src);
1465+
const auto dst_ptr = reinterpret_cast<uint8_t*>(dst);
1466+
1467+
size_t skip_pos = src_pos;
1468+
while (src_pos < src_len && dst_pos < dst_len) {
1469+
if (skip_pos <= src_pos && src_pos + 16 <= src_len &&
1470+
dst_pos + 16 <= dst_len) {
1471+
uint64_t v1;
1472+
memcpy(&v1, src_ptr + src_pos + 0, 8);
1473+
uint64_t v2;
1474+
memcpy(&v2, src_ptr + src_pos + 8, 8);
1475+
if (((v1 | v2) & UINT64_C(0x8080808080808080)) == 0) {
1476+
memcpy(dst_ptr + dst_pos, src_ptr + src_pos, 16);
1477+
dst_pos += 16;
1478+
src_pos += 16;
1479+
} else {
1480+
skip_pos = src_pos + 16;
1481+
}
1482+
} else {
1483+
const auto byte = src_ptr[src_pos++];
1484+
if ((byte & 0x80) == 0) {
1485+
dst_ptr[dst_pos++] = byte;
1486+
} else if (dst_pos + 2 <= dst_len) {
1487+
dst_ptr[dst_pos++] = (byte >> 6) | 0b11000000;
1488+
dst_ptr[dst_pos++] = (byte & 0b111111) | 0b10000000;
1489+
} else {
1490+
break;
1491+
}
1492+
}
1493+
}
1494+
1495+
return dst_pos;
1496+
}
1497+
1498+
template <encoding encoding>
1499+
uint32_t WriteOneByteString(const char* src,
1500+
uint32_t src_len,
1501+
char* dst,
1502+
uint32_t dst_len) {
1503+
if (dst_len == 0) {
1504+
return 0;
1505+
}
1506+
1507+
if (encoding == UTF8) {
1508+
return convert_latin1_to_utf8_s(src, src_len, dst, dst_len);
1509+
} else if (encoding == LATIN1 || encoding == ASCII) {
1510+
const auto size = std::min(src_len, dst_len);
1511+
memcpy(dst, src, size);
1512+
return size;
1513+
} else {
1514+
// TODO(ronag): Add support for more encoding.
1515+
UNREACHABLE();
1516+
}
1517+
}
1518+
14451519
template <encoding encoding>
14461520
void SlowWriteString(const FunctionCallbackInfo<Value>& args) {
14471521
Environment* env = Environment::GetCurrent(args);
@@ -1464,11 +1538,22 @@ void SlowWriteString(const FunctionCallbackInfo<Value>& args) {
14641538

14651539
if (max_length == 0) return args.GetReturnValue().Set(0);
14661540

1467-
uint32_t written = StringBytes::Write(
1468-
env->isolate(), ts_obj_data + offset, max_length, str, encoding);
1541+
uint32_t written = 0;
1542+
1543+
if ((encoding == UTF8 || encoding == LATIN1 || encoding == ASCII) &&
1544+
str->IsExternalOneByte()) {
1545+
const auto src = str->GetExternalOneByteStringResource();
1546+
written = WriteOneByteString<encoding>(
1547+
src->data(), src->length(), ts_obj_data + offset, max_length);
1548+
} else {
1549+
written = StringBytes::Write(
1550+
env->isolate(), ts_obj_data + offset, max_length, str, encoding);
1551+
}
1552+
14691553
args.GetReturnValue().Set(written);
14701554
}
14711555

1556+
template <encoding encoding>
14721557
uint32_t FastWriteString(Local<Value> receiver,
14731558
const v8::FastApiTypedArray<uint8_t>& dst,
14741559
const v8::FastOneByteString& src,
@@ -1478,16 +1563,21 @@ uint32_t FastWriteString(Local<Value> receiver,
14781563
CHECK(dst.getStorageIfAligned(&dst_data));
14791564
CHECK(offset <= dst.length());
14801565
CHECK(dst.length() - offset <= std::numeric_limits<uint32_t>::max());
1566+
TRACK_V8_FAST_API_CALL("buffer.writeString");
14811567

1482-
const auto size = std::min(
1483-
{static_cast<uint32_t>(dst.length() - offset), max_length, src.length});
1484-
1485-
memcpy(dst_data + offset, src.data, size);
1486-
1487-
return size;
1568+
return WriteOneByteString<encoding>(
1569+
src.data,
1570+
src.length,
1571+
reinterpret_cast<char*>(dst_data + offset),
1572+
std::min<uint32_t>(dst.length() - offset, max_length));
14881573
}
14891574

1490-
static v8::CFunction fast_write_string(v8::CFunction::Make(FastWriteString));
1575+
// One V8 Fast API descriptor per encoding-specific instantiation of
// FastWriteString.
static v8::CFunction fast_write_string_ascii =
    v8::CFunction::Make(FastWriteString<ASCII>);
static v8::CFunction fast_write_string_latin1 =
    v8::CFunction::Make(FastWriteString<LATIN1>);
static v8::CFunction fast_write_string_utf8 =
    v8::CFunction::Make(FastWriteString<UTF8>);
14911581

14921582
void Initialize(Local<Object> target,
14931583
Local<Value> unused,
@@ -1554,9 +1644,21 @@ void Initialize(Local<Object> target,
15541644
SetMethod(context, target, "hexWrite", StringWrite<HEX>);
15551645
SetMethod(context, target, "ucs2Write", StringWrite<UCS2>);
15561646

1557-
SetMethod(context, target, "asciiWriteStatic", SlowWriteString<ASCII>);
1558-
SetMethod(context, target, "latin1WriteStatic", SlowWriteString<LATIN1>);
1559-
SetMethod(context, target, "utf8WriteStatic", SlowWriteString<UTF8>);
1647+
SetFastMethod(context,
1648+
target,
1649+
"asciiWriteStatic",
1650+
SlowWriteString<ASCII>,
1651+
&fast_write_string_ascii);
1652+
SetFastMethod(context,
1653+
target,
1654+
"latin1WriteStatic",
1655+
SlowWriteString<LATIN1>,
1656+
&fast_write_string_latin1);
1657+
SetFastMethod(context,
1658+
target,
1659+
"utf8WriteStatic",
1660+
SlowWriteString<UTF8>,
1661+
&fast_write_string_utf8);
15601662

15611663
SetMethod(context, target, "getZeroFillToggle", GetZeroFillToggle);
15621664
}
@@ -1601,8 +1703,12 @@ void RegisterExternalReferences(ExternalReferenceRegistry* registry) {
16011703
registry->Register(SlowWriteString<ASCII>);
16021704
registry->Register(SlowWriteString<LATIN1>);
16031705
registry->Register(SlowWriteString<UTF8>);
1604-
registry->Register(fast_write_string.GetTypeInfo());
1605-
registry->Register(FastWriteString);
1706+
registry->Register(FastWriteString<ASCII>);
1707+
registry->Register(fast_write_string_ascii.GetTypeInfo());
1708+
registry->Register(FastWriteString<LATIN1>);
1709+
registry->Register(fast_write_string_latin1.GetTypeInfo());
1710+
registry->Register(FastWriteString<UTF8>);
1711+
registry->Register(fast_write_string_utf8.GetTypeInfo());
16061712
registry->Register(StringWrite<ASCII>);
16071713
registry->Register(StringWrite<BASE64>);
16081714
registry->Register(StringWrite<BASE64URL>);
+45
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
// Flags: --expose-internals --no-warnings --allow-natives-syntax
2+
'use strict';
3+
4+
const common = require('../common');
5+
const assert = require('assert');
6+
7+
const { internalBinding } = require('internal/test/binding');
8+
9+
function testFastUtf8Write() {
10+
{
11+
const buf = Buffer.from('\x80');
12+
13+
assert.strictEqual(buf[0], 194);
14+
assert.strictEqual(buf[1], 128);
15+
}
16+
17+
{
18+
const buf = Buffer.alloc(64);
19+
const newBuf = buf.subarray(0, buf.write('éñüç߯'));
20+
assert.deepStrictEqual(newBuf, Buffer.from([195, 169, 195, 177, 195, 188, 195, 167, 195, 159, 195, 134]));
21+
}
22+
23+
{
24+
const buf = Buffer.alloc(64);
25+
const newBuf = buf.subarray(0, buf.write('¿'));
26+
assert.deepStrictEqual(newBuf, Buffer.from([194, 191]));
27+
}
28+
29+
{
30+
const buf = Buffer.from(new ArrayBuffer(34), 0, 16);
31+
const str = Buffer.from([50, 83, 127, 39, 104, 8, 74, 65, 108, 123, 5, 4, 82, 10, 7, 53]).toString();
32+
const newBuf = buf.subarray(0, buf.write(str));
33+
assert.deepStrictEqual(newBuf, Buffer.from([ 50, 83, 127, 39, 104, 8, 74, 65, 108, 123, 5, 4, 82, 10, 7, 53]));
34+
}
35+
}
36+
37+
// Run the checks once before and once after asking V8 (via natives syntax,
// enabled by --allow-natives-syntax in the Flags line) to optimize
// Buffer.prototype.utf8Write on its next call.
eval('%PrepareFunctionForOptimization(Buffer.prototype.utf8Write)');
testFastUtf8Write();
eval('%OptimizeFunctionOnNextCall(Buffer.prototype.utf8Write)');
testFastUtf8Write();

if (common.isDebug) {
  const { getV8FastApiCallCount } = internalBinding('debug');
  // assert(value, 4) would treat 4 as the failure message and only check
  // truthiness; compare the counter against the expected value instead.
  // NOTE(review): 4 is the count given by the original author - confirm on a
  // debug build.
  assert.strictEqual(getV8FastApiCallCount('buffer.writeString'), 4);
}

0 commit comments

Comments
 (0)