Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

src: add fast path to TextEncoder.encodeInto #45701

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 15 additions & 5 deletions benchmark/util/text-encoder.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,27 @@

const common = require('../common.js');

const BASE = 'string\ud801';

const bench = common.createBenchmark(main, {
len: [256, 1024, 1024 * 32],
len: [0, 256, 1024, 1024 * 32],
n: [1e4],
type: ['v8-one-byte-string', 'v8-two-byte-string'],
op: ['encode', 'encodeInto']
});

function main({ n, op, len }) {
function main({ n, op, len, type }) {
const encoder = new TextEncoder();
const input = BASE.repeat(len);
let base = '';

switch (type) {
case 'v8-one-byte-string':
base = 'a';
break;
case 'v8-two-byte-string':
base = 'ğ';
break;
}

const input = base.repeat(len);
const subarray = new Uint8Array(len);

bench.start();
Expand Down
2 changes: 1 addition & 1 deletion common.gypi
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@

# Reset this number to 0 on major V8 upgrades.
# Increment by one for each non-official patch applied to deps/v8.
'v8_embedder_string': '-node.5',
'v8_embedder_string': '-node.6',

##### V8 defaults for Node.js #####

Expand Down
21 changes: 20 additions & 1 deletion deps/v8/include/v8-fast-api-calls.h
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,7 @@ class CTypeInfo {
kFloat32,
kFloat64,
kV8Value,
kSeqOneByteString,
kApiObject, // This will be deprecated once all users have
// migrated from v8::ApiObject to v8::Local<v8::Value>.
kAny, // This is added to enable untyped representation of fast
Expand Down Expand Up @@ -379,6 +380,11 @@ struct FastApiArrayBuffer {
size_t byte_length;
};

struct FastOneByteString {
const char* data;
uint32_t length;
};

class V8_EXPORT CFunctionInfo {
public:
// Construct a struct to hold a CFunction's type information.
Expand Down Expand Up @@ -438,6 +444,7 @@ struct AnyCType {
const FastApiTypedArray<uint64_t>* uint64_ta_value;
const FastApiTypedArray<float>* float_ta_value;
const FastApiTypedArray<double>* double_ta_value;
const FastOneByteString* string_value;
FastApiCallbackOptions* options_value;
};
};
Expand Down Expand Up @@ -614,7 +621,7 @@ class CFunctionInfoImpl : public CFunctionInfo {
kReturnType == CTypeInfo::Type::kFloat32 ||
kReturnType == CTypeInfo::Type::kFloat64 ||
kReturnType == CTypeInfo::Type::kAny,
"64-bit int and api object values are not currently "
"64-bit int, string and api object values are not currently "
"supported return types.");
}

Expand Down Expand Up @@ -735,6 +742,18 @@ struct TypeInfoHelper<FastApiCallbackOptions&> {
}
};

template <>
struct TypeInfoHelper<const FastOneByteString&> {
static constexpr CTypeInfo::Flags Flags() { return CTypeInfo::Flags::kNone; }

static constexpr CTypeInfo::Type Type() {
return CTypeInfo::Type::kSeqOneByteString;
}
static constexpr CTypeInfo::SequenceType SequenceType() {
return CTypeInfo::SequenceType::kScalar;
}
};

#define STATIC_ASSERT_IMPLIES(COND, ASSERTION, MSG) \
static_assert(((COND) == 0) || (ASSERTION), MSG)

Expand Down
1 change: 1 addition & 0 deletions deps/v8/src/codegen/machine-type.h
Original file line number Diff line number Diff line change
Expand Up @@ -315,6 +315,7 @@ class MachineType {
case CTypeInfo::Type::kFloat64:
return MachineType::Float64();
case CTypeInfo::Type::kV8Value:
case CTypeInfo::Type::kSeqOneByteString:
case CTypeInfo::Type::kApiObject:
return MachineType::AnyTagged();
}
Expand Down
45 changes: 45 additions & 0 deletions deps/v8/src/compiler/effect-control-linearizer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -5395,6 +5395,50 @@ Node* EffectControlLinearizer::AdaptFastCallArgument(
case CTypeInfo::Type::kFloat32: {
return __ TruncateFloat64ToFloat32(node);
}
case CTypeInfo::Type::kSeqOneByteString: {
// Check that the value is a HeapObject.
Node* value_is_smi = ObjectIsSmi(node);
__ GotoIf(value_is_smi, if_error);

Node* map = __ LoadField(AccessBuilder::ForMap(), node);
Node* instance_type =
__ LoadField(AccessBuilder::ForMapInstanceType(), map);

Node* encoding = __ Word32And(
instance_type,
__ Int32Constant(kStringRepresentationAndEncodingMask));

Node* is_onebytestring = __ Word32Equal(
encoding, __ Int32Constant(kSeqOneByteStringTag));
__ GotoIfNot(is_onebytestring, if_error);

Node* length_in_bytes =
__ LoadField(AccessBuilder::ForStringLength(), node);
Node* data_ptr = __ IntPtrAdd(
node, __ IntPtrConstant(SeqOneByteString::kHeaderSize -
kHeapObjectTag));

constexpr int kAlign = alignof(FastOneByteString);
constexpr int kSize = sizeof(FastOneByteString);
static_assert(kSize == sizeof(uintptr_t) + sizeof(size_t),
"The size of "
"FastOneByteString isn't equal to the sum of its "
"expected members.");
Node* stack_slot = __ StackSlot(kSize, kAlign);

__ Store(StoreRepresentation(MachineType::PointerRepresentation(),
kNoWriteBarrier),
stack_slot, 0, data_ptr);
__ Store(StoreRepresentation(MachineRepresentation::kWord32,
kNoWriteBarrier),
stack_slot, sizeof(size_t), length_in_bytes);

static_assert(sizeof(uintptr_t) == sizeof(size_t),
"The string length can't "
"fit the PointerRepresentation used to store it.");

return stack_slot;
}
default: {
return node;
}
Expand Down Expand Up @@ -5600,6 +5644,7 @@ Node* EffectControlLinearizer::LowerFastApiCall(Node* node) {
case CTypeInfo::Type::kFloat64:
return ChangeFloat64ToTagged(
c_call_result, CheckForMinusZeroMode::kCheckForMinusZero);
case CTypeInfo::Type::kSeqOneByteString:
case CTypeInfo::Type::kV8Value:
case CTypeInfo::Type::kApiObject:
case CTypeInfo::Type::kUint8:
Expand Down
1 change: 1 addition & 0 deletions deps/v8/src/compiler/fast-api-calls.cc
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ ElementsKind GetTypedArrayElementsKind(CTypeInfo::Type type) {
case CTypeInfo::Type::kFloat64:
return FLOAT64_ELEMENTS;
case CTypeInfo::Type::kVoid:
case CTypeInfo::Type::kSeqOneByteString:
case CTypeInfo::Type::kBool:
case CTypeInfo::Type::kV8Value:
case CTypeInfo::Type::kApiObject:
Expand Down
1 change: 1 addition & 0 deletions deps/v8/src/compiler/simplified-lowering.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1961,6 +1961,7 @@ class RepresentationSelector {
case CTypeInfo::Type::kFloat64:
return UseInfo::CheckedNumberAsFloat64(kDistinguishZeros, feedback);
case CTypeInfo::Type::kV8Value:
case CTypeInfo::Type::kSeqOneByteString:
case CTypeInfo::Type::kApiObject:
return UseInfo::AnyTagged();
}
Expand Down
47 changes: 47 additions & 0 deletions deps/v8/src/d8/d8-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,43 @@ class FastCApiObject {
public:
static FastCApiObject& instance();

#ifdef V8_USE_SIMULATOR_WITH_GENERIC_C_CALLS
static AnyCType CopyStringFastCallbackPatch(AnyCType receiver,
AnyCType should_fallback,
AnyCType source, AnyCType out,
AnyCType options) {
AnyCType ret;
CopyStringFastCallback(receiver.object_value, should_fallback.bool_value,
*source.string_value, *out.uint8_ta_value,
*options.options_value);
return ret;
}

#endif // V8_USE_SIMULATOR_WITH_GENERIC_C_CALLS
static void CopyStringFastCallback(Local<Object> receiver,
bool should_fallback,
const FastOneByteString& source,
const FastApiTypedArray<uint8_t>& out,
FastApiCallbackOptions& options) {
FastCApiObject* self = UnwrapObject(receiver);
self->fast_call_count_++;

if (should_fallback) {
options.fallback = true;
} else {
options.fallback = false;
}

uint8_t* memory = nullptr;
CHECK(out.getStorageIfAligned(&memory));
memcpy(memory, source.data, source.length);
}

static void CopyStringSlowCallback(const FunctionCallbackInfo<Value>& args) {
FastCApiObject* self = UnwrapObject(args.This());
CHECK_SELF_OR_THROW();
self->slow_call_count_++;
}
#ifdef V8_USE_SIMULATOR_WITH_GENERIC_C_CALLS
static AnyCType AddAllFastCallbackPatch(AnyCType receiver,
AnyCType should_fallback,
Expand Down Expand Up @@ -1084,6 +1121,16 @@ Local<FunctionTemplate> Shell::CreateTestFastCApiTemplate(Isolate* isolate) {
PerIsolateData::Get(isolate)->SetTestApiObjectCtor(api_obj_ctor);
Local<Signature> signature = Signature::New(isolate, api_obj_ctor);
{
CFunction copy_str_func = CFunction::Make(
FastCApiObject::CopyStringFastCallback V8_IF_USE_SIMULATOR(
FastCApiObject::CopyStringFastCallbackPatch));
api_obj_ctor->PrototypeTemplate()->Set(
isolate, "copy_string",
FunctionTemplate::New(isolate, FastCApiObject::CopyStringSlowCallback,
Local<Value>(), signature, 1,
ConstructorBehavior::kThrow,
SideEffectType::kHasSideEffect, &copy_str_func));

CFunction add_all_c_func =
CFunction::Make(FastCApiObject::AddAllFastCallback V8_IF_USE_SIMULATOR(
FastCApiObject::AddAllFastCallbackPatch));
Expand Down
84 changes: 84 additions & 0 deletions deps/v8/test/mjsunit/compiler/fast-api-calls-string.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
// Copyright 2022 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// This file excercises one byte string support for fast API calls.

// Flags: --turbo-fast-api-calls --expose-fast-api --allow-natives-syntax --turbofan
// --always-turbofan is disabled because we rely on particular feedback for
// optimizing to the fastest path.
// Flags: --no-always-turbofan
// The test relies on optimizing/deoptimizing at predictable moments, so
// it's not suitable for deoptimization fuzzing.
// Flags: --deopt-every-n-times=0

assertThrows(() => d8.test.FastCAPI());
const fast_c_api = new d8.test.FastCAPI();

function assertSlowCall(input) {
assertEquals(new Uint8Array(input.length), copy_string(false, input));
}

function assertFastCall(input) {
const bytes = Uint8Array.from(input, c => c.charCodeAt(0));
assertEquals(bytes, copy_string(false, input));
}

function copy_string(should_fallback = false, input) {
const buffer = new Uint8Array(input.length);
fast_c_api.copy_string(should_fallback, input, buffer);
return buffer;
}

%PrepareFunctionForOptimization(copy_string);
assertSlowCall('Hello');
%OptimizeFunctionOnNextCall(copy_string);

fast_c_api.reset_counts();
assertFastCall('Hello');
assertFastCall('');
assertFastCall(['Hello', 'World'].join(''));
assertOptimized(copy_string);
assertEquals(3, fast_c_api.fast_call_count());
assertEquals(0, fast_c_api.slow_call_count());

// Fall back for twobyte strings.
fast_c_api.reset_counts();
assertSlowCall('Hello\u{10000}');
assertSlowCall('नमस्ते');
assertSlowCall(['नमस्ते', 'World'].join(''));
assertOptimized(copy_string);
assertEquals(0, fast_c_api.fast_call_count());
assertEquals(3, fast_c_api.slow_call_count());

// Fall back for cons strings.
function getTwoByteString() {
return '\u1234t';
}
function getCons() {
return 'hello' + getTwoByteString()
}

fast_c_api.reset_counts();
assertSlowCall(getCons());
assertOptimized(copy_string);
assertEquals(0, fast_c_api.fast_call_count());
assertEquals(1, fast_c_api.slow_call_count());

// Fall back for sliced strings.
fast_c_api.reset_counts();
function getSliced() {
return getCons().slice(1);
}
assertSlowCall(getSliced());
assertOptimized(copy_string);
assertEquals(0, fast_c_api.fast_call_count());
assertEquals(1, fast_c_api.slow_call_count());

// Fall back for SMI and non-string inputs.
fast_c_api.reset_counts();
assertSlowCall(1);
assertSlowCall({});
assertSlowCall(new Uint8Array(1));
assertEquals(0, fast_c_api.fast_call_count());
assertEquals(3, fast_c_api.slow_call_count());
1 change: 1 addition & 0 deletions lib/internal/bootstrap/loaders.js
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ const internalBindingAllowlist = new SafeSet([
'constants',
'contextify',
'crypto',
'encoding_methods',
'fs',
'fs_event_wrap',
'http_parser',
Expand Down
14 changes: 7 additions & 7 deletions lib/internal/encoding.js
Original file line number Diff line number Diff line change
Expand Up @@ -51,10 +51,10 @@ const {
} = require('internal/validators');

const {
encodeInto,
encodeUtf8String,
decodeUTF8,
} = internalBinding('buffer');
decodeUtf8,
encodeUtf8,
encodeIntoUtf8,
} = internalBinding('encoding_methods');

let Buffer;
function lazyBuffer() {
Expand Down Expand Up @@ -337,15 +337,15 @@ class TextEncoder {

encode(input = '') {
validateEncoder(this);
return encodeUtf8String(`${input}`);
return encodeUtf8(`${input}`);
}

encodeInto(src, dest) {
validateEncoder(this);
validateString(src, 'src');
if (!dest || !isUint8Array(dest))
throw new ERR_INVALID_ARG_TYPE('dest', 'Uint8Array', dest);
encodeInto(src, dest, encodeIntoResults);
encodeIntoUtf8(src, dest, encodeIntoResults);
return { read: encodeIntoResults[0], written: encodeIntoResults[1] };
}

Expand Down Expand Up @@ -430,7 +430,7 @@ function makeTextDecoderICU() {
this[kUTF8FastPath] &&= !(options?.stream);

if (this[kUTF8FastPath]) {
return decodeUTF8(input, this[kIgnoreBOM]);
return decodeUtf8(input, this[kIgnoreBOM]);
}

this.#prepareConverter();
Expand Down
1 change: 1 addition & 0 deletions node.gyp
Original file line number Diff line number Diff line change
Expand Up @@ -498,6 +498,7 @@
'src/node_contextify.cc',
'src/node_credentials.cc',
'src/node_dir.cc',
'src/node_encoding.cc',
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: we already have so many files dealing with encodings, does this really need to be a new one (instead of, e.g., staying in node_buffer or maybe string_bytes)? And if so, is this the encoding file?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don’t have any specific thoughts on this. I’ll try to move the encode utf8 to the node_encoding once this is implementation improves the existing benchmark.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm still not sure why this is a new file when it only covers a tiny part of all encoding-related routines.

'src/node_env_var.cc',
'src/node_errors.cc',
'src/node_external_reference.cc',
Expand Down
1 change: 1 addition & 0 deletions src/node_binding.cc
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
V(contextify) \
V(credentials) \
V(errors) \
V(encoding_methods) \
V(fs) \
V(fs_dir) \
V(fs_event_wrap) \
Expand Down
Loading