-
Notifications
You must be signed in to change notification settings - Fork 29.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
V8 fast API calls #33374
Comments
I've been wanting to use this for a while but since it only supports |
Is there any more information on this anywhere? I'm curious about how much "faster" this is (compared to node's |
@mscdex It will be quite a bit faster, because the calls to these functions are put directly into the compiled JS code, as I understand it. That skips quite a bit of conversion and setup between C++ and JS.
Not really sure what you’re referring to here, we don’t use |
I've been looking forward to this one also. :-) Other than the void return value, are there any other limitations to be aware of? |
@jasnell The biggest limitation is that the function can’t call into V8 in any way, so no interaction with JS objects, basically. Also, there’s a list of argument types in the header that are intended to be supported but aren’t yet, and overall the feature is still labelled experimental. |
diff --git a/deps/v8/include/v8-fast-api-calls.h b/deps/v8/include/v8-fast-api-calls.h
index bfce66b652..bee1fedf1d 100644
--- a/deps/v8/include/v8-fast-api-calls.h
+++ b/deps/v8/include/v8-fast-api-calls.h
@@ -192,11 +192,13 @@ class CTypeInfo {
ArgFlags flags = ArgFlags::None) {
uintptr_t wrapper_type_info_ptr =
reinterpret_cast<uintptr_t>(wrapper_type_info);
+ /*
// Check that the lower kIsWrapperTypeBit bits are 0's.
CHECK_EQ(
wrapper_type_info_ptr & ~(static_cast<uintptr_t>(~0)
<< static_cast<uintptr_t>(kIsWrapperTypeBit)),
0);
+ */
// TODO(mslekova): Refactor the manual bit manipulations to use
// PointerWithPayload instead.
return CTypeInfo(wrapper_type_info_ptr | flags | kIsWrapperTypeBit);
@@ -335,17 +337,17 @@ template <typename R, typename... Args>
class CFunctionInfoImpl : public CFunctionInfo {
public:
CFunctionInfoImpl()
- : return_info_(i::GetCType<R>::Get()),
+ : return_info_(GetCType<R>::Get()),
arg_count_(sizeof...(Args)),
- arg_info_{i::GetCType<Args>::Get()...} {
- static_assert(i::GetCType<R>::Get().GetType() == CTypeInfo::Type::kVoid,
+ arg_info_{GetCType<Args>::Get()...} {
+ static_assert(GetCType<R>::Get().GetType() == CTypeInfo::Type::kVoid,
"Only void return types are currently supported.");
}
const CTypeInfo& ReturnInfo() const override { return return_info_; }
unsigned int ArgumentCount() const override { return arg_count_; }
const CTypeInfo& ArgumentInfo(unsigned int index) const override {
- CHECK_LT(index, ArgumentCount());
+ // CHECK_LT(index, ArgumentCount());
return arg_info_[index];
}
diff --git a/lib/internal/process/per_thread.js b/lib/internal/process/per_thread.js
index d219cd7298..f871f20a2f 100644
--- a/lib/internal/process/per_thread.js
+++ b/lib/internal/process/per_thread.js
@@ -41,7 +41,9 @@ function assert(x, msg) {
function wrapProcessMethods(binding) {
const {
hrtime: _hrtime,
- hrtimeBigInt: _hrtimeBigInt,
+ // hrtimeBigInt: _hrtimeBigInt,
+ hrtimeBigIntBuffer,
+ hrtimeBigIntContext,
cpuUsage: _cpuUsage,
memoryUsage: _memoryUsage,
resourceUsage: _resourceUsage
@@ -140,10 +142,10 @@ function wrapProcessMethods(binding) {
// Use a BigUint64Array in the closure because this is actually a bit
// faster than simply returning a BigInt from C++ in V8 7.1.
- const hrBigintValues = new BigUint64Array(1);
+ const hrBigIntValues = new BigUint64Array(hrtimeBigIntBuffer);
function hrtimeBigInt() {
- _hrtimeBigInt(hrBigintValues);
- return hrBigintValues[0];
+ hrtimeBigIntContext.hrtime();
+ return hrBigIntValues[0];
}
const memValues = new Float64Array(5);
diff --git a/src/node_process_methods.cc b/src/node_process_methods.cc
index 88f4c1cfbd..e6ee795a93 100644
--- a/src/node_process_methods.cc
+++ b/src/node_process_methods.cc
@@ -7,6 +7,7 @@
#include "node_process.h"
#include "util-inl.h"
#include "uv.h"
+#include "v8-fast-api-calls.h"
#include "v8.h"
#include <vector>
@@ -156,11 +157,23 @@ static void Hrtime(const FunctionCallbackInfo<Value>& args) {
fields[2] = t % NANOS_PER_SEC;
}
-static void HrtimeBigInt(const FunctionCallbackInfo<Value>& args) {
- Local<ArrayBuffer> ab = args[0].As<BigUint64Array>()->Buffer();
- uint64_t* fields = static_cast<uint64_t*>(ab->GetBackingStore()->Data());
- fields[0] = uv_hrtime();
-}
+class HrtimeData {
+ public:
+ HrtimeData(uint64_t* fields) : fields_(fields) {}
+
+ static void FastMethod(HrtimeData* receiver) {
+ receiver->fields_[0] = uv_hrtime();
+ }
+
+ static void SlowMethod(const v8::FunctionCallbackInfo<v8::Value>& args) {
+ HrtimeData* receiver = reinterpret_cast<HrtimeData*>(
+ args.Holder().As<Object>()->GetAlignedPointerFromInternalField(0));
+ FastMethod(receiver);
+ }
+
+ private:
+ uint64_t* fields_;
+};
static void Kill(const FunctionCallbackInfo<Value>& args) {
Environment* env = Environment::GetCurrent(args);
@@ -478,9 +491,45 @@ static void InitializeProcessMethods(Local<Object> target,
env->SetMethod(target, "memoryUsage", MemoryUsage);
env->SetMethod(target, "cpuUsage", CPUUsage);
env->SetMethod(target, "hrtime", Hrtime);
- env->SetMethod(target, "hrtimeBigInt", HrtimeBigInt);
env->SetMethod(target, "resourceUsage", ResourceUsage);
+ {
+ // env->isolate()->set_embedder_wrapper_type_index(kV8EmbedderWrapperTypeIndex);
+ // env->isolate()->set_embedder_wrapper_object_index(kV8EmbedderWrapperObjectIndex);
+ v8::CFunction fast_func = v8::CFunction::Make(HrtimeData::FastMethod);
+ Local<v8::FunctionTemplate> ftmpl =
+ v8::FunctionTemplate::New(env->isolate(),
+ HrtimeData::SlowMethod,
+ Local<Value>(),
+ Local<v8::Signature>(),
+ 0,
+ v8::ConstructorBehavior::kThrow,
+ v8::SideEffectType::kHasSideEffect,
+ &fast_func);
+
+ Local<v8::ObjectTemplate> otmpl = v8::ObjectTemplate::New(env->isolate());
+ otmpl->SetInternalFieldCount(1);
+ otmpl->Set(FIXED_ONE_BYTE_STRING(env->isolate(), "hrtime"), ftmpl);
+
+ Local<Object> obj = otmpl->NewInstance(env->context()).ToLocalChecked();
+
+ Local<ArrayBuffer> ab = ArrayBuffer::New(env->isolate(), 8);
+ HrtimeData* data = new HrtimeData(
+ reinterpret_cast<uint64_t*>(ab->GetBackingStore()->Data()));
+ obj->SetAlignedPointerInInternalField(0, data);
+
+ target
+ ->Set(env->context(),
+ FIXED_ONE_BYTE_STRING(env->isolate(), "hrtimeBigIntBuffer"),
+ ab)
+ .ToChecked();
+ target
+ ->Set(env->context(),
+ FIXED_ONE_BYTE_STRING(env->isolate(), "hrtimeBigIntContext"),
+ obj)
+ .ToChecked();
+ }
+
env->SetMethod(target, "_getActiveRequests", GetActiveRequests);
env->SetMethod(target, "_getActiveHandles", GetActiveHandles);
env->SetMethod(target, "_kill", Kill);
@@ -494,5 +543,16 @@ static void InitializeProcessMethods(Local<Object> target,
} // namespace node
+namespace v8 {
+template <>
+class WrapperTraits<node::HrtimeData> {
+ public:
+ static const void* GetTypeInfo() {
+ static const int tag = 0;
+ return reinterpret_cast<const void*>(&tag);
+ }
+};
+} // namespace v8
+
NODE_MODULE_CONTEXT_AWARE_INTERNAL(process_methods,
node::InitializeProcessMethods)
|
@devsnek but how much of that supposed gain is from not having to call |
That's a good point, I'm not sure. |
@MayaLekova can provide more insights. |
Hi, thanks for bringing this up! The remarks by @addaleax are absolutely correct, the API is really experimental and the current implementation is a prototype of what we'll want to have in the end. The gains are expected to come for hot functions that are optimized by TurboFan and the C++ callback is called without runtime type marshalling, but only if the type checks can be done at compile time (which in practice seems to be true for the WebGL use case I've tested with). There are two major limitations regarding the "fast" callbacks:
We've planned to guard against these, but the current implementation doesn't police it, so you'll most probably get a crash instead of a meaningful error. Moreover, if the fast callback wants to signal an error, the idea is to do it via an output parameter of the callback, but this is currently being implemented and might involve some changes to the fast call API in the very near future. So the design is really in the move, ongoing work can be followed here. I'd appreciate any comments on it, we're really aiming to make it useful for different embedders, but we're still in the process of proving that the performance gains are worth the effort. Once we've passed that stage, I'll make sure to share an explainer about it, with more details (about the usage and the motivation) than the comments in the header file. The benchmark posted by @devsnek is very interesting, thanks for sharing it! After chatting with @ulan, it seems that if the code is completely migrated to the new ArrayBuffer API, the Side note: the raw pointer returned by Again, thanks for pinging me, I'll follow the discussion with interest and I'm looking forward to seeing more use cases for this experimental API! |
diff --git a/src/node_process_methods.cc b/src/node_process_methods.cc
index 88f4c1cfbd..39e36a91c8 100644
--- a/src/node_process_methods.cc
+++ b/src/node_process_methods.cc
@@ -156,9 +156,13 @@ static void Hrtime(const FunctionCallbackInfo<Value>& args) {
fields[2] = t % NANOS_PER_SEC;
}
+uint64_t* fields;
+
static void HrtimeBigInt(const FunctionCallbackInfo<Value>& args) {
- Local<ArrayBuffer> ab = args[0].As<BigUint64Array>()->Buffer();
- uint64_t* fields = static_cast<uint64_t*>(ab->GetBackingStore()->Data());
+ if (!fields) {
+ Local<ArrayBuffer> ab = args[0].As<BigUint64Array>()->Buffer();
+ fields = static_cast<uint64_t*>(ab->GetBackingStore()->Data());
+ }
fields[0] = uv_hrtime();
}
|
@devsnek thanks for checking! Could you please also try keeping It creates a handle and dereference a field which may contribute to the overhead. |
diff --git a/src/node_process_methods.cc b/src/node_process_methods.cc
index 88f4c1cfbd..77d0e634df 100644
--- a/src/node_process_methods.cc
+++ b/src/node_process_methods.cc
@@ -156,9 +156,13 @@ static void Hrtime(const FunctionCallbackInfo<Value>& args) {
fields[2] = t % NANOS_PER_SEC;
}
+uint64_t* fields;
+
static void HrtimeBigInt(const FunctionCallbackInfo<Value>& args) {
Local<ArrayBuffer> ab = args[0].As<BigUint64Array>()->Buffer();
- uint64_t* fields = static_cast<uint64_t*>(ab->GetBackingStore()->Data());
+ if (!fields) {
+ fields = static_cast<uint64_t*>(ab->GetBackingStore()->Data());
+ }
fields[0] = uv_hrtime();
}
|
@devsnek thanks again! Most of the overhead is coming from local handle construction. |
@devsnek Thanks a lot for the more detailed measurements! Do I read it correctly that the "fast call" version (snippet 1 that you shared) is only better than the original code because of the savings from not always creating the local handle and not always calling Also, by comparing snippets 1 and 3, is it fair to conclude that ~25% of the gain comes from not calling |
@MayaLekova do those isolate index values have to be set? I assumed not from the docs in the header (and since they're not exposed on the public isolate yet) |
@devsnek Good point. Actually they need to be set, as this check would fire otherwise. Unfortunately the change that adds them to the public isolate landed in V8 8.4, sorry for this version mismatch. If you want to apply them manually, you could add the following to v8.h and api.cc. Which makes me wonder whether the "fast" callback fires at all with your change from snippet 1? |
@MayaLekova ok yeah with that patch we're seeing the good numbers:
|
Thanks a lot for applying those manually! Sorry again that they're not part of the original commit (in V8 8.3), but as I mentioned before, the API is still highly experimental. The numbers that you share are really promising though and I'll have in mind the opportunity to run snippet 1 (with the augmentation from 8.4) as a way to show the isolated improvement and make sure to not regress it with future changes. For all: we have some ongoing discussion about the implementation details in this doc (also mentioned in the tracking bug), please ping me for commenter access if you're interested to join. Outside of that, any comments and suggestions about the actual interface are welcome here too, thanks! |
@devsnek feel free to backport https://chromiumdash.appspot.com/commit/0d6debcc5f08fe3c41d07686f82c9de05310519f to master. We already have it in v14.x for forward-compat. |
@MayaLekova has automatically generating the slow path function from the information known about the fast path function been considered? In node the majority of our calls into c++ are already using validated arguments so we will end up with boilerplate conversions which V8 could probably do faster anyway since it doesn't have to do handle allocation. |
That's a good idea, but I'd say that's up to the embedder. In Blink for example, all bindings (fast and slow) will be generated from the WebIDL describing the low-level APIs. Does Node.js provide some similar typing information? Regarding handle allocations, not entirely sure I got your point. For the slow version, V8 still has to pass handlified objects, as it's not restricted w.r.t. triggering GC. |
@MayaLekova We don't have such a way to generate our bindings, though I suppose it would technically be possible? I'm thinking that the large majority of cases where we could use fast calls will look like this: int FastWhatever(int a, int b) {
// logic
}
void SlowWhatever(FunctionCallbackInfo args) {
CHECK(args[0]->IsInteger());
int a = args[0].As<Integer>()->Value();
CHECK(args[1]->IsInteger());
int b = args[1].As<Integer>()->Value();
return Integer::New(isolate, FastWhatever(a, b));
} Our c++ bindings generally take already validated arguments, they aren't exposed to userland directly, so we don't usually perform any complex logic for converting the values. As for handles, I just meant if v8 is the one doing the |
Regarding automatic generation of the slow callback, just realized we already had such an idea: "We might want to create a FunctionTemplate::New overload that takes only the fast method, and automatically creates an appropriate slow method wrapping it. The automatically created slow version would generate and perform the type checks from the signature." - it's possible, but I need to re-think whether it should be the task of V8 or of the embedder to generate those. Regarding handle creation, I see your point, it makes sense to spare the work indeed. How would the |
This commit removes the WrapperTraits specialization for FastHrtime according to recent changes in the V8 API. Refs: nodejs/node#33374
This commit removes the WrapperTraits specialization for FastHrtime according to recent changes in the V8 API. Refs: nodejs/node#33374
This commit removes the WrapperTraits specialization for FastHrtime according to recent changes in the V8 API. Refs: nodejs#33374
This commit removes the WrapperTraits specialization for FastHrtime according to recent changes in the V8 API. Refs: #33374
This commit removes the WrapperTraits specialization for FastHrtime according to recent changes in the V8 API. Refs: #33374
This commit removes the WrapperTraits specialization for FastHrtime according to recent changes in the V8 API. Refs: nodejs/node#33374
This commit removes the WrapperTraits specialization for FastHrtime according to recent changes in the V8 API. Refs: nodejs/node#33374
This commit removes the WrapperTraits specialization for FastHrtime according to recent changes in the V8 API. Refs: nodejs/node#33374
This commit removes the WrapperTraits specialization for FastHrtime according to recent changes in the V8 API. Refs: #33374
This commit removes the WrapperTraits specialization for FastHrtime according to recent changes in the V8 API. Refs: nodejs#33374
This commit removes the WrapperTraits specialization for FastHrtime according to recent changes in the V8 API. Refs: nodejs#33374
process: update v8 fast api calls usage This commit removes the WrapperTraits specialization for FastHrtime according to recent changes in the V8 API. Refs: nodejs#33374
This commit removes the WrapperTraits specialization for FastHrtime according to recent changes in the V8 API. Refs: #33374
This commit removes the WrapperTraits specialization for FastHrtime according to recent changes in the V8 API. Refs: nodejs/node#33374
This commit removes the WrapperTraits specialization for FastHrtime according to recent changes in the V8 API. Refs: nodejs/node#33374
This commit removes the WrapperTraits specialization for FastHrtime according to recent changes in the V8 API. Refs: nodejs#33374
This commit removes the WrapperTraits specialization for FastHrtime according to recent changes in the V8 API. Refs: #33374
This commit removes the WrapperTraits specialization for FastHrtime according to recent changes in the V8 API. Refs: nodejs/node#33374
This commit removes the WrapperTraits specialization for FastHrtime according to recent changes in the V8 API. Refs: nodejs/node#33374
This commit removes the WrapperTraits specialization for FastHrtime according to recent changes in the V8 API. Refs: #33374 PR-URL: #35415 Reviewed-By: Rich Trott <[email protected]> Reviewed-By: Jiawen Geng <[email protected]> Reviewed-By: Daniel Bevenius <[email protected]> Reviewed-By: Matteo Collina <[email protected]> Reviewed-By: Myles Borins <[email protected]>
Did Node-API benefit from the |
Today, V8 8.3 landed on the master branch.
It includes a new C++ header:
v8-fast-api-calls.h
that's probably interesting for us./cc @nodejs/v8 @addaleax
The text was updated successfully, but these errors were encountered: