From 8c772e01cd532618831de03950bf8b3c8b1a8af2 Mon Sep 17 00:00:00 2001 From: Divy Srivastava Date: Fri, 11 Nov 2022 09:39:30 +0530 Subject: [PATCH 1/6] perf(web): optimize single pass utf8 decoding --- ext/web/08_text_encoding.js | 70 ++++++++++++++++++++----------------- ext/web/lib.rs | 34 ++++++++++++++++++ 2 files changed, 72 insertions(+), 32 deletions(-) diff --git a/ext/web/08_text_encoding.js b/ext/web/08_text_encoding.js index 4477d9b9ee2cee..aa17c4a4aa5473 100644 --- a/ext/web/08_text_encoding.js +++ b/ext/web/08_text_encoding.js @@ -34,6 +34,8 @@ #fatal; /** @type {boolean} */ #ignoreBOM; + /** @type {boolean} */ + #utf8SinglePass; /** @type {number | null} */ #rid = null; @@ -56,6 +58,7 @@ this.#encoding = encoding; this.#fatal = options.fatal; this.#ignoreBOM = options.ignoreBOM; + this.#utf8SinglePass = encoding === "utf-8" && !options.fatal; this[webidl.brand] = webidl.brand; } @@ -81,7 +84,7 @@ * @param {BufferSource} [input] * @param {TextDecodeOptions} options */ - decode(input = new Uint8Array(), options = {}) { + decode(input = new Uint8Array(), options = undefined) { webidl.assertBranded(this, TextDecoderPrototype); const prefix = "Failed to execute 'decode' on 'TextDecoder'"; if (input !== undefined) { @@ -91,40 +94,43 @@ allowShared: true, }); } - options = webidl.converters.TextDecodeOptions(options, { - prefix, - context: "Argument 2", - }); + let stream = false; + if (options !== undefined) { + options = webidl.converters.TextDecodeOptions(options, { + prefix, + context: "Argument 2", + }); + stream = options.stream; + } try { - try { - if (ArrayBufferIsView(input)) { - input = new Uint8Array( - input.buffer, - input.byteOffset, - input.byteLength, - ); - } else { - input = new Uint8Array(input); - } - } catch { - // If the buffer is detached, just create a new empty Uint8Array. + // Note from spec: implementations are strongly encouraged to use an implementation strategy that avoids this copy. + // When doing so they will have to make sure that changes to input do not affect future calls to decode(). + if (input.byteLength === 0) { + // This handles the case where buffer is detached too. input = new Uint8Array(); } - if ( - ObjectPrototypeIsPrototypeOf( - SharedArrayBuffer.prototype, - input.buffer, - ) - ) { - // We clone the data into a non-shared ArrayBuffer so we can pass it - // to Rust. - // `input` is now a Uint8Array, and calling the TypedArray constructor - // with a TypedArray argument copies the data. - input = new Uint8Array(input); - } - if (!options.stream && this.#rid === null) { + // if ( + // ObjectPrototypeIsPrototypeOf( + // SharedArrayBuffer.prototype, + // input.buffer, + // ) + // ) { + // // We clone the data into a non-shared ArrayBuffer so we can pass it + // // to Rust. + // // `input` is now a Uint8Array, and calling the TypedArray constructor + // // with a TypedArray argument copies the data. + // input = new Uint8Array(input); + // } + + // Fast path for single pass encoding. + if (!stream && this.#rid === null) { + // Fast path for utf8 single pass encoding. + if (this.#utf8SinglePass) { + return ops.op_encoding_decode_utf8(input, this.#ignoreBOM); + } + return ops.op_encoding_decode_single( input, this.#encoding, @@ -140,9 +146,9 @@ this.#ignoreBOM, ); } - return ops.op_encoding_decode(input, this.#rid, options.stream); + return ops.op_encoding_decode(input, this.#rid, stream); } finally { - if (!options.stream && this.#rid !== null) { + if (!stream && this.#rid !== null) { core.close(this.#rid); this.#rid = null; } diff --git a/ext/web/lib.rs b/ext/web/lib.rs index 588a3adfd18d94..8c73535e4ac045 100644 --- a/ext/web/lib.rs +++ b/ext/web/lib.rs @@ -91,6 +91,7 @@ pub fn init( op_base64_btoa::decl(), op_encoding_normalize_label::decl(), op_encoding_decode_single::decl(), + op_encoding_decode_utf8::decl(), op_encoding_new_decoder::decl(), op_encoding_decode::decl(), op_encoding_encode_into::decl(), @@ -179,6 +180,39 @@ fn op_encoding_normalize_label(label: String) -> Result { Ok(encoding.name().to_lowercase()) } +#[op(v8)] +fn op_encoding_decode_utf8<'a>( + scope: &mut v8::HandleScope<'a>, + zero_copy: &[u8], + ignore_bom: bool, +) -> Result, AnyError> { + let buf = &zero_copy; + + let buf = if ignore_bom + && buf.len() >= 3 + && buf[0] == 0xef + && buf[1] == 0xbb + && buf[2] == 0xbf + { + &buf[3..] + } else { + buf + }; + + // If `String::new_from_utf8()` returns `None`, this means that the + // length of the decoded string would be longer than what V8 can + // handle. In this case we return `RangeError`. + // + // For more details see: + // - https://encoding.spec.whatwg.org/#dom-textdecoder-decode + // - https://github.com/denoland/deno/issues/6649 + // - https://github.com/v8/v8/blob/d68fb4733e39525f9ff0a9222107c02c28096e2a/include/v8.h#L3277-L3278 + match v8::String::new_from_utf8(scope, buf, v8::NewStringType::Normal) { + Some(text) => Ok(serde_v8::from_v8(scope, text.into())?), + None => Err(range_error("string too long")), + } +} + #[op] fn op_encoding_decode_single( data: &[u8], From b5dde4312ed959238a0512e25abb5c032801cf4c Mon Sep 17 00:00:00 2001 From: Divy Srivastava Date: Fri, 11 Nov 2022 09:45:27 +0530 Subject: [PATCH 2/6] lint --- ext/web/08_text_encoding.js | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/ext/web/08_text_encoding.js b/ext/web/08_text_encoding.js index aa17c4a4aa5473..794c7c6fd7a1b8 100644 --- a/ext/web/08_text_encoding.js +++ b/ext/web/08_text_encoding.js @@ -16,8 +16,6 @@ const ops = core.ops; const webidl = window.__bootstrap.webidl; const { - ArrayBufferIsView, - ObjectPrototypeIsPrototypeOf, PromiseReject, PromiseResolve, StringPrototypeCharCodeAt, @@ -111,19 +109,6 @@ input = new Uint8Array(); } - // if ( - // ObjectPrototypeIsPrototypeOf( - // SharedArrayBuffer.prototype, - // input.buffer, - // ) - // ) { - // // We clone the data into a non-shared ArrayBuffer so we can pass it - // // to Rust. - // // `input` is now a Uint8Array, and calling the TypedArray constructor - // // with a TypedArray argument copies the data. - // input = new Uint8Array(input); - // } - // Fast path for single pass encoding. if (!stream && this.#rid === null) { // Fast path for utf8 single pass encoding. From 60b940fe483aef52c14a7162e32b1419513723a4 Mon Sep 17 00:00:00 2001 From: Divy Srivastava Date: Fri, 11 Nov 2022 10:05:46 +0530 Subject: [PATCH 3/6] fix --- ext/web/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ext/web/lib.rs b/ext/web/lib.rs index 8c73535e4ac045..169a0013d56f36 100644 --- a/ext/web/lib.rs +++ b/ext/web/lib.rs @@ -209,7 +209,7 @@ fn op_encoding_decode_utf8<'a>( // - https://github.com/v8/v8/blob/d68fb4733e39525f9ff0a9222107c02c28096e2a/include/v8.h#L3277-L3278 match v8::String::new_from_utf8(scope, buf, v8::NewStringType::Normal) { Some(text) => Ok(serde_v8::from_v8(scope, text.into())?), - None => Err(range_error("string too long")), + None => Err(type_error("buffer exceeds maximum length")), } } From 3d2db48ba018cf8b004f2ea74b8578e09f4f0d0f Mon Sep 17 00:00:00 2001 From: Divy Srivastava Date: Fri, 11 Nov 2022 10:34:16 +0530 Subject: [PATCH 4/6] sab copy + ignore_bom --- ext/web/lib.rs | 2 +- ops/lib.rs | 15 ++++++++++++++- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/ext/web/lib.rs b/ext/web/lib.rs index 169a0013d56f36..f799f02e746c83 100644 --- a/ext/web/lib.rs +++ b/ext/web/lib.rs @@ -188,7 +188,7 @@ fn op_encoding_decode_utf8<'a>( ) -> Result, AnyError> { let buf = &zero_copy; - let buf = if ignore_bom + let buf = if !ignore_bom && buf.len() >= 3 && buf[0] == 0xef && buf[1] == 0xbb diff --git a/ops/lib.rs b/ops/lib.rs index 44f783280394c3..095f774dbd4fed 100644 --- a/ops/lib.rs +++ b/ops/lib.rs @@ -912,7 +912,20 @@ fn codegen_u8_slice(core: &TokenStream2, idx: usize) -> TokenStream2 { Ok(b) => { let store = b.data() as *mut u8; // SAFETY: rust guarantees that lifetime of slice is no longer than the call. - unsafe { ::std::slice::from_raw_parts_mut(store, b.byte_length()) } + let slice = unsafe { ::std::slice::from_raw_parts_mut(store, b.byte_length()) }; + + let bs = b.get_backing_store(); + if bs.is_shared() { + // Copy SAB into a non-shared buffer + let bs = #core::v8::ArrayBuffer::new_backing_store_from_boxed_slice( + slice.to_vec().into_boxed_slice(), + ); + + // SAFETY: rust guarantees that lifetime of slice is no longer than the call. + unsafe { ::std::slice::from_raw_parts_mut(bs.data().unwrap().as_mut() as *mut _ as *mut u8, bs.byte_length()) } + } else { + slice + } }, Err(_) => { if let Ok(view) = #core::v8::Local::<#core::v8::ArrayBufferView>::try_from(value) { From ea9bd208e8c50c6440afafd815146e64c2c5f0c1 Mon Sep 17 00:00:00 2001 From: Divy Srivastava Date: Fri, 11 Nov 2022 10:46:33 +0530 Subject: [PATCH 5/6] add back sab copy --- ext/web/08_text_encoding.js | 24 ++++++++++++++++--- ops/lib.rs | 46 ++++++++++++++++++------------------- 2 files changed, 43 insertions(+), 27 deletions(-) diff --git a/ext/web/08_text_encoding.js b/ext/web/08_text_encoding.js index 794c7c6fd7a1b8..bf4b338082c4e0 100644 --- a/ext/web/08_text_encoding.js +++ b/ext/web/08_text_encoding.js @@ -22,6 +22,8 @@ StringPrototypeSlice, TypedArrayPrototypeSubarray, Uint8Array, + ObjectPrototypeIsPrototypeOf, + ArrayBufferIsView, Uint32Array, } = window.__bootstrap.primordials; @@ -104,9 +106,25 @@ try { // Note from spec: implementations are strongly encouraged to use an implementation strategy that avoids this copy. // When doing so they will have to make sure that changes to input do not affect future calls to decode(). - if (input.byteLength === 0) { - // This handles the case where buffer is detached too. - input = new Uint8Array(); + if ( + ObjectPrototypeIsPrototypeOf( + SharedArrayBuffer.prototype, + input || input.buffer, + ) + ) { + // We clone the data into a non-shared ArrayBuffer so we can pass it + // to Rust. + // `input` is now a Uint8Array, and calling the TypedArray constructor + // with a TypedArray argument copies the data. + if (ArrayBufferIsView(input)) { + input = new Uint8Array( + input.buffer, + input.byteOffset, + input.byteLength, + ); + } else { + input = new Uint8Array(input); + } } // Fast path for single pass encoding. diff --git a/ops/lib.rs b/ops/lib.rs index 095f774dbd4fed..9b7715629f528e 100644 --- a/ops/lib.rs +++ b/ops/lib.rs @@ -910,35 +910,33 @@ fn codegen_u8_slice(core: &TokenStream2, idx: usize) -> TokenStream2 { let value = args.get(#idx as i32); match #core::v8::Local::<#core::v8::ArrayBuffer>::try_from(value) { Ok(b) => { - let store = b.data() as *mut u8; - // SAFETY: rust guarantees that lifetime of slice is no longer than the call. - let slice = unsafe { ::std::slice::from_raw_parts_mut(store, b.byte_length()) }; - - let bs = b.get_backing_store(); - if bs.is_shared() { - // Copy SAB into a non-shared buffer - let bs = #core::v8::ArrayBuffer::new_backing_store_from_boxed_slice( - slice.to_vec().into_boxed_slice(), - ); - - // SAFETY: rust guarantees that lifetime of slice is no longer than the call. - unsafe { ::std::slice::from_raw_parts_mut(bs.data().unwrap().as_mut() as *mut _ as *mut u8, bs.byte_length()) } + // Handles detached buffers. + let byte_length = b.byte_length(); + if byte_length == 0 { + &mut [] } else { - slice + let store = b.data() as *mut u8; + // SAFETY: rust guarantees that lifetime of slice is no longer than the call. + unsafe { ::std::slice::from_raw_parts_mut(store, byte_length) } } }, Err(_) => { if let Ok(view) = #core::v8::Local::<#core::v8::ArrayBufferView>::try_from(value) { - let (offset, len) = (view.byte_offset(), view.byte_length()); - let buffer = match view.buffer(scope) { - Some(v) => v, - None => { - return #core::_ops::throw_type_error(scope, format!("Expected ArrayBufferView at position {}", #idx)); - } - }; - let store = buffer.data() as *mut u8; - // SAFETY: rust guarantees that lifetime of slice is no longer than the call. - unsafe { ::std::slice::from_raw_parts_mut(store.add(offset), len) } + let len = view.byte_length(); + if len == 0 { + &mut [] + } else { + let offset = view.byte_offset(); + let buffer = match view.buffer(scope) { + Some(v) => v, + None => { + return #core::_ops::throw_type_error(scope, format!("Expected ArrayBufferView at position {}", #idx)); + } + }; + let store = buffer.data() as *mut u8; + // SAFETY: rust guarantees that lifetime of slice is no longer than the call. + unsafe { ::std::slice::from_raw_parts_mut(store.add(offset), len) } + } } else { return #core::_ops::throw_type_error(scope, format!("Expected ArrayBufferView at position {}", #idx)); } From 8a8af855e7ea1928ee619e73d4ac35309c5819f1 Mon Sep 17 00:00:00 2001 From: Divy Srivastava Date: Fri, 11 Nov 2022 11:36:33 +0530 Subject: [PATCH 6/6] rm special case 0 len --- ops/lib.rs | 34 +++++++++++++--------------------- 1 file changed, 13 insertions(+), 21 deletions(-) diff --git a/ops/lib.rs b/ops/lib.rs index 9b7715629f528e..f2e9545dbd39cf 100644 --- a/ops/lib.rs +++ b/ops/lib.rs @@ -912,31 +912,23 @@ fn codegen_u8_slice(core: &TokenStream2, idx: usize) -> TokenStream2 { Ok(b) => { // Handles detached buffers. let byte_length = b.byte_length(); - if byte_length == 0 { - &mut [] - } else { - let store = b.data() as *mut u8; - // SAFETY: rust guarantees that lifetime of slice is no longer than the call. - unsafe { ::std::slice::from_raw_parts_mut(store, byte_length) } - } + let store = b.data() as *mut u8; + // SAFETY: rust guarantees that lifetime of slice is no longer than the call. + unsafe { ::std::slice::from_raw_parts_mut(store, byte_length) } }, Err(_) => { if let Ok(view) = #core::v8::Local::<#core::v8::ArrayBufferView>::try_from(value) { let len = view.byte_length(); - if len == 0 { - &mut [] - } else { - let offset = view.byte_offset(); - let buffer = match view.buffer(scope) { - Some(v) => v, - None => { - return #core::_ops::throw_type_error(scope, format!("Expected ArrayBufferView at position {}", #idx)); - } - }; - let store = buffer.data() as *mut u8; - // SAFETY: rust guarantees that lifetime of slice is no longer than the call. - unsafe { ::std::slice::from_raw_parts_mut(store.add(offset), len) } - } + let offset = view.byte_offset(); + let buffer = match view.buffer(scope) { + Some(v) => v, + None => { + return #core::_ops::throw_type_error(scope, format!("Expected ArrayBufferView at position {}", #idx)); + } + }; + let store = buffer.data() as *mut u8; + // SAFETY: rust guarantees that lifetime of slice is no longer than the call. + unsafe { ::std::slice::from_raw_parts_mut(store.add(offset), len) } } else { return #core::_ops::throw_type_error(scope, format!("Expected ArrayBufferView at position {}", #idx)); }