18 changes: 5 additions & 13 deletions crates/oxc_estree_tokens/src/lib.rs
@@ -13,7 +13,7 @@ pub use jsx_state::{JSXState, JSXStateJS, JSXStateTS};
 pub use options::{
     ESTreeTokenConfig, ESTreeTokenOptions, ESTreeTokenOptionsJS, ESTreeTokenOptionsTS,
 };
-use serialize::serialize_tokens;
+use serialize::{estimate_json_len, serialize_tokens};
 
 /// Serializer config for tokens.
 /// We never include ranges, so use this custom config which returns `false` for `ranges()`.
@@ -59,12 +59,8 @@ pub fn to_estree_tokens_json<O: ESTreeTokenConfig>(
     span_converter: &Utf8ToUtf16,
     options: O,
 ) -> String {
-    // Estimated size of a single token serialized to JSON, in bytes.
-    // TODO: Estimate this better based on real-world usage.
-    const BYTES_PER_TOKEN: usize = 64;
-
-    let mut serializer =
-        CompactTokenSerializer::with_capacity(tokens.len() * BYTES_PER_TOKEN, false);
+    let capacity = estimate_json_len(tokens.len(), source_text.len(), true);
+    let mut serializer = CompactTokenSerializer::with_capacity(capacity, false);
     serialize_tokens(&mut serializer, tokens, program, source_text, span_converter, options);
     serializer.into_string()
 }
@@ -83,12 +79,8 @@ pub fn to_estree_tokens_pretty_json<O: ESTreeTokenConfig>(
     span_converter: &Utf8ToUtf16,
     options: O,
 ) -> String {
-    // Estimated size of a single token serialized to JSON, in bytes.
-    // TODO: Estimate this better based on real-world usage.
-    const BYTES_PER_TOKEN: usize = 64;
-
-    let mut serializer =
-        PrettyTokenSerializer::with_capacity(tokens.len() * BYTES_PER_TOKEN, false);
+    let capacity = estimate_json_len(tokens.len(), source_text.len(), false);
+    let mut serializer = PrettyTokenSerializer::with_capacity(capacity, false);
     serialize_tokens(&mut serializer, tokens, program, source_text, span_converter, options);
     serializer.into_string()
 }
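
As a rough illustration (hypothetical numbers, not from the PR) of why the fixed per-token constant was replaced: in a minified 1 MB source, offsets alone can be 7 digits each, so the old `tokens.len() * 64` estimate under-allocates while the new estimate scales with source length. A minimal sketch, assuming `estimate_json_len` is in scope as added by this PR:

    // Hypothetical inputs: 100,000 tokens in a 1 MB minified source.
    let tokens_len = 100_000;
    let source_text_len = 1_000_000;

    // Old: fixed per-token estimate, blind to source length.
    const BYTES_PER_TOKEN: usize = 64;
    let old_capacity = tokens_len * BYTES_PER_TOKEN; // 6_400_000

    // New: per-token JSON scaffolding plus two up-to-7-digit offsets per
    // token, plus the source text itself as the upper bound on all `value`
    // fields, plus 2 bytes for `[` and `]`.
    let new_capacity = estimate_json_len(tokens_len, source_text_len, true);

    // For this input the old estimate under-allocated, so the buffer would
    // have had to grow mid-serialization.
    assert!(new_capacity > old_capacity);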
44 changes: 44 additions & 0 deletions crates/oxc_estree_tokens/src/serialize.rs
@@ -12,6 +12,50 @@ use oxc_span::{GetSpan, Span};
 
 use crate::{ESTreeTokenConfig, JSXState, token_type::TokenType, u32_string::U32String};
 
+/// Estimate the size of tokens serialized to JSON, in bytes.
+/// The aim is to allocate a capacity which is a reasonable over-estimate of the size of all tokens
+/// serialized to JSON, in order to ensure the serializer's buffer never has to grow during serialization.
+///
+/// The estimate combines the following:
+///
+/// * Basic JSON structure for a token x number of tokens.
+/// * Max length of token type x number of tokens.
+///   This is an over-estimate because not all tokens have the longest type (`PrivateIdentifier`).
+/// * Length of source text.
+///   Tokens can at most include all of the source text in their `value` fields (tokens cannot overlap).
+///   In a minified file, this is usually a slight under-estimate, as some `value` fields will need escaping in JSON.
+///   In a non-minified file, there'll be whitespace and comments between tokens, so it's likely an over-estimate.
+/// * Max offset length x number of tokens x 2.
+///   Each token includes `start` and `end` fields, which cannot be larger than the length of the source text.
+///   This is a bit of an over-estimate, as earlier tokens will have smaller offsets, but it's in the right ballpark.
+/// * 2 bytes for the leading `[` and trailing `]`. This is purely to get the right length for empty source text.
+///
+/// Regex tokens (which are longer) are ignored in this calculation, on the assumption that they're relatively rare.
+///
+/// There are 2 factors in this calculation which are under-estimates, but overall it's likely to be
+/// a decent over-estimate, due to the over-estimate on the length of the token type.
+pub fn estimate_json_len(tokens_len: usize, source_text_len: usize, is_compact: bool) -> usize {
+    const TYPE_LEN: usize = "PrivateIdentifier".len();
+
+    const COMPACT_JSON_STRUCTURE_LEN: usize =
+        r#"{"type":"","value":"","start":,"end":},"#.len() + TYPE_LEN;
+    const PRETTY_JSON_STRUCTURE_LEN: usize =
+        " {\n \"type\": \"\",\n \"value\": \"\",\n \"start\": ,\n \"end\": \n },\n"
+            .len()
+            + TYPE_LEN;
+    const COMPACT_JSON_HEADER_FOOTER_LEN: usize = "[]".len();
+    const PRETTY_JSON_HEADER_FOOTER_LEN: usize = "[\n]".len();
+
+    let (structure_len, header_footer_len) = if is_compact {
+        (COMPACT_JSON_STRUCTURE_LEN, COMPACT_JSON_HEADER_FOOTER_LEN)
+    } else {
+        (PRETTY_JSON_STRUCTURE_LEN, PRETTY_JSON_HEADER_FOOTER_LEN)
+    };
+    let offset_len = source_text_len.checked_ilog10().unwrap_or(0) as usize + 1;
+    let token_len = structure_len + offset_len * 2;
+    token_len * tokens_len + source_text_len + header_footer_len
+}
+
 /// Walk AST and serialize each token into the serializer as it's encountered.
 ///
 /// Tokens are consumed from the `tokens` slice in source order.
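
A minimal usage sketch of the new helper (not from the PR; the real `CompactTokenSerializer` is stood in for by a plain `String` here) showing how the pre-sized buffer avoids reallocation:

    // Hypothetical inputs: 5 tokens in a 10-byte source, compact output.
    let source_text = "let x = 1;";
    let tokens_len = 5; // assumed token count for this sketch

    // offset_len = ilog10(10) + 1 = 2 digits per `start`/`end` field.
    let capacity = estimate_json_len(tokens_len, source_text.len(), true);

    // Stand-in for CompactTokenSerializer::with_capacity(capacity, false):
    // a buffer pre-sized with the estimate never grows during serialization,
    // as long as the estimate really is an upper bound on the output length.
    let out = String::with_capacity(capacity);
    assert!(out.capacity() >= capacity);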