Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions bench/sourcemap/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
big-module.generated.ts
52 changes: 52 additions & 0 deletions bench/sourcemap/internal-sourcemap-bench.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
// Microbench for the InternalSourceMap stack-trace remapping path. Compare
// against a baseline build by running:
//
// bun run build:release bench/sourcemap/internal-sourcemap-bench.ts
//
// On first run this generates a ~150k-line .ts module next to this file so the
// stack frames being remapped live inside a large SavedSourceMap entry.

import fs from "node:fs";
import path from "node:path";

const ITER = 10_000;
const BIG_LINES = 150_000;

const flag = process.argv[2] ?? "bench";
const mb = (n: number) => (n / 1048576).toFixed(2) + " MB";

const bigPath = path.join(import.meta.dir, "big-module.generated.ts");
if (!fs.existsSync(bigPath)) {
let src = "let v: number = 0;\n";
for (let i = 0; i < BIG_LINES; i++) src += `v = (v + ${i % 97}) | 0;\n`;
src += "export function go(): string { return new Error('e').stack!; }\n";
fs.writeFileSync(bigPath, src);
}

const rssBefore = process.memoryUsage().rss;
const tLoad0 = performance.now();
const { go } = require(bigPath) as { go: () => string };
const tLoad1 = performance.now();
const rssAfterLoad = process.memoryUsage().rss;

// First .stack: triggers full VLQ decode in the old path.
const tFirst0 = performance.now();
let s = go();
const tFirst1 = performance.now();
const rssAfterFirst = process.memoryUsage().rss;

const t0 = performance.now();
for (let i = 0; i < ITER; i++) s = go();
const t1 = performance.now();
const rssAfterLoop = process.memoryUsage().rss;

console.log(`[${flag}] load big module: ${(tLoad1 - tLoad0).toFixed(2)} ms`);
console.log(`[${flag}] first new Error().stack: ${(tFirst1 - tFirst0).toFixed(3)} ms`);
console.log(
`[${flag}] ${ITER}x new Error().stack: ${(t1 - t0).toFixed(2)} ms (${(((t1 - t0) * 1000) / ITER).toFixed(2)} µs/op)`,
);
console.log(
`[${flag}] rss before/afterLoad/afterFirst/afterLoop: ${mb(rssBefore)} / ${mb(rssAfterLoad)} / ${mb(rssAfterFirst)} / ${mb(rssAfterLoop)}`,
);
console.log(`[${flag}] rss delta load→firstStack: ${mb(rssAfterFirst - rssAfterLoad)}`);
void s;
43 changes: 24 additions & 19 deletions src/StandaloneModuleGraph.zig
Original file line number Diff line number Diff line change
Expand Up @@ -264,25 +264,25 @@
defer init_lock.unlock();

return switch (this.*) {
.none => null,
.parsed => |map| map,
.serialized => |serialized| {
var stored = switch (SourceMap.Mapping.parse(
bun.default_allocator,
serialized.mappingVLQ(),
null,
std.math.maxInt(i32),
std.math.maxInt(i32),
.{},
)) {
.success => |x| x,
.fail => {
this.* = .none;
return null;
},
const blob = serialized.mappingBlob() orelse {
this.* = .none;
return null;
};
if (!SourceMap.InternalSourceMap.isValidBlob(blob)) {
this.* = .none;
return null;
}
const ism = SourceMap.InternalSourceMap{ .data = blob.ptr };
var stored: SourceMap.ParsedSourceMap = .{
.ref_count = .init(),
.internal = ism,
.input_line_count = ism.inputLineCount(),
};
Comment thread
coderabbitai[bot] marked this conversation as resolved.

const source_files = serialized.sourceFileNames();

Check failure on line 285 in src/StandaloneModuleGraph.zig

View check run for this annotation

Claude / Claude Code Review

ParsedSourceMap.deinit() would free borrowed slices and use mismatched allocation size for standalone module graph PSMs

In `ParsedSourceMap.deinit()`, the `external_source_names` cleanup block (lines 113–117 of `src/sourcemap/ParsedSourceMap.zig`) lacks the `is_standalone_module_graph` guard that the adjacent `ism.deinit()` call correctly has. For standalone module graph PSMs, `external_source_names` elements are borrowed sub-slices of `serialized.bytes` (not heap-allocated), and the backing allocation is `alloc(?[]u8, source_files.len * 2)` while `external_source_names` only covers the first half — so if `deinit
Comment thread
claude[bot] marked this conversation as resolved.
Comment on lines 267 to 285

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🔴 In ParsedSourceMap.deinit(), the external_source_names cleanup block (lines 113–117 of src/sourcemap/ParsedSourceMap.zig) lacks the is_standalone_module_graph guard that the adjacent ism.deinit() call correctly has. For standalone module graph PSMs, external_source_names elements are borrowed sub-slices of serialized.bytes (not heap-allocated), and the backing allocation is alloc(?[]u8, source_files.len * 2) while external_source_names only covers the first half — so if deinit() were called, it would both free non-heap pointers and pass a size-N slice to the allocator when the allocation was 2N, causing heap corruption. Currently latent because parsed.ref() at StandaloneModuleGraph.zig:306 keeps the refcount ≥ 2; add the same \!this.is_standalone_module_graph guard to the external_source_names block.

Extended reasoning...

What the bug is and how it manifests

In LazySourceMap.load() (StandaloneModuleGraph.zig:286), a single allocation of 2N elements is made:

const slices = bun.default_allocator.alloc(?[]u8, source_files.len * 2);

The first half becomes file_names (a slice of length N), and each element is set to src.slice(serialized.bytes) — a borrowed sub-slice pointing into the standalone binary blob, not a separate heap allocation. This is then stored:

stored.external_source_names = file_names;  // N borrowed pointers
stored.is_standalone_module_graph = true;
parsed.ref(); // never free

The specific code path that triggers it

ParsedSourceMap.deinit() (src/sourcemap/ParsedSourceMap.zig:113–117) contains:

if (this.external_source_names.len > 0) {
    for (this.external_source_names) |name|
        allocator.free(name);              // (1) frees borrowed blob pointers
    allocator.free(this.external_source_names); // (2) size-N free of a 2N allocation
}

This has no is_standalone_module_graph guard. Contrast with the ism.deinit() call immediately above it (line 109), which this PR correctly guarded:

if (this.internal) |ism| {
    if (\!this.is_standalone_module_graph) ism.deinit();  // ← guarded
}

The inconsistency — adding the guard for ISM but not for external_source_names — is the bug introduced by this PR.

Why existing code doesn't prevent it

The protection is entirely behavioral: parsed.ref() // never free at StandaloneModuleGraph.zig:306 leaves the refcount at 2, so deinit() never reaches zero in normal operation. But this is a fragile invariant: any future refcount refactor, test harness that calls deref(), or leak-finder that attempts balanced ref/deref would trigger both heap corruptions silently in ReleaseFast builds.

What the impact would be

If deinit() were called on a standalone PSM:

  1. allocator.free(name) for each name in external_source_names — each name is a slice into serialized.bytes (mmap'd or embedded binary data), not a heap allocation. Freeing a non-heap pointer is immediate heap corruption.
  2. allocator.free(this.external_source_names) passes a []const u8 of length N to the allocator, but the original allocation was alloc(?[]u8, 2*N). Passing the wrong size to free corrupts allocator metadata in tcmalloc/mimalloc.

Step-by-step proof

  1. bun build --compile --sourcemap=inline produces an executable with embedded SerializedSourceMap.
  2. At runtime, LazySourceMap.load() reaches the .serialized branch: allocates slices[0..2N], sets file_names = @ptrCast(slices[0..N]), fills each element with src.slice(serialized.bytes) (borrowed pointer into the embedded section), sets stored.external_source_names = file_names, stored.is_standalone_module_graph = true, then calls parsed.ref() bringing refcount to 2.
  3. Suppose a future deref() is added (or the ref() is removed as a cleanup): refcount reaches 0, deinit() is called.
  4. ism.deinit() is skipped (correctly, because is_standalone_module_graph == true).
  5. allocator.free(name) runs on each borrowed pointer → heap corruption.
  6. allocator.free(this.external_source_names) passes the wrong size → heap corruption.

How to fix it

Add the same guard to the external_source_names block:

if (this.external_source_names.len > 0 and \!this.is_standalone_module_graph) {
    for (this.external_source_names) |name|
        allocator.free(name);
    allocator.free(this.external_source_names);
}

This mirrors exactly the pattern already used for ism.deinit() on line 109.

const slices = bun.handleOom(bun.default_allocator.alloc(?[]u8, source_files.len * 2));

const file_names: [][]const u8 = @ptrCast(slices[0..source_files.len]);
Expand Down Expand Up @@ -1358,14 +1358,15 @@

/// Source map serialization in the bundler is specially designed to be
/// loaded in memory as is. Source contents are compressed with ZSTD to
/// reduce the file size, and mappings are stored as uncompressed VLQ.
/// reduce the file size, and mappings are stored as an InternalSourceMap
/// blob (varint deltas + sync points) so lookups need no decode pass.
pub const SerializedSourceMap = struct {
bytes: []const u8,

/// Following the header bytes:
/// - source_files_count number of StringPointer, file names
/// - source_files_count number of StringPointer, zstd compressed contents
/// - the mapping data, `map_vlq_length` bytes
/// - the InternalSourceMap blob, `map_bytes_length` bytes
/// - all the StringPointer contents
pub const Header = extern struct {
source_files_count: u32,
Expand All @@ -1376,9 +1377,11 @@
return @ptrCast(map.bytes.ptr);
}

pub fn mappingVLQ(map: SerializedSourceMap) []const u8 {
pub fn mappingBlob(map: SerializedSourceMap) ?[]const u8 {
if (map.bytes.len < @sizeOf(Header)) return null;
const head = map.header();
const start = @sizeOf(Header) + head.source_files_count * @sizeOf(StringPointer) * 2;
if (start > map.bytes.len or head.map_bytes_length > map.bytes.len - start) return null;
return map.bytes[start..][0..head.map_bytes_length];
}

Expand Down Expand Up @@ -1466,14 +1469,16 @@
}

const map_vlq: []const u8 = mappings_str.data.e_string.slice(arena);
const map_blob = SourceMap.InternalSourceMap.fromVLQ(arena, map_vlq, 0) catch
return error.InvalidSourceMap;
Comment thread
claude[bot] marked this conversation as resolved.

try out.writeInt(u32, sources_paths.items.len, .little);
try out.writeInt(u32, @intCast(map_vlq.len), .little);
try out.writeInt(u32, @intCast(map_blob.len), .little);

const string_payload_start_location = @sizeOf(u32) +
@sizeOf(u32) +
@sizeOf(bun.StringPointer) * sources_content.items.len * 2 + // path + source
map_vlq.len;
map_blob.len;

for (sources_paths.items.slice()) |item| {
if (item.data != .e_string)
Expand Down Expand Up @@ -1519,7 +1524,7 @@
try out.writeInt(u32, slice.length, .little);
}

try out.writeAll(map_vlq);
try out.writeAll(map_blob);

bun.assert(header_list.items.len == string_payload_start_location);
}
Expand Down
4 changes: 3 additions & 1 deletion src/bun.js/RuntimeTranspilerCache.zig
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,9 @@
/// Version 16: Added typeof undefined minification optimization.
/// Version 17: Removed transpiler import rewrite for bun:test. Not bumping it causes test/js/bun/http/req-url-leak.test.ts to fail with SyntaxError: Export named 'expect' not found in module 'bun:test'.
/// Version 18: Include ESM record (module info) with an ES Module, see #15758
const expected_version = 18;
/// Version 19: Sourcemap blob is InternalSourceMap (varint stream + sync points), not VLQ.
/// Version 20: InternalSourceMap stream is bit-packed windows.
const expected_version = 20;

const debug = Output.scoped(.cache, .visible);
const MINIMUM_CACHE_SIZE = 50 * 1024;
Expand Down
128 changes: 46 additions & 82 deletions src/bun.js/SavedSourceMap.zig
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,12 @@ const SavedSourceMap = @This();
map: *HashTable,
mutex: bun.Mutex = .{},

pub const vlq_offset = 24;
/// Warm cache for `remapStackFramePositions`: the last decoded sync window and
/// the last (path_hash -> ISM) resolution. Guarded by `mutex`. Invalidated on
/// any `putValue` since that may free the cached blob.
find_cache: InternalSourceMap.FindCache = .{},
last_path_hash: u64 = 0,
last_ism: ?InternalSourceMap = null,

pub fn init(this: *SavedSourceMap, map: *HashTable) void {
this.* = .{
Expand All @@ -25,70 +30,16 @@ pub inline fn unlock(map: *SavedSourceMap) void {
map.mutex.unlock();
}

// For the runtime, we store the number of mappings and how many bytes the final list is at the beginning of the array
// The first 8 bytes are the length of the array
// The second 8 bytes are the number of mappings
pub const SavedMappings = struct {
data: [*]u8,

pub fn vlq(this: SavedMappings) []u8 {
return this.data[vlq_offset..this.len()];
}

pub inline fn len(this: SavedMappings) usize {
return @as(u64, @bitCast(this.data[0..8].*));
}

pub fn deinit(this: SavedMappings) void {
bun.default_allocator.free(this.data[0..this.len()]);
}

pub fn toMapping(this: SavedMappings, allocator: Allocator, path: string) anyerror!ParsedSourceMap {
const result = SourceMap.Mapping.parse(
allocator,
this.data[vlq_offset..this.len()],
@as(usize, @bitCast(this.data[8..16].*)),
1,
@as(usize, @bitCast(this.data[16..24].*)),
.{},
);
switch (result) {
.fail => |fail| {
if (Output.enable_ansi_colors_stderr) {
try fail.toData(path).writeFormat(
Output.errorWriter(),
logger.Kind.warn,
false,
true,
);
} else {
try fail.toData(path).writeFormat(
Output.errorWriter(),
logger.Kind.warn,
false,
false,
);
}

return fail.err;
},
.success => |success| {
return success;
},
}
}
};

/// ParsedSourceMap is the canonical form for sourcemaps,
///
/// but `SavedMappings` and `SourceProviderMap` are much cheaper to construct.
/// In `fn get`, this value gets converted to ParsedSourceMap always
/// `InternalSourceMap` is the storage for runtime-transpiled modules.
/// `ParsedSourceMap` is materialized lazily from a `SourceProviderMap` /
/// `BakeSourceProvider` / `DevServerSourceProvider` for sources that ship
/// their own external `.map`.
pub const Value = bun.TaggedPointerUnion(.{
ParsedSourceMap,
SavedMappings,
SourceProviderMap,
BakeSourceProvider,
DevServerSourceProvider,
InternalSourceMap,
});

pub const MissingSourceMapNoteInfo = struct {
Expand Down Expand Up @@ -178,11 +129,10 @@ pub fn deinit(this: *SavedSourceMap) void {
var value = Value.from(val.*);
if (value.get(ParsedSourceMap)) |source_map| {
source_map.deref();
} else if (value.get(SavedMappings)) |saved_mappings| {
var saved = SavedMappings{ .data = @as([*]u8, @ptrCast(saved_mappings)) };
saved.deinit();
} else if (value.get(SourceProviderMap)) |provider| {
_ = provider; // do nothing, we did not hold a ref to ZigSourceProvider
} else if (value.get(InternalSourceMap)) |ism| {
(InternalSourceMap{ .data = @as([*]u8, @ptrCast(ism)) }).deinit();
}
}
}
Expand All @@ -192,24 +142,27 @@ pub fn deinit(this: *SavedSourceMap) void {
}

pub fn putMappings(this: *SavedSourceMap, source: *const logger.Source, mappings: MutableString) !void {
try this.putValue(source.path.text, Value.init(bun.cast(*SavedMappings, try bun.default_allocator.dupe(u8, mappings.list.items))));
const blob = try bun.default_allocator.dupe(u8, mappings.list.items);
errdefer bun.default_allocator.free(blob);
try this.putValue(source.path.text, Value.init(bun.cast(*InternalSourceMap, blob.ptr)));
}
Comment on lines 144 to 148

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

Free the duplicated blob if putValue() fails.

The allocation succeeds before the hash-table insert, so an error from putValue() leaks blob on the failure path.

Suggested fix
 pub fn putMappings(this: *SavedSourceMap, source: *const logger.Source, mappings: MutableString) !void {
     const blob = try bun.default_allocator.dupe(u8, mappings.list.items);
+    errdefer bun.default_allocator.free(blob);
     try this.putValue(source.path.text, Value.init(bun.cast(*InternalSourceMap, blob.ptr)));
 }

As per coding guidelines, "In Zig code, be careful with allocators and use defer for cleanup".

📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
pub fn putMappings(this: *SavedSourceMap, source: *const logger.Source, mappings: MutableString) !void {
try this.putValue(source.path.text, Value.init(bun.cast(*SavedMappings, try bun.default_allocator.dupe(u8, mappings.list.items))));
const blob = try bun.default_allocator.dupe(u8, mappings.list.items);
try this.putValue(source.path.text, Value.init(bun.cast(*InternalSourceMap, blob.ptr)));
}
pub fn putMappings(this: *SavedSourceMap, source: *const logger.Source, mappings: MutableString) !void {
const blob = try bun.default_allocator.dupe(u8, mappings.list.items);
errdefer bun.default_allocator.free(blob);
try this.putValue(source.path.text, Value.init(bun.cast(*InternalSourceMap, blob.ptr)));
}
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@src/bun.js/SavedSourceMap.zig` around lines 137 - 140, The duplicated
allocation stored in blob is leaked if putValue(...) fails; update putMappings
(in SavedSourceMap) to free the duplicated buffer on error by using a defer or
try/catch around the call to this.putValue(source.path.text, Value.init(...)) so
that bun.default_allocator.free(blob.ptr) (or the appropriate allocator free)
runs when putValue returns an error; ensure the defer is canceled only on
success so successful inserts keep the blob ownership transferred to the map.

Comment thread
claude[bot] marked this conversation as resolved.

pub fn putValue(this: *SavedSourceMap, path: []const u8, value: Value) !void {
this.lock();
defer this.unlock();

this.find_cache.invalidateAll();
this.last_ism = null;
const entry = try this.map.getOrPut(bun.hash(path));
if (entry.found_existing) {
var old_value = Value.from(entry.value_ptr.*);
if (old_value.get(ParsedSourceMap)) |parsed_source_map| {
var source_map: *ParsedSourceMap = parsed_source_map;
source_map.deref();
} else if (old_value.get(SavedMappings)) |saved_mappings| {
var saved = SavedMappings{ .data = @as([*]u8, @ptrCast(saved_mappings)) };
saved.deinit();
} else if (old_value.get(SourceProviderMap)) |provider| {
_ = provider; // do nothing, we did not hold a ref to ZigSourceProvider
} else if (old_value.get(InternalSourceMap)) |ism| {
(InternalSourceMap{ .data = @as([*]u8, @ptrCast(ism)) }).deinit();
}
}
entry.value_ptr.* = value.ptr();
Expand All @@ -233,25 +186,30 @@ fn getWithContent(
};

switch (Value.from(mapping.value_ptr.*).tag()) {
@field(Value.Tag, @typeName(ParsedSourceMap)) => {
@field(Value.Tag, @typeName(InternalSourceMap)) => {
// Runtime-transpiled module. Wrap the blob in a refcounted
// ParsedSourceMap shell (no VLQ decode, no Mapping.List) so callers
// can hold a ref while the table mutates. The shell takes ownership
// of the blob.
defer this.unlock();
const map = Value.from(mapping.value_ptr.*).as(ParsedSourceMap);
map.ref();
return .{ .map = map };
},
@field(Value.Tag, @typeName(SavedMappings)) => {
defer this.unlock();
var saved = SavedMappings{ .data = @as([*]u8, @ptrCast(Value.from(mapping.value_ptr.*).as(ParsedSourceMap))) };
defer saved.deinit();
const result = bun.new(ParsedSourceMap, saved.toMapping(bun.default_allocator, path) catch {
_ = this.map.remove(mapping.key_ptr.*);
return .{};
const ism: InternalSourceMap = .{
.data = @as([*]u8, @ptrCast(Value.from(mapping.value_ptr.*).as(InternalSourceMap))),
};
const result = bun.new(ParsedSourceMap, .{
.ref_count = .init(),
.input_line_count = ism.inputLineCount(),
.internal = ism,
});
mapping.value_ptr.* = Value.init(result).ptr();
result.ref();

return .{ .map = result };
},
@field(Value.Tag, @typeName(ParsedSourceMap)) => {
defer this.unlock();
const map = Value.from(mapping.value_ptr.*).as(ParsedSourceMap);
map.ref();
return .{ .map = map };
},
@field(Value.Tag, @typeName(SourceProviderMap)) => {
const ptr: *SourceProviderMap = Value.from(mapping.value_ptr.*).as(SourceProviderMap);
this.unlock();
Expand Down Expand Up @@ -348,6 +306,12 @@ pub fn get(this: *SavedSourceMap, path: string) ?*ParsedSourceMap {
return this.getWithContent(path, .mappings_only).map;
}

/// Mutex must already be held. Returns the raw table value for `hash` if any.
pub fn getValueLocked(this: *SavedSourceMap, hash: u64) ?Value {
const raw = this.map.get(hash) orelse return null;
return Value.from(raw);
}

pub fn resolveMapping(
this: *SavedSourceMap,
path: []const u8,
Expand All @@ -362,7 +326,7 @@ pub fn resolveMapping(
const map = parse.map orelse return null;

const mapping = parse.mapping orelse
map.mappings.find(line, column) orelse
map.findMapping(line, column) orelse
return null;

return .{
Expand All @@ -375,7 +339,6 @@ pub fn resolveMapping(
const string = []const u8;

const std = @import("std");
const Allocator = std.mem.Allocator;

const bun = @import("bun");
const Environment = bun.Environment;
Expand All @@ -387,5 +350,6 @@ const logger = bun.logger;
const SourceMap = bun.SourceMap;
const BakeSourceProvider = bun.SourceMap.BakeSourceProvider;
const DevServerSourceProvider = bun.SourceMap.DevServerSourceProvider;
const InternalSourceMap = SourceMap.InternalSourceMap;
const ParsedSourceMap = SourceMap.ParsedSourceMap;
const SourceProviderMap = SourceMap.SourceProviderMap;
Loading
Loading