Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
245 changes: 245 additions & 0 deletions scripts/strip-long-rs-comments.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,245 @@
#!/usr/bin/env bun
Comment thread
robobun marked this conversation as resolved.
Comment thread
claude[bot] marked this conversation as resolved.
Comment thread
robobun marked this conversation as resolved.
Comment thread
claude[bot] marked this conversation as resolved.
// Remove every `//`-style comment block longer than 3 lines from tracked .rs
// files, except a block that is the first non-blank content in the file.
//
// "Comment block" = a maximal run of consecutive lines whose trimmed content
// starts with `//` (this covers `//`, `///`, and `//!`). Inline `/* ... */`
// annotations and trailing `// ...` after code are not comment-only lines and
// are never touched.
//
// Load-bearing comment lines/blocks are preserved:
// - `// HOST_EXPORT(...)` markers (scraped by src/codegen/generate-host-exports.ts)
// → the marker line is kept; surrounding prose in the same block is removed.
// - `// SAFETY:` / `/// # Safety` blocks (clippy::undocumented_unsafe_blocks is
// `deny` workspace-wide, Cargo.toml:216) → the entire block is kept.
//
// Usage:
// bun scripts/strip-long-rs-comments.ts # apply in-place to all tracked *.rs
// bun scripts/strip-long-rs-comments.ts --dry # report only (alias: -n)
// bun scripts/strip-long-rs-comments.ts --min N # threshold (default 4)
// bun scripts/strip-long-rs-comments.ts a.rs b.rs # only these files (no git)

import { $ } from "bun";
import { readFileSync, writeFileSync } from "node:fs";

// Single-line protection: codegen scrapes these comment lines, so a line that
// matches is always kept even though the rest of its block is still removed.
// Each pattern is tested against the line with leading whitespace stripped.
const PROTECTED_LINE: Array<RegExp> = [
  /^\/\/\s*HOST_EXPORT\(/, // src/codegen/generate-host-exports.ts
];

// Whole-block protection: when any line of a comment block matches one of
// these patterns, the entire block is left in place. Each pattern is tested
// against the line with leading whitespace stripped.
//
// Rationale: `clippy::undocumented_unsafe_blocks` is `deny` workspace-wide
// (Cargo.toml) and is satisfied by any comment containing /safety:/i placed
// immediately before `unsafe`. The tree spells that header many ways —
// `// SAFETY:`, `// SAFETY (invariant):`, `// SAFETY CONTRACT:`,
// `/// # Safety`, `/// ## Safety` — so a block is kept when it mentions SAFETY
// in caps, starts a line with a `Safety:`-style marker in any case, or has a
// `/// # Safety` doc heading. Keeping a few prose blocks that only mention the
// pattern is cheaper than triggering a clippy deny-error.
const PROTECTED_BLOCK: Array<RegExp> = [
  /\bSAFETY\b/, // `// SAFETY: ...`, `// SAFETY CONTRACT: ...`, any all-caps mention
  /^\/\/[/!]*\s*safety\s*:/i, // `// Safety: ...` / `// safety: ...` — clippy matches case-insensitively
  /^\/\/\/\s*#+\s*Safety\b/, // `/// # Safety` doc headings
];

// Hot-path helpers avoid .trim()/.trimStart() allocation — debug-build JS is
// slow enough that scanning ~3M lines with per-line temp strings takes 30s+.

/**
 * Returns the index of the first character of `line` that is not a space,
 * tab, or carriage return, or `line.length` when there is no such character.
 *
 * Carriage return is skipped because the file is split on "\n" only: in a
 * CRLF file every line keeps a trailing "\r", so an empty line arrives here
 * as "\r" and must still count as whitespace-only for blank-line detection.
 */
function firstNonWs(line: string): number {
  const n = line.length;
  let j = 0;
  while (j < n) {
    const c = line.charCodeAt(j);
    if (c !== 32 && c !== 9 && c !== 13) break; // space / tab / CR
    j++;
  }
  return j;
}

/**
 * True when the first non-whitespace characters of `line` are `//`, i.e. the
 * line holds nothing but a line comment (`//`, `///`, `//!`, ...).
 */
function isCommentOnly(line: string): boolean {
  // Position-based startsWith checks in place without building a substring.
  return line.startsWith("//", firstNonWs(line));
}

/**
 * True when `line`, with leading whitespace removed, matches at least one
 * PROTECTED_LINE pattern.
 */
function isProtectedLine(line: string): boolean {
  const trimmed = line.substring(firstNonWs(line));
  for (const pattern of PROTECTED_LINE) {
    if (pattern.test(trimmed)) return true;
  }
  return false;
}

/**
 * True when `line`, with leading whitespace removed, matches at least one
 * PROTECTED_BLOCK pattern.
 */
function isProtectedBlockLine(line: string): boolean {
  const trimmed = line.substring(firstNonWs(line));
  for (const pattern of PROTECTED_BLOCK) {
    if (pattern.test(trimmed)) return true;
  }
  return false;
}

/**
 * True when `line` is empty or consists solely of the leading whitespace that
 * `firstNonWs` skips.
 */
function isBlank(line: string): boolean {
  const firstVisible = firstNonWs(line);
  return firstVisible === line.length;
}
Comment thread
robobun marked this conversation as resolved.

/** Half-open range `[start, end)` of line indices to delete. */
export type Edit = {
  /** Index of the first line to delete (inclusive). */
  start: number;
  /** Index one past the last line to delete (exclusive). */
  end: number;
};

/**
 * Computes which line ranges to delete from a file.
 *
 * Runs in three passes:
 *  1. Collect every comment-only block of at least `min` lines, skipping the
 *     block that is the first non-blank content of the file and any block
 *     containing a PROTECTED_BLOCK line. Inside a collected block, lines that
 *     match PROTECTED_LINE are kept and the block is split around them.
 *  2. Merge neighbouring ranges that are separated only by blank lines.
 *  3. Grow each range over the blank lines that follow it.
 *
 * @param lines The file content, one entry per line.
 * @param min   Minimum number of lines a comment block needs to be removed.
 * @returns Ranges in ascending order; each is a half-open `[start, end)` span.
 */
export function planEdits(lines: string[], min: number): Edit[] {
  const total = lines.length;

  // Index of the first non-blank line; a block starting here is the
  // top-of-file block and is never removed.
  let topOfFile = 0;
  while (topOfFile < total && isBlank(lines[topOfFile])) topOfFile++;

  // Pass 1: collect removable ranges.
  const candidates: Edit[] = [];
  for (let cursor = 0; cursor < total; ) {
    if (!isCommentOnly(lines[cursor])) {
      cursor++;
      continue;
    }
    const blockStart = cursor;
    do {
      cursor++;
    } while (cursor < total && isCommentOnly(lines[cursor]));
    const blockEnd = cursor; // exclusive

    if (blockEnd - blockStart < min) continue;
    if (blockStart === topOfFile) continue;

    // A single SAFETY / # Safety line protects the whole block.
    let wholeBlockProtected = false;
    for (let k = blockStart; k < blockEnd && !wholeBlockProtected; k++) {
      wholeBlockProtected = isProtectedBlockLine(lines[k]);
    }
    if (wholeBlockProtected) continue;

    // Emit the runs between protected single lines; the protected lines
    // themselves stay in the file.
    let runStart = blockStart;
    for (let k = blockStart; k < blockEnd; k++) {
      if (!isProtectedLine(lines[k])) continue;
      if (k > runStart) candidates.push({ start: runStart, end: k });
      runStart = k + 1;
    }
    if (blockEnd > runStart) candidates.push({ start: runStart, end: blockEnd });
  }

  // Pass 2: fuse ranges separated only by blank lines (e.g. a section banner
  // followed by a doc comment) so the blanks in between are removed as well.
  const merged: Edit[] = [];
  for (const edit of candidates) {
    const last = merged.length > 0 ? merged[merged.length - 1] : undefined;
    if (last !== undefined) {
      let probe = last.end;
      while (probe < edit.start && isBlank(lines[probe])) probe++;
      if (probe === edit.start) {
        last.end = edit.end;
        continue;
      }
    }
    merged.push({ start: edit.start, end: edit.end });
  }

  // Pass 3: extend each range over trailing blank lines so the removal does
  // not leave a double-blank gap or a stray blank line right after `{`.
  for (const edit of merged) {
    let afterBlanks = edit.end;
    while (afterBlanks < total && isBlank(lines[afterBlanks])) afterBlanks++;
    if (afterBlanks === edit.end) continue; // nothing blank follows

    const atFileStart = edit.start === 0;
    const preceding = atFileStart ? "" : lines[edit.start - 1];
    // Remove every trailing blank when the range is already preceded by a
    // blank line (or nothing) or by an opening brace; otherwise keep one.
    const swallowAll = atFileStart || isBlank(preceding) || preceding.trimEnd().endsWith("{");
    edit.end = swallowAll ? afterBlanks : afterBlanks - 1;
  }

  return merged;
}

/**
 * Returns the lines that survive after deleting every range in `edits`.
 *
 * `edits` is walked front to back with a single cursor, so the ranges are
 * expected in ascending order. When `edits` is empty the input array itself
 * is returned, not a copy.
 */
function applyEdits(lines: string[], edits: Edit[]): string[] {
  if (edits.length === 0) return lines;

  const kept: string[] = [];
  let cursor = 0; // index of the edit currently being applied
  for (let idx = 0; idx < lines.length; idx++) {
    const active = cursor < edits.length ? edits[cursor] : undefined;
    if (active === undefined || idx < active.start || idx >= active.end) {
      kept.push(lines[idx]);
      continue;
    }
    // Inside the active range: drop the line, and advance to the next edit
    // once the last line of this range has been consumed.
    if (idx + 1 === active.end) cursor++;
  }
  return kept;
}

/**
 * Pure transform: strips long comment blocks from `src` and returns the
 * result, or returns `src` itself when nothing needs to be removed.
 *
 * @param src Full text of the file.
 * @param min Minimum length (in lines) of a comment block to remove.
 */
export function stripLongComments(src: string, min = 4): string {
  const lines = src.split("\n");
  const edits = planEdits(lines, min);
  if (edits.length === 0) return src;

  const stripped = applyEdits(lines, edits).join("\n");
  // Keep the file's trailing newline if the removal consumed it.
  const lostTrailingNewline = src.endsWith("\n") && !stripped.endsWith("\n");
  return lostTrailingNewline ? stripped + "\n" : stripped;
}

// Path prefixes of trees whose .rs files are left untouched.
export const SKIP_PREFIXES = [
  // Third-party code.
  "vendor/",
  // Externally-published crates (bun-native-plugin renders `///` on docs.rs —
  // user-facing API docs).
  "packages/",
  // CLAUDE.md cites line ranges into these sources; the inline
  // encoding-derivation comments are the on-call triage doc.
  "scripts/verify-baseline-static/",
];

/**
 * Lists the git-tracked `*.rs` files under `cwd` (default: the process's
 * current directory), excluding any path that starts with a SKIP_PREFIXES
 * entry.
 *
 * `-z` makes git emit NUL-terminated, unquoted paths. Without it, git
 * C-quotes paths that contain non-ASCII or special bytes; a quoted path
 * starts with `"`, so it would slip past the prefix filter and could not be
 * opened by name.
 *
 * @param cwd Directory to run `git ls-files` in.
 * @returns Paths exactly as git reports them for that directory.
 */
export async function listTrackedRsFiles(cwd?: string): Promise<string[]> {
  const listing = await $`git ls-files -z '*.rs'`.cwd(cwd ?? process.cwd()).text();
  const tracked = listing.split("\0").filter(Boolean);
  return tracked.filter(f => !SKIP_PREFIXES.some(p => f.startsWith(p)));
}

// ─── CLI ──────────────────────────────────────────────────────────────────────

/**
 * CLI entry point.
 *
 * Recognised arguments:
 *  - `--dry` / `-n`  report what would be removed without writing anything
 *  - `--min N`       minimum comment-block length to remove (default 4)
 *  - anything else not starting with `-` is taken as a file to process;
 *    when no files are given, all tracked `*.rs` files are processed.
 *
 * Any other option starting with `-` is rejected. The default mode rewrites
 * files in place, so silently ignoring a mistyped flag (e.g. `--dry-run`)
 * would turn an intended preview into a destructive run.
 *
 * Exits with status 1 on an unknown option or an invalid `--min` value.
 * A summary line is written to stderr when the run completes.
 */
async function main(): Promise<void> {
  const args = process.argv.slice(2);

  const minIdx = args.indexOf("--min");
  // Index of the argument consumed as the value of `--min`, or -1.
  const minValueIdx = minIdx >= 0 ? minIdx + 1 : -1;

  // Refuse unrecognised options before touching any file.
  const knownFlags = new Set(["--dry", "-n", "--min"]);
  const unknownFlags = args.filter((a, i) => a.startsWith("-") && i !== minValueIdx && !knownFlags.has(a));
  if (unknownFlags.length > 0) {
    console.error(`unknown option(s): ${unknownFlags.join(", ")}`);
    process.exit(1);
  }

  const dry = args.includes("--dry") || args.includes("-n");
  const min = minIdx >= 0 ? Number(args[minValueIdx]) : 4;
  if (!Number.isInteger(min) || min < 1) {
    console.error(`invalid --min value`);
    process.exit(1);
  }

  const explicitFiles = args.filter((a, i) => !a.startsWith("-") && i !== minValueIdx);
  const files = explicitFiles.length > 0 ? explicitFiles : await listTrackedRsFiles();

  let changed = 0;
  let blocksRemoved = 0;
  let linesRemoved = 0;

  for (const file of files) {
    const src = readFileSync(file, "utf8");
    const lines = src.split("\n");
    const edits = planEdits(lines, min);
    if (edits.length === 0) continue;

    const removed = edits.reduce((n, e) => n + (e.end - e.start), 0);
    blocksRemoved += edits.length;
    linesRemoved += removed;
    changed++;

    if (dry) {
      // Report 1-based inclusive line ranges.
      for (const e of edits) {
        console.log(`${file}:${e.start + 1}-${e.end}: ${e.end - e.start} lines`);
      }
      continue;
    }

    // Reuse the edits already planned above instead of re-running the full
    // transform (planEdits + applyEdits) a second time inside stripLongComments.
    const out = applyEdits(lines, edits);
    let text = out.join("\n");
    // Keep the file's trailing newline if the removal consumed it.
    if (src.endsWith("\n") && !text.endsWith("\n")) text += "\n";
    writeFileSync(file, text);
  }

  console.error(
    `strip-long-rs-comments: ${dry ? "would change" : "changed"} ${changed} file(s), ` +
      `removed ${blocksRemoved} block(s) / ${linesRemoved} line(s) ` +
      `(threshold >= ${min} lines; top-of-file, SAFETY, HOST_EXPORT kept)`,
  );
}

// Run the CLI only when `import.meta.main` is set, so that importing the
// exported helpers from another module does not trigger a run.
if (import.meta.main) {
  await main();
}
60 changes: 0 additions & 60 deletions src/analytics/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,6 @@ use bun_core::slice_to_nul;

// ──────────────────────────────────────────────────────────────────────────

/// Enables analytics. This is used by:
/// - crash_handler's `report` function to anonymously report crashes
///
/// Since this field can be `Unknown`, it makes more sense to call `is_enabled`
/// instead of processing this field directly.
#[repr(u8)]
#[derive(Copy, Clone, PartialEq, Eq)]
pub enum TriState {
Expand Down Expand Up @@ -77,26 +72,9 @@ pub fn is_enabled() -> bool {
// Features
// ──────────────────────────────────────────────────────────────────────────

/// This answers, "What parts of bun are people actually using?"
///
/// PORT NOTE: In Zig this is a `struct` used purely as a namespace of `pub var`
/// decls, iterated via `@typeInfo` reflection. Rust has no decl reflection, so
/// the feature list is declared once via `define_features!` and that macro
/// generates the statics, `PACKED_FEATURES_LIST`, `PackedFeatures`,
/// `packed_features()`, and the `Display` body.
pub mod features {
use super::*;

// PORT NOTE (cyclebreak): the Zig original is
// `EnumSet(bun.jsc.ModuleLoader.HardcodedModule)`. That enum lives in
// `bun_resolve_builtins` (T5) and pulling it here would create a forward
// dep (analytics is T1). The only operations we need are `insert` and
// ordered iteration of the module *names* for the crash-report formatter,
// so store the `&'static str` name (= `@tagName(HardcodedModule)`) instead
// of the enum value. Writers (`runtime/jsc_hooks.rs`) call
// `BUILTIN_MODULES.lock().insert(<&'static str>::from(hardcoded))`.
// PERF(port): Zig used a packed `EnumSet` (bitset); BTreeSet is O(log n)
// insert — fine for ≤~80 entries written once each at module-load time.
pub(crate) static BUILTIN_MODULES: bun_core::Mutex<std::collections::BTreeSet<&'static str>> =
bun_core::Mutex::new(std::collections::BTreeSet::new());
// PORT NOTE: Zig used a plain mutable global; wrapped in a Mutex here
Expand All @@ -121,11 +99,6 @@ pub mod features {
/// Zig: `pub const packed_features_list = brk: { ... }`
pub const PACKED_FEATURES_LIST: &[&str] = &[ $( $name ),* ];

// Zig: `pub const PackedFeatures = @Type(.{ .@"struct" = .{ .layout = .@"packed", .backing_integer = u64, ... } })`
// All fields are `bool` → bitflags over u64.
// PORT NOTE: nightly `${index()}` (macro_metavar_expr) is unavailable
// on stable, so each feature carries an explicit `$idx` literal at the
// call site. The dense-index assertion below catches gaps/duplicates.
::bitflags::bitflags! {
#[repr(transparent)]
#[derive(Default, Copy, Clone, PartialEq, Eq)]
Expand Down Expand Up @@ -214,11 +187,6 @@ pub mod features {
};
}

// PORT NOTE: Zig identifiers `@"Bun.stderr"` etc. cannot be Rust idents;
// renamed to `bun_stderr` etc. The string literal preserves the original
// name for output / `PACKED_FEATURES_LIST` (matches `@tagName` semantics).
// The leading integer is the bit index in `PackedFeatures` (must be dense
// 0..N — asserted at compile time inside the macro).
define_features! {
0 => (bun_stderr, "Bun.stderr"),
1 => (bun_stdin, "Bun.stdin"),
Expand Down Expand Up @@ -289,15 +257,6 @@ pub mod features {
#[unsafe(export_name = "Bun__Feature__webview_webkit")]
57 => (webview_webkit, "webview_webkit"),
}

// Zig: `comptime { @export(&napi_module_register, .{ .name = "Bun__napi_module_register_count" }); ... }`
// PORT NOTE: C++ declares these as `extern "C" size_t Bun__...;` and
// reads/increments the value directly, so the exported symbol must BE the
// `usize` storage (not a pointer to it). `AtomicUsize` is `#[repr(C)]
// usize`-layout-compatible. Handled via `#[unsafe(export_name = "...")]`
// on the canonical statics inside `define_features!` above — Rust cannot
// alias-export a static under a second symbol name, so the export name is
// attached to the single definition.
}

// Re-exports to mirror Zig's `Features.packedFeatures()` etc. at module scope.
Expand Down Expand Up @@ -336,11 +295,6 @@ pub enum EventName {
http_build,
}

// Zig: `var random: std.rand.DefaultPrng = undefined;`
// PORT NOTE: declared but never read in analytics.zig — dead code. Dropped
// rather than gated; if a future schema-encode path needs a PRNG, seed one
// locally (PORTING.md §Concurrency: OnceLock<...>, no `static mut`).

const PLATFORM_ARCH: analytics::Architecture = {
#[cfg(target_arch = "aarch64")]
{
Expand Down Expand Up @@ -409,14 +363,6 @@ pub mod generate_header {
// Linux / Android
// ──────────────────────────────────────────────────────────────────

// Zig: `pub var linux_os_name: std.c.utsname = undefined;`
// PORT NOTE: Zig's `Environment.isLinux` is true on Android (it checks
// the kernel, not the libc target), so all Linux-gated items below are
// `any(linux, android)` — `for_linux()` itself branches on Android.
// The cached `utsname` itself now lives in T1 at
// `bun_core::ffi::cached_uname()` so `bun_sys` feature probes share the
// same single `uname(2)` syscall.

// ──────────────────────────────────────────────────────────────────
// Platform OnceLock
// ──────────────────────────────────────────────────────────────────
Expand Down Expand Up @@ -496,12 +442,6 @@ pub mod generate_header {

#[cfg(any(target_os = "linux", target_os = "android"))]
pub fn kernel_version() -> semver::Version {
// Route through the T1 canonical probe so the whole binary issues
// a single `uname(2)` for kernel-version detection. The full
// semver `tag` (pre/build) is irrelevant here — `.min()` on the
// old parse path already zeroed it — so a {major,minor,patch}
// lift is behavior-identical for all callers (crash_handler
// formatting, epoll_pwait2 >=5.11 gate, `bun.linuxKernelVersion`).
let v = bun_core::linux_kernel_version();
semver::Version {
major: u64::from(v.major),
Expand Down
Loading
Loading