oxc-project · graphite-app · Oct 30, 2025 · Oct 30, 2025
diff --git a/crates/oxc_codegen/src/lib.rs b/crates/oxc_codegen/src/lib.rs
@@ -889,6 +889,16 @@ impl<'a> Codegen<'a> {
         if let Some(sourcemap_builder) = self.sourcemap_builder.as_mut()
             && !span.is_empty()
         {
+            // Validate that span.end is within source content bounds.
+            // When oxc_codegen adds punctuation (semicolons, newlines) that don't exist in the
+            // original source, span.end may be at or beyond the source content length.
+            // We should not create sourcemap tokens for such positions as they would be invalid.
+            if let Some(source_text) = self.source_text {
+                #[expect(clippy::cast_possible_truncation)]
+                if span.end >= source_text.len() as u32 {
+                    return;
+                }
+            }
             sourcemap_builder.add_source_mapping(self.code.as_bytes(), span.end, None);
         }
     }

diff --git a/crates/oxc_codegen/tests/integration/sourcemap.rs b/crates/oxc_codegen/tests/integration/sourcemap.rs
@@ -31,6 +31,62 @@ fn incorrect_ast() {
     assert!(ret.map.is_some(), "sourcemap exists");
 }
 
+/// Ensures every sourcemap token points at a position that exists in the original source.
+///
+/// Codegen may print semicolons/newlines that the input never contained; a token recorded
+/// for such output would land past the end of the source content and be invalid.
+/// See: https://github.com/rolldown/rolldown/pull/6750
+#[test]
+fn no_invalid_tokens_beyond_source() {
+    // Inputs written without trailing semicolons.
+    let test_cases = [
+        "export default { foo }",       // export statement
+        "const a = 1",                  // variable declaration
+        "function foo() { return 42 }", // function
+        "const obj = { foo }",          // object with shorthand property
+    ];
+
+    for source_text in test_cases {
+        let allocator = Allocator::default();
+        // Parse with the `mjs` source type.
+        let parsed = Parser::new(&allocator, source_text, SourceType::mjs()).parse();
+
+        let options = CodegenOptions {
+            source_map_path: Some(PathBuf::from("test.js")),
+            ..Default::default()
+        };
+        let output = Codegen::new().with_options(options).build(&parsed.program);
+        // `unwrap` doubles as an assertion that a sourcemap was produced.
+        let map = output.map.unwrap();
+
+        for token in map.get_tokens() {
+            // Tokens lacking a source id or source content are skipped.
+            let Some(source_id) = token.get_source_id() else { continue };
+            let Some(content) = map.get_source_content(source_id) else { continue };
+
+            let src_line = token.get_src_line() as usize;
+            let src_col = token.get_src_col() as usize;
+
+            // The reported line must be one of the `\n`-separated lines of the source content.
+            let source_lines: Vec<&str> = content.split('\n').collect();
+            let line_total = source_lines.len();
+            assert!(
+                src_line < line_total,
+                "Invalid token: line {src_line} is beyond source line count {} for source '{source_text}'",
+                line_total,
+            );
+
+            // The column is compared against the line length in UTF-16 code units.
+            let line_content = source_lines[src_line];
+            let line_len_utf16 = line_content.encode_utf16().count();
+            assert!(
+                src_col < line_len_utf16,
+                "Invalid token: column {src_col} is beyond line length {line_len_utf16} for line '{line_content}' in source '{source_text}'",
+            );
+        }
+    }
+}
+
 #[test]
 #[cfg(not(target_endian = "big"))] // we run big endian tests on docker that does not have node installed
 fn stacktrace_is_correct() {