Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions src/bun_core/string/StringJoiner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,12 @@ pub struct Watcher {
}

impl StringJoiner {
/// Pre-allocate room for `additional` more pushed slices, so a join with a
/// known piece count does a single nodes allocation instead of log₂N grows.
pub fn reserve(&mut self, additional: usize) {
self.nodes.reserve(additional);
}

/// `data` is expected to live until `.done` is called
pub fn push_static(&mut self, data: &[u8]) {
self.push(data);
Expand Down
101 changes: 58 additions & 43 deletions src/runtime/webcore/Blob.rs
Original file line number Diff line number Diff line change
Expand Up @@ -858,23 +858,16 @@ impl BlobExt for Blob {
}

fn from_dom_form_data(global_this: &JSGlobalObject, form_data: &mut jsc::DOMFormData) -> Blob {
// PERF(port): was arena bulk-free + stack-fallback alloc — profile if it shows up on a hot path.

let mut hex_buf = [0u8; 70];
let boundary = {
// "----WebKitFormBoundary" (22 bytes) + 32 lowercase-hex chars of a fresh UUID.
const BOUNDARY_PREFIX: &[u8; 22] = b"----WebKitFormBoundary";
let mut boundary_buf = [0u8; BOUNDARY_PREFIX.len() + 32];
let boundary: &[u8] = {
// SAFETY: bun_vm() never returns null for a Bun-owned global.
let random = global_this.bun_vm().as_mut().rare_data().next_uuid().bytes;
use std::io::Write;
let mut cursor = &mut hex_buf[..];
boundary_buf[..BOUNDARY_PREFIX.len()].copy_from_slice(BOUNDARY_PREFIX);
// Zig `{x}` on `[16]u8` emits 32 contiguous lowercase-hex chars.
// Rust's `{:x?}` on `[u8;16]` is Debug formatting (`[a1, b2, …]`),
// so write the prefix then encode bytes one at a time.
cursor.write_all(b"----WebKitFormBoundary").unwrap();
for b in random {
write!(&mut cursor, "{b:02x}").expect("infallible: in-memory write");
}
let written = 70 - cursor.len();
&hex_buf[..written]
bun_core::fmt::bytes_to_hex_lower(&random, &mut boundary_buf[BOUNDARY_PREFIX.len()..]);
&boundary_buf
};

let mut context = FormDataContext {
Expand All @@ -883,6 +876,9 @@ impl BlobExt for Blob {
failed: false,
global_this,
};
// Size the node list up front: a file entry pushes at most 13 slices, a
// string entry 8, plus 3 for the closing boundary.
context.joiner.reserve(form_data.count() * 13 + 3);

// PORT NOTE (layering): `bun_jsc::DOMFormData::for_each` yields the
// lower-tier `bun_jsc::dom_form_data::FormDataEntry`, whose `blob`
Expand Down Expand Up @@ -945,11 +941,9 @@ impl BlobExt for Blob {
for_each_thunk,
);
if context.failed {
// The joiner's Node structs are owned by the (former) arena, but each
// node's data carries its own owner allocator — heap for non-ASCII
// name/value slices and the NodeFS read_file result buffer. Drop the
// joiner (Drop runs StringJoiner::deinit) so every heap-owned slice
// already pushed is freed.
// Drop the joiner (Drop runs StringJoiner::deinit) so every
// heap-owned slice already pushed — escaped names, non-ASCII
// conversions, NodeFS read_file result buffers — is freed.
drop(context.joiner);
return Blob::init_empty(global_this);
}
Expand All @@ -968,16 +962,10 @@ impl BlobExt for Blob {
let blob = Blob::init_with_store(store, global_this);
// Always allocate content_type with the default allocator so deinit() can
// free it unconditionally.
let mut ct = Vec::new();
{
use std::io::Write;
write!(
&mut ct,
"multipart/form-data; boundary={}",
bstr::BStr::new(boundary)
)
.unwrap();
}
const CONTENT_TYPE_PREFIX: &[u8] = b"multipart/form-data; boundary=";
let mut ct = Vec::with_capacity(CONTENT_TYPE_PREFIX.len() + boundary.len());
ct.extend_from_slice(CONTENT_TYPE_PREFIX);
ct.extend_from_slice(boundary);
blob.content_type
.set(bun_core::heap::into_raw(ct.into_boxed_slice()));
blob.content_type_allocated.set(true);
Expand Down Expand Up @@ -3942,24 +3930,50 @@ struct FormDataContext<'a> {

/// WHATWG HTML "multipart/form-data encoding algorithm": percent-encode `"`,
/// CR and LF in field names and filenames so they cannot terminate the
/// quoted-string or inject part headers.
fn escape_form_data_name(bytes: Vec<u8>) -> Box<[u8]> {
/// quoted-string or inject part headers. Returns `None` when nothing needs
/// escaping so the caller can keep using the original bytes without a copy.
fn escape_form_data_name(bytes: &[u8]) -> Option<Box<[u8]>> {
if !bytes.iter().any(|&b| matches!(b, b'"' | b'\r' | b'\n')) {
return bytes.into_boxed_slice();
return None;
}
let mut out = Vec::with_capacity(bytes.len() + 6);
for &b in &bytes {
for &b in bytes {
match b {
b'"' => out.extend_from_slice(b"%22"),
b'\r' => out.extend_from_slice(b"%0D"),
b'\n' => out.extend_from_slice(b"%0A"),
_ => out.push(b),
}
}
out.into_boxed_slice()
Some(out.into_boxed_slice())
}

impl FormDataContext<'_> {
/// Append the UTF-8 view of a form-data string without copying it: the
/// borrowed case points into a WTF string owned by the `DOMFormData` being
/// serialized, which outlives `joiner.done()` in `from_dom_form_data`
/// (Zig: `joiner.push(slice, slice.allocator.get())`); an owned slice
/// (UTF-16 / non-ASCII Latin-1 conversion) transfers its allocation to the
/// joiner. When `escape` is set, `"`/CR/LF are percent-encoded into a copy.
fn push_string_slice(joiner: &mut StringJoiner, slice: ZigStringSlice, escape: bool) {
if escape {
if let Some(escaped) = escape_form_data_name(slice.slice()) {
joiner.push_owned(escaped);
return;
}
}
if matches!(slice, ZigStringSlice::Owned(_)) {
// `into_vec` moves the buffer out of an `Owned` slice without copying.
joiner.push_owned(slice.into_vec().into_boxed_slice());
} else if matches!(slice, ZigStringSlice::Static(..)) {
joiner.push_static(slice.slice());
} else {
// WTF-backed slices release their pin on drop — copy rather than
// borrow past it. (`ZigString::to_slice` never produces these.)
joiner.push_cloned(slice.slice());
}
Comment thread
robobun marked this conversation as resolved.
}

pub(crate) fn on_entry(&mut self, name: ZigString, entry: FormDataEntry<'_>) {
if self.failed {
return;
Expand All @@ -3975,22 +3989,20 @@ impl FormDataContext<'_> {
joiner.push_static(b"\r\n");

joiner.push_static(b"Content-Disposition: form-data; name=\"");
// PORT NOTE: Zig `joiner.push(slice, allocator?)` encoded ownership in
// the optional allocator. `StringJoiner::push_owned` is the Rust
// equivalent; `ZigStringSlice::into_vec` moves out the buffer if owned
// or copies if borrowed (matching Zig's `null`-allocator borrow case).
joiner.push_owned(escape_form_data_name(name.to_slice().into_vec()));
Self::push_string_slice(joiner, name.to_slice(), true);

match entry {
FormDataEntry::String(value) => {
joiner.push_static(b"\"\r\n\r\n");
joiner.push_owned(value.to_slice().into_vec().into_boxed_slice());
Self::push_string_slice(joiner, value.to_slice(), false);
}
FormDataEntry::File { blob, filename } => {
joiner.push_static(b"\"; filename=\"");
joiner.push_owned(escape_form_data_name(filename.to_slice().into_vec()));
Self::push_string_slice(joiner, filename.to_slice(), true);
joiner.push_static(b"\"\r\n");

// Borrowed from the blob, which the `DOMFormData` keeps alive
// past `joiner.done()` (Zig used `pushStatic` here too).
let blob_ct = blob.content_type_slice();
let content_type: &[u8] = if !blob_ct.is_empty()
&& !blob_ct.iter().any(|&b| matches!(b, b'\r' | b'\n'))
Expand All @@ -4000,7 +4012,7 @@ impl FormDataContext<'_> {
b"application/octet-stream"
};
joiner.push_static(b"Content-Type: ");
joiner.push_cloned(content_type);
joiner.push_static(content_type);
joiner.push_static(b"\r\n\r\n");

if blob.store.get().is_some() {
Expand Down Expand Up @@ -4052,7 +4064,10 @@ impl FormDataContext<'_> {
}
}
store::Data::Bytes(_) => {
joiner.push_cloned(blob.shared_view());
// Borrowed: the blob's store is kept alive by the
// `DOMFormData` entry until after `joiner.done()`
// (Zig used `pushStatic` here too).
joiner.push_static(blob.shared_view());
}
}
}
Expand Down
102 changes: 102 additions & 0 deletions test/js/web/html/FormData-multipart-serialization.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
import { describe, expect, test } from "bun:test";
Comment thread
robobun marked this conversation as resolved.

// Serializing a FormData body (`new Response(formData)` / `new Request(..., { body: formData })`)
// goes through Blob::from_dom_form_data, which joins the multipart parts out of
// borrowed views of the FormData's strings and blob bytes. These tests lock in
// the exact wire format (boundary shape, part layout, name/filename escaping,
// content-type fallback) and verify the serialize → parse round-trip, including
// non-ASCII strings and large binary payloads.
describe("multipart serialization (new Response(formData))", () => {
test("serializes string fields, blobs, unicode and escaped names exactly", async () => {
const formData = new FormData();
formData.append("simple", "value");
formData.append("empty", "");
formData.append("unicode name ☺", "ünïcode välue 😊");
formData.append('quote"name', "v1");
formData.append("crlf\r\nname", "v2");
formData.append("untyped-blob", new Blob(["blob-bytes"]));
formData.append("named-blob", new Blob(["named-bytes"]), "file.bin");
formData.append("typed-file", new File(["<p>hi</p>"], 'weird"file\r\nname.html', { type: "text/html" }));

const response = new Response(formData);
const contentType = response.headers.get("Content-Type")!;
expect(contentType).toMatch(/^multipart\/form-data; boundary=----WebKitFormBoundary[0-9a-f]{32}$/);
const boundary = contentType.slice(contentType.indexOf("boundary=") + "boundary=".length);

const text = await response.text();
expect(text).toBe(
[
`--${boundary}\r\n`,
`Content-Disposition: form-data; name="simple"\r\n\r\nvalue\r\n`,
`--${boundary}\r\n`,
`Content-Disposition: form-data; name="empty"\r\n\r\n\r\n`,
`--${boundary}\r\n`,
`Content-Disposition: form-data; name="unicode name ☺"\r\n\r\nünïcode välue 😊\r\n`,
`--${boundary}\r\n`,
// '"', CR and LF in names/filenames are percent-encoded so they can't
// break out of the quoted-string or inject part headers.
`Content-Disposition: form-data; name="quote%22name"\r\n\r\nv1\r\n`,
`--${boundary}\r\n`,
`Content-Disposition: form-data; name="crlf%0D%0Aname"\r\n\r\nv2\r\n`,
`--${boundary}\r\n`,
`Content-Disposition: form-data; name="untyped-blob"; filename=""\r\n`,
`Content-Type: application/octet-stream\r\n\r\nblob-bytes\r\n`,
`--${boundary}\r\n`,
`Content-Disposition: form-data; name="named-blob"; filename="file.bin"\r\n`,
`Content-Type: application/octet-stream\r\n\r\nnamed-bytes\r\n`,
`--${boundary}\r\n`,
`Content-Disposition: form-data; name="typed-file"; filename="weird%22file%0D%0Aname.html"\r\n`,
`Content-Type: text/html;charset=utf-8\r\n\r\n<p>hi</p>\r\n`,
`--${boundary}--\r\n`,
].join(""),
);
});

test("round-trips every entry kind through Response.formData()", async () => {
const formData = new FormData();
formData.append("simple", "value");
formData.append("dup", "first");
formData.append("dup", "second");
formData.append("unicode name ☺", "ünïcode välue 😊");
formData.append("blob", new Blob(["blob-bytes"]));
formData.append("file", new File(["<p>hi</p>"], "日本語ファイル名.html", { type: "text/html" }));

const parsed = await new Response(formData).formData();
const entries = await Promise.all(
[...parsed.entries()].map(async ([name, value]) =>
value instanceof Blob
? [
name,
{ file: value instanceof File, name: (value as File).name, type: value.type, text: await value.text() },
]
: [name, value],
),
);

expect(entries).toEqual([
["simple", "value"],
["dup", "first"],
["dup", "second"],
["unicode name ☺", "ünïcode välue 😊"],
["blob", { file: true, name: undefined, type: "", text: "blob-bytes" }],
["file", { file: true, name: "日本語ファイル名.html", type: "text/html;charset=utf-8", text: "<p>hi</p>" }],
]);
});

test("round-trips large binary blob contents intact", async () => {
const bytes = new Uint8Array(64 * 1024);
for (let i = 0; i < bytes.length; i++) {
bytes[i] = (i * 31 + 7) & 0xff;
}

const formData = new FormData();
formData.append("payload", new Blob([bytes]), "payload.bin");
formData.append("after", "still-parses");

const parsed = await new Response(formData).formData();
const payload = parsed.get("payload") as File;
expect(payload.size).toBe(bytes.length);
expect(new Uint8Array(await payload.arrayBuffer())).toEqual(bytes);
expect(parsed.get("after")).toBe("still-parses");
});
});
Loading