Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
102 changes: 102 additions & 0 deletions crates/biome_markdown_parser/tests/fuzz_corpus/seed.jsonl

Large diffs are not rendered by default.

167 changes: 167 additions & 0 deletions crates/biome_markdown_parser/tests/fuzz_differential.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
//! Differential fuzzer: compares Biome's markdown HTML output against
//! commonmark.js reference output from a pre-generated corpus.
//!
//! The checked-in seed corpus (`seed.jsonl`) contains only passing cases.
//! Any failure is either a regression or a newly discovered mismatch.
//!
//! Run with: cargo test -p biome_markdown_parser --test fuzz_differential -- --ignored --nocapture

use biome_markdown_parser::{document_to_html, parse_markdown};
use biome_markdown_syntax::MdDocument;
use biome_rowan::AstNode;
use std::fs;
use std::path::{Path, PathBuf};

/// Normalize HTML for comparison, preserving whitespace inside `<pre>` blocks.
/// Matches the normalization in `xtask/coverage/src/markdown/commonmark.rs`.
fn normalize_html(html: &str) -> String {
let mut result = Vec::new();
let mut in_pre = false;

for line in html.lines() {
if line.contains("<pre") {
in_pre = true;
}
if in_pre {
result.push(line.to_string());
} else {
result.push(line.trim_end().to_string());
}
if line.contains("</pre>") {
in_pre = false;
}
}

result.join("\n").trim().to_string() + "\n"
}

/// FNV-1a 64-bit hash — deterministic across Rust toolchain versions.
fn content_hash(s: &str) -> String {
let mut hash: u64 = 0xcbf2_9ce4_8422_2325;
for byte in s.as_bytes() {
hash ^= *byte as u64;
hash = hash.wrapping_mul(0x0100_0000_01b3);
}
format!("{hash:016x}")
}

#[derive(serde::Deserialize)]
struct SeedCase {
markdown: String,
html: String,
}

struct Failure {
hash: String,
markdown: String,
expected: String,
actual: String,
}

fn run_corpus(path: &Path) -> (Vec<Failure>, usize) {
let content = fs::read_to_string(path)
.unwrap_or_else(|e| panic!("Failed to read corpus {}: {e}", path.display()));

let mut failures = vec![];
let mut total = 0usize;

for (i, line) in content.lines().enumerate() {
if line.trim().is_empty() {
continue;
}

let entry: SeedCase = serde_json::from_str(line)
.unwrap_or_else(|e| panic!("Malformed JSON at {}:{}: {e}", path.display(), i + 1));

let markdown = &entry.markdown;
let expected_html = &entry.html;
total += 1;

let parsed = parse_markdown(markdown);
let Some(doc) = MdDocument::cast(parsed.syntax()) else {
failures.push(Failure {
hash: content_hash(markdown),
markdown: markdown.clone(),
expected: expected_html.clone(),
actual: "<parse failed>".to_string(),
});
continue;
};

let actual = document_to_html(
&doc,
parsed.list_tightness(),
parsed.list_item_indents(),
parsed.quote_indents(),
);

let expected_normalized = normalize_html(expected_html);
let actual_normalized = normalize_html(&actual);

if expected_normalized != actual_normalized {
failures.push(Failure {
hash: content_hash(markdown),
markdown: markdown.clone(),
expected: expected_html.clone(),
actual,
});
}
}

(failures, total)
}

#[test]
#[ignore]
fn differential_fuzz_against_commonmark_js() {
let manifest_dir = Path::new(env!("CARGO_MANIFEST_DIR"));
let corpus_dir = manifest_dir.join("tests/fuzz_corpus");

// Always run the checked-in seed corpus (passing cases only)
let seed_path = corpus_dir.join("seed.jsonl");
let (mut all_failures, mut total) = run_corpus(&seed_path);

// Optionally run an extended corpus if FUZZ_CORPUS env var is set
if let Ok(extra_path) = std::env::var("FUZZ_CORPUS") {
let (extra_failures, extra_total) = run_corpus(Path::new(&extra_path));
all_failures.extend(extra_failures);
total += extra_total;
}

// Write failure artifacts if FUZZ_FAILURES_DIR is set
if let Ok(failures_dir) = std::env::var("FUZZ_FAILURES_DIR") {
let dir = PathBuf::from(&failures_dir);
fs::create_dir_all(&dir).expect("Failed to create failures directory");

for failure in &all_failures {
let base = dir.join(&failure.hash);
fs::write(base.with_extension("md"), &failure.markdown).ok();
fs::write(base.with_extension("expected.html"), &failure.expected).ok();
fs::write(base.with_extension("actual.html"), &failure.actual).ok();
Comment thread
coderabbitai[bot] marked this conversation as resolved.
}
}

// Print summary
let passed = total - all_failures.len();
eprintln!(
"\nDifferential fuzz: {total} cases, {passed} passed, {} failed",
all_failures.len()
);

if !all_failures.is_empty() {
eprintln!("\n=== {} differential failures ===\n", all_failures.len());
for (i, f) in all_failures.iter().enumerate().take(10) {
eprintln!("--- Failure {} [{}] ---", i + 1, f.hash);
eprintln!("Input:\n{}", f.markdown);
eprintln!("Expected:\n{}", f.expected);
eprintln!("Actual:\n{}", f.actual);
eprintln!();
}
if all_failures.len() > 10 {
eprintln!("... and {} more", all_failures.len() - 10);
}
panic!("{} differential mismatches found", all_failures.len());
}

eprintln!("All cases passed.");
}
Loading
Loading