diff --git a/Cargo.lock b/Cargo.lock
index 1abafdd6a5066..5c815bc451386 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2529,6 +2529,7 @@ dependencies = [
  "oxc_parser",
  "oxc_span",
  "rayon",
+ "simdutf8",
  "tracing-subscriber",
 ]
 
diff --git a/apps/oxfmt/Cargo.toml b/apps/oxfmt/Cargo.toml
index 0ffba2cdbbb9b..8c105e3ea4efb 100644
--- a/apps/oxfmt/Cargo.toml
+++ b/apps/oxfmt/Cargo.toml
@@ -38,6 +38,7 @@ cow-utils = { workspace = true }
 ignore = { workspace = true, features = ["simd-accel"] }
 miette = { workspace = true }
 rayon = { workspace = true }
+simdutf8 = { workspace = true }
 tracing-subscriber = { workspace = true, features = [] } # Omit the `regex` feature
 
 # NAPI dependencies (conditional on napi feature)
diff --git a/apps/oxfmt/src/service.rs b/apps/oxfmt/src/service.rs
index 9e350ccfaf57a..5a3a9d569c155 100644
--- a/apps/oxfmt/src/service.rs
+++ b/apps/oxfmt/src/service.rs
@@ -1,4 +1,4 @@
-use std::{fs, path::Path, sync::mpsc, time::Instant};
+use std::{fs, io, path::Path, sync::mpsc, time::Instant};
 
 use cow_utils::CowUtils;
 use rayon::prelude::*;
@@ -77,7 +77,7 @@ impl FormatService {
         let source_type = enable_jsx_source_type(entry.source_type);
 
         let allocator = self.allocator_pool.get();
-        let source_text = fs::read_to_string(path).expect("Failed to read file");
+        let source_text = read_to_string(path).expect("Failed to read file");
 
         let ret = Parser::new(&allocator, &source_text, source_type)
             .with_options(get_parse_options())
@@ -148,3 +148,22 @@ impl FormatService {
         }
     }
 }
+
+/// Read the entire file at `path` into a `String`.
+///
+/// # Errors
+/// Returns any error produced by `fs::read`, or an `io::ErrorKind::InvalidData`
+/// error if the file's bytes are not valid UTF-8.
+fn read_to_string(path: &Path) -> io::Result<String> {
+    // `simdutf8` is faster than `std::str::from_utf8` which `fs::read_to_string` uses internally
+    let bytes = fs::read(path)?;
+    if simdutf8::basic::from_utf8(&bytes).is_err() {
+        // Same error as `fs::read_to_string` produces (using `io::ErrorKind::InvalidData`)
+        return Err(io::Error::new(
+            io::ErrorKind::InvalidData,
+            "stream did not contain valid UTF-8",
+        ));
+    }
+    // SAFETY: `simdutf8` has ensured it's a valid UTF-8 string
+    Ok(unsafe { String::from_utf8_unchecked(bytes) })
+}