Skip to content

Commit f161adf

Browse files
authored
perf(compile): read embedded files as static references when UTF-8 and reading as strings (#27033)
1 parent 76daa03 commit f161adf

19 files changed, +158 -61 lines changed

cli/args/deno_json.rs

+2
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,8 @@ impl<'a> deno_config::fs::DenoConfigFs for DenoConfigFsAdapter<'a> {
2222
self
2323
.0
2424
.read_text_file_lossy_sync(path, None)
25+
// todo(https://github.com/denoland/deno_config/pull/140): avoid clone
26+
.map(|s| s.into_owned())
2527
.map_err(|err| err.into_io_error())
2628
}
2729

cli/cache/mod.rs

+2
Original file line number | Diff line number | Diff line change
@@ -116,6 +116,8 @@ impl<'a> deno_cache_dir::DenoCacheEnv for DenoCacheEnvFsAdapter<'a> {
116116
self
117117
.0
118118
.read_file_sync(path, None)
119+
// todo(https://github.com/denoland/deno_cache_dir/pull/66): avoid clone
120+
.map(|bytes| bytes.into_owned())
119121
.map_err(|err| err.into_io_error())
120122
}
121123

cli/module_loader.rs

+16 -10
Original file line number | Diff line number | Diff line change
@@ -1060,7 +1060,10 @@ impl<TGraphContainer: ModuleGraphContainer> NodeRequireLoader
10601060
self.npm_resolver.ensure_read_permission(permissions, path)
10611061
}
10621062

1063-
fn load_text_file_lossy(&self, path: &Path) -> Result<String, AnyError> {
1063+
fn load_text_file_lossy(
1064+
&self,
1065+
path: &Path,
1066+
) -> Result<Cow<'static, str>, AnyError> {
10641067
// todo(dsherret): use the preloaded module from the graph if available?
10651068
let media_type = MediaType::from_path(path);
10661069
let text = self.fs.read_text_file_lossy_sync(path, None)?;
@@ -1075,15 +1078,18 @@ impl<TGraphContainer: ModuleGraphContainer> NodeRequireLoader
10751078
.into(),
10761079
);
10771080
}
1078-
self.emitter.emit_parsed_source_sync(
1079-
&specifier,
1080-
media_type,
1081-
// this is probably not super accurate due to require esm, but probably ok.
1082-
// If we find this causes a lot of churn in the emit cache then we should
1083-
// investigate how we can make this better
1084-
ModuleKind::Cjs,
1085-
&text.into(),
1086-
)
1081+
self
1082+
.emitter
1083+
.emit_parsed_source_sync(
1084+
&specifier,
1085+
media_type,
1086+
// this is probably not super accurate due to require esm, but probably ok.
1087+
// If we find this causes a lot of churn in the emit cache then we should
1088+
// investigate how we can make this better
1089+
ModuleKind::Cjs,
1090+
&text.into(),
1091+
)
1092+
.map(Cow::Owned)
10871093
} else {
10881094
Ok(text)
10891095
}

cli/node.rs

+1 -1
Original file line number | Diff line number | Diff line change
@@ -160,7 +160,7 @@ impl CjsCodeAnalyzer for CliCjsCodeAnalyzer {
160160
if let Ok(source_from_file) =
161161
self.fs.read_text_file_lossy_async(path, None).await
162162
{
163-
Cow::Owned(source_from_file)
163+
source_from_file
164164
} else {
165165
return Ok(ExtNodeCjsAnalysis::Cjs(CjsAnalysisExports {
166166
exports: vec![],

cli/resolver.rs

+11 -5
Original file line number | Diff line number | Diff line change
@@ -37,7 +37,7 @@ use crate::node::CliNodeCodeTranslator;
3737
use crate::npm::CliNpmResolver;
3838
use crate::npm::InnerCliNpmResolverRef;
3939
use crate::util::sync::AtomicFlag;
40-
use crate::util::text_encoding::from_utf8_lossy_owned;
40+
use crate::util::text_encoding::from_utf8_lossy_cow;
4141

4242
pub type CjsTracker = deno_resolver::cjs::CjsTracker<DenoFsNodeResolverEnv>;
4343
pub type IsCjsResolver =
@@ -62,7 +62,10 @@ pub struct ModuleCodeStringSource {
6262
pub struct CliDenoResolverFs(pub Arc<dyn FileSystem>);
6363

6464
impl deno_resolver::fs::DenoResolverFs for CliDenoResolverFs {
65-
fn read_to_string_lossy(&self, path: &Path) -> std::io::Result<String> {
65+
fn read_to_string_lossy(
66+
&self,
67+
path: &Path,
68+
) -> std::io::Result<Cow<'static, str>> {
6669
self
6770
.0
6871
.read_text_file_lossy_sync(path, None)
@@ -182,18 +185,21 @@ impl NpmModuleLoader {
182185

183186
let code = if self.cjs_tracker.is_maybe_cjs(specifier, media_type)? {
184187
// translate cjs to esm if it's cjs and inject node globals
185-
let code = from_utf8_lossy_owned(code);
188+
let code = from_utf8_lossy_cow(code);
186189
ModuleSourceCode::String(
187190
self
188191
.node_code_translator
189-
.translate_cjs_to_esm(specifier, Some(Cow::Owned(code)))
192+
.translate_cjs_to_esm(specifier, Some(code))
190193
.await?
191194
.into_owned()
192195
.into(),
193196
)
194197
} else {
195198
// esm and json code is untouched
196-
ModuleSourceCode::Bytes(code.into_boxed_slice().into())
199+
ModuleSourceCode::Bytes(match code {
200+
Cow::Owned(bytes) => bytes.into_boxed_slice().into(),
201+
Cow::Borrowed(bytes) => bytes.into(),
202+
})
197203
};
198204

199205
Ok(ModuleCodeStringSource {

cli/standalone/binary.rs

+3 -4
Original file line number | Diff line number | Diff line change
@@ -282,14 +282,13 @@ impl StandaloneModules {
282282
.vfs
283283
.read_file_all(entry, VfsFileSubDataKind::ModuleGraph)?,
284284
Err(err) if err.kind() == ErrorKind::NotFound => {
285-
let bytes = match RealFs.read_file_sync(&path, None) {
285+
match RealFs.read_file_sync(&path, None) {
286286
Ok(bytes) => bytes,
287287
Err(FsError::Io(err)) if err.kind() == ErrorKind::NotFound => {
288288
return Ok(None)
289289
}
290290
Err(err) => return Err(err.into()),
291-
};
292-
Cow::Owned(bytes)
291+
}
293292
}
294293
Err(err) => return Err(err.into()),
295294
};
@@ -694,7 +693,7 @@ impl<'a> DenoCompileBinaryWriter<'a> {
694693
&file_path,
695694
match maybe_source {
696695
Some(source) => source,
697-
None => RealFs.read_file_sync(&file_path, None)?,
696+
None => RealFs.read_file_sync(&file_path, None)?.into_owned(),
698697
},
699698
VfsFileSubDataKind::ModuleGraph,
700699
)

cli/standalone/mod.rs

+3 -2
Original file line number | Diff line number | Diff line change
@@ -91,6 +91,7 @@ use crate::resolver::CliNpmReqResolver;
9191
use crate::resolver::NpmModuleLoader;
9292
use crate::util::progress_bar::ProgressBar;
9393
use crate::util::progress_bar::ProgressBarStyle;
94+
use crate::util::text_encoding::from_utf8_lossy_cow;
9495
use crate::util::v8::construct_v8_flags;
9596
use crate::worker::CliCodeCache;
9697
use crate::worker::CliMainWorkerFactory;
@@ -516,13 +517,13 @@ impl NodeRequireLoader for EmbeddedModuleLoader {
516517
fn load_text_file_lossy(
517518
&self,
518519
path: &std::path::Path,
519-
) -> Result<String, AnyError> {
520+
) -> Result<Cow<'static, str>, AnyError> {
520521
let file_entry = self.shared.vfs.file_entry(path)?;
521522
let file_bytes = self
522523
.shared
523524
.vfs
524525
.read_file_all(file_entry, VfsFileSubDataKind::ModuleGraph)?;
525-
Ok(String::from_utf8(file_bytes.into_owned())?)
526+
Ok(from_utf8_lossy_cow(file_bytes))
526527
}
527528

528529
fn is_maybe_cjs(

cli/standalone/virtual_fs.rs

+4 -7
Original file line number | Diff line number | Diff line change
@@ -743,15 +743,12 @@ impl deno_io::fs::File for FileBackedVfsFile {
743743
Err(FsError::NotSupported)
744744
}
745745

746-
fn read_all_sync(self: Rc<Self>) -> FsResult<Vec<u8>> {
747-
self.read_to_end().map(|bytes| bytes.into_owned())
746+
fn read_all_sync(self: Rc<Self>) -> FsResult<Cow<'static, [u8]>> {
747+
self.read_to_end()
748748
}
749-
async fn read_all_async(self: Rc<Self>) -> FsResult<Vec<u8>> {
749+
async fn read_all_async(self: Rc<Self>) -> FsResult<Cow<'static, [u8]>> {
750750
let inner = (*self).clone();
751-
tokio::task::spawn_blocking(move || {
752-
inner.read_to_end().map(|bytes| bytes.into_owned())
753-
})
754-
.await?
751+
tokio::task::spawn_blocking(move || inner.read_to_end()).await?
755752
}
756753

757754
fn chmod_sync(self: Rc<Self>, _pathmode: u32) -> FsResult<()> {

cli/util/text_encoding.rs

+9
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,15 @@ use deno_core::ModuleSourceCode;
1111
static SOURCE_MAP_PREFIX: &[u8] =
1212
b"//# sourceMappingURL=data:application/json;base64,";
1313

14+
#[inline(always)]
15+
pub fn from_utf8_lossy_cow(bytes: Cow<[u8]>) -> Cow<str> {
16+
match bytes {
17+
Cow::Borrowed(bytes) => String::from_utf8_lossy(bytes),
18+
Cow::Owned(bytes) => Cow::Owned(from_utf8_lossy_owned(bytes)),
19+
}
20+
}
21+
22+
#[inline(always)]
1423
pub fn from_utf8_lossy_owned(bytes: Vec<u8>) -> String {
1524
match String::from_utf8_lossy(&bytes) {
1625
Cow::Owned(code) => code,

ext/fs/in_memory_fs.rs

+4 -3
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,7 @@
33
// Allow using Arc for this module.
44
#![allow(clippy::disallowed_types)]
55

6+
use std::borrow::Cow;
67
use std::collections::hash_map::Entry;
78
use std::collections::HashMap;
89
use std::io::Error;
@@ -457,11 +458,11 @@ impl FileSystem for InMemoryFs {
457458
&self,
458459
path: &Path,
459460
_access_check: Option<AccessCheckCb>,
460-
) -> FsResult<Vec<u8>> {
461+
) -> FsResult<Cow<'static, [u8]>> {
461462
let entry = self.get_entry(path);
462463
match entry {
463464
Some(entry) => match &*entry {
464-
PathEntry::File(data) => Ok(data.clone()),
465+
PathEntry::File(data) => Ok(Cow::Owned(data.clone())),
465466
PathEntry::Dir => Err(FsError::Io(Error::new(
466467
ErrorKind::InvalidInput,
467468
"Is a directory",
@@ -474,7 +475,7 @@ impl FileSystem for InMemoryFs {
474475
&'a self,
475476
path: PathBuf,
476477
access_check: Option<AccessCheckCb<'a>>,
477-
) -> FsResult<Vec<u8>> {
478+
) -> FsResult<Cow<'static, [u8]>> {
478479
self.read_file_sync(&path, access_check)
479480
}
480481
}

ext/fs/interface.rs

+15 -6
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
11
// Copyright 2018-2024 the Deno authors. All rights reserved. MIT license.
22

3+
use core::str;
34
use std::borrow::Cow;
45
use std::path::Path;
56
use std::path::PathBuf;
@@ -288,7 +289,7 @@ pub trait FileSystem: std::fmt::Debug + MaybeSend + MaybeSync {
288289
&self,
289290
path: &Path,
290291
access_check: Option<AccessCheckCb>,
291-
) -> FsResult<Vec<u8>> {
292+
) -> FsResult<Cow<'static, [u8]>> {
292293
let options = OpenOptions::read();
293294
let file = self.open_sync(path, options, access_check)?;
294295
let buf = file.read_all_sync()?;
@@ -298,7 +299,7 @@ pub trait FileSystem: std::fmt::Debug + MaybeSend + MaybeSync {
298299
&'a self,
299300
path: PathBuf,
300301
access_check: Option<AccessCheckCb<'a>>,
301-
) -> FsResult<Vec<u8>> {
302+
) -> FsResult<Cow<'static, [u8]>> {
302303
let options = OpenOptions::read();
303304
let file = self.open_async(path, options, access_check).await?;
304305
let buf = file.read_all_async().await?;
@@ -327,17 +328,25 @@ pub trait FileSystem: std::fmt::Debug + MaybeSend + MaybeSync {
327328
&self,
328329
path: &Path,
329330
access_check: Option<AccessCheckCb>,
330-
) -> FsResult<String> {
331+
) -> FsResult<Cow<'static, str>> {
331332
let buf = self.read_file_sync(path, access_check)?;
332-
Ok(string_from_utf8_lossy(buf))
333+
Ok(string_from_cow_utf8_lossy(buf))
333334
}
334335
async fn read_text_file_lossy_async<'a>(
335336
&'a self,
336337
path: PathBuf,
337338
access_check: Option<AccessCheckCb<'a>>,
338-
) -> FsResult<String> {
339+
) -> FsResult<Cow<'static, str>> {
339340
let buf = self.read_file_async(path, access_check).await?;
340-
Ok(string_from_utf8_lossy(buf))
341+
Ok(string_from_cow_utf8_lossy(buf))
342+
}
343+
}
344+
345+
#[inline(always)]
346+
fn string_from_cow_utf8_lossy(buf: Cow<'static, [u8]>) -> Cow<'static, str> {
347+
match buf {
348+
Cow::Owned(buf) => Cow::Owned(string_from_utf8_lossy(buf)),
349+
Cow::Borrowed(buf) => String::from_utf8_lossy(buf),
341350
}
342351
}
343352

ext/fs/lib.rs

+1
Original file line number | Diff line number | Diff line change
@@ -17,6 +17,7 @@ pub use crate::interface::OpenOptions;
1717
pub use crate::ops::FsOpsError;
1818
pub use crate::ops::FsOpsErrorKind;
1919
pub use crate::ops::OperationError;
20+
pub use crate::ops::V8MaybeStaticStr;
2021
pub use crate::std_fs::RealFs;
2122
pub use crate::sync::MaybeSend;
2223
pub use crate::sync::MaybeSync;

0 commit comments

Comments
 (0)