Skip to content

Commit 15fbd2f

Browse files
committed
Auto merge of #14137 - Xaeroxe:checksum-freshness, r=weihanglo
initial version of checksum based freshness. Implementation for #14136; resolves #6529. This PR implements the use of checksums in cargo fingerprints as an alternative to using mtimes. This is most useful on systems with poor mtime implementations. This has a dependency on rust-lang/rust#126930. It's expected this will increase the time it takes to declare a build to be fresh. Still, this loss in performance may be preferable to the issues the ecosystem has had with the use of mtimes for determining freshness.
2 parents ac39e69 + cf893c1 commit 15fbd2f

File tree

15 files changed

+3668
-172
lines changed

15 files changed

+3668
-172
lines changed

Cargo.lock

+33
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

+3
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ anstream = "0.6.15"
2323
anstyle = "1.0.8"
2424
anyhow = "1.0.86"
2525
base64 = "0.22.1"
26+
blake3 = "1.5.2"
2627
bytesize = "1.3"
2728
cargo = { path = "" }
2829
cargo-credential = { version = "0.4.2", path = "credential/cargo-credential" }
@@ -148,6 +149,7 @@ anstream.workspace = true
148149
anstyle.workspace = true
149150
anyhow.workspace = true
150151
base64.workspace = true
152+
blake3.workspace = true
151153
bytesize.workspace = true
152154
cargo-credential.workspace = true
153155
cargo-platform.workspace = true
@@ -197,6 +199,7 @@ shell-escape.workspace = true
197199
supports-hyperlinks.workspace = true
198200
tar.workspace = true
199201
tempfile.workspace = true
202+
thiserror.workspace = true
200203
time.workspace = true
201204
toml.workspace = true
202205
toml_edit.workspace = true

crates/cargo-test-support/src/lib.rs

+79
Original file line numberDiff line numberDiff line change
@@ -1582,3 +1582,82 @@ where
15821582
let thread = std::thread::spawn(|| f());
15831583
thread_wait_timeout(n, thread)
15841584
}
1585+
1586+
// Helper for testing dep-info files in the fingerprint dir.
1587+
#[track_caller]
1588+
pub fn assert_deps(project: &Project, fingerprint: &str, test_cb: impl Fn(&Path, &[(u8, &str)])) {
1589+
let mut files = project
1590+
.glob(fingerprint)
1591+
.map(|f| f.expect("unwrap glob result"))
1592+
// Filter out `.json` entries.
1593+
.filter(|f| f.extension().is_none());
1594+
let info_path = files
1595+
.next()
1596+
.unwrap_or_else(|| panic!("expected 1 dep-info file at {}, found 0", fingerprint));
1597+
assert!(files.next().is_none(), "expected only 1 dep-info file");
1598+
let dep_info = fs::read(&info_path).unwrap();
1599+
let dep_info = &mut &dep_info[..];
1600+
let deps = (0..read_usize(dep_info))
1601+
.map(|_| {
1602+
let ty = read_u8(dep_info);
1603+
let path = std::str::from_utf8(read_bytes(dep_info)).unwrap();
1604+
let checksum_present = read_bool(dep_info);
1605+
if checksum_present {
1606+
// Read out the checksum info without using it
1607+
let _file_len = read_u64(dep_info);
1608+
let _checksum = read_bytes(dep_info);
1609+
}
1610+
(ty, path)
1611+
})
1612+
.collect::<Vec<_>>();
1613+
test_cb(&info_path, &deps);
1614+
1615+
fn read_usize(bytes: &mut &[u8]) -> usize {
1616+
let ret = &bytes[..4];
1617+
*bytes = &bytes[4..];
1618+
1619+
u32::from_le_bytes(ret.try_into().unwrap()) as usize
1620+
}
1621+
1622+
fn read_u8(bytes: &mut &[u8]) -> u8 {
1623+
let ret = bytes[0];
1624+
*bytes = &bytes[1..];
1625+
ret
1626+
}
1627+
1628+
fn read_bool(bytes: &mut &[u8]) -> bool {
1629+
read_u8(bytes) != 0
1630+
}
1631+
1632+
fn read_u64(bytes: &mut &[u8]) -> u64 {
1633+
let ret = &bytes[..8];
1634+
*bytes = &bytes[8..];
1635+
1636+
u64::from_le_bytes(ret.try_into().unwrap())
1637+
}
1638+
1639+
fn read_bytes<'a>(bytes: &mut &'a [u8]) -> &'a [u8] {
1640+
let n = read_usize(bytes);
1641+
let ret = &bytes[..n];
1642+
*bytes = &bytes[n..];
1643+
ret
1644+
}
1645+
}
1646+
1647+
pub fn assert_deps_contains(project: &Project, fingerprint: &str, expected: &[(u8, &str)]) {
1648+
assert_deps(project, fingerprint, |info_path, entries| {
1649+
for (e_kind, e_path) in expected {
1650+
let pattern = glob::Pattern::new(e_path).unwrap();
1651+
let count = entries
1652+
.iter()
1653+
.filter(|(kind, path)| kind == e_kind && pattern.matches(path))
1654+
.count();
1655+
if count != 1 {
1656+
panic!(
1657+
"Expected 1 match of {} {} in {:?}, got {}:\n{:#?}",
1658+
e_kind, e_path, info_path, count, entries
1659+
);
1660+
}
1661+
}
1662+
})
1663+
}

deny.toml

+1
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,7 @@ allow = [
9494
"MIT",
9595
"MIT-0",
9696
"Apache-2.0",
97+
"BSD-2-Clause",
9798
"BSD-3-Clause",
9899
"MPL-2.0",
99100
"Unicode-DFS-2016",

src/cargo/core/compiler/build_runner/mod.rs

+4-1
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ use jobserver::Client;
1616

1717
use super::build_plan::BuildPlan;
1818
use super::custom_build::{self, BuildDeps, BuildScriptOutputs, BuildScripts};
19-
use super::fingerprint::Fingerprint;
19+
use super::fingerprint::{Checksum, Fingerprint};
2020
use super::job_queue::JobQueue;
2121
use super::layout::Layout;
2222
use super::lto::Lto;
@@ -50,6 +50,8 @@ pub struct BuildRunner<'a, 'gctx> {
5050
pub fingerprints: HashMap<Unit, Arc<Fingerprint>>,
5151
/// Cache of file mtimes to reduce filesystem hits.
5252
pub mtime_cache: HashMap<PathBuf, FileTime>,
53+
/// Cache of file checksums to reduce filesystem reads.
54+
pub checksum_cache: HashMap<PathBuf, Checksum>,
5355
/// A set used to track which units have been compiled.
5456
/// A unit may appear in the job graph multiple times as a dependency of
5557
/// multiple packages, but it only needs to run once.
@@ -113,6 +115,7 @@ impl<'a, 'gctx> BuildRunner<'a, 'gctx> {
113115
build_script_outputs: Arc::new(Mutex::new(BuildScriptOutputs::default())),
114116
fingerprints: HashMap::new(),
115117
mtime_cache: HashMap::new(),
118+
checksum_cache: HashMap::new(),
116119
compiled: HashSet::new(),
117120
build_scripts: HashMap::new(),
118121
build_explicit_deps: HashMap::new(),

src/cargo/core/compiler/fingerprint/dirty_reason.rs

+62
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,9 @@ pub enum DirtyReason {
3434
old: String,
3535
new: String,
3636
},
37+
ChecksumUseChanged {
38+
old: bool,
39+
},
3740
DepInfoOutputChanged {
3841
old: PathBuf,
3942
new: PathBuf,
@@ -183,6 +186,16 @@ impl DirtyReason {
183186
DirtyReason::PrecalculatedComponentsChanged { .. } => {
184187
s.dirty_because(unit, "the precalculated components changed")
185188
}
189+
DirtyReason::ChecksumUseChanged { old } => {
190+
if *old {
191+
s.dirty_because(
192+
unit,
193+
"the prior compilation used checksum freshness and this one does not",
194+
)
195+
} else {
196+
s.dirty_because(unit, "checksum freshness requested, prior compilation did not use checksum freshness")
197+
}
198+
}
186199
DirtyReason::DepInfoOutputChanged { .. } => {
187200
s.dirty_because(unit, "the dependency info output changed")
188201
}
@@ -222,6 +235,20 @@ impl DirtyReason {
222235
format_args!("the file `{}` is missing", file.display()),
223236
)
224237
}
238+
StaleItem::UnableToReadFile(file) => {
239+
let file = file.strip_prefix(root).unwrap_or(&file);
240+
s.dirty_because(
241+
unit,
242+
format_args!("the file `{}` could not be read", file.display()),
243+
)
244+
}
245+
StaleItem::FailedToReadMetadata(file) => {
246+
let file = file.strip_prefix(root).unwrap_or(&file);
247+
s.dirty_because(
248+
unit,
249+
format_args!("couldn't read metadata for file `{}`", file.display()),
250+
)
251+
}
225252
StaleItem::ChangedFile {
226253
stale,
227254
stale_mtime,
@@ -235,6 +262,41 @@ impl DirtyReason {
235262
format_args!("the file `{}` has changed ({after})", file.display()),
236263
)
237264
}
265+
StaleItem::ChangedChecksum {
266+
source,
267+
stored_checksum,
268+
new_checksum,
269+
} => {
270+
let file = source.strip_prefix(root).unwrap_or(&source);
271+
s.dirty_because(
272+
unit,
273+
format_args!(
274+
"the file `{}` has changed (checksum didn't match, {stored_checksum} != {new_checksum})",
275+
file.display(),
276+
),
277+
)
278+
}
279+
StaleItem::FileSizeChanged {
280+
path,
281+
old_size,
282+
new_size,
283+
} => {
284+
let file = path.strip_prefix(root).unwrap_or(&path);
285+
s.dirty_because(
286+
unit,
287+
format_args!(
288+
"file size changed ({old_size} != {new_size}) for `{}`",
289+
file.display()
290+
),
291+
)
292+
}
293+
StaleItem::MissingChecksum(path) => {
294+
let file = path.strip_prefix(root).unwrap_or(&path);
295+
s.dirty_because(
296+
unit,
297+
format_args!("the checksum for file `{}` is missing", file.display()),
298+
)
299+
}
238300
StaleItem::ChangedEnv { var, .. } => s.dirty_because(
239301
unit,
240302
format_args!("the environment variable {var} changed"),

0 commit comments

Comments
 (0)