From d32a73320e4bcf9e19daa8fe145a90bb25aff6a1 Mon Sep 17 00:00:00 2001 From: onur-ozkan Date: Wed, 11 Oct 2023 17:02:03 +0300 Subject: [PATCH] optimize file read in `Config::verify` `Config::verify` refactored to improve the efficiency and memory usage of file hashing. Signed-off-by: onur-ozkan --- src/bootstrap/download.rs | 37 +++++++++++++++++++++++++++---------- 1 file changed, 27 insertions(+), 10 deletions(-) diff --git a/src/bootstrap/download.rs b/src/bootstrap/download.rs index 8e9614ec89a0a..2653793da17d4 100644 --- a/src/bootstrap/download.rs +++ b/src/bootstrap/download.rs @@ -2,7 +2,7 @@ use std::{ env, ffi::{OsStr, OsString}, fs::{self, File}, - io::{BufRead, BufReader, BufWriter, ErrorKind, Write}, + io::{BufRead, BufReader, BufWriter, ErrorKind, Read, Write}, path::{Path, PathBuf}, process::{Command, Stdio}, }; @@ -324,21 +324,38 @@ impl Config { use sha2::Digest; self.verbose(&format!("verifying {}", path.display())); + + if self.dry_run() { + return false; + } + let mut hasher = sha2::Sha256::new(); - // FIXME: this is ok for rustfmt (4.1 MB large at time of writing), but it seems memory-intensive for rustc and larger components. - // Consider using streaming IO instead? - let contents = if self.dry_run() { vec![] } else { t!(fs::read(path)) }; - hasher.update(&contents); - let found = hex::encode(hasher.finalize().as_slice()); - let verified = found == expected; - if !verified && !self.dry_run() { + + let file = t!(File::open(path)); + let mut reader = BufReader::new(file); + let mut buffer = [0; 4096]; // read in chunks of 4KB + + loop { + let read_len = t!(reader.read(&mut buffer)); + // break if EOF + if read_len == 0 { + break; + } + hasher.update(&buffer[0..read_len]); + } + + let checksum = hex::encode(hasher.finalize().as_slice()); + let verified = checksum == expected; + + if !verified { println!( "invalid checksum: \n\ - found: {found}\n\ + found: {checksum}\n\ expected: {expected}", ); } - return verified; + + verified } }