From bf30ea985fef808753cc10bdc6867df9d061a42d Mon Sep 17 00:00:00 2001 From: Arsenii Kulikov Date: Tue, 12 Mar 2024 05:05:44 +0400 Subject: [PATCH 01/23] refactor: caching logic --- src/cache.rs | 221 +++++++++++++++++------------------------ src/compile/project.rs | 16 ++- 2 files changed, 99 insertions(+), 138 deletions(-) diff --git a/src/cache.rs b/src/cache.rs index 0cbe48efd..d1352e791 100644 --- a/src/cache.rs +++ b/src/cache.rs @@ -303,33 +303,6 @@ impl SolFilesCache { Ok(Artifacts(artifacts)) } - /// Retains only the `CacheEntry` specified by the file + version combination. - /// - /// In other words, only keep those cache entries with the paths (keys) that the iterator yields - /// and only keep the versions in the cache entry that the version iterator yields. - pub fn retain<'a, I, V>(&mut self, files: I) - where - I: IntoIterator, - V: IntoIterator, - { - let mut files: HashMap<_, _> = files.into_iter().collect(); - - self.files.retain(|file, entry| { - if entry.artifacts.is_empty() { - // keep entries that didn't emit any artifacts in the first place, such as a - // solidity file that only includes error definitions - return true; - } - - if let Some(versions) = files.remove(file.as_path()) { - entry.retain_versions(versions); - } else { - return false; - } - !entry.artifacts.is_empty() - }); - } - /// Inserts the provided cache entries, if there is an existing `CacheEntry` it will be updated /// but versions will be merged. 
pub fn extend(&mut self, entries: I) @@ -580,6 +553,28 @@ impl CacheEntry { } } +#[derive(Debug, Clone, Default)] +pub struct GroupedSources { + pub inner: HashMap>, +} + +impl GroupedSources { + pub fn insert(&mut self, file: PathBuf, version: Version) { + match self.inner.entry(file) { + hash_map::Entry::Occupied(mut entry) => { + entry.get_mut().insert(version); + } + hash_map::Entry::Vacant(entry) => { + entry.insert(HashSet::from([version])); + } + } + } + + pub fn contains(&self, file: &Path, version: &Version) -> bool { + self.inner.get(file).map_or(false, |versions| versions.contains(version)) + } +} + /// A helper abstraction over the [`SolFilesCache`] used to determine what files need to compiled /// and which `Artifacts` can be reused. #[derive(Debug)] @@ -596,18 +591,16 @@ pub(crate) struct ArtifactsCacheInner<'a, T: ArtifactOutput> { /// The project. pub project: &'a Project, - /// All the files that were filtered because they haven't changed. - pub filtered: HashMap)>, + /// Files that require recompilation. + pub dirty_sources: GroupedSources, + + /// All the files that haven't changed. + pub clean_sources: GroupedSources, - /// The corresponding cache entries for all sources that were deemed to be dirty. + /// Files that appear in cache but are not in scope of the current compiler invocation. /// - /// `CacheEntry` are grouped by their Solidity file. - /// During preprocessing the `artifacts` field of a new `CacheEntry` is left blank, because in - /// order to determine the artifacts of the solidity file, the file needs to be compiled first. - /// Only after the `CompilerOutput` is received and all compiled contracts are handled, see - /// [`crate::ArtifactOutput::on_output`] all artifacts, their disk paths, are determined and - /// can be populated before the updated [`crate::SolFilesCache`] is finally written to disk. - pub dirty_source_files: HashMap)>, + /// Those files should be kept in the cache, so we don't lose data on filtered runs. 
+ pub out_of_scope_sources: GroupedSources, /// The file hashes. pub content_hashes: HashMap, @@ -638,30 +631,6 @@ impl<'a, T: ArtifactOutput> ArtifactsCacheInner<'a, T> { entry } - /// inserts a new cache entry for the given file - /// - /// If there is already an entry available for the file the given version is added to the set - fn insert_new_cache_entry(&mut self, file: &Path, source: &Source, version: Version) { - if let Some((_, versions)) = self.dirty_source_files.get_mut(file) { - versions.insert(version); - } else { - let entry = self.create_cache_entry(file, source); - self.dirty_source_files.insert(file.to_path_buf(), (entry, HashSet::from([version]))); - } - } - - /// inserts the filtered source with the given version - fn insert_filtered_source(&mut self, file: PathBuf, source: Source, version: Version) { - match self.filtered.entry(file) { - hash_map::Entry::Occupied(mut entry) => { - entry.get_mut().1.insert(version); - } - hash_map::Entry::Vacant(entry) => { - entry.insert((source, HashSet::from([version]))); - } - } - } - /// Returns the set of [Source]s that need to be included in the `CompilerOutput` in order to /// recompile the project. /// @@ -687,6 +656,14 @@ impl<'a, T: ArtifactOutput> ArtifactsCacheInner<'a, T> { let mut clean_sources = Vec::with_capacity(sources.len()); let dirty_files = self.get_dirty_files(&sources, version); + // Mark sources which appear in cache but are not in scope of the current compiler run as + // out of scope. 
+ for source in self.cache.files.keys().cloned().collect::>() { + if !sources.contains_key(&source) { + self.out_of_scope_sources.insert(source, version.clone()); + } + } + for (file, source) in sources { let source = self.filter_source(file, source, &dirty_files); if source.dirty { @@ -698,9 +675,11 @@ impl<'a, T: ArtifactOutput> ArtifactsCacheInner<'a, T> { } } - // track new cache entries for dirty files + // We are adding the dirty sources to the cache before those are populated with imports of + // dirty files because we don't want to recompile dirty files imports but just need them to + // be marked as [FilteredSource::Clean] so that we apply output selection optimization. for (file, filtered) in dirty_sources.iter() { - self.insert_new_cache_entry(file, filtered.source(), version.clone()); + self.mark_dirty(file, filtered.source(), version); } for clean_source in clean_sources { @@ -709,12 +688,20 @@ impl<'a, T: ArtifactOutput> ArtifactsCacheInner<'a, T> { // file is pulled in by a dirty file dirty_sources.insert(file.clone(), FilteredSource::Clean(source.clone())); } - self.insert_filtered_source(file, source, version.clone()); + self.clean_sources.insert(file, version.clone()); } dirty_sources.into() } + fn mark_dirty(&mut self, file: &Path, source: &Source, version: &Version) { + self.dirty_sources.insert(file.to_path_buf(), version.clone()); + if !self.cache.files.contains_key(file) { + let entry = self.create_cache_entry(file, source); + self.cache.files.insert(file.to_path_buf(), entry); + } + } + /// Returns the state of the given source file. 
fn filter_source( &self, @@ -762,12 +749,12 @@ impl<'a, T: ArtifactOutput> ArtifactsCacheInner<'a, T> { fn is_dirty_impl(&self, file: &Path, version: &Version) -> bool { let Some(hash) = self.content_hashes.get(file) else { - trace!("missing cache entry"); + trace!("missing content hash"); return true; }; let Some(entry) = self.cache.entry(file) else { - trace!("missing content hash"); + trace!("missing cache entry"); return true; }; @@ -892,9 +879,10 @@ impl<'a, T: ArtifactOutput> ArtifactsCache<'a, T> { cached_artifacts, edges, project, - filtered: Default::default(), - dirty_source_files: Default::default(), + clean_sources: Default::default(), + dirty_sources: Default::default(), content_hashes: Default::default(), + out_of_scope_sources: Default::default(), }; ArtifactsCache::Cached(cache) @@ -973,76 +961,53 @@ impl<'a, T: ArtifactOutput> ArtifactsCache<'a, T> { let ArtifactsCacheInner { mut cache, mut cached_artifacts, - mut dirty_source_files, - filtered, + dirty_sources, + out_of_scope_sources, project, .. } = cache; - // keep only those files that were previously filtered (not dirty, reused) - cache.retain(filtered.iter().map(|(p, (_, v))| (p.as_path(), v))); + // Remove cached artifacts which are out of scope, dirty or appear in `written_artifacts`. 
+ cached_artifacts.0.retain(|file, artifacts| { + let file = Path::new(file); + artifacts.retain(|name, artifacts| { + artifacts.retain(|artifact| { + let version = &artifact.version; + + if out_of_scope_sources.contains(file, version) { + false + } else if dirty_sources.contains(file, version) { + false + } else if written_artifacts + .find_artifact(&file.to_string_lossy(), name, version) + .is_some() + { + false + } else { + true + } + }); + !artifacts.is_empty() + }); + !artifacts.is_empty() + }); - // add the written artifacts to the cache entries, this way we can keep a mapping - // from solidity file to its artifacts - // this step is necessary because the concrete artifacts are only known after solc - // was invoked and received as output, before that we merely know the file and - // the versions, so we add the artifacts on a file by file basis - for (file, written_artifacts) in written_artifacts.as_ref() { + // Update cache entries for dirty sources with newly compiled artifacts. 
+ for (file, artifacts) in written_artifacts.as_ref() { let file_path = Path::new(file); - if let Some((cache_entry, versions)) = dirty_source_files.get_mut(file_path) { - cache_entry.insert_artifacts(written_artifacts.iter().map(|(name, artifacts)| { - let artifacts = artifacts - .iter() - .filter(|artifact| versions.contains(&artifact.version)) - .collect::>(); - (name, artifacts) - })); - } - // cached artifacts that were overwritten also need to be removed from the - // `cached_artifacts` set - if let Some((f, mut cached)) = cached_artifacts.0.remove_entry(file) { - trace!(file, "checking for obsolete cached artifact entries"); - cached.retain(|name, cached_artifacts| { - let Some(written_files) = written_artifacts.get(name) else { - return false; - }; - - // written artifact clashes with a cached artifact, so we need to decide whether - // to keep or to remove the cached - cached_artifacts.retain(|f| { - // we only keep those artifacts that don't conflict with written artifacts - // and which version was a compiler target - let same_version = - written_files.iter().all(|other| other.version != f.version); - let is_filtered = filtered - .get(file_path) - .map(|(_, versions)| versions.contains(&f.version)) - .unwrap_or_default(); - let retain = same_version && is_filtered; - if !retain { - trace!( - artifact=%f.file.display(), - contract=%name, - version=%f.version, - "purging obsolete cached artifact", - ); - } - retain - }); - - !cached_artifacts.is_empty() - }); + let Some(dirty_versions) = dirty_sources.inner.get(file_path) else { continue }; + let Some(entry) = cache.files.get_mut(file_path) else { continue }; - if !cached.is_empty() { - cached_artifacts.0.insert(f, cached); - } - } + entry.insert_artifacts(artifacts.iter().map(|(name, artifacts)| { + let artifacts = artifacts + .iter() + .filter(|artifact| dirty_versions.contains(&artifact.version)) + .collect::>(); + (name, artifacts) + })); } - // add the new cache entries to the cache file - 
cache.extend(dirty_source_files.into_iter().map(|(file, (entry, _))| (file, entry))); - // write to disk if write_to_disk { // make all `CacheEntry` paths relative to the project root and all artifact diff --git a/src/compile/project.rs b/src/compile/project.rs index 34a4f4d6a..675ce5da1 100644 --- a/src/compile/project.rs +++ b/src/compile/project.rs @@ -329,13 +329,6 @@ impl<'a, T: ArtifactOutput> CompiledState<'a, T> { ctx, )?; - match cache { - ArtifactsCache::Cached(ref cache) => { - project.artifacts_handler().handle_cached_artifacts(&cache.cached_artifacts)?; - } - ArtifactsCache::Ephemeral(..) => {} - } - // emits all the build infos, if they exist output.write_build_infos(project.build_info_path())?; @@ -370,6 +363,9 @@ impl<'a, T: ArtifactOutput> ArtifactsState<'a, T> { trace!(has_error, project.no_artifacts, skip_write_to_disk, cache_path=?project.cache_path(),"prepare writing cache file"); let cached_artifacts = cache.consume(&compiled_artifacts, !skip_write_to_disk)?; + + project.artifacts_handler().handle_cached_artifacts(&cached_artifacts)?; + Ok(ProjectCompileOutput { compiler_output: output, compiled_artifacts, @@ -708,8 +704,8 @@ mod tests { let prep = compiler.preprocess().unwrap(); let cache = prep.cache.as_cached().unwrap(); // 3 contracts - assert_eq!(cache.dirty_source_files.len(), 3); - assert!(cache.filtered.is_empty()); + assert_eq!(cache.dirty_sources.len(), 3); + assert!(cache.clean_sources.is_empty()); assert!(cache.cache.is_empty()); let compiled = prep.compile().unwrap(); @@ -728,7 +724,7 @@ mod tests { let inner = project.project(); let compiler = ProjectCompiler::new(inner).unwrap(); let prep = compiler.preprocess().unwrap(); - assert!(prep.cache.as_cached().unwrap().dirty_source_files.is_empty()) + assert!(prep.cache.as_cached().unwrap().dirty_sources.is_empty()) } #[test] From 17d01b0d83f0c159e1d099c937870c332eb34c89 Mon Sep 17 00:00:00 2001 From: Arsenii Kulikov Date: Tue, 12 Mar 2024 05:24:56 +0400 Subject: [PATCH 02/23] clippy 
--- src/cache.rs | 15 ++++++++------- src/compile/project.rs | 6 +++--- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/src/cache.rs b/src/cache.rs index d1352e791..6f04d01cf 100644 --- a/src/cache.rs +++ b/src/cache.rs @@ -975,17 +975,18 @@ impl<'a, T: ArtifactOutput> ArtifactsCache<'a, T> { let version = &artifact.version; if out_of_scope_sources.contains(file, version) { - false - } else if dirty_sources.contains(file, version) { - false - } else if written_artifacts + return false + } + if dirty_sources.contains(file, version) { + return false + } + if written_artifacts .find_artifact(&file.to_string_lossy(), name, version) .is_some() { - false - } else { - true + return false } + true }); !artifacts.is_empty() }); diff --git a/src/compile/project.rs b/src/compile/project.rs index 675ce5da1..467afd369 100644 --- a/src/compile/project.rs +++ b/src/compile/project.rs @@ -704,8 +704,8 @@ mod tests { let prep = compiler.preprocess().unwrap(); let cache = prep.cache.as_cached().unwrap(); // 3 contracts - assert_eq!(cache.dirty_sources.len(), 3); - assert!(cache.clean_sources.is_empty()); + assert_eq!(cache.dirty_sources.inner.len(), 3); + assert!(cache.clean_sources.inner.is_empty()); assert!(cache.cache.is_empty()); let compiled = prep.compile().unwrap(); @@ -724,7 +724,7 @@ mod tests { let inner = project.project(); let compiler = ProjectCompiler::new(inner).unwrap(); let prep = compiler.preprocess().unwrap(); - assert!(prep.cache.as_cached().unwrap().dirty_sources.is_empty()) + assert!(prep.cache.as_cached().unwrap().dirty_sources.inner.is_empty()) } #[test] From d0707c9ec58683475656b8e363970b990113701f Mon Sep 17 00:00:00 2001 From: Arsenii Kulikov Date: Tue, 12 Mar 2024 05:32:32 +0400 Subject: [PATCH 03/23] fix test --- src/compile/project.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/compile/project.rs b/src/compile/project.rs index 467afd369..5dada0823 100644 --- a/src/compile/project.rs +++ b/src/compile/project.rs @@ -706,7 +706,6 
@@ mod tests { // 3 contracts assert_eq!(cache.dirty_sources.inner.len(), 3); assert!(cache.clean_sources.inner.is_empty()); - assert!(cache.cache.is_empty()); let compiled = prep.compile().unwrap(); assert_eq!(compiled.output.contracts.files().count(), 3); From 66f62edaf89036d8a959cc1906acdc1140d3bcae Mon Sep 17 00:00:00 2001 From: Arsenii Kulikov Date: Tue, 12 Mar 2024 05:35:11 +0400 Subject: [PATCH 04/23] fmt --- src/cache.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/cache.rs b/src/cache.rs index 6f04d01cf..bf4c7024c 100644 --- a/src/cache.rs +++ b/src/cache.rs @@ -975,16 +975,16 @@ impl<'a, T: ArtifactOutput> ArtifactsCache<'a, T> { let version = &artifact.version; if out_of_scope_sources.contains(file, version) { - return false + return false; } if dirty_sources.contains(file, version) { - return false - } + return false; + } if written_artifacts .find_artifact(&file.to_string_lossy(), name, version) .is_some() { - return false + return false; } true }); From e8be3596c6fc135fb485b91d75b6f8bff0bfad3d Mon Sep 17 00:00:00 2001 From: Arsenii Kulikov Date: Tue, 12 Mar 2024 16:50:38 +0400 Subject: [PATCH 05/23] docs --- src/cache.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/cache.rs b/src/cache.rs index bf4c7024c..cf18403ae 100644 --- a/src/cache.rs +++ b/src/cache.rs @@ -553,12 +553,14 @@ impl CacheEntry { } } +/// Collection of source file paths mapped to versions. #[derive(Debug, Clone, Default)] pub struct GroupedSources { pub inner: HashMap>, } impl GroupedSources { + /// Inserts provided source and version into the collection. pub fn insert(&mut self, file: PathBuf, version: Version) { match self.inner.entry(file) { hash_map::Entry::Occupied(mut entry) => { @@ -570,6 +572,7 @@ impl GroupedSources { } } + /// Returns true if the file was included with the given version. 
pub fn contains(&self, file: &Path, version: &Version) -> bool { self.inner.get(file).map_or(false, |versions| versions.contains(version)) } From 8eb160a8bea8947d182f993a60c1117abe40ebe9 Mon Sep 17 00:00:00 2001 From: Arsenii Kulikov Date: Tue, 12 Mar 2024 19:19:54 +0400 Subject: [PATCH 06/23] fix cache entry updates --- src/cache.rs | 6 ++---- tests/project.rs | 7 +++++-- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/src/cache.rs b/src/cache.rs index cf18403ae..8d6c646b8 100644 --- a/src/cache.rs +++ b/src/cache.rs @@ -699,10 +699,8 @@ impl<'a, T: ArtifactOutput> ArtifactsCacheInner<'a, T> { fn mark_dirty(&mut self, file: &Path, source: &Source, version: &Version) { self.dirty_sources.insert(file.to_path_buf(), version.clone()); - if !self.cache.files.contains_key(file) { - let entry = self.create_cache_entry(file, source); - self.cache.files.insert(file.to_path_buf(), entry); - } + let entry = self.create_cache_entry(file, source); + self.cache.files.insert(file.to_path_buf(), entry); } /// Returns the state of the given source file. 
diff --git a/tests/project.rs b/tests/project.rs index 025832080..f85f7b6dc 100644 --- a/tests/project.rs +++ b/tests/project.rs @@ -3519,8 +3519,8 @@ fn can_detect_config_changes() { let compiled = project.compile().unwrap(); compiled.assert_success(); - let cache = SolFilesCache::read(&project.paths().cache).unwrap(); - assert_eq!(cache.files.len(), 2); + let cache_before = SolFilesCache::read(&project.paths().cache).unwrap(); + assert_eq!(cache_before.files.len(), 2); // nothing to compile let compiled = project.compile().unwrap(); @@ -3531,6 +3531,9 @@ fn can_detect_config_changes() { let compiled = project.compile().unwrap(); compiled.assert_success(); assert!(!compiled.is_unchanged()); + + let cache_after = SolFilesCache::read(&project.paths().cache).unwrap(); + assert_ne!(cache_before, cache_after); } #[test] From c3099bc565097a1daf43148eec3cfe88b07fbe1c Mon Sep 17 00:00:00 2001 From: Arsenii Kulikov Date: Tue, 12 Mar 2024 19:26:03 +0400 Subject: [PATCH 07/23] small doc --- src/cache.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/cache.rs b/src/cache.rs index 8d6c646b8..e64554e20 100644 --- a/src/cache.rs +++ b/src/cache.rs @@ -699,6 +699,7 @@ impl<'a, T: ArtifactOutput> ArtifactsCacheInner<'a, T> { fn mark_dirty(&mut self, file: &Path, source: &Source, version: &Version) { self.dirty_sources.insert(file.to_path_buf(), version.clone()); + // We always create and insert a new entry to overwrite solc settings, content hash, etc. 
let entry = self.create_cache_entry(file, source); self.cache.files.insert(file.to_path_buf(), entry); } From 983b8c9ebc6c333255a9333d489d002242ca4929 Mon Sep 17 00:00:00 2001 From: Arsenii Kulikov Date: Tue, 12 Mar 2024 23:37:37 +0400 Subject: [PATCH 08/23] wip --- src/cache.rs | 253 +++++++++++++++++------------------ src/compile/project.rs | 23 ++-- src/filter.rs | 131 ++++++++++----------- 3 files changed, 173 insertions(+), 234 deletions(-) diff --git a/src/cache.rs b/src/cache.rs index e64554e20..3b40d78b1 100644 --- a/src/cache.rs +++ b/src/cache.rs @@ -4,7 +4,7 @@ use crate::{ artifacts::Sources, config::{ProjectPaths, SolcConfig}, error::{Result, SolcError}, - filter::{FilteredSource, FilteredSourceInfo, FilteredSources}, + filter::{FilteredSources, SourceCompilationKind}, resolver::GraphEdges, utils, ArtifactFile, ArtifactOutput, Artifacts, ArtifactsMap, OutputContext, Project, ProjectPathsConfig, Source, @@ -51,6 +51,16 @@ impl SolFilesCache { self.files.is_empty() } + /// Returns `true` if the cache is MISSING artifacts for the given file and version (i.e. the file is absent or the version is not cached). NOTE(review): this behavior is inverted relative to the name `contains` — consider renaming (e.g. `is_missing`) or negating the check.
+ pub fn contains(&self, file: &Path, version: &Version) -> bool { + self.files.get(file).map_or(true, |entry| !entry.contains_version(version)) + } + + /// Removes entry for the given file + pub fn remove(&mut self, file: &Path) -> Option { + self.files.remove(file) + } + /// How many entries the cache contains where each entry represents a sourc file pub fn len(&self) -> usize { self.files.len() @@ -457,16 +467,18 @@ impl CacheEntry { Ok(artifacts) } - pub(crate) fn insert_artifacts<'a, I, T: 'a>(&mut self, artifacts: I) + pub(crate) fn extend_artifacts<'a, A, I, T: 'a>(&mut self, artifacts: I) where - I: IntoIterator>)>, + I: IntoIterator, + A: IntoIterator>, { - for (name, artifacts) in artifacts.into_iter().filter(|(_, a)| !a.is_empty()) { - let entries: BTreeMap<_, _> = artifacts - .into_iter() - .map(|artifact| (artifact.version.clone(), artifact.file.clone())) - .collect(); - self.artifacts.insert(name.clone(), entries); + for (name, artifacts) in artifacts.into_iter() { + for artifact in artifacts { + self.artifacts + .entry(name.clone()) + .or_insert_with(BTreeMap::new) + .insert(artifact.version.clone(), artifact.file.clone()); + } } } @@ -484,20 +496,6 @@ impl CacheEntry { } } - /// Retains only those artifacts that match the provided versions. - /// - /// Removes an artifact entry if none of its versions is included in the `versions` set. - pub fn retain_versions<'a, I>(&mut self, versions: I) - where - I: IntoIterator, - { - let versions = versions.into_iter().collect::>(); - self.artifacts.retain(|_, artifacts| { - artifacts.retain(|version, _| versions.contains(version)); - !artifacts.is_empty() - }) - } - /// Returns `true` if the artifacts set contains the given version pub fn contains_version(&self, version: &Version) -> bool { self.artifacts_versions().any(|(v, _)| v == version) @@ -594,15 +592,18 @@ pub(crate) struct ArtifactsCacheInner<'a, T: ArtifactOutput> { /// The project. pub project: &'a Project, - /// Files that require recompilation. 
- pub dirty_sources: GroupedSources, + /// Files that were invalidated and removed from cache. + /// Those are not grouped by version and purged completely. + pub dirty_sources: HashSet, - /// All the files that haven't changed. - pub clean_sources: GroupedSources, + /// Artifact+version pairs for which we were missing artifacts and are expect to receive them + /// from compilation. + pub missing_sources: GroupedSources, /// Files that appear in cache but are not in scope of the current compiler invocation. /// - /// Those files should be kept in the cache, so we don't lose data on filtered runs. + /// Those files should be kept in the cache, but should not be returned in the compilation + /// result. pub out_of_scope_sources: GroupedSources, /// The file hashes. @@ -611,53 +612,68 @@ pub(crate) struct ArtifactsCacheInner<'a, T: ArtifactOutput> { impl<'a, T: ArtifactOutput> ArtifactsCacheInner<'a, T> { /// Creates a new cache entry for the file - fn create_cache_entry(&self, file: &Path, source: &Source) -> CacheEntry { + fn create_cache_entry(&mut self, file: PathBuf, source: &Source) { let imports = self .edges - .imports(file) + .imports(&file) .into_iter() .map(|import| utils::source_name(import, self.project.root()).to_path_buf()) .collect(); let entry = CacheEntry { - last_modification_date: CacheEntry::read_last_modification_date(file) + last_modification_date: CacheEntry::read_last_modification_date(&file) .unwrap_or_default(), content_hash: source.content_hash(), - source_name: utils::source_name(file, self.project.root()).into(), + source_name: utils::source_name(&file, self.project.root()).into(), solc_config: self.project.solc_config.clone(), imports, - version_requirement: self.edges.version_requirement(file).map(|v| v.to_string()), + version_requirement: self.edges.version_requirement(&file).map(|v| v.to_string()), // artifacts remain empty until we received the compiler output artifacts: Default::default(), }; - entry + self.cache.files.insert(file, 
entry.clone()); } - /// Returns the set of [Source]s that need to be included in the `CompilerOutput` in order to - /// recompile the project. - /// - /// We define _dirty_ sources as files that: - /// - are new - /// - were changed - /// - their imports were changed - /// - their artifact is missing + /// Returns the set of [Source]s that need to be compiled to produce artifacts for requested + /// input. /// - /// A _dirty_ file is always included in the `CompilerInput`. - /// A _dirty_ file can also include clean files - files that do not match any of the above - /// criteria - which solc also requires in order to compile a dirty file. - /// - /// Therefore, these files will also be included in the filtered output but not marked as dirty, - /// so that their `OutputSelection` can be optimized in the `CompilerOutput` and their (empty) - /// artifacts ignored. + /// Source file may have two [SourceCompilationKind]s: + /// 1. [SourceCompilationKind::Complete] - the file has been modified or compiled with different + /// settings and its cache is invalidated. For such sources we request full data needed for + /// artifact construction. + /// 2. [SourceCompilationKind::Optimized] - the file is not dirty, but is imported by a dirty + /// file and thus will be processed by solc. For such files we don't need full data, so we + /// are marking them as clean to optimize output selection later. fn filter(&mut self, sources: Sources, version: &Version) -> FilteredSources { - // all files that are not dirty themselves, but are pulled from a dirty file - let mut imports_of_dirty = HashSet::new(); + // sources that should be passed to compiler. + let mut compile_complete = BTreeSet::new(); + let mut compile_optimized = BTreeSet::new(); + + // Files which cache is invalidated. 
+ self.dirty_sources = self.get_dirty_files(&sources); + + for (file, source) in sources.iter() { + if self.dirty_sources.contains(file) { + compile_complete.insert(file.clone()); + + // If file is dirty, its data should be invalidated and all artifacts for all versions + // should be removed. + self.cache.remove(file.as_path()); + } else { + // If source is not dirty, but we are missing an artifact for this version, we + // should compile it to populate the cache. + if self.cache.entry(file).map_or(true, |e| !e.contains_version(version)) { + compile_complete.insert(file.clone()); + self.missing_sources.insert(file.clone(), version.clone()); + } + } - // separates all source files that fit the criteria (dirty) from those that don't (clean) - let mut dirty_sources = BTreeMap::new(); - let mut clean_sources = Vec::with_capacity(sources.len()); - let dirty_files = self.get_dirty_files(&sources, version); + // Ensure that we have a cache entry for all sources. + if !self.cache.files.contains_key(file) { + self.create_cache_entry(file.clone(), source); + } + } // Mark sources which appear in cache but are not in scope of the current compiler run as // out of scope. @@ -667,62 +683,39 @@ impl<'a, T: ArtifactOutput> ArtifactsCacheInner<'a, T> { } } - for (file, source) in sources { - let source = self.filter_source(file, source, &dirty_files); - if source.dirty { - // mark all files that are imported by a dirty file - imports_of_dirty.extend(self.edges.all_imported_nodes(source.idx)); - dirty_sources.insert(source.file, FilteredSource::Dirty(source.source)); - } else { - clean_sources.push(source); - } - } - - // We are adding the dirty sources to the cache before those are populated with imports of - // dirty files because we don't want to recompile dirty files imports but just need them to - // be marked as [FilteredSource::Clean] so that we apply output selection optimization.
- for (file, filtered) in dirty_sources.iter() { - self.mark_dirty(file, filtered.source(), version); - } - - for clean_source in clean_sources { - let FilteredSourceInfo { file, source, idx, .. } = clean_source; - if imports_of_dirty.contains(&idx) { - // file is pulled in by a dirty file - dirty_sources.insert(file.clone(), FilteredSource::Clean(source.clone())); + // Prepare optimization by collecting sources which are imported by files requiring complete + // compilation. + for source in &compile_complete { + for import in self.edges.imports(source) { + if !compile_complete.contains(import) { + compile_optimized.insert(import.clone()); + } } - self.clean_sources.insert(file, version.clone()); } - dirty_sources.into() - } - - fn mark_dirty(&mut self, file: &Path, source: &Source, version: &Version) { - self.dirty_sources.insert(file.to_path_buf(), version.clone()); - // We always create and insert a new entry to overwrite solc settings, content hash, etc. - let entry = self.create_cache_entry(file, source); - self.cache.files.insert(file.to_path_buf(), entry); - } + let filtered = sources + .into_iter() + .filter_map(|(file, source)| { + if compile_complete.contains(&file) { + Some((file, SourceCompilationKind::Complete(source))) + } else if compile_optimized.contains(&file) { + Some((file, SourceCompilationKind::Optimized(source))) + } else { + None + } + }) + .collect(); - /// Returns the state of the given source file. - fn filter_source( - &self, - file: PathBuf, - source: Source, - dirty_files: &HashSet, - ) -> FilteredSourceInfo { - let idx = self.edges.node_id(&file); - let dirty = dirty_files.contains(&file); - FilteredSourceInfo { file, source, idx, dirty } + FilteredSources(filtered) } /// Returns a set of files that are dirty itself or import dirty file directly or indirectly. 
- fn get_dirty_files(&self, sources: &Sources, version: &Version) -> HashSet { + fn get_dirty_files(&self, sources: &Sources) -> HashSet { let mut dirty_files = HashSet::new(); // Pre-add all sources that are guaranteed to be dirty for file in sources.keys() { - if self.is_dirty_impl(file, version) { + if self.is_dirty_impl(file) { dirty_files.insert(file.to_path_buf()); } } @@ -749,7 +742,7 @@ impl<'a, T: ArtifactOutput> ArtifactsCacheInner<'a, T> { } } - fn is_dirty_impl(&self, file: &Path, version: &Version) -> bool { + fn is_dirty_impl(&self, file: &Path) -> bool { let Some(hash) = self.content_hashes.get(file) else { trace!("missing content hash"); return true; @@ -770,41 +763,6 @@ impl<'a, T: ArtifactOutput> ArtifactsCacheInner<'a, T> { return true; } - // only check artifact's existence if the file generated artifacts. - // e.g. a solidity file consisting only of import statements (like interfaces that - // re-export) do not create artifacts - if entry.artifacts.is_empty() { - trace!("no artifacts"); - return false; - } - - if !entry.contains_version(version) { - trace!("missing linked artifacts",); - return true; - } - - if entry.artifacts_for_version(version).any(|artifact_path| { - let missing_artifact = !self.cached_artifacts.has_artifact(artifact_path); - if missing_artifact { - trace!("missing artifact \"{}\"", artifact_path.display()); - } - missing_artifact - }) { - return true; - } - - // If any requested extra files are missing for any artifact, mark source as dirty to - // generate them - for artifacts in self.cached_artifacts.values() { - for artifacts in artifacts.values() { - for artifact_file in artifacts { - if self.project.artifacts_handler().is_dirty(artifact_file).unwrap_or(true) { - return true; - } - } - } - } - // all things match, can be reused false } @@ -881,10 +839,10 @@ impl<'a, T: ArtifactOutput> ArtifactsCache<'a, T> { cached_artifacts, edges, project, - clean_sources: Default::default(), dirty_sources: Default::default(), 
content_hashes: Default::default(), out_of_scope_sources: Default::default(), + missing_sources: Default::default(), }; ArtifactsCache::Cached(cache) @@ -979,7 +937,7 @@ impl<'a, T: ArtifactOutput> ArtifactsCache<'a, T> { if out_of_scope_sources.contains(file, version) { return false; } - if dirty_sources.contains(file, version) { + if dirty_sources.contains(file) { return false; } if written_artifacts @@ -995,20 +953,15 @@ impl<'a, T: ArtifactOutput> ArtifactsCache<'a, T> { !artifacts.is_empty() }); - // Update cache entries for dirty sources with newly compiled artifacts. + // Update cache entries with newly written artifacts. We update data for any artifacts as + // `written_artifacts` always contain the most recent data. for (file, artifacts) in written_artifacts.as_ref() { let file_path = Path::new(file); - - let Some(dirty_versions) = dirty_sources.inner.get(file_path) else { continue }; - let Some(entry) = cache.files.get_mut(file_path) else { continue }; - - entry.insert_artifacts(artifacts.iter().map(|(name, artifacts)| { - let artifacts = artifacts - .iter() - .filter(|artifact| dirty_versions.contains(&artifact.version)) - .collect::>(); - (name, artifacts) - })); + // Only update data for existing entries, we should have entries for all in-scope files + // by now. 
+ if let Some(entry) = cache.files.get_mut(file_path) { + entry.extend_artifacts(artifacts); + } } // write to disk diff --git a/src/compile/project.rs b/src/compile/project.rs index 5dada0823..5edd8e14e 100644 --- a/src/compile/project.rs +++ b/src/compile/project.rs @@ -433,13 +433,13 @@ impl CompilerSources { .into_iter() .map(|(solc, (version, sources))| { trace!("Filtering {} sources for {}", sources.len(), version); - let sources = cache.filter(sources, &version); + let sources_to_compile = cache.filter(sources, &version); trace!( - "Detected {} dirty sources {:?}", - sources.dirty().count(), - sources.dirty_files().collect::>() + "Detected {} sources to compile {:?}", + sources_to_compile.complete().count(), + sources_to_compile.complete_files().collect::>() ); - (solc, (version, sources)) + (solc, (version, sources_to_compile)) }) .collect() } @@ -518,7 +518,7 @@ fn compile_sequential( solc.args ); - let dirty_files: Vec = filtered_sources.dirty_files().cloned().collect(); + let dirty_files: Vec = filtered_sources.complete_files().cloned().collect(); // depending on the composition of the filtered sources, the output selection can be // optimized @@ -597,7 +597,7 @@ fn compile_parallel( continue; } - let dirty_files: Vec = filtered_sources.dirty_files().cloned().collect(); + let dirty_files: Vec = filtered_sources.complete_files().cloned().collect(); // depending on the composition of the filtered sources, the output selection can be // optimized @@ -704,8 +704,7 @@ mod tests { let prep = compiler.preprocess().unwrap(); let cache = prep.cache.as_cached().unwrap(); // 3 contracts - assert_eq!(cache.dirty_sources.inner.len(), 3); - assert!(cache.clean_sources.inner.is_empty()); + assert_eq!(cache.dirty_sources.len(), 3); let compiled = prep.compile().unwrap(); assert_eq!(compiled.output.contracts.files().count(), 3); @@ -723,7 +722,7 @@ mod tests { let inner = project.project(); let compiler = ProjectCompiler::new(inner).unwrap(); let prep = 
compiler.preprocess().unwrap(); - assert!(prep.cache.as_cached().unwrap().dirty_sources.inner.is_empty()) + assert!(prep.cache.as_cached().unwrap().dirty_sources.is_empty()) } #[test] @@ -792,8 +791,8 @@ mod tests { // 3 contracts total assert_eq!(filtered.0.len(), 3); // A is modified - assert_eq!(filtered.dirty().count(), 1); - assert!(filtered.dirty_files().next().unwrap().ends_with("A.sol")); + assert_eq!(filtered.complete().count(), 1); + assert!(filtered.complete_files().next().unwrap().ends_with("A.sol")); let state = state.compile().unwrap(); assert_eq!(state.output.sources.len(), 3); diff --git a/src/filter.rs b/src/filter.rs index 4122066fe..3b5d9a6e6 100644 --- a/src/filter.rs +++ b/src/filter.rs @@ -57,7 +57,7 @@ pub enum SparseOutputFilter { /// In other words, we request the output of solc only for files that have been detected as /// _dirty_. #[default] - AllDirty, + Optimized, /// Apply an additional filter to [FilteredSources] to Custom(Box), } @@ -79,9 +79,9 @@ impl SparseOutputFilter { graph: &GraphEdges, ) -> Sources { match self { - SparseOutputFilter::AllDirty => { - if !sources.all_dirty() { - Self::all_dirty(&sources, settings) + SparseOutputFilter::Optimized => { + if !sources.all_complete() { + Self::optimize(&sources, settings) } } SparseOutputFilter::Custom(f) => { @@ -114,7 +114,7 @@ impl SparseOutputFilter { for (file, source) in sources.0.iter() { let key = format!("{}", file.display()); - if source.is_dirty() && f.is_match(file) { + if source.is_complete() && f.is_match(file) { settings.output_selection.as_mut().insert(key, selection.clone()); // the filter might not cover link references that will be required by the file, so @@ -137,11 +137,11 @@ impl SparseOutputFilter { } /// prunes all clean sources and only selects an output for dirty sources - fn all_dirty(sources: &FilteredSources, settings: &mut Settings) { + fn optimize(sources: &FilteredSources, settings: &mut Settings) { // settings can be optimized trace!( "optimizing 
output selection for {}/{} sources", - sources.clean().count(), + sources.optimized().count(), sources.len() ); @@ -151,18 +151,21 @@ impl SparseOutputFilter { .remove("*") .unwrap_or_else(OutputSelection::default_file_output_selection); - for (file, source) in sources.0.iter() { - if source.is_dirty() { - settings - .output_selection - .as_mut() - .insert(format!("{}", file.display()), selection.clone()); - } else { - trace!("using pruned output selection for {}", file.display()); - settings.output_selection.as_mut().insert( - format!("{}", file.display()), - OutputSelection::empty_file_output_select(), - ); + for (file, kind) in sources.0.iter() { + match kind { + SourceCompilationKind::Complete(_) => { + settings + .output_selection + .as_mut() + .insert(format!("{}", file.display()), selection.clone()); + } + SourceCompilationKind::Optimized(_) => { + trace!("using pruned output selection for {}", file.display()); + settings + .output_selection + .as_mut() + .insert(format!("{}", file.display()), OutputSelection::empty_file_output_select()); + } } } } @@ -177,7 +180,7 @@ impl From> for SparseOutputFilter { impl fmt::Debug for SparseOutputFilter { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { match self { - SparseOutputFilter::AllDirty => f.write_str("AllDirty"), + SparseOutputFilter::Optimized => f.write_str("AllDirty"), SparseOutputFilter::Custom(_) => f.write_str("Custom"), } } @@ -185,7 +188,7 @@ impl fmt::Debug for SparseOutputFilter { /// Container type for a set of [FilteredSource] #[derive(Debug, Clone, Eq, PartialEq)] -pub struct FilteredSources(pub BTreeMap); +pub struct FilteredSources(pub BTreeMap); impl FilteredSources { pub fn is_empty(&self) -> bool { @@ -196,24 +199,24 @@ impl FilteredSources { self.0.len() } - /// Returns `true` if all files are dirty - pub fn all_dirty(&self) -> bool { - self.0.values().all(|s| s.is_dirty()) + /// Returns `true` if no sources should have optimized output selection. 
+ pub fn all_complete(&self) -> bool { + self.0.values().all(|s| s.is_complete()) } - /// Returns all entries that are dirty - pub fn dirty(&self) -> impl Iterator + '_ { - self.0.iter().filter(|(_, s)| s.is_dirty()) + /// Returns all entries that should not be optimized. + pub fn complete(&self) -> impl Iterator + '_ { + self.0.iter().filter(|(_, s)| s.is_complete()) } - /// Returns all entries that are clean - pub fn clean(&self) -> impl Iterator + '_ { - self.0.iter().filter(|(_, s)| !s.is_dirty()) + /// Returns all entries that should be optimized. + pub fn optimized(&self) -> impl Iterator + '_ { + self.0.iter().filter(|(_, s)| !s.is_complete()) } - /// Returns all dirty files - pub fn dirty_files(&self) -> impl Iterator + fmt::Debug + '_ { - self.0.iter().filter_map(|(k, s)| s.is_dirty().then_some(k)) + /// Returns all files that should not be optimized. + pub fn complete_files(&self) -> impl Iterator + fmt::Debug + '_ { + self.0.iter().filter_map(|(k, s)| s.is_complete().then_some(k)) } } @@ -225,75 +228,59 @@ impl From for Sources { impl From for FilteredSources { fn from(s: Sources) -> Self { - FilteredSources(s.into_iter().map(|(key, val)| (key, FilteredSource::Dirty(val))).collect()) + FilteredSources( + s.into_iter().map(|(key, val)| (key, SourceCompilationKind::Complete(val))).collect(), + ) } } -impl From> for FilteredSources { - fn from(s: BTreeMap) -> Self { +impl From> for FilteredSources { + fn from(s: BTreeMap) -> Self { FilteredSources(s) } } -impl AsRef> for FilteredSources { - fn as_ref(&self) -> &BTreeMap { +impl AsRef> for FilteredSources { + fn as_ref(&self) -> &BTreeMap { &self.0 } } -impl AsMut> for FilteredSources { - fn as_mut(&mut self) -> &mut BTreeMap { +impl AsMut> for FilteredSources { + fn as_mut(&mut self) -> &mut BTreeMap { &mut self.0 } } /// Represents the state of a filtered [Source] #[derive(Debug, Clone, Eq, PartialEq)] -pub enum FilteredSource { - /// A source that fits the _dirty_ criteria - Dirty(Source), - /// A source 
that does _not_ fit the _dirty_ criteria but is included in the filtered set - /// because a _dirty_ file pulls it in, either directly on indirectly. - Clean(Source), +pub enum SourceCompilationKind { + /// We need a complete compilation output for the source. + Complete(Source), + /// A source for which we don't need a complete output and want to optimize its compilation by + /// reducing output selection. + Optimized(Source), } -impl FilteredSource { +impl SourceCompilationKind { /// Returns the underlying source pub fn source(&self) -> &Source { match self { - FilteredSource::Dirty(s) => s, - FilteredSource::Clean(s) => s, + SourceCompilationKind::Complete(s) => s, + SourceCompilationKind::Optimized(s) => s, } } /// Consumes the type and returns the underlying source pub fn into_source(self) -> Source { match self { - FilteredSource::Dirty(s) => s, - FilteredSource::Clean(s) => s, + SourceCompilationKind::Complete(s) => s, + SourceCompilationKind::Optimized(s) => s, } } - /// Whether this file is actually dirt - pub fn is_dirty(&self) -> bool { - matches!(self, FilteredSource::Dirty(_)) + /// Whether this file should be compiled with full output selection + pub fn is_complete(&self) -> bool { + matches!(self, SourceCompilationKind::Complete(_)) } } - -/// Helper type that determines the state of a source file -#[derive(Debug)] -pub struct FilteredSourceInfo { - /// Path to the source file. - pub file: PathBuf, - - /// Contents of the file. - pub source: Source, - - /// Index in the [GraphEdges]. - pub idx: usize, - - /// Whether this file is actually dirty. 
- /// - /// See also `ArtifactsCacheInner::is_dirty` - pub dirty: bool, -} From 91f435b700361a36ff8aeb6049ec130ae7573657 Mon Sep 17 00:00:00 2001 From: Arsenii Kulikov Date: Wed, 13 Mar 2024 01:07:46 +0400 Subject: [PATCH 09/23] wip --- src/cache.rs | 34 ++++++++++++++++++++++++++-------- src/filter.rs | 8 ++++---- 2 files changed, 30 insertions(+), 12 deletions(-) diff --git a/src/cache.rs b/src/cache.rs index 3b40d78b1..7b122a983 100644 --- a/src/cache.rs +++ b/src/cache.rs @@ -476,7 +476,7 @@ impl CacheEntry { for artifact in artifacts { self.artifacts .entry(name.clone()) - .or_insert_with(BTreeMap::new) + .or_default() .insert(artifact.version.clone(), artifact.file.clone()); } } @@ -596,7 +596,7 @@ pub(crate) struct ArtifactsCacheInner<'a, T: ArtifactOutput> { /// Those are not grouped by version and purged completely. pub dirty_sources: HashSet, - /// Artifact+version pairs for which we were missing artifacts and are expect to receive them + /// Artifact+version pairs for which we were missing artifacts and expecting to receive them /// from compilation. pub missing_sources: GroupedSources, @@ -650,20 +650,26 @@ impl<'a, T: ArtifactOutput> ArtifactsCacheInner<'a, T> { let mut compile_complete = BTreeSet::new(); let mut compile_optimized = BTreeSet::new(); - // Files which cache is invalidated. - self.dirty_sources = self.get_dirty_files(&sources); + // Collect files which cache is invalidated. + self.dirty_sources.extend(self.get_dirty_files(&sources)); for (file, source) in sources.iter() { if self.dirty_sources.contains(file) { compile_complete.insert(file.clone()); - // If file is dirty, its data should invalidated and all artifacts for all versions - // should be removed. + // If file is dirty, its data should be invalidated and all artifacts for all + // versions should be removed. 
self.cache.remove(file.as_path()); } else { - // If source is not dirty, but we are missing artifact for this version, we + // If source is not dirty, but we are missing artifacts for this version, we // should compile it to populate the cache. - if self.cache.entry(file).map_or(true, |e| !e.contains_version(version)) { + let missing = self + .cached_artifacts + .get(file.to_string_lossy().as_ref()) + .map_or(true, |artifacts| { + artifacts.values().flatten().all(|a| a.version != *version) + }); + if missing { compile_complete.insert(file.clone()); self.missing_sources.insert(file.clone(), version.clone()); } @@ -763,6 +769,18 @@ impl<'a, T: ArtifactOutput> ArtifactsCacheInner<'a, T> { return true; } + // If any requested extra files are missing for any artifact, mark source as dirty to + // generate them + for artifacts in self.cached_artifacts.values() { + for artifacts in artifacts.values() { + for artifact_file in artifacts { + if self.project.artifacts_handler().is_dirty(artifact_file).unwrap_or(true) { + return true; + } + } + } + } + // all things match, can be reused false } diff --git a/src/filter.rs b/src/filter.rs index 3b5d9a6e6..d0402b7ef 100644 --- a/src/filter.rs +++ b/src/filter.rs @@ -161,10 +161,10 @@ impl SparseOutputFilter { } SourceCompilationKind::Optimized(_) => { trace!("using pruned output selection for {}", file.display()); - settings - .output_selection - .as_mut() - .insert(format!("{}", file.display()), OutputSelection::empty_file_output_select()); + settings.output_selection.as_mut().insert( + format!("{}", file.display()), + OutputSelection::empty_file_output_select(), + ); } } } From 4c2742bd87e8eb61d684cdb9c82d95370fc2222f Mon Sep 17 00:00:00 2001 From: Arsenii Kulikov Date: Wed, 13 Mar 2024 01:20:07 +0400 Subject: [PATCH 10/23] update doc --- src/cache.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cache.rs b/src/cache.rs index 7b122a983..be6d49f43 100644 --- a/src/cache.rs +++ b/src/cache.rs @@ -638,7 
+638,7 @@ impl<'a, T: ArtifactOutput> ArtifactsCacheInner<'a, T> { /// Returns the set of [Source]s that need to be compiled to produce artifacts for requested /// input. /// - /// Source file may have two [SourceCompilationKind]s: + /// Source file may have one of the two [SourceCompilationKind]s: /// 1. [SourceCompilationKind::Complete] - the file has been modified or compiled with different /// settings and its cache is invalidated. For such sources we request full data needed for /// artifact construction. From c75317a7476e4c4d7b1d5c2e818ffdda3f002385 Mon Sep 17 00:00:00 2001 From: Arsenii Kulikov Date: Wed, 13 Mar 2024 01:25:14 +0400 Subject: [PATCH 11/23] rename --- src/compile/project.rs | 12 ++++++------ src/filter.rs | 12 ++++++------ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/compile/project.rs b/src/compile/project.rs index 5edd8e14e..3220c03f2 100644 --- a/src/compile/project.rs +++ b/src/compile/project.rs @@ -436,8 +436,8 @@ impl CompilerSources { let sources_to_compile = cache.filter(sources, &version); trace!( "Detected {} sources to compile {:?}", - sources_to_compile.complete().count(), - sources_to_compile.complete_files().collect::>() + sources_to_compile.dirty().count(), + sources_to_compile.dirty_files().collect::>() ); (solc, (version, sources_to_compile)) }) @@ -518,7 +518,7 @@ fn compile_sequential( solc.args ); - let dirty_files: Vec = filtered_sources.complete_files().cloned().collect(); + let dirty_files: Vec = filtered_sources.dirty_files().cloned().collect(); // depending on the composition of the filtered sources, the output selection can be // optimized @@ -597,7 +597,7 @@ fn compile_parallel( continue; } - let dirty_files: Vec = filtered_sources.complete_files().cloned().collect(); + let dirty_files: Vec = filtered_sources.dirty_files().cloned().collect(); // depending on the composition of the filtered sources, the output selection can be // optimized @@ -791,8 +791,8 @@ mod tests { // 3 contracts total 
assert_eq!(filtered.0.len(), 3); // A is modified - assert_eq!(filtered.complete().count(), 1); - assert!(filtered.complete_files().next().unwrap().ends_with("A.sol")); + assert_eq!(filtered.dirty().count(), 1); + assert!(filtered.dirty_files().next().unwrap().ends_with("A.sol")); let state = state.compile().unwrap(); assert_eq!(state.output.sources.len(), 3); diff --git a/src/filter.rs b/src/filter.rs index d0402b7ef..b85087175 100644 --- a/src/filter.rs +++ b/src/filter.rs @@ -80,7 +80,7 @@ impl SparseOutputFilter { ) -> Sources { match self { SparseOutputFilter::Optimized => { - if !sources.all_complete() { + if !sources.all_dirty() { Self::optimize(&sources, settings) } } @@ -141,7 +141,7 @@ impl SparseOutputFilter { // settings can be optimized trace!( "optimizing output selection for {}/{} sources", - sources.optimized().count(), + sources.clean().count(), sources.len() ); @@ -200,22 +200,22 @@ impl FilteredSources { } /// Returns `true` if no sources should have optimized output selection. - pub fn all_complete(&self) -> bool { + pub fn all_dirty(&self) -> bool { self.0.values().all(|s| s.is_complete()) } /// Returns all entries that should not be optimized. - pub fn complete(&self) -> impl Iterator + '_ { + pub fn dirty(&self) -> impl Iterator + '_ { self.0.iter().filter(|(_, s)| s.is_complete()) } /// Returns all entries that should be optimized. - pub fn optimized(&self) -> impl Iterator + '_ { + pub fn clean(&self) -> impl Iterator + '_ { self.0.iter().filter(|(_, s)| !s.is_complete()) } /// Returns all files that should not be optimized. 
- pub fn complete_files(&self) -> impl Iterator + fmt::Debug + '_ { + pub fn dirty_files(&self) -> impl Iterator + fmt::Debug + '_ { self.0.iter().filter_map(|(k, s)| s.is_complete().then_some(k)) } } From bc7878ef1e0c5b3641103b9f553729934c6cd6c8 Mon Sep 17 00:00:00 2001 From: Arsenii Kulikov Date: Wed, 13 Mar 2024 01:30:30 +0400 Subject: [PATCH 12/23] rename --- src/cache.rs | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/src/cache.rs b/src/cache.rs index be6d49f43..8ba1a14fa 100644 --- a/src/cache.rs +++ b/src/cache.rs @@ -600,11 +600,10 @@ pub(crate) struct ArtifactsCacheInner<'a, T: ArtifactOutput> { /// from compilation. pub missing_sources: GroupedSources, - /// Files that appear in cache but are not in scope of the current compiler invocation. + /// Artifact+version pairs which are in scope for each solc version. /// - /// Those files should be kept in the cache, but should not be returned in the compilation - /// result. - pub out_of_scope_sources: GroupedSources, + /// Only those files will be included into cached artifacts list for each version. + pub sources_in_scope: GroupedSources, /// The file hashes. pub content_hashes: HashMap, @@ -654,6 +653,7 @@ impl<'a, T: ArtifactOutput> ArtifactsCacheInner<'a, T> { self.dirty_sources.extend(self.get_dirty_files(&sources)); for (file, source) in sources.iter() { + self.sources_in_scope.insert(file.clone(), version.clone()); if self.dirty_sources.contains(file) { compile_complete.insert(file.clone()); @@ -681,14 +681,6 @@ impl<'a, T: ArtifactOutput> ArtifactsCacheInner<'a, T> { } } - // Mark sources which appear in cache but are not in scope of the current compiler run as - // out of scope. - for source in self.cache.files.keys().cloned().collect::>() { - if !sources.contains_key(&source) { - self.out_of_scope_sources.insert(source, version.clone()); - } - } - // Prepare optimization by collecting sources which are imported by files requiring complete // compilation. 
for source in &compile_complete { @@ -859,7 +851,7 @@ impl<'a, T: ArtifactOutput> ArtifactsCache<'a, T> { project, dirty_sources: Default::default(), content_hashes: Default::default(), - out_of_scope_sources: Default::default(), + sources_in_scope: Default::default(), missing_sources: Default::default(), }; @@ -940,7 +932,7 @@ impl<'a, T: ArtifactOutput> ArtifactsCache<'a, T> { mut cache, mut cached_artifacts, dirty_sources, - out_of_scope_sources, + sources_in_scope, project, .. } = cache; @@ -952,7 +944,7 @@ impl<'a, T: ArtifactOutput> ArtifactsCache<'a, T> { artifacts.retain(|artifact| { let version = &artifact.version; - if out_of_scope_sources.contains(file, version) { + if !sources_in_scope.contains(file, version) { return false; } if dirty_sources.contains(file) { From 4b0a42cafe2b514f3077c9340d276fc48e8a5c56 Mon Sep 17 00:00:00 2001 From: Arsenii Kulikov Date: Wed, 13 Mar 2024 01:31:53 +0400 Subject: [PATCH 13/23] fix doc --- src/filter.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/filter.rs b/src/filter.rs index b85087175..d923ae45f 100644 --- a/src/filter.rs +++ b/src/filter.rs @@ -186,7 +186,7 @@ impl fmt::Debug for SparseOutputFilter { } } -/// Container type for a set of [FilteredSource] +/// Container type for a mapping from source path to [SourceCompilationKind] #[derive(Debug, Clone, Eq, PartialEq)] pub struct FilteredSources(pub BTreeMap); From 337f4a4b6eb3618589f78f3b250f6e4150a031b2 Mon Sep 17 00:00:00 2001 From: Arsenii Kulikov Date: Wed, 13 Mar 2024 01:38:17 +0400 Subject: [PATCH 14/23] use .display() --- src/cache.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cache.rs b/src/cache.rs index 8ba1a14fa..655d239bd 100644 --- a/src/cache.rs +++ b/src/cache.rs @@ -665,7 +665,7 @@ impl<'a, T: ArtifactOutput> ArtifactsCacheInner<'a, T> { // should compile it to populate the cache. 
let missing = self .cached_artifacts - .get(file.to_string_lossy().as_ref()) + .get(&format!("{}", file.display())) .map_or(true, |artifacts| { artifacts.values().flatten().all(|a| a.version != *version) }); From 18b05b8976980b1d761554ebf0394c1f8e31b9b8 Mon Sep 17 00:00:00 2001 From: Arsenii Kulikov Date: Wed, 13 Mar 2024 01:39:17 +0400 Subject: [PATCH 15/23] fix debug impl --- src/filter.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/filter.rs b/src/filter.rs index d923ae45f..4aeebe75b 100644 --- a/src/filter.rs +++ b/src/filter.rs @@ -180,7 +180,7 @@ impl From> for SparseOutputFilter { impl fmt::Debug for SparseOutputFilter { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { match self { - SparseOutputFilter::Optimized => f.write_str("AllDirty"), + SparseOutputFilter::Optimized => f.write_str("Optimized"), SparseOutputFilter::Custom(_) => f.write_str("Custom"), } } From 99068937500a88e4dd8c1e079bc45a827e78b2ba Mon Sep 17 00:00:00 2001 From: Arsenii Kulikov Date: Wed, 13 Mar 2024 01:44:11 +0400 Subject: [PATCH 16/23] is_dirty --- src/filter.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/filter.rs b/src/filter.rs index 4aeebe75b..cf4abb98e 100644 --- a/src/filter.rs +++ b/src/filter.rs @@ -114,7 +114,7 @@ impl SparseOutputFilter { for (file, source) in sources.0.iter() { let key = format!("{}", file.display()); - if source.is_complete() && f.is_match(file) { + if source.is_dirty() && f.is_match(file) { settings.output_selection.as_mut().insert(key, selection.clone()); // the filter might not cover link references that will be required by the file, so @@ -201,22 +201,22 @@ impl FilteredSources { /// Returns `true` if no sources should have optimized output selection. pub fn all_dirty(&self) -> bool { - self.0.values().all(|s| s.is_complete()) + self.0.values().all(|s| s.is_dirty()) } /// Returns all entries that should not be optimized. 
pub fn dirty(&self) -> impl Iterator + '_ { - self.0.iter().filter(|(_, s)| s.is_complete()) + self.0.iter().filter(|(_, s)| s.is_dirty()) } /// Returns all entries that should be optimized. pub fn clean(&self) -> impl Iterator + '_ { - self.0.iter().filter(|(_, s)| !s.is_complete()) + self.0.iter().filter(|(_, s)| !s.is_dirty()) } /// Returns all files that should not be optimized. pub fn dirty_files(&self) -> impl Iterator + fmt::Debug + '_ { - self.0.iter().filter_map(|(k, s)| s.is_complete().then_some(k)) + self.0.iter().filter_map(|(k, s)| s.is_dirty().then_some(k)) } } @@ -280,7 +280,7 @@ impl SourceCompilationKind { } /// Whether this file should be compiled with full output selection - pub fn is_complete(&self) -> bool { + pub fn is_dirty(&self) -> bool { matches!(self, SourceCompilationKind::Complete(_)) } } From c491365d3d59fedac6df95902acd0d8d6ba0c43d Mon Sep 17 00:00:00 2001 From: Arsenii Kulikov Date: Wed, 13 Mar 2024 02:00:08 +0400 Subject: [PATCH 17/23] add assertion --- src/compile/project.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/compile/project.rs b/src/compile/project.rs index 3220c03f2..232fc6e08 100644 --- a/src/compile/project.rs +++ b/src/compile/project.rs @@ -791,6 +791,7 @@ mod tests { // 3 contracts total assert_eq!(filtered.0.len(), 3); // A is modified + assert_eq!(state.cache.as_cached().unwrap().dirty_sources.len(), 1); assert_eq!(filtered.dirty().count(), 1); assert!(filtered.dirty_files().next().unwrap().ends_with("A.sol")); From 1ae124f4d4808c69a3164a47f4bf3887ecb0494c Mon Sep 17 00:00:00 2001 From: Arsenii Kulikov Date: Wed, 13 Mar 2024 02:16:01 +0400 Subject: [PATCH 18/23] is_missing_artifacts --- src/cache.rs | 47 ++++++++++++++++++++++++++++++++++++----------- 1 file changed, 36 insertions(+), 11 deletions(-) diff --git a/src/cache.rs b/src/cache.rs index 655d239bd..60cb0cdc1 100644 --- a/src/cache.rs +++ b/src/cache.rs @@ -660,19 +660,11 @@ impl<'a, T: ArtifactOutput> ArtifactsCacheInner<'a, T> { // If file is 
dirty, its data should be invalidated and all artifacts for all // versions should be removed. self.cache.remove(file.as_path()); - } else { + } else if self.is_missing_artifacts(file, version) { // If source is not dirty, but we are missing artifacts for this version, we // should compile it to populate the cache. - let missing = self - .cached_artifacts - .get(&format!("{}", file.display())) - .map_or(true, |artifacts| { - artifacts.values().flatten().all(|a| a.version != *version) - }); - if missing { - compile_complete.insert(file.clone()); - self.missing_sources.insert(file.clone(), version.clone()); - } + compile_complete.insert(file.clone()); + self.missing_sources.insert(file.clone(), version.clone()); } // Ensure that we have a cache entry for all sources. @@ -707,6 +699,39 @@ impl<'a, T: ArtifactOutput> ArtifactsCacheInner<'a, T> { FilteredSources(filtered) } + /// Returns whether we are missing artifacts for the given file and version. + fn is_missing_artifacts(&self, file: &Path, version: &Version) -> bool { + let Some(entry) = self.cache.entry(file) else { + trace!("missing cache entry"); + return true; + }; + + // only check artifact's existence if the file generated artifacts. + // e.g. a solidity file consisting only of import statements (like interfaces that + // re-export) do not create artifacts + if entry.artifacts.is_empty() { + trace!("no artifacts"); + return false; + } + + if !entry.contains_version(version) { + trace!("missing linked artifacts",); + return true; + } + + if entry.artifacts_for_version(version).any(|artifact_path| { + let missing_artifact = !self.cached_artifacts.has_artifact(artifact_path); + if missing_artifact { + trace!("missing artifact \"{}\"", artifact_path.display()); + } + missing_artifact + }) { + return true; + } + + false + } + /// Returns a set of files that are dirty itself or import dirty file directly or indirectly. 
fn get_dirty_files(&self, sources: &Sources) -> HashSet { let mut dirty_files = HashSet::new(); From f84da500397bfcd775f03cfd189c555c28a37860 Mon Sep 17 00:00:00 2001 From: Arsenii Kulikov Date: Wed, 13 Mar 2024 02:31:26 +0400 Subject: [PATCH 19/23] additional assertions --- src/compile/project.rs | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/compile/project.rs b/src/compile/project.rs index 232fc6e08..690494e8c 100644 --- a/src/compile/project.rs +++ b/src/compile/project.rs @@ -783,6 +783,13 @@ mod tests { let state = compiler.preprocess().unwrap(); let sources = state.sources.sources(); + let cache = state.cache.as_cached().unwrap(); + + // 2 clean sources + assert_eq!(cache.cache.artifacts_len(), 2); + assert!(cache.cache.all_artifacts_exist()); + assert_eq!(cache.dirty_sources.len(), 1); + // single solc assert_eq!(sources.len(), 1); @@ -791,7 +798,6 @@ mod tests { // 3 contracts total assert_eq!(filtered.0.len(), 3); // A is modified - assert_eq!(state.cache.as_cached().unwrap().dirty_sources.len(), 1); assert_eq!(filtered.dirty().count(), 1); assert!(filtered.dirty_files().next().unwrap().ends_with("A.sol")); From 122c9e540f6ef66133e67f9182a0cc171261823f Mon Sep 17 00:00:00 2001 From: Arsenii Kulikov Date: Wed, 13 Mar 2024 02:37:45 +0400 Subject: [PATCH 20/23] remove empty check --- src/cache.rs | 8 -------- 1 file changed, 8 deletions(-) diff --git a/src/cache.rs b/src/cache.rs index 60cb0cdc1..258531e62 100644 --- a/src/cache.rs +++ b/src/cache.rs @@ -706,14 +706,6 @@ impl<'a, T: ArtifactOutput> ArtifactsCacheInner<'a, T> { return true; }; - // only check artifact's existence if the file generated artifacts. - // e.g. 
a solidity file consisting only of import statements (like interfaces that - // re-export) do not create artifacts - if entry.artifacts.is_empty() { - trace!("no artifacts"); - return false; - } - if !entry.contains_version(version) { trace!("missing linked artifacts",); return true; From 8660dd09f3236d00b8a44b1b4934bf48de9c57be Mon Sep 17 00:00:00 2001 From: Arsenii Kulikov Date: Wed, 13 Mar 2024 05:12:26 +0400 Subject: [PATCH 21/23] rm missing --- src/cache.rs | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/cache.rs b/src/cache.rs index 258531e62..bb24638b9 100644 --- a/src/cache.rs +++ b/src/cache.rs @@ -596,10 +596,6 @@ pub(crate) struct ArtifactsCacheInner<'a, T: ArtifactOutput> { /// Those are not grouped by version and purged completely. pub dirty_sources: HashSet, - /// Artifact+version pairs for which we were missing artifacts and expecting to receive them - /// from compilation. - pub missing_sources: GroupedSources, - /// Artifact+version pairs which are in scope for each solc version. /// /// Only those files will be included into cached artifacts list for each version. @@ -664,7 +660,6 @@ impl<'a, T: ArtifactOutput> ArtifactsCacheInner<'a, T> { // If source is not dirty, but we are missing artifacts for this version, we // should compile it to populate the cache. compile_complete.insert(file.clone()); - self.missing_sources.insert(file.clone(), version.clone()); } // Ensure that we have a cache entry for all sources. 
@@ -869,7 +864,6 @@ impl<'a, T: ArtifactOutput> ArtifactsCache<'a, T> { dirty_sources: Default::default(), content_hashes: Default::default(), sources_in_scope: Default::default(), - missing_sources: Default::default(), }; ArtifactsCache::Cached(cache) From a0325189d8407a3753dd2764e6bc453f8c91bab8 Mon Sep 17 00:00:00 2001 From: Arsenii Kulikov Date: Wed, 13 Mar 2024 05:15:42 +0400 Subject: [PATCH 22/23] merge_artifacts --- src/cache.rs | 41 +++-------------------------------------- 1 file changed, 3 insertions(+), 38 deletions(-) diff --git a/src/cache.rs b/src/cache.rs index bb24638b9..ab72d43f4 100644 --- a/src/cache.rs +++ b/src/cache.rs @@ -12,10 +12,7 @@ use crate::{ use semver::Version; use serde::{de::DeserializeOwned, Deserialize, Serialize}; use std::{ - collections::{ - btree_map::{BTreeMap, Entry}, - hash_map, BTreeSet, HashMap, HashSet, - }, + collections::{btree_map::BTreeMap, hash_map, BTreeSet, HashMap, HashSet}, fs, path::{Path, PathBuf}, time::{Duration, UNIX_EPOCH}, @@ -312,24 +309,6 @@ impl SolFilesCache { .collect::>>()?; Ok(Artifacts(artifacts)) } - - /// Inserts the provided cache entries, if there is an existing `CacheEntry` it will be updated - /// but versions will be merged. 
- pub fn extend(&mut self, entries: I) - where - I: IntoIterator, - { - for (file, entry) in entries.into_iter() { - match self.files.entry(file) { - Entry::Vacant(e) => { - e.insert(entry); - } - Entry::Occupied(mut other) => { - other.get_mut().merge_artifacts(entry); - } - } - } - } } // async variants for read and write @@ -467,7 +446,7 @@ impl CacheEntry { Ok(artifacts) } - pub(crate) fn extend_artifacts<'a, A, I, T: 'a>(&mut self, artifacts: I) + pub(crate) fn merge_artifacts<'a, A, I, T: 'a>(&mut self, artifacts: I) where I: IntoIterator, A: IntoIterator>, @@ -482,20 +461,6 @@ impl CacheEntry { } } - /// Merges another `CacheEntries` artifacts into the existing set - fn merge_artifacts(&mut self, other: CacheEntry) { - for (name, artifacts) in other.artifacts { - match self.artifacts.entry(name) { - Entry::Vacant(entry) => { - entry.insert(artifacts); - } - Entry::Occupied(mut entry) => { - entry.get_mut().extend(artifacts); - } - } - } - } - /// Returns `true` if the artifacts set contains the given version pub fn contains_version(&self, version: &Version) -> bool { self.artifacts_versions().any(|(v, _)| v == version) @@ -981,7 +946,7 @@ impl<'a, T: ArtifactOutput> ArtifactsCache<'a, T> { // Only update data for existing entries, we should have entries for all in-scope files // by now. if let Some(entry) = cache.files.get_mut(file_path) { - entry.extend_artifacts(artifacts); + entry.merge_artifacts(artifacts); } } From 91becf5c214adf10a1a529df5997a2bae57492e3 Mon Sep 17 00:00:00 2001 From: Arsenii Kulikov Date: Wed, 13 Mar 2024 05:49:15 +0400 Subject: [PATCH 23/23] empty check --- src/cache.rs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/cache.rs b/src/cache.rs index ab72d43f4..6e13be94f 100644 --- a/src/cache.rs +++ b/src/cache.rs @@ -666,6 +666,14 @@ impl<'a, T: ArtifactOutput> ArtifactsCacheInner<'a, T> { return true; }; + // only check artifact's existence if the file generated artifacts. + // e.g. 
a solidity file consisting only of import statements (like interfaces that + // re-export) do not create artifacts + if entry.artifacts.is_empty() { + trace!("no artifacts"); + return false; + } + if !entry.contains_version(version) { trace!("missing linked artifacts",); return true;