Always decompose nvcc compilations #2300

Merged
1 change: 1 addition & 0 deletions src/compiler/mod.rs
@@ -32,5 +32,6 @@ mod tasking_vx;
#[macro_use]
mod counted_array;

pub use crate::compiler::c::CCompilerKind;
pub use crate::compiler::compiler::*;
pub use crate::compiler::preprocessor_cache::PreprocessorCacheEntry;
40 changes: 31 additions & 9 deletions src/compiler/nvcc.rs
@@ -461,7 +461,20 @@ pub fn generate_compile_commands(
output_file_name: output.file_name().unwrap().to_owned(),
};

Ok((command, None, Cacheable::Yes))
Ok((
command,
None,
// Never assume the outer `nvcc` call is cacheable. We must decompose the nvcc call into
// its constituent subcommands with `--dryrun` and only cache the final build product.
//
// Always decomposing `nvcc --dryrun` is the only way to ensure caching nvcc invocations
// is fully sound, because the `nvcc -E` preprocessor output is not sufficient to detect
// all source code changes.
//
// Specifically, `nvcc -E` always defines __CUDA_ARCH__, which means changes to host-only
// code guarded by an `#ifndef __CUDA_ARCH__` will _not_ be captured in `nvcc -E` output.
Cacheable::No,
))
Comment on lines +464 to +477 (Contributor Author):
We might now be able to get away with doing less in the preprocess() function, since we effectively don't care about most of what it does.

}

#[derive(Clone, Debug)]
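To make the `__CUDA_ARCH__` caveat in the comment above concrete, here is a minimal sketch (a hypothetical `example.cu`, not taken from this PR) of host-only code whose changes, per that reasoning, would not appear in `nvcc -E` output and so could not be detected by hashing the preprocessed source alone:

```cuda
// example.cu (hypothetical file, for illustration only; not part of this PR)
#include <cstdio>

__global__ void kernel() {}

int main() {
#ifndef __CUDA_ARCH__
    // Host-only branch: per the comment above, `nvcc -E` always defines
    // __CUDA_ARCH__, so edits to this line never show up in the `nvcc -E`
    // output. A cache keyed only on that output would keep returning the
    // object built from the old source.
    std::printf("host-only path\n");
#endif
    kernel<<<1, 1>>>();
    return 0;
}
```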
@@ -811,19 +824,28 @@ where
)
}
} else {
// Returns Cacheable::Yes to indicate we _do_ want to run this host
// compiler call through sccache (because it may be distributed),
// but we _do not_ want to cache its output. The output file will
// be cached as the result of the outer `nvcc` command. Caching
// here would store the same object twice under two different hashes,
// unnecessarily bloating the cache size.
// Cache the host compiler calls, since we've marked the outer `nvcc` call
// as non-cacheable. This ensures `sccache nvcc ...` _always_ decomposes the
// nvcc call into its constituent subcommands with `--dryrun`, but only caches
// the final build product once.
//
// Always decomposing `nvcc --dryrun` is the only way to ensure caching nvcc invocations
// is fully sound, because the `nvcc -E` preprocessor output is not sufficient to detect
// all source code changes.
//
// Specifically, `nvcc -E` always defines __CUDA_ARCH__, which means changes to host-only
// code guarded by an `#ifndef __CUDA_ARCH__` will _not_ be captured in `nvcc -E` output.
(
env_vars
.iter()
.chain(
[
// Do not cache host compiler calls
("SCCACHE_NO_CACHE".into(), "true".into()),
// HACK: This compilation will look like a C/C++ compilation,
// but we want to report it in the stats as a CUDA compilation.
// The SccacheService API doesn't have a great way to specify this
// case, so we set a special envvar here that it can read when the
// compilation is finished.
("__SCCACHE_THIS_IS_A_CUDA_COMPILATION__".into(), "".into()),
]
.iter(),
)
18 changes: 16 additions & 2 deletions src/server.rs
@@ -1305,8 +1305,22 @@ where

let out_pretty = hasher.output_pretty().into_owned();
let color_mode = hasher.color_mode();
let kind = compiler.kind();
let lang = hasher.language();

let (kind, lang) = {
// HACK: See note in src/compiler/nvcc.rs
if env_vars
.iter()
.any(|(k, _)| k == "__SCCACHE_THIS_IS_A_CUDA_COMPILATION__")
{
(
CompilerKind::C(crate::compiler::CCompilerKind::Nvcc),
Language::Cuda,
)
} else {
(compiler.kind(), hasher.language())
}
};

let me = self.clone();

self.rt
74 changes: 37 additions & 37 deletions tests/system.rs
@@ -703,12 +703,12 @@ fn test_nvcc_cuda_compiles(compiler: &Compiler, tempdir: &Path) {
trace!("compile A request stats");
get_stats(|info| {
assert_eq!(2, info.stats.compile_requests);
assert_eq!(5, info.stats.requests_executed);
assert_eq!(1, info.stats.cache_hits.all());
assert_eq!(8, info.stats.requests_executed);
assert_eq!(3, info.stats.cache_hits.all());
assert_eq!(3, info.stats.cache_misses.all());
assert_eq!(&1, info.stats.cache_hits.get("CUDA").unwrap());
assert!(info.stats.cache_hits.get("PTX").is_none());
assert!(info.stats.cache_hits.get("CUBIN").is_none());
assert_eq!(&1, info.stats.cache_hits.get("PTX").unwrap());
assert_eq!(&1, info.stats.cache_hits.get("CUBIN").unwrap());
assert_eq!(&1, info.stats.cache_misses.get("CUDA").unwrap());
assert_eq!(&1, info.stats.cache_misses.get("PTX").unwrap());
assert_eq!(&1, info.stats.cache_misses.get("CUBIN").unwrap());
@@ -717,8 +717,8 @@ fn test_nvcc_cuda_compiles(compiler: &Compiler, tempdir: &Path) {
let adv_ptx_key = adv_key_kind("ptx", compiler.name);
let adv_cubin_key = adv_key_kind("cubin", compiler.name);
assert_eq!(&1, info.stats.cache_hits.get_adv(&adv_cuda_key).unwrap());
assert!(info.stats.cache_hits.get_adv(&adv_ptx_key).is_none());
assert!(info.stats.cache_hits.get_adv(&adv_cubin_key).is_none());
assert_eq!(&1, info.stats.cache_hits.get_adv(&adv_ptx_key).unwrap());
assert_eq!(&1, info.stats.cache_hits.get_adv(&adv_cubin_key).unwrap());
assert_eq!(&1, info.stats.cache_misses.get_adv(&adv_cuda_key).unwrap());
assert_eq!(&1, info.stats.cache_misses.get_adv(&adv_ptx_key).unwrap());
assert_eq!(&1, info.stats.cache_misses.get_adv(&adv_cubin_key).unwrap());
@@ -747,12 +747,12 @@ fn test_nvcc_cuda_compiles(compiler: &Compiler, tempdir: &Path) {
trace!("compile B request stats");
get_stats(|info| {
assert_eq!(3, info.stats.compile_requests);
assert_eq!(9, info.stats.requests_executed);
assert_eq!(2, info.stats.cache_hits.all());
assert_eq!(12, info.stats.requests_executed);
assert_eq!(4, info.stats.cache_hits.all());
assert_eq!(5, info.stats.cache_misses.all());
assert_eq!(&1, info.stats.cache_hits.get("CUDA").unwrap());
assert!(info.stats.cache_hits.get("PTX").is_none());
assert_eq!(&1, info.stats.cache_hits.get("CUBIN").unwrap());
assert_eq!(&1, info.stats.cache_hits.get("PTX").unwrap());
assert_eq!(&2, info.stats.cache_hits.get("CUBIN").unwrap());
assert_eq!(&2, info.stats.cache_misses.get("CUDA").unwrap());
assert_eq!(&2, info.stats.cache_misses.get("PTX").unwrap());
assert_eq!(&1, info.stats.cache_misses.get("CUBIN").unwrap());
@@ -761,8 +761,8 @@ fn test_nvcc_cuda_compiles(compiler: &Compiler, tempdir: &Path) {
let adv_ptx_key = adv_key_kind("ptx", compiler.name);
let adv_cubin_key = adv_key_kind("cubin", compiler.name);
assert_eq!(&1, info.stats.cache_hits.get_adv(&adv_cuda_key).unwrap());
assert!(info.stats.cache_hits.get_adv(&adv_ptx_key).is_none());
assert_eq!(&1, info.stats.cache_hits.get_adv(&adv_cubin_key).unwrap());
assert_eq!(&1, info.stats.cache_hits.get_adv(&adv_ptx_key).unwrap());
assert_eq!(&2, info.stats.cache_hits.get_adv(&adv_cubin_key).unwrap());
assert_eq!(&2, info.stats.cache_misses.get_adv(&adv_cuda_key).unwrap());
assert_eq!(&2, info.stats.cache_misses.get_adv(&adv_ptx_key).unwrap());
assert_eq!(&1, info.stats.cache_misses.get_adv(&adv_cubin_key).unwrap());
Expand All @@ -789,23 +789,23 @@ fn test_nvcc_cuda_compiles(compiler: &Compiler, tempdir: &Path) {
trace!("compile ptx request stats");
get_stats(|info| {
assert_eq!(4, info.stats.compile_requests);
assert_eq!(11, info.stats.requests_executed);
assert_eq!(3, info.stats.cache_hits.all());
assert_eq!(6, info.stats.cache_misses.all());
assert_eq!(14, info.stats.requests_executed);
assert_eq!(5, info.stats.cache_hits.all());
assert_eq!(5, info.stats.cache_misses.all());
assert_eq!(&1, info.stats.cache_hits.get("CUDA").unwrap());
assert_eq!(&1, info.stats.cache_hits.get("PTX").unwrap());
assert_eq!(&1, info.stats.cache_hits.get("CUBIN").unwrap());
assert_eq!(&3, info.stats.cache_misses.get("CUDA").unwrap());
assert_eq!(&2, info.stats.cache_hits.get("PTX").unwrap());
assert_eq!(&2, info.stats.cache_hits.get("CUBIN").unwrap());
assert_eq!(&2, info.stats.cache_misses.get("CUDA").unwrap());
assert_eq!(&2, info.stats.cache_misses.get("PTX").unwrap());
assert_eq!(&1, info.stats.cache_misses.get("CUBIN").unwrap());
assert!(info.stats.cache_misses.get("C/C++").is_none());
let adv_cuda_key = adv_key_kind("cuda", compiler.name);
let adv_ptx_key = adv_key_kind("ptx", compiler.name);
let adv_cubin_key = adv_key_kind("cubin", compiler.name);
assert_eq!(&1, info.stats.cache_hits.get_adv(&adv_cuda_key).unwrap());
assert_eq!(&1, info.stats.cache_hits.get_adv(&adv_ptx_key).unwrap());
assert_eq!(&1, info.stats.cache_hits.get_adv(&adv_cubin_key).unwrap());
assert_eq!(&3, info.stats.cache_misses.get_adv(&adv_cuda_key).unwrap());
assert_eq!(&2, info.stats.cache_hits.get_adv(&adv_ptx_key).unwrap());
assert_eq!(&2, info.stats.cache_hits.get_adv(&adv_cubin_key).unwrap());
assert_eq!(&2, info.stats.cache_misses.get_adv(&adv_cuda_key).unwrap());
assert_eq!(&2, info.stats.cache_misses.get_adv(&adv_ptx_key).unwrap());
assert_eq!(&1, info.stats.cache_misses.get_adv(&adv_cubin_key).unwrap());
});
@@ -831,23 +831,23 @@ fn test_nvcc_cuda_compiles(compiler: &Compiler, tempdir: &Path) {
trace!("compile cubin request stats");
get_stats(|info| {
assert_eq!(5, info.stats.compile_requests);
assert_eq!(14, info.stats.requests_executed);
assert_eq!(5, info.stats.cache_hits.all());
assert_eq!(7, info.stats.cache_misses.all());
assert_eq!(17, info.stats.requests_executed);
assert_eq!(7, info.stats.cache_hits.all());
assert_eq!(5, info.stats.cache_misses.all());
assert_eq!(&1, info.stats.cache_hits.get("CUDA").unwrap());
assert_eq!(&2, info.stats.cache_hits.get("PTX").unwrap());
assert_eq!(&2, info.stats.cache_hits.get("CUBIN").unwrap());
assert_eq!(&4, info.stats.cache_misses.get("CUDA").unwrap());
assert_eq!(&3, info.stats.cache_hits.get("PTX").unwrap());
assert_eq!(&3, info.stats.cache_hits.get("CUBIN").unwrap());
assert_eq!(&2, info.stats.cache_misses.get("CUDA").unwrap());
assert_eq!(&2, info.stats.cache_misses.get("PTX").unwrap());
assert_eq!(&1, info.stats.cache_misses.get("CUBIN").unwrap());
assert!(info.stats.cache_misses.get("C/C++").is_none());
let adv_cuda_key = adv_key_kind("cuda", compiler.name);
let adv_ptx_key = adv_key_kind("ptx", compiler.name);
let adv_cubin_key = adv_key_kind("cubin", compiler.name);
assert_eq!(&1, info.stats.cache_hits.get_adv(&adv_cuda_key).unwrap());
assert_eq!(&2, info.stats.cache_hits.get_adv(&adv_ptx_key).unwrap());
assert_eq!(&2, info.stats.cache_hits.get_adv(&adv_cubin_key).unwrap());
assert_eq!(&4, info.stats.cache_misses.get_adv(&adv_cuda_key).unwrap());
assert_eq!(&3, info.stats.cache_hits.get_adv(&adv_ptx_key).unwrap());
assert_eq!(&3, info.stats.cache_hits.get_adv(&adv_cubin_key).unwrap());
assert_eq!(&2, info.stats.cache_misses.get_adv(&adv_cuda_key).unwrap());
assert_eq!(&2, info.stats.cache_misses.get_adv(&adv_ptx_key).unwrap());
assert_eq!(&1, info.stats.cache_misses.get_adv(&adv_cubin_key).unwrap());
});
@@ -914,14 +914,14 @@ fn test_nvcc_proper_lang_stat_tracking(compiler: Compiler, tempdir: &Path) {
trace!("request stats");
get_stats(|info| {
assert_eq!(4, info.stats.compile_requests);
assert_eq!(8, info.stats.requests_executed);
assert_eq!(3, info.stats.cache_hits.all());
assert_eq!(12, info.stats.requests_executed);
assert_eq!(5, info.stats.cache_hits.all());
assert_eq!(3, info.stats.cache_misses.all());
assert_eq!(&1, info.stats.cache_hits.get("C/C++").unwrap());
assert_eq!(&1, info.stats.cache_hits.get("CUDA").unwrap());
assert_eq!(&1, info.stats.cache_hits.get("CUBIN").unwrap());
assert_eq!(&1, info.stats.cache_misses.get("C/C++").unwrap());
assert_eq!(&1, info.stats.cache_misses.get("CUDA").unwrap());
assert!(info.stats.cache_hits.get("C/C++").is_none());
assert_eq!(&2, info.stats.cache_hits.get("CUDA").unwrap());
assert_eq!(&2, info.stats.cache_hits.get("CUBIN").unwrap());
assert!(info.stats.cache_misses.get("C/C++").is_none());
assert_eq!(&2, info.stats.cache_misses.get("CUDA").unwrap());
assert_eq!(&1, info.stats.cache_misses.get("PTX").unwrap());
});
}