Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Clang] Add env var for nvptx-arch/amdgpu-arch timeout #102521

Merged
merged 8 commits into from
Aug 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions clang/include/clang/Basic/DiagnosticDriverKinds.td
Original file line number Diff line number Diff line change
Expand Up @@ -98,8 +98,10 @@ def warn_drv_amdgpu_cov6: Warning<
"code object v6 is still in development and not ready for production use yet;"
" use at your own risk">;
def err_drv_undetermined_gpu_arch : Error<
"cannot determine %0 architecture: %1; consider passing it via "
"'%2'">;
"cannot determine %0 architecture: %1; consider passing it via '%2'; "
"environment variable CLANG_TOOLCHAIN_PROGRAM_TIMEOUT specifies the tool "
"timeout (integer secs, <=0 is infinite)">;

def warn_drv_multi_gpu_arch : Warning<
"multiple %0 architectures are detected: %1; only the first one is used for "
"'%2'">, InGroup<MultiGPU>;
Expand Down
3 changes: 1 addition & 2 deletions clang/include/clang/Driver/ToolChain.h
Original file line number Diff line number Diff line change
Expand Up @@ -205,8 +205,7 @@ class ToolChain {

/// Executes the given \p Executable and returns the stdout.
llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
executeToolChainProgram(StringRef Executable,
unsigned SecondsToWait = 0) const;
executeToolChainProgram(StringRef Executable) const;

void setTripleEnvironment(llvm::Triple::EnvironmentType Env);

Expand Down
14 changes: 12 additions & 2 deletions clang/lib/Driver/ToolChain.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/FileUtilities.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/Process.h"
#include "llvm/Support/VersionTuple.h"
#include "llvm/Support/VirtualFileSystem.h"
#include "llvm/TargetParser/AArch64TargetParser.h"
Expand Down Expand Up @@ -104,8 +105,7 @@ ToolChain::ToolChain(const Driver &D, const llvm::Triple &T,
}

llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
ToolChain::executeToolChainProgram(StringRef Executable,
unsigned SecondsToWait) const {
ToolChain::executeToolChainProgram(StringRef Executable) const {
llvm::SmallString<64> OutputFile;
llvm::sys::fs::createTemporaryFile("toolchain-program", "txt", OutputFile);
llvm::FileRemover OutputRemover(OutputFile.c_str());
Expand All @@ -116,6 +116,16 @@ ToolChain::executeToolChainProgram(StringRef Executable,
};

std::string ErrorMessage;
int SecondsToWait = 60;
if (std::optional<std::string> Str =
llvm::sys::Process::GetEnv("CLANG_TOOLCHAIN_PROGRAM_TIMEOUT")) {
if (!llvm::to_integer(*Str, SecondsToWait))
return llvm::createStringError(std::error_code(),
"CLANG_TOOLCHAIN_PROGRAM_TIMEOUT expected "
"an integer, got '" +
*Str + "'");
SecondsToWait = std::min(SecondsToWait, 0); // infinite
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The intent here is that a negative value becomes 0, which means an infinite timeout, so this is obviously supposed to be max, not min. Pushed a fix as 7c4eb60.

}
if (llvm::sys::ExecuteAndWait(Executable, {}, {}, Redirects, SecondsToWait,
/*MemoryLimit=*/0, &ErrorMessage))
return llvm::createStringError(std::error_code(),
Expand Down
2 changes: 1 addition & 1 deletion clang/lib/Driver/ToolChains/AMDGPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -899,7 +899,7 @@ AMDGPUToolChain::getSystemGPUArchs(const ArgList &Args) const {
else
Program = GetProgramPath("amdgpu-arch");

auto StdoutOrErr = executeToolChainProgram(Program, /*SecondsToWait=*/10);
auto StdoutOrErr = executeToolChainProgram(Program);
if (!StdoutOrErr)
return StdoutOrErr.takeError();

Expand Down
2 changes: 1 addition & 1 deletion clang/lib/Driver/ToolChains/Cuda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -804,7 +804,7 @@ NVPTXToolChain::getSystemGPUArchs(const ArgList &Args) const {
else
Program = GetProgramPath("nvptx-arch");

auto StdoutOrErr = executeToolChainProgram(Program, /*SecondsToWait=*/10);
auto StdoutOrErr = executeToolChainProgram(Program);
if (!StdoutOrErr)
return StdoutOrErr.takeError();

Expand Down
8 changes: 8 additions & 0 deletions clang/test/Driver/amdgpu-hip-system-arch.c
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,11 @@
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib --offload-new-driver --offload-arch=native --amdgpu-arch-tool=%t/amdgpu_arch_gfx906 -x hip %s 2>&1 \
// RUN: | FileCheck %s --check-prefix=ARCH-GFX906
// ARCH-GFX906: "-cc1" "-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "gfx906"

// case when CLANG_TOOLCHAIN_PROGRAM_TIMEOUT is malformed.
// RUN: env CLANG_TOOLCHAIN_PROGRAM_TIMEOUT=foo \
// RUN: not %clang -### --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib \
// RUN: --offload-arch=native --amdgpu-arch-tool=%t/amdgpu_arch_gfx906 \
// RUN: -x hip %s 2>&1 | \
// RUN: FileCheck %s --check-prefix=BAD-TIMEOUT
// BAD-TIMEOUT: clang: error: cannot determine amdgcn architecture: CLANG_TOOLCHAIN_PROGRAM_TIMEOUT expected an integer, got 'foo'; consider passing it via '--offload-arch'; environment variable CLANG_TOOLCHAIN_PROGRAM_TIMEOUT specifies the tool timeout (integer secs, <=0 is infinite)
8 changes: 8 additions & 0 deletions clang/test/Driver/nvptx-cuda-system-arch.c
Original file line number Diff line number Diff line change
Expand Up @@ -42,3 +42,11 @@
// RUN: --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda %s 2>&1 | FileCheck %s --check-prefix=MARCH-sm_89
// MARCH-sm_89: warning: multiple nvptx64 architectures are detected: sm_89, sm_80; only the first one is used for '-march' [-Wmulti-gpu]
// MARCH-sm_89: "-cc1" "-triple" "nvptx64-nvidia-cuda"{{.*}}"-target-cpu" "sm_89"

// case when CLANG_TOOLCHAIN_PROGRAM_TIMEOUT is malformed.
// RUN: env CLANG_TOOLCHAIN_PROGRAM_TIMEOUT=foo \
// RUN: not %clang -### --target=x86_64-unknown-linux-gnu -nogpulib \
// RUN: --offload-arch=native --nvptx-arch-tool=%t/nvptx_arch_sm_70 \
// RUN: --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda -x cuda %s 2>&1 | \
// RUN: FileCheck %s --check-prefix=BAD-TIMEOUT
// BAD-TIMEOUT: clang: error: cannot determine nvptx64 architecture: CLANG_TOOLCHAIN_PROGRAM_TIMEOUT expected an integer, got 'foo'; consider passing it via '--offload-arch'; environment variable CLANG_TOOLCHAIN_PROGRAM_TIMEOUT specifies the tool timeout (integer secs, <=0 is infinite)
16 changes: 16 additions & 0 deletions clang/test/Driver/openmp-system-arch.c
Original file line number Diff line number Diff line change
Expand Up @@ -75,3 +75,19 @@
// RUN: -fopenmp-targets=amdgcn-amd-amdhsa --amdgpu-arch-tool=%t/amdgpu_arch_empty %s 2>&1 \
// RUN: | FileCheck %s --check-prefix=AMDGPU
// AMDGPU: error: cannot determine amdgcn architecture: No AMD GPU detected in the system; consider passing it via '-march'

// case when CLANG_TOOLCHAIN_PROGRAM_TIMEOUT is malformed for nvptx-arch.
// RUN: env CLANG_TOOLCHAIN_PROGRAM_TIMEOUT=foo \
// RUN: not %clang -### --target=x86_64-unknown-linux-gnu -fopenmp=libomp \
// RUN: -fopenmp-targets=nvptx64-nvidia-cuda -nogpulib \
// RUN: --nvptx-arch-tool=%t/nvptx_arch_sm_70 %s 2>&1 | \
// RUN: FileCheck %s --check-prefix=BAD-TIMEOUT-NVPTX
// BAD-TIMEOUT-NVPTX: clang: error: cannot determine nvptx64 architecture: CLANG_TOOLCHAIN_PROGRAM_TIMEOUT expected an integer, got 'foo'; consider passing it via '-march'; environment variable CLANG_TOOLCHAIN_PROGRAM_TIMEOUT specifies the tool timeout (integer secs, <=0 is infinite)

// case when CLANG_TOOLCHAIN_PROGRAM_TIMEOUT is malformed for amdgpu-arch.
// RUN: env CLANG_TOOLCHAIN_PROGRAM_TIMEOUT= \
// RUN: not %clang -### --target=x86_64-unknown-linux-gnu -fopenmp=libomp \
// RUN: -fopenmp-targets=amdgcn-amd-amdhsa -nogpulib \
// RUN: --amdgpu-arch-tool=%t/amdgpu_arch_gfx906 %s 2>&1 | \
// RUN: FileCheck %s --check-prefix=BAD-TIMEOUT-AMDGPU
// BAD-TIMEOUT-AMDGPU: clang: error: cannot determine amdgcn architecture: CLANG_TOOLCHAIN_PROGRAM_TIMEOUT expected an integer, got ''; consider passing it via '-march'; environment variable CLANG_TOOLCHAIN_PROGRAM_TIMEOUT specifies the tool timeout (integer secs, <=0 is infinite)
1 change: 1 addition & 0 deletions llvm/utils/lit/lit/TestingConfig.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ def fromdefaults(litConfig):
"SYSTEMROOT",
"TERM",
"CLANG",
"CLANG_TOOLCHAIN_PROGRAM_TIMEOUT",
"LLDB",
"LD_PRELOAD",
"LLVM_SYMBOLIZER_PATH",
Expand Down
Loading