From dd9a8f130fa8f5f1b9f0d9da69c4a183611fa999 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Tue, 12 Mar 2024 19:00:19 -0700 Subject: [PATCH 1/3] [llvm-exegesis] Add support for pinning benchmarking process to a CPU This patch adds in support for pinning a benchmarking process to a specific CPU (in the subprocess benchmarking mode on Linux). This is intended to be used in environments where a certain set of CPUs is isolated from the scheduler using something like cgroups and thus should present less potential for noise than normal. This also opens up the door for doing multithreaded benchmarking as we can now pin benchmarking processes to specific CPUs that we know won't interfere with each other. --- .../X86/latency/cpu-pinning-execution-mode.s | 5 ++ .../llvm-exegesis/X86/latency/cpu-pinning.s | 5 ++ .../llvm-exegesis/lib/BenchmarkRunner.cpp | 66 ++++++++++++++----- .../tools/llvm-exegesis/lib/BenchmarkRunner.h | 6 +- llvm/tools/llvm-exegesis/llvm-exegesis.cpp | 14 +++- 5 files changed, 78 insertions(+), 18 deletions(-) create mode 100644 llvm/test/tools/llvm-exegesis/X86/latency/cpu-pinning-execution-mode.s create mode 100644 llvm/test/tools/llvm-exegesis/X86/latency/cpu-pinning.s diff --git a/llvm/test/tools/llvm-exegesis/X86/latency/cpu-pinning-execution-mode.s b/llvm/test/tools/llvm-exegesis/X86/latency/cpu-pinning-execution-mode.s new file mode 100644 index 0000000000000..62a7b1d1e486e --- /dev/null +++ b/llvm/test/tools/llvm-exegesis/X86/latency/cpu-pinning-execution-mode.s @@ -0,0 +1,5 @@ +# REQUIRES: exegesis-can-measure-latency, x86_64-linux + +# RUN: not llvm-exegesis -mtriple=x86_64-unknown-unknown -mode=latency -opcode-name=ADD64rr -execution-mode=inprocess --benchmark-process-cpu=0 2>&1 | FileCheck %s + +# CHECK: llvm-exegesis error: --benchmark-process-cpu is only supported in the subprocess execution mode diff --git a/llvm/test/tools/llvm-exegesis/X86/latency/cpu-pinning.s b/llvm/test/tools/llvm-exegesis/X86/latency/cpu-pinning.s new file mode 
100644 index 0000000000000..0ea3752fc3bb9 --- /dev/null +++ b/llvm/test/tools/llvm-exegesis/X86/latency/cpu-pinning.s @@ -0,0 +1,5 @@ +# REQUIRES: exegesis-can-measure-latency, x86_64-linux + +# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mode=latency -opcode-name=ADD64rr -execution-mode=subprocess --benchmark-process-cpu=0 | FileCheck %s + +# CHECK: - { key: latency, value: {{[0-9.]*}}, per_snippet_value: {{[0-9.]*}} diff --git a/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp b/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp index 4e97d188d1725..9c5a037ee2e67 100644 --- a/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp +++ b/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp @@ -97,7 +97,8 @@ class InProcessFunctionExecutorImpl : public BenchmarkRunner::FunctionExecutor { public: static Expected> create(const LLVMState &State, object::OwningBinary Obj, - BenchmarkRunner::ScratchSpace *Scratch) { + BenchmarkRunner::ScratchSpace *Scratch, + std::optional BenchmarkProcessCPU) { Expected EF = ExecutableFunction::create(State.createTargetMachine(), std::move(Obj)); @@ -105,14 +106,17 @@ class InProcessFunctionExecutorImpl : public BenchmarkRunner::FunctionExecutor { return EF.takeError(); return std::unique_ptr( - new InProcessFunctionExecutorImpl(State, std::move(*EF), Scratch)); + new InProcessFunctionExecutorImpl(State, std::move(*EF), Scratch, + BenchmarkProcessCPU)); } private: InProcessFunctionExecutorImpl(const LLVMState &State, ExecutableFunction Function, - BenchmarkRunner::ScratchSpace *Scratch) - : State(State), Function(std::move(Function)), Scratch(Scratch) {} + BenchmarkRunner::ScratchSpace *Scratch, + std::optional BenchmarkCPU) + : State(State), Function(std::move(Function)), Scratch(Scratch), + BenchmarkProcessCPU(BenchmarkCPU) {} static void accumulateCounterValues(const SmallVector &NewValues, SmallVector *Result) { @@ -175,6 +179,7 @@ class InProcessFunctionExecutorImpl : public BenchmarkRunner::FunctionExecutor { const LLVMState &State; const ExecutableFunction 
Function; BenchmarkRunner::ScratchSpace *const Scratch; + const std::optional BenchmarkProcessCPU; }; #ifdef __linux__ @@ -189,27 +194,31 @@ class SubProcessFunctionExecutorImpl public: static Expected> create(const LLVMState &State, object::OwningBinary Obj, - const BenchmarkKey &Key) { + const BenchmarkKey &Key, std::optional BenchmarkProcessCPU) { Expected EF = ExecutableFunction::create(State.createTargetMachine(), std::move(Obj)); if (!EF) return EF.takeError(); return std::unique_ptr( - new SubProcessFunctionExecutorImpl(State, std::move(*EF), Key)); + new SubProcessFunctionExecutorImpl(State, std::move(*EF), Key, + BenchmarkProcessCPU)); } private: SubProcessFunctionExecutorImpl(const LLVMState &State, ExecutableFunction Function, - const BenchmarkKey &Key) - : State(State), Function(std::move(Function)), Key(Key) {} + const BenchmarkKey &Key, + std::optional BenchmarkCPU) + : State(State), Function(std::move(Function)), Key(Key), + BenchmarkProcessCPU(BenchmarkCPU) {} enum ChildProcessExitCodeE { CounterFDReadFailed = 1, RSeqDisableFailed, FunctionDataMappingFailed, - AuxiliaryMemorySetupFailed + AuxiliaryMemorySetupFailed, + SetCPUAffinityFailed }; StringRef childProcessExitCodeToString(int ExitCode) const { @@ -222,6 +231,8 @@ class SubProcessFunctionExecutorImpl return "Failed to map memory for assembled snippet"; case ChildProcessExitCodeE::AuxiliaryMemorySetupFailed: return "Failed to setup auxiliary memory"; + case ChildProcessExitCodeE::SetCPUAffinityFailed: + return "Failed to set CPU affinity of the benchmarking process"; default: return "Child process returned with unknown exit code"; } @@ -310,6 +321,29 @@ class SubProcessFunctionExecutorImpl } if (ParentOrChildPID == 0) { + if (BenchmarkProcessCPU) { + // Set the CPU affinity for the child process, so that we ensure that if + // the user specified a CPU the process should run on, the benchmarking + // process is running on that CPU. 
+ cpu_set_t CPUMask; + CPU_ZERO(&CPUMask); + CPU_SET(*BenchmarkProcessCPU, &CPUMask); + // TODO(boomanaiden154): Rewrite this to use LLVM primitives once they + // are available. + int SetAffinityReturn = sched_setaffinity(0, sizeof(CPUMask), &CPUMask); + if (SetAffinityReturn == -1) { + exit(ChildProcessExitCodeE::SetCPUAffinityFailed); + } + + // Check (if assertions are enabled) that we are actually running on the + // CPU that was specified by the user. + unsigned int CurrentCPU; + assert(getcpu(&CurrentCPU, nullptr) == 0 && + "Expected getcpu call to succeed."); + assert(static_cast(CurrentCPU) == *BenchmarkProcessCPU && + "Expected current CPU to equal the CPU requested by the user"); + } + // We are in the child process, close the write end of the pipe. close(PipeFiles[1]); // Unregister handlers, signal handling is now handled through ptrace in @@ -500,6 +534,7 @@ class SubProcessFunctionExecutorImpl const LLVMState &State; const ExecutableFunction Function; const BenchmarkKey &Key; + const std::optional BenchmarkProcessCPU; }; #endif // __linux__ } // namespace @@ -577,11 +612,11 @@ BenchmarkRunner::getRunnableConfiguration( Expected> BenchmarkRunner::createFunctionExecutor( object::OwningBinary ObjectFile, - const BenchmarkKey &Key) const { + const BenchmarkKey &Key, std::optional BenchmarkProcessCPU) const { switch (ExecutionMode) { case ExecutionModeE::InProcess: { auto InProcessExecutorOrErr = InProcessFunctionExecutorImpl::create( - State, std::move(ObjectFile), Scratch.get()); + State, std::move(ObjectFile), Scratch.get(), BenchmarkProcessCPU); if (!InProcessExecutorOrErr) return InProcessExecutorOrErr.takeError(); @@ -590,7 +625,7 @@ BenchmarkRunner::createFunctionExecutor( case ExecutionModeE::SubProcess: { #ifdef __linux__ auto SubProcessExecutorOrErr = SubProcessFunctionExecutorImpl::create( - State, std::move(ObjectFile), Key); + State, std::move(ObjectFile), Key, BenchmarkProcessCPU); if (!SubProcessExecutorOrErr) return 
SubProcessExecutorOrErr.takeError(); @@ -605,8 +640,8 @@ BenchmarkRunner::createFunctionExecutor( } std::pair BenchmarkRunner::runConfiguration( - RunnableConfiguration &&RC, - const std::optional &DumpFile) const { + RunnableConfiguration &&RC, const std::optional &DumpFile, + std::optional BenchmarkProcessCPU) const { Benchmark &BenchmarkResult = RC.BenchmarkResult; object::OwningBinary &ObjectFile = RC.ObjectFile; @@ -627,7 +662,8 @@ std::pair BenchmarkRunner::runConfiguration( } Expected> Executor = - createFunctionExecutor(std::move(ObjectFile), RC.BenchmarkResult.Key); + createFunctionExecutor(std::move(ObjectFile), RC.BenchmarkResult.Key, + BenchmarkProcessCPU); if (!Executor) return {Executor.takeError(), std::move(BenchmarkResult)}; auto NewMeasurements = runMeasurements(**Executor); diff --git a/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.h b/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.h index 9b4bb1d41149f..e688b814d1c83 100644 --- a/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.h +++ b/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.h @@ -68,7 +68,8 @@ class BenchmarkRunner { std::pair runConfiguration(RunnableConfiguration &&RC, - const std::optional &DumpFile) const; + const std::optional &DumpFile, + std::optional BenchmarkProcessCPU) const; // Scratch space to run instructions that touch memory. 
struct ScratchSpace { @@ -135,7 +136,8 @@ class BenchmarkRunner { Expected> createFunctionExecutor(object::OwningBinary Obj, - const BenchmarkKey &Key) const; + const BenchmarkKey &Key, + std::optional BenchmarkProcessCPU) const; }; } // namespace exegesis diff --git a/llvm/tools/llvm-exegesis/llvm-exegesis.cpp b/llvm/tools/llvm-exegesis/llvm-exegesis.cpp index 1ae2565e894c6..3e0d75faaeb34 100644 --- a/llvm/tools/llvm-exegesis/llvm-exegesis.cpp +++ b/llvm/tools/llvm-exegesis/llvm-exegesis.cpp @@ -269,6 +269,11 @@ static cl::list ValidationCounters( "counter to validate benchmarking assumptions"), cl::CommaSeparated, cl::cat(BenchmarkOptions), ValidationEventOptions()); +static cl::opt BenchmarkProcessCPU( + "benchmark-process-cpu", + cl::desc("The CPU number that the benchmarking process should execute on"), + cl::cat(BenchmarkOptions), cl::init(-1)); + static ExitOnError ExitOnErr("llvm-exegesis error: "); // Helper function that logs the error(s) and exits. @@ -418,8 +423,15 @@ static void runBenchmarkConfigurations( std::optional DumpFile; if (DumpObjectToDisk.getNumOccurrences()) DumpFile = DumpObjectToDisk; + std::optional BenchmarkCPU = std::nullopt; + if (BenchmarkProcessCPU != -1) { + if (ExecutionMode != BenchmarkRunner::ExecutionModeE::SubProcess) + ExitWithError("--benchmark-process-cpu is only supported in the " + "subprocess execution mode"); + BenchmarkCPU = BenchmarkProcessCPU; + } auto [Err, BenchmarkResult] = - Runner.runConfiguration(std::move(RC), DumpFile); + Runner.runConfiguration(std::move(RC), DumpFile, BenchmarkCPU); if (Err) { // Errors from executing the snippets are fine. // All other errors are a framework issue and should fail. 
From dd37f7928c2aa84f2aa5d0ec577fab819c3dd2cb Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Wed, 18 Sep 2024 14:01:22 +0000 Subject: [PATCH 2/3] Address feedback --- .../X86/latency/cpu-pinning-execution-mode.s | 2 +- .../llvm-exegesis/lib/BenchmarkRunner.cpp | 56 ++++++++++--------- llvm/tools/llvm-exegesis/llvm-exegesis.cpp | 11 ++-- 3 files changed, 36 insertions(+), 33 deletions(-) diff --git a/llvm/test/tools/llvm-exegesis/X86/latency/cpu-pinning-execution-mode.s b/llvm/test/tools/llvm-exegesis/X86/latency/cpu-pinning-execution-mode.s index 62a7b1d1e486e..b73ac26f2cfc7 100644 --- a/llvm/test/tools/llvm-exegesis/X86/latency/cpu-pinning-execution-mode.s +++ b/llvm/test/tools/llvm-exegesis/X86/latency/cpu-pinning-execution-mode.s @@ -2,4 +2,4 @@ # RUN: not llvm-exegesis -mtriple=x86_64-unknown-unknown -mode=latency -opcode-name=ADD64rr -execution-mode=inprocess --benchmark-process-cpu=0 2>&1 | FileCheck %s -# CHECK: llvm-exegesis error: --benchmark-process-cpu is only supported in the subprocess execution mode +# CHECK: llvm-exegesis error: The inprocess execution mode does not support benchmark core pinning. 
diff --git a/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp b/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp index 5a0ebff858d50..78850afaa9521 100644 --- a/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp +++ b/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp @@ -116,8 +116,7 @@ class InProcessFunctionExecutorImpl : public BenchmarkRunner::FunctionExecutor { ExecutableFunction Function, BenchmarkRunner::ScratchSpace *Scratch, std::optional BenchmarkCPU) - : State(State), Function(std::move(Function)), Scratch(Scratch), - BenchmarkProcessCPU(BenchmarkCPU) {} + : State(State), Function(std::move(Function)), Scratch(Scratch) {} static void accumulateCounterValues(const SmallVector &NewValues, SmallVector *Result) { @@ -180,7 +179,6 @@ class InProcessFunctionExecutorImpl : public BenchmarkRunner::FunctionExecutor { const LLVMState &State; const ExecutableFunction Function; BenchmarkRunner::ScratchSpace *const Scratch; - const std::optional BenchmarkProcessCPU; }; #ifdef __linux__ @@ -395,6 +393,31 @@ class SubProcessFunctionExecutorImpl return make_error(ChildSignalInfo.si_signo); } + void setCPUAffinityIfRequested() const { + if (BenchmarkProcessCPU.has_value()) { + // Set the CPU affinity for the child process, so that we ensure that if + // the user specified a CPU the process should run on, the benchmarking + // process is running on that CPU. + cpu_set_t CPUMask; + CPU_ZERO(&CPUMask); + CPU_SET(*BenchmarkProcessCPU, &CPUMask); + // TODO(boomanaiden154): Rewrite this to use LLVM primitives once they + // are available. + int SetAffinityReturn = sched_setaffinity(0, sizeof(CPUMask), &CPUMask); + if (SetAffinityReturn == -1) { + exit(ChildProcessExitCodeE::SetCPUAffinityFailed); + } + + // Check (if assertions are enabled) that we are actually running on the + // CPU that was specified by the user. 
+ unsigned int CurrentCPU; + assert(getcpu(&CurrentCPU, nullptr) == 0 && + "Expected getcpu call to succeed."); + assert(static_cast(CurrentCPU) == *BenchmarkProcessCPU && + "Expected current CPU to equal the CPU requested by the user"); + } + } + Error createSubProcessAndRunBenchmark( StringRef CounterName, SmallVectorImpl &CounterValues, ArrayRef ValidationCounters, @@ -427,28 +450,7 @@ class SubProcessFunctionExecutorImpl } if (ParentOrChildPID == 0) { - if (BenchmarkProcessCPU) { - // Set the CPU affinity for the child process, so that we ensure that if - // the user specified a CPU the process should run on, the benchmarking - // process is running on that CPU. - cpu_set_t CPUMask; - CPU_ZERO(&CPUMask); - CPU_SET(*BenchmarkProcessCPU, &CPUMask); - // TODO(boomanaiden154): Rewrite this to use LLVM primitives once they - // are available. - int SetAffinityReturn = sched_setaffinity(0, sizeof(CPUMask), &CPUMask); - if (SetAffinityReturn == -1) { - exit(ChildProcessExitCodeE::SetCPUAffinityFailed); - } - - // Check (if assertions are enabled) that we are actually running on the - // CPU that was specified by the user. - unsigned int CurrentCPU; - assert(getcpu(&CurrentCPU, nullptr) == 0 && - "Expected getcpu call to succeed."); - assert(static_cast(CurrentCPU) == *BenchmarkProcessCPU && - "Expected current CPU to equal the CPU requested by the user"); - } + setCPUAffinityIfRequested(); // We are in the child process, close the write end of the pipe. 
close(PipeFiles[1]); @@ -653,6 +655,10 @@ BenchmarkRunner::createFunctionExecutor( const BenchmarkKey &Key, std::optional BenchmarkProcessCPU) const { switch (ExecutionMode) { case ExecutionModeE::InProcess: { + if (BenchmarkProcessCPU.has_value()) + return make_error("The inprocess execution mode does not " + "support benchmark core pinning."); + auto InProcessExecutorOrErr = InProcessFunctionExecutorImpl::create( State, std::move(ObjectFile), Scratch.get(), BenchmarkProcessCPU); if (!InProcessExecutorOrErr) diff --git a/llvm/tools/llvm-exegesis/llvm-exegesis.cpp b/llvm/tools/llvm-exegesis/llvm-exegesis.cpp index 33ff586d3dea6..546ec770a8d22 100644 --- a/llvm/tools/llvm-exegesis/llvm-exegesis.cpp +++ b/llvm/tools/llvm-exegesis/llvm-exegesis.cpp @@ -423,13 +423,10 @@ static void runBenchmarkConfigurations( std::optional DumpFile; if (DumpObjectToDisk.getNumOccurrences()) DumpFile = DumpObjectToDisk; - std::optional BenchmarkCPU = std::nullopt; - if (BenchmarkProcessCPU != -1) { - if (ExecutionMode != BenchmarkRunner::ExecutionModeE::SubProcess) - ExitWithError("--benchmark-process-cpu is only supported in the " - "subprocess execution mode"); - BenchmarkCPU = BenchmarkProcessCPU; - } + const std::optional BenchmarkCPU = + BenchmarkProcessCPU == -1 + ? 
std::nullopt + : std::optional(BenchmarkProcessCPU.getValue()); auto [Err, BenchmarkResult] = Runner.runConfiguration(std::move(RC), DumpFile, BenchmarkCPU); if (Err) { From b7a20123d1fcdc7aca1bf451704ce2b114a198e2 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Thu, 19 Sep 2024 13:29:03 +0000 Subject: [PATCH 3/3] Address feedback --- .../llvm-exegesis/lib/BenchmarkRunner.cpp | 52 +++++++++---------- 1 file changed, 25 insertions(+), 27 deletions(-) diff --git a/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp b/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp index 78850afaa9521..27b5b3084cbcc 100644 --- a/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp +++ b/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp @@ -107,15 +107,13 @@ class InProcessFunctionExecutorImpl : public BenchmarkRunner::FunctionExecutor { return EF.takeError(); return std::unique_ptr( - new InProcessFunctionExecutorImpl(State, std::move(*EF), Scratch, - BenchmarkProcessCPU)); + new InProcessFunctionExecutorImpl(State, std::move(*EF), Scratch)); } private: InProcessFunctionExecutorImpl(const LLVMState &State, ExecutableFunction Function, - BenchmarkRunner::ScratchSpace *Scratch, - std::optional BenchmarkCPU) + BenchmarkRunner::ScratchSpace *Scratch) : State(State), Function(std::move(Function)), Scratch(Scratch) {} static void accumulateCounterValues(const SmallVector &NewValues, @@ -393,29 +391,27 @@ class SubProcessFunctionExecutorImpl return make_error(ChildSignalInfo.si_signo); } - void setCPUAffinityIfRequested() const { - if (BenchmarkProcessCPU.has_value()) { - // Set the CPU affinity for the child process, so that we ensure that if - // the user specified a CPU the process should run on, the benchmarking - // process is running on that CPU. - cpu_set_t CPUMask; - CPU_ZERO(&CPUMask); - CPU_SET(*BenchmarkProcessCPU, &CPUMask); - // TODO(boomanaiden154): Rewrite this to use LLVM primitives once they - // are available. 
- int SetAffinityReturn = sched_setaffinity(0, sizeof(CPUMask), &CPUMask); - if (SetAffinityReturn == -1) { - exit(ChildProcessExitCodeE::SetCPUAffinityFailed); - } - - // Check (if assertions are enabled) that we are actually running on the - // CPU that was specified by the user. - unsigned int CurrentCPU; - assert(getcpu(&CurrentCPU, nullptr) == 0 && - "Expected getcpu call to succeed."); - assert(static_cast(CurrentCPU) == *BenchmarkProcessCPU && - "Expected current CPU to equal the CPU requested by the user"); + static void setCPUAffinityIfRequested(int CPUToUse) { + // Set the CPU affinity for the child process, so that we ensure that if + // the user specified a CPU the process should run on, the benchmarking + // process is running on that CPU. + cpu_set_t CPUMask; + CPU_ZERO(&CPUMask); + CPU_SET(CPUToUse, &CPUMask); + // TODO(boomanaiden154): Rewrite this to use LLVM primitives once they + // are available. + int SetAffinityReturn = sched_setaffinity(0, sizeof(CPUMask), &CPUMask); + if (SetAffinityReturn == -1) { + exit(ChildProcessExitCodeE::SetCPUAffinityFailed); } + + // Check (if assertions are enabled) that we are actually running on the + // CPU that was specified by the user. + [[maybe_unused]] unsigned int CurrentCPU; + assert(getcpu(&CurrentCPU, nullptr) == 0 && + "Expected getcpu call to succeed."); + assert(static_cast(CurrentCPU) == CPUToUse && + "Expected current CPU to equal the CPU requested by the user"); } Error createSubProcessAndRunBenchmark( @@ -450,7 +446,9 @@ class SubProcessFunctionExecutorImpl } if (ParentOrChildPID == 0) { - setCPUAffinityIfRequested(); + if (BenchmarkProcessCPU.has_value()) { + setCPUAffinityIfRequested(*BenchmarkProcessCPU); + } // We are in the child process, close the write end of the pipe. close(PipeFiles[1]);