Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[llvm] Add a method to compute .text size of the compiled binary #575

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 13 additions & 2 deletions compiler_gym/envs/compiler_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -737,12 +737,23 @@ def _retry(error) -> Optional[ObservationType]:
"""Abort and retry on error."""
logger.warning("%s during reset(): %s", type(error).__name__, error)
if self.service:
self.service.close()
try:
self.service.close()
except ServiceError as e:
# close() can raise ServiceError if the service exits with
# a non-zero return code. We swallow the error here as we
# are about to retry.
logger.debug(
"Ignoring service error during reset() attempt: %s (%s)",
e,
type(e).__name__,
)
self.service = None

if retry_count >= self._connection_settings.init_max_attempts:
raise OSError(
f"Failed to reset environment using benchmark {self.benchmark} after {retry_count - 1} attempts.\n"
"Failed to reset environment using benchmark "
f"{self.benchmark} after {retry_count - 1} attempts.\n"
f"Last error ({type(error).__name__}): {error}"
) from error
else:
Expand Down
3 changes: 0 additions & 3 deletions compiler_gym/envs/llvm/llvm_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,9 +119,6 @@ def get_system_library_flags(compiler: Optional[str] = None) -> List[str]:
:code:`c++` is invoked. This can be overridden by setting
:code:`os.environ["CXX"]` prior to calling this function.

The results of this function are cached, so changes to CXX will have no
effect on subsequent calls.

:return: A list of command line flags for a compiler.

:raises HostCompilerFailure: If the host compiler cannot be determined, or
Expand Down
37 changes: 37 additions & 0 deletions compiler_gym/envs/llvm/llvm_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,43 @@ def __init__(
deterministic=True,
platform_dependent=True,
),
CostFunctionReward(
name="TextSizeBytes",
cost_function="TextSizeBytes",
init_cost_function="TextSizeO0",
default_negates_returns=True,
deterministic=True,
platform_dependent=True,
),
NormalizedReward(
name="TextSizeNorm",
cost_function="TextSizeBytes",
init_cost_function="TextSizeO0",
max=1,
default_negates_returns=True,
deterministic=True,
platform_dependent=True,
),
BaselineImprovementNormalizedReward(
name="TextSizeO3",
cost_function="TextSizeBytes",
init_cost_function="TextSizeO0",
baseline_cost_function="TextSizeO3",
success_threshold=1,
default_negates_returns=True,
deterministic=True,
platform_dependent=True,
),
BaselineImprovementNormalizedReward(
name="TextSizeOz",
cost_function="TextSizeBytes",
init_cost_function="TextSizeO0",
baseline_cost_function="TextSizeOz",
success_threshold=1,
default_negates_returns=True,
deterministic=True,
platform_dependent=True,
),
],
derived_observation_spaces=[
{
Expand Down
15 changes: 15 additions & 0 deletions compiler_gym/envs/llvm/service/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ cc_library(
srcs = ["Benchmark.cc"],
hdrs = ["Benchmark.h"],
deps = [
":BenchmarkDynamicConfig",
":Cost",
"//compiler_gym/service/proto:compiler_gym_service_cc",
"//compiler_gym/util:GrpcStatusMacros",
Expand All @@ -104,12 +105,25 @@ cc_library(
],
)

cc_library(
name = "BenchmarkDynamicConfig",
srcs = ["BenchmarkDynamicConfig.cc"],
hdrs = ["BenchmarkDynamicConfig.h"],
deps = [
"//compiler_gym/service/proto:compiler_gym_service_cc",
"//compiler_gym/util:RunfilesPath",
"//compiler_gym/util:Subprocess",
"@boost//:filesystem",
],
)

cc_library(
name = "BenchmarkFactory",
srcs = ["BenchmarkFactory.cc"],
hdrs = ["BenchmarkFactory.h"],
deps = [
":Benchmark",
":BenchmarkDynamicConfig",
":Cost",
"//compiler_gym/service/proto:compiler_gym_service_cc",
"//compiler_gym/util:GrpcStatusMacros",
Expand Down Expand Up @@ -177,6 +191,7 @@ cc_library(
srcs = ["Cost.cc"],
hdrs = ["Cost.h"],
deps = [
":BenchmarkDynamicConfig",
"//compiler_gym/util:GrpcStatusMacros",
"//compiler_gym/util:RunfilesPath",
"//compiler_gym/util:Subprocess",
Expand Down
111 changes: 36 additions & 75 deletions compiler_gym/envs/llvm/service/Benchmark.cc
Original file line number Diff line number Diff line change
Expand Up @@ -54,31 +54,9 @@ std::unique_ptr<llvm::Module> makeModuleOrDie(llvm::LLVMContext& context, const
return module;
}

RealizedBenchmarkDynamicConfig realizeDynamicConfig(const BenchmarkDynamicConfig& original,
const fs::path& scratchDirectory) {
BenchmarkDynamicConfig cfg;
cfg.CopyFrom(original);

// Set up the environment variables.
(*cfg.mutable_build_cmd()->mutable_env())["CC"] =
util::getSiteDataPath("llvm-v0/bin/clang").string();
(*cfg.mutable_build_cmd()->mutable_env())["IN"] = (scratchDirectory / "out.bc").string();

// Register the IR as a pre-requisite build file.
cfg.mutable_build_cmd()->add_infile((scratchDirectory / "out.bc").string());

return RealizedBenchmarkDynamicConfig(cfg);
}
} // anonymous namespace

/**
* Create a temporary directory to use as a scratch pad for on-disk storage.
* This directory is guaranteed to exist.
*
* Errors in this function are fatal.
*
* @return fs::path A path.
*/
fs::path createScratchDirectoryOrDie() {
fs::path createBenchmarkScratchDirectoryOrDie() {
const fs::path cacheRoot = util::getCacheRootPath();
const fs::path dir = fs::unique_path(cacheRoot / "benchmark-scratch-%%%%-%%%%");

Expand All @@ -88,8 +66,6 @@ fs::path createScratchDirectoryOrDie() {
return dir;
}

} // anonymous namespace

Status readBitcodeFile(const fs::path& path, Bitcode* bitcode) {
std::ifstream ifs(path.string());
if (ifs.fail()) {
Expand Down Expand Up @@ -121,42 +97,48 @@ std::unique_ptr<llvm::Module> makeModule(llvm::LLVMContext& context, const Bitco
const std::string& name, Status* status) {
llvm::MemoryBufferRef buffer(llvm::StringRef(bitcode.data(), bitcode.size()), name);
VLOG(3) << "llvm::parseBitcodeFile(" << bitcode.size() << " bits)";

llvm::Expected<std::unique_ptr<llvm::Module>> moduleOrError =
llvm::parseBitcodeFile(buffer, context);
if (moduleOrError) {
*status = Status::OK;
std::unique_ptr<llvm::Module> module = std::move(moduleOrError.get());

// Strip the module identifiers and source file names from the module to
// anonymize them. This is to deter learning algorithms from overfitting to
// benchmarks by their name.
module->setModuleIdentifier("-");
module->setSourceFileName("-");

// Strip module debug info.
llvm::StripDebugInfo(*module);

// Erase module-level named metadata.
while (!module->named_metadata_empty()) {
llvm::NamedMDNode* nmd = &*module->named_metadata_begin();
module->eraseNamedMetadata(nmd);
}

return module;
} else {
if (auto error = moduleOrError.takeError()) {
*status = Status(StatusCode::INVALID_ARGUMENT,
fmt::format("Failed to parse LLVM bitcode: \"{}\"", name));
return nullptr;
}

*status = Status::OK;
std::unique_ptr<llvm::Module> module = std::move(moduleOrError.get());

if (!module) {
*status = Status(StatusCode::INTERNAL, "llvm::parseBitcodeFile return null");
return nullptr;
}

// Strip the module identifiers and source file names from the module to
// anonymize them. This is to deter learning algorithms from overfitting to
// benchmarks by their name.
module->setModuleIdentifier("-");
module->setSourceFileName("-");

// Strip module debug info.
llvm::StripDebugInfo(*module);

// Erase module-level named metadata.
while (!module->named_metadata_empty()) {
llvm::NamedMDNode* nmd = &*module->named_metadata_begin();
module->eraseNamedMetadata(nmd);
}

return module;
}

// A benchmark is an LLVM module and the LLVM context that owns it.
Benchmark::Benchmark(const std::string& name, const Bitcode& bitcode,
const BenchmarkDynamicConfig& dynamicConfig, const fs::path& workingDirectory,
const BaselineCosts& baselineCosts)
const compiler_gym::BenchmarkDynamicConfig& dynamicConfig,
const fs::path& workingDirectory, const BaselineCosts& baselineCosts)
: context_(std::make_unique<llvm::LLVMContext>()),
module_(makeModuleOrDie(*context_, bitcode, name)),
scratchDirectory_(createScratchDirectoryOrDie()),
scratchDirectory_(createBenchmarkScratchDirectoryOrDie()),
dynamicConfigProto_(dynamicConfig),
dynamicConfig_(realizeDynamicConfig(dynamicConfig, scratchDirectory_)),
baselineCosts_(baselineCosts),
Expand All @@ -168,11 +150,11 @@ Benchmark::Benchmark(const std::string& name, const Bitcode& bitcode,

Benchmark::Benchmark(const std::string& name, std::unique_ptr<llvm::LLVMContext> context,
std::unique_ptr<llvm::Module> module,
const BenchmarkDynamicConfig& dynamicConfig, const fs::path& workingDirectory,
const BaselineCosts& baselineCosts)
const compiler_gym::BenchmarkDynamicConfig& dynamicConfig,
const fs::path& workingDirectory, const BaselineCosts& baselineCosts)
: context_(std::move(context)),
module_(std::move(module)),
scratchDirectory_(createScratchDirectoryOrDie()),
scratchDirectory_(createBenchmarkScratchDirectoryOrDie()),
dynamicConfigProto_(dynamicConfig),
dynamicConfig_(realizeDynamicConfig(dynamicConfig, scratchDirectory_)),
baselineCosts_(baselineCosts),
Expand Down Expand Up @@ -222,7 +204,7 @@ Status Benchmark::writeBitcodeToFile(const fs::path& path) {
}

Status Benchmark::computeRuntime(Event& observation) {
const RealizedBenchmarkDynamicConfig& cfg = dynamicConfig();
const BenchmarkDynamicConfig& cfg = dynamicConfig();

if (!cfg.isRunnable()) {
return Status::OK;
Expand Down Expand Up @@ -333,25 +315,4 @@ bool Benchmark::applyBaselineOptimizations(unsigned optLevel, unsigned sizeLevel
return applyBaselineOptimizationsToModule(&module(), optLevel, sizeLevel);
}

namespace {

std::vector<util::LocalShellCommand> commandsFromProto(
const google::protobuf::RepeatedPtrField<Command>& cmds) {
std::vector<util::LocalShellCommand> outs;
for (const auto& cmd : cmds) {
outs.push_back(util::LocalShellCommand(cmd));
}
return outs;
}

} // anonymous namespace

RealizedBenchmarkDynamicConfig::RealizedBenchmarkDynamicConfig(const BenchmarkDynamicConfig& cfg)
: buildCommand_(cfg.build_cmd()),
runCommand_(cfg.run_cmd()),
preRunCommands_(commandsFromProto(cfg.pre_run_cmd())),
postRunCommands_(commandsFromProto(cfg.post_run_cmd())),
isBuildable_(!buildCommand_.empty()),
isRunnable_(!(buildCommand_.empty() || runCommand_.empty())) {}

} // namespace compiler_gym::llvm_service
49 changes: 17 additions & 32 deletions compiler_gym/envs/llvm/service/Benchmark.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include <vector>

#include "boost/filesystem.hpp"
#include "compiler_gym/envs/llvm/service/BenchmarkDynamicConfig.h"
#include "compiler_gym/envs/llvm/service/Cost.h"
#include "compiler_gym/service/proto/compiler_gym_service.pb.h"
#include "compiler_gym/util/Subprocess.h"
Expand Down Expand Up @@ -82,33 +83,6 @@ grpc::Status writeBitcodeFile(const llvm::Module& module, const boost::filesyste
std::unique_ptr<llvm::Module> makeModule(llvm::LLVMContext& context, const Bitcode& bitcode,
const std::string& name, grpc::Status* status);

/**
* Represents a BenchmarkDynamicConfig protocol buffer.
*/
class RealizedBenchmarkDynamicConfig {
public:
explicit RealizedBenchmarkDynamicConfig(const BenchmarkDynamicConfig& cfg);

inline const util::LocalShellCommand& buildCommand() const { return buildCommand_; };
inline const util::LocalShellCommand& runCommand() const { return runCommand_; };
inline const std::vector<util::LocalShellCommand>& preRunCommands() const {
return preRunCommands_;
};
inline const std::vector<util::LocalShellCommand>& postRunCommands() const {
return postRunCommands_;
};
inline bool isBuildable() const { return isBuildable_; }
inline bool isRunnable() const { return isRunnable_; }

private:
const util::LocalShellCommand buildCommand_;
const util::LocalShellCommand runCommand_;
const std::vector<util::LocalShellCommand> preRunCommands_;
const std::vector<util::LocalShellCommand> postRunCommands_;
const bool isBuildable_;
const bool isRunnable_;
};

/**
* An LLVM module and the LLVM context that owns it.
*
Expand All @@ -120,14 +94,15 @@ class Benchmark {
* Construct a benchmark from a bitcode.
*/
Benchmark(const std::string& name, const Bitcode& bitcode,
const BenchmarkDynamicConfig& dynamicConfig,
const compiler_gym::BenchmarkDynamicConfig& dynamicConfig,
const boost::filesystem::path& workingDirectory, const BaselineCosts& baselineCosts);

/**
* Construct a benchmark from an LLVM module.
*/
Benchmark(const std::string& name, std::unique_ptr<llvm::LLVMContext> context,
std::unique_ptr<llvm::Module> module, const BenchmarkDynamicConfig& dynamicConfig,
std::unique_ptr<llvm::Module> module,
const compiler_gym::BenchmarkDynamicConfig& dynamicConfig,
const boost::filesystem::path& workingDirectory, const BaselineCosts& baselineCosts);

void close();
Expand Down Expand Up @@ -232,7 +207,7 @@ class Benchmark {
/**
* A reference to the dynamic configuration object.
*/
inline const RealizedBenchmarkDynamicConfig& dynamicConfig() const { return dynamicConfig_; }
inline const BenchmarkDynamicConfig& dynamicConfig() const { return dynamicConfig_; }

inline bool isBuildable() const { return dynamicConfig().isBuildable(); }

Expand Down Expand Up @@ -285,8 +260,8 @@ class Benchmark {
std::unique_ptr<llvm::LLVMContext> context_;
std::unique_ptr<llvm::Module> module_;
const boost::filesystem::path scratchDirectory_;
const BenchmarkDynamicConfig dynamicConfigProto_;
const RealizedBenchmarkDynamicConfig dynamicConfig_;
const compiler_gym::BenchmarkDynamicConfig dynamicConfigProto_;
const BenchmarkDynamicConfig dynamicConfig_;
const BaselineCosts baselineCosts_;
/** The directory used for storing build / runtime artifacts. The difference
* between the scratch directory and the working directory is that the working
Expand All @@ -301,4 +276,14 @@ class Benchmark {
int buildtimesPerObservationCount_;
};

/**
* Create a temporary directory to use as a scratch pad for on-disk storage.
* This directory is guaranteed to exist.
*
* Errors in this function are fatal.
*
* @return The path of the newly created scratch directory.
*/
boost::filesystem::path createBenchmarkScratchDirectoryOrDie();

} // namespace compiler_gym::llvm_service
Loading