
Commit

Merge pull request #222 from ChrisCummins/new-dataset-api
[datasets] Switch CompilerEnv to the new dataset API.
ChrisCummins authored Apr 29, 2021
2 parents 69bd98b + 0260132 commit 6f7b6ff
Showing 198 changed files with 2,275 additions and 3,024 deletions.
BUILD.bazel (4 changes: 2 additions & 2 deletions)
@@ -14,8 +14,8 @@ exports_files([
 py_library(
     name = "CompilerGym",
     data = [
-        "//compiler_gym/third_party/cBench:benchmarks_list",
-        "//compiler_gym/third_party/cBench:crc32",
+        "//compiler_gym/third_party/cbench:benchmarks_list",
+        "//compiler_gym/third_party/cbench:crc32",
     ],
     deps = [
         "//compiler_gym",
benchmarks/bench_test.py (45 changes: 10 additions & 35 deletions)
@@ -41,31 +41,6 @@ def env(request) -> CompilerEnv:
     yield request.param


-# Redefine this fixture since running all of the benchmarks in cBench would
-# take too long, but we do want to use at least one small and one large
-# benchmark to see both per-invocation overhead and overhead that is a result
-# of the size of the fixture.
-#
-# adpcm is small and jpeg-d is large. ghostscript is the largest but that
-# one takes too long.
-@pytest.fixture(
-    params=["cBench-v1/crc32", "cBench-v1/jpeg-d"],
-    ids=["fast_benchmark", "slow_benchmark"],
-)
-def benchmark_name(request) -> str:
-    yield request.param
-
-
-# @pytest.fixture(params=["cBench-v1/crc32"], ids=["fast_benchmark"])
-# def fast_benchmark_name(request) -> str:
-#     yield request.param
-
-
-# @pytest.fixture(params=["-globaldce", "-gvn"], ids=["fast_action", "slow_action"])
-# def action_name(request) -> str:
-#     yield request.param
-
-
 @pytest.mark.parametrize(
     "env_id",
     ["llvm-v0", "example-cc-v0", "example-py-v0"],
@@ -96,8 +71,8 @@ def test_make_service(benchmark, args):
 @pytest.mark.parametrize(
     "make_env",
     [
-        lambda: gym.make("llvm-autophase-ic-v0", benchmark="cBench-v1/crc32"),
-        lambda: gym.make("llvm-autophase-ic-v0", benchmark="cBench-v1/jpeg-d"),
+        lambda: gym.make("llvm-autophase-ic-v0", benchmark="cbench-v1/crc32"),
+        lambda: gym.make("llvm-autophase-ic-v0", benchmark="cbench-v1/jpeg-d"),
         lambda: gym.make("example-cc-v0"),
         lambda: gym.make("example-py-v0"),
     ],
@@ -112,16 +87,16 @@ def test_reset(benchmark, make_env: CompilerEnv):
     "args",
     [
         (
-            lambda: gym.make("llvm-autophase-ic-v0", benchmark="cBench-v1/crc32"),
+            lambda: gym.make("llvm-autophase-ic-v0", benchmark="cbench-v1/crc32"),
             "-globaldce",
         ),
-        (lambda: gym.make("llvm-autophase-ic-v0", benchmark="cBench-v1/crc32"), "-gvn"),
+        (lambda: gym.make("llvm-autophase-ic-v0", benchmark="cbench-v1/crc32"), "-gvn"),
         (
-            lambda: gym.make("llvm-autophase-ic-v0", benchmark="cBench-v1/jpeg-d"),
+            lambda: gym.make("llvm-autophase-ic-v0", benchmark="cbench-v1/jpeg-d"),
             "-globaldce",
         ),
         (
-            lambda: gym.make("llvm-autophase-ic-v0", benchmark="cBench-v1/jpeg-d"),
+            lambda: gym.make("llvm-autophase-ic-v0", benchmark="cbench-v1/jpeg-d"),
             "-gvn",
         ),
         (lambda: gym.make("example-cc-v0"), "a"),
@@ -146,7 +121,7 @@ def test_step(benchmark, args):

 _args = dict(
     {
-        f"llvm;{obs}": (lambda: gym.make("llvm-v0", benchmark="cBench-v1/qsort"), obs)
+        f"llvm;{obs}": (lambda: gym.make("llvm-v0", benchmark="cbench-v1/qsort"), obs)
         for obs in OBSERVATION_SPACE_NAMES
     },
     **{
@@ -167,7 +142,7 @@ def test_observation(benchmark, args):
 _args = dict(
     {
         f"llvm;{reward}": (
-            lambda: gym.make("llvm-v0", benchmark="cBench-v1/qsort"),
+            lambda: gym.make("llvm-v0", benchmark="cbench-v1/qsort"),
             reward,
         )
         for reward in REWARD_SPACE_NAMES
@@ -190,8 +165,8 @@ def test_reward(benchmark, args):
 @pytest.mark.parametrize(
     "make_env",
     [
-        lambda: gym.make("llvm-autophase-ic-v0", benchmark="cBench-v1/crc32"),
-        lambda: gym.make("llvm-autophase-ic-v0", benchmark="cBench-v1/jpeg-d"),
+        lambda: gym.make("llvm-autophase-ic-v0", benchmark="cbench-v1/crc32"),
+        lambda: gym.make("llvm-autophase-ic-v0", benchmark="cbench-v1/jpeg-d"),
         # TODO: Example service does not yet support fork() operator.
         # lambda: gym.make("example-cc-v0"),
         # lambda: gym.make("example-py-v0"),
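As a usage note on the rename above: the benchmark URIs in these tests switch from the "cBench-v1" prefix to the lowercase "cbench-v1". A minimal sketch of constructing an environment with one of the renamed URIs follows; the environment ID and URI are taken from the diff, while the reset/step calls are only the standard gym interface added here for illustration and are not part of this change.

import gym

import compiler_gym  # noqa: F401  Importing compiler_gym registers its environments with gym.

# The dataset prefix is now the lowercase "cbench-v1" rather than "cBench-v1".
env = gym.make("llvm-autophase-ic-v0", benchmark="cbench-v1/crc32")
try:
    env.reset()
    # Take one random action just to exercise the environment.
    observation, reward, done, info = env.step(env.action_space.sample())
finally:
    env.close()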
compiler_gym/bin/BUILD (2 changes: 2 additions & 0 deletions)
@@ -22,6 +22,7 @@ py_binary(
     srcs = ["datasets.py"],
     visibility = ["//visibility:public"],
     deps = [
+        ":service",
         "//compiler_gym/datasets",
         "//compiler_gym/envs",
         "//compiler_gym/util",
@@ -81,6 +82,7 @@ py_binary(
     srcs = ["service.py"],
     visibility = ["//visibility:public"],
     deps = [
+        "//compiler_gym/datasets",
         "//compiler_gym/envs",
         "//compiler_gym/spaces",
         "//compiler_gym/util",
compiler_gym/bin/datasets.py (100 changes: 16 additions & 84 deletions)
@@ -99,23 +99,14 @@
 A :code:`--delete_all` flag can be used to delete all of the locally installed
 datasets.
 """
-import os
 import sys
-from pathlib import Path
-from typing import Tuple

-import humanize
 from absl import app, flags
+from deprecated.sphinx import deprecated

-from compiler_gym.datasets.dataset import (
-    LegacyDataset,
-    activate,
-    deactivate,
-    delete,
-    require,
-)
+from compiler_gym.bin.service import summarize_datasets
+from compiler_gym.datasets.dataset import activate, deactivate, delete
 from compiler_gym.util.flags.env_from_flags import env_from_flags
-from compiler_gym.util.tabulate import tabulate

 flags.DEFINE_list(
     "download",
@@ -143,69 +134,43 @@
 FLAGS = flags.FLAGS


-def get_count_and_size_of_directory_contents(root: Path) -> Tuple[int, int]:
-    """Return the number of files and combined size of a directory."""
-    count, size = 0, 0
-    for root, _, files in os.walk(str(root)):
-        count += len(files)
-        size += sum(os.path.getsize(f"{root}/{file}") for file in files)
-    return count, size
-
-
-def enumerate_directory(name: str, path: Path):
-    rows = []
-    for path in path.iterdir():
-        if not path.is_file() or not path.name.endswith(".json"):
-            continue
-        dataset = LegacyDataset.from_json_file(path)
-        rows.append(
-            (dataset.name, dataset.license, dataset.file_count, dataset.size_bytes)
-        )
-    rows.append(("Total", "", sum(r[2] for r in rows), sum(r[3] for r in rows)))
-    return tabulate(
-        [(n, l, humanize.intcomma(f), humanize.naturalsize(s)) for n, l, f, s in rows],
-        headers=(name, "License", "#. Benchmarks", "Size on disk"),
-    )


+@deprecated(
+    version="0.1.8",
+    reason=(
+        "Command-line management of datasets is deprecated. Please use "
+        ":mod:`compiler_gym.bin.service` to print a tabular overview of the "
+        "available datasets. For management of datasets, use the "
+        ":class:`env.datasets <compiler_gym.env>` property."
+    ),
+)
 def main(argv):
     """Main entry point."""
     if len(argv) != 1:
         raise app.UsageError(f"Unknown command line arguments: {argv[1:]}")

     env = env_from_flags()
     try:
         if not env.datasets_site_path:
             raise app.UsageError("Environment has no benchmarks site path")

         env.datasets_site_path.mkdir(parents=True, exist_ok=True)
         env.inactive_datasets_site_path.mkdir(parents=True, exist_ok=True)

         invalidated_manifest = False

         for name_or_url in FLAGS.download:
-            require(env, name_or_url)
+            env.datasets.install(name_or_url)

         if FLAGS.download_all:
-            for dataset in env.available_datasets:
-                require(env, dataset)
+            for dataset in env.datasets:
+                dataset.install()

         for name in FLAGS.activate:
             activate(env, name)
             invalidated_manifest = True

         if FLAGS.activate_all:
             for path in env.inactive_datasets_site_path.iterdir():
                 activate(env, path.name)
             invalidated_manifest = True

         for name in FLAGS.deactivate:
             deactivate(env, name)
             invalidated_manifest = True

         if FLAGS.deactivate_all:
             for path in env.datasets_site_path.iterdir():
                 deactivate(env, path.name)
             invalidated_manifest = True

         for name in FLAGS.delete:
@@ -214,41 +179,8 @@ def main(argv):
         if invalidated_manifest:
             env.make_manifest_file()

-        print(f"{env.spec.id} benchmarks site dir: {env.datasets_site_path}")
-        print()
-        print(
-            enumerate_directory("Active Datasets", env.datasets_site_path),
-        )
-        print(
-            "These benchmarks are ready for use. Deactivate them using `--deactivate=<name>`."
-        )
-        print()
-        print(enumerate_directory("Inactive Datasets", env.inactive_datasets_site_path))
-        print("These benchmarks may be activated using `--activate=<name>`.")
-        print()
-        print(
-            tabulate(
-                sorted(
-                    [
-                        (
-                            d.name,
-                            d.license,
-                            humanize.intcomma(d.file_count),
-                            humanize.naturalsize(d.size_bytes),
-                        )
-                        for d in env.available_datasets.values()
-                    ]
-                ),
-                headers=(
-                    "Downloadable Dataset",
-                    "License",
-                    "#. Benchmarks",
-                    "Size on disk",
-                ),
-            )
-        )
         print(
-            "These benchmarks may be installed using `--download=<name> --activate=<name>`."
+            summarize_datasets(env.datasets),
         )
     finally:
         env.close()
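For context on the deprecation notice above, here is a minimal sketch of managing datasets through the env.datasets property instead of this command-line tool. The datasets.install(), dataset.install(), and summarize_datasets() calls mirror the new code in main(); the gym.make("llvm-v0") setup and the "cbench-v1" dataset name are illustrative assumptions rather than part of the diff.

import gym

import compiler_gym  # noqa: F401  Importing compiler_gym registers its environments with gym.
from compiler_gym.bin.service import summarize_datasets

env = gym.make("llvm-v0")
try:
    # Equivalent of --download=<name>: install a single dataset by name or URL.
    env.datasets.install("cbench-v1")

    # Equivalent of --download_all: install every dataset the environment knows about.
    for dataset in env.datasets:
        dataset.install()

    # Print the tabular overview that replaces the old enumerate_directory() output.
    print(summarize_datasets(env.datasets))
finally:
    env.close()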
(Diff truncated: the remaining changed files of the 198 are not shown here.)
