Skip to content

Commit

Permalink
[datasets] Update AnghaBench to v1.
Browse files Browse the repository at this point in the history
anghabench-v1 uses an amended manifest, but is otherwise the same.
  • Loading branch information
ChrisCummins committed Apr 30, 2021
1 parent 8803ad8 commit 98e7b62
Show file tree
Hide file tree
Showing 5 changed files with 43 additions and 18 deletions.
18 changes: 18 additions & 0 deletions compiler_gym/envs/llvm/datasets/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,24 @@ def get_llvm_datasets(site_data_base: Optional[Path] = None) -> Iterable[Dataset
site_data_base = site_data_base or site_data_path("llvm-v0")

yield AnghaBenchDataset(site_data_base=site_data_base, sort_order=0)
# Add the legacy version of AnghaBench (v0), which uses the old manifest.
anghabench_v0_manifest_url, anghabench_v0_manifest_sha256 = {
"darwin": (
"https://dl.fbaipublicfiles.com/compiler_gym/llvm_bitcodes-10.0.0-anghabench-v0-macos-manifest.bz2",
"39464256405aacefdb7550a7f990c9c578264c132804eec3daac091fa3c21bd1",
),
"linux": (
"https://dl.fbaipublicfiles.com/compiler_gym/llvm_bitcodes-10.0.0-anghabench-v0-linux-manifest.bz2",
"a038d25d39ee9472662a9704dfff19c9e3512ff6a70f1067af85c5cb3784b477",
),
}[sys.platform]
yield AnghaBenchDataset(
name="benchmark://anghabench-v0",
site_data_base=site_data_base,
sort_order=0,
manifest_url=anghabench_v0_manifest_url,
manifest_sha256=anghabench_v0_manifest_sha256,
)
yield BlasDataset(site_data_base=site_data_base, sort_order=0)
yield CLgenDataset(site_data_base=site_data_base, sort_order=0)
yield CBenchDataset(site_data_base=site_data_base, sort_order=-1)
Expand Down
25 changes: 16 additions & 9 deletions compiler_gym/envs/llvm/datasets/anghabench.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,28 +38,35 @@ class AnghaBenchDataset(TarDatasetWithManifest):
overhead of compiling it from C to bitcode. This is a one-off cost.
"""

def __init__(self, site_data_base: Path, sort_order: int = 0):
manifest_url, manifest_sha256 = {
def __init__(
self,
site_data_base: Path,
sort_order: int = 0,
manifest_url: str = None,
manifest_sha256: str = None,
name: str = None,
):
manifest_url_, manifest_sha256_ = {
"darwin": (
"https://dl.fbaipublicfiles.com/compiler_gym/llvm_bitcodes-10.0.0-anghabench-v0-macos-manifest.bz2",
"39464256405aacefdb7550a7f990c9c578264c132804eec3daac091fa3c21bd1",
"https://dl.fbaipublicfiles.com/compiler_gym/llvm_bitcodes-10.0.0-anghabench-v1-macos-manifest.bz2",
"96ead63da5f8efa07fd0370f0c6e452b59bed840828b8b19402102b1ce3ee109",
),
"linux": (
"https://dl.fbaipublicfiles.com/compiler_gym/llvm_bitcodes-10.0.0-anghabench-v0-linux-manifest.bz2",
"a038d25d39ee9472662a9704dfff19c9e3512ff6a70f1067af85c5cb3784b477",
"https://dl.fbaipublicfiles.com/compiler_gym/llvm_bitcodes-10.0.0-anghabench-v1-linux-manifest.bz2",
"14df85f650199498cf769715e9f0d7841d09f9fa62a95b8ecc242bdaf227f33a",
),
}[sys.platform]
super().__init__(
name="benchmark://anghabench-v0",
name=name or "benchmark://anghabench-v1",
description="Compile-only C/C++ functions extracted from GitHub",
references={
"Paper": "https://homepages.dcc.ufmg.br/~fernando/publications/papers/FaustinoCGO21.pdf",
"Homepage": "http://cuda.dcc.ufmg.br/angha/",
},
license="Unknown. See: https://github.com/brenocfg/AnghaBench/issues/1",
site_data_base=site_data_base,
manifest_urls=[manifest_url],
manifest_sha256=manifest_sha256,
manifest_urls=[manifest_url or manifest_url_],
manifest_sha256=manifest_sha256 or manifest_sha256_,
tar_urls=[
"https://github.com/brenocfg/AnghaBench/archive/d8034ac8562b8c978376008f4b33df01b8887b19.tar.gz"
],
Expand Down
2 changes: 1 addition & 1 deletion docs/source/llvm/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ We provide several datasets of open-source LLVM-IR benchmarks for use:
+----------------------------+--------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+----------------------+
| Dataset | Num. Benchmarks [#f1]_ | Description | Validatable [#f2]_ |
+============================+==========================+====================================================================================================================================================================================================================+======================+
| benchmark://anghabench-v0 | 1,042,976 | Compile-only C/C++ functions extracted from GitHub [`Homepage <http://cuda.dcc.ufmg.br/angha/>`__, `Paper <https://homepages.dcc.ufmg.br/~fernando/publications/papers/FaustinoCGO21.pdf>`__] | No |
| benchmark://anghabench-v1 | 1,041,333 | Compile-only C/C++ functions extracted from GitHub [`Homepage <http://cuda.dcc.ufmg.br/angha/>`__, `Paper <https://homepages.dcc.ufmg.br/~fernando/publications/papers/FaustinoCGO21.pdf>`__] | No |
+----------------------------+--------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+----------------------+
| benchmark://blas-v0 | 300 | Basic linear algebra kernels [`Homepage <http://www.netlib.org/blas/>`__, `Paper <https://strum355.netsoc.co/books/PDF/Basic%20Linear%20Algebra%20Subprograms%20for%20Fortran%20Usage%20-%20BLAS%20(1979).pdf>`__] | No |
+----------------------------+--------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+----------------------+
Expand Down
14 changes: 7 additions & 7 deletions tests/llvm/datasets/anghabench_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,33 +23,33 @@
def anghabench_dataset() -> AnghaBenchDataset:
env = gym.make("llvm-v0")
try:
ds = env.datasets["anghabench-v0"]
ds = env.datasets["anghabench-v1"]
finally:
env.close()
yield ds


def test_anghabench_size(anghabench_dataset: AnghaBenchDataset):
if sys.platform == "darwin":
assert anghabench_dataset.size == 1042908
assert anghabench_dataset.size == 1041265
else:
assert anghabench_dataset.size == 1042976
assert anghabench_dataset.size == 1041333


def test_missing_benchmark_name(anghabench_dataset: AnghaBenchDataset, mocker):
# Mock install() so that on CI it doesn't download and unpack the tarfile.
mocker.patch.object(anghabench_dataset, "install")

with pytest.raises(
LookupError, match=r"^No benchmark specified: benchmark://anghabench-v0$"
LookupError, match=r"^No benchmark specified: benchmark://anghabench-v1$"
):
anghabench_dataset.benchmark("benchmark://anghabench-v0")
anghabench_dataset.benchmark("benchmark://anghabench-v1")
anghabench_dataset.install.assert_called_once()

with pytest.raises(
LookupError, match=r"^No benchmark specified: benchmark://anghabench-v0/$"
LookupError, match=r"^No benchmark specified: benchmark://anghabench-v1/$"
):
anghabench_dataset.benchmark("benchmark://anghabench-v0/")
anghabench_dataset.benchmark("benchmark://anghabench-v1/")
assert anghabench_dataset.install.call_count == 2


Expand Down
2 changes: 1 addition & 1 deletion tests/llvm/datasets/llvm_datasets_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ def test_default_dataset_list():
try:
assert list(d.name for d in env.datasets) == [
"benchmark://cbench-v1",
"benchmark://anghabench-v0",
"benchmark://anghabench-v1",
"benchmark://blas-v0",
"benchmark://clgen-v0",
"benchmark://github-v0",
Expand Down

0 comments on commit 98e7b62

Please sign in to comment.