diff --git a/.gitignore b/.gitignore
index b79ce264c8..5a70b5f080 100644
--- a/.gitignore
+++ b/.gitignore
@@ -91,3 +91,6 @@
 tests/.hypothesis
 zarr/version.py
 zarr.egg-info/
+
+# air speed velocity benchmarking
+/.asv/
\ No newline at end of file
diff --git a/asv.conf.jsonc b/asv.conf.jsonc
new file mode 100644
index 0000000000..2361b1114a
--- /dev/null
+++ b/asv.conf.jsonc
@@ -0,0 +1,90 @@
+{
+    // The version of the config file format. Do not change, unless
+    // you know what you are doing.
+    "version": 1,
+
+    // The name of the project being benchmarked
+    "project": "zarr",
+
+    // The project's homepage
+    "project_url": "https://zarr.readthedocs.io/",
+
+    // The URL or local path of the source code repository for the
+    // project being benchmarked
+    "repo": ".",
+
+    // List of branches to benchmark. If not provided, defaults to "master"
+    // (for git) or "tip" (for mercurial).
+    "branches": ["HEAD"],
+
+    "build_command": [
+        "python -m build --wheel -o {build_cache_dir} {build_dir}"
+    ],
+
+    // The DVCS being used. If not set, it will be automatically
+    // determined from "repo" by looking at the protocol in the URL
+    // (if remote), or by looking for special directories, such as
+    // ".git" (if local).
+    "dvcs": "git",
+
+    // The tool to use to create environments. May be "conda",
+    // "virtualenv" or other value depending on the plugins in use.
+    // If missing or the empty string, the tool will be automatically
+    // determined by looking for tools on the PATH environment
+    // variable.
+    "environment_type": "virtualenv",
+
+    // the base URL to show a commit for the project.
+    "show_commit_url": "https://github.com/zarr-developers/zarr-python/commit/",
+
+    // The Pythons you'd like to test against. If not provided, defaults
+    // to the current version of Python used to run `asv`.
+    // "pythons": ["3.9"],
+
+    // The matrix of dependencies to test. Each key is the name of a
+    // package (in PyPI) and the values are version numbers. An empty
+    // list indicates to just test against the default (latest)
+    // version.
+    "matrix": {
+        "build": [],
+        "packaging": []
+    },
+
+    // The directory (relative to the current directory) that benchmarks are
+    // stored in. If not provided, defaults to "benchmarks"
+    "benchmark_dir": "benchmarks",
+
+    // The directory (relative to the current directory) to cache the Python
+    // environments in. If not provided, defaults to "env"
+    "env_dir": ".asv/env",
+
+
+    // The directory (relative to the current directory) that raw benchmark
+    // results are stored in. If not provided, defaults to "results".
+    "results_dir": ".asv/results",
+
+    // The directory (relative to the current directory) that the html tree
+    // should be written to. If not provided, defaults to "html".
+    "html_dir": ".asv/html",
+
+    // The number of characters to retain in the commit hashes.
+    // "hash_length": 8,
+
+    // `asv` will cache wheels of the recent builds in each
+    // environment, making them faster to install next time. This is
+    // number of builds to keep, per environment.
+    "build_cache_size": 8,
+
+    // The commits after which the regression search in `asv publish`
+    // should start looking for regressions. Dictionary whose keys are
+    // regexps matching to benchmark names, and values corresponding to
+    // the commit (exclusive) after which to start looking for
+    // regressions. The default is to start from the first commit
+    // with results. If the commit is `null`, regression detection is
+    // skipped for the matching benchmark.
+    //
+    // "regressions_first_commits": {
+    //    "some_benchmark": "352cdf", // Consider regressions only after this commit
+    //    "another_benchmark": null,  // Skip regression detection altogether
+    // }
+}
\ No newline at end of file
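(Annotation, not part of the patch: the configuration above is mostly the stock asv template. The choices specific to this PR are `"branches": ["HEAD"]`, which benchmarks whatever commit is currently checked out rather than a fixed branch; a `matrix` that pins only `build` and `packaging`, presumably because the PEP 517 `build_command` needs them, with empty lists meaning "latest"; and `.asv/` as the home for environments, results, and generated HTML, which is why the `.gitignore` hunk above adds `/.asv/`. With this file at the repository root, a plain `asv run`, followed by `asv publish` and `asv preview` to inspect results, should pick it up automatically.)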
diff --git a/bench/compress_normal.py b/bench/compress_normal.py
deleted file mode 100644
index 64204cd1e5..0000000000
--- a/bench/compress_normal.py
+++ /dev/null
@@ -1,46 +0,0 @@
-import sys
-import timeit
-
-import blosc
-import line_profiler
-import numpy as np
-
-import zarr
-
-if __name__ == "__main__":
-    sys.path.insert(0, "..")
-
-    # setup
-    a = np.random.normal(2000, 1000, size=200000000).astype("u2")
-    z = zarr.empty_like(
-        a,
-        chunks=1000000,
-        compression="blosc",
-        compression_opts={"cname": "lz4", "clevel": 5, "shuffle": 2},
-    )
-    print(z)
-
-    print("*" * 79)
-
-    # time
-    t = timeit.repeat("z[:] = a", repeat=10, number=1, globals=globals())
-    print(t)
-    print(min(t))
-    print(z)
-
-    # profile
-    profile = line_profiler.LineProfiler(blosc.compress)
-    profile.run("z[:] = a")
-    profile.print_stats()
-
-    print("*" * 79)
-
-    # time
-    t = timeit.repeat("z[:]", repeat=10, number=1, globals=globals())
-    print(t)
-    print(min(t))
-
-    # profile
-    profile = line_profiler.LineProfiler(blosc.decompress)
-    profile.run("z[:]")
-    profile.print_stats()
diff --git a/bench/compress_normal.txt b/bench/compress_normal.txt
deleted file mode 100644
index e5d6be6aeb..0000000000
--- a/bench/compress_normal.txt
+++ /dev/null
@@ -1,160 +0,0 @@
-zarr.core.Array((200000000,), uint16, chunks=(1000000,), order=C)
-  compression: blosc; compression_opts: {'clevel': 5, 'cname': 'lz4', 'shuffle': 2}
-  nbytes: 381.5M; nbytes_stored: 294; ratio: 1360544.2; initialized: 0/200
-  store: builtins.dict
-*******************************************************************************
-[0.27119584499996563, 0.2855067059999783, 0.2887747180002407, 0.3058794240005227, 0.3139041080003153, 0.3021271820007314, 0.31543190899992624, 0.31403100900024583, 0.3272544129995367, 0.31834129100025166]
-0.27119584499996563
-zarr.core.Array((200000000,), uint16, chunks=(1000000,), order=C)
-  compression: blosc; compression_opts: {'clevel': 5, 'cname': 'lz4', 'shuffle': 2}
-  nbytes: 381.5M; nbytes_stored: 314.1M; ratio: 1.2; initialized: 200/200
-  store: builtins.dict
-Timer unit: 1e-06 s
-
-Total time: 0.297223 s
-File: /home/aliman/code/github/alimanfoo/zarr/zarr/blosc.pyx
-Function: compress at line 137
-
-Line #      Hits         Time  Per Hit   % Time  Line Contents
-==============================================================
-   137                                           def compress(source, char* cname, int clevel, int shuffle):
-   138                                               """Compress data in a numpy array.
-   139
-   140                                               Parameters
-   141                                               ----------
-   142                                               source : array-like
-   143                                                   Data to be compressed.
-   144                                               cname : bytes
-   145                                                   Name of compression library to use.
-   146                                               clevel : int
-   147                                                   Compression level.
-   148                                               shuffle : int
-   149                                                   Shuffle filter.
-   150
-   151                                               Returns
-   152                                               -------
-   153                                               dest : bytes-like
-   154                                                   Compressed data.
-   155
-   156                                               """
-   157
-   158                                               cdef:
-   159                                                   char *source_ptr
-   160                                                   char *dest_ptr
-   161                                                   Py_buffer source_buffer
-   162                                                   size_t nbytes, cbytes, itemsize
-   163       200          506      2.5      0.2          array.array char_array_template = array.array('b', [])
-   164                                                   array.array dest
-   165
-   166                                               # setup source buffer
-   167       200          458      2.3      0.2      PyObject_GetBuffer(source, &source_buffer, PyBUF_ANY_CONTIGUOUS)
-   168       200          119      0.6      0.0      source_ptr = source_buffer.buf
-   169
-   170                                               # setup destination
-   171       200          239      1.2      0.1      nbytes = source_buffer.len
-   172       200          103      0.5      0.0      itemsize = source_buffer.itemsize
-   173       200         2286     11.4      0.8      dest = array.clone(char_array_template, nbytes + BLOSC_MAX_OVERHEAD,
-   174                                                                  zero=False)
-   175       200          129      0.6      0.0      dest_ptr = dest.data.as_voidptr
-   176
-   177                                               # perform compression
-   178       200         1734      8.7      0.6      if _get_use_threads():
-   179                                                   # allow blosc to use threads internally
-   180       200          167      0.8      0.1          compressor_set = blosc_set_compressor(cname)
-   181       200           94      0.5      0.0          if compressor_set < 0:
-   182                                                       raise ValueError('compressor not supported: %r' % cname)
-   183       200       288570   1442.8     97.1          with nogil:
-   184                                                       cbytes = blosc_compress(clevel, shuffle, itemsize, nbytes,
-   185                                                                               source_ptr, dest_ptr,
-   186                                                                               nbytes + BLOSC_MAX_OVERHEAD)
-   187
-   188                                               else:
-   189                                                   with nogil:
-   190                                                       cbytes = blosc_compress_ctx(clevel, shuffle, itemsize, nbytes,
-   191                                                                                   source_ptr, dest_ptr,
-   192                                                                                   nbytes + BLOSC_MAX_OVERHEAD, cname,
-   193                                                                                   0, 1)
-   194
-   195                                               # release source buffer
-   196       200          616      3.1      0.2      PyBuffer_Release(&source_buffer)
-   197
-   198                                               # check compression was successful
-   199       200          120      0.6      0.0      if cbytes <= 0:
-   200                                                   raise RuntimeError('error during blosc compression: %d' % cbytes)
-   201
-   202                                               # resize after compression
-   203       200         1896      9.5      0.6      array.resize(dest, cbytes)
-   204
-   205       200          186      0.9      0.1      return dest
-
-*******************************************************************************
-[0.24293352799941204, 0.2324290420001489, 0.24935673900017719, 0.25716222699975333, 0.24246313799994823, 0.23272456500035332, 0.2636815870000646, 0.2576046349995522, 0.2781278639995435, 0.23824110699933954]
-0.2324290420001489
-Timer unit: 1e-06 s
-
-Total time: 0.240178 s
-File: /home/aliman/code/github/alimanfoo/zarr/zarr/blosc.pyx
-Function: decompress at line 75
-
-Line #      Hits         Time  Per Hit   % Time  Line Contents
-==============================================================
-    75                                           def decompress(source, dest):
-    76                                               """Decompress data.
-    77
-    78                                               Parameters
-    79                                               ----------
-    80                                               source : bytes-like
-    81                                                   Compressed data, including blosc header.
-    82                                               dest : array-like
-    83                                                   Object to decompress into.
-    84
-    85                                               Notes
-    86                                               -----
-    87                                               Assumes that the size of the destination buffer is correct for the size of
-    88                                               the uncompressed data.
-    89
-    90                                               """
-    91                                               cdef:
-    92                                                   int ret
-    93                                                   char *source_ptr
-    94                                                   char *dest_ptr
-    95                                                   Py_buffer source_buffer
-    96                                                   array.array source_array
-    97                                                   Py_buffer dest_buffer
-    98                                                   size_t nbytes
-    99
-   100                                               # setup source buffer
-   101       200          573      2.9      0.2      if PY2 and isinstance(source, array.array):
-   102                                                   # workaround fact that array.array does not support new-style buffer
-   103                                                   # interface in PY2
-   104                                                   release_source_buffer = False
-   105                                                   source_array = source
-   106                                                   source_ptr = source_array.data.as_voidptr
-   107                                               else:
-   108       200          112      0.6      0.0          release_source_buffer = True
-   109       200          144      0.7      0.1          PyObject_GetBuffer(source, &source_buffer, PyBUF_ANY_CONTIGUOUS)
-   110       200           98      0.5      0.0          source_ptr = source_buffer.buf
-   111
-   112                                               # setup destination buffer
-   113       200          552      2.8      0.2      PyObject_GetBuffer(dest, &dest_buffer,
-   114                                                                  PyBUF_ANY_CONTIGUOUS | PyBUF_WRITEABLE)
-   115       200          100      0.5      0.0      dest_ptr = dest_buffer.buf
-   116       200           84      0.4      0.0      nbytes = dest_buffer.len
-   117
-   118                                               # perform decompression
-   119       200         1856      9.3      0.8      if _get_use_threads():
-   120                                                   # allow blosc to use threads internally
-   121       200       235286   1176.4     98.0          with nogil:
-   122                                                       ret = blosc_decompress(source_ptr, dest_ptr, nbytes)
-   123                                               else:
-   124                                                   with nogil:
-   125                                                       ret = blosc_decompress_ctx(source_ptr, dest_ptr, nbytes, 1)
-   126
-   127                                               # release buffers
-   128       200          754      3.8      0.3      if release_source_buffer:
-   129       200          326      1.6      0.1          PyBuffer_Release(&source_buffer)
-   130       200          165      0.8      0.1      PyBuffer_Release(&dest_buffer)
-   131
-   132                                               # handle errors
-   133       200          128      0.6      0.1      if ret <= 0:
-   134                                                   raise RuntimeError('error during blosc decompression: %d' % ret)
diff --git a/benchmarks/__init__.py b/benchmarks/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/benchmarks/benchmarks.py b/benchmarks/benchmarks.py
new file mode 100644
index 0000000000..c2d1530f65
--- /dev/null
+++ b/benchmarks/benchmarks.py
@@ -0,0 +1,28 @@
+import zarr
+
+
+class OneDIndexingSuite:
+    def time_1d_fill_no_sharding_no_compression(self) -> None:
+        array = zarr.create_array(
+            store={},
+            shape=(1000000,),
+            dtype="i4",
+            compressors=None,
+            filters=None,
+            chunks=(10000,),
+            fill_value=0,
+        )
+        array[:] = 1
+
+    def time_1d_fill_sharding_no_compression(self) -> None:
+        array = zarr.create_array(
+            store={},
+            shape=(1000000,),
+            dtype="i4",
+            compressors=None,
+            filters=None,
+            chunks=(10000,),
+            shards=(50000,),
+            fill_value=0,
+        )
+        array[:] = 1
diff --git a/changes/3554.misc.md b/changes/3554.misc.md
new file mode 100644
index 0000000000..98f37ea8dd
--- /dev/null
+++ b/changes/3554.misc.md
@@ -0,0 +1 @@
+Set up benchmarking via [air speed velocity](https://github.com/airspeed-velocity/asv).
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
index 7f14971396..1e2a71a879 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -115,7 +115,7 @@ docs = [
     'astroid<4',
     'pytest'
 ]
-
+benchmark = ["asv>=0.6.5"]
 
 [project.scripts]
 zarr = "zarr._cli.cli:app"
@@ -270,6 +270,12 @@ dependencies = [
 test = "pytest tests/test_docs.py -v"
 list-env = "pip list"
 
+[tool.hatch.envs.benchmark]
+features = ["benchmark"]
+
+[tool.hatch.envs.benchmark.scripts]
+run = "asv run --dry-run --show-stderr"
+
 [tool.ruff]
 line-length = 100
 force-exclude = true
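Annotation, not part of the patch: asv discovers classes in the configured `benchmark_dir` (`benchmarks/`) and times every method whose name starts with `time_`, so each benchmark in the new `benchmarks/benchmarks.py` measures array creation and the bulk fill together. If only the fill were of interest, asv's `setup` hook could keep creation out of the timed region. A minimal sketch of that variant, hypothetical and not part of this PR (the class name is illustrative):

```python
import zarr


class OneDFillOnlySuite:
    """Variant of OneDIndexingSuite that excludes array creation from timing."""

    def setup(self) -> None:
        # asv runs `setup` before the timed call, so creation cost is not measured.
        self.array = zarr.create_array(
            store={},
            shape=(1000000,),
            dtype="i4",
            compressors=None,
            filters=None,
            chunks=(10000,),
            fill_value=0,
        )

    def time_1d_fill_no_sharding_no_compression(self) -> None:
        # Only the bulk assignment is timed.
        self.array[:] = 1
```

Timing creation plus fill, as the PR does, is equally defensible when the end-to-end write path is the quantity of interest; the sketch just shows how the two costs could be separated.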
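Annotation, not part of the patch: the new `benchmark` extra (`asv>=0.6.5`) feeds the hatch environment of the same name, so, assuming hatch's usual `<env>:<script>` invocation, `hatch run benchmark:run` builds that environment and executes `asv run --dry-run --show-stderr`. Per the asv CLI documentation, `--dry-run` skips writing results to disk and `--show-stderr` surfaces output from failing benchmarks, which makes this script a quick smoke check rather than a full, recorded benchmark run.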