From f951e160c92951fc14951221ab8ecece1514018d Mon Sep 17 00:00:00 2001 From: Hajime Senuma Date: Tue, 17 Sep 2024 00:02:40 +0900 Subject: [PATCH 01/16] Use y* instead of u* for buffer-input funcs --- README.md | 2 +- docs/CONTRIBUTING.md | 4 +- docs/api.md | 4 +- docs/{changelog_link.md => changelog.md} | 0 docs/index.rst | 5 +- docs/{readme_link.md => quickstart.md} | 0 src/mmh3/mmh3module.c | 171 ++++++++++------------- tests/test_doctrings.py | 10 +- tests/test_mmh3.py | 110 ++++++++------- tox.ini | 2 +- 10 files changed, 139 insertions(+), 169 deletions(-) rename docs/{changelog_link.md => changelog.md} (100%) rename docs/{readme_link.md => quickstart.md} (100%) diff --git a/README.md b/README.md index d223340..4cf5cbe 100644 --- a/README.md +++ b/README.md @@ -128,7 +128,7 @@ b'\x82_n\xdd \xac\xb6j\xef\x99\xb1e\xc4\n\xc9\xfd' ## Changelog -See [Changelog](https://mmh3.readthedocs.io/en/latest/changelog_link.html) for the +See [Changelog](https://mmh3.readthedocs.io/en/latest/changelog.html) for the complete changelog. ### [Unreleased] diff --git a/docs/CONTRIBUTING.md b/docs/CONTRIBUTING.md index 722360e..b1ebf98 100644 --- a/docs/CONTRIBUTING.md +++ b/docs/CONTRIBUTING.md @@ -129,13 +129,13 @@ The idea of the subproject directory loosely follows the ### Updating mmh3 core C code -Run `tox -e build-cfiles`. This will fetch Appleby's original SMHasher project +Run `tox -e build_cfiles`. This will fetch Appleby's original SMHasher project as a git submodule and then generate PEP 7-compliant C code from the original project. To perform further edits, add transformation code to the `refresh.py` script, instead of editing `murmurhash3.*` files manually. -Then, run `tox -e build-cfiles` again to update the `murmurhash3.*` files. +Then, run `tox -e build_cfiles` again to update the `murmurhash3.*` files. 
### Local files diff --git a/docs/api.md b/docs/api.md index 78c4720..fa06c9a 100644 --- a/docs/api.md +++ b/docs/api.md @@ -22,9 +22,8 @@ UTF-8 encoding before hashing. The following functions are used to hash types that implement the buffer protocol such as `bytes`, `bytearray`, `memoryview`, and `numpy` arrays. -String inputs are also supported and are automatically converted to `bytes` -using UTF-8 encoding before hashing. +```{seealso} The buffer protocol, [originally implemented as a part of Python/C API](https://docs.python.org/3/c-api/buffer.html), was formally defined as a Python-level API in @@ -37,6 +36,7 @@ type hint which is itself an alias for [typing_extensions.Buffer](https://typing-extensions.readthedocs.io/en/latest/#typing_extensions.Buffer), the backported type hint for `collections.abc.Buffer`. +``` ```{eval-rst} .. autofunction:: mmh3.hash_from_buffer diff --git a/docs/changelog_link.md b/docs/changelog.md similarity index 100% rename from docs/changelog_link.md rename to docs/changelog.md diff --git a/docs/index.rst b/docs/index.rst index 8918aba..e604903 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -6,9 +6,9 @@ mmh3 is a Python extension for `MurmurHash (MurmurHash3) + Quickstart api - Changelog + Changelog .. 
toctree:: :maxdepth: 2 @@ -21,5 +21,4 @@ Indices and tables ================== * :ref:`genindex` -* :ref:`modindex` * :ref:`search` \ No newline at end of file diff --git a/docs/readme_link.md b/docs/quickstart.md similarity index 100% rename from docs/readme_link.md rename to docs/quickstart.md diff --git a/src/mmh3/mmh3module.c b/src/mmh3/mmh3module.c index a7d50a8..488c043 100644 --- a/src/mmh3/mmh3module.c +++ b/src/mmh3/mmh3module.c @@ -37,7 +37,7 @@ typedef unsigned __int64 uint64_t; PyDoc_STRVAR( mmh3_hash_doc, - "hash(key[, seed=0, signed=True]) -> int\n" + "hash(key, seed=0, signed=True) -> int\n" "\n" "Return a hash as a 32-bit integer.\n" "\n" @@ -108,7 +108,7 @@ mmh3_hash(PyObject *self, PyObject *args, PyObject *keywds) PyDoc_STRVAR( mmh3_hash_from_buffer_doc, - "hash_from_buffer(key[, seed=0, signed=True]) -> int\n" + "hash_from_buffer(key, seed=0, signed=True) -> int\n" "\n" "Return a hash for the buffer as a 32-bit integer.\n" "\n" @@ -116,14 +116,19 @@ PyDoc_STRVAR( "memory-views such as numpy arrays.\n" "\n" "Args:\n" - " key (Buffer | str): The bufer to hash.\n" + " key (Buffer | str): The bufer to hash. String inputs are also\n" + " supported and are automatically converted to `bytes` using\n" + " UTF-8 encoding before hashing.\n" " seed (int): The seed value. Must be an integer in the range [0, " "0xFFFFFFFF].\n" " signed (bool): If True, return a signed integer. Otherwise, return " "an unsigned integer.\n" "\n" "Returns:\n" - " int: The hash value as a 32-bit integer.\n"); + " int: The hash value as a 32-bit integer.\n" + "\n" + ".. 
deprecated:: 5.0.0\n" + " Use ``mmh3_32_sintdigest()`` or ``mmh3_32_uintdigest()`` instead.\n"); static PyObject * mmh3_hash_from_buffer(PyObject *self, PyObject *args, PyObject *keywds) @@ -180,7 +185,7 @@ mmh3_hash_from_buffer(PyObject *self, PyObject *args, PyObject *keywds) PyDoc_STRVAR( mmh3_hash64_doc, - "hash64(key[, seed=0, x64arch=True, signed=True]) -> tuple[int, int]\n" + "hash64(key, seed=0, x64arch=True, signed=True) -> tuple[int, int]\n" "\n" "Return a hash as a tuple of two 64-bit integers.\n" "\n" @@ -233,7 +238,7 @@ mmh3_hash64(PyObject *self, PyObject *args, PyObject *keywds) PyDoc_STRVAR( mmh3_hash128_doc, - "hash128(key[, seed=0, x64arch=True, signed=False]]) -> int\n" + "hash128(key, seed=0, x64arch=True, signed=False) -> int\n" "\n" "Return a hash as a 128-bit integer.\n\n" "Calculated by the MurmurHash3_x{64, 86}_128 algorithm.\n" @@ -295,7 +300,7 @@ mmh3_hash128(PyObject *self, PyObject *args, PyObject *keywds) PyDoc_STRVAR( mmh3_hash_bytes_doc, - "hash_bytes(key[, seed=0, x64arch=True]) -> bytes\n" + "hash_bytes(key, seed=0, x64arch=True) -> bytes\n" "\n" "Return a 16-byte hash of the ``bytes`` type.\n" "\n" @@ -343,16 +348,19 @@ mmh3_hash_bytes(PyObject *self, PyObject *args, PyObject *keywds) return PyBytes_FromStringAndSize((char *)result, MMH3_128_DIGESTSIZE); } +//----------------------------------------------------------------------------- +// Functions that accept a buffer + PyDoc_STRVAR( mmh3_mmh3_32_digest_doc, - "mmh3_32_digest(key[, seed=0]) -> bytes\n" + "mmh3_32_digest(key, seed=0, /) -> bytes\n" "\n" "Return a 4-byte hash of the ``bytes`` type for the buffer.\n" "\n" "Calculated by the MurmurHash3_x86_32 algorithm.\n" "\n" "Args:\n" - " key (Buffer | str): The input buffer to hash.\n" + " key (Buffer): The input buffer to hash.\n" " seed (int): The seed value. 
Must be an integer in the range [0, " "0xFFFFFFFF].\n" "\n" @@ -369,10 +377,7 @@ mmh3_mmh3_32_digest(PyObject *self, PyObject *args, PyObject *keywds) uint32_t seed = 0; char result[MMH3_32_DIGESTSIZE]; - static char *kwlist[] = {(char *)"key", (char *)"seed", NULL}; - - if (!PyArg_ParseTupleAndKeywords(args, keywds, "s*|I", kwlist, &target_buf, - &seed)) { + if (!PyArg_ParseTuple(args, "y*|I", &target_buf, &seed)) { return NULL; } @@ -388,14 +393,14 @@ mmh3_mmh3_32_digest(PyObject *self, PyObject *args, PyObject *keywds) PyDoc_STRVAR( mmh3_mmh3_32_sintdigest_doc, - "mmh3_32_sintdigest(key[, seed=0]) -> int\n" + "mmh3_32_sintdigest(key, seed=0, /) -> int\n" "\n" "Return a hash for the buffer as a 32-bit signed integer.\n" "\n" "Calculated by the MurmurHash3_x86_32 algorithm.\n" "\n" "Args:\n" - " key (Buffer | str): The input buffer to hash.\n" + " key (Buffer): The input buffer to hash.\n" " seed (int): The seed value. Must be an integer in the range [0, " "0xFFFFFFFF].\n" "\n" @@ -411,10 +416,7 @@ mmh3_mmh3_32_sintdigest(PyObject *self, PyObject *args, PyObject *keywds) uint32_t seed = 0; int32_t result[1]; - static char *kwlist[] = {(char *)"key", (char *)"seed", NULL}; - - if (!PyArg_ParseTupleAndKeywords(args, keywds, "s*|I", kwlist, &target_buf, - &seed)) { + if (!PyArg_ParseTuple(args, "y*|I", &target_buf, &seed)) { return NULL; } @@ -426,14 +428,14 @@ mmh3_mmh3_32_sintdigest(PyObject *self, PyObject *args, PyObject *keywds) PyDoc_STRVAR( mmh3_mmh3_32_uintdigest_doc, - "mmh3_32_uintdigest(key[, seed=0]) -> int\n" + "mmh3_32_uintdigest(key, seed=0, /) -> int\n" "\n" "Return a hash for the buffer as a 32-bit unsigned integer.\n" "\n" "Calculated by the MurmurHash3_x86_32 algorithm.\n" "\n" "Args:\n" - " key (Buffer | str): The input buffer to hash.\n" + " key (Buffer): The input buffer to hash.\n" " seed (int): The seed value. 
Must be an integer in the range [0, " "0xFFFFFFFF].\n" "\n" @@ -449,10 +451,7 @@ mmh3_mmh3_32_uintdigest(PyObject *self, PyObject *args, PyObject *keywds) uint32_t seed = 0; uint32_t result[1]; - static char *kwlist[] = {(char *)"key", (char *)"seed", NULL}; - - if (!PyArg_ParseTupleAndKeywords(args, keywds, "s*|I", kwlist, &target_buf, - &seed)) { + if (!PyArg_ParseTuple(args, "y*|I", &target_buf, &seed)) { return NULL; } @@ -464,14 +463,14 @@ mmh3_mmh3_32_uintdigest(PyObject *self, PyObject *args, PyObject *keywds) PyDoc_STRVAR( mmh3_mmh3_x64_128_digest_doc, - "mmh3_x64_128_digest(key[, seed=0]) -> bytes\n" + "mmh3_x64_128_digest(key, seed=0, /) -> bytes\n" "\n" "Return a 16-byte hash of the ``bytes`` type for the buffer.\n" "\n" "Calculated by the MurmurHash3_x64_128 algorithm.\n" "\n" "Args:\n" - " key (Buffer | str): The input buffer to hash.\n" + " key (Buffer): The input buffer to hash.\n" " seed (int): The seed value. Must be an integer in the range [0, " "0xFFFFFFFF].\n" "\n" @@ -488,10 +487,7 @@ mmh3_mmh3_x64_128_digest(PyObject *self, PyObject *args, PyObject *keywds) uint32_t seed = 0; uint64_t result[2]; - static char *kwlist[] = {(char *)"key", (char *)"seed", NULL}; - - if (!PyArg_ParseTupleAndKeywords(args, keywds, "s*|I", kwlist, &target_buf, - &seed)) { + if (!PyArg_ParseTuple(args, "y*|I", &target_buf, &seed)) { return NULL; } @@ -508,14 +504,14 @@ mmh3_mmh3_x64_128_digest(PyObject *self, PyObject *args, PyObject *keywds) PyDoc_STRVAR( mmh3_mmh3_x64_128_sintdigest_doc, - "mmh3_x64_128_sintdigest(key[, seed=0]) -> int\n" + "mmh3_x64_128_sintdigest(key, seed=0, /) -> int\n" "\n" "Return a hash for the buffer as a 128-bit signed integer.\n" "\n" "Calculated by the MurmurHash3_x64_128 algorithm.\n" "\n" "Args:\n" - " key (Buffer | str): The input buffer to hash.\n" + " key (Buffer): The input buffer to hash.\n" " seed (int): The seed value. 
Must be an integer in the range [0, " "0xFFFFFFFF].\n" "\n" @@ -531,10 +527,7 @@ mmh3_mmh3_x64_128_sintdigest(PyObject *self, PyObject *args, PyObject *keywds) uint32_t seed = 0; uint64_t result[2]; - static char *kwlist[] = {(char *)"key", (char *)"seed", NULL}; - - if (!PyArg_ParseTupleAndKeywords(args, keywds, "s*|I", kwlist, &target_buf, - &seed)) { + if (!PyArg_ParseTuple(args, "y*|I", &target_buf, &seed)) { return NULL; } @@ -560,14 +553,14 @@ mmh3_mmh3_x64_128_sintdigest(PyObject *self, PyObject *args, PyObject *keywds) PyDoc_STRVAR( mmh3_mmh3_x64_128_uintdigest_doc, - "mmh3_x64_128_uintdigest(key[, seed=0]) -> int\n" + "mmh3_x64_128_uintdigest(key, seed=0, /) -> int\n" "\n" "Return a hash for the buffer as a 128-bit unsigned integer.\n" "\n" "Calculated by the MurmurHash3_x64_128 algorithm.\n" "\n" "Args:\n" - " key (Buffer | str): The input buffer to hash.\n" + " key (Buffer): The input buffer to hash.\n" " seed (int): The seed value. Must be an integer in the range [0, " "0xFFFFFFFF].\n" "\n" @@ -583,10 +576,7 @@ mmh3_mmh3_x64_128_uintdigest(PyObject *self, PyObject *args, PyObject *keywds) uint32_t seed = 0; uint64_t result[2]; - static char *kwlist[] = {(char *)"key", (char *)"seed", NULL}; - - if (!PyArg_ParseTupleAndKeywords(args, keywds, "s*|I", kwlist, &target_buf, - &seed)) { + if (!PyArg_ParseTuple(args, "y*|I", &target_buf, &seed)) { return NULL; } @@ -612,14 +602,14 @@ mmh3_mmh3_x64_128_uintdigest(PyObject *self, PyObject *args, PyObject *keywds) PyDoc_STRVAR( mmh3_mmh3_x64_128_stupledigest_doc, - "mmh3_x64_128_stupledigest(key[, seed=0]) -> tuple[int, int]\n" + "mmh3_x64_128_stupledigest(key, seed=0, /) -> tuple[int, int]\n" "\n" "Return a hash for the buffer as a tuple of two 64-bit signed integers.\n" "\n" "Calculated by the MurmurHash3_x64_128 algorithm.\n" "\n" "Args:\n" - " key (Buffer | str): The input buffer to hash.\n" + " key (Buffer): The input buffer to hash.\n" " seed (int): The seed value. 
Must be an integer in the range [0, " "0xFFFFFFFF].\n" "\n" @@ -637,10 +627,7 @@ mmh3_mmh3_x64_128_stupledigest(PyObject *self, PyObject *args, uint32_t seed = 0; uint64_t result[2]; - static char *kwlist[] = {(char *)"key", (char *)"seed", NULL}; - - if (!PyArg_ParseTupleAndKeywords(args, keywds, "s*|I", kwlist, &target_buf, - &seed)) { + if (!PyArg_ParseTuple(args, "y*|I", &target_buf, &seed)) { return NULL; } @@ -653,7 +640,7 @@ mmh3_mmh3_x64_128_stupledigest(PyObject *self, PyObject *args, PyDoc_STRVAR( mmh3_mmh3_x64_128_utupledigest_doc, - "mmh3_x64_128_utupledigest(key[, seed=0]) -> tuple[int, int]\n" + "mmh3_x64_128_utupledigest(key, seed=0, /) -> tuple[int, int]\n" "\n" "Return a hash for the buffer as a tuple of two 64-bit unsigned " "integers.\n" @@ -661,7 +648,7 @@ PyDoc_STRVAR( "Calculated by the MurmurHash3_x64_128 algorithm.\n" "\n" "Args:\n" - " key (Buffer | str): The input buffer to hash.\n" + " key (Buffer): The input buffer to hash.\n" " seed (int): The seed value. Must be an integer in the range [0, " "0xFFFFFFFF].\n" "\n" @@ -679,10 +666,7 @@ mmh3_mmh3_x64_128_utupledigest(PyObject *self, PyObject *args, uint32_t seed = 0; uint64_t result[2]; - static char *kwlist[] = {(char *)"key", (char *)"seed", NULL}; - - if (!PyArg_ParseTupleAndKeywords(args, keywds, "s*|I", kwlist, &target_buf, - &seed)) { + if (!PyArg_ParseTuple(args, "y*|I", &target_buf, &seed)) { return NULL; } @@ -695,14 +679,14 @@ mmh3_mmh3_x64_128_utupledigest(PyObject *self, PyObject *args, PyDoc_STRVAR( mmh3_mmh3_x86_128_digest_doc, - "mmh3_x86_128_digest(key[, seed=0]) -> bytes\n" + "mmh3_x86_128_digest(key, seed=0, /) -> bytes\n" "\n" "Return a 16-byte hash of the ``bytes`` type for the buffer.\n" "\n" "Calculated by the MurmurHash3_x86_128 algorithm.\n" "\n" "Args:\n" - " key (Buffer | str): The input buffer to hash.\n" + " key (Buffer): The input buffer to hash.\n" " seed (int): The seed value. 
Must be an integer in the range [0, " "0xFFFFFFFF].\n" "\n" @@ -719,10 +703,7 @@ mmh3_mmh3_x86_128_digest(PyObject *self, PyObject *args, PyObject *keywds) uint32_t seed = 0; uint64_t result[2]; - static char *kwlist[] = {(char *)"key", (char *)"seed", NULL}; - - if (!PyArg_ParseTupleAndKeywords(args, keywds, "s*|I", kwlist, &target_buf, - &seed)) { + if (!PyArg_ParseTuple(args, "y*|I", &target_buf, &seed)) { return NULL; } @@ -739,14 +720,14 @@ mmh3_mmh3_x86_128_digest(PyObject *self, PyObject *args, PyObject *keywds) PyDoc_STRVAR( mmh3_mmh3_x86_128_sintdigest_doc, - "mmh3_x86_128_sintdigest(key[, seed=0]) -> int\n" + "mmh3_x86_128_sintdigest(key, seed=0, /) -> int\n" "\n" "Return a hash for the buffer as a 128-bit signed integer.\n" "\n" "Calculated by the MurmurHash3_x86_128 algorithm.\n" "\n" "Args:\n" - " key (Buffer | str): The input buffer to hash.\n" + " key (Buffer): The input buffer to hash.\n" " seed (int): The seed value. Must be an integer in the range [0, " "0xFFFFFFFF].\n" "\n" @@ -762,10 +743,7 @@ mmh3_mmh3_x86_128_sintdigest(PyObject *self, PyObject *args, PyObject *keywds) uint32_t seed = 0; uint64_t result[2]; - static char *kwlist[] = {(char *)"key", (char *)"seed", NULL}; - - if (!PyArg_ParseTupleAndKeywords(args, keywds, "s*|I", kwlist, &target_buf, - &seed)) { + if (!PyArg_ParseTuple(args, "y*|I", &target_buf, &seed)) { return NULL; } @@ -791,14 +769,14 @@ mmh3_mmh3_x86_128_sintdigest(PyObject *self, PyObject *args, PyObject *keywds) PyDoc_STRVAR( mmh3_mmh3_x86_128_uintdigest_doc, - "mmh3_x86_128_uintdigest(key[, seed=0]) -> int\n" + "mmh3_x86_128_uintdigest(key, seed=0, /) -> int\n" "\n" "Return a hash for the buffer as a 128-bit unsigned integer.\n" "\n" "Calculated by the MurmurHash3_x86_128 algorithm.\n" "\n" "Args:\n" - " key (Buffer | str): The input buffer to hash.\n" + " key (Buffer): The input buffer to hash.\n" " seed (int): The seed value. 
Must be an integer in the range [0, " "0xFFFFFFFF].\n" "\n" @@ -814,10 +792,7 @@ mmh3_mmh3_x86_128_uintdigest(PyObject *self, PyObject *args, PyObject *keywds) uint32_t seed = 0; uint64_t result[2]; - static char *kwlist[] = {(char *)"key", (char *)"seed", NULL}; - - if (!PyArg_ParseTupleAndKeywords(args, keywds, "s*|I", kwlist, &target_buf, - &seed)) { + if (!PyArg_ParseTuple(args, "y*|I", &target_buf, &seed)) { return NULL; } @@ -843,14 +818,14 @@ mmh3_mmh3_x86_128_uintdigest(PyObject *self, PyObject *args, PyObject *keywds) PyDoc_STRVAR( mmh3_mmh3_x86_128_stupledigest_doc, - "mmh3_x86_128_stupledigest(key[, seed=0]) -> tuple[int, int]\n" + "mmh3_x86_128_stupledigest(key, seed=0, /) -> tuple[int, int]\n" "\n" "Return a hash for the buffer as a tuple of two 64-bit signed integers.\n" "\n" "Calculated by the MurmurHash3_x86_128 algorithm.\n" "\n" "Args:\n" - " key (Buffer | str): The input buffer to hash.\n" + " key (Buffer): The input buffer to hash.\n" " seed (int): The seed value. Must be an integer in the range [0, " "0xFFFFFFFF].\n" "\n" @@ -868,10 +843,7 @@ mmh3_mmh3_x86_128_stupledigest(PyObject *self, PyObject *args, uint32_t seed = 0; uint64_t result[2]; - static char *kwlist[] = {(char *)"key", (char *)"seed", NULL}; - - if (!PyArg_ParseTupleAndKeywords(args, keywds, "s*|I", kwlist, &target_buf, - &seed)) { + if (!PyArg_ParseTuple(args, "y*|I", &target_buf, &seed)) { return NULL; } @@ -884,7 +856,7 @@ mmh3_mmh3_x86_128_stupledigest(PyObject *self, PyObject *args, PyDoc_STRVAR( mmh3_mmh3_x86_128_utupledigest_doc, - "mmh3_x86_128_utupledigest(key[, seed=0]) -> tuple[int, int]\n" + "mmh3_x86_128_utupledigest(key, seed=0, /) -> tuple[int, int]\n" "\n" "Return a hash for the buffer as a tuple of two 64-bit unsigned " "integers.\n" @@ -892,7 +864,7 @@ PyDoc_STRVAR( "Calculated by the MurmurHash3_x86_128 algorithm.\n" "\n" "Args:\n" - " key (Buffer | str): The input buffer to hash.\n" + " key (Buffer): The input buffer to hash.\n" " seed (int): The seed value. 
Must be an integer in the range [0, " "0xFFFFFFFF].\n" "\n" @@ -910,10 +882,7 @@ mmh3_mmh3_x86_128_utupledigest(PyObject *self, PyObject *args, uint32_t seed = 0; uint64_t result[2]; - static char *kwlist[] = {(char *)"key", (char *)"seed", NULL}; - - if (!PyArg_ParseTupleAndKeywords(args, keywds, "s*|I", kwlist, &target_buf, - &seed)) { + if (!PyArg_ParseTuple(args, "y*|I", &target_buf, &seed)) { return NULL; } @@ -935,32 +904,32 @@ static PyMethodDef Mmh3Methods[] = { mmh3_hash128_doc}, {"hash_bytes", (PyCFunction)mmh3_hash_bytes, METH_VARARGS | METH_KEYWORDS, mmh3_hash_bytes_doc}, - {"mmh3_32_digest", (PyCFunction)mmh3_mmh3_32_digest, - METH_VARARGS | METH_KEYWORDS, mmh3_mmh3_32_digest_doc}, - {"mmh3_32_sintdigest", (PyCFunction)mmh3_mmh3_32_sintdigest, - METH_VARARGS | METH_KEYWORDS, mmh3_mmh3_32_sintdigest_doc}, - {"mmh3_32_uintdigest", (PyCFunction)mmh3_mmh3_32_uintdigest, - METH_VARARGS | METH_KEYWORDS, mmh3_mmh3_32_uintdigest_doc}, + {"mmh3_32_digest", (PyCFunction)mmh3_mmh3_32_digest, METH_VARARGS, + mmh3_mmh3_32_digest_doc}, + {"mmh3_32_sintdigest", (PyCFunction)mmh3_mmh3_32_sintdigest, METH_VARARGS, + mmh3_mmh3_32_sintdigest_doc}, + {"mmh3_32_uintdigest", (PyCFunction)mmh3_mmh3_32_uintdigest, METH_VARARGS, + mmh3_mmh3_32_uintdigest_doc}, {"mmh3_x64_128_digest", (PyCFunction)mmh3_mmh3_x64_128_digest, - METH_VARARGS | METH_KEYWORDS, mmh3_mmh3_x64_128_digest_doc}, + METH_VARARGS, mmh3_mmh3_x64_128_digest_doc}, {"mmh3_x64_128_sintdigest", (PyCFunction)mmh3_mmh3_x64_128_sintdigest, - METH_VARARGS | METH_KEYWORDS, mmh3_mmh3_x64_128_sintdigest_doc}, + METH_VARARGS, mmh3_mmh3_x64_128_sintdigest_doc}, {"mmh3_x64_128_uintdigest", (PyCFunction)mmh3_mmh3_x64_128_uintdigest, - METH_VARARGS | METH_KEYWORDS, mmh3_mmh3_x64_128_uintdigest_doc}, + METH_VARARGS, mmh3_mmh3_x64_128_uintdigest_doc}, {"mmh3_x64_128_stupledigest", (PyCFunction)mmh3_mmh3_x64_128_stupledigest, - METH_VARARGS | METH_KEYWORDS, mmh3_mmh3_x64_128_stupledigest_doc}, + METH_VARARGS, 
mmh3_mmh3_x64_128_stupledigest_doc}, {"mmh3_x64_128_utupledigest", (PyCFunction)mmh3_mmh3_x64_128_utupledigest, - METH_VARARGS | METH_KEYWORDS, mmh3_mmh3_x64_128_utupledigest_doc}, + METH_VARARGS, mmh3_mmh3_x64_128_utupledigest_doc}, {"mmh3_x86_128_digest", (PyCFunction)mmh3_mmh3_x86_128_digest, - METH_VARARGS | METH_KEYWORDS, mmh3_mmh3_x86_128_digest_doc}, + METH_VARARGS, mmh3_mmh3_x86_128_digest_doc}, {"mmh3_x86_128_sintdigest", (PyCFunction)mmh3_mmh3_x86_128_sintdigest, - METH_VARARGS | METH_KEYWORDS, mmh3_mmh3_x86_128_sintdigest_doc}, + METH_VARARGS, mmh3_mmh3_x86_128_sintdigest_doc}, {"mmh3_x86_128_uintdigest", (PyCFunction)mmh3_mmh3_x86_128_uintdigest, - METH_VARARGS | METH_KEYWORDS, mmh3_mmh3_x86_128_uintdigest_doc}, + METH_VARARGS, mmh3_mmh3_x86_128_uintdigest_doc}, {"mmh3_x86_128_stupledigest", (PyCFunction)mmh3_mmh3_x86_128_stupledigest, - METH_VARARGS | METH_KEYWORDS, mmh3_mmh3_x86_128_stupledigest_doc}, + METH_VARARGS, mmh3_mmh3_x86_128_stupledigest_doc}, {"mmh3_x86_128_utupledigest", (PyCFunction)mmh3_mmh3_x86_128_utupledigest, - METH_VARARGS | METH_KEYWORDS, mmh3_mmh3_x86_128_utupledigest_doc}, + METH_VARARGS, mmh3_mmh3_x86_128_utupledigest_doc}, {NULL, NULL, 0, NULL}}; //----------------------------------------------------------------------------- diff --git a/tests/test_doctrings.py b/tests/test_doctrings.py index b50959a..c1e62df 100644 --- a/tests/test_doctrings.py +++ b/tests/test_doctrings.py @@ -5,30 +5,30 @@ def test_function_docstrings() -> None: assert "__doc__" in dir(mmh3.hash) assert mmh3.hash.__doc__ is not None - assert mmh3.hash.__doc__.startswith("hash(key[, seed=0, signed=True]) -> int\n\n") + assert mmh3.hash.__doc__.startswith("hash(key, seed=0, signed=True) -> int\n\n") assert "__doc__" in dir(mmh3.hash_from_buffer) assert mmh3.hash_from_buffer.__doc__ is not None assert mmh3.hash_from_buffer.__doc__.startswith( - "hash_from_buffer(key[, seed=0, signed=True]) -> int\n\n" + "hash_from_buffer(key, seed=0, signed=True) -> int\n\n" ) 
assert "__doc__" in dir(mmh3.hash64) assert mmh3.hash64.__doc__ is not None assert mmh3.hash64.__doc__.startswith( - "hash64(key[, seed=0, x64arch=True, signed=True]) -> tuple[int, int]\n\n" + "hash64(key, seed=0, x64arch=True, signed=True) -> tuple[int, int]\n\n" ) assert "__doc__" in dir(mmh3.hash128) assert mmh3.hash128.__doc__ is not None assert mmh3.hash128.__doc__.startswith( - "hash128(key[, seed=0, x64arch=True, signed=False]]) -> int\n\n" + "hash128(key, seed=0, x64arch=True, signed=False) -> int\n\n" ) assert "__doc__" in dir(mmh3.hash_bytes) assert mmh3.hash_bytes.__doc__ is not None assert mmh3.hash_bytes.__doc__.startswith( - "hash_bytes(key[, seed=0, x64arch=True]) -> bytes\n\n" + "hash_bytes(key, seed=0, x64arch=True) -> bytes\n\n" ) diff --git a/tests/test_mmh3.py b/tests/test_mmh3.py index ac03ed5..35a8cc7 100644 --- a/tests/test_mmh3.py +++ b/tests/test_mmh3.py @@ -234,7 +234,6 @@ def test_hash128() -> None: def test_mmh3_32_digest() -> None: assert mmh3.mmh3_32_digest(b"") == b"\0\0\0\0" assert mmh3.mmh3_32_digest(b"", 0) == b"\0\0\0\0" - assert mmh3.mmh3_32_digest(b"", seed=0) == b"\0\0\0\0" assert mmh3.mmh3_32_digest(b"\x21\x43\x65\x87", 0) == (0xF55B516B).to_bytes( 4, "little" ) @@ -254,17 +253,21 @@ def test_mmh3_32_digest() -> None: assert mmh3.mmh3_32_digest(b"\x00\x00", 0) == (0x30F4C306).to_bytes(4, "little") assert mmh3.mmh3_32_digest(b"\x00", 0) == (0x514E28B7).to_bytes(4, "little") - assert mmh3.mmh3_32_digest("aaaa", 0x9747B28C) == (0x5A97808A).to_bytes(4, "little") - assert mmh3.mmh3_32_digest("aaa", 0x9747B28C) == (0x283E0130).to_bytes(4, "little") - assert mmh3.mmh3_32_digest("aa", 0x9747B28C) == (0x5D211726).to_bytes(4, "little") - assert mmh3.mmh3_32_digest("a", 0x9747B28C) == (0x7FA09EA6).to_bytes(4, "little") + assert mmh3.mmh3_32_digest(b"aaaa", 0x9747B28C) == (0x5A97808A).to_bytes( + 4, "little" + ) + assert mmh3.mmh3_32_digest(b"aaa", 0x9747B28C) == (0x283E0130).to_bytes(4, "little") + assert mmh3.mmh3_32_digest(b"aa", 
0x9747B28C) == (0x5D211726).to_bytes(4, "little") + assert mmh3.mmh3_32_digest(b"a", 0x9747B28C) == (0x7FA09EA6).to_bytes(4, "little") - assert mmh3.mmh3_32_digest("abcd", 0x9747B28C) == (0xF0478627).to_bytes(4, "little") - assert mmh3.mmh3_32_digest("abc", 0x9747B28C) == (0xC84A62DD).to_bytes(4, "little") - assert mmh3.mmh3_32_digest("ab", 0x9747B28C) == (0x74875592).to_bytes(4, "little") - assert mmh3.mmh3_32_digest("a", 0x9747B28C) == (0x7FA09EA6).to_bytes(4, "little") + assert mmh3.mmh3_32_digest(b"abcd", 0x9747B28C) == (0xF0478627).to_bytes( + 4, "little" + ) + assert mmh3.mmh3_32_digest(b"abc", 0x9747B28C) == (0xC84A62DD).to_bytes(4, "little") + assert mmh3.mmh3_32_digest(b"ab", 0x9747B28C) == (0x74875592).to_bytes(4, "little") + assert mmh3.mmh3_32_digest(b"a", 0x9747B28C) == (0x7FA09EA6).to_bytes(4, "little") - assert mmh3.mmh3_32_digest("Hello, world!", 0x9747B28C) == (0x24884CBA).to_bytes( + assert mmh3.mmh3_32_digest(b"Hello, world!", 0x9747B28C) == (0x24884CBA).to_bytes( 4, "little" ) @@ -272,17 +275,17 @@ def test_mmh3_32_digest() -> None: 0xD58063C1 ).to_bytes(4, "little") - assert mmh3.mmh3_32_digest("a" * 256, 0x9747B28C) == (0x37405BDC).to_bytes( + assert mmh3.mmh3_32_digest(b"a" * 256, 0x9747B28C) == (0x37405BDC).to_bytes( 4, "little" ) - assert mmh3.mmh3_32_digest("abc", 0) == (0xB3DD93FA).to_bytes(4, "little") + assert mmh3.mmh3_32_digest(b"abc", 0) == (0xB3DD93FA).to_bytes(4, "little") assert mmh3.mmh3_32_digest( - "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq", 0 + b"abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq", 0 ) == (0xEE925B90).to_bytes(4, "little") assert mmh3.mmh3_32_digest( - "The quick brown fox jumps over the lazy dog", 0x9747B28C + b"The quick brown fox jumps over the lazy dog", 0x9747B28C ) == (0x2FA826CD).to_bytes(4, "little") assert mmh3.mmh3_32_digest(bytearray(b"aaaa"), 0x9747B28C) == (0x5A97808A).to_bytes( @@ -294,16 +297,15 @@ def test_mmh3_32_digest() -> None: def test_mmh3_sintdigest() -> None: - 
assert mmh3.mmh3_32_sintdigest("foo") == -156908512 assert mmh3.mmh3_32_sintdigest(b"foo") == -156908512 assert mmh3.mmh3_32_sintdigest(bytearray(b"foo")) == -156908512 assert mmh3.mmh3_32_sintdigest(memoryview(b"foobar")[0:3]) == -156908512 # Test vectors devised by Ian Boyd # https://stackoverflow.com/a/31929528 - assert mmh3.mmh3_32_sintdigest(b"", seed=0) == 0 - assert mmh3.mmh3_32_sintdigest(b"", seed=1) == 0x514E28B7 - assert mmh3.mmh3_32_sintdigest(b"", seed=u32_to_s32(0xFFFFFFFF)) == u32_to_s32( + assert mmh3.mmh3_32_sintdigest(b"", 0) == 0 + assert mmh3.mmh3_32_sintdigest(b"", 1) == 0x514E28B7 + assert mmh3.mmh3_32_sintdigest(b"", u32_to_s32(0xFFFFFFFF)) == u32_to_s32( 0x81F16F39 ) assert mmh3.mmh3_32_sintdigest(b"\x21\x43\x65\x87", 0) == u32_to_s32(0xF55B516B) @@ -319,65 +321,65 @@ def test_mmh3_sintdigest() -> None: assert mmh3.mmh3_32_sintdigest(b"\x00\x00", 0) == u32_to_s32(0x30F4C306) assert mmh3.mmh3_32_sintdigest(b"\x00", 0) == u32_to_s32(0x514E28B7) - assert mmh3.mmh3_32_sintdigest("aaaa", u32_to_s32(0x9747B28C)) == u32_to_s32( + assert mmh3.mmh3_32_sintdigest(b"aaaa", u32_to_s32(0x9747B28C)) == u32_to_s32( 0x5A97808A ) - assert mmh3.mmh3_32_sintdigest("aaa", u32_to_s32(0x9747B28C)) == u32_to_s32( + assert mmh3.mmh3_32_sintdigest(b"aaa", u32_to_s32(0x9747B28C)) == u32_to_s32( 0x283E0130 ) - assert mmh3.mmh3_32_sintdigest("aa", u32_to_s32(0x9747B28C)) == u32_to_s32( + assert mmh3.mmh3_32_sintdigest(b"aa", u32_to_s32(0x9747B28C)) == u32_to_s32( 0x5D211726 ) - assert mmh3.mmh3_32_sintdigest("a", u32_to_s32(0x9747B28C)) == u32_to_s32( + assert mmh3.mmh3_32_sintdigest(b"a", u32_to_s32(0x9747B28C)) == u32_to_s32( 0x7FA09EA6 ) - assert mmh3.mmh3_32_sintdigest("abcd", u32_to_s32(0x9747B28C)) == u32_to_s32( + assert mmh3.mmh3_32_sintdigest(b"abcd", u32_to_s32(0x9747B28C)) == u32_to_s32( 0xF0478627 ) - assert mmh3.mmh3_32_sintdigest("abc", u32_to_s32(0x9747B28C)) == u32_to_s32( + assert mmh3.mmh3_32_sintdigest(b"abc", u32_to_s32(0x9747B28C)) == u32_to_s32( 
0xC84A62DD ) - assert mmh3.mmh3_32_sintdigest("ab", u32_to_s32(0x9747B28C)) == u32_to_s32( + assert mmh3.mmh3_32_sintdigest(b"ab", u32_to_s32(0x9747B28C)) == u32_to_s32( 0x74875592 ) - assert mmh3.mmh3_32_sintdigest("a", u32_to_s32(0x9747B28C)) == u32_to_s32( + assert mmh3.mmh3_32_sintdigest(b"a", u32_to_s32(0x9747B28C)) == u32_to_s32( 0x7FA09EA6 ) assert mmh3.mmh3_32_sintdigest( - "Hello, world!", u32_to_s32(0x9747B28C) + b"Hello, world!", u32_to_s32(0x9747B28C) ) == u32_to_s32(0x24884CBA) assert mmh3.mmh3_32_sintdigest( "ππππππππ".encode("utf-8"), u32_to_s32(0x9747B28C) ) == u32_to_s32(0xD58063C1) - assert mmh3.mmh3_32_sintdigest("a" * 256, u32_to_s32(0x9747B28C)) == u32_to_s32( + assert mmh3.mmh3_32_sintdigest(b"a" * 256, u32_to_s32(0x9747B28C)) == u32_to_s32( 0x37405BDC ) - assert mmh3.mmh3_32_sintdigest("abc", 0) == u32_to_s32(0xB3DD93FA) + assert mmh3.mmh3_32_sintdigest(b"abc", 0) == u32_to_s32(0xB3DD93FA) assert mmh3.mmh3_32_sintdigest( - "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq", 0 + b"abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq", 0 ) == u32_to_s32(0xEE925B90) assert mmh3.mmh3_32_sintdigest( - "The quick brown fox jumps over the lazy dog", u32_to_s32(0x9747B28C) + b"The quick brown fox jumps over the lazy dog", u32_to_s32(0x9747B28C) ) == u32_to_s32(0x2FA826CD) def test_mmh3_uintdigest() -> None: - assert mmh3.mmh3_32_uintdigest("foo") == 4138058784 assert mmh3.mmh3_32_uintdigest(b"foo") == 4138058784 assert mmh3.mmh3_32_uintdigest(bytearray(b"foo")) == 4138058784 assert mmh3.mmh3_32_uintdigest(memoryview(b"foobar")[0:3]) == 4138058784 # Test vectors devised by Ian Boyd # https://stackoverflow.com/a/31929528 - assert mmh3.mmh3_32_uintdigest(b"", seed=0) == 0 - assert mmh3.mmh3_32_uintdigest(b"", seed=1) == 0x514E28B7 - assert mmh3.mmh3_32_uintdigest(b"", seed=0xFFFFFFFF) == 0x81F16F39 + assert mmh3.mmh3_32_uintdigest(b"") == 0 + assert mmh3.mmh3_32_uintdigest(b"", 0) == 0 + assert mmh3.mmh3_32_uintdigest(b"", 1) == 0x514E28B7 + 
assert mmh3.mmh3_32_uintdigest(b"", 0xFFFFFFFF) == 0x81F16F39 assert mmh3.mmh3_32_uintdigest(b"\x21\x43\x65\x87", 0) == 0xF55B516B assert mmh3.mmh3_32_uintdigest(b"\x21\x43\x65\x87", 0x5082EDEE) == 0x2362F9DE assert mmh3.mmh3_32_uintdigest(b"\x21\x43\x65", 0) == 0x7E4A8634 @@ -389,40 +391,40 @@ def test_mmh3_uintdigest() -> None: assert mmh3.mmh3_32_uintdigest(b"\x00\x00", 0) == 0x30F4C306 assert mmh3.mmh3_32_uintdigest(b"\x00", 0) == 0x514E28B7 - assert mmh3.mmh3_32_uintdigest("aaaa", 0x9747B28C) == 0x5A97808A - assert mmh3.mmh3_32_uintdigest("aaa", 0x9747B28C) == 0x283E0130 - assert mmh3.mmh3_32_uintdigest("aa", 0x9747B28C) == 0x5D211726 - assert mmh3.mmh3_32_uintdigest("a", 0x9747B28C) == 0x7FA09EA6 + assert mmh3.mmh3_32_uintdigest(b"aaaa", 0x9747B28C) == 0x5A97808A + assert mmh3.mmh3_32_uintdigest(b"aaa", 0x9747B28C) == 0x283E0130 + assert mmh3.mmh3_32_uintdigest(b"aa", 0x9747B28C) == 0x5D211726 + assert mmh3.mmh3_32_uintdigest(b"a", 0x9747B28C) == 0x7FA09EA6 - assert mmh3.mmh3_32_uintdigest("abcd", 0x9747B28C) == 0xF0478627 - assert mmh3.mmh3_32_uintdigest("abc", 0x9747B28C) == 0xC84A62DD - assert mmh3.mmh3_32_uintdigest("ab", 0x9747B28C) == 0x74875592 - assert mmh3.mmh3_32_uintdigest("a", 0x9747B28C) == 0x7FA09EA6 + assert mmh3.mmh3_32_uintdigest(b"abcd", 0x9747B28C) == 0xF0478627 + assert mmh3.mmh3_32_uintdigest(b"abc", 0x9747B28C) == 0xC84A62DD + assert mmh3.mmh3_32_uintdigest(b"ab", 0x9747B28C) == 0x74875592 + assert mmh3.mmh3_32_uintdigest(b"a", 0x9747B28C) == 0x7FA09EA6 - assert mmh3.mmh3_32_uintdigest("Hello, world!", 0x9747B28C) == 0x24884CBA + assert mmh3.mmh3_32_uintdigest(b"Hello, world!", 0x9747B28C) == 0x24884CBA assert mmh3.mmh3_32_uintdigest("ππππππππ".encode("utf-8"), 0x9747B28C) == 0xD58063C1 - assert mmh3.mmh3_32_uintdigest("a" * 256, 0x9747B28C) == 0x37405BDC + assert mmh3.mmh3_32_uintdigest(b"a" * 256, 0x9747B28C) == 0x37405BDC - assert mmh3.mmh3_32_uintdigest("abc", 0) == 0xB3DD93FA + assert mmh3.mmh3_32_uintdigest(b"abc", 0) == 0xB3DD93FA 
assert ( mmh3.mmh3_32_uintdigest( - "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq", 0 + b"abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq", 0 ) == 0xEE925B90 ) assert ( mmh3.mmh3_32_uintdigest( - "The quick brown fox jumps over the lazy dog", 0x9747B28C + b"The quick brown fox jumps over the lazy dog", 0x9747B28C ) == 0x2FA826CD ) assert ( mmh3.mmh3_32_uintdigest( - "The quick brown fox jumps over the lazy dog", 0x9747B28C + b"The quick brown fox jumps over the lazy dog", 0x9747B28C ) == 0x2FA826CD ) @@ -430,7 +432,7 @@ def test_mmh3_uintdigest() -> None: def test_mmh3_x64_128_digest() -> None: assert ( - mmh3.mmh3_x64_128_digest("foo") + mmh3.mmh3_x64_128_digest(b"foo") == b"aE\xf5\x01W\x86q\xe2\x87}\xba+\xe4\x87\xaf~" ) @@ -492,7 +494,7 @@ def test_mmh3_x64_128_utupledigest() -> None: def test_mmh3_x86_128_digest() -> None: - assert mmh3.mmh3_x86_128_digest("", 123) == ( + assert mmh3.mmh3_x86_128_digest(b"", 123) == ( 0x26F3E79926F3E79926F3E799FEDC5245 ).to_bytes(16, "little") @@ -525,7 +527,7 @@ def test_mmh3_x86_128_sintdigest() -> None: def test_mmh3_x86_128_uintdigest() -> None: - assert mmh3.mmh3_x64_128_uintdigest(b"") == 0 + assert mmh3.mmh3_x64_128_uintdigest(b"", 0) == 0 # Test vector from https://github.com/PeterScott/murmur3/blob/master/test.c assert ( @@ -535,7 +537,7 @@ def test_mmh3_x86_128_uintdigest() -> None: def test_mmh3_x86_128_stupledigest() -> None: - assert mmh3.mmh3_x86_128_stupledigest(b"") == (0, 0) + assert mmh3.mmh3_x86_128_stupledigest(b"", 0) == (0, 0) assert mmh3.mmh3_x86_128_stupledigest( memoryview(b"The quick brown fox jumps over the lazy dog"), 0x9747B28C @@ -546,7 +548,7 @@ def test_mmh3_x86_128_stupledigest() -> None: def test_mmh3_x86_128_utupledigest() -> None: - assert mmh3.mmh3_x86_128_utupledigest(b"") == (0, 0) + assert mmh3.mmh3_x86_128_utupledigest(b"", 0) == (0, 0) # Test vector from https://github.com/PeterScott/murmur3/blob/master/test.c assert mmh3.mmh3_x86_128_utupledigest(memoryview(b"Hello, 
world!"), 123) == ( diff --git a/tox.ini b/tox.ini index f26452b..3437a03 100644 --- a/tox.ini +++ b/tox.ini @@ -42,7 +42,7 @@ commands_pre = commands = make -C docs html -[testenv:build-cfiles] +[testenv:build_cfiles] allowlist_externals = find git From 4a6a292c12d32561d4de4fab9054a8e8f76a6744 Mon Sep 17 00:00:00 2001 From: Hajime Senuma Date: Tue, 17 Sep 2024 00:10:41 +0900 Subject: [PATCH 02/16] Fix function signatures --- src/mmh3/mmh3module.c | 46 ++++++++++++++++++++----------------------- tox.ini | 2 +- 2 files changed, 22 insertions(+), 26 deletions(-) diff --git a/src/mmh3/mmh3module.c b/src/mmh3/mmh3module.c index 488c043..ffa3159 100644 --- a/src/mmh3/mmh3module.c +++ b/src/mmh3/mmh3module.c @@ -371,7 +371,7 @@ PyDoc_STRVAR( ".. versionadded:: 5.0.0\n"); static PyObject * -mmh3_mmh3_32_digest(PyObject *self, PyObject *args, PyObject *keywds) +mmh3_mmh3_32_digest(PyObject *self, PyObject *args) { Py_buffer target_buf; uint32_t seed = 0; @@ -410,7 +410,7 @@ PyDoc_STRVAR( ".. versionadded:: 5.0.0\n"); static PyObject * -mmh3_mmh3_32_sintdigest(PyObject *self, PyObject *args, PyObject *keywds) +mmh3_mmh3_32_sintdigest(PyObject *self, PyObject *args) { Py_buffer target_buf; uint32_t seed = 0; @@ -445,7 +445,7 @@ PyDoc_STRVAR( ".. versionadded:: 5.0.0\n"); static PyObject * -mmh3_mmh3_32_uintdigest(PyObject *self, PyObject *args, PyObject *keywds) +mmh3_mmh3_32_uintdigest(PyObject *self, PyObject *args) { Py_buffer target_buf; uint32_t seed = 0; @@ -481,7 +481,7 @@ PyDoc_STRVAR( ".. versionadded:: 5.0.0\n"); static PyObject * -mmh3_mmh3_x64_128_digest(PyObject *self, PyObject *args, PyObject *keywds) +mmh3_mmh3_x64_128_digest(PyObject *self, PyObject *args) { Py_buffer target_buf; uint32_t seed = 0; @@ -521,7 +521,7 @@ PyDoc_STRVAR( ".. 
versionadded:: 5.0.0\n"); static PyObject * -mmh3_mmh3_x64_128_sintdigest(PyObject *self, PyObject *args, PyObject *keywds) +mmh3_mmh3_x64_128_sintdigest(PyObject *self, PyObject *args) { Py_buffer target_buf; uint32_t seed = 0; @@ -570,7 +570,7 @@ PyDoc_STRVAR( ".. versionadded:: 5.0.0\n"); static PyObject * -mmh3_mmh3_x64_128_uintdigest(PyObject *self, PyObject *args, PyObject *keywds) +mmh3_mmh3_x64_128_uintdigest(PyObject *self, PyObject *args) { Py_buffer target_buf; uint32_t seed = 0; @@ -620,8 +620,7 @@ PyDoc_STRVAR( ".. versionadded:: 5.0.0\n"); static PyObject * -mmh3_mmh3_x64_128_stupledigest(PyObject *self, PyObject *args, - PyObject *keywds) +mmh3_mmh3_x64_128_stupledigest(PyObject *self, PyObject *args) { Py_buffer target_buf; uint32_t seed = 0; @@ -659,8 +658,7 @@ PyDoc_STRVAR( ".. versionadded:: 5.0.0\n"); static PyObject * -mmh3_mmh3_x64_128_utupledigest(PyObject *self, PyObject *args, - PyObject *keywds) +mmh3_mmh3_x64_128_utupledigest(PyObject *self, PyObject *args) { Py_buffer target_buf; uint32_t seed = 0; @@ -697,7 +695,7 @@ PyDoc_STRVAR( ".. versionadded:: 5.0.0\n"); static PyObject * -mmh3_mmh3_x86_128_digest(PyObject *self, PyObject *args, PyObject *keywds) +mmh3_mmh3_x86_128_digest(PyObject *self, PyObject *args) { Py_buffer target_buf; uint32_t seed = 0; @@ -737,7 +735,7 @@ PyDoc_STRVAR( ".. versionadded:: 5.0.0\n"); static PyObject * -mmh3_mmh3_x86_128_sintdigest(PyObject *self, PyObject *args, PyObject *keywds) +mmh3_mmh3_x86_128_sintdigest(PyObject *self, PyObject *args) { Py_buffer target_buf; uint32_t seed = 0; @@ -786,7 +784,7 @@ PyDoc_STRVAR( ".. versionadded:: 5.0.0\n"); static PyObject * -mmh3_mmh3_x86_128_uintdigest(PyObject *self, PyObject *args, PyObject *keywds) +mmh3_mmh3_x86_128_uintdigest(PyObject *self, PyObject *args) { Py_buffer target_buf; uint32_t seed = 0; @@ -836,8 +834,7 @@ PyDoc_STRVAR( ".. 
versionadded:: 5.0.0\n"); static PyObject * -mmh3_mmh3_x86_128_stupledigest(PyObject *self, PyObject *args, - PyObject *keywds) +mmh3_mmh3_x86_128_stupledigest(PyObject *self, PyObject *args) { Py_buffer target_buf; uint32_t seed = 0; @@ -875,8 +872,7 @@ PyDoc_STRVAR( ".. versionadded:: 5.0.0\n"); static PyObject * -mmh3_mmh3_x86_128_utupledigest(PyObject *self, PyObject *args, - PyObject *keywds) +mmh3_mmh3_x86_128_utupledigest(PyObject *self, PyObject *args) { Py_buffer target_buf; uint32_t seed = 0; @@ -894,16 +890,16 @@ mmh3_mmh3_x86_128_utupledigest(PyObject *self, PyObject *args, } static PyMethodDef Mmh3Methods[] = { - {"hash", (PyCFunction)mmh3_hash, METH_VARARGS | METH_KEYWORDS, + {"hash", (PyCFunctionWithKeywords)mmh3_hash, METH_VARARGS | METH_KEYWORDS, mmh3_hash_doc}, - {"hash_from_buffer", (PyCFunction)mmh3_hash_from_buffer, + {"hash_from_buffer", (PyCFunctionWithKeywords)mmh3_hash_from_buffer, METH_VARARGS | METH_KEYWORDS, mmh3_hash_from_buffer_doc}, - {"hash64", (PyCFunction)mmh3_hash64, METH_VARARGS | METH_KEYWORDS, - mmh3_hash64_doc}, - {"hash128", (PyCFunction)mmh3_hash128, METH_VARARGS | METH_KEYWORDS, - mmh3_hash128_doc}, - {"hash_bytes", (PyCFunction)mmh3_hash_bytes, METH_VARARGS | METH_KEYWORDS, - mmh3_hash_bytes_doc}, + {"hash64", (PyCFunctionWithKeywords)mmh3_hash64, + METH_VARARGS | METH_KEYWORDS, mmh3_hash64_doc}, + {"hash128", (PyCFunctionWithKeywords)mmh3_hash128, + METH_VARARGS | METH_KEYWORDS, mmh3_hash128_doc}, + {"hash_bytes", (PyCFunctionWithKeywords)mmh3_hash_bytes, + METH_VARARGS | METH_KEYWORDS, mmh3_hash_bytes_doc}, {"mmh3_32_digest", (PyCFunction)mmh3_mmh3_32_digest, METH_VARARGS, mmh3_mmh3_32_digest_doc}, {"mmh3_32_sintdigest", (PyCFunction)mmh3_mmh3_32_sintdigest, METH_VARARGS, diff --git a/tox.ini b/tox.ini index 3437a03..f735270 100644 --- a/tox.ini +++ b/tox.ini @@ -51,4 +51,4 @@ commands_pre = commands = git submodule update --init python util/refresh.py - find ./src/mmh3 -name '*.[ch]' -exec clang-format -i {} + + find 
./src/mmh3 -name '*.[ch]' -exec clang-format -i {} + \ No newline at end of file From 56be9cea57cfe64afff3398fc9b75936df9e3acf Mon Sep 17 00:00:00 2001 From: Hajime Senuma Date: Tue, 17 Sep 2024 00:21:44 +0900 Subject: [PATCH 03/16] Fix casting errors --- src/mmh3/mmh3module.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/src/mmh3/mmh3module.c b/src/mmh3/mmh3module.c index ffa3159..a9749ee 100644 --- a/src/mmh3/mmh3module.c +++ b/src/mmh3/mmh3module.c @@ -889,17 +889,21 @@ mmh3_mmh3_x86_128_utupledigest(PyObject *self, PyObject *args) return retval; } +// Casting to PyCFunction is mandatory for +// METH_VARARGS | METH_KEYWORDS functions. +// See +// https://docs.python.org/3/extending/extending.html#keyword-parameters-for-extension-functions static PyMethodDef Mmh3Methods[] = { - {"hash", (PyCFunctionWithKeywords)mmh3_hash, METH_VARARGS | METH_KEYWORDS, + {"hash", (PyCFunction)mmh3_hash, METH_VARARGS | METH_KEYWORDS, mmh3_hash_doc}, - {"hash_from_buffer", (PyCFunctionWithKeywords)mmh3_hash_from_buffer, + {"hash_from_buffer", (PyCFunction)mmh3_hash_from_buffer, METH_VARARGS | METH_KEYWORDS, mmh3_hash_from_buffer_doc}, - {"hash64", (PyCFunctionWithKeywords)mmh3_hash64, - METH_VARARGS | METH_KEYWORDS, mmh3_hash64_doc}, - {"hash128", (PyCFunctionWithKeywords)mmh3_hash128, - METH_VARARGS | METH_KEYWORDS, mmh3_hash128_doc}, - {"hash_bytes", (PyCFunctionWithKeywords)mmh3_hash_bytes, - METH_VARARGS | METH_KEYWORDS, mmh3_hash_bytes_doc}, + {"hash64", (PyCFunction)mmh3_hash64, METH_VARARGS | METH_KEYWORDS, + mmh3_hash64_doc}, + {"hash128", (PyCFunction)mmh3_hash128, METH_VARARGS | METH_KEYWORDS, + mmh3_hash128_doc}, + {"hash_bytes", (PyCFunction)mmh3_hash_bytes, METH_VARARGS | METH_KEYWORDS, + mmh3_hash_bytes_doc}, {"mmh3_32_digest", (PyCFunction)mmh3_mmh3_32_digest, METH_VARARGS, mmh3_mmh3_32_digest_doc}, {"mmh3_32_sintdigest", (PyCFunction)mmh3_mmh3_32_sintdigest, METH_VARARGS, From 5a169b92ae9bdb335464ddb95198cf29af09d010 Mon Sep 
17 00:00:00 2001 From: Hajime Senuma Date: Tue, 17 Sep 2024 01:01:42 +0900 Subject: [PATCH 04/16] Add tox testenv for benchmark and plot --- benchmark/plot_graph.py | 4 ++-- docs/CONTRIBUTING.md | 11 ++++------- tox.ini | 18 +++++++++++++++++- 3 files changed, 23 insertions(+), 10 deletions(-) diff --git a/benchmark/plot_graph.py b/benchmark/plot_graph.py index e3dfede..bd02ef0 100644 --- a/benchmark/plot_graph.py +++ b/benchmark/plot_graph.py @@ -124,8 +124,8 @@ def ordered_intersection(list1: list[T], list2: list[T]) -> list[T]: plt.savefig(os.path.join(args.output_dir, BANDWIDTH_SMALL_FILE_NAME)) df_latency_all = df_latency * 1000 - df_latency_all.index = df_latency_all.index / (1024 * 1024) - df_latency_all.plot(xlabel="Input size (MiB)", ylabel="Latency (ms)") + df_latency_all.index = df_latency_all.index / 1024 + df_latency_all.plot(xlabel="Input size (KiB)", ylabel="Latency (ms)") plt.savefig(os.path.join(args.output_dir, LATENCY_FILE_NAME)) df_latency_small = df_latency * 1000 * 1000 * 1000 diff --git a/docs/CONTRIBUTING.md b/docs/CONTRIBUTING.md index b1ebf98..0459511 100644 --- a/docs/CONTRIBUTING.md +++ b/docs/CONTRIBUTING.md @@ -153,8 +153,7 @@ Then, run `tox -e build_cfiles` again to update the `murmurhash3.*` files. To run benchmarks locally, try the following command: ```shell -pip install ".[benchmark]" -python benchmark/benchmark.py -o OUTPUT_FILE \ +tox -e benchmark -- -o OUTPUT_FILE \ --test-hash HASH_NAME --test-buffer-size-max HASH_SIZE ``` @@ -165,9 +164,8 @@ in bytes. For example, ```shell -pip install ".[benchmark]" -mkdir results -python benchmark/benchmark.py -o results/mmh3_128.json \ +mkdir -p _results +tox -e benchmark -- -o _results/mmh3_128.json \ --test-hash mmh3_128 --test-buffer-size-max 262144 ``` @@ -182,8 +180,7 @@ After obtaining the benchmark results, you can plot graphs by `plot_graph.py`. 
The following is an example of how to run the script: ```shell -pip install ".[benchmark,plot]" -python benchmark/plot_graph.py --output-dir docs/_static RESULT_DIR/*.json +tox -e plot -- --output-dir docs/_static RESULT_DIR/*.json ``` where `RESULT_DIR` is the directory containing the benchmark results. diff --git a/tox.ini b/tox.ini index f735270..13066fd 100644 --- a/tox.ini +++ b/tox.ini @@ -12,6 +12,7 @@ commands = [testenv:lint] description = run linters with formatting +skip_install = true allowlist_externals = find npx @@ -40,6 +41,7 @@ allowlist_externals = commands_pre = pip install ".[docs]" commands = + make -C docs clean make -C docs html [testenv:build_cfiles] @@ -51,4 +53,18 @@ commands_pre = commands = git submodule update --init python util/refresh.py - find ./src/mmh3 -name '*.[ch]' -exec clang-format -i {} + \ No newline at end of file + find ./src/mmh3 -name '*.[ch]' -exec clang-format -i {} + + +[testenv:benchmark] +description = run benchmarks +commands_pre = + pip install ".[benchmark]" +commands = + python benchmark/benchmark.py {posargs} + +[testenv:plot] +description = plot benchmark results +commands_pre = + pip install ".[benchmark,plot]" +commands = + python benchmark/plot_graph.py {posargs} \ No newline at end of file From c341ca033d7fdb17a2a6691ea47fa2ac24c15604 Mon Sep 17 00:00:00 2001 From: Hajime Senuma Date: Tue, 17 Sep 2024 02:05:23 +0900 Subject: [PATCH 05/16] Make some functions METH_FASTCALL --- src/mmh3/mmh3module.c | 43 +++++++++++++++++++++++++++++++++---------- 1 file changed, 33 insertions(+), 10 deletions(-) diff --git a/src/mmh3/mmh3module.c b/src/mmh3/mmh3module.c index a9749ee..a690181 100644 --- a/src/mmh3/mmh3module.c +++ b/src/mmh3/mmh3module.c @@ -32,6 +32,28 @@ typedef unsigned __int64 uint64_t; #define MMH3_32_BLOCKSIZE 12 #define MMH3_128_BLOCKSIZE 32 +#define MMH3_VALIDATE_ARGS_AND_SET_SEED(nargs, args, seed) \ + if (nargs < 1) { \ + PyErr_SetString(PyExc_TypeError, \ + "function takes at least 1 argument (0 
given)"); \ + return NULL; \ + } \ + if (nargs > 2) { \ + PyErr_Format(PyExc_TypeError, \ + "function takes at most 2 arguments (%d given)", \ + (int)nargs); \ + return NULL; \ + } \ + if (nargs == 2) { \ + if (!PyLong_Check(args[1])) { \ + PyErr_Format(PyExc_TypeError, \ + "'%s' object cannot be interpreted as an integer", \ + Py_TYPE(args[1])->tp_name); \ + return NULL; \ + } \ + seed = (uint32_t)PyLong_AsLong(args[1]); \ + } + //----------------------------------------------------------------------------- // One shot functions @@ -371,15 +393,15 @@ PyDoc_STRVAR( ".. versionadded:: 5.0.0\n"); static PyObject * -mmh3_mmh3_32_digest(PyObject *self, PyObject *args) +mmh3_mmh3_32_digest(PyObject *self, PyObject *const *args, Py_ssize_t nargs) { Py_buffer target_buf; uint32_t seed = 0; char result[MMH3_32_DIGESTSIZE]; - if (!PyArg_ParseTuple(args, "y*|I", &target_buf, &seed)) { - return NULL; - } + MMH3_VALIDATE_ARGS_AND_SET_SEED(nargs, args, seed); + + GET_BUFFER_VIEW_OR_ERROUT(args[0], &target_buf); murmurhash3_x86_32(target_buf.buf, target_buf.len, seed, result); PyBuffer_Release(&target_buf); @@ -481,15 +503,16 @@ PyDoc_STRVAR( ".. 
versionadded:: 5.0.0\n"); static PyObject * -mmh3_mmh3_x64_128_digest(PyObject *self, PyObject *args) +mmh3_mmh3_x64_128_digest(PyObject *self, PyObject *const *args, + Py_ssize_t nargs) { Py_buffer target_buf; uint32_t seed = 0; uint64_t result[2]; - if (!PyArg_ParseTuple(args, "y*|I", &target_buf, &seed)) { - return NULL; - } + MMH3_VALIDATE_ARGS_AND_SET_SEED(nargs, args, seed); + + GET_BUFFER_VIEW_OR_ERROUT(args[0], &target_buf); murmurhash3_x64_128(target_buf.buf, target_buf.len, seed, result); PyBuffer_Release(&target_buf); @@ -904,14 +927,14 @@ static PyMethodDef Mmh3Methods[] = { mmh3_hash128_doc}, {"hash_bytes", (PyCFunction)mmh3_hash_bytes, METH_VARARGS | METH_KEYWORDS, mmh3_hash_bytes_doc}, - {"mmh3_32_digest", (PyCFunction)mmh3_mmh3_32_digest, METH_VARARGS, + {"mmh3_32_digest", (PyCFunction)mmh3_mmh3_32_digest, METH_FASTCALL, mmh3_mmh3_32_digest_doc}, {"mmh3_32_sintdigest", (PyCFunction)mmh3_mmh3_32_sintdigest, METH_VARARGS, mmh3_mmh3_32_sintdigest_doc}, {"mmh3_32_uintdigest", (PyCFunction)mmh3_mmh3_32_uintdigest, METH_VARARGS, mmh3_mmh3_32_uintdigest_doc}, {"mmh3_x64_128_digest", (PyCFunction)mmh3_mmh3_x64_128_digest, - METH_VARARGS, mmh3_mmh3_x64_128_digest_doc}, + METH_FASTCALL, mmh3_mmh3_x64_128_digest_doc}, {"mmh3_x64_128_sintdigest", (PyCFunction)mmh3_mmh3_x64_128_sintdigest, METH_VARARGS, mmh3_mmh3_x64_128_sintdigest_doc}, {"mmh3_x64_128_uintdigest", (PyCFunction)mmh3_mmh3_x64_128_uintdigest, From bf273e90fc2105a0ad6c52b3ec01f763dccbdfd2 Mon Sep 17 00:00:00 2001 From: Hajime Senuma Date: Tue, 17 Sep 2024 02:24:16 +0900 Subject: [PATCH 06/16] Fix seed limit error --- src/mmh3/mmh3module.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/mmh3/mmh3module.c b/src/mmh3/mmh3module.c index a690181..b840fdb 100644 --- a/src/mmh3/mmh3module.c +++ b/src/mmh3/mmh3module.c @@ -51,7 +51,10 @@ typedef unsigned __int64 uint64_t; Py_TYPE(args[1])->tp_name); \ return NULL; \ } \ - seed = (uint32_t)PyLong_AsLong(args[1]); \ + seed = 
(uint32_t)PyLong_AsUnsignedLong(args[1]); \ + if (seed == (uint32_t) - 1 && PyErr_Occurred()) { \ + return NULL; \ + } \ } //----------------------------------------------------------------------------- From 391d040aef5285cee730f461b7e5d5e1e9b97aed Mon Sep 17 00:00:00 2001 From: Hajime Senuma Date: Tue, 17 Sep 2024 02:37:52 +0900 Subject: [PATCH 07/16] Fix lint error --- src/mmh3/mmh3module.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mmh3/mmh3module.c b/src/mmh3/mmh3module.c index b840fdb..ca3966e 100644 --- a/src/mmh3/mmh3module.c +++ b/src/mmh3/mmh3module.c @@ -52,7 +52,7 @@ typedef unsigned __int64 uint64_t; return NULL; \ } \ seed = (uint32_t)PyLong_AsUnsignedLong(args[1]); \ - if (seed == (uint32_t) - 1 && PyErr_Occurred()) { \ + if (seed == (unsigned long)-1 && PyErr_Occurred()) { \ return NULL; \ } \ } From 551c477e0095113f64da2cb2bbe2fbde77ce0017 Mon Sep 17 00:00:00 2001 From: Hajime Senuma Date: Tue, 17 Sep 2024 08:22:41 +0900 Subject: [PATCH 08/16] Make all *digest funcs METH_FASTCALL --- src/mmh3/mmh3module.c | 121 +++++++++++++++++++++++------------------- tests/test_mmh3.py | 50 ++++++----------- 2 files changed, 81 insertions(+), 90 deletions(-) diff --git a/src/mmh3/mmh3module.c b/src/mmh3/mmh3module.c index ca3966e..53ecda8 100644 --- a/src/mmh3/mmh3module.c +++ b/src/mmh3/mmh3module.c @@ -435,15 +435,16 @@ PyDoc_STRVAR( ".. versionadded:: 5.0.0\n"); static PyObject * -mmh3_mmh3_32_sintdigest(PyObject *self, PyObject *args) +mmh3_mmh3_32_sintdigest(PyObject *self, PyObject *const *args, + Py_ssize_t nargs) { Py_buffer target_buf; uint32_t seed = 0; int32_t result[1]; - if (!PyArg_ParseTuple(args, "y*|I", &target_buf, &seed)) { - return NULL; - } + MMH3_VALIDATE_ARGS_AND_SET_SEED(nargs, args, seed); + + GET_BUFFER_VIEW_OR_ERROUT(args[0], &target_buf); murmurhash3_x86_32(target_buf.buf, target_buf.len, seed, result); PyBuffer_Release(&target_buf); @@ -470,15 +471,16 @@ PyDoc_STRVAR( ".. 
versionadded:: 5.0.0\n"); static PyObject * -mmh3_mmh3_32_uintdigest(PyObject *self, PyObject *args) +mmh3_mmh3_32_uintdigest(PyObject *self, PyObject *const *args, + Py_ssize_t nargs) { Py_buffer target_buf; uint32_t seed = 0; uint32_t result[1]; - if (!PyArg_ParseTuple(args, "y*|I", &target_buf, &seed)) { - return NULL; - } + MMH3_VALIDATE_ARGS_AND_SET_SEED(nargs, args, seed); + + GET_BUFFER_VIEW_OR_ERROUT(args[0], &target_buf); murmurhash3_x86_32(target_buf.buf, target_buf.len, seed, result); PyBuffer_Release(&target_buf); @@ -547,15 +549,16 @@ PyDoc_STRVAR( ".. versionadded:: 5.0.0\n"); static PyObject * -mmh3_mmh3_x64_128_sintdigest(PyObject *self, PyObject *args) +mmh3_mmh3_x64_128_sintdigest(PyObject *self, PyObject *const *args, + Py_ssize_t nargs) { Py_buffer target_buf; uint32_t seed = 0; uint64_t result[2]; - if (!PyArg_ParseTuple(args, "y*|I", &target_buf, &seed)) { - return NULL; - } + MMH3_VALIDATE_ARGS_AND_SET_SEED(nargs, args, seed); + + GET_BUFFER_VIEW_OR_ERROUT(args[0], &target_buf); murmurhash3_x64_128(target_buf.buf, target_buf.len, seed, result); PyBuffer_Release(&target_buf); @@ -596,15 +599,16 @@ PyDoc_STRVAR( ".. versionadded:: 5.0.0\n"); static PyObject * -mmh3_mmh3_x64_128_uintdigest(PyObject *self, PyObject *args) +mmh3_mmh3_x64_128_uintdigest(PyObject *self, PyObject *const *args, + Py_ssize_t nargs) { Py_buffer target_buf; uint32_t seed = 0; uint64_t result[2]; - if (!PyArg_ParseTuple(args, "y*|I", &target_buf, &seed)) { - return NULL; - } + MMH3_VALIDATE_ARGS_AND_SET_SEED(nargs, args, seed); + + GET_BUFFER_VIEW_OR_ERROUT(args[0], &target_buf); murmurhash3_x64_128(target_buf.buf, target_buf.len, seed, result); PyBuffer_Release(&target_buf); @@ -646,15 +650,16 @@ PyDoc_STRVAR( ".. 
versionadded:: 5.0.0\n"); static PyObject * -mmh3_mmh3_x64_128_stupledigest(PyObject *self, PyObject *args) +mmh3_mmh3_x64_128_stupledigest(PyObject *self, PyObject *const *args, + Py_ssize_t nargs) { Py_buffer target_buf; uint32_t seed = 0; uint64_t result[2]; - if (!PyArg_ParseTuple(args, "y*|I", &target_buf, &seed)) { - return NULL; - } + MMH3_VALIDATE_ARGS_AND_SET_SEED(nargs, args, seed); + + GET_BUFFER_VIEW_OR_ERROUT(args[0], &target_buf); murmurhash3_x64_128(target_buf.buf, target_buf.len, seed, result); PyBuffer_Release(&target_buf); @@ -684,15 +689,16 @@ PyDoc_STRVAR( ".. versionadded:: 5.0.0\n"); static PyObject * -mmh3_mmh3_x64_128_utupledigest(PyObject *self, PyObject *args) +mmh3_mmh3_x64_128_utupledigest(PyObject *self, PyObject *const *args, + Py_ssize_t nargs) { Py_buffer target_buf; uint32_t seed = 0; uint64_t result[2]; - if (!PyArg_ParseTuple(args, "y*|I", &target_buf, &seed)) { - return NULL; - } + MMH3_VALIDATE_ARGS_AND_SET_SEED(nargs, args, seed); + + GET_BUFFER_VIEW_OR_ERROUT(args[0], &target_buf); murmurhash3_x64_128(target_buf.buf, target_buf.len, seed, result); PyBuffer_Release(&target_buf); @@ -721,15 +727,16 @@ PyDoc_STRVAR( ".. versionadded:: 5.0.0\n"); static PyObject * -mmh3_mmh3_x86_128_digest(PyObject *self, PyObject *args) +mmh3_mmh3_x86_128_digest(PyObject *self, PyObject *const *args, + Py_ssize_t nargs) { Py_buffer target_buf; uint32_t seed = 0; uint64_t result[2]; - if (!PyArg_ParseTuple(args, "y*|I", &target_buf, &seed)) { - return NULL; - } + MMH3_VALIDATE_ARGS_AND_SET_SEED(nargs, args, seed); + + GET_BUFFER_VIEW_OR_ERROUT(args[0], &target_buf); murmurhash3_x86_128(target_buf.buf, target_buf.len, seed, result); PyBuffer_Release(&target_buf); @@ -761,15 +768,16 @@ PyDoc_STRVAR( ".. 
versionadded:: 5.0.0\n"); static PyObject * -mmh3_mmh3_x86_128_sintdigest(PyObject *self, PyObject *args) +mmh3_mmh3_x86_128_sintdigest(PyObject *self, PyObject *const *args, + Py_ssize_t nargs) { Py_buffer target_buf; uint32_t seed = 0; uint64_t result[2]; - if (!PyArg_ParseTuple(args, "y*|I", &target_buf, &seed)) { - return NULL; - } + MMH3_VALIDATE_ARGS_AND_SET_SEED(nargs, args, seed); + + GET_BUFFER_VIEW_OR_ERROUT(args[0], &target_buf); murmurhash3_x86_128(target_buf.buf, target_buf.len, seed, result); PyBuffer_Release(&target_buf); @@ -810,15 +818,16 @@ PyDoc_STRVAR( ".. versionadded:: 5.0.0\n"); static PyObject * -mmh3_mmh3_x86_128_uintdigest(PyObject *self, PyObject *args) +mmh3_mmh3_x86_128_uintdigest(PyObject *self, PyObject *const *args, + Py_ssize_t nargs) { Py_buffer target_buf; uint32_t seed = 0; uint64_t result[2]; - if (!PyArg_ParseTuple(args, "y*|I", &target_buf, &seed)) { - return NULL; - } + MMH3_VALIDATE_ARGS_AND_SET_SEED(nargs, args, seed); + + GET_BUFFER_VIEW_OR_ERROUT(args[0], &target_buf); murmurhash3_x86_128(target_buf.buf, target_buf.len, seed, result); PyBuffer_Release(&target_buf); @@ -860,15 +869,16 @@ PyDoc_STRVAR( ".. versionadded:: 5.0.0\n"); static PyObject * -mmh3_mmh3_x86_128_stupledigest(PyObject *self, PyObject *args) +mmh3_mmh3_x86_128_stupledigest(PyObject *self, PyObject *const *args, + Py_ssize_t nargs) { Py_buffer target_buf; uint32_t seed = 0; uint64_t result[2]; - if (!PyArg_ParseTuple(args, "y*|I", &target_buf, &seed)) { - return NULL; - } + MMH3_VALIDATE_ARGS_AND_SET_SEED(nargs, args, seed); + + GET_BUFFER_VIEW_OR_ERROUT(args[0], &target_buf); murmurhash3_x86_128(target_buf.buf, target_buf.len, seed, result); PyBuffer_Release(&target_buf); @@ -898,15 +908,16 @@ PyDoc_STRVAR( ".. 
versionadded:: 5.0.0\n"); static PyObject * -mmh3_mmh3_x86_128_utupledigest(PyObject *self, PyObject *args) +mmh3_mmh3_x86_128_utupledigest(PyObject *self, PyObject *const *args, + Py_ssize_t nargs) { Py_buffer target_buf; uint32_t seed = 0; uint64_t result[2]; - if (!PyArg_ParseTuple(args, "y*|I", &target_buf, &seed)) { - return NULL; - } + MMH3_VALIDATE_ARGS_AND_SET_SEED(nargs, args, seed); + + GET_BUFFER_VIEW_OR_ERROUT(args[0], &target_buf); murmurhash3_x86_128(target_buf.buf, target_buf.len, seed, result); PyBuffer_Release(&target_buf); @@ -932,30 +943,30 @@ static PyMethodDef Mmh3Methods[] = { mmh3_hash_bytes_doc}, {"mmh3_32_digest", (PyCFunction)mmh3_mmh3_32_digest, METH_FASTCALL, mmh3_mmh3_32_digest_doc}, - {"mmh3_32_sintdigest", (PyCFunction)mmh3_mmh3_32_sintdigest, METH_VARARGS, + {"mmh3_32_sintdigest", (PyCFunction)mmh3_mmh3_32_sintdigest, METH_FASTCALL, mmh3_mmh3_32_sintdigest_doc}, - {"mmh3_32_uintdigest", (PyCFunction)mmh3_mmh3_32_uintdigest, METH_VARARGS, + {"mmh3_32_uintdigest", (PyCFunction)mmh3_mmh3_32_uintdigest, METH_FASTCALL, mmh3_mmh3_32_uintdigest_doc}, {"mmh3_x64_128_digest", (PyCFunction)mmh3_mmh3_x64_128_digest, METH_FASTCALL, mmh3_mmh3_x64_128_digest_doc}, {"mmh3_x64_128_sintdigest", (PyCFunction)mmh3_mmh3_x64_128_sintdigest, - METH_VARARGS, mmh3_mmh3_x64_128_sintdigest_doc}, + METH_FASTCALL, mmh3_mmh3_x64_128_sintdigest_doc}, {"mmh3_x64_128_uintdigest", (PyCFunction)mmh3_mmh3_x64_128_uintdigest, - METH_VARARGS, mmh3_mmh3_x64_128_uintdigest_doc}, + METH_FASTCALL, mmh3_mmh3_x64_128_uintdigest_doc}, {"mmh3_x64_128_stupledigest", (PyCFunction)mmh3_mmh3_x64_128_stupledigest, - METH_VARARGS, mmh3_mmh3_x64_128_stupledigest_doc}, + METH_FASTCALL, mmh3_mmh3_x64_128_stupledigest_doc}, {"mmh3_x64_128_utupledigest", (PyCFunction)mmh3_mmh3_x64_128_utupledigest, - METH_VARARGS, mmh3_mmh3_x64_128_utupledigest_doc}, + METH_FASTCALL, mmh3_mmh3_x64_128_utupledigest_doc}, {"mmh3_x86_128_digest", (PyCFunction)mmh3_mmh3_x86_128_digest, - METH_VARARGS, 
mmh3_mmh3_x86_128_digest_doc}, + METH_FASTCALL, mmh3_mmh3_x86_128_digest_doc}, {"mmh3_x86_128_sintdigest", (PyCFunction)mmh3_mmh3_x86_128_sintdigest, - METH_VARARGS, mmh3_mmh3_x86_128_sintdigest_doc}, + METH_FASTCALL, mmh3_mmh3_x86_128_sintdigest_doc}, {"mmh3_x86_128_uintdigest", (PyCFunction)mmh3_mmh3_x86_128_uintdigest, - METH_VARARGS, mmh3_mmh3_x86_128_uintdigest_doc}, + METH_FASTCALL, mmh3_mmh3_x86_128_uintdigest_doc}, {"mmh3_x86_128_stupledigest", (PyCFunction)mmh3_mmh3_x86_128_stupledigest, - METH_VARARGS, mmh3_mmh3_x86_128_stupledigest_doc}, + METH_FASTCALL, mmh3_mmh3_x86_128_stupledigest_doc}, {"mmh3_x86_128_utupledigest", (PyCFunction)mmh3_mmh3_x86_128_utupledigest, - METH_VARARGS, mmh3_mmh3_x86_128_utupledigest_doc}, + METH_FASTCALL, mmh3_mmh3_x86_128_utupledigest_doc}, {NULL, NULL, 0, NULL}}; //----------------------------------------------------------------------------- diff --git a/tests/test_mmh3.py b/tests/test_mmh3.py index 35a8cc7..c8d935a 100644 --- a/tests/test_mmh3.py +++ b/tests/test_mmh3.py @@ -305,9 +305,7 @@ def test_mmh3_sintdigest() -> None: # https://stackoverflow.com/a/31929528 assert mmh3.mmh3_32_sintdigest(b"", 0) == 0 assert mmh3.mmh3_32_sintdigest(b"", 1) == 0x514E28B7 - assert mmh3.mmh3_32_sintdigest(b"", u32_to_s32(0xFFFFFFFF)) == u32_to_s32( - 0x81F16F39 - ) + assert mmh3.mmh3_32_sintdigest(b"", 0xFFFFFFFF) == u32_to_s32(0x81F16F39) assert mmh3.mmh3_32_sintdigest(b"\x21\x43\x65\x87", 0) == u32_to_s32(0xF55B516B) assert mmh3.mmh3_32_sintdigest( b"\x21\x43\x65\x87", u32_to_s32(0x5082EDEE) @@ -321,43 +319,25 @@ def test_mmh3_sintdigest() -> None: assert mmh3.mmh3_32_sintdigest(b"\x00\x00", 0) == u32_to_s32(0x30F4C306) assert mmh3.mmh3_32_sintdigest(b"\x00", 0) == u32_to_s32(0x514E28B7) - assert mmh3.mmh3_32_sintdigest(b"aaaa", u32_to_s32(0x9747B28C)) == u32_to_s32( - 0x5A97808A - ) - assert mmh3.mmh3_32_sintdigest(b"aaa", u32_to_s32(0x9747B28C)) == u32_to_s32( - 0x283E0130 - ) - assert mmh3.mmh3_32_sintdigest(b"aa", 
u32_to_s32(0x9747B28C)) == u32_to_s32( - 0x5D211726 - ) - assert mmh3.mmh3_32_sintdigest(b"a", u32_to_s32(0x9747B28C)) == u32_to_s32( - 0x7FA09EA6 - ) + assert mmh3.mmh3_32_sintdigest(b"aaaa", 0x9747B28C) == u32_to_s32(0x5A97808A) + assert mmh3.mmh3_32_sintdigest(b"aaa", 0x9747B28C) == u32_to_s32(0x283E0130) + assert mmh3.mmh3_32_sintdigest(b"aa", 0x9747B28C) == u32_to_s32(0x5D211726) + assert mmh3.mmh3_32_sintdigest(b"a", 0x9747B28C) == u32_to_s32(0x7FA09EA6) - assert mmh3.mmh3_32_sintdigest(b"abcd", u32_to_s32(0x9747B28C)) == u32_to_s32( - 0xF0478627 - ) - assert mmh3.mmh3_32_sintdigest(b"abc", u32_to_s32(0x9747B28C)) == u32_to_s32( - 0xC84A62DD - ) - assert mmh3.mmh3_32_sintdigest(b"ab", u32_to_s32(0x9747B28C)) == u32_to_s32( - 0x74875592 - ) - assert mmh3.mmh3_32_sintdigest(b"a", u32_to_s32(0x9747B28C)) == u32_to_s32( - 0x7FA09EA6 - ) + assert mmh3.mmh3_32_sintdigest(b"abcd", 0x9747B28C) == u32_to_s32(0xF0478627) + assert mmh3.mmh3_32_sintdigest(b"abc", 0x9747B28C) == u32_to_s32(0xC84A62DD) + assert mmh3.mmh3_32_sintdigest(b"ab", 0x9747B28C) == u32_to_s32(0x74875592) + assert mmh3.mmh3_32_sintdigest(b"a", 0x9747B28C) == u32_to_s32(0x7FA09EA6) - assert mmh3.mmh3_32_sintdigest( - b"Hello, world!", u32_to_s32(0x9747B28C) - ) == u32_to_s32(0x24884CBA) + assert mmh3.mmh3_32_sintdigest(b"Hello, world!", 0x9747B28C) == u32_to_s32( + 0x24884CBA + ) assert mmh3.mmh3_32_sintdigest( - "ππππππππ".encode("utf-8"), u32_to_s32(0x9747B28C) + "ππππππππ".encode("utf-8"), 0x9747B28C ) == u32_to_s32(0xD58063C1) - assert mmh3.mmh3_32_sintdigest(b"a" * 256, u32_to_s32(0x9747B28C)) == u32_to_s32( - 0x37405BDC - ) + assert mmh3.mmh3_32_sintdigest(b"a" * 256, 0x9747B28C) == u32_to_s32(0x37405BDC) assert mmh3.mmh3_32_sintdigest(b"abc", 0) == u32_to_s32(0xB3DD93FA) assert mmh3.mmh3_32_sintdigest( @@ -365,7 +345,7 @@ def test_mmh3_sintdigest() -> None: ) == u32_to_s32(0xEE925B90) assert mmh3.mmh3_32_sintdigest( - b"The quick brown fox jumps over the lazy dog", u32_to_s32(0x9747B28C) + 
b"The quick brown fox jumps over the lazy dog", 0x9747B28C ) == u32_to_s32(0x2FA826CD) From 1b3f19becfc80a756c6e906970dcaa915748ba3d Mon Sep 17 00:00:00 2001 From: Hajime Senuma Date: Tue, 17 Sep 2024 09:27:01 +0900 Subject: [PATCH 09/16] Update CHANGELOG --- CHANGELOG.md | 39 ++++++++++++++++++++++++--------------- README.md | 29 +++++++++++++++++++---------- 2 files changed, 43 insertions(+), 25 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b92e529..c6805ff 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,11 +10,14 @@ This project has adhered to ### Added -- Add `digest` functions that accept a non-immutable buffer as input - and process it without internal copying - ([#75](https://github.com/hajimes/mmh3/issues/75)). -- Slightly improve the performance of the `hash_bytes` function. - Add support for Python 3.13. +- Add `digest` functions that support the new buffer protocol + ([PEP 688](https://peps.python.org/pep-0688/)) as input + ([#75](https://github.com/hajimes/mmh3/pull/75)). + These functions are implemented with + [METH_FASTCALL](https://docs.python.org/3/c-api/structures.html#c.METH_FASTCALL), + offering improved performance over legacy functions. +- Slightly improve the performance of the `hash_bytes()` function. - Add Read the Docs documentation ([#54](https://github.com/hajimes/mmh3/issues/54)). - (planned: Document benchmark results @@ -24,16 +27,22 @@ This project has adhered to - Change the format of CHANGELOG.md to conform to the [Keep a Changelog](https://keepachangelog.com/en/1.1.0/) standard - ([#63](https://github.com/hajimes/mmh3/issues/63)). + ([#63](https://github.com/hajimes/mmh3/pull/63)). - **Backward-incompatible**: Change the constructors of hasher classes to - accept a buffer as the first argument. + accept a buffer as the first argument + ([#83](https://github.com/hajimes/mmh3/pull/83)). + +### Deprecated + +- Deprecate the `hash_from_buffer()` function. 
+ Use `mmh3_32_sintdigest()` or `mmh3_32_uintdigest()` as alternatives. ### Fixed - Fix a reference leak in the `hash_from_buffer()` function - ([#75](https://github.com/hajimes/mmh3/issues/75)). -- Fix type hints ([#76](https://github.com/hajimes/mmh3/issues/76), - [#77](https://github.com/hajimes/mmh3/issues/77)). + ([#75](https://github.com/hajimes/mmh3/pull/75)). +- Fix type hints ([#76](https://github.com/hajimes/mmh3/pull/76), + [#77](https://github.com/hajimes/mmh3/pull/77)). ## [4.1.0] - 2024-01-09 @@ -47,7 +56,7 @@ This project has adhered to ([#50](https://github.com/hajimes/mmh3/issues/50)). - Fix incorrect type hints ([#51](https://github.com/hajimes/mmh3/issues/51)). - Fix invalid results on s390x when the arg `x64arch` of `hash64` or - `hash_bytes` is set to `False` + `hash_bytes()` is set to `False` ([#52](https://github.com/hajimes/mmh3/issues/52)). ## [4.0.1] - 2023-07-14 @@ -97,8 +106,8 @@ This project has adhered to [wouter bolsterlee](https://github.com/wbolster) and [Dušan Nikolić](https://github.com/n-dusan)! - Add support for 32-bit architectures such as `i686` and `armv7l`. From now on, - `hash` and `hash_from_buffer` on these architectures will generate the same - hash values as those on other environments. Thanks + `hash()` and `hash_from_buffer()` on these architectures will generate the + same hash values as those on other environments. Thanks [Danil Shein](https://github.com/dshein-alt)! - In relation to the above, `manylinux2014_i686` wheels are now available. - Support for hashing huge data (>16GB). Thanks @@ -134,13 +143,13 @@ This project has adhered to ### Fixed -- Bugfix for `hash_bytes`. Thanks [doozr](https://github.com/doozr)! +- Bugfix for `hash_bytes()`. Thanks [doozr](https://github.com/doozr)! ## [2.5] - 2017-10-28 ### Added -- Add `hash_from_buffer`. Thanks [Dimitri Vorona](https://github.com/alendit)! +- Add `hash_from_buffer()`. Thanks [Dimitri Vorona](https://github.com/alendit)! - Add a keyword argument `signed`. 
## [2.4] - 2017-05-27 @@ -175,7 +184,7 @@ Thanks! ### Added -- Add `hash128`, which returns a 128-bit signed integer. +- Add `hash128()`, which returns a 128-bit signed integer. ### Fixed diff --git a/README.md b/README.md index 4cf5cbe..6dcce39 100644 --- a/README.md +++ b/README.md @@ -135,11 +135,14 @@ complete changelog. #### Added -- Add `digest` functions that accept a non-immutable buffer as input - and process it without internal copying - ([#75](https://github.com/hajimes/mmh3/issues/75)). -- Slightly improve the performance of the `hash_bytes` function. - Add support for Python 3.13. +- Add `digest` functions that support the new buffer protocol + ([PEP 688](https://peps.python.org/pep-0688/)) as input + ([#75](https://github.com/hajimes/mmh3/pull/75)). + These functions are implemented with + [METH_FASTCALL](https://docs.python.org/3/c-api/structures.html#c.METH_FASTCALL), + offering improved performance over legacy functions. +- Slightly improve the performance of the `hash_bytes()` function. - Add Read the Docs documentation ([#54](https://github.com/hajimes/mmh3/issues/54)). - (planned: Document benchmark results @@ -149,16 +152,22 @@ complete changelog. - Change the format of CHANGELOG.md to conform to the [Keep a Changelog](https://keepachangelog.com/en/1.1.0/) standard - ([#63](https://github.com/hajimes/mmh3/issues/63)). + ([#63](https://github.com/hajimes/mmh3/pull/63)). - **Backward-incompatible**: Change the constructors of hasher classes to - accept a buffer as the first argument. + accept a buffer as the first argument + ([#83](https://github.com/hajimes/mmh3/pull/83)). + +#### Deprecated + +- Deprecate the `hash_from_buffer()` function. + Use `mmh3_32_sintdigest()` or `mmh3_32_uintdigest()` as alternatives. #### Fixed - Fix a reference leak in the `hash_from_buffer()` function - ([#75](https://github.com/hajimes/mmh3/issues/75)). 
-- Fix type hints ([#76](https://github.com/hajimes/mmh3/issues/76), - [#77](https://github.com/hajimes/mmh3/issues/77)). + ([#75](https://github.com/hajimes/mmh3/pull/75)). +- Fix type hints ([#76](https://github.com/hajimes/mmh3/pull/76), + [#77](https://github.com/hajimes/mmh3/pull/77)). ### [4.1.0] - 2024-01-09 @@ -172,7 +181,7 @@ complete changelog. ([#50](https://github.com/hajimes/mmh3/issues/50)). - Fix incorrect type hints ([#51](https://github.com/hajimes/mmh3/issues/51)). - Fix invalid results on s390x when the arg `x64arch` of `hash64` or - `hash_bytes` is set to `False` + `hash_bytes()` is set to `False` ([#52](https://github.com/hajimes/mmh3/issues/52)). ## License From 2ad433104978729e9b733681869feb902fd7e591 Mon Sep 17 00:00:00 2001 From: Hajime Senuma Date: Tue, 17 Sep 2024 11:48:59 +0900 Subject: [PATCH 10/16] Add value range check for seed --- src/mmh3/mmh3module.c | 12 ++++++++++-- tests/test_invalid_inputs.py | 24 ++++++++++++++++++++++++ tox.ini | 2 +- 3 files changed, 35 insertions(+), 3 deletions(-) create mode 100644 tests/test_invalid_inputs.py diff --git a/src/mmh3/mmh3module.c b/src/mmh3/mmh3module.c index 53ecda8..7c7dc39 100644 --- a/src/mmh3/mmh3module.c +++ b/src/mmh3/mmh3module.c @@ -51,10 +51,18 @@ typedef unsigned __int64 uint64_t; Py_TYPE(args[1])->tp_name); \ return NULL; \ } \ - seed = (uint32_t)PyLong_AsUnsignedLong(args[1]); \ - if (seed == (unsigned long)-1 && PyErr_Occurred()) { \ + const unsigned long seed_tmp = PyLong_AsUnsignedLong(args[1]); \ + if (seed_tmp == -1 && PyErr_Occurred()) { \ + if (PyErr_ExceptionMatches(PyExc_OverflowError)) { \ + PyErr_SetString(PyExc_ValueError, "seed is out of range"); \ + return NULL; \ + } \ + } \ + if (seed_tmp > 0xFFFFFFFF) { \ + PyErr_SetString(PyExc_ValueError, "seed is out of range"); \ return NULL; \ } \ + seed = (uint32_t)seed_tmp; \ } //----------------------------------------------------------------------------- diff --git a/tests/test_invalid_inputs.py 
b/tests/test_invalid_inputs.py new file mode 100644 index 0000000..9e802d9 --- /dev/null +++ b/tests/test_invalid_inputs.py @@ -0,0 +1,24 @@ +# pylint: disable=missing-module-docstring, missing-function-docstring +# pylint: disable=no-value-for-parameter, too-many-function-args +import mmh3 +import pytest + + +def test_mmh3_32_digest_raises_typeerror() -> None: + with pytest.raises(TypeError): + mmh3.mmh3_32_digest() + with pytest.raises(TypeError): + mmh3.mmh3_32_digest(b"hello, world", 42, 1234) + with pytest.raises(TypeError): + mmh3.mmh3_32_digest("hello, world") + with pytest.raises(TypeError): + mmh3.mmh3_32_digest(b"hello, world", "42") + with pytest.raises(TypeError): + mmh3.mmh3_32_digest([1, 2, 3], 42) + + +def test_mmh3_32_digest_raises_valueerror() -> None: + with pytest.raises(ValueError): + mmh3.mmh3_32_digest(b"hello, world", -1) + with pytest.raises(ValueError): + mmh3.mmh3_32_digest(b"hello, world", 2**32) diff --git a/tox.ini b/tox.ini index 13066fd..2f64ec3 100644 --- a/tox.ini +++ b/tox.ini @@ -8,7 +8,7 @@ description = run unit tests commands_pre = pip install ".[test]" commands = - pytest + pytest {posargs} [testenv:lint] description = run linters with formatting From 5a38ca4f4b5ba68d831b81837db7cddd9bd58a22 Mon Sep 17 00:00:00 2001 From: Hajime Senuma Date: Tue, 17 Sep 2024 12:00:05 +0900 Subject: [PATCH 11/16] Fix mypy errors --- tests/test_invalid_inputs.py | 4 ++++ tox.ini | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/test_invalid_inputs.py b/tests/test_invalid_inputs.py index 9e802d9..f99b35b 100644 --- a/tests/test_invalid_inputs.py +++ b/tests/test_invalid_inputs.py @@ -1,9 +1,12 @@ # pylint: disable=missing-module-docstring, missing-function-docstring # pylint: disable=no-value-for-parameter, too-many-function-args +from typing import no_type_check + import mmh3 import pytest +@no_type_check def test_mmh3_32_digest_raises_typeerror() -> None: with pytest.raises(TypeError): mmh3.mmh3_32_digest() @@ -17,6 
+20,7 @@ def test_mmh3_32_digest_raises_typeerror() -> None: mmh3.mmh3_32_digest([1, 2, 3], 42) +@no_type_check def test_mmh3_32_digest_raises_valueerror() -> None: with pytest.raises(ValueError): mmh3.mmh3_32_digest(b"hello, world", -1) diff --git a/tox.ini b/tox.ini index 2f64ec3..eb77b63 100644 --- a/tox.ini +++ b/tox.ini @@ -30,7 +30,7 @@ commands = [testenv:type] description = run type checks commands_pre = - pip install ".[type]" + pip install ".[test,type]" commands = mypy --strict tests From 7106202509ee551869b39f12f6c9b01bb2a9487d Mon Sep 17 00:00:00 2001 From: Hajime Senuma Date: Tue, 17 Sep 2024 15:23:50 +0900 Subject: [PATCH 12/16] Add seed range check for all funcs --- src/mmh3/__init__.pyi | 14 +- src/mmh3/mmh3module.c | 160 +++++++++++++------- tests/test_invalid_inputs.py | 286 +++++++++++++++++++++++++++++++++++ tests/test_mmh3.py | 32 ++-- 4 files changed, 412 insertions(+), 80 deletions(-) diff --git a/src/mmh3/__init__.pyi b/src/mmh3/__init__.pyi index 75c6137..87b731e 100644 --- a/src/mmh3/__init__.pyi +++ b/src/mmh3/__init__.pyi @@ -2,26 +2,24 @@ from __future__ import annotations import sys -from typing import Union, final +from typing import Any, Union, final if sys.version_info >= (3, 12): from collections.abc import Buffer else: from _typeshed import ReadableBuffer as Buffer -def hash(key: Union[bytes, str], seed: int = 0, signed: bool = True) -> int: ... +def hash(key: Union[bytes, str], seed: int = 0, signed: Any = True) -> int: ... def hash_from_buffer( - key: Union[Buffer, str], seed: int = 0, signed: bool = True + key: Union[Buffer, str], seed: int = 0, signed: Any = True ) -> int: ... def hash64( - key: Union[bytes, str], seed: int = 0, x64arch: bool = True, signed: bool = True + key: Union[bytes, str], seed: int = 0, x64arch: Any = True, signed: Any = True ) -> tuple[int, int]: ... 
def hash128( - key: Union[bytes, str], seed: int = 0, x64arch: bool = True, signed: bool = False + key: Union[bytes, str], seed: int = 0, x64arch: Any = True, signed: Any = False ) -> int: ... -def hash_bytes( - key: Union[bytes, str], seed: int = 0, x64arch: bool = True -) -> bytes: ... +def hash_bytes(key: Union[bytes, str], seed: int = 0, x64arch: Any = True) -> bytes: ... def mmh3_32_digest(key: Union[Buffer, str], seed: int = 0) -> bytes: ... def mmh3_32_sintdigest(key: Union[Buffer, str], seed: int = 0) -> int: ... def mmh3_32_uintdigest(key: Union[Buffer, str], seed: int = 0) -> int: ... diff --git a/src/mmh3/mmh3module.c b/src/mmh3/mmh3module.c index 7c7dc39..3c62044 100644 --- a/src/mmh3/mmh3module.c +++ b/src/mmh3/mmh3module.c @@ -32,6 +32,12 @@ typedef unsigned __int64 uint64_t; #define MMH3_32_BLOCKSIZE 12 #define MMH3_128_BLOCKSIZE 32 +#define MMH3_VALIDATE_SEED(seed) \ + if (seed < 0 || seed > 0xFFFFFFFF) { \ + PyErr_SetString(PyExc_ValueError, "seed is out of range"); \ + return NULL; \ + } + #define MMH3_VALIDATE_ARGS_AND_SET_SEED(nargs, args, seed) \ if (nargs < 1) { \ PyErr_SetString(PyExc_TypeError, \ @@ -80,24 +86,28 @@ PyDoc_STRVAR( " key (bytes | str): The input data to hash.\n" " seed (int): The seed value. Must be an integer in the range [0, " "0xFFFFFFFF].\n" - " signed (bool): If True, return a signed integer. Otherwise, return " + " signed (Any): If True, return a signed integer. Otherwise, return " "an unsigned integer.\n" "\n" "Returns:\n" - " int: The hash value as a 32-bit integer.\n"); + " int: The hash value as a 32-bit integer.\n" + "\n" + ".. 
versionchanged:: 5.0.0\n" + " The ``seed`` argument is now strictly checked for valid range.\n" + " The type of the ``signed`` argument has been changed from\n" + " ``bool`` to ``Any``.\n"); static PyObject * mmh3_hash(PyObject *self, PyObject *args, PyObject *keywds) { const char *target_str; Py_ssize_t target_str_len; - uint32_t seed = 0; + long long seed = 0; int32_t result[1]; long long_result = 0; - unsigned char is_signed = 1; + int is_signed = 1; - static char *kwlist[] = {(char *)"key", (char *)"seed", (char *)"signed", - NULL}; + static char *kwlist[] = {"key", "seed", "signed", NULL}; #ifndef _MSC_VER #if __LONG_WIDTH__ == 64 || defined(__APPLE__) @@ -105,13 +115,15 @@ mmh3_hash(PyObject *self, PyObject *args, PyObject *keywds) #endif #endif - if (!PyArg_ParseTupleAndKeywords(args, keywds, "s#|IB", kwlist, + if (!PyArg_ParseTupleAndKeywords(args, keywds, "s#|Lp", kwlist, &target_str, &target_str_len, &seed, &is_signed)) { return NULL; } - murmurhash3_x86_32(target_str, target_str_len, seed, result); + MMH3_VALIDATE_SEED(seed); + + murmurhash3_x86_32(target_str, target_str_len, (uint32_t)seed, result); #if defined(_MSC_VER) /* for Windows envs */ @@ -154,26 +166,30 @@ PyDoc_STRVAR( " UTF-8 encoding before hashing.\n" " seed (int): The seed value. Must be an integer in the range [0, " "0xFFFFFFFF].\n" - " signed (bool): If True, return a signed integer. Otherwise, return " + " signed (Any): If True, return a signed integer. Otherwise, return " "an unsigned integer.\n" "\n" "Returns:\n" " int: The hash value as a 32-bit integer.\n" "\n" ".. deprecated:: 5.0.0\n" - " Use ``mmh3_32_sintdigest()`` or ``mmh3_32_uintdigest()`` instead.\n"); + " Use ``mmh3_32_sintdigest()`` or ``mmh3_32_uintdigest()`` instead.\n" + "\n" + ".. 
versionchanged:: 5.0.0\n" + " The ``seed`` argument is now strictly checked for valid range.\n" + " The type of the ``signed`` argument has been changed from\n" + " ``bool`` to ``Any``.\n"); static PyObject * mmh3_hash_from_buffer(PyObject *self, PyObject *args, PyObject *keywds) { Py_buffer target_buf; - uint32_t seed = 0; + long long seed = 0; int32_t result[1]; long long_result = 0; - unsigned char is_signed = 1; + int is_signed = 1; - static char *kwlist[] = {(char *)"key", (char *)"seed", (char *)"signed", - NULL}; + static char *kwlist[] = {"key", "seed", "signed", NULL}; #ifndef _MSC_VER #if __LONG_WIDTH__ == 64 || defined(__APPLE__) @@ -181,12 +197,14 @@ mmh3_hash_from_buffer(PyObject *self, PyObject *args, PyObject *keywds) #endif #endif - if (!PyArg_ParseTupleAndKeywords(args, keywds, "s*|IB", kwlist, + if (!PyArg_ParseTupleAndKeywords(args, keywds, "s*|Lp", kwlist, &target_buf, &seed, &is_signed)) { return NULL; } - murmurhash3_x86_32(target_buf.buf, target_buf.len, seed, result); + MMH3_VALIDATE_SEED(seed); + + murmurhash3_x86_32(target_buf.buf, target_buf.len, (uint32_t)seed, result); PyBuffer_Release(&target_buf); @@ -228,41 +246,49 @@ PyDoc_STRVAR( " key (bytes | str): The input data to hash.\n" " seed (int): The seed value. Must be an integer in the range [0, " "0xFFFFFFFF].\n" - " x64arch (bool): If True, use an algorithm optimized for 64-bit " + " x64arch (Any): If True, use an algorithm optimized for 64-bit " "architecture. Otherwise, use one optimized for 32-bit architecture.\n" - " signed (bool): If True, return a signed integer. Otherwise, return " + " signed (Any): If True, return a signed integer. Otherwise, return " "an unsigned integer.\n" "\n" "Returns:\n" " tuple[int, int]: The hash value as a tuple of two 64-bit " - "integers.\n"); + "integers.\n" + "\n" + ".. 
versionchanged:: 5.0.0\n" + " The ``seed`` argument is now strictly checked for valid range.\n" + " The type of the ``x64arch`` and ``signed`` arguments has been\n" + " changed from ``bool`` to ``Any``.\n"); static PyObject * mmh3_hash64(PyObject *self, PyObject *args, PyObject *keywds) { const char *target_str; Py_ssize_t target_str_len; - uint32_t seed = 0; + long long seed = 0; uint64_t result[2]; - unsigned char x64arch = 1; - unsigned char is_signed = 1; + int x64arch = 1; + int is_signed = 1; - static char *kwlist[] = {(char *)"key", (char *)"seed", (char *)"x64arch", - (char *)"signed", NULL}; + static char *kwlist[] = {"key", "seed", "x64arch", "signed", NULL}; - static char *valflag[] = {(char *)"KK", (char *)"LL"}; + static char *valflag[] = {"KK", "LL"}; - if (!PyArg_ParseTupleAndKeywords(args, keywds, "s#|IBB", kwlist, + if (!PyArg_ParseTupleAndKeywords(args, keywds, "s#|Lpp", kwlist, &target_str, &target_str_len, &seed, &x64arch, &is_signed)) { return NULL; } + MMH3_VALIDATE_SEED(seed); + if (x64arch == 1) { - murmurhash3_x64_128(target_str, target_str_len, seed, result); + murmurhash3_x64_128(target_str, target_str_len, (uint32_t)seed, + result); } else { - murmurhash3_x86_128(target_str, target_str_len, seed, result); + murmurhash3_x86_128(target_str, target_str_len, (uint32_t)seed, + result); } PyObject *retval = Py_BuildValue(valflag[is_signed], result[0], result[1]); @@ -280,33 +306,39 @@ PyDoc_STRVAR( " key (bytes | str): The input data to hash.\n" " seed (int): The seed value. Must be an integer in the range [0, " "0xFFFFFFFF].\n" - " x64arch (bool): If True, use an algorithm optimized for 64-bit " + " x64arch (Any): If True, use an algorithm optimized for 64-bit " "architecture. Otherwise, use one optimized for 32-bit architecture.\n" - " signed (bool): If True, return a signed integer. Otherwise, return " + " signed (Any): If True, return a signed integer. 
Otherwise, return " "an unsigned integer.\n" "\n" "Returns:\n" - " int: The hash value as a 128-bit integer.\n"); + " int: The hash value as a 128-bit integer.\n" + "\n" + ".. versionchanged:: 5.0.0\n" + " The ``seed`` argument is now strictly checked for valid range.\n" + " The type of the ``x64arch`` and ``signed`` arguments has been\n" + " changed from ``bool`` to ``Any``.\n"); static PyObject * mmh3_hash128(PyObject *self, PyObject *args, PyObject *keywds) { const char *target_str; Py_ssize_t target_str_len; - uint32_t seed = 0; + long long seed = 0; uint64_t result[2]; - unsigned char x64arch = 1; - unsigned char is_signed = 0; + int x64arch = 1; + int is_signed = 0; - static char *kwlist[] = {(char *)"key", (char *)"seed", (char *)"x64arch", - (char *)"signed", NULL}; + static char *kwlist[] = {"key", "seed", "x64arch", "signed", NULL}; - if (!PyArg_ParseTupleAndKeywords(args, keywds, "s#|IBB", kwlist, + if (!PyArg_ParseTupleAndKeywords(args, keywds, "s#|Lpp", kwlist, &target_str, &target_str_len, &seed, &x64arch, &is_signed)) { return NULL; } + MMH3_VALIDATE_SEED(seed); + if (x64arch == 1) { murmurhash3_x64_128(target_str, target_str_len, seed, result); } @@ -341,31 +373,37 @@ PyDoc_STRVAR( " key (bytes | str): The input data to hash.\n" " seed (int): The seed value. Must be an integer in the range [0, " "0xFFFFFFFF].\n" - " x64arch (bool): If True, use an algorithm optimized for 64-bit " + " x64arch (Any): If True, use an algorithm optimized for 64-bit " "architecture. Otherwise, use one optimized for 32-bit architecture.\n" "\n" "Returns:\n" " bytes: The hash value as the ``bytes`` type with a length of 16 " - "bytes (128 bits).\n"); + "bytes (128 bits).\n") + "\n" + ".. 
versionchanged:: 5.0.0\n" + " The ``seed`` argument is now strictly checked for valid range.\n" + " The type of the ``x64arch`` argument has been changed from\n" + " ``bool`` to ``Any``.\n"; static PyObject * mmh3_hash_bytes(PyObject *self, PyObject *args, PyObject *keywds) { const char *target_str; Py_ssize_t target_str_len; - uint32_t seed = 0; + long long seed = 0; uint64_t result[2]; - unsigned char x64arch = 1; + int x64arch = 1; - static char *kwlist[] = {(char *)"key", (char *)"seed", (char *)"x64arch", - NULL}; + static char *kwlist[] = {"key", "seed", "x64arch", NULL}; - if (!PyArg_ParseTupleAndKeywords(args, keywds, "s#|IB", kwlist, + if (!PyArg_ParseTupleAndKeywords(args, keywds, "s#|Lp", kwlist, &target_str, &target_str_len, &seed, &x64arch)) { return NULL; } + MMH3_VALIDATE_SEED(seed); + if (x64arch == 1) { murmurhash3_x64_128(target_str, target_str_len, seed, result); } @@ -1061,12 +1099,17 @@ static int MMH3Hasher32_init(MMH3Hasher32 *self, PyObject *args, PyObject *kwds) { Py_buffer target_buf = {0}; + long long seed = 0; static char *kwlist[] = {"data", "seed", NULL}; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "|y*I", kwlist, &target_buf, - &self->h)) + if (!PyArg_ParseTupleAndKeywords(args, kwds, "|y*L", kwlist, &target_buf, + &seed)) return -1; + MMH3_VALIDATE_SEED(seed); + + self->h = (uint32_t)seed; + if (target_buf.buf != NULL) { // target_buf will be released in update32_impl update32_impl(self, &target_buf); @@ -1244,7 +1287,8 @@ PyDoc_STRVAR( "[0, 0xFFFFFFFF].\n" "\n" ".. 
versionchanged:: 5.0.0\n" - " Added the optional ``data`` parameter as the first argument.\n"); + " Added the optional ``data`` parameter as the first argument.\n" + " The ``seed`` argument is now strictly checked for valid range.\n"); static PyTypeObject MMH3Hasher32Type = { PyVarObject_HEAD_INIT(NULL, 0).tp_name = "mmh3.mmh3_32", @@ -1380,12 +1424,16 @@ static int MMH3Hasher128x64_init(MMH3Hasher128x64 *self, PyObject *args, PyObject *kwds) { Py_buffer target_buf = {0}; + long long seed = 0; static char *kwlist[] = {"data", "seed", NULL}; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "|y*K", kwlist, &target_buf, - &self->h1)) + if (!PyArg_ParseTupleAndKeywords(args, kwds, "|y*L", kwlist, &target_buf, + &seed)) return -1; + MMH3_VALIDATE_SEED(seed); + + self->h1 = (uint64_t)seed; self->h2 = self->h1; if (target_buf.buf != NULL) { @@ -1605,7 +1653,8 @@ PyDoc_STRVAR( "[0, 0xFFFFFFFF].\n" "\n" ".. versionchanged:: 5.0.0\n" - " Added the optional ``data`` parameter as the first argument.\n"); + " Added the optional ``data`` parameter as the first argument.\n" + " The ``seed`` argument is now strictly checked for valid range.\n"); static PyTypeObject MMH3Hasher128x64Type = { PyVarObject_HEAD_INIT(NULL, 0).tp_name = "mmh3.mmh3_x64_128", @@ -1728,12 +1777,15 @@ static int MMH3Hasher128x86_init(MMH3Hasher128x86 *self, PyObject *args, PyObject *kwds) { Py_buffer target_buf = {0}; + long long seed = 0; static char *kwlist[] = {"data", "seed", NULL}; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "|y*I", kwlist, &target_buf, - &self->h1)) + if (!PyArg_ParseTupleAndKeywords(args, kwds, "|y*L", kwlist, &target_buf, + &seed)) return -1; + MMH3_VALIDATE_SEED(seed); + self->h1 = (uint32_t)seed; self->h2 = self->h1; self->h3 = self->h1; self->h4 = self->h1; @@ -1946,8 +1998,8 @@ PyDoc_STRVAR( "[0, 0xFFFFFFFF].\n" "\n" ".. 
versionchanged:: 5.0.0\n" - " Added the optional ``data`` parameter as the first argument.\n"); -; + " Added the optional ``data`` parameter as the first argument.\n" + " The ``seed`` argument is now strictly checked for valid range.\n"); static PyTypeObject MMH3Hasher128x86Type = { PyVarObject_HEAD_INIT(NULL, 0).tp_name = "mmh3.mmh3_x86_128", diff --git a/tests/test_invalid_inputs.py b/tests/test_invalid_inputs.py index f99b35b..b284f44 100644 --- a/tests/test_invalid_inputs.py +++ b/tests/test_invalid_inputs.py @@ -6,6 +6,28 @@ import pytest +@no_type_check +def test_hash_raises_typeerror() -> None: + with pytest.raises(TypeError): + mmh3.hash() + with pytest.raises(TypeError): + mmh3.hash(b"hello, world", "42") + with pytest.raises(TypeError): + mmh3.hash(b"hello, world", 42, True, 1234) + with pytest.raises(TypeError): + mmh3.hash(b"hello, world", seed="42") + with pytest.raises(TypeError): + mmh3.hash([1, 2, 3], 42) + + +@no_type_check +def test_hash_raises_valueerror() -> None: + with pytest.raises(ValueError): + mmh3.hash(b"hello, world", -1) + with pytest.raises(ValueError): + mmh3.hash(b"hello, world", 2**32) + + @no_type_check def test_mmh3_32_digest_raises_typeerror() -> None: with pytest.raises(TypeError): @@ -26,3 +48,267 @@ def test_mmh3_32_digest_raises_valueerror() -> None: mmh3.mmh3_32_digest(b"hello, world", -1) with pytest.raises(ValueError): mmh3.mmh3_32_digest(b"hello, world", 2**32) + + +@no_type_check +def test_mmh3_32_sintdigest_raises_typeerror() -> None: + with pytest.raises(TypeError): + mmh3.mmh3_32_sintdigest() + with pytest.raises(TypeError): + mmh3.mmh3_32_sintdigest(b"hello, world", 42, 1234) + with pytest.raises(TypeError): + mmh3.mmh3_32_sintdigest("hello, world") + with pytest.raises(TypeError): + mmh3.mmh3_32_sintdigest(b"hello, world", "42") + with pytest.raises(TypeError): + mmh3.mmh3_32_sintdigest([1, 2, 3], 42) + + +@no_type_check +def test_mmh3_32_sintdigest_raises_valueerror() -> None: + with pytest.raises(ValueError): + 
mmh3.mmh3_32_sintdigest(b"hello, world", -1) + with pytest.raises(ValueError): + mmh3.mmh3_32_sintdigest(b"hello, world", 2**32) + + +@no_type_check +def test_mmh3_32_uintdigest_raises_typeerror() -> None: + with pytest.raises(TypeError): + mmh3.mmh3_32_uintdigest() + with pytest.raises(TypeError): + mmh3.mmh3_32_uintdigest(b"hello, world", 42, 1234) + with pytest.raises(TypeError): + mmh3.mmh3_32_uintdigest("hello, world") + with pytest.raises(TypeError): + mmh3.mmh3_32_uintdigest(b"hello, world", "42") + with pytest.raises(TypeError): + mmh3.mmh3_32_uintdigest([1, 2, 3], 42) + + +@no_type_check +def test_mmh3_32_uintdigest_raises_valueerror() -> None: + with pytest.raises(ValueError): + mmh3.mmh3_32_uintdigest(b"hello, world", -1) + with pytest.raises(ValueError): + mmh3.mmh3_32_uintdigest(b"hello, world", 2**32) + + +@no_type_check +def test_mmh3_x64_128_digest_raises_typeerror() -> None: + with pytest.raises(TypeError): + mmh3.mmh3_x64_128_digest() + with pytest.raises(TypeError): + mmh3.mmh3_x64_128_digest(b"hello, world", 42, 1234) + with pytest.raises(TypeError): + mmh3.mmh3_x64_128_digest("hello, world") + with pytest.raises(TypeError): + mmh3.mmh3_x64_128_digest(b"hello, world", "42") + with pytest.raises(TypeError): + mmh3.mmh3_x64_128_digest([1, 2, 3], 42) + + +@no_type_check +def test_mmh3_x64_128_digest_raises_valueerror() -> None: + with pytest.raises(ValueError): + mmh3.mmh3_x64_128_digest(b"hello, world", -1) + with pytest.raises(ValueError): + mmh3.mmh3_x64_128_digest(b"hello, world", 2**32) + + +@no_type_check +def test_mmh3_x64_128_sintdigest_raises_typeerror() -> None: + with pytest.raises(TypeError): + mmh3.mmh3_x64_128_sintdigest() + with pytest.raises(TypeError): + mmh3.mmh3_x64_128_sintdigest(b"hello, world", 42, 1234) + with pytest.raises(TypeError): + mmh3.mmh3_x64_128_sintdigest("hello, world") + with pytest.raises(TypeError): + mmh3.mmh3_x64_128_sintdigest(b"hello, world", "42") + with pytest.raises(TypeError): + 
mmh3.mmh3_x64_128_sintdigest([1, 2, 3], 42) + + +@no_type_check +def test_mmh3_x64_128_sintdigest_raises_valueerror() -> None: + with pytest.raises(ValueError): + mmh3.mmh3_x64_128_sintdigest(b"hello, world", -1) + with pytest.raises(ValueError): + mmh3.mmh3_x64_128_sintdigest(b"hello, world", 2**32) + + +@no_type_check +def test_mmh3_x64_128_uintdigest_raises_typeerror() -> None: + with pytest.raises(TypeError): + mmh3.mmh3_x64_128_uintdigest() + with pytest.raises(TypeError): + mmh3.mmh3_x64_128_uintdigest(b"hello, world", 42, 1234) + with pytest.raises(TypeError): + mmh3.mmh3_x64_128_uintdigest("hello, world") + with pytest.raises(TypeError): + mmh3.mmh3_x64_128_uintdigest(b"hello, world", "42") + with pytest.raises(TypeError): + mmh3.mmh3_x64_128_uintdigest([1, 2, 3], 42) + + +@no_type_check +def test_mmh3_x64_128_uintdigest_raises_valueerror() -> None: + with pytest.raises(ValueError): + mmh3.mmh3_x64_128_uintdigest(b"hello, world", -1) + with pytest.raises(ValueError): + mmh3.mmh3_x64_128_uintdigest(b"hello, world", 2**32) + + +@no_type_check +def test_mmh3_x64_128_stupledigest_raises_typeerror() -> None: + with pytest.raises(TypeError): + mmh3.mmh3_x64_128_stupledigest() + with pytest.raises(TypeError): + mmh3.mmh3_x64_128_stupledigest(b"hello, world", 42, 1234) + with pytest.raises(TypeError): + mmh3.mmh3_x64_128_stupledigest("hello, world") + with pytest.raises(TypeError): + mmh3.mmh3_x64_128_stupledigest(b"hello, world", "42") + with pytest.raises(TypeError): + mmh3.mmh3_x64_128_stupledigest([1, 2, 3], 42) + + +@no_type_check +def test_mmh3_x64_128_stupledigest_raises_valueerror() -> None: + with pytest.raises(ValueError): + mmh3.mmh3_x64_128_stupledigest(b"hello, world", -1) + with pytest.raises(ValueError): + mmh3.mmh3_x64_128_stupledigest(b"hello, world", 2**32) + + +@no_type_check +def test_mmh3_x64_128_utupledigest_raises_typeerror() -> None: + with pytest.raises(TypeError): + mmh3.mmh3_x64_128_utupledigest() + with pytest.raises(TypeError): + 
mmh3.mmh3_x64_128_utupledigest(b"hello, world", 42, 1234) + with pytest.raises(TypeError): + mmh3.mmh3_x64_128_utupledigest("hello, world") + with pytest.raises(TypeError): + mmh3.mmh3_x64_128_utupledigest(b"hello, world", "42") + with pytest.raises(TypeError): + mmh3.mmh3_x64_128_utupledigest([1, 2, 3], 42) + + +@no_type_check +def test_mmh3_x64_128_utupledigest_raises_valueerror() -> None: + with pytest.raises(ValueError): + mmh3.mmh3_x64_128_utupledigest(b"hello, world", -1) + with pytest.raises(ValueError): + mmh3.mmh3_x64_128_utupledigest(b"hello, world", 2**32) + + +@no_type_check +def test_mmh3_x86_128_digest_raises_typeerror() -> None: + with pytest.raises(TypeError): + mmh3.mmh3_x86_128_digest() + with pytest.raises(TypeError): + mmh3.mmh3_x86_128_digest(b"hello, world", 42, 1234) + with pytest.raises(TypeError): + mmh3.mmh3_x86_128_digest("hello, world") + with pytest.raises(TypeError): + mmh3.mmh3_x86_128_digest(b"hello, world", "42") + with pytest.raises(TypeError): + mmh3.mmh3_x86_128_digest([1, 2, 3], 42) + + +@no_type_check +def test_mmh3_x86_128_digest_raises_valueerror() -> None: + with pytest.raises(ValueError): + mmh3.mmh3_x86_128_digest(b"hello, world", -1) + with pytest.raises(ValueError): + mmh3.mmh3_x86_128_digest(b"hello, world", 2**32) + + +@no_type_check +def test_mmh3_x86_128_sintdigest_raises_typeerror() -> None: + with pytest.raises(TypeError): + mmh3.mmh3_x86_128_sintdigest() + with pytest.raises(TypeError): + mmh3.mmh3_x86_128_sintdigest(b"hello, world", 42, 1234) + with pytest.raises(TypeError): + mmh3.mmh3_x86_128_sintdigest("hello, world") + with pytest.raises(TypeError): + mmh3.mmh3_x86_128_sintdigest(b"hello, world", "42") + with pytest.raises(TypeError): + mmh3.mmh3_x86_128_sintdigest([1, 2, 3], 42) + + +@no_type_check +def test_mmh3_x86_128_sintdigest_raises_valueerror() -> None: + with pytest.raises(ValueError): + mmh3.mmh3_x86_128_sintdigest(b"hello, world", -1) + with pytest.raises(ValueError): + 
mmh3.mmh3_x86_128_sintdigest(b"hello, world", 2**32) + + +@no_type_check +def test_mmh3_x86_128_uintdigest_raises_typeerror() -> None: + with pytest.raises(TypeError): + mmh3.mmh3_x86_128_uintdigest() + with pytest.raises(TypeError): + mmh3.mmh3_x86_128_uintdigest(b"hello, world", 42, 1234) + with pytest.raises(TypeError): + mmh3.mmh3_x86_128_uintdigest("hello, world") + with pytest.raises(TypeError): + mmh3.mmh3_x86_128_uintdigest(b"hello, world", "42") + with pytest.raises(TypeError): + mmh3.mmh3_x86_128_uintdigest([1, 2, 3], 42) + + +@no_type_check +def test_mmh3_x86_128_uintdigest_raises_valueerror() -> None: + with pytest.raises(ValueError): + mmh3.mmh3_x86_128_uintdigest(b"hello, world", -1) + with pytest.raises(ValueError): + mmh3.mmh3_x86_128_uintdigest(b"hello, world", 2**32) + + +@no_type_check +def test_mmh3_x86_128_stupledigest_raises_typeerror() -> None: + with pytest.raises(TypeError): + mmh3.mmh3_x86_128_stupledigest() + with pytest.raises(TypeError): + mmh3.mmh3_x86_128_stupledigest(b"hello, world", 42, 1234) + with pytest.raises(TypeError): + mmh3.mmh3_x86_128_stupledigest("hello, world") + with pytest.raises(TypeError): + mmh3.mmh3_x86_128_stupledigest(b"hello, world", "42") + with pytest.raises(TypeError): + mmh3.mmh3_x86_128_stupledigest([1, 2, 3], 42) + + +@no_type_check +def test_mmh3_x86_128_stupledigest_raises_valueerror() -> None: + with pytest.raises(ValueError): + mmh3.mmh3_x86_128_stupledigest(b"hello, world", -1) + with pytest.raises(ValueError): + mmh3.mmh3_x86_128_stupledigest(b"hello, world", 2**32) + + +@no_type_check +def test_mmh3_x86_128_utupledigest_raises_typeerror() -> None: + with pytest.raises(TypeError): + mmh3.mmh3_x86_128_utupledigest() + with pytest.raises(TypeError): + mmh3.mmh3_x86_128_utupledigest(b"hello, world", 42, 1234) + with pytest.raises(TypeError): + mmh3.mmh3_x86_128_utupledigest("hello, world") + with pytest.raises(TypeError): + mmh3.mmh3_x86_128_utupledigest(b"hello, world", "42") + with 
pytest.raises(TypeError): + mmh3.mmh3_x86_128_utupledigest([1, 2, 3], 42) + + +@no_type_check +def test_mmh3_x86_128_utupledigest_raises_valueerror() -> None: + with pytest.raises(ValueError): + mmh3.mmh3_x86_128_utupledigest(b"hello, world", -1) + with pytest.raises(ValueError): + mmh3.mmh3_x86_128_utupledigest(b"hello, world", 2**32) diff --git a/tests/test_mmh3.py b/tests/test_mmh3.py index c8d935a..2ea3ef2 100644 --- a/tests/test_mmh3.py +++ b/tests/test_mmh3.py @@ -13,11 +13,9 @@ def test_hash() -> None: # https://stackoverflow.com/a/31929528 assert mmh3.hash(b"", seed=0) == 0 assert mmh3.hash(b"", seed=1) == 0x514E28B7 - assert mmh3.hash(b"", seed=u32_to_s32(0xFFFFFFFF)) == u32_to_s32(0x81F16F39) + assert mmh3.hash(b"", seed=0xFFFFFFFF) == u32_to_s32(0x81F16F39) assert mmh3.hash(b"\x21\x43\x65\x87", 0) == u32_to_s32(0xF55B516B) - assert mmh3.hash(b"\x21\x43\x65\x87", u32_to_s32(0x5082EDEE)) == u32_to_s32( - 0x2362F9DE - ) + assert mmh3.hash(b"\x21\x43\x65\x87", 0x5082EDEE) == u32_to_s32(0x2362F9DE) assert mmh3.hash(b"\x21\x43\x65", 0) == u32_to_s32(0x7E4A8634) assert mmh3.hash(b"\x21\x43", 0) == u32_to_s32(0xA0F7B07A) assert mmh3.hash(b"\x21", 0) == u32_to_s32(0x72661CF4) @@ -27,23 +25,21 @@ def test_hash() -> None: assert mmh3.hash(b"\x00\x00", 0) == u32_to_s32(0x30F4C306) assert mmh3.hash(b"\x00", 0) == u32_to_s32(0x514E28B7) - assert mmh3.hash("aaaa", u32_to_s32(0x9747B28C)) == u32_to_s32(0x5A97808A) - assert mmh3.hash("aaa", u32_to_s32(0x9747B28C)) == u32_to_s32(0x283E0130) - assert mmh3.hash("aa", u32_to_s32(0x9747B28C)) == u32_to_s32(0x5D211726) - assert mmh3.hash("a", u32_to_s32(0x9747B28C)) == u32_to_s32(0x7FA09EA6) + assert mmh3.hash("aaaa", 0x9747B28C) == u32_to_s32(0x5A97808A) + assert mmh3.hash("aaa", 0x9747B28C) == u32_to_s32(0x283E0130) + assert mmh3.hash("aa", 0x9747B28C) == u32_to_s32(0x5D211726) + assert mmh3.hash("a", 0x9747B28C) == u32_to_s32(0x7FA09EA6) - assert mmh3.hash("abcd", u32_to_s32(0x9747B28C)) == u32_to_s32(0xF0478627) - assert 
mmh3.hash("abc", u32_to_s32(0x9747B28C)) == u32_to_s32(0xC84A62DD) - assert mmh3.hash("ab", u32_to_s32(0x9747B28C)) == u32_to_s32(0x74875592) - assert mmh3.hash("a", u32_to_s32(0x9747B28C)) == u32_to_s32(0x7FA09EA6) + assert mmh3.hash("abcd", 0x9747B28C) == u32_to_s32(0xF0478627) + assert mmh3.hash("abc", 0x9747B28C) == u32_to_s32(0xC84A62DD) + assert mmh3.hash("ab", 0x9747B28C) == u32_to_s32(0x74875592) + assert mmh3.hash("a", 0x9747B28C) == u32_to_s32(0x7FA09EA6) - assert mmh3.hash("Hello, world!", u32_to_s32(0x9747B28C)) == u32_to_s32(0x24884CBA) + assert mmh3.hash("Hello, world!", 0x9747B28C) == u32_to_s32(0x24884CBA) - assert mmh3.hash("ππππππππ".encode("utf-8"), u32_to_s32(0x9747B28C)) == u32_to_s32( - 0xD58063C1 - ) + assert mmh3.hash("ππππππππ".encode("utf-8"), 0x9747B28C) == u32_to_s32(0xD58063C1) - assert mmh3.hash("a" * 256, u32_to_s32(0x9747B28C)) == u32_to_s32(0x37405BDC) + assert mmh3.hash("a" * 256, 0x9747B28C) == u32_to_s32(0x37405BDC) assert mmh3.hash("abc", 0) == u32_to_s32(0xB3DD93FA) assert mmh3.hash( @@ -51,7 +47,7 @@ def test_hash() -> None: ) == u32_to_s32(0xEE925B90) assert mmh3.hash( - "The quick brown fox jumps over the lazy dog", u32_to_s32(0x9747B28C) + "The quick brown fox jumps over the lazy dog", 0x9747B28C ) == u32_to_s32(0x2FA826CD) From 884ffe12745cf9fc85e52df9003ac1be96263df8 Mon Sep 17 00:00:00 2001 From: Hajime Senuma Date: Tue, 17 Sep 2024 15:27:36 +0900 Subject: [PATCH 13/16] Fix int type warning --- src/mmh3/mmh3module.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mmh3/mmh3module.c b/src/mmh3/mmh3module.c index 3c62044..12665bf 100644 --- a/src/mmh3/mmh3module.c +++ b/src/mmh3/mmh3module.c @@ -58,7 +58,7 @@ typedef unsigned __int64 uint64_t; return NULL; \ } \ const unsigned long seed_tmp = PyLong_AsUnsignedLong(args[1]); \ - if (seed_tmp == -1 && PyErr_Occurred()) { \ + if (seed_tmp == (unsigned long)-1 && PyErr_Occurred()) { \ if (PyErr_ExceptionMatches(PyExc_OverflowError)) { \ 
PyErr_SetString(PyExc_ValueError, "seed is out of range"); \ return NULL; \ From fc3ed7dfbc46c979d985cc31949ab2f1bddd8bdc Mon Sep 17 00:00:00 2001 From: Hajime Senuma Date: Tue, 17 Sep 2024 15:33:03 +0900 Subject: [PATCH 14/16] Fix return type warning --- src/mmh3/mmh3module.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/src/mmh3/mmh3module.c b/src/mmh3/mmh3module.c index 12665bf..bed2923 100644 --- a/src/mmh3/mmh3module.c +++ b/src/mmh3/mmh3module.c @@ -32,12 +32,18 @@ typedef unsigned __int64 uint64_t; #define MMH3_32_BLOCKSIZE 12 #define MMH3_128_BLOCKSIZE 32 -#define MMH3_VALIDATE_SEED(seed) \ +#define MMH3_VALIDATE_SEED_RETURN_NULL(seed) \ if (seed < 0 || seed > 0xFFFFFFFF) { \ PyErr_SetString(PyExc_ValueError, "seed is out of range"); \ return NULL; \ } +#define MMH3_VALIDATE_SEED_RETURN_INT(seed) \ + if (seed < 0 || seed > 0xFFFFFFFF) { \ + PyErr_SetString(PyExc_ValueError, "seed is out of range"); \ + return -1; \ + } + #define MMH3_VALIDATE_ARGS_AND_SET_SEED(nargs, args, seed) \ if (nargs < 1) { \ PyErr_SetString(PyExc_TypeError, \ @@ -121,7 +127,7 @@ mmh3_hash(PyObject *self, PyObject *args, PyObject *keywds) return NULL; } - MMH3_VALIDATE_SEED(seed); + MMH3_VALIDATE_SEED_RETURN_NULL(seed); murmurhash3_x86_32(target_str, target_str_len, (uint32_t)seed, result); @@ -202,7 +208,7 @@ mmh3_hash_from_buffer(PyObject *self, PyObject *args, PyObject *keywds) return NULL; } - MMH3_VALIDATE_SEED(seed); + MMH3_VALIDATE_SEED_RETURN_NULL(seed); murmurhash3_x86_32(target_buf.buf, target_buf.len, (uint32_t)seed, result); @@ -280,7 +286,7 @@ mmh3_hash64(PyObject *self, PyObject *args, PyObject *keywds) return NULL; } - MMH3_VALIDATE_SEED(seed); + MMH3_VALIDATE_SEED_RETURN_NULL(seed); if (x64arch == 1) { murmurhash3_x64_128(target_str, target_str_len, (uint32_t)seed, @@ -337,7 +343,7 @@ mmh3_hash128(PyObject *self, PyObject *args, PyObject *keywds) return NULL; } - MMH3_VALIDATE_SEED(seed); + 
MMH3_VALIDATE_SEED_RETURN_NULL(seed); if (x64arch == 1) { murmurhash3_x64_128(target_str, target_str_len, seed, result); @@ -402,7 +408,7 @@ mmh3_hash_bytes(PyObject *self, PyObject *args, PyObject *keywds) return NULL; } - MMH3_VALIDATE_SEED(seed); + MMH3_VALIDATE_SEED_RETURN_NULL(seed); if (x64arch == 1) { murmurhash3_x64_128(target_str, target_str_len, seed, result); @@ -1106,7 +1112,7 @@ MMH3Hasher32_init(MMH3Hasher32 *self, PyObject *args, PyObject *kwds) &seed)) return -1; - MMH3_VALIDATE_SEED(seed); + MMH3_VALIDATE_SEED_RETURN_INT(seed); self->h = (uint32_t)seed; @@ -1431,7 +1437,7 @@ MMH3Hasher128x64_init(MMH3Hasher128x64 *self, PyObject *args, PyObject *kwds) &seed)) return -1; - MMH3_VALIDATE_SEED(seed); + MMH3_VALIDATE_SEED_RETURN_INT(seed); self->h1 = (uint64_t)seed; self->h2 = self->h1; @@ -1784,7 +1790,7 @@ MMH3Hasher128x86_init(MMH3Hasher128x86 *self, PyObject *args, PyObject *kwds) &seed)) return -1; - MMH3_VALIDATE_SEED(seed); + MMH3_VALIDATE_SEED_RETURN_INT(seed); self->h1 = (uint32_t)seed; self->h2 = self->h1; self->h3 = self->h1; From cd68416f151552c7bafe58316c215515cf5ace04 Mon Sep 17 00:00:00 2001 From: Hajime Senuma Date: Tue, 17 Sep 2024 16:02:27 +0900 Subject: [PATCH 15/16] Fix hasher init; add more unit tests --- src/mmh3/mmh3module.c | 9 ++- tests/test_invalid_inputs.py | 148 +++++++++++++++++++++++++++++++++++ 2 files changed, 153 insertions(+), 4 deletions(-) diff --git a/src/mmh3/mmh3module.c b/src/mmh3/mmh3module.c index bed2923..1d7289b 100644 --- a/src/mmh3/mmh3module.c +++ b/src/mmh3/mmh3module.c @@ -38,8 +38,9 @@ typedef unsigned __int64 uint64_t; return NULL; \ } -#define MMH3_VALIDATE_SEED_RETURN_INT(seed) \ +#define MMH3_VALIDATE_SEED_RETURN_INT(seed, buf) \ if (seed < 0 || seed > 0xFFFFFFFF) { \ + PyBuffer_Release(&buf); \ PyErr_SetString(PyExc_ValueError, "seed is out of range"); \ return -1; \ } @@ -1112,7 +1113,7 @@ MMH3Hasher32_init(MMH3Hasher32 *self, PyObject *args, PyObject *kwds) &seed)) return -1; - 
MMH3_VALIDATE_SEED_RETURN_INT(seed); + MMH3_VALIDATE_SEED_RETURN_INT(seed, target_buf); self->h = (uint32_t)seed; @@ -1437,7 +1438,7 @@ MMH3Hasher128x64_init(MMH3Hasher128x64 *self, PyObject *args, PyObject *kwds) &seed)) return -1; - MMH3_VALIDATE_SEED_RETURN_INT(seed); + MMH3_VALIDATE_SEED_RETURN_INT(seed, target_buf); self->h1 = (uint64_t)seed; self->h2 = self->h1; @@ -1790,7 +1791,7 @@ MMH3Hasher128x86_init(MMH3Hasher128x86 *self, PyObject *args, PyObject *kwds) &seed)) return -1; - MMH3_VALIDATE_SEED_RETURN_INT(seed); + MMH3_VALIDATE_SEED_RETURN_INT(seed, target_buf); self->h1 = (uint32_t)seed; self->h2 = self->h1; self->h3 = self->h1; diff --git a/tests/test_invalid_inputs.py b/tests/test_invalid_inputs.py index b284f44..e4d8bbf 100644 --- a/tests/test_invalid_inputs.py +++ b/tests/test_invalid_inputs.py @@ -28,6 +28,94 @@ def test_hash_raises_valueerror() -> None: mmh3.hash(b"hello, world", 2**32) +@no_type_check +def test_hash128_raises_typeerror() -> None: + with pytest.raises(TypeError): + mmh3.hash128() + with pytest.raises(TypeError): + mmh3.hash128(b"hello, world", "42") + with pytest.raises(TypeError): + mmh3.hash128(b"hello, world", 42, True, False, 1234) + with pytest.raises(TypeError): + mmh3.hash128(b"hello, world", seed="42") + with pytest.raises(TypeError): + mmh3.hash128([1, 2, 3], 42) + + +@no_type_check +def test_hash128_raises_valueerror() -> None: + with pytest.raises(ValueError): + mmh3.hash128(b"hello, world", -1) + with pytest.raises(ValueError): + mmh3.hash128(b"hello, world", 2**32) + + +@no_type_check +def test_hash64_raises_typeerror() -> None: + with pytest.raises(TypeError): + mmh3.hash64() + with pytest.raises(TypeError): + mmh3.hash64(b"hello, world", "42") + with pytest.raises(TypeError): + mmh3.hash64(b"hello, world", 42, True, False, 1234) + with pytest.raises(TypeError): + mmh3.hash64(b"hello, world", seed="42") + with pytest.raises(TypeError): + mmh3.hash64([1, 2, 3], 42) + + +@no_type_check +def 
test_hash64_raises_valueerror() -> None: + with pytest.raises(ValueError): + mmh3.hash64(b"hello, world", -1) + with pytest.raises(ValueError): + mmh3.hash64(b"hello, world", 2**32) + + +@no_type_check +def test_hash_bytes_raises_typeerror() -> None: + with pytest.raises(TypeError): + mmh3.hash_bytes() + with pytest.raises(TypeError): + mmh3.hash_bytes(b"hello, world", "42") + with pytest.raises(TypeError): + mmh3.hash_bytes(b"hello, world", 42, True, 1234) + with pytest.raises(TypeError): + mmh3.hash_bytes(b"hello, world", seed="42") + with pytest.raises(TypeError): + mmh3.hash_bytes([1, 2, 3], 42) + + +@no_type_check +def test_hash_bytes_raises_valueerror() -> None: + with pytest.raises(ValueError): + mmh3.hash_bytes(b"hello, world", -1) + with pytest.raises(ValueError): + mmh3.hash_bytes(b"hello, world", 2**32) + + +@no_type_check +def test_hash_from_buffer_raises_typeerror() -> None: + with pytest.raises(TypeError): + mmh3.hash_from_buffer() + with pytest.raises(TypeError): + mmh3.hash_from_buffer(b"hello, world", "42") + with pytest.raises(TypeError): + mmh3.hash_from_buffer(b"hello, world", 42, True, 1234) + with pytest.raises(TypeError): + mmh3.hash_from_buffer(b"hello, world", seed="42") + with pytest.raises(TypeError): + mmh3.hash_from_buffer([1, 2, 3], 42) + + +@no_type_check +def test_hash_from_buffer_raises_valueerror() -> None: + with pytest.raises(ValueError): + mmh3.hash_from_buffer(b"hello, world", -1) + with pytest.raises(ValueError): + mmh3.hash_from_buffer(b"hello, world", 2**32) + + @no_type_check def test_mmh3_32_digest_raises_typeerror() -> None: with pytest.raises(TypeError): @@ -312,3 +400,63 @@ def test_mmh3_x86_128_utupledigest_raises_valueerror() -> None: mmh3.mmh3_x86_128_utupledigest(b"hello, world", -1) with pytest.raises(ValueError): mmh3.mmh3_x86_128_utupledigest(b"hello, world", 2**32) + + +@no_type_check +def test_mmh3_32_init_raises_typeerror() -> None: + with pytest.raises(TypeError): + mmh3.mmh3_32(b"hello, world", 42, 1234) + 
with pytest.raises(TypeError): + mmh3.mmh3_32("hello, world") + with pytest.raises(TypeError): + mmh3.mmh3_32(b"hello, world", "42") + with pytest.raises(TypeError): + mmh3.mmh3_32([1, 2, 3], 42) + + +@no_type_check +def test_mmh3_32_init_raises_valueerror() -> None: + with pytest.raises(ValueError): + mmh3.mmh3_32(b"hello, world", -1) + with pytest.raises(ValueError): + mmh3.mmh3_32(b"hello, world", 2**32) + + +@no_type_check +def test_mmh3_x64_128_init_raises_typeerror() -> None: + with pytest.raises(TypeError): + mmh3.mmh3_x64_128(b"hello, world", 42, 1234) + with pytest.raises(TypeError): + mmh3.mmh3_x64_128("hello, world") + with pytest.raises(TypeError): + mmh3.mmh3_x64_128(b"hello, world", "42") + with pytest.raises(TypeError): + mmh3.mmh3_x64_128([1, 2, 3], 42) + + +@no_type_check +def test_mmh3_x64_128_init_raises_valueerror() -> None: + with pytest.raises(ValueError): + mmh3.mmh3_x64_128(b"hello, world", -1) + with pytest.raises(ValueError): + mmh3.mmh3_x64_128(b"hello, world", 2**32) + + +@no_type_check +def test_mmh3_x86_128_init_raises_typeerror() -> None: + with pytest.raises(TypeError): + mmh3.mmh3_x86_128(b"hello, world", 42, 1234) + with pytest.raises(TypeError): + mmh3.mmh3_x86_128("hello, world") + with pytest.raises(TypeError): + mmh3.mmh3_x86_128(b"hello, world", "42") + with pytest.raises(TypeError): + mmh3.mmh3_x86_128([1, 2, 3], 42) + + +@no_type_check +def test_mmh3_x86_128_init_raises_valueerror() -> None: + with pytest.raises(ValueError): + mmh3.mmh3_x86_128(b"hello, world", -1) + with pytest.raises(ValueError): + mmh3.mmh3_x86_128(b"hello, world", 2**32) From b8dbc5937a9d87fe4c6a4b3e3183dcfa5eaefaa9 Mon Sep 17 00:00:00 2001 From: Hajime Senuma Date: Tue, 17 Sep 2024 16:44:15 +0900 Subject: [PATCH 16/16] Improve docs and docstrings --- CHANGELOG.md | 10 ++- README.md | 33 ++------ src/mmh3/mmh3module.c | 193 ++++++++++++++++++++++-------------------- tox.ini | 2 +- 4 files changed, 116 insertions(+), 122 deletions(-) diff --git 
a/CHANGELOG.md b/CHANGELOG.md index c6805ff..8a38fca 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,12 +25,16 @@ This project has adhered to ### Changed -- Change the format of CHANGELOG.md to conform to the - [Keep a Changelog](https://keepachangelog.com/en/1.1.0/) standard - ([#63](https://github.com/hajimes/mmh3/pull/63)). +- **Backward-incompatible**: The `seed` argument is now strictly validated to + ensure it falls within the range [0, 0xFFFFFFFF]. A `ValueError` is raised + if the seed is out of range. - **Backward-incompatible**: Change the constructors of hasher classes to accept a buffer as the first argument ([#83](https://github.com/hajimes/mmh3/pull/83)). +- The type of flag arguments has been changed from `bool` to `Any`. +- Change the format of CHANGELOG.md to conform to the + [Keep a Changelog](https://keepachangelog.com/en/1.1.0/) standard + ([#63](https://github.com/hajimes/mmh3/pull/63)). ### Deprecated diff --git a/README.md b/README.md index 6dcce39..7d25b6b 100644 --- a/README.md +++ b/README.md @@ -150,12 +150,16 @@ complete changelog. #### Changed -- Change the format of CHANGELOG.md to conform to the - [Keep a Changelog](https://keepachangelog.com/en/1.1.0/) standard - ([#63](https://github.com/hajimes/mmh3/pull/63)). +- **Backward-incompatible**: The `seed` argument is now strictly validated to + ensure it falls within the range [0, 0xFFFFFFFF]. A `ValueError` is raised + if the seed is out of range. - **Backward-incompatible**: Change the constructors of hasher classes to accept a buffer as the first argument ([#83](https://github.com/hajimes/mmh3/pull/83)). +- The type of flag arguments has been changed from `bool` to `Any`. +- Change the format of CHANGELOG.md to conform to the + [Keep a Changelog](https://keepachangelog.com/en/1.1.0/) standard + ([#63](https://github.com/hajimes/mmh3/pull/63)). #### Deprecated @@ -210,29 +214,6 @@ For compatibility with [murmur3 (Go)](https://pkg.go.dev/github.com/spaolacci/murmur3), see . 
-### Unexpected results when given non 32-bit seeds - -In version 2.4, the type of a seed was changed from a signed 32-bit integer to -an unsigned 32-bit integer. However, the resulting values for signed seeds -remain unchanged from previous versions, as long as they are 32-bit. - -```pycon ->>> mmh3.hash("aaaa", -1756908916) # signed representation for 0x9747b28c -1519878282 ->>> mmh3.hash("aaaa", 2538058380) # unsigned representation for 0x9747b28c -1519878282 -``` - -Be careful so that these seeds do not exceed 32-bit. Unexpected results may -happen with invalid values. - -```pycon ->>> mmh3.hash("foo", 2 ** 33) --156908512 ->>> mmh3.hash("foo", 2 ** 34) --156908512 -``` - ## Contributing Guidelines See [Contributing](https://mmh3.readthedocs.io/en/latest/CONTRIBUTING.html). diff --git a/src/mmh3/mmh3module.c b/src/mmh3/mmh3module.c index 1d7289b..5e0f20c 100644 --- a/src/mmh3/mmh3module.c +++ b/src/mmh3/mmh3module.c @@ -91,10 +91,10 @@ PyDoc_STRVAR( "\n" "Args:\n" " key (bytes | str): The input data to hash.\n" - " seed (int): The seed value. Must be an integer in the range [0, " - "0xFFFFFFFF].\n" - " signed (Any): If True, return a signed integer. Otherwise, return " - "an unsigned integer.\n" + " seed (int): The seed value. Must be an integer in the range\n" + " [0, 0xFFFFFFFF].\n" + " signed (Any): If True, return a signed integer. Otherwise, return\n" + " an unsigned integer.\n" "\n" "Returns:\n" " int: The hash value as a 32-bit integer.\n" @@ -171,10 +171,10 @@ PyDoc_STRVAR( " key (Buffer | str): The bufer to hash. String inputs are also\n" " supported and are automatically converted to `bytes` using\n" " UTF-8 encoding before hashing.\n" - " seed (int): The seed value. Must be an integer in the range [0, " - "0xFFFFFFFF].\n" - " signed (Any): If True, return a signed integer. Otherwise, return " - "an unsigned integer.\n" + " seed (int): The seed value. 
Must be an integer in the range\n" + " [0, 0xFFFFFFFF].\n" + " signed (Any): If True, return a signed integer. Otherwise, return\n" + " an unsigned integer.\n" "\n" "Returns:\n" " int: The hash value as a 32-bit integer.\n" @@ -251,12 +251,13 @@ PyDoc_STRVAR( "\n" "Args:\n" " key (bytes | str): The input data to hash.\n" - " seed (int): The seed value. Must be an integer in the range [0, " - "0xFFFFFFFF].\n" - " x64arch (Any): If True, use an algorithm optimized for 64-bit " - "architecture. Otherwise, use one optimized for 32-bit architecture.\n" - " signed (Any): If True, return a signed integer. Otherwise, return " - "an unsigned integer.\n" + " seed (int): The seed value. Must be an integer in the range\n" + " [0, 0xFFFFFFFF].\n" + " x64arch (Any): If True, use an algorithm optimized for 64-bit\n" + " architecture. Otherwise, use one optimized for 32-bit\n" + " architecture.\n" + " signed (Any): If True, return a signed integer. Otherwise, return\n" + " an unsigned integer.\n" "\n" "Returns:\n" " tuple[int, int]: The hash value as a tuple of two 64-bit " @@ -311,12 +312,13 @@ PyDoc_STRVAR( "\n" "Args:\n" " key (bytes | str): The input data to hash.\n" - " seed (int): The seed value. Must be an integer in the range [0, " - "0xFFFFFFFF].\n" - " x64arch (Any): If True, use an algorithm optimized for 64-bit " - "architecture. Otherwise, use one optimized for 32-bit architecture.\n" - " signed (Any): If True, return a signed integer. Otherwise, return " - "an unsigned integer.\n" + " seed (int): The seed value. Must be an integer in the range\n" + " [0, 0xFFFFFFFF].\n" + " x64arch (Any): If True, use an algorithm optimized for 64-bit\n" + " architecture. Otherwise, use one optimized for 32-bit\n" + " architecture.\n" + " signed (Any): If True, return a signed integer. 
Otherwise, return\n" + " an unsigned integer.\n" "\n" "Returns:\n" " int: The hash value as a 128-bit integer.\n" @@ -378,14 +380,14 @@ PyDoc_STRVAR( "\n" "Args:\n" " key (bytes | str): The input data to hash.\n" - " seed (int): The seed value. Must be an integer in the range [0, " - "0xFFFFFFFF].\n" - " x64arch (Any): If True, use an algorithm optimized for 64-bit " - "architecture. Otherwise, use one optimized for 32-bit architecture.\n" - "\n" + " seed (int): The seed value. Must be an integer in the range\n" + " [0, 0xFFFFFFFF].\n" + " x64arch (Any): If True, use an algorithm optimized for 64-bit\n" + " architecture. Otherwise, use one optimized for 32-bit\n" + " architecture.\n" "Returns:\n" - " bytes: The hash value as the ``bytes`` type with a length of 16 " - "bytes (128 bits).\n") + " bytes: The hash value as the ``bytes`` type with a length of 16\n" + " bytes (128 bits).\n") "\n" ".. versionchanged:: 5.0.0\n" " The ``seed`` argument is now strictly checked for valid range.\n" @@ -439,12 +441,12 @@ PyDoc_STRVAR( "\n" "Args:\n" " key (Buffer): The input buffer to hash.\n" - " seed (int): The seed value. Must be an integer in the range [0, " - "0xFFFFFFFF].\n" + " seed (int): The seed value. Must be an integer in the range\n" + " [0, 0xFFFFFFFF].\n" "\n" "Returns:\n" - " bytes: The hash value as the ``bytes`` type with a length of 4 bytes " - "(32 bits).\n" + " bytes: The hash value as the ``bytes`` type with a length of\n" + " 4 bytes (32 bits).\n" "\n" ".. versionadded:: 5.0.0\n"); @@ -479,8 +481,8 @@ PyDoc_STRVAR( "\n" "Args:\n" " key (Buffer): The input buffer to hash.\n" - " seed (int): The seed value. Must be an integer in the range [0, " - "0xFFFFFFFF].\n" + " seed (int): The seed value. Must be an integer in the range\n" + " [0, 0xFFFFFFFF].\n" "\n" "Returns:\n" " int: The hash value as a 32-bit signed integer.\n" @@ -515,8 +517,8 @@ PyDoc_STRVAR( "\n" "Args:\n" " key (Buffer): The input buffer to hash.\n" - " seed (int): The seed value. 
Must be an integer in the range [0, " - "0xFFFFFFFF].\n" + " seed (int): The seed value. Must be an integer in the range\n" + " [0, 0xFFFFFFFF].\n" "\n" "Returns:\n" " int: The hash value as a 32-bit unsigned integer.\n" @@ -551,12 +553,12 @@ PyDoc_STRVAR( "\n" "Args:\n" " key (Buffer): The input buffer to hash.\n" - " seed (int): The seed value. Must be an integer in the range [0, " - "0xFFFFFFFF].\n" + " seed (int): The seed value. Must be an integer in the range\n" + " [0, 0xFFFFFFFF].\n" "\n" "Returns:\n" - " bytes: The hash value as the ``bytes`` type with a length of 16 " - "bytes (128 bits).\n" + " bytes: The hash value as the ``bytes`` type with a length of\n" + " 16 bytes (128 bits).\n" "\n" ".. versionadded:: 5.0.0\n"); @@ -593,8 +595,8 @@ PyDoc_STRVAR( "\n" "Args:\n" " key (Buffer): The input buffer to hash.\n" - " seed (int): The seed value. Must be an integer in the range [0, " - "0xFFFFFFFF].\n" + " seed (int): The seed value. Must be an integer in the range\n" + " [0, 0xFFFFFFFF].\n" "\n" "Returns:\n" " int: The hash value as a 128-bit signed integer.\n" @@ -643,8 +645,8 @@ PyDoc_STRVAR( "\n" "Args:\n" " key (Buffer): The input buffer to hash.\n" - " seed (int): The seed value. Must be an integer in the range [0, " - "0xFFFFFFFF].\n" + " seed (int): The seed value. Must be an integer in the range\n" + " [0, 0xFFFFFFFF].\n" "\n" "Returns:\n" " int: The hash value as a 128-bit unsigned integer.\n" @@ -693,12 +695,12 @@ PyDoc_STRVAR( "\n" "Args:\n" " key (Buffer): The input buffer to hash.\n" - " seed (int): The seed value. Must be an integer in the range [0, " - "0xFFFFFFFF].\n" + " seed (int): The seed value. Must be an integer in the range\n" + " [0, 0xFFFFFFFF].\n" "\n" "Returns:\n" - " tuple[int, int]: The hash value as a tuple of two 64-bit signed " - "integers.\n" + " tuple[int, int]: The hash value as a tuple of two 64-bit signed\n" + " integers.\n" "\n" ".. 
versionadded:: 5.0.0\n"); @@ -732,12 +734,12 @@ PyDoc_STRVAR( "\n" "Args:\n" " key (Buffer): The input buffer to hash.\n" - " seed (int): The seed value. Must be an integer in the range [0, " - "0xFFFFFFFF].\n" + " seed (int): The seed value. Must be an integer in the range\n" + " [0, 0xFFFFFFFF].\n" "\n" "Returns:\n" - " tuple[int, int]: The hash value as a tuple of two 64-bit unsigned " - "integers.\n" + " tuple[int, int]: The hash value as a tuple of two 64-bit unsigned\n" + " integers.\n" "\n" ".. versionadded:: 5.0.0\n"); @@ -770,12 +772,12 @@ PyDoc_STRVAR( "\n" "Args:\n" " key (Buffer): The input buffer to hash.\n" - " seed (int): The seed value. Must be an integer in the range [0, " - "0xFFFFFFFF].\n" + " seed (int): The seed value. Must be an integer in the range\n" + " [0, 0xFFFFFFFF].\n" "\n" "Returns:\n" - " bytes: The hash value as the ``bytes`` type with a length of 16 " - "bytes (128 bits).\n" + " bytes: The hash value as the ``bytes`` type with a length of\n" + " 16 bytes (128 bits).\n" "\n" ".. versionadded:: 5.0.0\n"); @@ -812,8 +814,8 @@ PyDoc_STRVAR( "\n" "Args:\n" " key (Buffer): The input buffer to hash.\n" - " seed (int): The seed value. Must be an integer in the range [0, " - "0xFFFFFFFF].\n" + " seed (int): The seed value. Must be an integer in the range\n" + " [0, 0xFFFFFFFF].\n" "\n" "Returns:\n" " int: The hash value as an signed 128-bit integer.\n" @@ -862,8 +864,8 @@ PyDoc_STRVAR( "\n" "Args:\n" " key (Buffer): The input buffer to hash.\n" - " seed (int): The seed value. Must be an integer in the range [0, " - "0xFFFFFFFF].\n" + " seed (int): The seed value. Must be an integer in the range\n" + " [0, 0xFFFFFFFF].\n" "\n" "Returns:\n" " int: The hash value as a 128-bit unsigned integer.\n" @@ -912,12 +914,12 @@ PyDoc_STRVAR( "\n" "Args:\n" " key (Buffer): The input buffer to hash.\n" - " seed (int): The seed value. Must be an integer in the range [0, " - "0xFFFFFFFF].\n" + " seed (int): The seed value. 
Must be an integer in the range\n" + " [0, 0xFFFFFFFF].\n" "\n" "Returns:\n" - " tuple[int, int]: The hash value as a tuple of two 64-bit signed " - "integers.\n" + " tuple[int, int]: The hash value as a tuple of two 64-bit signed\n" + " integers.\n" "\n" ".. versionadded:: 5.0.0\n"); @@ -951,12 +953,12 @@ PyDoc_STRVAR( "\n" "Args:\n" " key (Buffer): The input buffer to hash.\n" - " seed (int): The seed value. Must be an integer in the range [0, " - "0xFFFFFFFF].\n" + " seed (int): The seed value. Must be an integer in the range\n" + " [0, 0xFFFFFFFF].\n" "\n" "Returns:\n" - " tuple[int, int]: The hash value as a tuple of two 64-bit unsigned " - "integers.\n" + " tuple[int, int]: The hash value as a tuple of two 64-bit unsigned\n" + " integers.\n" "\n" ".. versionadded:: 5.0.0\n"); @@ -1125,12 +1127,14 @@ MMH3Hasher32_init(MMH3Hasher32 *self, PyObject *args, PyObject *kwds) return 0; } -PyDoc_STRVAR(MMH3Hasher_update_doc, - "update(data)\n\n" - "Update this hash object's state with the provided bytes-like " - "object.\n\n" - "Args:\n" - " data (Buffer): The buffer to hash.\n\n"); +PyDoc_STRVAR( + MMH3Hasher_update_doc, + "update(data)\n" + "\n" + "Update this hash object's state with the provided bytes-like object.\n" + "\n" + "Args:\n" + " data (Buffer): The buffer to hash.\n"); static PyObject * MMH3Hasher32_update(MMH3Hasher32 *self, PyObject *obj) @@ -1290,8 +1294,8 @@ PyDoc_STRVAR( "\n" "Args:\n" " data (Buffer | None): The initial data to hash.\n" - " seed (int): The seed value. Must be an integer in the range " - "[0, 0xFFFFFFFF].\n" + " seed (int): The seed value. Must be an integer in the range\n" + " [0, 0xFFFFFFFF].\n" "\n" ".. 
versionchanged:: 5.0.0\n" " Added the optional ``data`` parameter as the first argument.\n" @@ -1523,8 +1527,8 @@ PyDoc_STRVAR(MMH3Hasher128_stupledigest_doc, "Return the digest value as a tuple of two signed integers.\n" "\n" "Returns:\n" - " tuple[int, int]: The digest value as a tuple of two signed " - "integers.\n"); + " tuple[int, int]: The digest value as a tuple of two signed\n" + " integers.\n"); static PyObject * MMH3Hasher128x64_stupledigest(MMH3Hasher128x64 *self, @@ -1546,14 +1550,15 @@ MMH3Hasher128x64_stupledigest(MMH3Hasher128x64 *self, return Py_BuildValue(valflag, result1, result2); } -PyDoc_STRVAR(MMH3Hasher128_utupledigest_doc, - "utupledigest() -> tuple[int, int]\n" - "\n" - "Return the digest value as a tuple of two unsigned integers.\n" - "\n" - "Returns:\n" - " tuple[int, int]: The digest value as a tuple of two " - "unsigned integers.\n"); +PyDoc_STRVAR( + MMH3Hasher128_utupledigest_doc, + "utupledigest() -> tuple[int, int]\n" + "\n" + "Return the digest value as a tuple of two unsigned integers.\n" + "\n" + "Returns:\n" + " tuple[int, int]: The digest value as a tuple of two unsigned\n" + " integers.\n"); static PyObject * MMH3Hasher128x64_utupledigest(MMH3Hasher128x64 *self, @@ -1656,8 +1661,8 @@ PyDoc_STRVAR( "\n" "Args:\n" " data (Buffer | None): The initial data to hash.\n" - " seed (int): The seed value. Must be an integer in the range " - "[0, 0xFFFFFFFF].\n" + " seed (int): The seed value. Must be an integer in the range\n" + " [0, 0xFFFFFFFF].\n" "\n" ".. 
versionchanged:: 5.0.0\n" " Added the optional ``data`` parameter as the first argument.\n" @@ -2026,15 +2031,19 @@ static PyTypeObject MMH3Hasher128x86Type = { static struct PyModuleDef mmh3module = { PyModuleDef_HEAD_INIT, "mmh3", - "A Python front-end to MurmurHash3.\n\n" + "A Python front-end to MurmurHash3.\n" + "\n" "A Python front-end to MurmurHash3, " "a fast and robust non-cryptographic hash library " - "created by Austin Appleby (http://code.google.com/p/smhasher/).\n\n" + "created by Austin Appleby (http://code.google.com/p/smhasher/).\n" + "\n" "Ported by Hajime Senuma . " "If you find any bugs, please submit an issue via " - "https://github.com/hajimes/mmh3.\n\n" - "Typical usage example:\n\n" - " mmh3.hash('foobar', 42)", + "https://github.com/hajimes/mmh3.\n" + "\n" + "Typical usage example:\n" + "\n" + " mmh3.hash(\"foobar\", 42)", -1, Mmh3Methods, NULL, diff --git a/tox.ini b/tox.ini index eb77b63..8cb8f16 100644 --- a/tox.ini +++ b/tox.ini @@ -67,4 +67,4 @@ description = plot benchmark results commands_pre = pip install ".[benchmark,plot]" commands = - python benchmark/plot_graph.py {posargs} \ No newline at end of file + python benchmark/plot_graph.py {posargs}