Skip to content

Commit

Permalink
Merge pull request #4188 from tybug/background-db
Browse files Browse the repository at this point in the history
Add `BackgroundWriteDatabase`
  • Loading branch information
Zac-HD authored Nov 28, 2024
2 parents 32817c5 + c078ef1 commit 65bd569
Show file tree
Hide file tree
Showing 7 changed files with 89 additions and 4 deletions.
3 changes: 3 additions & 0 deletions hypothesis-python/RELEASE.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
RELEASE_TYPE: minor

This release adds :class:`~hypothesis.database.BackgroundWriteDatabase`, a new database backend which defers writes on the wrapped database to a background thread. This allows for low-overhead writes in performance-critical environments like :ref:`fuzz_one_input <fuzz_one_input>`.
1 change: 1 addition & 0 deletions hypothesis-python/docs/database.rst
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ Hypothesis provides the following :class:`~hypothesis.database.ExampleDatabase`
.. autoclass:: hypothesis.database.GitHubArtifactDatabase
.. autoclass:: hypothesis.database.ReadOnlyDatabase
.. autoclass:: hypothesis.database.MultiplexedDatabase
.. autoclass:: hypothesis.database.BackgroundWriteDatabase
.. autoclass:: hypothesis.extra.redis.RedisExampleDatabase

---------------------------------
Expand Down
10 changes: 8 additions & 2 deletions hypothesis-python/docs/details.rst
Original file line number Diff line number Diff line change
Expand Up @@ -691,8 +691,8 @@ on working with markers <pytest:mark examples>`.
.. note::
Pytest will load the plugin automatically if Hypothesis is installed.
You don't need to do anything at all to use it.
If it causes problems, you can avoid loading the plugin with the

If it causes problems, you can avoid loading the plugin with the
``-p no:hypothesispytest`` option.


Expand Down Expand Up @@ -750,6 +750,12 @@ to the exact version of Hypothesis you are using and the strategies given to
the test, just like the :doc:`example database <database>` and
:func:`@reproduce_failure <hypothesis.reproduce_failure>` decorator.

.. tip::

If you expect ``fuzz_one_input`` to discover many failures, consider
wrapping your database with :class:`~hypothesis.database.BackgroundWriteDatabase`,
which keeps the overhead of saving each failure low.

~~~~~~~~~~~~~~~~~~~~~~~~~
Interaction with settings
~~~~~~~~~~~~~~~~~~~~~~~~~
Expand Down
48 changes: 48 additions & 0 deletions hypothesis-python/src/hypothesis/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,15 @@
import struct
import sys
import warnings
import weakref
from collections.abc import Iterable
from datetime import datetime, timedelta, timezone
from functools import lru_cache
from hashlib import sha384
from os import getenv
from pathlib import Path, PurePath
from queue import Queue
from threading import Thread
from typing import Optional
from urllib.error import HTTPError, URLError
from urllib.request import Request, urlopen
Expand Down Expand Up @@ -675,6 +678,51 @@ def delete(self, key: bytes, value: bytes) -> None:
raise RuntimeError(self._read_only_message)


class BackgroundWriteDatabase(ExampleDatabase):
    """A wrapper which defers writes on the given database to a background thread.

    Calls to :meth:`~hypothesis.database.ExampleDatabase.fetch` wait for any
    enqueued writes to finish before fetching from the database.
    """

    def __init__(self, db: ExampleDatabase) -> None:
        self._db = db
        # Each queued item is (name of the method to call on the wrapped
        # database, positional arguments for that call).
        self._queue: Queue[tuple[str, tuple[bytes, ...]]] = Queue()
        self._thread = Thread(target=self._worker, daemon=True)
        self._thread.start()
        # Give pending writes a short, bounded grace period when the finalizer
        # runs, rather than blocking indefinitely. 0.1 seconds should be plenty
        # for most use cases.
        weakref.finalize(self, self._join, 0.1)

    def __repr__(self) -> str:
        return f"BackgroundWriteDatabase({self._db!r})"

    def _worker(self) -> None:
        """Apply queued writes to the wrapped database, one at a time, forever."""
        while True:
            method, args = self._queue.get()
            try:
                getattr(self._db, method)(*args)
            except Exception:
                # There is no caller to propagate to from this thread. Report
                # the failure, but keep the worker alive so that later writes
                # are still applied.
                sys.excepthook(*sys.exc_info())
            finally:
                # Always mark the task as done, even if the write failed;
                # otherwise _join (and therefore fetch) would block forever.
                self._queue.task_done()

    def _join(self, timeout: Optional[float] = None) -> None:
        """Wait until every queued write has been processed.

        If ``timeout`` is not None, give up after that many seconds in total,
        even if writes are still pending.
        """
        # Queue.join() does not accept a timeout; see
        # https://bugs.python.org/issue9634. Condition.wait_for re-checks the
        # predicate on every wakeup while enforcing ``timeout`` as an overall
        # limit, not a per-wakeup one.
        with self._queue.all_tasks_done:
            self._queue.all_tasks_done.wait_for(
                lambda: not self._queue.unfinished_tasks, timeout
            )

    def fetch(self, key: bytes) -> Iterable[bytes]:
        # Flush pending writes first so that the read observes them.
        self._join()
        return self._db.fetch(key)

    def save(self, key: bytes, value: bytes) -> None:
        self._queue.put(("save", (key, value)))

    def delete(self, key: bytes, value: bytes) -> None:
        self._queue.put(("delete", (key, value)))

    def move(self, src: bytes, dest: bytes, value: bytes) -> None:
        self._queue.put(("move", (src, dest, value)))


def ir_to_bytes(ir: Iterable[IRType], /) -> bytes:
"""Serialize a list of IR elements to a bytestring. Inverts ir_from_bytes."""
# We use a custom serialization format for this, which might seem crazy - but our
Expand Down
18 changes: 18 additions & 0 deletions hypothesis-python/tests/cover/test_database_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@

from hypothesis import configuration, example, given, settings, strategies as st
from hypothesis.database import (
BackgroundWriteDatabase,
DirectoryBasedExampleDatabase,
ExampleDatabase,
GitHubArtifactDatabase,
Expand All @@ -39,6 +40,7 @@
from hypothesis.strategies import binary, lists, tuples
from hypothesis.utils.conventions import not_set

from tests.common.utils import skipif_emscripten
from tests.conjecture.common import ir, ir_nodes

small_settings = settings(max_examples=50)
Expand Down Expand Up @@ -451,6 +453,22 @@ def test_database_directory_inaccessible(dirs, tmp_path, monkeypatch):
database.save(b"fizz", b"buzz")


@skipif_emscripten
def test_background_write_database():
    """Saves, moves and deletes issued through the wrapper are seen by fetch."""
    db = BackgroundWriteDatabase(InMemoryExampleDatabase())

    def stored(key):
        return set(db.fetch(key))

    for value in (b"b", b"c", b"d"):
        db.save(b"a", value)
    assert stored(b"a") == {b"b", b"c", b"d"}

    db.move(b"a", b"a2", b"b")
    assert stored(b"a") == {b"c", b"d"}
    assert stored(b"a2") == {b"b"}

    db.delete(b"a", b"c")
    assert stored(b"a") == {b"d"}


@given(lists(ir_nodes()))
# covering examples
@example(ir(True))
Expand Down
6 changes: 5 additions & 1 deletion hypothesis-python/tests/cover/test_stateful.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,11 @@ def action(self, d):
raise AssertionError


@Settings(stateful_step_count=10, max_examples=30) # speed this up
@Settings(
stateful_step_count=10,
max_examples=30,
suppress_health_check=[HealthCheck.filter_too_much],
) # speed this up
class MachineWithConsumingRule(RuleBasedStateMachine):
b1 = Bundle("b1")
b2 = Bundle("b2")
Expand Down
7 changes: 6 additions & 1 deletion hypothesis-python/tests/nocover/test_database_agreement.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,11 @@
import tempfile

from hypothesis import strategies as st
from hypothesis.database import DirectoryBasedExampleDatabase, InMemoryExampleDatabase
from hypothesis.database import (
BackgroundWriteDatabase,
DirectoryBasedExampleDatabase,
InMemoryExampleDatabase,
)
from hypothesis.stateful import Bundle, RuleBasedStateMachine, rule


Expand All @@ -27,6 +31,7 @@ def __init__(self):
DirectoryBasedExampleDatabase(exampledir),
InMemoryExampleDatabase(),
DirectoryBasedExampleDatabase(exampledir),
BackgroundWriteDatabase(InMemoryExampleDatabase()),
]

keys = Bundle("keys")
Expand Down

0 comments on commit 65bd569

Please sign in to comment.