Skip to content

Commit

Permalink
add BackgroundWriteDatabase
Browse files Browse the repository at this point in the history
  • Loading branch information
tybug committed Nov 27, 2024
1 parent 89d58f2 commit 12072f4
Show file tree
Hide file tree
Showing 5 changed files with 79 additions and 5 deletions.
3 changes: 1 addition & 2 deletions hypothesis-python/RELEASE.rst
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
RELEASE_TYPE: minor

* This release changes our input distribution for low ``max_examples``. Previously, we capped the size of inputs when generating at least the first 10 inputs, with the reasoning that early inputs to a property should be small. However, this meant properties with ``max_examples=10`` would consist entirely of small inputs. This patch removes the hard lower bound so that inputs to these properties are more representative of the input space.
* When a user requests an interactive input via ``strategy.example``, we generate and cache a batch of 100 inputs, returning the first one. This can be expensive for large strategies or when only a few examples are needed. This release improves the speed of ``strategy.example`` by lowering the batch size to 10.
This release adds :class:`~hypothesis.database.BackgroundWriteDatabase`, a new database backend that defers writes on the wrapped database to a background thread. This allows for low-overhead writes in performance-critical environments like fuzzing.
10 changes: 8 additions & 2 deletions hypothesis-python/docs/details.rst
Original file line number Diff line number Diff line change
Expand Up @@ -691,8 +691,8 @@ on working with markers <pytest:mark examples>`.
.. note::
Pytest will load the plugin automatically if Hypothesis is installed.
You don't need to do anything at all to use it.
If it causes problems, you can avoid loading the plugin with the

If it causes problems, you can avoid loading the plugin with the
``-p no:hypothesispytest`` option.


Expand Down Expand Up @@ -750,6 +750,12 @@ to the exact version of Hypothesis you are using and the strategies given to
the test, just like the :doc:`example database <database>` and
:func:`@reproduce_failure <hypothesis.reproduce_failure>` decorator.

.. tip::

For usages of ``fuzz_one_input`` which expect to discover many failures, consider
wrapping your database with :class:`~hypothesis.database.BackgroundWriteDatabase`
for low-overhead writes of failures to the database.

~~~~~~~~~~~~~~~~~~~~~~~~~
Interaction with settings
~~~~~~~~~~~~~~~~~~~~~~~~~
Expand Down
48 changes: 48 additions & 0 deletions hypothesis-python/src/hypothesis/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,15 @@
import struct
import sys
import warnings
import weakref
from collections.abc import Iterable
from datetime import datetime, timedelta, timezone
from functools import lru_cache
from hashlib import sha384
from os import getenv
from pathlib import Path, PurePath
from queue import Queue
from threading import Thread
from typing import Optional
from urllib.error import HTTPError, URLError
from urllib.request import Request, urlopen
Expand Down Expand Up @@ -675,6 +678,51 @@ def delete(self, key: bytes, value: bytes) -> None:
raise RuntimeError(self._read_only_message)


class BackgroundWriteDatabase(ExampleDatabase):
    """A wrapper which defers writes on the given database to a background thread.

    Calls to :meth:`~hypothesis.database.ExampleDatabase.fetch` wait for any
    enqueued writes to finish before fetching from the database.

    If a deferred write raises in the background thread, its traceback is
    printed to stderr and the write is dropped, so that later writes and
    fetches are not blocked by the failure.
    """

    def __init__(self, db: ExampleDatabase) -> None:
        self._db = db
        # Each item is (name of the method to call on self._db, its arguments).
        self._queue: Queue[tuple[str, tuple[bytes, ...]]] = Queue()
        self._thread = Thread(target=self._worker, daemon=True)
        self._thread.start()
        # avoid an unbounded timeout during gc. 0.1 should be plenty for most
        # use cases.
        # NOTE(review): the weakref.finalize docs advise against using a bound
        # method of the tracked object as the callback, because it keeps the
        # object alive; in practice this finalizer presumably only runs at
        # interpreter exit -- confirm that this is the intent.
        weakref.finalize(self, self._join, 0.1)

    def __repr__(self) -> str:
        return f"BackgroundWriteDatabase({self._db!r})"

    def _worker(self) -> None:
        """Apply queued writes to the wrapped database, forever, in FIFO order."""
        # Imported here because it is only needed on the failure path.
        import traceback

        while True:
            method, args = self._queue.get()
            try:
                getattr(self._db, method)(*args)
            except Exception:
                # Report the failure, but keep the worker alive: if this thread
                # died, queued writes would never be marked done and every
                # later fetch() would block forever in _join().
                traceback.print_exc()
            finally:
                # Always mark the item as done, even if the write failed, so
                # that waiters in _join() are released.
                self._queue.task_done()

    def _join(self, timeout: Optional[float] = None) -> None:
        """Block until all enqueued writes have been processed.

        :param timeout: maximum number of seconds to wait in total, or
            ``None`` to wait without limit.
        """
        # Queue.join with a timeout. https://bugs.python.org/issue9634
        # Condition.wait_for tracks the overall deadline itself; re-calling
        # wait(timeout) in a loop would restart the timeout on every wakeup
        # and therefore never actually bound the wait.
        with self._queue.all_tasks_done:
            self._queue.all_tasks_done.wait_for(
                lambda: not self._queue.unfinished_tasks, timeout
            )

    def fetch(self, key: bytes) -> Iterable[bytes]:
        # Flush pending writes first so the fetch observes them.
        self._join()
        return self._db.fetch(key)

    def save(self, key: bytes, value: bytes) -> None:
        self._queue.put(("save", (key, value)))

    def delete(self, key: bytes, value: bytes) -> None:
        self._queue.put(("delete", (key, value)))

    def move(self, src: bytes, dest: bytes, value: bytes) -> None:
        self._queue.put(("move", (src, dest, value)))


def ir_to_bytes(ir: Iterable[IRType], /) -> bytes:
"""Serialize a list of IR elements to a bytestring. Inverts ir_from_bytes."""
# We use a custom serialization format for this, which might seem crazy - but our
Expand Down
16 changes: 16 additions & 0 deletions hypothesis-python/tests/cover/test_database_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@

from hypothesis import configuration, example, given, settings, strategies as st
from hypothesis.database import (
BackgroundWriteDatabase,
DirectoryBasedExampleDatabase,
ExampleDatabase,
GitHubArtifactDatabase,
Expand Down Expand Up @@ -451,6 +452,21 @@ def test_database_directory_inaccessible(dirs, tmp_path, monkeypatch):
database.save(b"fizz", b"buzz")


def test_background_write_database():
    # Each fetch must observe every write that was queued before it, even
    # though the writes themselves are applied on a background thread.
    wrapped = BackgroundWriteDatabase(InMemoryExampleDatabase())

    for value in (b"b", b"c", b"d"):
        wrapped.save(b"a", value)
    stored = set(wrapped.fetch(b"a"))
    assert stored == {b"b", b"c", b"d"}

    # Moving a value removes it from the source key and adds it to the target.
    wrapped.move(b"a", b"a2", b"b")
    remaining = set(wrapped.fetch(b"a"))
    assert remaining == {b"c", b"d"}
    moved = set(wrapped.fetch(b"a2"))
    assert moved == {b"b"}

    # Deleting a value leaves the other values under the key untouched.
    wrapped.delete(b"a", b"c")
    remaining = set(wrapped.fetch(b"a"))
    assert remaining == {b"d"}


@given(lists(ir_nodes()))
# covering examples
@example(ir(True))
Expand Down
7 changes: 6 additions & 1 deletion hypothesis-python/tests/nocover/test_database_agreement.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,11 @@
import tempfile

from hypothesis import strategies as st
from hypothesis.database import DirectoryBasedExampleDatabase, InMemoryExampleDatabase
from hypothesis.database import (
BackgroundWriteDatabase,
DirectoryBasedExampleDatabase,
InMemoryExampleDatabase,
)
from hypothesis.stateful import Bundle, RuleBasedStateMachine, rule


Expand All @@ -27,6 +31,7 @@ def __init__(self):
DirectoryBasedExampleDatabase(exampledir),
InMemoryExampleDatabase(),
DirectoryBasedExampleDatabase(exampledir),
BackgroundWriteDatabase(InMemoryExampleDatabase()),
]

keys = Bundle("keys")
Expand Down

0 comments on commit 12072f4

Please sign in to comment.