Skip to content

Commit

Permalink
Merge pull request #4231 from tybug/cached-tf-choices
Browse files Browse the repository at this point in the history
Use choices in `generate_novel_prefix` and `cached_test_function_ir`
  • Loading branch information
tybug authored Jan 9, 2025
2 parents 92eb11e + 85e0005 commit 8813586
Show file tree
Hide file tree
Showing 16 changed files with 287 additions and 400 deletions.
3 changes: 3 additions & 0 deletions hypothesis-python/RELEASE.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
RELEASE_TYPE: patch

More internal code refactoring.
61 changes: 8 additions & 53 deletions hypothesis-python/src/hypothesis/internal/conjecture/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,18 +245,6 @@ def parent(self) -> Optional[int]:
return None
return self.owner.parentage[self.index]

@property
def start(self) -> int:
"""The position of the start of this example in the byte stream."""
return self.owner.starts[self.index]

@property
def end(self) -> int:
"""The position directly after the last byte of this example in the byte
stream, i.e. the example corresponds to the half-open region [start, end).
"""
return self.owner.ends[self.index]

@property
def ir_start(self) -> int:
return self.owner.ir_starts[self.index]
Expand All @@ -280,11 +268,6 @@ def discarded(self) -> bool:
generated value and try again."""
return self.index in self.owner.discarded

@property
def length(self) -> int:
"""The number of bytes in this example."""
return self.end - self.start

@property
def ir_length(self) -> int:
"""The number of ir nodes in this example."""
Expand Down Expand Up @@ -464,32 +447,6 @@ def __init__(self, record: ExampleRecord, blocks: "Blocks") -> None:
self.blocks = blocks
self.__children: "list[Sequence[int]] | None" = None

class _starts_and_ends(ExampleProperty):
def begin(self) -> None:
self.starts = IntList.of_length(len(self.examples))
self.ends = IntList.of_length(len(self.examples))

def start_example(self, i: int, label_index: int) -> None:
self.starts[i] = self.bytes_read

def stop_example(self, i: int, *, discarded: bool) -> None:
self.ends[i] = self.bytes_read

def finish(self) -> tuple[IntList, IntList]:
return (self.starts, self.ends)

starts_and_ends: "tuple[IntList, IntList]" = calculated_example_property(
_starts_and_ends
)

@property
def starts(self) -> IntList:
return self.starts_and_ends[0]

@property
def ends(self) -> IntList:
return self.starts_and_ends[1]

class _ir_starts_and_ends(ExampleProperty):
def begin(self) -> None:
self.starts = IntList.of_length(len(self.examples))
Expand Down Expand Up @@ -2035,11 +1992,13 @@ def _draw(self, ir_type, kwargs, *, observe, forced, fake_forced):
choice = self._pop_choice(ir_type, kwargs, forced=forced)
else:
try:
(choice, _buf) = ir_to_buffer(
ir_type, kwargs, forced=forced, random=self.__random
choice = (
forced
if forced is not None
else draw_choice(ir_type, kwargs, random=self.__random)
)
except StopTest:
debug_report("overrun because ir_to_buffer overran")
debug_report("overrun because draw_choice overran")
self.mark_overrun()

if forced is None:
Expand Down Expand Up @@ -2625,17 +2584,13 @@ def bits_to_bytes(n: int) -> int:
return (n + 7) >> 3


def ir_to_buffer(ir_type, kwargs, *, forced=None, random=None):
def draw_choice(ir_type, kwargs, *, random):
from hypothesis.internal.conjecture.engine import BUFFER_SIZE

if forced is None:
assert random is not None

cd = ConjectureData(
max_length=BUFFER_SIZE,
# buffer doesn't matter if forced is passed since we're forcing the sole draw
prefix=b"" if forced is None else bytes(BUFFER_SIZE),
prefix=b"",
random=random,
)
value = getattr(cd.provider, f"draw_{ir_type}")(**kwargs, forced=forced)
return (value, cd.buffer)
return getattr(cd.provider, f"draw_{ir_type}")(**kwargs)
64 changes: 26 additions & 38 deletions hypothesis-python/src/hypothesis/internal/conjecture/datatree.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

import math
from random import Random
from typing import TYPE_CHECKING, AbstractSet, Optional, Union
from typing import AbstractSet, Optional, Union

import attr

Expand Down Expand Up @@ -41,9 +41,6 @@
sign_aware_lte,
)

if TYPE_CHECKING:
from hypothesis.internal.conjecture.data import IRNode


class PreviouslyUnseenBehaviour(HypothesisException):
pass
Expand Down Expand Up @@ -666,23 +663,21 @@ def is_exhausted(self) -> bool:
"""
return self.root.is_exhausted

def generate_novel_prefix(self, random: Random) -> tuple["IRNode", ...]:
def generate_novel_prefix(self, random: Random) -> tuple[ChoiceT, ...]:
"""Generate a short random sequence of choices that (after rewriting) is
not a prefix of any choice sequence previously added to the tree.
The resulting prefix is essentially arbitrary - it would be nice
for it to be uniform at random, but previous attempts to do that
have proven too expensive.
"""
from hypothesis.internal.conjecture.data import IRNode

assert not self.is_exhausted
novel_prefix: list[IRNode] = []
prefix = []

def append_node(node):
if node.ir_type == "float":
node.value = int_to_float(node.value)
novel_prefix.append(node)
def append_choice(ir_type, choice):
if ir_type == "float":
choice = int_to_float(choice)
prefix.append(choice)

current_node = self.root
while True:
Expand All @@ -691,71 +686,67 @@ def append_node(node):
zip(current_node.ir_types, current_node.kwargs, current_node.values)
):
if i in current_node.forced:
append_node(
IRNode(
ir_type=ir_type, value=value, kwargs=kwargs, was_forced=True
)
)
append_choice(ir_type, value)
else:
attempts = 0
while True:
if attempts <= 10:
try:
node = self._draw(ir_type, kwargs, random=random)
node_value = self._draw(ir_type, kwargs, random=random)
except StopTest: # pragma: no cover
# it is possible that drawing from a fresh data can
# overrun BUFFER_SIZE, due to eg unlucky rejection sampling
# of integer probes. Retry these cases.
attempts += 1
continue
else:
node = self._draw_from_cache(
node_value = self._draw_from_cache(
ir_type, kwargs, key=id(current_node), random=random
)

if node.value != value:
append_node(node)
if node_value != value:
append_choice(ir_type, node_value)
break
attempts += 1
self._reject_child(
ir_type, kwargs, child=node.value, key=id(current_node)
ir_type, kwargs, child=node_value, key=id(current_node)
)
# We've now found a value that is allowed to
# vary, so what follows is not fixed.
return tuple(novel_prefix)
return tuple(prefix)
else:
assert not isinstance(current_node.transition, (Conclusion, Killed))
if current_node.transition is None:
return tuple(novel_prefix)
return tuple(prefix)
branch = current_node.transition
assert isinstance(branch, Branch)

attempts = 0
while True:
if attempts <= 10:
try:
node = self._draw(
node_value = self._draw(
branch.ir_type, branch.kwargs, random=random
)
except StopTest: # pragma: no cover
attempts += 1
continue
else:
node = self._draw_from_cache(
node_value = self._draw_from_cache(
branch.ir_type, branch.kwargs, key=id(branch), random=random
)
try:
child = branch.children[node.value]
child = branch.children[node_value]
except KeyError:
append_node(node)
return tuple(novel_prefix)
append_choice(branch.ir_type, node_value)
return tuple(prefix)
if not child.is_exhausted:
append_node(node)
append_choice(branch.ir_type, node_value)
current_node = child
break
attempts += 1
self._reject_child(
branch.ir_type, branch.kwargs, child=node.value, key=id(branch)
branch.ir_type, branch.kwargs, child=node_value, key=id(branch)
)

# We don't expect this assertion to ever fire, but coverage
Expand Down Expand Up @@ -829,9 +820,9 @@ def new_observer(self):
return TreeRecordingObserver(self)

def _draw(self, ir_type, kwargs, *, random):
from hypothesis.internal.conjecture.data import IRNode, ir_to_buffer
from hypothesis.internal.conjecture.data import draw_choice

(value, buf) = ir_to_buffer(ir_type, kwargs, random=random)
value = draw_choice(ir_type, kwargs, random=random)
# using floats as keys into branch.children breaks things, because
# e.g. hash(0.0) == hash(-0.0) would collide as keys when they are
# in fact distinct child branches.
Expand All @@ -842,7 +833,7 @@ def _draw(self, ir_type, kwargs, *, random):
# buffer), and converting between the two forms as appropriate.
if ir_type == "float":
value = float_to_int(value)
return IRNode(ir_type=ir_type, value=value, kwargs=kwargs, was_forced=False)
return value

def _get_children_cache(self, ir_type, kwargs, *, key):
# cache the state of the children generator per node/branch (passed as
Expand All @@ -863,8 +854,6 @@ def _get_children_cache(self, ir_type, kwargs, *, key):
return self._children_cache[key]

def _draw_from_cache(self, ir_type, kwargs, *, key, random):
from hypothesis.internal.conjecture.data import IRNode

(generator, children, rejected) = self._get_children_cache(
ir_type, kwargs, key=key
)
Expand All @@ -884,8 +873,7 @@ def _draw_from_cache(self, ir_type, kwargs, *, key, random):
if len(children) >= 100:
break

value = random.choice(children)
return IRNode(ir_type=ir_type, value=value, kwargs=kwargs, was_forced=True)
return random.choice(children)

def _reject_child(self, ir_type, kwargs, *, child, key):
(_generator, children, rejected) = self._get_children_cache(
Expand Down
Loading

0 comments on commit 8813586

Please sign in to comment.