Skip to content

Commit

Permalink
Merge pull request #3801 from tybug/forced-primitives
Browse files Browse the repository at this point in the history
  • Loading branch information
Zac-HD authored Dec 9, 2023
2 parents 10b7e78 + 0f0ebcc commit 512cfed
Show file tree
Hide file tree
Showing 7 changed files with 427 additions and 88 deletions.
3 changes: 3 additions & 0 deletions hypothesis-python/RELEASE.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
RELEASE_TYPE: patch

This patch refactors some more internals, continuing our work on supporting alternative backends (:issue:`3086`). There is no user-visible change.
218 changes: 148 additions & 70 deletions hypothesis-python/src/hypothesis/internal/conjecture/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,6 @@ def wrapper(tp):
ONE_BOUND_INTEGERS_LABEL = calc_label_from_name("trying a one-bound int allowing 0")
INTEGER_RANGE_DRAW_LABEL = calc_label_from_name("another draw in integer_range()")
BIASED_COIN_LABEL = calc_label_from_name("biased_coin()")
BIASED_COIN_INNER_LABEL = calc_label_from_name("inside biased_coin()")

TOP_LABEL = calc_label_from_name("top")
DRAW_BYTES_LABEL = calc_label_from_name("draw_bytes() in ConjectureData")
Expand Down Expand Up @@ -936,18 +935,9 @@ def draw_boolean(self, p: float = 0.5, *, forced: Optional[bool] = None) -> bool
else:
partial = True

if forced is None:
# We want to get to the point where True is represented by
# 1 and False is represented by 0 as quickly as possible, so
# we use the remove_discarded machinery in the shrinker to
# achieve that by discarding any draws that are > 1 and writing
# a suitable draw into the choice sequence at the end of the
# loop.
self._cd.start_example(BIASED_COIN_INNER_LABEL)
i = self._cd.draw_bits(bits)
self._cd.stop_example(discard=i > 1)
else:
i = self._cd.draw_bits(bits, forced=int(forced))
i = self._cd.draw_bits(
bits, forced=None if forced is None else int(forced)
)

# We always choose the region that causes us to repeat the loop as
# the maximum value, so that shrinking the drawn bits never causes
Expand Down Expand Up @@ -977,8 +967,6 @@ def draw_boolean(self, p: float = 0.5, *, forced: Optional[bool] = None) -> bool
# becomes i > falsey.
result = i > falsey

if i > 1:
self._cd.draw_bits(bits, forced=int(result))
break
self._cd.stop_example()
return result
Expand All @@ -993,52 +981,58 @@ def draw_integer(
shrink_towards: int = 0,
forced: Optional[int] = None,
) -> int:
if min_value is not None:
shrink_towards = max(min_value, shrink_towards)
if max_value is not None:
shrink_towards = min(max_value, shrink_towards)

# This is easy to build on top of our existing conjecture utils,
# and it's easy to build sampled_from and weighted_coin on this.
if weights is not None:
assert min_value is not None
assert max_value is not None

sampler = Sampler(weights)
idx = sampler.sample(self._cd)
gap = max_value - shrink_towards

if shrink_towards <= min_value:
return min_value + idx
elif max_value <= shrink_towards:
return max_value - idx
else:
# For range -2..2, interpret idx = 0..4 as [0, 1, 2, -1, -2]
if idx <= (gap := max_value - shrink_towards):
return shrink_towards + idx
forced_idx = None
if forced is not None:
if forced >= shrink_towards:
forced_idx = forced - shrink_towards
else:
return shrink_towards - (idx - gap)
forced_idx = shrink_towards + gap - forced
idx = sampler.sample(self._cd, forced=forced_idx)

# For range -2..2, interpret idx = 0..4 as [0, 1, 2, -1, -2]
if idx <= gap:
return shrink_towards + idx
else:
return shrink_towards - (idx - gap)

if min_value is None and max_value is None:
return self._draw_unbounded_integer()
return self._draw_unbounded_integer(forced=forced)

if min_value is None:
assert max_value is not None # make mypy happy
if max_value <= shrink_towards:
return max_value - abs(self._draw_unbounded_integer())
else:
probe = max_value + 1
while max_value < probe:
self._cd.start_example(ONE_BOUND_INTEGERS_LABEL)
probe = self._draw_unbounded_integer() + shrink_towards
self._cd.stop_example(discard=max_value < probe)
return probe
probe = max_value + 1
while max_value < probe:
self._cd.start_example(ONE_BOUND_INTEGERS_LABEL)
probe = shrink_towards + self._draw_unbounded_integer(
forced=None if forced is None else forced - shrink_towards
)
self._cd.stop_example(discard=max_value < probe)
return probe

if max_value is None:
assert min_value is not None
if min_value >= shrink_towards:
return min_value + abs(self._draw_unbounded_integer())
else:
probe = min_value - 1
while probe < min_value:
self._cd.start_example(ONE_BOUND_INTEGERS_LABEL)
probe = self._draw_unbounded_integer() + shrink_towards
self._cd.stop_example(discard=probe < min_value)
return probe
probe = min_value - 1
while probe < min_value:
self._cd.start_example(ONE_BOUND_INTEGERS_LABEL)
probe = shrink_towards + self._draw_unbounded_integer(
forced=None if forced is None else forced - shrink_towards
)
self._cd.stop_example(discard=probe < min_value)
return probe

return self._draw_bounded_integer(
min_value,
Expand All @@ -1057,7 +1051,8 @@ def draw_float(
# TODO: consider supporting these float widths at the IR level in the
# future.
# width: Literal[16, 32, 64] = 64,
# exclude_min and exclude_max handled higher up
# exclude_min and exclude_max handled higher up,
forced: Optional[float] = None,
) -> float:
(
sampler,
Expand All @@ -1074,17 +1069,25 @@ def draw_float(

while True:
self._cd.start_example(FLOAT_STRATEGY_DO_DRAW_LABEL)
i = sampler.sample(self._cd) if sampler else 0
# If `forced in nasty_floats`, then `forced` was *probably*
# generated by drawing a nonzero index from the sampler. However, we
# have no obligation to generate it that way when forcing. In particular,
# i == 0 is able to produce all possible floats, and the forcing
# logic is simpler if we assume this choice.
forced_i = None if forced is None else 0
i = sampler.sample(self._cd, forced=forced_i) if sampler else 0
self._cd.start_example(DRAW_FLOAT_LABEL)
if i == 0:
result = self._draw_float(forced_sign_bit=forced_sign_bit)
result = self._draw_float(
forced_sign_bit=forced_sign_bit, forced=forced
)
if math.copysign(1.0, result) == -1:
assert neg_clamper is not None
clamped = -neg_clamper(-result)
else:
assert pos_clamper is not None
clamped = pos_clamper(result)
if clamped != result:
if clamped != result and not (math.isnan(result) and allow_nan):
self._cd.stop_example(discard=True)
self._cd.start_example(DRAW_FLOAT_LABEL)
self._write_float(clamped)
Expand All @@ -1104,10 +1107,13 @@ def draw_string(
*,
min_size: int = 0,
max_size: Optional[int] = None,
forced: Optional[str] = None,
) -> str:
if max_size is None:
max_size = 10**10 # "arbitrarily large"

assert forced is None or min_size <= len(forced) <= max_size

average_size = min(
max(min_size * 2, min_size + 5),
0.5 * (min_size + max_size),
Expand All @@ -1119,31 +1125,57 @@ def draw_string(
min_size=min_size,
max_size=max_size,
average_size=average_size,
forced=None if forced is None else len(forced),
)
while elements.more():
forced_i: Optional[int] = None
if forced is not None:
c = forced[elements.count - 1]
forced_i = intervals.index_from_char_in_shrink_order(c)

if len(intervals) > 256:
if self.draw_boolean(0.2):
i = self._draw_bounded_integer(256, len(intervals) - 1)
if self.draw_boolean(
0.2, forced=None if forced_i is None else forced_i > 255
):
i = self._draw_bounded_integer(
256, len(intervals) - 1, forced=forced_i
)
else:
i = self._draw_bounded_integer(0, 255)
i = self._draw_bounded_integer(0, 255, forced=forced_i)
else:
i = self._draw_bounded_integer(0, len(intervals) - 1)
i = self._draw_bounded_integer(0, len(intervals) - 1, forced=forced_i)

chars.append(intervals.char_in_shrink_order(i))

return "".join(chars)

def draw_bytes(self, size: int) -> bytes:
return self._cd.draw_bits(8 * size).to_bytes(size, "big")
def draw_bytes(self, size: int, *, forced: Optional[bytes] = None) -> bytes:
forced_i = None
if forced is not None:
forced_i = int_from_bytes(forced)
size = len(forced)

return self._cd.draw_bits(8 * size, forced=forced_i).to_bytes(size, "big")

def _draw_float(self, forced_sign_bit: Optional[int] = None) -> float:
def _draw_float(
self, forced_sign_bit: Optional[int] = None, *, forced: Optional[float] = None
) -> float:
"""
Helper for draw_float which draws a random 64-bit float.
"""
if forced is not None:
# sign_aware_lte(forced, -0.0) does not correctly handle the
# math.nan case here.
forced_sign_bit = math.copysign(1, forced) == -1

self._cd.start_example(DRAW_FLOAT_LABEL)
try:
is_negative = self._cd.draw_bits(1, forced=forced_sign_bit)
f = lex_to_float(self._cd.draw_bits(64))
f = lex_to_float(
self._cd.draw_bits(
64, forced=None if forced is None else float_to_lex(abs(forced))
)
)
return -f if is_negative else f
finally:
self._cd.stop_example()
Expand All @@ -1153,14 +1185,37 @@ def _write_float(self, f: float) -> None:
self._cd.draw_bits(1, forced=sign)
self._cd.draw_bits(64, forced=float_to_lex(abs(f)))

def _draw_unbounded_integer(self) -> int:
size = INT_SIZES[INT_SIZES_SAMPLER.sample(self._cd)]
r = self._cd.draw_bits(size)
def _draw_unbounded_integer(self, *, forced: Optional[int] = None) -> int:
forced_i = None
if forced is not None:
# Using any bucket large enough to contain this integer would be a
# valid way to force it. This is because an n bit integer could have
# been drawn from a bucket of size n, or from any bucket of size
# m > n.
# We'll always choose the smallest eligible bucket here.

# We need an extra bit to handle forced signed integers. INT_SIZES
# is interpreted as unsigned sizes.
bit_size = forced.bit_length() + 1
size = min(size for size in INT_SIZES if bit_size <= size)
forced_i = INT_SIZES.index(size)

size = INT_SIZES[INT_SIZES_SAMPLER.sample(self._cd, forced=forced_i)]

forced_r = None
if forced is not None:
forced_r = forced
forced_r <<= 1
if forced < 0:
forced_r = -forced_r
forced_r |= 1

r = self._cd.draw_bits(size, forced=forced_r)
sign = r & 1
r >>= 1
if sign:
r = -r
return int(r)
return r

def _draw_bounded_integer(
self,
Expand Down Expand Up @@ -1202,7 +1257,9 @@ def _draw_bounded_integer(
bits = gap.bit_length()
probe = gap + 1

if bits > 24 and self._cd.draw_bits(3, forced=None if forced is None else 0):
if bits > 24 and self.draw_boolean(
7 / 8, forced=None if forced is None else False
):
# For large ranges, we usually (with probability 7/8) combine the uniform random
# distribution from draw_bits with a weighting scheme. The cutoff is at 2 ** 24 so
# that our choice of unicode characters is uniform but the 32-bit distribution is not.
Expand Down Expand Up @@ -1425,11 +1482,20 @@ def draw_integer(
if weights is not None:
assert min_value is not None
assert max_value is not None
assert (max_value - min_value) <= 1024 # arbitrary practical limit

if forced is not None:
assert min_value is not None
assert max_value is not None
width = max_value - min_value + 1
assert width <= 1024 # arbitrary practical limit
assert len(weights) == width

if forced is not None and (min_value is None or max_value is None):
# We draw `forced=forced - shrink_towards` here internally. If that
# grows larger than a 128 bit signed integer, we can't represent it.
# Disallow this combination for now.
# Note that bit_length() = 128 -> signed bit size = 129.
assert (forced - shrink_towards).bit_length() < 128
if forced is not None and min_value is not None:
assert min_value <= forced
if forced is not None and max_value is not None:
assert forced <= max_value

return self.provider.draw_integer(
min_value=min_value,
Expand All @@ -1449,14 +1515,23 @@ def draw_float(
# TODO: consider supporting these float widths at the IR level in the
# future.
# width: Literal[16, 32, 64] = 64,
# exclude_min and exclude_max handled higher up
# exclude_min and exclude_max handled higher up,
forced: Optional[float] = None,
) -> float:
assert smallest_nonzero_magnitude > 0
assert not math.isnan(min_value)
assert not math.isnan(max_value)

if forced is not None:
assert allow_nan or not math.isnan(forced)
assert math.isnan(forced) or min_value <= forced <= max_value

return self.provider.draw_float(
min_value=min_value,
max_value=max_value,
allow_nan=allow_nan,
smallest_nonzero_magnitude=smallest_nonzero_magnitude,
forced=forced,
)

def draw_string(
Expand All @@ -1465,13 +1540,16 @@ def draw_string(
*,
min_size: int = 0,
max_size: Optional[int] = None,
forced: Optional[str] = None,
) -> str:
assert forced is None or min_size <= len(forced)
return self.provider.draw_string(
intervals, min_size=min_size, max_size=max_size
intervals, min_size=min_size, max_size=max_size, forced=forced
)

def draw_bytes(self, size: int) -> bytes:
return self.provider.draw_bytes(size)
def draw_bytes(self, size: int, *, forced: Optional[bytes] = None) -> bytes:
assert forced is None or len(forced) == size
return self.provider.draw_bytes(size, forced=forced)

def draw_boolean(self, p: float = 0.5, *, forced: Optional[bool] = None) -> bool:
return self.provider.draw_boolean(p, forced=forced)
Expand Down
Loading

0 comments on commit 512cfed

Please sign in to comment.