123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330 |
- # coding=utf-8
- #
- # This file is part of Hypothesis, which may be found at
- # https://github.com/HypothesisWorks/hypothesis-python
- #
- # Most of this work is copyright (C) 2013-2018 David R. MacIver
- # (david@drmaciver.com), but it contains contributions by others. See
- # CONTRIBUTING.rst for a full list of people who may hold copyright, and
- # consult the git log if you need to determine who owns an individual
- # contribution.
- #
- # This Source Code Form is subject to the terms of the Mozilla Public License,
- # v. 2.0. If a copy of the MPL was not distributed with this file, You can
- # obtain one at http://mozilla.org/MPL/2.0/.
- #
- # END HEADER
- from __future__ import division, print_function, absolute_import
- import sys
- import math
- from hypothesis.internal.compat import ceil, floor, hbytes, hrange, \
- int_to_bytes, int_from_bytes
- from hypothesis.internal.conjecture.floats import is_simple, \
- float_to_lex, lex_to_float
- """
- This module implements a lexicographic minimizer for blocks of bytes.
- That is, given a block of bytes of a given size, and a predicate that accepts
- such blocks, it tries to find a lexicographically minimal block of that size
- that satisfies the predicate, by repeatedly making local changes to that
- starting point.
- Assuming it is allowed to run to completion (which due to the way we use it it
- actually often isn't) it makes the following guarantees, but it usually tries
- to do better in practice:
- 1. The lexicographic predecessor (i.e. the largest block smaller than it) of
- the answer is not a solution.
- 2. No individual byte in the solution may be lowered while holding the others
- fixed.
- """
- class Minimizer(object):
- def __init__(self, initial, condition, random, full):
- self.current = hbytes(initial)
- self.size = len(self.current)
- self.condition = condition
- self.random = random
- self.full = full
- self.changes = 0
- self.seen = set()
- def incorporate(self, buffer):
- """Consider this buffer as a possible replacement for the current best
- buffer.
- Return True if it succeeds as such.
- """
- assert isinstance(buffer, hbytes)
- assert len(buffer) == self.size
- assert buffer <= self.current
- if buffer in self.seen:
- return False
- self.seen.add(buffer)
- if buffer != self.current and self.condition(buffer):
- self.current = buffer
- self.changes += 1
- return True
- return False
- def shift(self):
- """Attempt to shift individual byte values right as far as they can
- go."""
- prev = -1
- while prev != self.changes:
- prev = self.changes
- for i in hrange(self.size):
- block = bytearray(self.current)
- c = block[i]
- for k in hrange(c.bit_length(), 0, -1):
- block[i] = c >> k
- if self.incorporate(hbytes(block)):
- break
- def rotate_suffixes(self):
- for significant, c in enumerate(self.current): # pragma: no branch
- if c:
- break
- assert self.current[significant]
- prefix = hbytes(significant)
- for i in hrange(1, self.size - significant):
- left = self.current[significant:significant + i]
- right = self.current[significant + i:]
- rotated = prefix + right + left
- if rotated < self.current:
- self.incorporate(rotated)
- def shrink_indices(self):
- # We take a bet that there is some monotonic lower bound such that
- # whenever current >= lower_bound the result works.
- for i in hrange(self.size):
- if self.current[i] == 0:
- continue
- if self.incorporate(
- self.current[:i] + hbytes([0]) + self.current[i + 1:]
- ):
- continue
- prefix = self.current[:i]
- original_suffix = self.current[i + 1:]
- for suffix in [
- original_suffix,
- hbytes([255]) * len(original_suffix),
- ]:
- minimize_byte(
- self.current[i],
- lambda c: self.current[i] == c or self.incorporate(
- prefix + hbytes([c]) + suffix)
- )
- def incorporate_int(self, i):
- return self.incorporate(int_to_bytes(i, self.size))
- def incorporate_float(self, f):
- assert self.size == 8
- return self.incorporate_int(float_to_lex(f))
- def float_hack(self):
- """Our encoding of floating point numbers does the right thing when you
- lexically shrink it, but there are some highly non-obvious lexical
- shrinks corresponding to natural floating point operations.
- We can't actually tell when the floating point encoding is being used
- (that would break the assumptions that Hypothesis doesn't inspect
- the generated values), but we can cheat: We just guess when it might be
- being used and perform shrinks that are valid regardless of our guess
- is correct.
- So that's what this method does. It's a cheat to give us good shrinking
- of floating at low cost in runtime and only moderate cost in elegance.
- """
- # If the block is of the wrong size then we're certainly not using the
- # float encoding.
- if self.size != 8:
- return
- # If the high bit is zero then we're in the integer representation of
- # floats so we don't need these hacks because it will shrink normally.
- if self.current[0] >> 7 == 0:
- return
- i = int_from_bytes(self.current)
- f = lex_to_float(i)
- # This floating point number can be represented in our simple format.
- # So we try converting it to that (which will give the same float, but
- # a different encoding of it). If that doesn't work then the float
- # value of this doesn't unambiguously give the desired predicate, so
- # this approach isn't useful. If it *does* work, then we're now in a
- # situation where we don't need it, so either way we return here.
- if is_simple(f):
- self.incorporate_float(f)
- return
- # We check for a bunch of standard "large" floats. If we're currently
- # worse than them and the shrink downwards doesn't help, abort early
- # because there's not much useful we can do here.
- for g in [
- float('nan'), float('inf'), sys.float_info.max,
- ]:
- j = float_to_lex(g)
- if j < i:
- if self.incorporate_int(j):
- f = g
- i = j
- if math.isinf(f) or math.isnan(f):
- return
- # Finally we get to the important bit: Each of these is a small change
- # to the floating point number that corresponds to a large change in
- # the lexical representation. Trying these ensures that our floating
- # point shrink can always move past these obstacles. In particular it
- # ensures we can always move to integer boundaries and shrink past a
- # change that would require shifting the exponent while not changing
- # the float value much.
- for g in [floor(f), ceil(f)]:
- if self.incorporate_float(g):
- return
- if f > 2:
- self.incorporate_float(f - 1)
- def run(self):
- if not any(self.current):
- return
- if len(self.current) == 1:
- minimize_byte(
- self.current[0],
- lambda c: c == self.current[0] or self.incorporate(hbytes([c]))
- )
- return
- # Initial checks as to whether the two smallest possible buffers of
- # this length can work. If so there's nothing to do here.
- if self.incorporate(hbytes(self.size)):
- return
- if self.incorporate(hbytes([0] * (self.size - 1) + [1])):
- return
- # Perform a binary search to try to replace a long initial segment with
- # zero bytes.
- # Note that because this property isn't monotonic this will not always
- # find the longest subsequence we can replace with zero, only some
- # subsequence.
- # Replacing the first nonzero bytes with zero does *not* work
- nonzero = len(self.current)
- # Replacing the first canzero bytes with zero *does* work.
- canzero = 0
- while self.current[canzero] == 0:
- canzero += 1
- base = self.current
- @binsearch(canzero, nonzero)
- def zero_prefix(mid):
- return self.incorporate(
- hbytes(mid) +
- base[mid:]
- )
- base = self.current
- @binsearch(0, self.size)
- def shift_right(mid):
- if mid == 0:
- return True
- if mid == self.size:
- return False
- return self.incorporate(hbytes(mid) + base[:-mid])
- change_counter = -1
- first = True
- while (
- (first or self.full) and
- change_counter < self.changes
- ):
- first = False
- change_counter = self.changes
- self.float_hack()
- self.shift()
- self.shrink_indices()
- self.rotate_suffixes()
- def minimize(initial, condition, random, full=True):
- """Perform a lexicographical minimization of the byte string 'initial' such
- that the predicate 'condition' returns True, and return the minimized
- string."""
- m = Minimizer(initial, condition, random, full)
- m.run()
- return m.current
- def binsearch(_lo, _hi):
- """Run a binary search to find the point at which a function changes value
- between two bounds.
- This function is used purely for its side effects and returns
- nothing.
- """
- def accept(f):
- lo = _lo
- hi = _hi
- loval = f(lo)
- hival = f(hi)
- if loval == hival:
- return
- while lo + 1 < hi:
- mid = (lo + hi) // 2
- midval = f(mid)
- if midval == loval:
- lo = mid
- else:
- assert hival == midval
- hi = mid
- return accept
- def minimize_byte(c, f):
- """Return the smallest byte for which a function `f` returns True, starting
- with the byte `c` as an unsigned integer."""
- if f(0):
- return 0
- if c == 1 or f(1):
- return 1
- elif c == 2:
- return 2
- if f(c - 1):
- lo = 1
- hi = c - 1
- while lo + 1 < hi:
- mid = (lo + hi) // 2
- if f(mid):
- hi = mid
- else:
- lo = mid
- return hi
- return c
|