minimizer.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330
  1. # coding=utf-8
  2. #
  3. # This file is part of Hypothesis, which may be found at
  4. # https://github.com/HypothesisWorks/hypothesis-python
  5. #
  6. # Most of this work is copyright (C) 2013-2018 David R. MacIver
  7. # (david@drmaciver.com), but it contains contributions by others. See
  8. # CONTRIBUTING.rst for a full list of people who may hold copyright, and
  9. # consult the git log if you need to determine who owns an individual
  10. # contribution.
  11. #
  12. # This Source Code Form is subject to the terms of the Mozilla Public License,
  13. # v. 2.0. If a copy of the MPL was not distributed with this file, You can
  14. # obtain one at http://mozilla.org/MPL/2.0/.
  15. #
  16. # END HEADER
  17. from __future__ import division, print_function, absolute_import
  18. import sys
  19. import math
  20. from hypothesis.internal.compat import ceil, floor, hbytes, hrange, \
  21. int_to_bytes, int_from_bytes
  22. from hypothesis.internal.conjecture.floats import is_simple, \
  23. float_to_lex, lex_to_float
  24. """
  25. This module implements a lexicographic minimizer for blocks of bytes.
  26. That is, given a block of bytes of a given size, and a predicate that accepts
  27. such blocks, it tries to find a lexicographically minimal block of that size
  28. that satisfies the predicate, by repeatedly making local changes to that
  29. starting point.
  30. Assuming it is allowed to run to completion (which due to the way we use it it
  31. actually often isn't) it makes the following guarantees, but it usually tries
  32. to do better in practice:
  33. 1. The lexicographic predecessor (i.e. the largest block smaller than it) of
  34. the answer is not a solution.
  35. 2. No individual byte in the solution may be lowered while holding the others
  36. fixed.
  37. """
  38. class Minimizer(object):
  39. def __init__(self, initial, condition, random, full):
  40. self.current = hbytes(initial)
  41. self.size = len(self.current)
  42. self.condition = condition
  43. self.random = random
  44. self.full = full
  45. self.changes = 0
  46. self.seen = set()
  47. def incorporate(self, buffer):
  48. """Consider this buffer as a possible replacement for the current best
  49. buffer.
  50. Return True if it succeeds as such.
  51. """
  52. assert isinstance(buffer, hbytes)
  53. assert len(buffer) == self.size
  54. assert buffer <= self.current
  55. if buffer in self.seen:
  56. return False
  57. self.seen.add(buffer)
  58. if buffer != self.current and self.condition(buffer):
  59. self.current = buffer
  60. self.changes += 1
  61. return True
  62. return False
  63. def shift(self):
  64. """Attempt to shift individual byte values right as far as they can
  65. go."""
  66. prev = -1
  67. while prev != self.changes:
  68. prev = self.changes
  69. for i in hrange(self.size):
  70. block = bytearray(self.current)
  71. c = block[i]
  72. for k in hrange(c.bit_length(), 0, -1):
  73. block[i] = c >> k
  74. if self.incorporate(hbytes(block)):
  75. break
  76. def rotate_suffixes(self):
  77. for significant, c in enumerate(self.current): # pragma: no branch
  78. if c:
  79. break
  80. assert self.current[significant]
  81. prefix = hbytes(significant)
  82. for i in hrange(1, self.size - significant):
  83. left = self.current[significant:significant + i]
  84. right = self.current[significant + i:]
  85. rotated = prefix + right + left
  86. if rotated < self.current:
  87. self.incorporate(rotated)
  88. def shrink_indices(self):
  89. # We take a bet that there is some monotonic lower bound such that
  90. # whenever current >= lower_bound the result works.
  91. for i in hrange(self.size):
  92. if self.current[i] == 0:
  93. continue
  94. if self.incorporate(
  95. self.current[:i] + hbytes([0]) + self.current[i + 1:]
  96. ):
  97. continue
  98. prefix = self.current[:i]
  99. original_suffix = self.current[i + 1:]
  100. for suffix in [
  101. original_suffix,
  102. hbytes([255]) * len(original_suffix),
  103. ]:
  104. minimize_byte(
  105. self.current[i],
  106. lambda c: self.current[i] == c or self.incorporate(
  107. prefix + hbytes([c]) + suffix)
  108. )
  109. def incorporate_int(self, i):
  110. return self.incorporate(int_to_bytes(i, self.size))
  111. def incorporate_float(self, f):
  112. assert self.size == 8
  113. return self.incorporate_int(float_to_lex(f))
  114. def float_hack(self):
  115. """Our encoding of floating point numbers does the right thing when you
  116. lexically shrink it, but there are some highly non-obvious lexical
  117. shrinks corresponding to natural floating point operations.
  118. We can't actually tell when the floating point encoding is being used
  119. (that would break the assumptions that Hypothesis doesn't inspect
  120. the generated values), but we can cheat: We just guess when it might be
  121. being used and perform shrinks that are valid regardless of our guess
  122. is correct.
  123. So that's what this method does. It's a cheat to give us good shrinking
  124. of floating at low cost in runtime and only moderate cost in elegance.
  125. """
  126. # If the block is of the wrong size then we're certainly not using the
  127. # float encoding.
  128. if self.size != 8:
  129. return
  130. # If the high bit is zero then we're in the integer representation of
  131. # floats so we don't need these hacks because it will shrink normally.
  132. if self.current[0] >> 7 == 0:
  133. return
  134. i = int_from_bytes(self.current)
  135. f = lex_to_float(i)
  136. # This floating point number can be represented in our simple format.
  137. # So we try converting it to that (which will give the same float, but
  138. # a different encoding of it). If that doesn't work then the float
  139. # value of this doesn't unambiguously give the desired predicate, so
  140. # this approach isn't useful. If it *does* work, then we're now in a
  141. # situation where we don't need it, so either way we return here.
  142. if is_simple(f):
  143. self.incorporate_float(f)
  144. return
  145. # We check for a bunch of standard "large" floats. If we're currently
  146. # worse than them and the shrink downwards doesn't help, abort early
  147. # because there's not much useful we can do here.
  148. for g in [
  149. float('nan'), float('inf'), sys.float_info.max,
  150. ]:
  151. j = float_to_lex(g)
  152. if j < i:
  153. if self.incorporate_int(j):
  154. f = g
  155. i = j
  156. if math.isinf(f) or math.isnan(f):
  157. return
  158. # Finally we get to the important bit: Each of these is a small change
  159. # to the floating point number that corresponds to a large change in
  160. # the lexical representation. Trying these ensures that our floating
  161. # point shrink can always move past these obstacles. In particular it
  162. # ensures we can always move to integer boundaries and shrink past a
  163. # change that would require shifting the exponent while not changing
  164. # the float value much.
  165. for g in [floor(f), ceil(f)]:
  166. if self.incorporate_float(g):
  167. return
  168. if f > 2:
  169. self.incorporate_float(f - 1)
  170. def run(self):
  171. if not any(self.current):
  172. return
  173. if len(self.current) == 1:
  174. minimize_byte(
  175. self.current[0],
  176. lambda c: c == self.current[0] or self.incorporate(hbytes([c]))
  177. )
  178. return
  179. # Initial checks as to whether the two smallest possible buffers of
  180. # this length can work. If so there's nothing to do here.
  181. if self.incorporate(hbytes(self.size)):
  182. return
  183. if self.incorporate(hbytes([0] * (self.size - 1) + [1])):
  184. return
  185. # Perform a binary search to try to replace a long initial segment with
  186. # zero bytes.
  187. # Note that because this property isn't monotonic this will not always
  188. # find the longest subsequence we can replace with zero, only some
  189. # subsequence.
  190. # Replacing the first nonzero bytes with zero does *not* work
  191. nonzero = len(self.current)
  192. # Replacing the first canzero bytes with zero *does* work.
  193. canzero = 0
  194. while self.current[canzero] == 0:
  195. canzero += 1
  196. base = self.current
  197. @binsearch(canzero, nonzero)
  198. def zero_prefix(mid):
  199. return self.incorporate(
  200. hbytes(mid) +
  201. base[mid:]
  202. )
  203. base = self.current
  204. @binsearch(0, self.size)
  205. def shift_right(mid):
  206. if mid == 0:
  207. return True
  208. if mid == self.size:
  209. return False
  210. return self.incorporate(hbytes(mid) + base[:-mid])
  211. change_counter = -1
  212. first = True
  213. while (
  214. (first or self.full) and
  215. change_counter < self.changes
  216. ):
  217. first = False
  218. change_counter = self.changes
  219. self.float_hack()
  220. self.shift()
  221. self.shrink_indices()
  222. self.rotate_suffixes()
  223. def minimize(initial, condition, random, full=True):
  224. """Perform a lexicographical minimization of the byte string 'initial' such
  225. that the predicate 'condition' returns True, and return the minimized
  226. string."""
  227. m = Minimizer(initial, condition, random, full)
  228. m.run()
  229. return m.current
  230. def binsearch(_lo, _hi):
  231. """Run a binary search to find the point at which a function changes value
  232. between two bounds.
  233. This function is used purely for its side effects and returns
  234. nothing.
  235. """
  236. def accept(f):
  237. lo = _lo
  238. hi = _hi
  239. loval = f(lo)
  240. hival = f(hi)
  241. if loval == hival:
  242. return
  243. while lo + 1 < hi:
  244. mid = (lo + hi) // 2
  245. midval = f(mid)
  246. if midval == loval:
  247. lo = mid
  248. else:
  249. assert hival == midval
  250. hi = mid
  251. return accept
  252. def minimize_byte(c, f):
  253. """Return the smallest byte for which a function `f` returns True, starting
  254. with the byte `c` as an unsigned integer."""
  255. if f(0):
  256. return 0
  257. if c == 1 or f(1):
  258. return 1
  259. elif c == 2:
  260. return 2
  261. if f(c - 1):
  262. lo = 1
  263. hi = c - 1
  264. while lo + 1 < hi:
  265. mid = (lo + hi) // 2
  266. if f(mid):
  267. hi = mid
  268. else:
  269. lo = mid
  270. return hi
  271. return c