# coding=utf-8
#
# This file is part of Hypothesis, which may be found at
# https://github.com/HypothesisWorks/hypothesis-python
#
# Most of this work is copyright (C) 2013-2018 David R. MacIver
# (david@drmaciver.com), but it contains contributions by others. See
# CONTRIBUTING.rst for a full list of people who may hold copyright, and
# consult the git log if you need to determine who owns an individual
# contribution.
#
# This Source Code Form is subject to the terms of the Mozilla Public License,
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
# obtain one at http://mozilla.org/MPL/2.0/.
#
# END HEADER

from __future__ import division, print_function, absolute_import

import heapq
from enum import Enum
from random import Random, getrandbits
from weakref import WeakKeyDictionary
from collections import defaultdict

import attr

from hypothesis import settings as Settings
from hypothesis import Phase, HealthCheck
from hypothesis.reporting import debug_report
from hypothesis.internal.compat import Counter, ceil, hbytes, hrange, \
    int_to_text, int_to_bytes, benchmark_time, int_from_bytes, \
    to_bytes_sequence, unicode_safe_repr
from hypothesis.utils.conventions import UniqueIdentifier
from hypothesis.internal.healthcheck import fail_health_check
from hypothesis.internal.conjecture.data import MAX_DEPTH, Status, \
    StopTest, ConjectureData
from hypothesis.internal.conjecture.minimizer import minimize

# Tell pytest to omit the body of this module from tracebacks
# http://doc.pytest.org/en/latest/example/simple.html#writing-well-integrated-assertion-helpers
__tracebackhide__ = True
HUNG_TEST_TIME_LIMIT = 5 * 60


@attr.s
class HealthCheckState(object):
    valid_examples = attr.ib(default=0)
    invalid_examples = attr.ib(default=0)
    overrun_examples = attr.ib(default=0)
    draw_times = attr.ib(default=attr.Factory(list))


class ExitReason(Enum):
    max_examples = 0
    max_iterations = 1
    timeout = 2
    max_shrinks = 3
    finished = 4
    flaky = 5


class RunIsComplete(Exception):
    pass

class ConjectureRunner(object):

    def __init__(
        self, test_function, settings=None, random=None,
        database_key=None,
    ):
        self._test_function = test_function
        self.settings = settings or Settings()
        self.shrinks = 0
        self.call_count = 0
        self.event_call_counts = Counter()
        self.valid_examples = 0
        self.start_time = benchmark_time()
        self.random = random or Random(getrandbits(128))
        self.database_key = database_key
        self.status_runtimes = {}
        self.all_drawtimes = []
        self.all_runtimes = []
        self.events_to_strings = WeakKeyDictionary()
        self.target_selector = TargetSelector(self.random)

        # Tree nodes are stored in an array to prevent heavy nesting of data
        # structures. Branches are dicts mapping bytes to child nodes (which
        # will in general only be partially populated). Leaves are
        # ConjectureData objects that have been previously seen as the result
        # of following that path.
        self.tree = [{}]
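        # For illustration only (not real data): after seeing the buffers
        # b'\x00\x01' and b'\x00\x02', the array might look roughly like
        # [{0: 1}, {1: 2, 2: 3}, <ConjectureData>, <ConjectureData>] --
        # node 0 branches on the first byte, node 1 on the second, and the
        # leaves are the two ConjectureData results.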

        # A node is dead if there is nothing left to explore past that point.
        # Recursively, a node is dead if either it is a leaf or every byte
        # leads to a dead node when starting from here.
        self.dead = set()

        # We rewrite the byte stream at various points during parsing, to one
        # that will produce an equivalent result but is in some sense more
        # canonical. We keep track of these so that when walking the tree we
        # can identify nodes where the exact byte value doesn't matter and
        # treat all bytes there as equivalent. This significantly reduces the
        # size of the search space and removes a lot of redundant examples.

        # Maps tree indices to the unique byte that is valid at that point.
        # Corresponds to data.write() calls.
        self.forced = {}

        # Maps tree indices to the maximum byte that is valid at that point.
        # Currently this is only used inside draw_bits, but it potentially
        # could get used elsewhere.
        self.capped = {}

        # Where a tree node consists of the beginning of a block we track the
        # size of said block. This allows us to tell when an example is too
        # short even if it goes off the unexplored region of the tree - if it
        # is at the beginning of a block of size 4 but only has 3 bytes left,
        # it's going to overrun the end of the buffer regardless of the
        # buffer contents.
        self.block_sizes = {}

        self.interesting_examples = {}
        self.covering_examples = {}
        self.shrunk_examples = set()
        self.tag_intern_table = {}
        self.health_check_state = None
        self.used_examples_from_database = False

    def __tree_is_exhausted(self):
        return 0 in self.dead

    def test_function(self, data):
        if benchmark_time() - self.start_time >= HUNG_TEST_TIME_LIMIT:
            fail_health_check(self.settings, (
                'Your test has been running for at least five minutes. This '
                'is probably not what you intended, so by default Hypothesis '
                'turns it into an error.'
            ), HealthCheck.hung_test)

        self.call_count += 1
        try:
            self._test_function(data)
            data.freeze()
        except StopTest as e:
            if e.testcounter != data.testcounter:
                self.save_buffer(data.buffer)
                raise e
        except BaseException:
            self.save_buffer(data.buffer)
            raise
        finally:
            data.freeze()
            self.note_details(data)

        self.target_selector.add(data)
        self.debug_data(data)

        tags = frozenset(data.tags)
        data.tags = self.tag_intern_table.setdefault(tags, tags)

        if data.status == Status.VALID:
            self.valid_examples += 1
            for t in data.tags:
                existing = self.covering_examples.get(t)
                if (
                    existing is None or
                    sort_key(data.buffer) < sort_key(existing.buffer)
                ):
                    self.covering_examples[t] = data
                    if self.database is not None:
                        self.database.save(self.covering_key, data.buffer)
                        if existing is not None:
                            self.database.delete(
                                self.covering_key, existing.buffer)

        tree_node = self.tree[0]
        indices = []
        node_index = 0
        for i, b in enumerate(data.buffer):
            indices.append(node_index)
            if i in data.forced_indices:
                self.forced[node_index] = b
            try:
                self.capped[node_index] = data.capped_indices[i]
            except KeyError:
                pass
            try:
                node_index = tree_node[b]
            except KeyError:
                node_index = len(self.tree)
                self.tree.append({})
                tree_node[b] = node_index
            tree_node = self.tree[node_index]
            if node_index in self.dead:
                break

        for u, v in data.blocks:
            # This can happen if we hit a dead node when walking the buffer.
            # In that case we already have this section of the tree mapped.
            if u >= len(indices):
                break
            self.block_sizes[indices[u]] = v - u

        if data.status != Status.OVERRUN and node_index not in self.dead:
            self.dead.add(node_index)
            self.tree[node_index] = data

            for j in reversed(indices):
                if (
                    len(self.tree[j]) < self.capped.get(j, 255) + 1 and
                    j not in self.forced
                ):
                    break
                if set(self.tree[j].values()).issubset(self.dead):
                    self.dead.add(j)
                else:
                    break

        if data.status == Status.INTERESTING:
            key = data.interesting_origin
            changed = False
            try:
                existing = self.interesting_examples[key]
            except KeyError:
                changed = True
            else:
                if sort_key(data.buffer) < sort_key(existing.buffer):
                    self.shrinks += 1
                    self.downgrade_buffer(existing.buffer)
                    changed = True

            if changed:
                self.save_buffer(data.buffer)
                self.interesting_examples[key] = data
                self.shrunk_examples.discard(key)

            if self.shrinks >= self.settings.max_shrinks:
                self.exit_with(ExitReason.max_shrinks)

        if (
            self.settings.timeout > 0 and
            benchmark_time() >= self.start_time + self.settings.timeout
        ):
            self.exit_with(ExitReason.timeout)

        if not self.interesting_examples:
            if self.valid_examples >= self.settings.max_examples:
                self.exit_with(ExitReason.max_examples)
            if self.call_count >= max(
                self.settings.max_iterations, self.settings.max_examples
            ):
                self.exit_with(ExitReason.max_iterations)

        if self.__tree_is_exhausted():
            self.exit_with(ExitReason.finished)

        self.record_for_health_check(data)

    def record_for_health_check(self, data):
        # Once we've actually found a bug, there's no point in trying to run
        # health checks - they'll just mask the actually important information.
        if data.status == Status.INTERESTING:
            self.health_check_state = None

        state = self.health_check_state

        if state is None:
            return

        state.draw_times.extend(data.draw_times)

        if data.status == Status.VALID:
            state.valid_examples += 1
        elif data.status == Status.INVALID:
            state.invalid_examples += 1
        else:
            assert data.status == Status.OVERRUN
            state.overrun_examples += 1

        max_valid_draws = 10
        max_invalid_draws = 50
        max_overrun_draws = 20

        assert state.valid_examples <= max_valid_draws

        if state.valid_examples == max_valid_draws:
            self.health_check_state = None
            return

        if state.overrun_examples == max_overrun_draws:
            fail_health_check(self.settings, (
                'Examples routinely exceeded the max allowable size. '
                '(%d examples overran while generating %d valid ones)'
                '. Generating examples this large will usually lead to'
                ' bad results. You could try setting max_size parameters '
                'on your collections and turning '
                'max_leaves down on recursive() calls.') % (
                state.overrun_examples, state.valid_examples
            ), HealthCheck.data_too_large)
        if state.invalid_examples == max_invalid_draws:
            fail_health_check(self.settings, (
                'It looks like your strategy is filtering out a lot '
                'of data. Health check found %d filtered examples but '
                'only %d good ones. This will make your tests much '
                'slower, and also will probably distort the data '
                'generation quite a lot. You should adapt your '
                'strategy to filter less. This can also be caused by '
                'a low max_leaves parameter in recursive() calls') % (
                state.invalid_examples, state.valid_examples
            ), HealthCheck.filter_too_much)

        draw_time = sum(state.draw_times)

        if draw_time > 1.0:
            fail_health_check(self.settings, (
                'Data generation is extremely slow: Only produced '
                '%d valid examples in %.2f seconds (%d invalid ones '
                'and %d exceeded maximum size). Try decreasing '
                "size of the data you're generating (with e.g."
                'max_size or max_leaves parameters).'
            ) % (
                state.valid_examples, draw_time, state.invalid_examples,
                state.overrun_examples), HealthCheck.too_slow,)

    def save_buffer(self, buffer):
        if self.settings.database is not None:
            key = self.database_key
            if key is None:
                return
            self.settings.database.save(key, hbytes(buffer))

    def downgrade_buffer(self, buffer):
        if self.settings.database is not None:
            self.settings.database.move(
                self.database_key, self.secondary_key, buffer)

    @property
    def secondary_key(self):
        return b'.'.join((self.database_key, b'secondary'))

    @property
    def covering_key(self):
        return b'.'.join((self.database_key, b'coverage'))
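
    # Illustrative example (not taken from a real run): with
    # database_key == b'mytest', failing examples are saved under b'mytest',
    # demoted ones under b'mytest.secondary', and covering examples under
    # b'mytest.coverage'.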

    def note_details(self, data):
        runtime = max(data.finish_time - data.start_time, 0.0)
        self.all_runtimes.append(runtime)
        self.all_drawtimes.extend(data.draw_times)
        self.status_runtimes.setdefault(data.status, []).append(runtime)
        for event in set(map(self.event_to_string, data.events)):
            self.event_call_counts[event] += 1

    def debug(self, message):
        with self.settings:
            debug_report(message)

    def debug_data(self, data):
        buffer_parts = [u"["]
        for i, (u, v) in enumerate(data.blocks):
            if i > 0:
                buffer_parts.append(u" || ")
            buffer_parts.append(
                u', '.join(int_to_text(int(i)) for i in data.buffer[u:v]))
        buffer_parts.append(u']')

        status = unicode_safe_repr(data.status)

        if data.status == Status.INTERESTING:
            status = u'%s (%s)' % (
                status, unicode_safe_repr(data.interesting_origin,))

        self.debug(u'%d bytes %s -> %s, %s' % (
            data.index,
            u''.join(buffer_parts),
            status,
            data.output,
        ))

    def run(self):
        with self.settings:
            try:
                self._run()
            except RunIsComplete:
                pass
            for v in self.interesting_examples.values():
                self.debug_data(v)
            self.debug(
                u'Run complete after %d examples (%d valid) and %d shrinks' % (
                    self.call_count, self.valid_examples, self.shrinks,
                ))

    def _new_mutator(self):
        target_data = [None]

        def draw_new(data, n):
            return uniform(self.random, n)

        def draw_existing(data, n):
            return target_data[0].buffer[data.index:data.index + n]

        def draw_smaller(data, n):
            existing = target_data[0].buffer[data.index:data.index + n]
            r = uniform(self.random, n)
            if r <= existing:
                return r
            return _draw_predecessor(self.random, existing)

        def draw_larger(data, n):
            existing = target_data[0].buffer[data.index:data.index + n]
            r = uniform(self.random, n)
            if r >= existing:
                return r
            return _draw_successor(self.random, existing)

        def reuse_existing(data, n):
            choices = data.block_starts.get(n, []) or \
                target_data[0].block_starts.get(n, [])
            if choices:
                i = self.random.choice(choices)
                return target_data[0].buffer[i:i + n]
            else:
                result = uniform(self.random, n)
                assert isinstance(result, hbytes)
                return result

        def flip_bit(data, n):
            buf = bytearray(
                target_data[0].buffer[data.index:data.index + n])
            i = self.random.randint(0, n - 1)
            k = self.random.randint(0, 7)
            buf[i] ^= (1 << k)
            return hbytes(buf)

        def draw_zero(data, n):
            return hbytes(b'\0' * n)

        def draw_max(data, n):
            return hbytes([255]) * n

        def draw_constant(data, n):
            return hbytes([self.random.randint(0, 255)]) * n

        def redraw_last(data, n):
            u = target_data[0].blocks[-1][0]
            if data.index + n <= u:
                return target_data[0].buffer[data.index:data.index + n]
            else:
                return uniform(self.random, n)

        options = [
            draw_new,
            redraw_last, redraw_last,
            reuse_existing, reuse_existing,
            draw_existing, draw_smaller, draw_larger,
            flip_bit,
            draw_zero, draw_max, draw_zero, draw_max,
            draw_constant,
        ]

        bits = [
            self.random.choice(options) for _ in hrange(3)
        ]

        def mutate_from(origin):
            target_data[0] = origin
            return draw_mutated

        def draw_mutated(data, n):
            if data.index + n > len(target_data[0].buffer):
                result = uniform(self.random, n)
            else:
                result = self.random.choice(bits)(data, n)

            return self.__rewrite_for_novelty(
                data, self.__zero_bound(data, result))

        return mutate_from

    def __rewrite(self, data, result):
        return self.__rewrite_for_novelty(
            data, self.__zero_bound(data, result)
        )

    def __zero_bound(self, data, result):
        """This tries to get the size of the generated data under control by
        replacing the result with zero if we are too deep or have already
        generated too much data.

        This causes us to enter "shrinking mode" there and thus reduce
        the size of the generated data.
        """
        if (
            data.depth * 2 >= MAX_DEPTH or
            (data.index + len(result)) * 2 >= self.settings.buffer_size
        ):
            if any(result):
                data.hit_zero_bound = True
            return hbytes(len(result))
        else:
            return result
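
    # A note on the zeroing above: hbytes(k) is k zero bytes, so e.g. a drawn
    # block of b'\x07\x03' past the bound is replaced by b'\x00\x00' (same
    # length, all zeros), which is why hit_zero_bound is only set when the
    # original result contained a non-zero byte.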

    def __rewrite_for_novelty(self, data, result):
        """Take a block that is about to be added to data as the result of a
        draw_bytes call and rewrite it a small amount to ensure that the result
        will be novel: that is, not hit a part of the tree that we have fully
        explored.

        This is mostly useful for test functions which draw a small
        number of blocks.
        """
        assert isinstance(result, hbytes)
        try:
            node_index = data.__current_node_index
        except AttributeError:
            node_index = 0
            data.__current_node_index = node_index
            data.__hit_novelty = False
            data.__evaluated_to = 0

        if data.__hit_novelty:
            return result

        node = self.tree[node_index]

        for i in hrange(data.__evaluated_to, len(data.buffer)):
            node = self.tree[node_index]
            try:
                node_index = node[data.buffer[i]]
                assert node_index not in self.dead
                node = self.tree[node_index]
            except KeyError:  # pragma: no cover
                assert False, (
                    'This should be impossible. If you see this error, please '
                    'report it as a bug (ideally with a reproducible test '
                    'case).'
                )

        for i, b in enumerate(result):
            assert isinstance(b, int)
            try:
                new_node_index = node[b]
            except KeyError:
                data.__hit_novelty = True
                return result

            new_node = self.tree[new_node_index]

            if new_node_index in self.dead:
                if isinstance(result, hbytes):
                    result = bytearray(result)
                for c in range(256):
                    if c not in node:
                        assert c <= self.capped.get(node_index, c)
                        result[i] = c
                        data.__hit_novelty = True
                        return hbytes(result)
                    else:
                        new_node_index = node[c]
                        new_node = self.tree[new_node_index]
                        if new_node_index not in self.dead:
                            result[i] = c
                            break
                else:  # pragma: no cover
                    assert False, (
                        'Found a tree node which is live despite all its '
                        'children being dead.')
            node_index = new_node_index
            node = new_node

        assert node_index not in self.dead
        data.__current_node_index = node_index
        data.__evaluated_to = data.index + len(result)
        return hbytes(result)

    @property
    def database(self):
        if self.database_key is None:
            return None
        return self.settings.database

    def has_existing_examples(self):
        return (
            self.database is not None and
            Phase.reuse in self.settings.phases
        )

    def reuse_existing_examples(self):
        """If appropriate (we have a database and have been told to use it),
        try to reload existing examples from the database.

        If there are a lot we don't try all of them. We always try the
        smallest example in the database (which is guaranteed to be the
        last failure) and the largest (which is usually the seed example
        which the last failure came from but we don't enforce that). We
        then take a random sampling of the remainder and try those. Any
        examples that are no longer interesting are cleared out.
        """
        if self.has_existing_examples():
            self.debug('Reusing examples from database')
            # We have to do some careful juggling here. We have two database
            # corpora: The primary and secondary. The primary corpus is a
            # small set of minimized examples each of which has at one point
            # demonstrated a distinct bug. We want to retry all of these.

            # We also have a secondary corpus of examples that have at some
            # point demonstrated interestingness (currently only ones that
            # were previously non-minimal examples of a bug, but this will
            # likely expand in future). These are a good source of potentially
            # interesting examples, but there are a lot of them, so we down
            # sample the secondary corpus to a more manageable size.
            corpus = sorted(
                self.settings.database.fetch(self.database_key),
                key=sort_key
            )
            desired_size = max(2, ceil(0.1 * self.settings.max_examples))

            for extra_key in [self.secondary_key, self.covering_key]:
                if len(corpus) < desired_size:
                    extra_corpus = list(
                        self.settings.database.fetch(extra_key),
                    )

                    shortfall = desired_size - len(corpus)

                    if len(extra_corpus) <= shortfall:
                        extra = extra_corpus
                    else:
                        extra = self.random.sample(extra_corpus, shortfall)
                    extra.sort(key=sort_key)
                    corpus.extend(extra)

            self.used_examples_from_database = len(corpus) > 0

            for existing in corpus:
                last_data = ConjectureData.for_buffer(existing)
                try:
                    self.test_function(last_data)
                finally:
                    if last_data.status != Status.INTERESTING:
                        self.settings.database.delete(
                            self.database_key, existing)
                        self.settings.database.delete(
                            self.secondary_key, existing)

    def exit_with(self, reason):
        self.exit_reason = reason
        raise RunIsComplete()

    def generate_new_examples(self):
        if Phase.generate not in self.settings.phases:
            return

        zero_data = self.cached_test_function(
            hbytes(self.settings.buffer_size))
        if zero_data.status == Status.OVERRUN or (
            zero_data.status == Status.VALID and
            len(zero_data.buffer) * 2 > self.settings.buffer_size
        ):
            fail_health_check(
                self.settings,
                'The smallest natural example for your test is extremely '
                'large. This makes it difficult for Hypothesis to generate '
                'good examples, especially when trying to reduce failing ones '
                'at the end. Consider reducing the size of your data if it is '
                'of a fixed size. You could also fix this by improving how '
                'your data shrinks (see https://hypothesis.readthedocs.io/en/'
                'latest/data.html#shrinking for details), or by introducing '
                'default values inside your strategy. e.g. could you replace '
                'some arguments with their defaults by using '
                'one_of(none(), some_complex_strategy)?',
                HealthCheck.large_base_example
            )

        if self.settings.perform_health_check:
            self.health_check_state = HealthCheckState()

        count = 0
        while not self.interesting_examples and (
            count < 10 or self.health_check_state is not None
        ):
            def draw_bytes(data, n):
                return self.__rewrite_for_novelty(
                    data, self.__zero_bound(data, uniform(self.random, n))
                )

            targets_found = len(self.covering_examples)

            last_data = ConjectureData(
                max_length=self.settings.buffer_size,
                draw_bytes=draw_bytes
            )
            self.test_function(last_data)
            last_data.freeze()

            if len(self.covering_examples) > targets_found:
                count = 0
            else:
                count += 1

        mutations = 0
        mutator = self._new_mutator()

        zero_bound_queue = []

        while not self.interesting_examples:
            if zero_bound_queue:
                # Whenever we generated an example and it hits a bound
                # which forces zero blocks into it, this creates a weird
                # distortion effect by making certain parts of the data
                # stream (especially ones to the right) much more likely
                # to be zero. We fix this by redistributing the generated
                # data by shuffling it randomly. This results in the
                # zero data being spread evenly throughout the buffer.
                # Hopefully the shrinking this causes will cause us to
                # naturally fail to hit the bound.
                # If it doesn't then we will queue the new version up again
                # (now with more zeros) and try again.
                overdrawn = zero_bound_queue.pop()
                buffer = bytearray(overdrawn.buffer)

                # These will have values written to them that are different
                # from what's in them anyway, so the value there doesn't
                # really "count" for distributional purposes, and if we
                # leave them in then they can cause the fraction of non
                # zero bytes to increase on redraw instead of decrease.
                for i in overdrawn.forced_indices:
                    buffer[i] = 0

                self.random.shuffle(buffer)
                buffer = hbytes(buffer)

                def draw_bytes(data, n):
                    result = buffer[data.index:data.index + n]
                    if len(result) < n:
                        result += hbytes(n - len(result))
                    return self.__rewrite(data, result)

                data = ConjectureData(
                    draw_bytes=draw_bytes,
                    max_length=self.settings.buffer_size,
                )
                self.test_function(data)
                data.freeze()
            else:
                target, origin = self.target_selector.select()
                mutations += 1
                targets_found = len(self.covering_examples)
                data = ConjectureData(
                    draw_bytes=mutator(origin),
                    max_length=self.settings.buffer_size
                )
                self.test_function(data)
                data.freeze()
                if (
                    data.status > origin.status or
                    len(self.covering_examples) > targets_found
                ):
                    mutations = 0
                elif (
                    data.status < origin.status or
                    not self.target_selector.has_tag(target, data) or
                    mutations >= 10
                ):
                    # Cap the variations of a single example and move on to
                    # an entirely fresh start. Ten is an entirely arbitrary
                    # constant, but it's been working well for years.
                    mutations = 0
                    mutator = self._new_mutator()

            if getattr(data, 'hit_zero_bound', False):
                zero_bound_queue.append(data)

            mutations += 1

    def _run(self):
        self.start_time = benchmark_time()

        self.reuse_existing_examples()
        self.generate_new_examples()

        if (
            Phase.shrink not in self.settings.phases or
            not self.interesting_examples
        ):
            self.exit_with(ExitReason.finished)

        for prev_data in sorted(
            self.interesting_examples.values(),
            key=lambda d: sort_key(d.buffer)
        ):
            assert prev_data.status == Status.INTERESTING
            data = ConjectureData.for_buffer(prev_data.buffer)
            self.test_function(data)
            if data.status != Status.INTERESTING:
                self.exit_with(ExitReason.flaky)

        self.clear_secondary_key()

        while len(self.shrunk_examples) < len(self.interesting_examples):
            target, example = min([
                (k, v) for k, v in self.interesting_examples.items()
                if k not in self.shrunk_examples],
                key=lambda kv: (sort_key(kv[1].buffer), sort_key(repr(kv[0]))),
            )
            self.debug('Shrinking %r' % (target,))

            def predicate(d):
                if d.status < Status.INTERESTING:
                    return False
                return d.interesting_origin == target

            self.shrink(example, predicate)

            self.shrunk_examples.add(target)
        self.exit_with(ExitReason.finished)

    def clear_secondary_key(self):
        if self.has_existing_examples():
            # If we have any smaller examples in the secondary corpus, now is
            # a good time to try them to see if they work as shrinks. They
            # probably won't, but it's worth a shot and gives us a good
            # opportunity to clear out the database.

            # It's not worth trying the primary corpus because we already
            # tried all of those in the initial phase.
            corpus = sorted(
                self.settings.database.fetch(self.secondary_key),
                key=sort_key
            )
            cap = max(
                sort_key(v.buffer)
                for v in self.interesting_examples.values()
            )
            for c in corpus:
                if sort_key(c) >= cap:
                    break
                else:
                    data = self.cached_test_function(c)
                    if (
                        data.status != Status.INTERESTING or
                        self.interesting_examples[data.interesting_origin]
                        is not data
                    ):
                        self.settings.database.delete(
                            self.secondary_key, c)

    def shrink(self, example, predicate):
        s = self.new_shrinker(example, predicate)
        s.shrink()
        return s.shrink_target

    def new_shrinker(self, example, predicate):
        return Shrinker(self, example, predicate)

    def prescreen_buffer(self, buffer):
        """Attempt to rule out buffer as a possible interesting candidate.

        Returns False if we know for sure that running this buffer will not
        produce an interesting result. Returns True if it might (because it
        explores territory we have not previously tried).

        This is purely an optimisation to try to reduce the number of tests we
        run. "return True" would be a valid but inefficient implementation.
        """
        node_index = 0
        n = len(buffer)
        for k, b in enumerate(buffer):
            if node_index in self.dead:
                return False
            try:
                # The block size at that point provides a lower bound on how
                # many more bytes are required. If the buffer does not have
                # enough bytes to fulfill that block size then we can rule out
                # this buffer.
                if k + self.block_sizes[node_index] > n:
                    return False
            except KeyError:
                pass
            try:
                b = self.forced[node_index]
            except KeyError:
                pass
            try:
                b = min(b, self.capped[node_index])
            except KeyError:
                pass
            try:
                node_index = self.tree[node_index][b]
            except KeyError:
                return True
        else:
            return False

    def cached_test_function(self, buffer):
        node_index = 0
        for i in hrange(self.settings.buffer_size):
            try:
                c = self.forced[node_index]
            except KeyError:
                if i < len(buffer):
                    c = buffer[i]
                else:
                    c = 0
            try:
                node_index = self.tree[node_index][c]
            except KeyError:
                break
            node = self.tree[node_index]
            if isinstance(node, ConjectureData):
                return node
        result = ConjectureData.for_buffer(buffer)
        self.test_function(result)
        return result

    def event_to_string(self, event):
        if isinstance(event, str):
            return event
        try:
            return self.events_to_strings[event]
        except KeyError:
            pass
        result = str(event)
        self.events_to_strings[event] = result
        return result

def _draw_predecessor(rnd, xs):
    r = bytearray()
    any_strict = False
    for x in to_bytes_sequence(xs):
        if not any_strict:
            c = rnd.randint(0, x)
            if c < x:
                any_strict = True
        else:
            c = rnd.randint(0, 255)
        r.append(c)
    return hbytes(r)


def _draw_successor(rnd, xs):
    r = bytearray()
    any_strict = False
    for x in to_bytes_sequence(xs):
        if not any_strict:
            c = rnd.randint(x, 255)
            if c > x:
                any_strict = True
        else:
            c = rnd.randint(0, 255)
        r.append(c)
    return hbytes(r)
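
# Illustrative behaviour (hypothetical values, since the draws are random):
# _draw_predecessor(rnd, b'\x05\xff') only returns byte strings that compare
# lexicographically <= b'\x05\xff', e.g. b'\x05\x80' or b'\x03\xc1'; once a
# strictly smaller byte has been drawn, the remaining bytes are unconstrained.
# _draw_successor is the mirror image, producing strings >= its argument.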

def sort_key(buffer):
    return (len(buffer), buffer)
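
# Buffers are compared shortlex: shorter always sorts before longer, and ties
# are broken lexicographically. For example sort_key(b'\xff') <
# sort_key(b'\x00\x00'), so a one-byte buffer is always considered "simpler"
# than any two-byte buffer.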

def uniform(random, n):
    return int_to_bytes(random.getrandbits(n * 8), n)
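
# For example, uniform(random, 2) draws 16 random bits and packs them into
# exactly two bytes, so every 2-byte string is equally likely (int_to_bytes
# from hypothesis.internal.compat pads to the requested width).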

class SampleSet(object):
    """Set data type with the ability to sample uniformly at random from it.

    The mechanism is that we store the set in two parts: A mapping of
    values to their index in an array. Sampling uniformly at random then
    becomes simply a matter of sampling from the array, but we can use
    the index for efficient lookup to add and remove values.
    """

    __slots__ = ('__values', '__index')

    def __init__(self):
        self.__values = []
        self.__index = {}

    def __len__(self):
        return len(self.__values)

    def __repr__(self):
        return 'SampleSet(%r)' % (self.__values,)

    def add(self, value):
        assert value not in self.__index
        # Adding simply consists of adding the value to the end of the array
        # and updating the index.
        self.__index[value] = len(self.__values)
        self.__values.append(value)

    def remove(self, value):
        # To remove a value we first remove it from the index. But this leaves
        # us with the value still in the array, so we have to fix that. We
        # can't simply remove the value from the array, as that would a) Be an
        # O(n) operation and b) Leave the index completely wrong for every
        # value after that index.
        # So what we do is we take the last element of the array and place it
        # in the position of the value we just deleted (if the value was not
        # already the last element of the array. If it was then we don't have
        # to do anything extra). This reorders the array, but that's OK because
        # we don't care about its order, we just need to sample from it.
        i = self.__index.pop(value)
        last = self.__values.pop()
        if i < len(self.__values):
            self.__values[i] = last
            self.__index[last] = i

    def choice(self, random):
        return random.choice(self.__values)
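
# A minimal usage sketch (not part of the engine's control flow):
#
#   s = SampleSet()
#   s.add('a')
#   s.add('b')
#   s.remove('a')               # 'b' is swapped into the vacated slot
#   assert s.choice(Random(0)) == 'b'
#
# Adding, removing and sampling are all O(1), which is what makes this
# suitable for the tag bookkeeping in TargetSelector below.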

class Negated(object):
    __slots__ = ('tag',)

    def __init__(self, tag):
        self.tag = tag


NEGATED_CACHE = {}


def negated(tag):
    try:
        return NEGATED_CACHE[tag]
    except KeyError:
        result = Negated(tag)
        NEGATED_CACHE[tag] = result
        return result


universal = UniqueIdentifier('universal')

class TargetSelector(object):
    """Data structure for selecting targets to use for mutation.

    The goal is to do a good job of exploiting novelty in examples without
    getting too obsessed with any particular novel factor.

    Roughly speaking what we want to do is give each distinct coverage target
    equal amounts of time. However some coverage targets may be harder to fuzz
    than others, or may only appear in a very small minority of examples, so
    we don't want to let those dominate the testing.

    Targets are selected according to the following rules:

    1. We ideally want valid examples as our starting point. We ignore
       interesting examples entirely, and other than that we restrict
       ourselves to the best example status we've seen so far. If we've only
       seen OVERRUN examples we use those. If we've seen INVALID but not VALID
       examples we use those. Otherwise we use VALID examples.
    2. Among the examples we've seen with the right status, when asked to
       select a target, we select a coverage target and return that along with
       an example exhibiting that target uniformly at random.

    Coverage target selection proceeds as follows:

    1. Whenever we return an example from select, we update the usage count of
       each of its tags.
    2. Whenever we see an example, we add it to the list of examples for all
       of its tags.
    3. When selecting a tag, we select one with a minimal usage count. Among
       those of minimal usage count we select one with the fewest examples.
       Among those, we select one uniformly at random.

    This has the following desirable properties:

    1. When two coverage targets are intrinsically linked (e.g. when you have
       multiple lines in a conditional so that either all or none of them will
       be covered in a conditional) they are naturally deduplicated.
    2. Popular coverage targets will largely be ignored for considering what
       test to run - if every example exhibits a coverage target, picking an
       example because of that target is rather pointless.
    3. When we discover new coverage targets we immediately exploit them until
       we get to the point where we've spent about as much time on them as the
       existing targets.
    4. Among the interesting deduplicated coverage targets we essentially
       round-robin between them, but with a more consistent distribution than
       uniformly at random, which is important particularly for short runs.
    """

    def __init__(self, random):
        self.random = random
        self.best_status = Status.OVERRUN
        self.reset()

    def reset(self):
        self.examples_by_tags = defaultdict(list)
        self.tag_usage_counts = Counter()
        self.tags_by_score = defaultdict(SampleSet)
        self.scores_by_tag = {}
        self.scores = []
        self.mutation_counts = 0
        self.example_counts = 0
        self.non_universal_tags = set()
        self.universal_tags = None

    def add(self, data):
        if data.status == Status.INTERESTING:
            return
        if data.status < self.best_status:
            return
        if data.status > self.best_status:
            self.best_status = data.status
            self.reset()

        if self.universal_tags is None:
            self.universal_tags = set(data.tags)
        else:
            not_actually_universal = self.universal_tags - data.tags
            for t in not_actually_universal:
                self.universal_tags.remove(t)
                self.non_universal_tags.add(t)
                self.examples_by_tags[t] = list(
                    self.examples_by_tags[universal]
                )

        new_tags = data.tags - self.non_universal_tags

        for t in new_tags:
            self.non_universal_tags.add(t)
            self.examples_by_tags[negated(t)] = list(
                self.examples_by_tags[universal]
            )

        self.example_counts += 1
        for t in self.tags_for(data):
            self.examples_by_tags[t].append(data)
            self.rescore(t)

    def has_tag(self, tag, data):
        if tag is universal:
            return True
        if isinstance(tag, Negated):
            return tag.tag not in data.tags
        return tag in data.tags

    def tags_for(self, data):
        yield universal
        for t in data.tags:
            yield t
        for t in self.non_universal_tags:
            if t not in data.tags:
                yield negated(t)
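
    # For example (hypothetical tags): if data.tags == {'a'} and 'b' is a
    # known non-universal tag that this example lacks, tags_for(data) yields
    # universal, 'a', and negated('b') -- so the example is indexed both by
    # the targets it hits and by the interesting targets it misses.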

    def rescore(self, tag):
        new_score = (
            self.tag_usage_counts[tag], len(self.examples_by_tags[tag]))
        try:
            old_score = self.scores_by_tag[tag]
        except KeyError:
            pass
        else:
            self.tags_by_score[old_score].remove(tag)
        self.scores_by_tag[tag] = new_score

        sample = self.tags_by_score[new_score]
        if len(sample) == 0:
            heapq.heappush(self.scores, new_score)
        sample.add(tag)

    def select_tag(self):
        while True:
            peek = self.scores[0]
            sample = self.tags_by_score[peek]
            if len(sample) == 0:
                heapq.heappop(self.scores)
            else:
                return sample.choice(self.random)

    def select_example_for_tag(self, t):
        return self.random.choice(self.examples_by_tags[t])

    def select(self):
        t = self.select_tag()
        self.mutation_counts += 1
        result = self.select_example_for_tag(t)
        assert self.has_tag(t, result)
        for s in self.tags_for(result):
            self.tag_usage_counts[s] += 1
            self.rescore(s)
        return t, result

class Shrinker(object):
    """A shrinker is a child object of a ConjectureRunner which is designed to
    manage the associated state of a particular shrink problem.

    Currently the only shrink problem we care about is "interesting and with a
    particular interesting_origin", but this is abstracted into a general
    purpose predicate for more flexibility later - e.g. we are likely to want
    to shrink with respect to a particular coverage target later.

    Data with a status < VALID may be assumed not to satisfy the predicate.

    The expected usage pattern is that this is only ever called from within
    the engine.
    """

    def __init__(self, engine, initial, predicate):
        """Create a shrinker for a particular engine, with a given starting
        point and predicate. When shrink() is called it will attempt to find an
        example for which predicate is True and which is strictly smaller than
        initial.

        Note that initial is a ConjectureData object, and predicate
        takes ConjectureData objects.
        """
        self.__engine = engine
        self.__predicate = predicate
        self.__discarding_failed = False

        # We keep track of the current best example on the shrink_target
        # attribute.
        self.shrink_target = initial

    def incorporate_new_buffer(self, buffer):
        buffer = hbytes(buffer[:self.shrink_target.index])
        assert sort_key(buffer) < sort_key(self.shrink_target.buffer)

        if not self.__engine.prescreen_buffer(buffer):
            return False

        assert sort_key(buffer) <= sort_key(self.shrink_target.buffer)
        data = ConjectureData.for_buffer(buffer)
        self.__engine.test_function(data)
        result = self.incorporate_test_data(data)
        if result and not self.__discarding_failed:
            self.remove_discarded()
        return result

    def incorporate_test_data(self, data):
        if (
            self.__predicate(data) and
            sort_key(data.buffer) < sort_key(self.shrink_target.buffer)
        ):
            self.shrink_target = data
            return True
        return False

    def cached_test_function(self, buffer):
        result = self.__engine.cached_test_function(buffer)
        self.incorporate_test_data(result)
        return result

    def debug(self, msg):
        self.__engine.debug(msg)

    def shrink(self):
        # We assume that if an all-zero block of bytes is an interesting
        # example then we're not going to do better than that.
        # This might not technically be true: e.g. for integers() | booleans()
        # the simplest example is actually [1, 0]. Missing this case is fairly
        # harmless and this allows us to make various simplifying assumptions
        # about the structure of the data (principally that we're never
        # operating on a block of all zero bytes so can use non-zeroness as a
        # signpost of complexity).
        if (
            not any(self.shrink_target.buffer) or
            self.incorporate_new_buffer(hbytes(len(self.shrink_target.buffer)))
        ):
            return

        # This will automatically be run during the normal loop, but it's
        # worth running once before the coarse passes so they don't spend
        # time on useless data.
        self.remove_discarded()

        # Coarse passes that are worth running once when the example is likely
        # to be "far from shrunk" but not worth repeating in a loop because
        # they are subsumed by more fine grained passes.
        self.delta_interval_deletion()
        self.coarse_block_replacement()

        prev = None
        while prev is not self.shrink_target:
            prev = self.shrink_target
            self.remove_discarded()
            self.minimize_duplicated_blocks()
            self.minimize_individual_blocks()
            self.reorder_blocks()
            self.greedy_interval_deletion()
            self.interval_deletion_with_block_lowering()

    def try_shrinking_blocks(self, blocks, b):
        initial_attempt = bytearray(self.shrink_target.buffer)
        for i in blocks:
            if i >= len(self.shrink_target.blocks):
                break
            u, v = self.shrink_target.blocks[i]
            initial_attempt[u:v] = b

        initial_data = self.cached_test_function(initial_attempt)

        if initial_data.status == Status.INTERESTING:
            return initial_data is self.shrink_target

        # If this produced something completely invalid we ditch it
        # here rather than trying to persevere.
        if initial_data.status < Status.VALID:
            return False

        if len(initial_data.buffer) < v:
            return False

        lost_data = len(self.shrink_target.buffer) - len(initial_data.buffer)

        # If this did not in fact cause the data size to shrink we
        # bail here because it's not worth trying to delete stuff from
        # the remainder.
        if lost_data <= 0:
            return False

        self.shrink_target.shrinking_blocks.update(blocks)

        try_with_deleted = bytearray(initial_attempt)
        del try_with_deleted[v:v + lost_data]
        if self.incorporate_new_buffer(try_with_deleted):
            return True
        return False

    def remove_discarded(self):
        """Try removing all bytes marked as discarded."""
        if not self.shrink_target.discarded:
            return
        attempt = bytearray(self.shrink_target.buffer)
        for u, v in sorted(self.shrink_target.discarded, reverse=True):
            del attempt[u:v]

        self.__discarding_failed = not self.incorporate_new_buffer(attempt)

    def delta_interval_deletion(self):
        """Attempt to delete every interval in the example."""
        self.debug('delta interval deletes')

        # We do a delta-debugging style thing here where we initially try to
        # delete many intervals at once and prune it down exponentially to
        # eventually only trying to delete one interval at a time.

        # I'm a little skeptical that this is helpful in general, but we've
        # got at least one benchmark where it does help.
        k = len(self.shrink_target.intervals) // 2
        while k > 0:
            i = 0
            while i + k <= len(self.shrink_target.intervals):
                bitmask = [True] * len(self.shrink_target.buffer)

                for u, v in self.shrink_target.intervals[i:i + k]:
                    for t in range(u, v):
                        bitmask[t] = False

                if not self.incorporate_new_buffer(hbytes(
                    b for b, v in zip(self.shrink_target.buffer, bitmask)
                    if v
                )):
                    i += k
            k //= 2
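
    # Sketch of the schedule above (assuming, say, 8 intervals): the first
    # sweep tries to drop them in chunks of k=4, then k=2, then k=1, so the
    # number of attempts stays roughly linear even though early passes delete
    # large amounts of data at once.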

    def greedy_interval_deletion(self):
        """Attempt to delete every interval in the example."""
        self.debug('greedy interval deletes')
        i = 0
        while i < len(self.shrink_target.intervals):
            u, v = self.shrink_target.intervals[i]
            if not self.incorporate_new_buffer(
                self.shrink_target.buffer[:u] + self.shrink_target.buffer[v:]
            ):
                i += 1

    def coarse_block_replacement(self):
        """Attempts to zero every block. This is a very coarse pass that we
        only run once to attempt to remove some irrelevant detail. The main
        purpose of it is that if we manage to zero a lot of data then many
        attempted deletes become duplicates of each other, so we run fewer
        tests.

        If more blocks become possible to zero later that will be
        handled by minimize_individual_blocks. The point of this is
        simply to provide a fairly fast initial pass.
        """
        self.debug('Zeroing blocks')
        i = 0
        while i < len(self.shrink_target.blocks):
            buf = self.shrink_target.buffer
            u, v = self.shrink_target.blocks[i]
            assert u < v
            block = buf[u:v]
            if any(block):
                self.incorporate_new_buffer(buf[:u] + hbytes(v - u) + buf[v:])
            i += 1

    def minimize_duplicated_blocks(self):
        """Find blocks that have been duplicated in multiple places and
        attempt to minimize all of the duplicates simultaneously."""
        self.debug('Simultaneous shrinking of duplicated blocks')
        counts = Counter(
            self.shrink_target.buffer[u:v]
            for u, v in self.shrink_target.blocks
        )
        blocks = [buffer for buffer, count in counts.items() if count > 1]

        blocks.sort(reverse=True)
        blocks.sort(key=lambda b: counts[b] * len(b), reverse=True)
        for block in blocks:
            targets = [
                i for i, (u, v) in enumerate(self.shrink_target.blocks)
                if self.shrink_target.buffer[u:v] == block
            ]
            # This can happen if some blocks have been lost in the previous
            # shrinking.
            if len(targets) <= 1:
                continue

            minimize(
                block,
                lambda b: self.try_shrinking_blocks(targets, b),
                random=self.__engine.random, full=False
            )

    def minimize_individual_blocks(self):
        self.debug('Shrinking of individual blocks')
        i = 0
        while i < len(self.shrink_target.blocks):
            u, v = self.shrink_target.blocks[i]
            minimize(
                self.shrink_target.buffer[u:v],
                lambda b: self.try_shrinking_blocks((i,), b),
                random=self.__engine.random, full=False,
            )
            i += 1

    def reorder_blocks(self):
        self.debug('Reordering blocks')
        block_lengths = sorted(self.shrink_target.block_starts, reverse=True)
        for n in block_lengths:
            i = 1
            while i < len(self.shrink_target.block_starts.get(n, ())):
                j = i
                while j > 0:
                    buf = self.shrink_target.buffer
                    blocks = self.shrink_target.block_starts[n]
                    a_start = blocks[j - 1]
                    b_start = blocks[j]
                    a = buf[a_start:a_start + n]
                    b = buf[b_start:b_start + n]
                    if a <= b:
                        break
                    swapped = (
                        buf[:a_start] + b + buf[a_start + n:b_start] +
                        a + buf[b_start + n:])
                    assert len(swapped) == len(buf)
                    assert swapped < buf
                    if self.incorporate_new_buffer(swapped):
                        j -= 1
                    else:
                        break
                i += 1

    def interval_deletion_with_block_lowering(self):
        self.debug('Lowering blocks while deleting intervals')
        if not self.shrink_target.shrinking_blocks:
            return

        i = 0
        while i < len(self.shrink_target.intervals):
            u, v = self.shrink_target.intervals[i]

            changed = False
            prev = self.shrink_target

            # This loop never exits normally because the r >= u branch will
            # always trigger once we find a block inside the interval, hence
            # the pragma.
            for j, (r, s) in enumerate(  # pragma: no branch
                self.shrink_target.blocks
            ):
                if r >= u:
                    break
                if j not in self.shrink_target.shrinking_blocks:
                    continue
                b = self.shrink_target.buffer[r:s]
                if any(b):
                    n = int_from_bytes(b)

                    for m in hrange(max(n - 2, 0), n):
                        c = int_to_bytes(m, len(b))
                        attempt = bytearray(self.shrink_target.buffer)
                        attempt[r:s] = c
                        del attempt[u:v]
                        if self.incorporate_new_buffer(attempt):
                            self.shrink_target.shrinking_blocks.update(
                                k for k in prev.shrinking_blocks
                                if prev.blocks[k][-1] <= u
                            )
                            changed = True
                            break
                    if changed:
                        break
            if not changed:
                i += 1