varints.py 3.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110
  1. # Copyright 2007 Matt Chaput. All rights reserved.
  2. #
  3. # Redistribution and use in source and binary forms, with or without
  4. # modification, are permitted provided that the following conditions are met:
  5. #
  6. # 1. Redistributions of source code must retain the above copyright notice,
  7. # this list of conditions and the following disclaimer.
  8. #
  9. # 2. Redistributions in binary form must reproduce the above copyright
  10. # notice, this list of conditions and the following disclaimer in the
  11. # documentation and/or other materials provided with the distribution.
  12. #
  13. # THIS SOFTWARE IS PROVIDED BY MATT CHAPUT ``AS IS'' AND ANY EXPRESS OR
  14. # IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
  15. # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
  16. # EVENT SHALL MATT CHAPUT OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  17. # INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  18. # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
  19. # OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
  20. # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
  21. # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
  22. # EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  23. #
  24. # The views and conclusions contained in the software and documentation are
  25. # those of the authors and should not be interpreted as representing official
  26. # policies, either expressed or implied, of Matt Chaput.
  27. from array import array
  28. from whoosh.compat import array_tobytes, xrange
  29. # Varint cache
  30. # Build a cache of the varint byte sequences for the first N integers, so we
  31. # don't have to constantly recalculate them on the fly. This makes a small but
  32. # noticeable difference.
  33. def _varint(i):
  34. a = array("B")
  35. while (i & ~0x7F) != 0:
  36. a.append((i & 0x7F) | 0x80)
  37. i = i >> 7
  38. a.append(i)
  39. return array_tobytes(a)
  40. _varint_cache_size = 512
  41. _varint_cache = []
  42. for i in xrange(0, _varint_cache_size):
  43. _varint_cache.append(_varint(i))
  44. _varint_cache = tuple(_varint_cache)
  45. def varint(i):
  46. """Encodes the given integer into a string of the minimum number of bytes.
  47. """
  48. if i < len(_varint_cache):
  49. return _varint_cache[i]
  50. return _varint(i)
  51. def varint_to_int(vi):
  52. b = ord(vi[0])
  53. p = 1
  54. i = b & 0x7f
  55. shift = 7
  56. while b & 0x80 != 0:
  57. b = ord(vi[p])
  58. p += 1
  59. i |= (b & 0x7F) << shift
  60. shift += 7
  61. return i
  62. def signed_varint(i):
  63. """Zig-zag encodes a signed integer into a varint.
  64. """
  65. if i >= 0:
  66. return varint(i << 1)
  67. return varint((i << 1) ^ (~0))
  68. def decode_signed_varint(i):
  69. """Zig-zag decodes an integer value.
  70. """
  71. if not i & 1:
  72. return i >> 1
  73. return (i >> 1) ^ (~0)
  74. def read_varint(readfn):
  75. """
  76. Reads a variable-length encoded integer.
  77. :param readfn: a callable that reads a given number of bytes,
  78. like file.read().
  79. """
  80. b = ord(readfn(1))
  81. i = b & 0x7F
  82. shift = 7
  83. while b & 0x80 != 0:
  84. b = ord(readfn(1))
  85. i |= (b & 0x7F) << shift
  86. shift += 7
  87. return i