base85.py 2.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103
  1. """
  2. This module contains generic base85 encoding and decoding functions. The
  3. whoosh.util.numeric module contains faster variants for encoding and
  4. decoding integers.
  5. Modified from:
  6. http://paste.lisp.org/display/72815
  7. """
  8. import struct
  9. from whoosh.compat import xrange
  10. # Instead of using the character set from the ascii85 algorithm, I put the
  11. # characters in order so that the encoded text sorts properly (my life would be
  12. # a lot easier if they had just done that from the start)
  13. b85chars = ("!$%&*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ"
  14. "^_abcdefghijklmnopqrstuvwxyz{|}~")
  15. b85dec = {}
  16. for i in range(len(b85chars)):
  17. b85dec[b85chars[i]] = i
  18. # Integer encoding and decoding functions
  19. def to_base85(x, islong=False):
  20. "Encodes the given integer using base 85."
  21. size = 10 if islong else 5
  22. rems = ""
  23. for i in xrange(size):
  24. rems = b85chars[x % 85] + rems
  25. x //= 85
  26. return rems
  27. def from_base85(text):
  28. "Decodes the given base 85 text into an integer."
  29. acc = 0
  30. for c in text:
  31. acc = acc * 85 + b85dec[c]
  32. return acc
  33. # Bytes encoding and decoding functions
  34. def b85encode(text, pad=False):
  35. l = len(text)
  36. r = l % 4
  37. if r:
  38. text += '\0' * (4 - r)
  39. longs = len(text) >> 2
  40. out = []
  41. words = struct.unpack('>' + 'L' * longs, text[0:longs * 4])
  42. for word in words:
  43. rems = [0, 0, 0, 0, 0]
  44. for i in range(4, -1, -1):
  45. rems[i] = b85chars[word % 85]
  46. word /= 85
  47. out.extend(rems)
  48. out = ''.join(out)
  49. if pad:
  50. return out
  51. # Trim padding
  52. olen = l % 4
  53. if olen:
  54. olen += 1
  55. olen += l / 4 * 5
  56. return out[0:olen]
  57. def b85decode(text):
  58. l = len(text)
  59. out = []
  60. for i in range(0, len(text), 5):
  61. chunk = text[i:i + 5]
  62. acc = 0
  63. for j in range(len(chunk)):
  64. try:
  65. acc = acc * 85 + b85dec[chunk[j]]
  66. except KeyError:
  67. raise TypeError('Bad base85 character at byte %d' % (i + j))
  68. if acc > 4294967295:
  69. raise OverflowError('Base85 overflow in hunk starting at byte %d' % i)
  70. out.append(acc)
  71. # Pad final chunk if necessary
  72. cl = l % 5
  73. if cl:
  74. acc *= 85 ** (5 - cl)
  75. if cl > 1:
  76. acc += 0xffffff >> (cl - 2) * 8
  77. out[-1] = acc
  78. out = struct.pack('>' + 'L' * ((l + 4) / 5), *out)
  79. if cl:
  80. out = out[:-(5 - cl)]
  81. return out