123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103 |
- """
- This module contains generic base85 encoding and decoding functions. The
- whoosh.util.numeric module contains faster variants for encoding and
- decoding integers.
- Modified from:
- http://paste.lisp.org/display/72815
- """
- import struct
- from whoosh.compat import xrange
- # Instead of using the character set from the ascii85 algorithm, I put the
- # characters in order so that the encoded text sorts properly (my life would be
- # a lot easier if they had just done that from the start)
# The 85 digits of the encoding, listed in ascending ASCII order so that
# encoded strings compare the same way as the values they represent.
b85chars = ("!$%&*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ"
            "^_abcdefghijklmnopqrstuvwxyz{|}~")

# Reverse lookup table: digit character -> numeric value (0..84).
b85dec = {char: value for value, char in enumerate(b85chars)}
- # Integer encoding and decoding functions
def to_base85(x, islong=False):
    """Encodes the given integer using base 85.

    The result always has a fixed width: 10 characters when ``islong`` is
    true, otherwise 5. Only that many low-order base-85 digits of ``x`` are
    emitted; any higher-order digits are dropped.
    """

    if islong:
        width = 10
    else:
        width = 5

    # Digits come out least-significant first, so collect them and flip the
    # order once at the end.
    digits = []
    for _ in xrange(width):
        x, remainder = divmod(x, 85)
        digits.append(b85chars[remainder])
    digits.reverse()
    return "".join(digits)
def from_base85(text):
    """Decodes the given base 85 text into an integer.

    Characters are read most-significant digit first. A character that is
    not in the base-85 alphabet raises ``KeyError``.
    """

    value = 0
    for char in text:
        # Shift the digits seen so far up one place, then add the new one.
        value *= 85
        value += b85dec[char]
    return value
- # Bytes encoding and decoding functions
def b85encode(text, pad=False):
    """Encodes a byte string as base 85 text.

    The input is read as big-endian unsigned 32-bit words and every word is
    written as five characters from ``b85chars``. Input whose length is not
    a multiple of four is zero-padded before it is split into words.

    :param text: the bytes to encode.
    :param pad: if true, the characters produced by the zero padding are
        kept, so the result length is always a multiple of five. Otherwise
        the result is trimmed so that ``n`` trailing input bytes are
        represented by ``n + 1`` characters.
    :returns: the encoded text as a native string.
    """

    length = len(text)
    remainder = length % 4
    if remainder:
        # Pad with a bytes literal (valid on Python 2 and 3) and rebind
        # instead of using ``+=`` so a mutable buffer passed in by the
        # caller is never extended in place.
        text = text + b'\0' * (4 - remainder)
    nwords = len(text) // 4

    out = []
    for word in struct.unpack('>%dL' % nwords, text):
        rems = [None] * 5
        # Fill the five digits from least to most significant.
        for i in range(4, -1, -1):
            # divmod keeps ``word`` an integer; true division would turn it
            # into a float and break the table lookup on Python 3.
            word, digit = divmod(word, 85)
            rems[i] = b85chars[digit]
        out.extend(rems)
    encoded = ''.join(out)

    if pad:
        return encoded

    # Trim padding: each complete 4-byte group takes 5 characters and a
    # partial group of n bytes takes n + 1.
    olen = length // 4 * 5
    if remainder:
        olen += remainder + 1
    return encoded[:olen]
def b85decode(text):
    """Decodes base 85 text (as produced by ``b85encode``) back into bytes.

    The text is consumed in groups of five characters, each of which yields
    one big-endian unsigned 32-bit word. A shorter final group is treated as
    a trimmed encoding of fewer than four bytes.

    :param text: the base 85 text to decode.
    :returns: the decoded byte string.
    :raises TypeError: if ``text`` contains a character that is not in the
        base 85 alphabet.
    :raises OverflowError: if a group decodes to a value that does not fit
        in 32 bits.
    """

    length = len(text)
    words = []
    acc = 0
    for start in range(0, length, 5):
        acc = 0
        for offset, char in enumerate(text[start:start + 5]):
            try:
                acc = acc * 85 + b85dec[char]
            except KeyError:
                raise TypeError('Bad base85 character at byte %d'
                                % (start + offset))
        if acc > 4294967295:
            raise OverflowError('Base85 overflow in hunk starting at byte %d'
                                % start)
        words.append(acc)

    # Pad final chunk if necessary
    tail = length % 5
    if tail:
        # Scale the partial group up as if the trimmed low-order digits
        # were zero.
        acc *= 85 ** (5 - tail)
        if tail > 1:
            # Trimming rounded the word down. Adding the all-ones value of
            # the bytes that get cut off below rounds it back up without
            # carrying into the bytes that are kept.
            acc += 0xffffff >> (tail - 2) * 8
        words[-1] = acc

    # One 'L' per decoded group; an integer count is required here, which
    # true division of the text length would not provide on Python 3.
    out = struct.pack('>%dL' % len(words), *words)
    if tail:
        # Drop the bytes that only exist because of the padding.
        out = out[:-(5 - tail)]
    return out
|