lzw.py 3.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113
  1. #!/usr/bin/env python
  2. import sys
  3. try:
  4. from cStringIO import StringIO
  5. except ImportError:
  6. from StringIO import StringIO
  7. class CorruptDataError(Exception):
  8. pass
  9. ## LZWDecoder
  10. ##
  11. class LZWDecoder(object):
  12. debug = 0
  13. def __init__(self, fp):
  14. self.fp = fp
  15. self.buff = 0
  16. self.bpos = 8
  17. self.nbits = 9
  18. self.table = None
  19. self.prevbuf = None
  20. return
  21. def readbits(self, bits):
  22. v = 0
  23. while 1:
  24. # the number of remaining bits we can get from the current buffer.
  25. r = 8-self.bpos
  26. if bits <= r:
  27. # |-----8-bits-----|
  28. # |-bpos-|-bits-| |
  29. # | |----r----|
  30. v = (v << bits) | ((self.buff >> (r-bits)) & ((1 << bits)-1))
  31. self.bpos += bits
  32. break
  33. else:
  34. # |-----8-bits-----|
  35. # |-bpos-|---bits----...
  36. # | |----r----|
  37. v = (v << r) | (self.buff & ((1 << r)-1))
  38. bits -= r
  39. x = self.fp.read(1)
  40. if not x:
  41. raise EOFError
  42. self.buff = ord(x)
  43. self.bpos = 0
  44. return v
  45. def feed(self, code):
  46. x = ''
  47. if code == 256:
  48. self.table = [chr(c) for c in xrange(256)] # 0-255
  49. self.table.append(None) # 256
  50. self.table.append(None) # 257
  51. self.prevbuf = ''
  52. self.nbits = 9
  53. elif code == 257:
  54. pass
  55. elif not self.prevbuf:
  56. x = self.prevbuf = self.table[code]
  57. else:
  58. if code < len(self.table):
  59. x = self.table[code]
  60. self.table.append(self.prevbuf+x[:1])
  61. elif code == len(self.table):
  62. self.table.append(self.prevbuf+self.prevbuf[:1])
  63. x = self.table[code]
  64. else:
  65. raise CorruptDataError
  66. l = len(self.table)
  67. if l == 511:
  68. self.nbits = 10
  69. elif l == 1023:
  70. self.nbits = 11
  71. elif l == 2047:
  72. self.nbits = 12
  73. self.prevbuf = x
  74. return x
  75. def run(self):
  76. while 1:
  77. try:
  78. code = self.readbits(self.nbits)
  79. except EOFError:
  80. break
  81. try:
  82. x = self.feed(code)
  83. except CorruptDataError:
  84. # just ignore corrupt data and stop yielding there
  85. break
  86. yield x
  87. if self.debug:
  88. print >>sys.stderr, ('nbits=%d, code=%d, output=%r, table=%r' %
  89. (self.nbits, code, x, self.table[258:]))
  90. return
  91. # lzwdecode
  92. def lzwdecode(data):
  93. """
  94. >>> lzwdecode('\x80\x0b\x60\x50\x22\x0c\x0c\x85\x01')
  95. '\x2d\x2d\x2d\x2d\x2d\x41\x2d\x2d\x2d\x42'
  96. """
  97. fp = StringIO(data)
  98. return ''.join(LZWDecoder(fp).run())
  99. if __name__ == '__main__':
  100. import doctest
  101. doctest.testmod()