123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113 |
- #!/usr/bin/env python
- import sys
- try:
- from cStringIO import StringIO
- except ImportError:
- from StringIO import StringIO
- class CorruptDataError(Exception):
- pass
- ## LZWDecoder
- ##
- class LZWDecoder(object):
- debug = 0
- def __init__(self, fp):
- self.fp = fp
- self.buff = 0
- self.bpos = 8
- self.nbits = 9
- self.table = None
- self.prevbuf = None
- return
- def readbits(self, bits):
- v = 0
- while 1:
- # the number of remaining bits we can get from the current buffer.
- r = 8-self.bpos
- if bits <= r:
- # |-----8-bits-----|
- # |-bpos-|-bits-| |
- # | |----r----|
- v = (v << bits) | ((self.buff >> (r-bits)) & ((1 << bits)-1))
- self.bpos += bits
- break
- else:
- # |-----8-bits-----|
- # |-bpos-|---bits----...
- # | |----r----|
- v = (v << r) | (self.buff & ((1 << r)-1))
- bits -= r
- x = self.fp.read(1)
- if not x:
- raise EOFError
- self.buff = ord(x)
- self.bpos = 0
- return v
- def feed(self, code):
- x = ''
- if code == 256:
- self.table = [chr(c) for c in xrange(256)] # 0-255
- self.table.append(None) # 256
- self.table.append(None) # 257
- self.prevbuf = ''
- self.nbits = 9
- elif code == 257:
- pass
- elif not self.prevbuf:
- x = self.prevbuf = self.table[code]
- else:
- if code < len(self.table):
- x = self.table[code]
- self.table.append(self.prevbuf+x[:1])
- elif code == len(self.table):
- self.table.append(self.prevbuf+self.prevbuf[:1])
- x = self.table[code]
- else:
- raise CorruptDataError
- l = len(self.table)
- if l == 511:
- self.nbits = 10
- elif l == 1023:
- self.nbits = 11
- elif l == 2047:
- self.nbits = 12
- self.prevbuf = x
- return x
- def run(self):
- while 1:
- try:
- code = self.readbits(self.nbits)
- except EOFError:
- break
- try:
- x = self.feed(code)
- except CorruptDataError:
- # just ignore corrupt data and stop yielding there
- break
- yield x
- if self.debug:
- print >>sys.stderr, ('nbits=%d, code=%d, output=%r, table=%r' %
- (self.nbits, code, x, self.table[258:]))
- return
- # lzwdecode
- def lzwdecode(data):
- """
- >>> lzwdecode('\x80\x0b\x60\x50\x22\x0c\x0c\x85\x01')
- '\x2d\x2d\x2d\x2d\x2d\x41\x2d\x2d\x2d\x42'
- """
- fp = StringIO(data)
- return ''.join(LZWDecoder(fp).run())
- if __name__ == '__main__':
- import doctest
- doctest.testmod()
|