| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402 |
- # Copyright 2009 Matt Chaput. All rights reserved.
- #
- # Redistribution and use in source and binary forms, with or without
- # modification, are permitted provided that the following conditions are met:
- #
- # 1. Redistributions of source code must retain the above copyright notice,
- # this list of conditions and the following disclaimer.
- #
- # 2. Redistributions in binary form must reproduce the above copyright
- # notice, this list of conditions and the following disclaimer in the
- # documentation and/or other materials provided with the distribution.
- #
- # THIS SOFTWARE IS PROVIDED BY MATT CHAPUT ``AS IS'' AND ANY EXPRESS OR
- # IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
- # EVENT SHALL MATT CHAPUT OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- # INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
- # OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
- # EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- #
- # The views and conclusions contained in the software and documentation are
- # those of the authors and should not be interpreted as representing official
- # policies, either expressed or implied, of Matt Chaput.
- from array import array
- from copy import copy
- from struct import calcsize
- from whoosh.compat import BytesIO, bytes_type
- from whoosh.compat import dump as dump_pickle
- from whoosh.compat import load as load_pickle
- from whoosh.compat import array_frombytes, array_tobytes
- from whoosh.system import _INT_SIZE, _SHORT_SIZE, _FLOAT_SIZE, _LONG_SIZE
- from whoosh.system import IS_LITTLE
- from whoosh.system import pack_byte, unpack_byte, pack_sbyte, unpack_sbyte
- from whoosh.system import pack_ushort, unpack_ushort
- from whoosh.system import pack_ushort_le, unpack_ushort_le
- from whoosh.system import pack_int, unpack_int, pack_uint, unpack_uint
- from whoosh.system import pack_uint_le, unpack_uint_le
- from whoosh.system import pack_long, unpack_long, pack_ulong, unpack_ulong
- from whoosh.system import pack_float, unpack_float
- from whoosh.util.varints import varint, read_varint
- from whoosh.util.varints import signed_varint, decode_signed_varint
# Size in bytes (as reported by struct.calcsize) of each supported typecode
_SIZEMAP = {}
for _code in "bBiIhHqQf":
    _SIZEMAP[_code] = calcsize(_code)
del _code

# Byte order name -> struct format prefix character
_ORDERMAP = {
    "little": "<",
    "big": ">",
}

# (type name, struct typecode) pairs
_types = (
    ("sbyte", "b"),
    ("ushort", "H"),
    ("int", "i"),
    ("long", "q"),
    ("float", "f"),
)
- # Main function
class StructFile(object):
    """Wraps a file-like object and provides numerous additional methods for
    reading and writing structured binary data, such as ``write_varint`` and
    ``write_long``.

    The ``write_*`` and ``read_*`` methods operate at the wrapped file's
    current position. The ``get_*`` methods first seek to an explicit
    position and then read from there.
    """

    def __init__(self, fileobj, name=None, onclose=None):
        """
        :param fileobj: the file-like object to wrap.
        :param name: an optional name, used by ``repr()`` and ``str()``.
        :param onclose: an optional callable; ``close()`` calls it with this
            object as its only argument before closing the wrapped file.
        """

        self.file = fileobj
        self._name = name
        self.onclose = onclose
        self.is_closed = False

        # An object exposing ``fileno`` is treated as a "real" file; the array
        # methods then use array.tofile()/array.fromfile() directly on it.
        self.is_real = hasattr(fileobj, "fileno")
        if self.is_real:
            self.fileno = fileobj.fileno

    def __repr__(self):
        return "%s(%r)" % (self.__class__.__name__, self._name)

    def __str__(self):
        # The name defaults to None, and __str__ must return a string, so
        # convert explicitly instead of returning the attribute as-is.
        return str(self._name)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # close() raises if the file is already closed. Skip it when the body
        # of the ``with`` block closed the file itself, so leaving the block
        # does not raise (and does not mask an exception from the body).
        if not self.is_closed:
            self.close()

    def __iter__(self):
        return iter(self.file)

    def raw_file(self):
        """Returns the wrapped file object.
        """

        return self.file

    # Plain pass-throughs to the wrapped file

    def read(self, *args, **kwargs):
        return self.file.read(*args, **kwargs)

    def readline(self, *args, **kwargs):
        return self.file.readline(*args, **kwargs)

    def write(self, *args, **kwargs):
        return self.file.write(*args, **kwargs)

    def tell(self, *args, **kwargs):
        return self.file.tell(*args, **kwargs)

    def seek(self, *args, **kwargs):
        return self.file.seek(*args, **kwargs)

    def truncate(self, *args, **kwargs):
        return self.file.truncate(*args, **kwargs)

    def flush(self):
        """Flushes the buffer of the wrapped file. This is a no-op if the
        wrapped file does not have a flush method.
        """

        if hasattr(self.file, "flush"):
            self.file.flush()

    def close(self):
        """Closes the wrapped file. If an ``onclose`` callable was given, it
        is called with this object before the wrapped file is closed.

        :raises Exception: if this object has already been closed.
        """

        if self.is_closed:
            raise Exception("This file is already closed")
        if self.onclose:
            self.onclose(self)
        if hasattr(self.file, "close"):
            self.file.close()
        self.is_closed = True

    def subset(self, offset, length, name=None):
        """Returns a new StructFile wrapping a ``SubFile`` view of the
        wrapped file, starting at ``offset`` and ``length`` bytes long.

        :param name: name for the new object; defaults to this object's name.
        """

        # Imported locally rather than at module level (presumably to avoid
        # a circular import -- confirm before moving it).
        from whoosh.filedb.compound import SubFile

        name = name or self._name
        return StructFile(SubFile(self.file, offset, length), name=name)

    # Length-prefixed strings

    def write_string(self, s):
        """Writes a string to the wrapped file. This method writes the length
        of the string first (as a varint), so you can read the string back
        without having to know how long it was.
        """

        self.write_varint(len(s))
        self.write(s)

    def write_string2(self, s):
        """Writes a string prefixed by its length packed as an unsigned
        short.
        """

        self.write(pack_ushort(len(s)) + s)

    def write_string4(self, s):
        """Writes a string prefixed by its length packed as an int.
        """

        self.write(pack_int(len(s)) + s)

    def read_string(self):
        """Reads a string from the wrapped file: a varint length followed by
        that many bytes.
        """

        return self.read(self.read_varint())

    def read_string2(self):
        """Reads a string prefixed by an unsigned short length.
        """

        l = self.read_ushort()
        return self.read(l)

    def read_string4(self):
        """Reads a string prefixed by an int length.
        """

        l = self.read_int()
        return self.read(l)

    def get_string2(self, pos):
        """Reads the unsigned-short-prefixed string stored at ``pos``.

        :returns: a tuple of ``(string, position just after the string)``.
        """

        l = self.get_ushort(pos)
        base = pos + _SHORT_SIZE
        return self.get(base, l), base + l

    def get_string4(self, pos):
        """Reads the int-prefixed string stored at ``pos``.

        :returns: a tuple of ``(string, position just after the string)``.
        """

        l = self.get_int(pos)
        base = pos + _INT_SIZE
        return self.get(base, l), base + l

    def skip_string(self):
        """Reads a varint length and seeks forward past that many bytes
        without reading them.
        """

        l = self.read_varint()
        self.seek(l, 1)  # whence=1: relative to the current position

    # Variable-length integers

    def write_varint(self, i):
        """Writes a variable-length unsigned integer to the wrapped file.
        """

        self.write(varint(i))

    def write_svarint(self, i):
        """Writes a variable-length signed integer to the wrapped file.
        """

        self.write(signed_varint(i))

    def read_varint(self):
        """Reads a variable-length encoded unsigned integer from the wrapped
        file.
        """

        return read_varint(self.read)

    def read_svarint(self):
        """Reads a variable-length encoded signed integer from the wrapped
        file.
        """

        return decode_signed_varint(read_varint(self.read))

    def write_tagint(self, i):
        """Writes a sometimes-compressed unsigned integer to the wrapped file.
        This is similar to the varint methods but uses a less compressed but
        faster format.
        """

        # Store numbers 0-253 in one byte. Byte 254 means "an unsigned 16-bit
        # int follows." Byte 255 means "An unsigned 32-bit int follows."
        #
        # The tag byte is built with pack_byte() rather than chr() or a text
        # literal so that it is a byte string on Python 3 as well, and can be
        # concatenated with the packed value and written to a binary file.
        if i <= 253:
            self.write(pack_byte(i))
        elif i <= 65535:
            self.write(pack_byte(254) + pack_ushort(i))
        else:
            self.write(pack_byte(255) + pack_uint(i))

    def read_tagint(self):
        """Reads a sometimes-compressed unsigned integer from the wrapped file.
        This is similar to the varint methods but uses a less compressed but
        faster format.
        """

        tb = ord(self.read(1))
        if tb == 254:
            return self.read_ushort()
        elif tb == 255:
            return self.read_uint()
        else:
            return tb

    # Fixed-size values

    def write_byte(self, n):
        """Writes a single byte with the value ``n`` to the wrapped file,
        shortcut for ``file.write(pack_byte(n))``.
        """

        self.write(pack_byte(n))

    def read_byte(self):
        """Reads a single byte and returns its integer value.
        """

        return ord(self.read(1))

    def write_pickle(self, obj, protocol=-1):
        """Writes a pickled representation of obj to the wrapped file.
        """

        dump_pickle(obj, self.file, protocol)

    def read_pickle(self):
        """Reads a pickled object from the wrapped file.

        NOTE: unpickling data from an untrusted source can execute arbitrary
        code; only use this on files you trust.
        """

        return load_pickle(self.file)

    def write_sbyte(self, n):
        self.write(pack_sbyte(n))

    def write_int(self, n):
        self.write(pack_int(n))

    def write_uint(self, n):
        self.write(pack_uint(n))

    def write_uint_le(self, n):
        self.write(pack_uint_le(n))

    def write_ushort(self, n):
        self.write(pack_ushort(n))

    def write_ushort_le(self, n):
        self.write(pack_ushort_le(n))

    def write_long(self, n):
        self.write(pack_long(n))

    def write_ulong(self, n):
        self.write(pack_ulong(n))

    def write_float(self, n):
        self.write(pack_float(n))

    def write_array(self, arry):
        """Writes the contents of an ``array.array`` to the wrapped file.

        When ``IS_LITTLE`` is set the data is written byte-swapped; the swap
        is done on a copy so the caller's array is left unmodified.
        """

        if IS_LITTLE:
            arry = copy(arry)
            arry.byteswap()
        if self.is_real:
            arry.tofile(self.file)
        else:
            self.write(array_tobytes(arry))

    def read_sbyte(self):
        return unpack_sbyte(self.read(1))[0]

    def read_int(self):
        return unpack_int(self.read(_INT_SIZE))[0]

    def read_uint(self):
        return unpack_uint(self.read(_INT_SIZE))[0]

    def read_uint_le(self):
        return unpack_uint_le(self.read(_INT_SIZE))[0]

    def read_ushort(self):
        return unpack_ushort(self.read(_SHORT_SIZE))[0]

    def read_ushort_le(self):
        return unpack_ushort_le(self.read(_SHORT_SIZE))[0]

    def read_long(self):
        return unpack_long(self.read(_LONG_SIZE))[0]

    def read_ulong(self):
        return unpack_ulong(self.read(_LONG_SIZE))[0]

    def read_float(self):
        return unpack_float(self.read(_FLOAT_SIZE))[0]

    def read_array(self, typecode, length):
        """Reads ``length`` items of the given array typecode from the
        current position and returns them as an ``array.array``.

        When ``IS_LITTLE`` is set the items are byte-swapped after reading,
        mirroring ``write_array``.
        """

        a = array(typecode)
        if self.is_real:
            a.fromfile(self.file, length)
        else:
            array_frombytes(a, self.read(length * _SIZEMAP[typecode]))
        if IS_LITTLE:
            a.byteswap()
        return a

    # Positional reads: seek to a position, then read

    def get(self, position, length):
        """Seeks to ``position`` and returns the next ``length`` bytes.
        """

        self.seek(position)
        return self.read(length)

    def get_byte(self, position):
        return unpack_byte(self.get(position, 1))[0]

    def get_sbyte(self, position):
        return unpack_sbyte(self.get(position, 1))[0]

    def get_int(self, position):
        return unpack_int(self.get(position, _INT_SIZE))[0]

    def get_uint(self, position):
        return unpack_uint(self.get(position, _INT_SIZE))[0]

    def get_ushort(self, position):
        return unpack_ushort(self.get(position, _SHORT_SIZE))[0]

    def get_long(self, position):
        return unpack_long(self.get(position, _LONG_SIZE))[0]

    def get_ulong(self, position):
        return unpack_ulong(self.get(position, _LONG_SIZE))[0]

    def get_float(self, position):
        return unpack_float(self.get(position, _FLOAT_SIZE))[0]

    def get_array(self, position, typecode, length):
        """Seeks to ``position`` and reads an array there; see
        ``read_array``.
        """

        self.seek(position)
        return self.read_array(typecode, length)
class BufferFile(StructFile):
    """A StructFile over an in-memory buffer. Sequential reads and writes go
    through a ``BytesIO`` built from the buffer, while the positional ``get``
    methods slice the buffer directly.
    """

    def __init__(self, buf, name=None, onclose=None):
        """
        :param buf: the buffer object to read from.
        :param name: an optional name for this object.
        :param onclose: an optional callable invoked by ``close()``.
        """

        # Note: StructFile.__init__ is intentionally not called; all the
        # attributes are set up here instead.
        self._buf = buf
        self._name = name
        self.file = BytesIO(buf)
        self.onclose = onclose

        # Never treated as a "real" file
        self.is_closed = False
        self.is_real = False

    def subset(self, position, length, name=None):
        """Returns a new BufferFile over the ``length`` bytes of the buffer
        starting at ``position``. The name defaults to this object's name.
        """

        if not name:
            name = self._name
        chunk = self.get(position, length)
        return BufferFile(chunk, name=name)

    def get(self, position, length):
        """Returns ``length`` bytes of the buffer starting at ``position``,
        without touching the position of the underlying ``BytesIO``.
        """

        end = position + length
        return bytes_type(self._buf[position:end])

    def get_array(self, position, typecode, length):
        """Returns an ``array.array`` of ``length`` items of the given
        typecode, decoded from the buffer starting at ``position``.
        """

        values = array(typecode)
        nbytes = length * _SIZEMAP[typecode]
        array_frombytes(values, self.get(position, nbytes))
        if IS_LITTLE:
            values.byteswap()
        return values
class ChecksumFile(StructFile):
    """A StructFile that keeps a running CRC-32 checksum of every byte string
    read from or written to the wrapped file. Seeking is not allowed, since
    the checksum only makes sense for a sequential pass over the data.
    """

    def __init__(self, *args, **kwargs):
        """Takes the same arguments as ``StructFile``.
        """

        from zlib import crc32

        StructFile.__init__(self, *args, **kwargs)
        self._check = 0
        self._crc32 = crc32

    def __iter__(self):
        for line in self.file:
            self._check = self._crc32(line, self._check)
            yield line

    def seek(self, *args, **kwargs):
        """Always raises; accepts the same arguments as ``StructFile.seek``.

        :raises Exception: unconditionally.
        """

        raise Exception("Cannot seek on a ChecksumFile")

    def read(self, *args, **kwargs):
        """Reads from the wrapped file and folds the bytes read into the
        running checksum.
        """

        b = self.file.read(*args, **kwargs)
        self._check = self._crc32(b, self._check)
        return b

    def readline(self, *args, **kwargs):
        """Reads a line from the wrapped file and folds it into the running
        checksum.
        """

        # Without this override the inherited readline() would go straight to
        # the wrapped file and the bytes it consumed would be missing from
        # the checksum, unlike read() and iteration.
        line = self.file.readline(*args, **kwargs)
        self._check = self._crc32(line, self._check)
        return line

    def write(self, b):
        """Folds ``b`` into the running checksum and writes it to the wrapped
        file, returning whatever the wrapped file's ``write`` returns.
        """

        self._check = self._crc32(b, self._check)
        return self.file.write(b)

    def checksum(self):
        """Returns the checksum of everything read/written so far as an
        unsigned 32-bit integer.
        """

        # crc32() can return a signed value on some Python versions; the mask
        # normalizes the result to the unsigned range.
        return self._check & 0xffffffff
|