123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663 |
- # -*- coding: cp1252 -*-
- ##
- # Support module for the xlrd package.
- #
- # <p>Portions copyright © 2005-2010 Stephen John Machin, Lingfo Pty Ltd</p>
- # <p>This module is part of the xlrd package, which is released under a BSD-style licence.</p>
- ##
- # 2010-03-01 SJM Reading SCL record
- # 2010-03-01 SJM Added more record IDs for biff_dump & biff_count
- # 2008-02-10 SJM BIFF2 BLANK record
- # 2008-02-08 SJM Preparation for Excel 2.0 support
- # 2008-02-02 SJM Added suffixes (_B2, _B2_ONLY, etc) on record names for biff_dump & biff_count
- # 2007-12-04 SJM Added support for Excel 2.x (BIFF2) files.
- # 2007-09-08 SJM Avoid crash when zero-length Unicode string missing options byte.
- # 2007-04-22 SJM Remove experimental "trimming" facility.
- from __future__ import print_function
- DEBUG = 0
- from struct import unpack
- import sys
- from .timemachine import *
class XLRDError(Exception):
    """Exception class defined by this support module for the xlrd package."""
##
# Parent of almost all other classes in the package. Defines a common "dump" method
# for debugging.
class BaseObject(object):
    """Base class that supplies a generic, recursive attribute dump."""

    # Names listed here are always printed with their repr, even when the
    # value is a list or dict (those are otherwise summarised by type/length).
    _repr_these = []

    ##
    # @param f open file object, to which the dump is written
    # @param header text to write before the dump
    # @param footer text to write after the dump
    # @param indent number of leading spaces (for recursive calls)
    def dump(self, f=None, header=None, footer=None, indent=0):
        """Write this object's attributes, sorted by name, to *f*.

        *f* defaults to ``sys.stderr``.  Attributes come from ``__slots__``
        when the object has them, otherwise from the instance ``__dict__``.
        A value that itself has a truthy ``dump`` attribute is dumped
        recursively with 4 extra spaces of indent, except for the attribute
        named ``book``.  Lists and dicts whose attribute name is not in
        ``_repr_these`` are shown as type and length only; everything else
        is shown with ``%r``.
        """
        if f is None:
            f = sys.stderr
        if hasattr(self, "__slots__"):
            pairs = [(name, getattr(self, name)) for name in self.__slots__]
        else:
            pairs = self.__dict__.items()
        pad = " " * indent
        if header is not None:
            print(header, file=f)
        for attr, value in sorted(pairs):
            if getattr(value, 'dump', None) and attr != 'book':
                sub_header = "%s%s (%s object):" % (pad, attr, value.__class__.__name__)
                value.dump(f, header=sub_header, indent=indent+4)
                continue
            summarise = (
                attr not in self._repr_these
                and isinstance(value, (list, dict))
            )
            if summarise:
                print("%s%s: %s, len = %d" % (pad, attr, type(value), len(value)), file=f)
            else:
                fprintf(f, "%s%s: %r\n", pad, attr, value)
        if footer is not None:
            print(footer, file=f)
# Format-type codes: unknown, date, number, general, text.
FUN = 0
FDT = 1
FNU = 2
FGE = 3
FTX = 4
DATEFORMAT = FDT
NUMBERFORMAT = FNU

# Cell-type codes.
XL_CELL_EMPTY = 0
XL_CELL_TEXT = 1
XL_CELL_NUMBER = 2
XL_CELL_DATE = 3
XL_CELL_BOOLEAN = 4
XL_CELL_ERROR = 5
XL_CELL_BLANK = 6  # for use in debugging, gathering stats, etc
# Maps a numeric BIFF version code to the text used when displaying it.
biff_text_from_num = {
    0:  "(not BIFF)",
    20: "2.0",
    21: "2.1",
    30: "3",
    40: "4S",
    45: "4W",
    50: "5",
    70: "7",
    80: "8",
    85: "8X",
}
##
# <p>This dictionary can be used to produce a text version of the internal codes
# that Excel uses for error cells. Here are its contents:
# <pre>
# 0x00: '#NULL!',  # Intersection of two cell ranges is empty
# 0x07: '#DIV/0!', # Division by zero
# 0x0F: '#VALUE!', # Wrong type of operand
# 0x17: '#REF!',   # Illegal or deleted cell reference
# 0x1D: '#NAME?',  # Wrong function or range name
# 0x24: '#NUM!',   # Value range overflow
# 0x2A: '#N/A',    # Argument or function not available
# </pre></p>
error_text_from_code = {
    0x00: '#NULL!',   # Intersection of two cell ranges is empty
    0x07: '#DIV/0!',  # Division by zero
    0x0F: '#VALUE!',  # Wrong type of operand
    0x17: '#REF!',    # Illegal or deleted cell reference
    0x1D: '#NAME?',   # Wrong function or range name
    0x24: '#NUM!',    # Value range overflow
    0x2A: '#N/A',     # Argument or function not available
}
# NOTE(review): presumably the lowest BIFF version code (see biff_text_from_num)
# whose strings are stored as Unicode -- confirm against callers.
BIFF_FIRST_UNICODE = 80

# Stream-type codes; the short names are aliases of the long ones.
XL_WORKBOOK_GLOBALS = 0x0005
WBKBLOBAL = XL_WORKBOOK_GLOBALS
XL_WORKBOOK_GLOBALS_4W = 0x0100
XL_WORKSHEET = 0x0010
WRKSHEET = XL_WORKSHEET

# Sheet-type codes associated with BOUNDSHEET.
XL_BOUNDSHEET_WORKSHEET = 0x00
XL_BOUNDSHEET_CHART = 0x02
XL_BOUNDSHEET_VB_MODULE = 0x06
# Record identifiers.  All values are written as 4-digit hexadecimal.
# (XL_RK2 = 0x007E is deliberately left undefined.)
XL_ARRAY = 0x0221
XL_ARRAY2 = 0x0021
XL_BLANK = 0x0201
XL_BLANK_B2 = 0x0001
XL_BOF = 0x0809
XL_BOOLERR = 0x0205
XL_BOOLERR_B2 = 0x0005
XL_BOUNDSHEET = 0x0085
XL_BUILTINFMTCOUNT = 0x0056
XL_CF = 0x01B1
XL_CODEPAGE = 0x0042
XL_COLINFO = 0x007D
XL_COLUMNDEFAULT = 0x0020  # BIFF2 only
XL_COLWIDTH = 0x0024  # BIFF2 only
XL_CONDFMT = 0x01B0
XL_CONTINUE = 0x003C
XL_COUNTRY = 0x008C
XL_DATEMODE = 0x0022
XL_DEFAULTROWHEIGHT = 0x0225
XL_DEFCOLWIDTH = 0x0055
XL_DIMENSION = 0x0200
XL_DIMENSION2 = 0x0000
XL_EFONT = 0x0045
XL_EOF = 0x000A
XL_EXTERNNAME = 0x0023
XL_EXTERNSHEET = 0x0017
XL_EXTSST = 0x00FF
XL_FEAT11 = 0x0872
XL_FILEPASS = 0x002F
XL_FONT = 0x0031
XL_FONT_B3B4 = 0x0231
XL_FORMAT = 0x041E
XL_FORMAT2 = 0x001E  # BIFF2, BIFF3
XL_FORMULA = 0x0006
XL_FORMULA3 = 0x0206
XL_FORMULA4 = 0x0406
XL_GCW = 0x00AB
XL_HLINK = 0x01B8
XL_QUICKTIP = 0x0800
XL_HORIZONTALPAGEBREAKS = 0x001B
XL_INDEX = 0x020B
XL_INTEGER = 0x0002  # BIFF2 only
XL_IXFE = 0x0044  # BIFF2 only
XL_LABEL = 0x0204
XL_LABEL_B2 = 0x0004
XL_LABELRANGES = 0x015F
XL_LABELSST = 0x00FD
XL_LEFTMARGIN = 0x0026
XL_TOPMARGIN = 0x0028
XL_RIGHTMARGIN = 0x0027
XL_BOTTOMMARGIN = 0x0029
XL_HEADER = 0x0014
XL_FOOTER = 0x0015
XL_HCENTER = 0x0083
XL_VCENTER = 0x0084
XL_MERGEDCELLS = 0x00E5
XL_MSO_DRAWING = 0x00EC
XL_MSO_DRAWING_GROUP = 0x00EB
XL_MSO_DRAWING_SELECTION = 0x00ED
XL_MULRK = 0x00BD
XL_MULBLANK = 0x00BE
XL_NAME = 0x0018
XL_NOTE = 0x001C
XL_NUMBER = 0x0203
XL_NUMBER_B2 = 0x0003
XL_OBJ = 0x005D
XL_PAGESETUP = 0x00A1
XL_PALETTE = 0x0092
XL_PANE = 0x0041
XL_PRINTGRIDLINES = 0x002B
XL_PRINTHEADERS = 0x002A
XL_RK = 0x027E
XL_ROW = 0x0208
XL_ROW_B2 = 0x0008
XL_RSTRING = 0x00D6
XL_SCL = 0x00A0
XL_SHEETHDR = 0x008F  # BIFF4W only
XL_SHEETPR = 0x0081
XL_SHEETSOFFSET = 0x008E  # BIFF4W only
XL_SHRFMLA = 0x04BC
XL_SST = 0x00FC
XL_STANDARDWIDTH = 0x0099
XL_STRING = 0x0207
XL_STRING_B2 = 0x0007
XL_STYLE = 0x0293
XL_SUPBOOK = 0x01AE  # aka EXTERNALBOOK in OOo docs
XL_TABLEOP = 0x0236
XL_TABLEOP2 = 0x0037
XL_TABLEOP_B2 = 0x0036
XL_TXO = 0x01B6
XL_UNCALCED = 0x005E
XL_UNKNOWN = 0xFFFF
XL_VERTICALPAGEBREAKS = 0x001A
XL_WINDOW2 = 0x023E
XL_WINDOW2_B2 = 0x003E
XL_WRITEACCESS = 0x005C
XL_WSBOOL = XL_SHEETPR  # alias: same record id
XL_XF = 0x00E0
XL_XF2 = 0x0043  # BIFF2 version of XF record
XL_XF3 = 0x0243  # BIFF3 version of XF record
XL_XF4 = 0x0443  # BIFF4 version of XF record
# Per-BOF-record-code integer (named as a length; NOTE(review): confirm the
# unit against the code that consumes it).
boflen = {
    0x0809: 8,
    0x0409: 6,
    0x0209: 6,
    0x0009: 4,
}
# The BOF record codes that are keys of ``boflen``, in the same order.
bofcodes = (
    0x0809,
    0x0409,
    0x0209,
    0x0009,
)
# Record codes of the FORMULA variants.
XL_FORMULA_OPCODES = (
    0x0006,
    0x0406,
    0x0206,
)
# Record codes that is_cell_opcode() recognises.
_cell_opcode_list = [
    XL_BOOLERR, XL_FORMULA, XL_FORMULA3, XL_FORMULA4, XL_LABEL,
    XL_LABELSST, XL_MULRK, XL_NUMBER, XL_RK, XL_RSTRING,
]

# Same codes as dict keys (value is always 1) for constant-time membership.
_cell_opcode_dict = {}
for _cell_opcode in _cell_opcode_list:
    _cell_opcode_dict.setdefault(_cell_opcode, 1)


def is_cell_opcode(c):
    """Return True when *c* is one of the codes in ``_cell_opcode_list``."""
    return c in _cell_opcode_dict
def upkbits(tgt_obj, src, manifest, local_setattr=setattr):
    """Unpack bit fields of *src* into attributes of *tgt_obj*.

    *manifest* is an iterable of ``(shift, mask, attribute_name)`` triples.
    For each one, ``(src & mask) >> shift`` is stored on *tgt_obj* under the
    given name via *local_setattr* (``setattr`` by default).
    """
    for shift, mask, name in manifest:
        field = (src & mask) >> shift
        local_setattr(tgt_obj, name, field)
def upkbitsL(tgt_obj, src, manifest, local_setattr=setattr, local_int=int):
    """Like ``upkbits``, but pass each extracted field through *local_int*.

    *manifest* is an iterable of ``(shift, mask, attribute_name)`` triples.
    For each one, ``local_int((src & mask) >> shift)`` is stored on
    *tgt_obj* under the given name via *local_setattr*.
    """
    for shift, mask, name in manifest:
        field = local_int((src & mask) >> shift)
        local_setattr(tgt_obj, name, field)
def unpack_string(data, pos, encoding, lenlen=1):
    """Decode a length-prefixed byte string found at *pos* in *data*.

    The prefix is *lenlen* bytes wide (1 -> unsigned byte, 2 -> unsigned
    little-endian 16-bit) and gives the number of bytes that follow; those
    bytes are decoded with *encoding* and returned.
    """
    start = pos + lenlen
    (nchars,) = unpack('<' + 'BH'[lenlen-1], data[pos:start])
    return unicode(data[start:start+nchars], encoding)
def unpack_string_update_pos(data, pos, encoding, lenlen=1, known_len=None):
    """Decode a byte string at *pos* and report where it ends.

    When *known_len* is None the string is preceded by a *lenlen*-byte
    length (1 -> unsigned byte, 2 -> unsigned little-endian 16-bit).
    Otherwise *known_len* is the length and the text starts at *pos* itself
    (on a NAME record the length byte is detached from the front of the
    string).

    Returns ``(decoded_string, position_just_after_the_string)``.
    """
    if known_len is None:
        (nchars,) = unpack('<' + 'BH'[lenlen-1], data[pos:pos+lenlen])
        pos += lenlen
    else:
        nchars = known_len
    newpos = pos + nchars
    return (unicode(data[pos:newpos], encoding), newpos)
def unpack_unicode(data, pos, lenlen=2):
    """Return the Unicode string stored at *pos* in *data*.

    Layout read here: a *lenlen*-byte character count, one options byte,
    an optional 2-byte field (options bit 0x08, rich text), an optional
    4-byte field (options bit 0x04, phonetic), then the characters.
    """
    nchars = unpack('<' + 'BH'[lenlen-1], data[pos:pos+lenlen])[0]
    if not nchars:
        # Ambiguous whether 0-length string should have an "options" byte.
        # Avoid crash if missing.
        return UNICODE_LITERAL("")
    pos += lenlen
    options = BYTES_ORD(data[pos])
    pos += 1
    if options & 0x08:
        pos += 2    # step over the rich-text field; its value is not needed
    if options & 0x04:
        pos += 4    # step over the phonetic field; its value is not needed
    if options & 0x01:
        # Uncompressed UTF-16-LE: two bytes per character.
        return unicode(data[pos:pos+2*nchars], 'utf_16_le')
    # Note: this is COMPRESSED (not ASCII!) encoding!!!
    # Returning raw bytes would only be right where the local codepage is
    # cp1252, so decode as latin_1 and always hand back Unicode.
    return unicode(data[pos:pos+nchars], "latin_1")
def unpack_unicode_update_pos(data, pos, lenlen=2, known_len=None):
    """Return (unicode_strg, updated value of pos).

    When *known_len* is None the character count is read from a
    *lenlen*-byte prefix at *pos*; otherwise *known_len* is used and no
    prefix is consumed (on a NAME record the length byte is detached from
    the front of the string).  The returned position lies beyond the
    characters and beyond any rich-text / phonetic trailer announced in
    the string header.
    """
    if known_len is None:
        (nchars,) = unpack('<' + 'BH'[lenlen-1], data[pos:pos+lenlen])
        pos += lenlen
    else:
        nchars = known_len
    if not nchars and not data[pos:]:
        # Zero-length string with no options byte
        return (UNICODE_LITERAL(""), pos)
    options = BYTES_ORD(data[pos])
    pos += 1
    trailer = 0     # bytes to skip after the characters
    if options & 0x08:
        # Rich text: a 16-bit count; 4 trailing bytes are skipped per unit.
        (rt,) = unpack('<H', data[pos:pos+2])
        trailer += 4 * rt
        pos += 2
    if options & 0x04:
        # Phonetic: a signed 32-bit size of trailing data to skip.
        (sz,) = unpack('<i', data[pos:pos+4])
        trailer += sz
        pos += 4
    if options & 0x01:
        # Uncompressed UTF-16-LE
        nbytes = 2 * nchars
        codec = 'utf_16_le'
    else:
        # Note: this is COMPRESSED (not ASCII!) encoding!!!
        nbytes = nchars
        codec = "latin_1"
    strg = unicode(data[pos:pos+nbytes], codec)
    return (strg, pos + nbytes + trailer)
def unpack_cell_range_address_list_update_pos(
        output_list, data, pos, biff_version, addr_size=6):
    """Append the address list stored at *pos* to *output_list*.

    The list starts with a 16-bit little-endian entry count.  Each entry
    is *addr_size* bytes: ``<HHBB`` when *addr_size* is 6, ``<HHHH`` when
    it is 8.  For every entry ``(a, b, c, d)`` the tuple
    ``(a, b + 1, c, d + 1)`` is appended (presumably first row, last row,
    first column, last column, turned into half-open ranges).

    *output_list* is updated in situ; *biff_version* is not consulted.
    Returns the position just after the last entry.
    """
    assert addr_size in (6, 8)
    # Used to assert size == 6 if not BIFF8, but pyWLWriter writes
    # BIFF8-only MERGEDCELLS records in a BIFF5 file!
    (n,) = unpack("<H", data[pos:pos+2])
    pos += 2
    if not n:
        return pos
    fmt = "<HHBB" if addr_size == 6 else "<HHHH"
    for _unused in xrange(n):
        lo_a, hi_a, lo_b, hi_b = unpack(fmt, data[pos:pos+addr_size])
        output_list.append((lo_a, hi_a + 1, lo_b, hi_b + 1))
        pos += addr_size
    return pos
# Table of record names, one "<4 hex digits> <name>" entry per line.  It is
# parsed into biff_rec_name_dict below and then discarded.
_brecstrg = """\
0000 DIMENSIONS_B2
0001 BLANK_B2
0002 INTEGER_B2_ONLY
0003 NUMBER_B2
0004 LABEL_B2
0005 BOOLERR_B2
0006 FORMULA
0007 STRING_B2
0008 ROW_B2
0009 BOF_B2
000A EOF
000B INDEX_B2_ONLY
000C CALCCOUNT
000D CALCMODE
000E PRECISION
000F REFMODE
0010 DELTA
0011 ITERATION
0012 PROTECT
0013 PASSWORD
0014 HEADER
0015 FOOTER
0016 EXTERNCOUNT
0017 EXTERNSHEET
0018 NAME_B2,5+
0019 WINDOWPROTECT
001A VERTICALPAGEBREAKS
001B HORIZONTALPAGEBREAKS
001C NOTE
001D SELECTION
001E FORMAT_B2-3
001F BUILTINFMTCOUNT_B2
0020 COLUMNDEFAULT_B2_ONLY
0021 ARRAY_B2_ONLY
0022 DATEMODE
0023 EXTERNNAME
0024 COLWIDTH_B2_ONLY
0025 DEFAULTROWHEIGHT_B2_ONLY
0026 LEFTMARGIN
0027 RIGHTMARGIN
0028 TOPMARGIN
0029 BOTTOMMARGIN
002A PRINTHEADERS
002B PRINTGRIDLINES
002F FILEPASS
0031 FONT
0032 FONT2_B2_ONLY
0036 TABLEOP_B2
0037 TABLEOP2_B2
003C CONTINUE
003D WINDOW1
003E WINDOW2_B2
0040 BACKUP
0041 PANE
0042 CODEPAGE
0043 XF_B2
0044 IXFE_B2_ONLY
0045 EFONT_B2_ONLY
004D PLS
0051 DCONREF
0055 DEFCOLWIDTH
0056 BUILTINFMTCOUNT_B3-4
0059 XCT
005A CRN
005B FILESHARING
005C WRITEACCESS
005D OBJECT
005E UNCALCED
005F SAVERECALC
0063 OBJECTPROTECT
007D COLINFO
007E RK2_mythical_?
0080 GUTS
0081 WSBOOL
0082 GRIDSET
0083 HCENTER
0084 VCENTER
0085 BOUNDSHEET
0086 WRITEPROT
008C COUNTRY
008D HIDEOBJ
008E SHEETSOFFSET
008F SHEETHDR
0090 SORT
0092 PALETTE
0099 STANDARDWIDTH
009B FILTERMODE
009C FNGROUPCOUNT
009D AUTOFILTERINFO
009E AUTOFILTER
00A0 SCL
00A1 SETUP
00AB GCW
00BD MULRK
00BE MULBLANK
00C1 MMS
00D6 RSTRING
00D7 DBCELL
00DA BOOKBOOL
00DD SCENPROTECT
00E0 XF
00E1 INTERFACEHDR
00E2 INTERFACEEND
00E5 MERGEDCELLS
00E9 BITMAP
00EB MSO_DRAWING_GROUP
00EC MSO_DRAWING
00ED MSO_DRAWING_SELECTION
00EF PHONETIC
00FC SST
00FD LABELSST
00FF EXTSST
013D TABID
015F LABELRANGES
0160 USESELFS
0161 DSF
01AE SUPBOOK
01AF PROTECTIONREV4
01B0 CONDFMT
01B1 CF
01B2 DVAL
01B6 TXO
01B7 REFRESHALL
01B8 HLINK
01BC PASSWORDREV4
01BE DV
01C0 XL9FILE
01C1 RECALCID
0200 DIMENSIONS
0201 BLANK
0203 NUMBER
0204 LABEL
0205 BOOLERR
0206 FORMULA_B3
0207 STRING
0208 ROW
0209 BOF
020B INDEX_B3+
0218 NAME
0221 ARRAY
0223 EXTERNNAME_B3-4
0225 DEFAULTROWHEIGHT
0231 FONT_B3B4
0236 TABLEOP
023E WINDOW2
0243 XF_B3
027E RK
0293 STYLE
0406 FORMULA_B4
0409 BOF
041E FORMAT
0443 XF_B4
04BC SHRFMLA
0800 QUICKTIP
0809 BOF
0862 SHEETLAYOUT
0867 SHEETPROTECTION
0868 RANGEPROTECTION
"""

# Maps integer record code -> display name (used by biff_dump and
# biff_count_records).
biff_rec_name_dict = {}
for _buff in _brecstrg.splitlines():
    _numh, _name = _buff.split()
    biff_rec_name_dict[int(_numh, 16)] = _name
# Remove every scratch name used while building the table, including _numh,
# so that no parsing temporaries linger as module globals.
del _buff, _numh, _name, _brecstrg
def hex_char_dump(strg, ofs, dlen, base=0, fout=sys.stdout, unnumbered=False):
    """Write a hex-plus-printable-character dump of part of *strg* to *fout*.

    Up to *dlen* bytes starting at offset *ofs* are shown, 16 per output
    line (the range is clipped to the end of *strg*).  Unless *unnumbered*
    is true, each line is prefixed with ``base + (offset - ofs)``.  In the
    character column a zero byte is shown as ``~``, bytes from space to
    ``~`` as themselves, and everything else as ``?``.

    If a slice does not have the expected length, a ``???`` diagnostic line
    is written and the dump stops.  The diagnostic goes to *fout*, like all
    other output of this function (it used to go to ``sys.stdout``
    regardless of *fout*).
    """
    endpos = min(ofs + dlen, len(strg))
    pos = ofs
    numbered = not unnumbered
    num_prefix = ''
    while pos < endpos:
        endsub = min(pos + 16, endpos)
        substrg = strg[pos:endsub]
        lensub = endsub - pos
        if lensub <= 0 or lensub != len(substrg):
            fprintf(
                fout,
                '??? hex_char_dump: ofs=%d dlen=%d base=%d -> endpos=%d pos=%d endsub=%d substrg=%r\n',
                ofs, dlen, base, endpos, pos, endsub, substrg)
            break
        codes = [BYTES_ORD(c) for c in substrg]
        hexd = ''.join(["%02x " % code for code in codes])
        # One display character per byte, joined once instead of `+=`.
        chard = ''.join([
            '~' if code == 0 else (chr(code) if 0x20 <= code <= 0x7E else '?')
            for code in codes
        ])
        if numbered:
            num_prefix = "%5d: " % (base+pos-ofs)
        fprintf(fout, "%s %-48s %s\n", num_prefix, hexd, chard)
        pos = endsub
def biff_dump(mem, stream_offset, stream_len, base=0, fout=sys.stdout, unnumbered=False):
    """Write a record-by-record dump of a stream to *fout*.

    The stream is the *stream_len* bytes of *mem* starting at
    *stream_offset*.  Each record has a 4-byte header (two little-endian
    16-bit values: record code and data length) followed by its data.  For
    every record a header line (code, name from ``biff_rec_name_dict`` or
    ``<UNKNOWN>``, length) is written, followed by a hex dump of the data.
    Runs of all-zero headers are summarised as "zero bytes skipped".
    Unless *unnumbered* is true, lines are prefixed with the position
    expressed relative to *base*.

    Bytes left over after the last complete record are dumped as
    "Misc bytes at end"; a record running past the end of the stream is
    reported.
    """
    pos = stream_offset
    stream_end = stream_offset + stream_len
    adj = base - stream_offset
    dummies = 0
    numbered = not unnumbered
    num_prefix = ''
    while stream_end - pos >= 4:
        rc, length = unpack('<HH', mem[pos:pos+4])
        if rc == 0 and length == 0:
            # Is the remainder of the *stream* all zeros?  The slice must
            # stop at stream_end: mem may continue beyond this stream, and
            # comparing mem[pos:] against (stream_end - pos) zero bytes
            # could then never succeed.
            if mem[pos:stream_end] == b'\0' * (stream_end - pos):
                dummies = stream_end - pos
                savpos = pos
                pos = stream_end
                break
            if dummies:
                dummies += 4
            else:
                # First zero header of a run: remember where the run began.
                savpos = pos
                dummies = 4
            pos += 4
        else:
            if dummies:
                # A real record ends the current run of zero headers.
                if numbered:
                    num_prefix = "%5d: " % (adj + savpos)
                fprintf(fout, "%s---- %d zero bytes skipped ----\n", num_prefix, dummies)
                dummies = 0
            recname = biff_rec_name_dict.get(rc, '<UNKNOWN>')
            if numbered:
                num_prefix = "%5d: " % (adj + pos)
            fprintf(fout, "%s%04x %s len = %04x (%d)\n", num_prefix, rc, recname, length, length)
            pos += 4
            hex_char_dump(mem, pos, length, adj+pos, fout, unnumbered)
            pos += length
    if dummies:
        if numbered:
            num_prefix = "%5d: " % (adj + savpos)
        fprintf(fout, "%s---- %d zero bytes skipped ----\n", num_prefix, dummies)
    if pos < stream_end:
        if numbered:
            num_prefix = "%5d: " % (adj + pos)
        fprintf(fout, "%s---- Misc bytes at end ----\n", num_prefix)
        hex_char_dump(mem, pos, stream_end-pos, adj + pos, fout, unnumbered)
    elif pos > stream_end:
        fprintf(fout, "Last dumped record has length (%d) that is too large\n", length)
def biff_count_records(mem, stream_offset, stream_len, fout=sys.stdout):
    """Count the records of a stream by name and print the tally to *fout*.

    The stream is the *stream_len* bytes of *mem* starting at
    *stream_offset*.  Each record has a 4-byte header (two little-endian
    16-bit values: record code and data length).  Names come from
    ``biff_rec_name_dict``; codes not found there are reported as
    ``Unknown_0xNNNN``.  An all-zero header is counted as
    ``<Dummy (zero)>`` unless everything from there to the end of the
    stream is zero, in which case counting stops.

    One line per name, sorted by name, is written as ``"%8d %s"``.
    """
    pos = stream_offset
    stream_end = stream_offset + stream_len
    tally = {}
    while stream_end - pos >= 4:
        rc, length = unpack('<HH', mem[pos:pos+4])
        if rc == 0 and length == 0:
            # Only the bytes up to stream_end belong to this stream; mem
            # may extend beyond it, so mem[pos:] would be the wrong slice
            # to compare against (stream_end - pos) zero bytes.
            if mem[pos:stream_end] == b'\0' * (stream_end - pos):
                break
            recname = "<Dummy (zero)>"
        else:
            recname = biff_rec_name_dict.get(rc, None)
            if recname is None:
                recname = "Unknown_0x%04X" % rc
        tally[recname] = tally.get(recname, 0) + 1
        pos += length + 4
    for recname, count in sorted(tally.items()):
        print("%8d %s" % (count, recname), file=fout)
# Maps a codepage number to the name of the Python codec used for it.
# Entries marked "guess" were marked as guesses by the original author.
encoding_from_codepage = {
    1200:  'utf_16_le',
    10000: 'mac_roman',
    10006: 'mac_greek',     # guess
    10007: 'mac_cyrillic',  # guess
    10029: 'mac_latin2',    # guess
    10079: 'mac_iceland',   # guess
    10081: 'mac_turkish',   # guess
    32768: 'mac_roman',
    32769: 'cp1252',
}
- # some more guessing, for Indic scripts
- # codepage 57000 range:
- # 2 Devanagari [0]
- # 3 Bengali [1]
- # 4 Tamil [5]
- # 5 Telegu [6]
- # 6 Assamese [1] c.f. Bengali
- # 7 Oriya [4]
- # 8 Kannada [7]
- # 9 Malayalam [8]
- # 10 Gujarati [3]
- # 11 Gurmukhi [2]
|