biffh.py 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663
  1. # -*- coding: cp1252 -*-
  2. ##
  3. # Support module for the xlrd package.
  4. #
  5. # <p>Portions copyright © 2005-2010 Stephen John Machin, Lingfo Pty Ltd</p>
  6. # <p>This module is part of the xlrd package, which is released under a BSD-style licence.</p>
  7. ##
  8. # 2010-03-01 SJM Reading SCL record
  9. # 2010-03-01 SJM Added more record IDs for biff_dump & biff_count
  10. # 2008-02-10 SJM BIFF2 BLANK record
  11. # 2008-02-08 SJM Preparation for Excel 2.0 support
  12. # 2008-02-02 SJM Added suffixes (_B2, _B2_ONLY, etc) on record names for biff_dump & biff_count
  13. # 2007-12-04 SJM Added support for Excel 2.x (BIFF2) files.
  14. # 2007-09-08 SJM Avoid crash when zero-length Unicode string missing options byte.
  15. # 2007-04-22 SJM Remove experimental "trimming" facility.
  16. from __future__ import print_function
  17. DEBUG = 0
  18. from struct import unpack
  19. import sys
  20. from .timemachine import *
  21. class XLRDError(Exception):
  22. pass
  23. ##
  24. # Parent of almost all other classes in the package. Defines a common "dump" method
  25. # for debugging.
  26. class BaseObject(object):
  27. _repr_these = []
  28. ##
  29. # @param f open file object, to which the dump is written
  30. # @param header text to write before the dump
  31. # @param footer text to write after the dump
  32. # @param indent number of leading spaces (for recursive calls)
  33. def dump(self, f=None, header=None, footer=None, indent=0):
  34. if f is None:
  35. f = sys.stderr
  36. if hasattr(self, "__slots__"):
  37. alist = []
  38. for attr in self.__slots__:
  39. alist.append((attr, getattr(self, attr)))
  40. else:
  41. alist = self.__dict__.items()
  42. alist = sorted(alist)
  43. pad = " " * indent
  44. if header is not None: print(header, file=f)
  45. list_type = type([])
  46. dict_type = type({})
  47. for attr, value in alist:
  48. if getattr(value, 'dump', None) and attr != 'book':
  49. value.dump(f,
  50. header="%s%s (%s object):" % (pad, attr, value.__class__.__name__),
  51. indent=indent+4)
  52. elif attr not in self._repr_these and (
  53. isinstance(value, list_type) or isinstance(value, dict_type)
  54. ):
  55. print("%s%s: %s, len = %d" % (pad, attr, type(value), len(value)), file=f)
  56. else:
  57. fprintf(f, "%s%s: %r\n", pad, attr, value)
  58. if footer is not None: print(footer, file=f)
  59. FUN, FDT, FNU, FGE, FTX = range(5) # unknown, date, number, general, text
  60. DATEFORMAT = FDT
  61. NUMBERFORMAT = FNU
  62. (
  63. XL_CELL_EMPTY,
  64. XL_CELL_TEXT,
  65. XL_CELL_NUMBER,
  66. XL_CELL_DATE,
  67. XL_CELL_BOOLEAN,
  68. XL_CELL_ERROR,
  69. XL_CELL_BLANK, # for use in debugging, gathering stats, etc
  70. ) = range(7)
  71. biff_text_from_num = {
  72. 0: "(not BIFF)",
  73. 20: "2.0",
  74. 21: "2.1",
  75. 30: "3",
  76. 40: "4S",
  77. 45: "4W",
  78. 50: "5",
  79. 70: "7",
  80. 80: "8",
  81. 85: "8X",
  82. }
  83. ##
  84. # <p>This dictionary can be used to produce a text version of the internal codes
  85. # that Excel uses for error cells. Here are its contents:
  86. # <pre>
  87. # 0x00: '#NULL!', # Intersection of two cell ranges is empty
  88. # 0x07: '#DIV/0!', # Division by zero
  89. # 0x0F: '#VALUE!', # Wrong type of operand
  90. # 0x17: '#REF!', # Illegal or deleted cell reference
  91. # 0x1D: '#NAME?', # Wrong function or range name
  92. # 0x24: '#NUM!', # Value range overflow
  93. # 0x2A: '#N/A', # Argument or function not available
  94. # </pre></p>
  95. error_text_from_code = {
  96. 0x00: '#NULL!', # Intersection of two cell ranges is empty
  97. 0x07: '#DIV/0!', # Division by zero
  98. 0x0F: '#VALUE!', # Wrong type of operand
  99. 0x17: '#REF!', # Illegal or deleted cell reference
  100. 0x1D: '#NAME?', # Wrong function or range name
  101. 0x24: '#NUM!', # Value range overflow
  102. 0x2A: '#N/A', # Argument or function not available
  103. }
  104. BIFF_FIRST_UNICODE = 80
  105. XL_WORKBOOK_GLOBALS = WBKBLOBAL = 0x5
  106. XL_WORKBOOK_GLOBALS_4W = 0x100
  107. XL_WORKSHEET = WRKSHEET = 0x10
  108. XL_BOUNDSHEET_WORKSHEET = 0x00
  109. XL_BOUNDSHEET_CHART = 0x02
  110. XL_BOUNDSHEET_VB_MODULE = 0x06
  111. # XL_RK2 = 0x7e
  112. XL_ARRAY = 0x0221
  113. XL_ARRAY2 = 0x0021
  114. XL_BLANK = 0x0201
  115. XL_BLANK_B2 = 0x01
  116. XL_BOF = 0x809
  117. XL_BOOLERR = 0x205
  118. XL_BOOLERR_B2 = 0x5
  119. XL_BOUNDSHEET = 0x85
  120. XL_BUILTINFMTCOUNT = 0x56
  121. XL_CF = 0x01B1
  122. XL_CODEPAGE = 0x42
  123. XL_COLINFO = 0x7D
  124. XL_COLUMNDEFAULT = 0x20 # BIFF2 only
  125. XL_COLWIDTH = 0x24 # BIFF2 only
  126. XL_CONDFMT = 0x01B0
  127. XL_CONTINUE = 0x3c
  128. XL_COUNTRY = 0x8C
  129. XL_DATEMODE = 0x22
  130. XL_DEFAULTROWHEIGHT = 0x0225
  131. XL_DEFCOLWIDTH = 0x55
  132. XL_DIMENSION = 0x200
  133. XL_DIMENSION2 = 0x0
  134. XL_EFONT = 0x45
  135. XL_EOF = 0x0a
  136. XL_EXTERNNAME = 0x23
  137. XL_EXTERNSHEET = 0x17
  138. XL_EXTSST = 0xff
  139. XL_FEAT11 = 0x872
  140. XL_FILEPASS = 0x2f
  141. XL_FONT = 0x31
  142. XL_FONT_B3B4 = 0x231
  143. XL_FORMAT = 0x41e
  144. XL_FORMAT2 = 0x1E # BIFF2, BIFF3
  145. XL_FORMULA = 0x6
  146. XL_FORMULA3 = 0x206
  147. XL_FORMULA4 = 0x406
  148. XL_GCW = 0xab
  149. XL_HLINK = 0x01B8
  150. XL_QUICKTIP = 0x0800
  151. XL_HORIZONTALPAGEBREAKS = 0x1b
  152. XL_INDEX = 0x20b
  153. XL_INTEGER = 0x2 # BIFF2 only
  154. XL_IXFE = 0x44 # BIFF2 only
  155. XL_LABEL = 0x204
  156. XL_LABEL_B2 = 0x04
  157. XL_LABELRANGES = 0x15f
  158. XL_LABELSST = 0xfd
  159. XL_LEFTMARGIN = 0x26
  160. XL_TOPMARGIN = 0x28
  161. XL_RIGHTMARGIN = 0x27
  162. XL_BOTTOMMARGIN = 0x29
  163. XL_HEADER = 0x14
  164. XL_FOOTER = 0x15
  165. XL_HCENTER = 0x83
  166. XL_VCENTER = 0x84
  167. XL_MERGEDCELLS = 0xE5
  168. XL_MSO_DRAWING = 0x00EC
  169. XL_MSO_DRAWING_GROUP = 0x00EB
  170. XL_MSO_DRAWING_SELECTION = 0x00ED
  171. XL_MULRK = 0xbd
  172. XL_MULBLANK = 0xbe
  173. XL_NAME = 0x18
  174. XL_NOTE = 0x1c
  175. XL_NUMBER = 0x203
  176. XL_NUMBER_B2 = 0x3
  177. XL_OBJ = 0x5D
  178. XL_PAGESETUP = 0xA1
  179. XL_PALETTE = 0x92
  180. XL_PANE = 0x41
  181. XL_PRINTGRIDLINES = 0x2B
  182. XL_PRINTHEADERS = 0x2A
  183. XL_RK = 0x27e
  184. XL_ROW = 0x208
  185. XL_ROW_B2 = 0x08
  186. XL_RSTRING = 0xd6
  187. XL_SCL = 0x00A0
  188. XL_SHEETHDR = 0x8F # BIFF4W only
  189. XL_SHEETPR = 0x81
  190. XL_SHEETSOFFSET = 0x8E # BIFF4W only
  191. XL_SHRFMLA = 0x04bc
  192. XL_SST = 0xfc
  193. XL_STANDARDWIDTH = 0x99
  194. XL_STRING = 0x207
  195. XL_STRING_B2 = 0x7
  196. XL_STYLE = 0x293
  197. XL_SUPBOOK = 0x1AE # aka EXTERNALBOOK in OOo docs
  198. XL_TABLEOP = 0x236
  199. XL_TABLEOP2 = 0x37
  200. XL_TABLEOP_B2 = 0x36
  201. XL_TXO = 0x1b6
  202. XL_UNCALCED = 0x5e
  203. XL_UNKNOWN = 0xffff
  204. XL_VERTICALPAGEBREAKS = 0x1a
  205. XL_WINDOW2 = 0x023E
  206. XL_WINDOW2_B2 = 0x003E
  207. XL_WRITEACCESS = 0x5C
  208. XL_WSBOOL = XL_SHEETPR
  209. XL_XF = 0xe0
  210. XL_XF2 = 0x0043 # BIFF2 version of XF record
  211. XL_XF3 = 0x0243 # BIFF3 version of XF record
  212. XL_XF4 = 0x0443 # BIFF4 version of XF record
  213. boflen = {0x0809: 8, 0x0409: 6, 0x0209: 6, 0x0009: 4}
  214. bofcodes = (0x0809, 0x0409, 0x0209, 0x0009)
  215. XL_FORMULA_OPCODES = (0x0006, 0x0406, 0x0206)
  216. _cell_opcode_list = [
  217. XL_BOOLERR,
  218. XL_FORMULA,
  219. XL_FORMULA3,
  220. XL_FORMULA4,
  221. XL_LABEL,
  222. XL_LABELSST,
  223. XL_MULRK,
  224. XL_NUMBER,
  225. XL_RK,
  226. XL_RSTRING,
  227. ]
  228. _cell_opcode_dict = {}
  229. for _cell_opcode in _cell_opcode_list:
  230. _cell_opcode_dict[_cell_opcode] = 1
  231. def is_cell_opcode(c):
  232. return c in _cell_opcode_dict
  233. def upkbits(tgt_obj, src, manifest, local_setattr=setattr):
  234. for n, mask, attr in manifest:
  235. local_setattr(tgt_obj, attr, (src & mask) >> n)
  236. def upkbitsL(tgt_obj, src, manifest, local_setattr=setattr, local_int=int):
  237. for n, mask, attr in manifest:
  238. local_setattr(tgt_obj, attr, local_int((src & mask) >> n))
  239. def unpack_string(data, pos, encoding, lenlen=1):
  240. nchars = unpack('<' + 'BH'[lenlen-1], data[pos:pos+lenlen])[0]
  241. pos += lenlen
  242. return unicode(data[pos:pos+nchars], encoding)
  243. def unpack_string_update_pos(data, pos, encoding, lenlen=1, known_len=None):
  244. if known_len is not None:
  245. # On a NAME record, the length byte is detached from the front of the string.
  246. nchars = known_len
  247. else:
  248. nchars = unpack('<' + 'BH'[lenlen-1], data[pos:pos+lenlen])[0]
  249. pos += lenlen
  250. newpos = pos + nchars
  251. return (unicode(data[pos:newpos], encoding), newpos)
  252. def unpack_unicode(data, pos, lenlen=2):
  253. "Return unicode_strg"
  254. nchars = unpack('<' + 'BH'[lenlen-1], data[pos:pos+lenlen])[0]
  255. if not nchars:
  256. # Ambiguous whether 0-length string should have an "options" byte.
  257. # Avoid crash if missing.
  258. return UNICODE_LITERAL("")
  259. pos += lenlen
  260. options = BYTES_ORD(data[pos])
  261. pos += 1
  262. # phonetic = options & 0x04
  263. # richtext = options & 0x08
  264. if options & 0x08:
  265. # rt = unpack('<H', data[pos:pos+2])[0] # unused
  266. pos += 2
  267. if options & 0x04:
  268. # sz = unpack('<i', data[pos:pos+4])[0] # unused
  269. pos += 4
  270. if options & 0x01:
  271. # Uncompressed UTF-16-LE
  272. rawstrg = data[pos:pos+2*nchars]
  273. # if DEBUG: print "nchars=%d pos=%d rawstrg=%r" % (nchars, pos, rawstrg)
  274. strg = unicode(rawstrg, 'utf_16_le')
  275. # pos += 2*nchars
  276. else:
  277. # Note: this is COMPRESSED (not ASCII!) encoding!!!
  278. # Merely returning the raw bytes would work OK 99.99% of the time
  279. # if the local codepage was cp1252 -- however this would rapidly go pear-shaped
  280. # for other codepages so we grit our Anglocentric teeth and return Unicode :-)
  281. strg = unicode(data[pos:pos+nchars], "latin_1")
  282. # pos += nchars
  283. # if richtext:
  284. # pos += 4 * rt
  285. # if phonetic:
  286. # pos += sz
  287. # return (strg, pos)
  288. return strg
  289. def unpack_unicode_update_pos(data, pos, lenlen=2, known_len=None):
  290. "Return (unicode_strg, updated value of pos)"
  291. if known_len is not None:
  292. # On a NAME record, the length byte is detached from the front of the string.
  293. nchars = known_len
  294. else:
  295. nchars = unpack('<' + 'BH'[lenlen-1], data[pos:pos+lenlen])[0]
  296. pos += lenlen
  297. if not nchars and not data[pos:]:
  298. # Zero-length string with no options byte
  299. return (UNICODE_LITERAL(""), pos)
  300. options = BYTES_ORD(data[pos])
  301. pos += 1
  302. phonetic = options & 0x04
  303. richtext = options & 0x08
  304. if richtext:
  305. rt = unpack('<H', data[pos:pos+2])[0]
  306. pos += 2
  307. if phonetic:
  308. sz = unpack('<i', data[pos:pos+4])[0]
  309. pos += 4
  310. if options & 0x01:
  311. # Uncompressed UTF-16-LE
  312. strg = unicode(data[pos:pos+2*nchars], 'utf_16_le')
  313. pos += 2*nchars
  314. else:
  315. # Note: this is COMPRESSED (not ASCII!) encoding!!!
  316. strg = unicode(data[pos:pos+nchars], "latin_1")
  317. pos += nchars
  318. if richtext:
  319. pos += 4 * rt
  320. if phonetic:
  321. pos += sz
  322. return (strg, pos)
  323. def unpack_cell_range_address_list_update_pos(
  324. output_list, data, pos, biff_version, addr_size=6):
  325. # output_list is updated in situ
  326. assert addr_size in (6, 8)
  327. # Used to assert size == 6 if not BIFF8, but pyWLWriter writes
  328. # BIFF8-only MERGEDCELLS records in a BIFF5 file!
  329. n, = unpack("<H", data[pos:pos+2])
  330. pos += 2
  331. if n:
  332. if addr_size == 6:
  333. fmt = "<HHBB"
  334. else:
  335. fmt = "<HHHH"
  336. for _unused in xrange(n):
  337. ra, rb, ca, cb = unpack(fmt, data[pos:pos+addr_size])
  338. output_list.append((ra, rb+1, ca, cb+1))
  339. pos += addr_size
  340. return pos
  341. _brecstrg = """\
  342. 0000 DIMENSIONS_B2
  343. 0001 BLANK_B2
  344. 0002 INTEGER_B2_ONLY
  345. 0003 NUMBER_B2
  346. 0004 LABEL_B2
  347. 0005 BOOLERR_B2
  348. 0006 FORMULA
  349. 0007 STRING_B2
  350. 0008 ROW_B2
  351. 0009 BOF_B2
  352. 000A EOF
  353. 000B INDEX_B2_ONLY
  354. 000C CALCCOUNT
  355. 000D CALCMODE
  356. 000E PRECISION
  357. 000F REFMODE
  358. 0010 DELTA
  359. 0011 ITERATION
  360. 0012 PROTECT
  361. 0013 PASSWORD
  362. 0014 HEADER
  363. 0015 FOOTER
  364. 0016 EXTERNCOUNT
  365. 0017 EXTERNSHEET
  366. 0018 NAME_B2,5+
  367. 0019 WINDOWPROTECT
  368. 001A VERTICALPAGEBREAKS
  369. 001B HORIZONTALPAGEBREAKS
  370. 001C NOTE
  371. 001D SELECTION
  372. 001E FORMAT_B2-3
  373. 001F BUILTINFMTCOUNT_B2
  374. 0020 COLUMNDEFAULT_B2_ONLY
  375. 0021 ARRAY_B2_ONLY
  376. 0022 DATEMODE
  377. 0023 EXTERNNAME
  378. 0024 COLWIDTH_B2_ONLY
  379. 0025 DEFAULTROWHEIGHT_B2_ONLY
  380. 0026 LEFTMARGIN
  381. 0027 RIGHTMARGIN
  382. 0028 TOPMARGIN
  383. 0029 BOTTOMMARGIN
  384. 002A PRINTHEADERS
  385. 002B PRINTGRIDLINES
  386. 002F FILEPASS
  387. 0031 FONT
  388. 0032 FONT2_B2_ONLY
  389. 0036 TABLEOP_B2
  390. 0037 TABLEOP2_B2
  391. 003C CONTINUE
  392. 003D WINDOW1
  393. 003E WINDOW2_B2
  394. 0040 BACKUP
  395. 0041 PANE
  396. 0042 CODEPAGE
  397. 0043 XF_B2
  398. 0044 IXFE_B2_ONLY
  399. 0045 EFONT_B2_ONLY
  400. 004D PLS
  401. 0051 DCONREF
  402. 0055 DEFCOLWIDTH
  403. 0056 BUILTINFMTCOUNT_B3-4
  404. 0059 XCT
  405. 005A CRN
  406. 005B FILESHARING
  407. 005C WRITEACCESS
  408. 005D OBJECT
  409. 005E UNCALCED
  410. 005F SAVERECALC
  411. 0063 OBJECTPROTECT
  412. 007D COLINFO
  413. 007E RK2_mythical_?
  414. 0080 GUTS
  415. 0081 WSBOOL
  416. 0082 GRIDSET
  417. 0083 HCENTER
  418. 0084 VCENTER
  419. 0085 BOUNDSHEET
  420. 0086 WRITEPROT
  421. 008C COUNTRY
  422. 008D HIDEOBJ
  423. 008E SHEETSOFFSET
  424. 008F SHEETHDR
  425. 0090 SORT
  426. 0092 PALETTE
  427. 0099 STANDARDWIDTH
  428. 009B FILTERMODE
  429. 009C FNGROUPCOUNT
  430. 009D AUTOFILTERINFO
  431. 009E AUTOFILTER
  432. 00A0 SCL
  433. 00A1 SETUP
  434. 00AB GCW
  435. 00BD MULRK
  436. 00BE MULBLANK
  437. 00C1 MMS
  438. 00D6 RSTRING
  439. 00D7 DBCELL
  440. 00DA BOOKBOOL
  441. 00DD SCENPROTECT
  442. 00E0 XF
  443. 00E1 INTERFACEHDR
  444. 00E2 INTERFACEEND
  445. 00E5 MERGEDCELLS
  446. 00E9 BITMAP
  447. 00EB MSO_DRAWING_GROUP
  448. 00EC MSO_DRAWING
  449. 00ED MSO_DRAWING_SELECTION
  450. 00EF PHONETIC
  451. 00FC SST
  452. 00FD LABELSST
  453. 00FF EXTSST
  454. 013D TABID
  455. 015F LABELRANGES
  456. 0160 USESELFS
  457. 0161 DSF
  458. 01AE SUPBOOK
  459. 01AF PROTECTIONREV4
  460. 01B0 CONDFMT
  461. 01B1 CF
  462. 01B2 DVAL
  463. 01B6 TXO
  464. 01B7 REFRESHALL
  465. 01B8 HLINK
  466. 01BC PASSWORDREV4
  467. 01BE DV
  468. 01C0 XL9FILE
  469. 01C1 RECALCID
  470. 0200 DIMENSIONS
  471. 0201 BLANK
  472. 0203 NUMBER
  473. 0204 LABEL
  474. 0205 BOOLERR
  475. 0206 FORMULA_B3
  476. 0207 STRING
  477. 0208 ROW
  478. 0209 BOF
  479. 020B INDEX_B3+
  480. 0218 NAME
  481. 0221 ARRAY
  482. 0223 EXTERNNAME_B3-4
  483. 0225 DEFAULTROWHEIGHT
  484. 0231 FONT_B3B4
  485. 0236 TABLEOP
  486. 023E WINDOW2
  487. 0243 XF_B3
  488. 027E RK
  489. 0293 STYLE
  490. 0406 FORMULA_B4
  491. 0409 BOF
  492. 041E FORMAT
  493. 0443 XF_B4
  494. 04BC SHRFMLA
  495. 0800 QUICKTIP
  496. 0809 BOF
  497. 0862 SHEETLAYOUT
  498. 0867 SHEETPROTECTION
  499. 0868 RANGEPROTECTION
  500. """
  501. biff_rec_name_dict = {}
  502. for _buff in _brecstrg.splitlines():
  503. _numh, _name = _buff.split()
  504. biff_rec_name_dict[int(_numh, 16)] = _name
  505. del _buff, _name, _brecstrg
  506. def hex_char_dump(strg, ofs, dlen, base=0, fout=sys.stdout, unnumbered=False):
  507. endpos = min(ofs + dlen, len(strg))
  508. pos = ofs
  509. numbered = not unnumbered
  510. num_prefix = ''
  511. while pos < endpos:
  512. endsub = min(pos + 16, endpos)
  513. substrg = strg[pos:endsub]
  514. lensub = endsub - pos
  515. if lensub <= 0 or lensub != len(substrg):
  516. fprintf(
  517. sys.stdout,
  518. '??? hex_char_dump: ofs=%d dlen=%d base=%d -> endpos=%d pos=%d endsub=%d substrg=%r\n',
  519. ofs, dlen, base, endpos, pos, endsub, substrg)
  520. break
  521. hexd = ''.join(["%02x " % BYTES_ORD(c) for c in substrg])
  522. chard = ''
  523. for c in substrg:
  524. c = chr(BYTES_ORD(c))
  525. if c == '\0':
  526. c = '~'
  527. elif not (' ' <= c <= '~'):
  528. c = '?'
  529. chard += c
  530. if numbered:
  531. num_prefix = "%5d: " % (base+pos-ofs)
  532. fprintf(fout, "%s %-48s %s\n", num_prefix, hexd, chard)
  533. pos = endsub
  534. def biff_dump(mem, stream_offset, stream_len, base=0, fout=sys.stdout, unnumbered=False):
  535. pos = stream_offset
  536. stream_end = stream_offset + stream_len
  537. adj = base - stream_offset
  538. dummies = 0
  539. numbered = not unnumbered
  540. num_prefix = ''
  541. while stream_end - pos >= 4:
  542. rc, length = unpack('<HH', mem[pos:pos+4])
  543. if rc == 0 and length == 0:
  544. if mem[pos:] == b'\0' * (stream_end - pos):
  545. dummies = stream_end - pos
  546. savpos = pos
  547. pos = stream_end
  548. break
  549. if dummies:
  550. dummies += 4
  551. else:
  552. savpos = pos
  553. dummies = 4
  554. pos += 4
  555. else:
  556. if dummies:
  557. if numbered:
  558. num_prefix = "%5d: " % (adj + savpos)
  559. fprintf(fout, "%s---- %d zero bytes skipped ----\n", num_prefix, dummies)
  560. dummies = 0
  561. recname = biff_rec_name_dict.get(rc, '<UNKNOWN>')
  562. if numbered:
  563. num_prefix = "%5d: " % (adj + pos)
  564. fprintf(fout, "%s%04x %s len = %04x (%d)\n", num_prefix, rc, recname, length, length)
  565. pos += 4
  566. hex_char_dump(mem, pos, length, adj+pos, fout, unnumbered)
  567. pos += length
  568. if dummies:
  569. if numbered:
  570. num_prefix = "%5d: " % (adj + savpos)
  571. fprintf(fout, "%s---- %d zero bytes skipped ----\n", num_prefix, dummies)
  572. if pos < stream_end:
  573. if numbered:
  574. num_prefix = "%5d: " % (adj + pos)
  575. fprintf(fout, "%s---- Misc bytes at end ----\n", num_prefix)
  576. hex_char_dump(mem, pos, stream_end-pos, adj + pos, fout, unnumbered)
  577. elif pos > stream_end:
  578. fprintf(fout, "Last dumped record has length (%d) that is too large\n", length)
  579. def biff_count_records(mem, stream_offset, stream_len, fout=sys.stdout):
  580. pos = stream_offset
  581. stream_end = stream_offset + stream_len
  582. tally = {}
  583. while stream_end - pos >= 4:
  584. rc, length = unpack('<HH', mem[pos:pos+4])
  585. if rc == 0 and length == 0:
  586. if mem[pos:] == b'\0' * (stream_end - pos):
  587. break
  588. recname = "<Dummy (zero)>"
  589. else:
  590. recname = biff_rec_name_dict.get(rc, None)
  591. if recname is None:
  592. recname = "Unknown_0x%04X" % rc
  593. if recname in tally:
  594. tally[recname] += 1
  595. else:
  596. tally[recname] = 1
  597. pos += length + 4
  598. slist = sorted(tally.items())
  599. for recname, count in slist:
  600. print("%8d %s" % (count, recname), file=fout)
  601. encoding_from_codepage = {
  602. 1200 : 'utf_16_le',
  603. 10000: 'mac_roman',
  604. 10006: 'mac_greek', # guess
  605. 10007: 'mac_cyrillic', # guess
  606. 10029: 'mac_latin2', # guess
  607. 10079: 'mac_iceland', # guess
  608. 10081: 'mac_turkish', # guess
  609. 32768: 'mac_roman',
  610. 32769: 'cp1252',
  611. }
  612. # some more guessing, for Indic scripts
  613. # codepage 57000 range:
  614. # 2 Devanagari [0]
  615. # 3 Bengali [1]
  616. # 4 Tamil [5]
  617. # 5 Telegu [6]
  618. # 6 Assamese [1] c.f. Bengali
  619. # 7 Oriya [4]
  620. # 8 Kannada [7]
  621. # 9 Malayalam [8]
  622. # 10 Gujarati [3]
  623. # 11 Gurmukhi [2]