workbook.py 3.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127
  1. from __future__ import absolute_import
  2. # Copyright (c) 2010-2015 openpyxl
  3. import posixpath
  4. from warnings import warn
  5. from openpyxl.xml.functions import fromstring
  6. from openpyxl.packaging.relationship import (
  7. get_dependents,
  8. get_rels_path,
  9. get_rel,
  10. )
  11. from openpyxl.packaging.manifest import Manifest
  12. from openpyxl.packaging.workbook import WorkbookPackage
  13. from openpyxl.workbook import Workbook
  14. from openpyxl.workbook.defined_name import (
  15. _unpack_print_area,
  16. _unpack_print_titles,
  17. )
  18. from openpyxl.workbook.external_link.external import read_external_link
  19. from openpyxl.pivot.cache import CacheDefinition
  20. from openpyxl.pivot.record import RecordList
  21. from openpyxl.utils.datetime import CALENDAR_MAC_1904
  22. class WorkbookParser:
  23. _rels = None
  24. def __init__(self, archive, workbook_part_name, keep_links=True):
  25. self.archive = archive
  26. self.workbook_part_name = workbook_part_name
  27. self.wb = Workbook()
  28. self.keep_links = keep_links
  29. self.sheets = []
  30. @property
  31. def rels(self):
  32. if self._rels is None:
  33. self._rels = get_dependents(self.archive, get_rels_path(self.workbook_part_name))
  34. return self._rels
  35. def parse(self):
  36. src = self.archive.read(self.workbook_part_name)
  37. node = fromstring(src)
  38. package = WorkbookPackage.from_tree(node)
  39. if package.properties.date1904:
  40. self.wb.epoch = CALENDAR_MAC_1904
  41. self.wb.code_name = package.properties.codeName
  42. self.wb.active = package.active
  43. self.wb.views = package.bookViews
  44. self.sheets = package.sheets
  45. self.wb.calculation = package.calcPr
  46. self.caches = package.pivotCaches
  47. #external links contain cached worksheets and can be very big
  48. if not self.keep_links:
  49. package.externalReferences = []
  50. for ext_ref in package.externalReferences:
  51. rel = self.rels[ext_ref.id]
  52. self.wb._external_links.append(
  53. read_external_link(self.archive, rel.Target)
  54. )
  55. if package.definedNames:
  56. package.definedNames._cleanup()
  57. self.wb.defined_names = package.definedNames
  58. self.wb.security = package.workbookProtection
  59. def find_sheets(self):
  60. """
  61. Find all sheets in the workbook and return the link to the source file.
  62. Older XLSM files sometimes contain invalid sheet elements.
  63. Warn user when these are removed.
  64. """
  65. for sheet in self.sheets:
  66. if not sheet.id:
  67. msg = "File contains an invalid specification for {0}. This will be removed".format(sheet.name)
  68. warn(msg)
  69. continue
  70. yield sheet, self.rels[sheet.id]
  71. def assign_names(self):
  72. """
  73. Bind reserved names to parsed worksheets
  74. """
  75. defns = []
  76. for defn in self.wb.defined_names.definedName:
  77. reserved = defn.is_reserved
  78. if reserved in ("Print_Titles", "Print_Area"):
  79. sheet = self.wb._sheets[defn.localSheetId]
  80. if reserved == "Print_Titles":
  81. rows, cols = _unpack_print_titles(defn)
  82. sheet.print_title_rows = rows
  83. sheet.print_title_cols = cols
  84. elif reserved == "Print_Area":
  85. sheet.print_area = _unpack_print_area(defn)
  86. else:
  87. defns.append(defn)
  88. self.wb.defined_names.definedName = defns
  89. @property
  90. def pivot_caches(self):
  91. """
  92. Get PivotCache objects
  93. """
  94. d = {}
  95. for c in self.caches:
  96. cache = get_rel(self.archive, self.rels, id=c.id, cls=CacheDefinition)
  97. if cache.deps:
  98. records = get_rel(self.archive, cache.deps, cache.id, RecordList)
  99. cache.records = records
  100. d[c.cacheId] = cache
  101. return d