_read_only.py 5.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194
  1. from __future__ import absolute_import
  2. # Copyright (c) 2010-2019 openpyxl
  3. """ Read worksheets on-demand
  4. """
  5. from .worksheet import Worksheet
  6. from openpyxl.cell.read_only import ReadOnlyCell, EMPTY_CELL
  7. from openpyxl.utils import get_column_letter
  8. from ._reader import WorkSheetParser
  9. class ReadOnlyWorksheet(object):
  10. _min_column = 1
  11. _min_row = 1
  12. _max_column = _max_row = None
  13. def __init__(self, parent_workbook, title, worksheet_path, shared_strings):
  14. self.parent = parent_workbook
  15. self.title = title
  16. self._current_row = None
  17. self._worksheet_path = worksheet_path
  18. self._shared_strings = shared_strings
  19. self._get_size()
  20. # Methods from Worksheet
  21. self.iter_rows = Worksheet.iter_rows.__get__(self)
  22. self.values = Worksheet.values.__get__(self)
  23. self.rows = Worksheet.rows.__get__(self)
  24. self.cell = Worksheet.cell.__get__(self)
  25. def __iter__(self):
  26. # 2.7 compat
  27. meth = Worksheet.__iter__.__get__(self)
  28. return meth()
  29. def __getitem__(self, key):
  30. # 2.7 compat
  31. meth = Worksheet.__getitem__.__get__(self)
  32. return meth(key)
  33. def _get_size(self):
  34. src = self._get_source()
  35. parser = WorkSheetParser(src, [])
  36. dimensions = parser.parse_dimensions()
  37. src.close()
  38. if dimensions is not None:
  39. self._min_column, self._min_row, self._max_column, self._max_row = dimensions
  40. def _get_source(self):
  41. """Parse xml source on demand, must close after use"""
  42. return self.parent._archive.open(self._worksheet_path)
  43. def _cells_by_row(self, min_col, min_row, max_col, max_row, values_only=False):
  44. """
  45. The source worksheet file may have columns or rows missing.
  46. Missing cells will be created.
  47. """
  48. filler = EMPTY_CELL
  49. if values_only:
  50. filler = None
  51. max_col = max_col or self.max_column
  52. max_row = max_row or self.max_row
  53. empty_row = []
  54. if max_col is not None:
  55. empty_row = (filler,) * (max_col + 1 - min_col)
  56. counter = min_row
  57. idx = 1
  58. src = self._get_source()
  59. parser = WorkSheetParser(src, self._shared_strings,
  60. data_only=self.parent.data_only, epoch=self.parent.epoch,
  61. date_formats=self.parent._date_formats)
  62. for idx, row in parser.parse():
  63. if max_row is not None and idx > max_row:
  64. break
  65. # some rows are missing
  66. for _ in range(counter, idx):
  67. counter += 1
  68. yield empty_row
  69. # return cells from a row
  70. if counter <= idx:
  71. row = self._get_row(row, min_col, max_col, values_only)
  72. counter += 1
  73. yield row
  74. if max_row is not None and max_row < idx:
  75. for _ in range(counter, max_row+1):
  76. yield empty_row
  77. src.close()
  78. def _get_row(self, row, min_col=1, max_col=None, values_only=False):
  79. """
  80. Make sure a row contains always the same number of cells or values
  81. """
  82. if not row:
  83. return ()
  84. last_col = row[-1]['column']
  85. max_col = max_col or last_col
  86. row_width = max_col + 1 - min_col
  87. if values_only:
  88. new_row = [None] * row_width
  89. else:
  90. new_row = [EMPTY_CELL] * row_width
  91. for cell in row:
  92. counter = cell['column']
  93. if min_col <= counter <= max_col:
  94. idx = counter - min_col
  95. if values_only:
  96. new_row[idx] = cell['value']
  97. else:
  98. new_row[idx] = ReadOnlyCell(self, **cell)
  99. return tuple(new_row)
  100. def _get_cell(self, row, column):
  101. """Cells are returned by a generator which can be empty"""
  102. for row in self._cells_by_row(column, row, column, row):
  103. if row:
  104. return row[0]
  105. return EMPTY_CELL
  106. def calculate_dimension(self, force=False):
  107. if not all([self.max_column, self.max_row]):
  108. if force:
  109. self._calculate_dimension()
  110. else:
  111. raise ValueError("Worksheet is unsized, use calculate_dimension(force=True)")
  112. return '%s%d:%s%d' % (
  113. get_column_letter(self.min_column), self.min_row,
  114. get_column_letter(self.max_column), self.max_row
  115. )
  116. def _calculate_dimension(self):
  117. """
  118. Loop through all the cells to get the size of a worksheet.
  119. Do this only if it is explicitly requested.
  120. """
  121. max_col = 0
  122. for r in self.rows:
  123. if not r:
  124. continue
  125. cell = r[-1]
  126. max_col = max(max_col, cell.column)
  127. self._max_row = cell.row
  128. self._max_column = max_col
  129. def reset_dimensions(self):
  130. """
  131. Remove worksheet dimensions if these are incorrect in the worksheet source.
  132. NB. This probably indicates a bug in the library or application that created
  133. the workbook.
  134. """
  135. self._max_row = self._max_column = None
  136. @property
  137. def min_row(self):
  138. return self._min_row
  139. @property
  140. def max_row(self):
  141. return self._max_row
  142. @property
  143. def min_column(self):
  144. return self._min_column
  145. @property
  146. def max_column(self):
  147. return self._max_column