from __future__ import absolute_import # Copyright (c) 2010-2019 openpyxl """ Read worksheets on-demand """ from .worksheet import Worksheet from openpyxl.cell.read_only import ReadOnlyCell, EMPTY_CELL from openpyxl.utils import get_column_letter from ._reader import WorkSheetParser class ReadOnlyWorksheet(object): _min_column = 1 _min_row = 1 _max_column = _max_row = None def __init__(self, parent_workbook, title, worksheet_path, shared_strings): self.parent = parent_workbook self.title = title self._current_row = None self._worksheet_path = worksheet_path self._shared_strings = shared_strings self._get_size() # Methods from Worksheet self.iter_rows = Worksheet.iter_rows.__get__(self) self.values = Worksheet.values.__get__(self) self.rows = Worksheet.rows.__get__(self) self.cell = Worksheet.cell.__get__(self) def __iter__(self): # 2.7 compat meth = Worksheet.__iter__.__get__(self) return meth() def __getitem__(self, key): # 2.7 compat meth = Worksheet.__getitem__.__get__(self) return meth(key) def _get_size(self): src = self._get_source() parser = WorkSheetParser(src, []) dimensions = parser.parse_dimensions() src.close() if dimensions is not None: self._min_column, self._min_row, self._max_column, self._max_row = dimensions def _get_source(self): """Parse xml source on demand, must close after use""" return self.parent._archive.open(self._worksheet_path) def _cells_by_row(self, min_col, min_row, max_col, max_row, values_only=False): """ The source worksheet file may have columns or rows missing. Missing cells will be created. """ filler = EMPTY_CELL if values_only: filler = None max_col = max_col or self.max_column max_row = max_row or self.max_row empty_row = [] if max_col is not None: empty_row = (filler,) * (max_col + 1 - min_col) counter = min_row idx = 1 src = self._get_source() parser = WorkSheetParser(src, self._shared_strings, data_only=self.parent.data_only, epoch=self.parent.epoch, date_formats=self.parent._date_formats) for idx, row in parser.parse(): if max_row is not None and idx > max_row: break # some rows are missing for _ in range(counter, idx): counter += 1 yield empty_row # return cells from a row if counter <= idx: row = self._get_row(row, min_col, max_col, values_only) counter += 1 yield row if max_row is not None and max_row < idx: for _ in range(counter, max_row+1): yield empty_row src.close() def _get_row(self, row, min_col=1, max_col=None, values_only=False): """ Make sure a row contains always the same number of cells or values """ if not row: return () last_col = row[-1]['column'] max_col = max_col or last_col row_width = max_col + 1 - min_col if values_only: new_row = [None] * row_width else: new_row = [EMPTY_CELL] * row_width for cell in row: counter = cell['column'] if min_col <= counter <= max_col: idx = counter - min_col if values_only: new_row[idx] = cell['value'] else: new_row[idx] = ReadOnlyCell(self, **cell) return tuple(new_row) def _get_cell(self, row, column): """Cells are returned by a generator which can be empty""" for row in self._cells_by_row(column, row, column, row): if row: return row[0] return EMPTY_CELL def calculate_dimension(self, force=False): if not all([self.max_column, self.max_row]): if force: self._calculate_dimension() else: raise ValueError("Worksheet is unsized, use calculate_dimension(force=True)") return '%s%d:%s%d' % ( get_column_letter(self.min_column), self.min_row, get_column_letter(self.max_column), self.max_row ) def _calculate_dimension(self): """ Loop through all the cells to get the size of a worksheet. Do this only if it is explicitly requested. """ max_col = 0 for r in self.rows: if not r: continue cell = r[-1] max_col = max(max_col, cell.column) self._max_row = cell.row self._max_column = max_col def reset_dimensions(self): """ Remove worksheet dimensions if these are incorrect in the worksheet source. NB. This probably indicates a bug in the library or application that created the workbook. """ self._max_row = self._max_column = None @property def min_row(self): return self._min_row @property def max_row(self): return self._max_row @property def min_column(self): return self._min_column @property def max_column(self): return self._max_column