123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194 |
- from __future__ import absolute_import
- # Copyright (c) 2010-2019 openpyxl
- """ Read worksheets on-demand
- """
- from .worksheet import Worksheet
- from openpyxl.cell.read_only import ReadOnlyCell, EMPTY_CELL
- from openpyxl.utils import get_column_letter
- from ._reader import WorkSheetParser
- class ReadOnlyWorksheet(object):
- _min_column = 1
- _min_row = 1
- _max_column = _max_row = None
- def __init__(self, parent_workbook, title, worksheet_path, shared_strings):
- self.parent = parent_workbook
- self.title = title
- self._current_row = None
- self._worksheet_path = worksheet_path
- self._shared_strings = shared_strings
- self._get_size()
- # Methods from Worksheet
- self.iter_rows = Worksheet.iter_rows.__get__(self)
- self.values = Worksheet.values.__get__(self)
- self.rows = Worksheet.rows.__get__(self)
- self.cell = Worksheet.cell.__get__(self)
- def __iter__(self):
- # 2.7 compat
- meth = Worksheet.__iter__.__get__(self)
- return meth()
- def __getitem__(self, key):
- # 2.7 compat
- meth = Worksheet.__getitem__.__get__(self)
- return meth(key)
- def _get_size(self):
- src = self._get_source()
- parser = WorkSheetParser(src, [])
- dimensions = parser.parse_dimensions()
- src.close()
- if dimensions is not None:
- self._min_column, self._min_row, self._max_column, self._max_row = dimensions
- def _get_source(self):
- """Parse xml source on demand, must close after use"""
- return self.parent._archive.open(self._worksheet_path)
- def _cells_by_row(self, min_col, min_row, max_col, max_row, values_only=False):
- """
- The source worksheet file may have columns or rows missing.
- Missing cells will be created.
- """
- filler = EMPTY_CELL
- if values_only:
- filler = None
- max_col = max_col or self.max_column
- max_row = max_row or self.max_row
- empty_row = []
- if max_col is not None:
- empty_row = (filler,) * (max_col + 1 - min_col)
- counter = min_row
- idx = 1
- src = self._get_source()
- parser = WorkSheetParser(src, self._shared_strings,
- data_only=self.parent.data_only, epoch=self.parent.epoch,
- date_formats=self.parent._date_formats)
- for idx, row in parser.parse():
- if max_row is not None and idx > max_row:
- break
- # some rows are missing
- for _ in range(counter, idx):
- counter += 1
- yield empty_row
- # return cells from a row
- if counter <= idx:
- row = self._get_row(row, min_col, max_col, values_only)
- counter += 1
- yield row
- if max_row is not None and max_row < idx:
- for _ in range(counter, max_row+1):
- yield empty_row
- src.close()
- def _get_row(self, row, min_col=1, max_col=None, values_only=False):
- """
- Make sure a row contains always the same number of cells or values
- """
- if not row:
- return ()
- last_col = row[-1]['column']
- max_col = max_col or last_col
- row_width = max_col + 1 - min_col
- if values_only:
- new_row = [None] * row_width
- else:
- new_row = [EMPTY_CELL] * row_width
- for cell in row:
- counter = cell['column']
- if min_col <= counter <= max_col:
- idx = counter - min_col
- if values_only:
- new_row[idx] = cell['value']
- else:
- new_row[idx] = ReadOnlyCell(self, **cell)
- return tuple(new_row)
- def _get_cell(self, row, column):
- """Cells are returned by a generator which can be empty"""
- for row in self._cells_by_row(column, row, column, row):
- if row:
- return row[0]
- return EMPTY_CELL
- def calculate_dimension(self, force=False):
- if not all([self.max_column, self.max_row]):
- if force:
- self._calculate_dimension()
- else:
- raise ValueError("Worksheet is unsized, use calculate_dimension(force=True)")
- return '%s%d:%s%d' % (
- get_column_letter(self.min_column), self.min_row,
- get_column_letter(self.max_column), self.max_row
- )
- def _calculate_dimension(self):
- """
- Loop through all the cells to get the size of a worksheet.
- Do this only if it is explicitly requested.
- """
- max_col = 0
- for r in self.rows:
- if not r:
- continue
- cell = r[-1]
- max_col = max(max_col, cell.column)
- self._max_row = cell.row
- self._max_column = max_col
- def reset_dimensions(self):
- """
- Remove worksheet dimensions if these are incorrect in the worksheet source.
- NB. This probably indicates a bug in the library or application that created
- the workbook.
- """
- self._max_row = self._max_column = None
- @property
- def min_row(self):
- return self._min_row
- @property
- def max_row(self):
- return self._max_row
- @property
- def min_column(self):
- return self._min_column
- @property
- def max_column(self):
- return self._max_column
|