cell.py 6.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226
  1. from __future__ import absolute_import
  2. # Copyright (c) 2010-2019 openpyxl
  3. """
  4. Collection of utilities used within the package and also available for client code
  5. """
  6. import re
  7. from openpyxl.compat import basestring
  8. from .exceptions import CellCoordinatesException
  9. # constants
  10. COORD_RE = re.compile(r'^[$]?([A-Za-z]{1,3})[$]?(\d+)$')
  11. COL_RANGE = """[A-Z]{1,3}:[A-Z]{1,3}:"""
  12. ROW_RANGE = r"""\d+:\d+:"""
  13. RANGE_EXPR = r"""
  14. [$]?(?P<min_col>[A-Za-z]{1,3})?
  15. [$]?(?P<min_row>\d+)?
  16. (:[$]?(?P<max_col>[A-Za-z]{1,3})?
  17. [$]?(?P<max_row>\d+)?)?
  18. """
  19. ABSOLUTE_RE = re.compile('^' + RANGE_EXPR +'$', re.VERBOSE)
  20. SHEET_TITLE = r"""
  21. (('(?P<quoted>([^']|'')*)')|(?P<notquoted>[^'^ ^!]*))!"""
  22. SHEETRANGE_RE = re.compile("""{0}(?P<cells>{1})(?=,?)""".format(
  23. SHEET_TITLE, RANGE_EXPR), re.VERBOSE)
  24. def get_column_interval(start, end):
  25. """
  26. Given the start and end columns, return all the columns in the series.
  27. The start and end columns can be either column letters or 1-based
  28. indexes.
  29. """
  30. if isinstance(start, basestring):
  31. start = column_index_from_string(start)
  32. if isinstance(end, basestring):
  33. end = column_index_from_string(end)
  34. return [get_column_letter(x) for x in range(start, end + 1)]
  35. def coordinate_from_string(coord_string):
  36. """Convert a coordinate string like 'B12' to a tuple ('B', 12)"""
  37. match = COORD_RE.match(coord_string)
  38. if not match:
  39. msg = 'Invalid cell coordinates (%s)' % coord_string
  40. raise CellCoordinatesException(msg)
  41. column, row = match.groups()
  42. row = int(row)
  43. if not row:
  44. msg = "There is no row 0 (%s)" % coord_string
  45. raise CellCoordinatesException(msg)
  46. return column, row
  47. def absolute_coordinate(coord_string):
  48. """Convert a coordinate to an absolute coordinate string (B12 -> $B$12)"""
  49. m = ABSOLUTE_RE.match(coord_string)
  50. if not m:
  51. raise ValueError("{0} is not a valid coordinate range".format(
  52. coord_string))
  53. d = m.groupdict('')
  54. for k, v in d.items():
  55. if v:
  56. d[k] = "${0}".format(v)
  57. if d['max_col'] or d['max_row']:
  58. fmt = "{min_col}{min_row}:{max_col}{max_row}"
  59. else:
  60. fmt = "{min_col}{min_row}"
  61. return fmt.format(**d)
  62. def _get_column_letter(col_idx):
  63. """Convert a column number into a column letter (3 -> 'C')
  64. Right shift the column col_idx by 26 to find column letters in reverse
  65. order. These numbers are 1-based, and can be converted to ASCII
  66. ordinals by adding 64.
  67. """
  68. # these indicies corrospond to A -> ZZZ and include all allowed
  69. # columns
  70. if not 1 <= col_idx <= 18278:
  71. raise ValueError("Invalid column index {0}".format(col_idx))
  72. letters = []
  73. while col_idx > 0:
  74. col_idx, remainder = divmod(col_idx, 26)
  75. # check for exact division and borrow if needed
  76. if remainder == 0:
  77. remainder = 26
  78. col_idx -= 1
  79. letters.append(chr(remainder+64))
  80. return ''.join(reversed(letters))
  81. _COL_STRING_CACHE = {}
  82. _STRING_COL_CACHE = {}
  83. for i in range(1, 18279):
  84. col = _get_column_letter(i)
  85. _STRING_COL_CACHE[i] = col
  86. _COL_STRING_CACHE[col] = i
  87. def get_column_letter(idx,):
  88. """Convert a column index into a column letter
  89. (3 -> 'C')
  90. """
  91. try:
  92. return _STRING_COL_CACHE[idx]
  93. except KeyError:
  94. raise ValueError("Invalid column index {0}".format(idx))
  95. def column_index_from_string(str_col):
  96. """Convert a column name into a numerical index
  97. ('A' -> 1)
  98. """
  99. # we use a function argument to get indexed name lookup
  100. try:
  101. return _COL_STRING_CACHE[str_col.upper()]
  102. except KeyError:
  103. raise ValueError("{0} is not a valid column name".format(str_col))
  104. def range_boundaries(range_string):
  105. """
  106. Convert a range string into a tuple of boundaries:
  107. (min_col, min_row, max_col, max_row)
  108. Cell coordinates will be converted into a range with the cell at both end
  109. """
  110. msg = "{0} is not a valid coordinate or range".format(range_string)
  111. m = ABSOLUTE_RE.match(range_string)
  112. if not m:
  113. raise ValueError(msg)
  114. min_col, min_row, sep, max_col, max_row = m.groups()
  115. if sep:
  116. cols = min_col, max_col
  117. rows = min_row, max_row
  118. if not (
  119. all(cols + rows) or
  120. all(cols) and not any(rows) or
  121. all(rows) and not any(cols)
  122. ):
  123. raise ValueError(msg)
  124. if min_col is not None:
  125. min_col = column_index_from_string(min_col)
  126. if min_row is not None:
  127. min_row = int(min_row)
  128. if max_col is not None:
  129. max_col = column_index_from_string(max_col)
  130. else:
  131. max_col = min_col
  132. if max_row is not None:
  133. max_row = int(max_row)
  134. else:
  135. max_row = min_row
  136. return min_col, min_row, max_col, max_row
  137. def rows_from_range(range_string):
  138. """
  139. Get individual addresses for every cell in a range.
  140. Yields one row at a time.
  141. """
  142. min_col, min_row, max_col, max_row = range_boundaries(range_string)
  143. rows = range(min_row, max_row + 1)
  144. cols = [get_column_letter(col) for col in range(min_col, max_col + 1)]
  145. for row in rows:
  146. yield tuple('{0}{1}'.format(col, row) for col in cols)
  147. def cols_from_range(range_string):
  148. """
  149. Get individual addresses for every cell in a range.
  150. Yields one row at a time.
  151. """
  152. min_col, min_row, max_col, max_row = range_boundaries(range_string)
  153. rows = range(min_row, max_row+1)
  154. cols = (get_column_letter(col) for col in range(min_col, max_col+1))
  155. for col in cols:
  156. yield tuple('{0}{1}'.format(col, row) for row in rows)
  157. def coordinate_to_tuple(coordinate):
  158. """
  159. Convert an Excel style coordinate to (row, colum) tuple
  160. """
  161. match = COORD_RE.split(coordinate)
  162. col, row = match[1:3]
  163. return int(row), _COL_STRING_CACHE[col]
  164. def range_to_tuple(range_string):
  165. """
  166. Convert a worksheet range to the sheetname and maximum and minimum
  167. coordinate indices
  168. """
  169. m = SHEETRANGE_RE.match(range_string)
  170. if m is None:
  171. raise ValueError("Value must be of the form sheetname!A1:E4")
  172. sheetname = m.group("quoted") or m.group("notquoted")
  173. cells = m.group("cells")
  174. boundaries = range_boundaries(cells)
  175. return sheetname, boundaries
  176. def quote_sheetname(sheetname):
  177. """
  178. Add quotes around sheetnames if they contain spaces.
  179. """
  180. if "'" in sheetname:
  181. sheetname = sheetname.replace("'", "''")
  182. sheetname = u"'{0}'".format(sheetname)
  183. return sheetname