translate.py 6.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168
  1. from __future__ import absolute_import
  2. """
  3. This module contains code to translate formulae across cells in a worksheet.
  4. The idea is that if A1 has formula "=B1+C1", then translating it to cell A2
  5. results in formula "=B2+C2". The algorithm relies on the formula tokenizer
  6. to identify the parts of the formula that need to change.
  7. """
  8. import re
  9. from .tokenizer import Tokenizer, Token
  10. from openpyxl.utils import (
  11. coordinate_to_tuple,
  12. column_index_from_string,
  13. get_column_letter
  14. )
  15. class TranslatorError(Exception):
  16. """
  17. Raised when a formula can't be translated across cells.
  18. This error arises when a formula's references would be translated outside
  19. the worksheet's bounds on the top or left. Excel represents these
  20. situations with a #REF! literal error. E.g., if the formula at B2 is
  21. '=A1', attempting to translate the formula to B1 raises TranslatorError,
  22. since there's no cell above A1. Similarly, translating the same formula
  23. from B2 to A2 raises TranslatorError, since there's no cell to the left of
  24. A1.
  25. """
  26. class Translator(object):
  27. """
  28. Modifies a formula so that it can be translated from one cell to another.
  29. `formula`: The unicode string to translate. Must include the leading '='
  30. character.
  31. `origin`: The cell address (in A1 notation) where this formula was
  32. defined (excluding the worksheet name).
  33. """
  34. def __init__(self, formula, origin):
  35. # Excel errors out when a workbook has formulae in R1C1 notation,
  36. # regardless of the calcPr:refMode setting, so I'm assuming the
  37. # formulae stored in the workbook must be in A1 notation.
  38. self.row, self.col = coordinate_to_tuple(origin)
  39. self.tokenizer = Tokenizer(formula)
  40. def get_tokens(self):
  41. "Returns a list with the tokens comprising the formula."
  42. return self.tokenizer.items
  43. ROW_RANGE_RE = re.compile(r"(\$?[1-9][0-9]{0,6}):(\$?[1-9][0-9]{0,6})$")
  44. COL_RANGE_RE = re.compile(r"(\$?[A-Za-z]{1,3}):(\$?[A-Za-z]{1,3})$")
  45. CELL_REF_RE = re.compile(r"(\$?[A-Za-z]{1,3})(\$?[1-9][0-9]{0,6})$")
  46. @staticmethod
  47. def translate_row(row_str, rdelta):
  48. """
  49. Translate a range row-snippet by the given number of rows.
  50. """
  51. if row_str.startswith('$'):
  52. return row_str
  53. else:
  54. new_row = int(row_str) + rdelta
  55. if new_row <= 0:
  56. raise TranslatorError("Formula out of range")
  57. return str(new_row)
  58. @staticmethod
  59. def translate_col(col_str, cdelta):
  60. """
  61. Translate a range col-snippet by the given number of columns
  62. """
  63. if col_str.startswith('$'):
  64. return col_str
  65. else:
  66. try:
  67. return get_column_letter(
  68. column_index_from_string(col_str) + cdelta)
  69. except ValueError:
  70. raise TranslatorError("Formula out of range")
  71. @staticmethod
  72. def strip_ws_name(range_str):
  73. "Splits out the worksheet reference, if any, from a range reference."
  74. # This code assumes that named ranges cannot contain any exclamation
  75. # marks. Excel refuses to create these (even using VBA), and
  76. # complains of a corrupt workbook when there are names with
  77. # exclamation marks. The ECMA spec only states that named ranges will
  78. # be of `ST_Xstring` type, which in theory allows '!' (char code
  79. # 0x21) per http://www.w3.org/TR/xml/#charsets
  80. if '!' in range_str:
  81. sheet, range_str = range_str.rsplit('!', 1)
  82. return sheet + "!", range_str
  83. return "", range_str
  84. @classmethod
  85. def translate_range(cls, range_str, rdelta, cdelta):
  86. """
  87. Translate an A1-style range reference to the destination cell.
  88. `rdelta`: the row offset to add to the range
  89. `cdelta`: the column offset to add to the range
  90. `range_str`: an A1-style reference to a range. Potentially includes
  91. the worksheet reference. Could also be a named range.
  92. """
  93. ws_part, range_str = cls.strip_ws_name(range_str)
  94. match = cls.ROW_RANGE_RE.match(range_str) # e.g. `3:4`
  95. if match is not None:
  96. return (ws_part + cls.translate_row(match.group(1), rdelta) + ":"
  97. + cls.translate_row(match.group(2), rdelta))
  98. match = cls.COL_RANGE_RE.match(range_str) # e.g. `A:BC`
  99. if match is not None:
  100. return (ws_part + cls.translate_col(match.group(1), cdelta) + ':'
  101. + cls.translate_col(match.group(2), cdelta))
  102. if ':' in range_str: # e.g. `A1:B5`
  103. # The check is necessarily general because range references can
  104. # have one or both endpoints specified by named ranges. I.e.,
  105. # `named_range:C2`, `C2:named_range`, and `name1:name2` are all
  106. # valid references. Further, Excel allows chaining multiple
  107. # colons together (with unclear meaning)
  108. return ws_part + ":".join(
  109. cls.translate_range(piece, rdelta, cdelta)
  110. for piece in range_str.split(':'))
  111. match = cls.CELL_REF_RE.match(range_str)
  112. if match is None: # Must be a named range
  113. return range_str
  114. return (ws_part + cls.translate_col(match.group(1), cdelta)
  115. + cls.translate_row(match.group(2), rdelta))
  116. def translate_formula(self, dest=None, row_delta=0, col_delta=0):
  117. """
  118. Convert the formula into A1 notation, or as row and column coordinates
  119. The formula is converted into A1 assuming it is assigned to the cell
  120. whose address is `dest` (no worksheet name).
  121. """
  122. tokens = self.get_tokens()
  123. if not tokens:
  124. return ""
  125. elif tokens[0].type == Token.LITERAL:
  126. return tokens[0].value
  127. out = ['=']
  128. # per the spec:
  129. # A compliant producer or consumer considers a defined name in the
  130. # range A1-XFD1048576 to be an error. All other names outside this
  131. # range can be defined as names and overrides a cell reference if an
  132. # ambiguity exists. (I.18.2.5)
  133. if dest:
  134. row, col = coordinate_to_tuple(dest)
  135. row_delta = row - self.row
  136. col_delta = col - self.col
  137. for token in tokens:
  138. if (token.type == Token.OPERAND
  139. and token.subtype == Token.RANGE):
  140. out.append(self.translate_range(token.value, row_delta,
  141. col_delta))
  142. else:
  143. out.append(token.value)
  144. return "".join(out)