eval.py 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351
  1. #!/usr/bin/env python
  2. """Top level ``eval`` module.
  3. """
  4. import tokenize
  5. import warnings
  6. from pandas.compat import string_types
  7. from pandas.util._validators import validate_bool_kwarg
  8. from pandas.core.computation.engines import _engines
  9. from pandas.core.computation.scope import _ensure_scope
  10. from pandas.io.formats.printing import pprint_thing
  11. def _check_engine(engine):
  12. """Make sure a valid engine is passed.
  13. Parameters
  14. ----------
  15. engine : str
  16. Raises
  17. ------
  18. KeyError
  19. * If an invalid engine is passed
  20. ImportError
  21. * If numexpr was requested but doesn't exist
  22. Returns
  23. -------
  24. string engine
  25. """
  26. from pandas.core.computation.check import _NUMEXPR_INSTALLED
  27. if engine is None:
  28. if _NUMEXPR_INSTALLED:
  29. engine = 'numexpr'
  30. else:
  31. engine = 'python'
  32. if engine not in _engines:
  33. valid = list(_engines.keys())
  34. raise KeyError('Invalid engine {engine!r} passed, valid engines are'
  35. ' {valid}'.format(engine=engine, valid=valid))
  36. # TODO: validate this in a more general way (thinking of future engines
  37. # that won't necessarily be import-able)
  38. # Could potentially be done on engine instantiation
  39. if engine == 'numexpr':
  40. if not _NUMEXPR_INSTALLED:
  41. raise ImportError("'numexpr' is not installed or an "
  42. "unsupported version. Cannot use "
  43. "engine='numexpr' for query/eval "
  44. "if 'numexpr' is not installed")
  45. return engine
  46. def _check_parser(parser):
  47. """Make sure a valid parser is passed.
  48. Parameters
  49. ----------
  50. parser : str
  51. Raises
  52. ------
  53. KeyError
  54. * If an invalid parser is passed
  55. """
  56. from pandas.core.computation.expr import _parsers
  57. if parser not in _parsers:
  58. raise KeyError('Invalid parser {parser!r} passed, valid parsers are'
  59. ' {valid}'.format(parser=parser, valid=_parsers.keys()))
  60. def _check_resolvers(resolvers):
  61. if resolvers is not None:
  62. for resolver in resolvers:
  63. if not hasattr(resolver, '__getitem__'):
  64. name = type(resolver).__name__
  65. raise TypeError('Resolver of type {name!r} does not implement '
  66. 'the __getitem__ method'.format(name=name))
  67. def _check_expression(expr):
  68. """Make sure an expression is not an empty string
  69. Parameters
  70. ----------
  71. expr : object
  72. An object that can be converted to a string
  73. Raises
  74. ------
  75. ValueError
  76. * If expr is an empty string
  77. """
  78. if not expr:
  79. raise ValueError("expr cannot be an empty string")
  80. def _convert_expression(expr):
  81. """Convert an object to an expression.
  82. Thus function converts an object to an expression (a unicode string) and
  83. checks to make sure it isn't empty after conversion. This is used to
  84. convert operators to their string representation for recursive calls to
  85. :func:`~pandas.eval`.
  86. Parameters
  87. ----------
  88. expr : object
  89. The object to be converted to a string.
  90. Returns
  91. -------
  92. s : unicode
  93. The string representation of an object.
  94. Raises
  95. ------
  96. ValueError
  97. * If the expression is empty.
  98. """
  99. s = pprint_thing(expr)
  100. _check_expression(s)
  101. return s
  102. def _check_for_locals(expr, stack_level, parser):
  103. from pandas.core.computation.expr import tokenize_string
  104. at_top_of_stack = stack_level == 0
  105. not_pandas_parser = parser != 'pandas'
  106. if not_pandas_parser:
  107. msg = "The '@' prefix is only supported by the pandas parser"
  108. elif at_top_of_stack:
  109. msg = ("The '@' prefix is not allowed in "
  110. "top-level eval calls, \nplease refer to "
  111. "your variables by name without the '@' "
  112. "prefix")
  113. if at_top_of_stack or not_pandas_parser:
  114. for toknum, tokval in tokenize_string(expr):
  115. if toknum == tokenize.OP and tokval == '@':
  116. raise SyntaxError(msg)
def eval(expr, parser='pandas', engine=None, truediv=True,
         local_dict=None, global_dict=None, resolvers=(), level=0,
         target=None, inplace=False):
    """Evaluate a Python expression as a string using various backends.

    The following arithmetic operations are supported: ``+``, ``-``, ``*``,
    ``/``, ``**``, ``%``, ``//`` (python engine only) along with the following
    boolean operations: ``|`` (or), ``&`` (and), and ``~`` (not).
    Additionally, the ``'pandas'`` parser allows the use of :keyword:`and`,
    :keyword:`or`, and :keyword:`not` with the same semantics as the
    corresponding bitwise operators.  :class:`~pandas.Series` and
    :class:`~pandas.DataFrame` objects are supported and behave as they would
    with plain ol' Python evaluation.

    Parameters
    ----------
    expr : str or unicode
        The expression to evaluate. This string cannot contain any Python
        `statements
        <https://docs.python.org/3/reference/simple_stmts.html#simple-statements>`__,
        only Python `expressions
        <https://docs.python.org/3/reference/simple_stmts.html#expression-statements>`__.
        A string is split into lines; blank lines are dropped and each
        remaining line is evaluated in order.
    parser : string, default 'pandas', {'pandas', 'python'}
        The parser to use to construct the syntax tree from the expression. The
        default of ``'pandas'`` parses code slightly differently than standard
        Python. Alternatively, you can parse an expression using the
        ``'python'`` parser to retain strict Python semantics.  See the
        :ref:`enhancing performance <enhancingperf.eval>` documentation for
        more details.
    engine : string or None, default None, {'python', 'numexpr'}
        The engine used to evaluate the expression. Supported engines are

        - None         : tries to use ``numexpr``, falls back to ``python``
        - ``'numexpr'``: This default engine evaluates pandas objects using
                         numexpr for large speed ups in complex expressions
                         with large frames.
        - ``'python'``: Performs operations as if you had ``eval``'d in top
                        level python. This engine is generally not that useful.

        More backends may be available in the future.

    truediv : bool, optional
        Whether to use true division, like in Python >= 3
    local_dict : dict or None, optional
        A dictionary of local variables, taken from locals() by default.
    global_dict : dict or None, optional
        A dictionary of global variables, taken from globals() by default.
    resolvers : list of dict-like or None, optional
        A list of objects implementing the ``__getitem__`` special method that
        you can use to inject an additional collection of namespaces to use for
        variable lookup. For example, this is used in the
        :meth:`~pandas.DataFrame.query` method to inject the
        ``DataFrame.index`` and ``DataFrame.columns``
        variables that refer to their respective :class:`~pandas.DataFrame`
        instance attributes.
    level : int, optional
        The number of prior stack frames to traverse and add to the current
        scope. Most users will **not** need to change this parameter.
    target : object, optional, default None
        This is the target object for assignment. It is used when there is
        variable assignment in the expression. If so, then `target` must
        support item assignment with string keys, and if a copy is being
        returned, it must also support `.copy()`.
    inplace : bool, default False
        If `target` is provided, and the expression mutates `target`, whether
        to modify `target` inplace. Otherwise, return a copy of `target` with
        the mutation.

    Returns
    -------
    ndarray, numeric scalar, DataFrame, Series
        The (possibly copied) `target` if an assignment modified it,
        otherwise the result of the evaluated expression. Nothing is
        returned (i.e. ``None``) unless `inplace` is ``False``.

    Raises
    ------
    ValueError
        There are many instances where such an error can be raised:

        - `target=None`, but the expression is multiline.
        - The expression is multiline, but not all of them have item
          assignment. An example of such an arrangement is this:

          a = b + 1
          a + 2

          Here, there are expressions on different lines, making it multiline,
          but the last line has no variable assigned to the output of `a + 2`.
        - `inplace=True`, but the expression is missing item assignment.
        - Item assignment is provided, but the `target` does not support
          string item assignment.
        - Item assignment is provided and `inplace=False`, but the `target`
          does not support the `.copy()` method

    See Also
    --------
    pandas.DataFrame.query
    pandas.DataFrame.eval

    Notes
    -----
    The ``dtype`` of any objects involved in an arithmetic ``%`` operation are
    recursively cast to ``float64``.

    See the :ref:`enhancing performance <enhancingperf.eval>` documentation for
    more details.
    """
    # Imported here rather than at module level.
    # NOTE(review): presumably to avoid a circular import -- confirm.
    from pandas.core.computation.expr import Expr

    # NOTE(review): presumably rejects non-bool values for ``inplace``;
    # see validate_bool_kwarg for the exact contract.
    inplace = validate_bool_kwarg(inplace, "inplace")

    if isinstance(expr, string_types):
        _check_expression(expr)
        # one expression per non-blank line, surrounding whitespace removed
        exprs = [e.strip() for e in expr.splitlines() if e.strip() != '']
    else:
        # non-string input is treated as a single expression
        exprs = [expr]
    multi_line = len(exprs) > 1

    if multi_line and target is None:
        raise ValueError("multi-line expressions are only valid in the "
                         "context of data, use DataFrame.eval")

    ret = None
    first_expr = True  # True only while processing the first expression
    target_modified = False  # set once any assignment into target happens

    for expr in exprs:
        expr = _convert_expression(expr)
        # validation is repeated for every line; ``engine`` is rebound to the
        # resolved name on the first pass
        engine = _check_engine(engine)
        _check_parser(parser)
        _check_resolvers(resolvers)
        _check_for_locals(expr, level, parser)

        # get our (possibly passed-in) scope
        # NOTE(review): ``level + 1`` presumably skips this function's own
        # frame -- confirm against _ensure_scope.
        env = _ensure_scope(level + 1, global_dict=global_dict,
                            local_dict=local_dict, resolvers=resolvers,
                            target=target)

        parsed_expr = Expr(expr, engine=engine, parser=parser, env=env,
                           truediv=truediv)

        # construct the engine and evaluate the parsed expression
        eng = _engines[engine]
        eng_inst = eng(parsed_expr)
        ret = eng_inst.evaluate()

        # an expression without an assignment is only acceptable as a
        # single-line, non-inplace evaluation
        if parsed_expr.assigner is None:
            if multi_line:
                raise ValueError("Multi-line expressions are only valid"
                                 " if all expressions contain an assignment")
            elif inplace:
                raise ValueError("Cannot operate inplace "
                                 "if there is no assignment")

        # assign if needed
        assigner = parsed_expr.assigner
        if env.target is not None and assigner is not None:
            target_modified = True

            # if returning a copy, copy only on the first assignment
            if not inplace and first_expr:
                try:
                    target = env.target.copy()
                except AttributeError:
                    raise ValueError("Cannot return a copy of the target")
            else:
                # later lines (or inplace mode) keep writing to the same
                # object that was handed to the scope
                target = env.target

            # TypeError is most commonly raised (e.g. int, list), but you
            # get IndexError if you try to do this assignment on np.ndarray.
            # we will ignore numpy warnings here; e.g. if trying
            # to use a non-numeric indexer
            try:
                with warnings.catch_warnings(record=True):
                    # TODO: Filter the warnings we actually care about here.
                    target[assigner] = ret
            except (TypeError, IndexError):
                raise ValueError("Cannot assign expression output to target")

            # make the freshly assigned value visible to subsequent lines
            if not resolvers:
                resolvers = ({assigner: ret},)
            else:
                # existing resolver needs updated to handle
                # case of mutating existing column in copy
                for resolver in resolvers:
                    if assigner in resolver:
                        resolver[assigner] = ret
                        break
                else:
                    # no resolver knew this name: append a new one
                    resolvers += ({assigner: ret},)

            # an assignment produces no expression result of its own
            ret = None
            first_expr = False

    # We want to exclude `inplace=None` as being False.
    # Any other value of ``inplace`` falls through and returns None.
    if inplace is False:
        return target if target_modified else ret