ops.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561
  1. """Operator classes for eval.
  2. """
  3. from datetime import datetime
  4. from distutils.version import LooseVersion
  5. from functools import partial
  6. import operator as op
  7. import numpy as np
  8. from pandas.compat import PY3, string_types, text_type
  9. from pandas.core.dtypes.common import is_list_like, is_scalar
  10. import pandas as pd
  11. from pandas.core.base import StringMixin
  12. import pandas.core.common as com
  13. from pandas.core.computation.common import _ensure_decoded, _result_type_many
  14. from pandas.core.computation.scope import _DEFAULT_GLOBALS
  15. from pandas.io.formats.printing import pprint_thing, pprint_thing_encoded
  16. _reductions = 'sum', 'prod'
  17. _unary_math_ops = ('sin', 'cos', 'exp', 'log', 'expm1', 'log1p',
  18. 'sqrt', 'sinh', 'cosh', 'tanh', 'arcsin', 'arccos',
  19. 'arctan', 'arccosh', 'arcsinh', 'arctanh', 'abs', 'log10',
  20. 'floor', 'ceil'
  21. )
  22. _binary_math_ops = ('arctan2',)
  23. _mathops = _unary_math_ops + _binary_math_ops
  24. _LOCAL_TAG = '__pd_eval_local_'
  25. class UndefinedVariableError(NameError):
  26. """NameError subclass for local variables."""
  27. def __init__(self, name, is_local):
  28. if is_local:
  29. msg = 'local variable {0!r} is not defined'
  30. else:
  31. msg = 'name {0!r} is not defined'
  32. super(UndefinedVariableError, self).__init__(msg.format(name))
  33. class Term(StringMixin):
  34. def __new__(cls, name, env, side=None, encoding=None):
  35. klass = Constant if not isinstance(name, string_types) else cls
  36. supr_new = super(Term, klass).__new__
  37. return supr_new(klass)
  38. def __init__(self, name, env, side=None, encoding=None):
  39. self._name = name
  40. self.env = env
  41. self.side = side
  42. tname = text_type(name)
  43. self.is_local = (tname.startswith(_LOCAL_TAG) or
  44. tname in _DEFAULT_GLOBALS)
  45. self._value = self._resolve_name()
  46. self.encoding = encoding
  47. @property
  48. def local_name(self):
  49. return self.name.replace(_LOCAL_TAG, '')
  50. def __unicode__(self):
  51. return pprint_thing(self.name)
  52. def __call__(self, *args, **kwargs):
  53. return self.value
  54. def evaluate(self, *args, **kwargs):
  55. return self
  56. def _resolve_name(self):
  57. res = self.env.resolve(self.local_name, is_local=self.is_local)
  58. self.update(res)
  59. if hasattr(res, 'ndim') and res.ndim > 2:
  60. raise NotImplementedError("N-dimensional objects, where N > 2,"
  61. " are not supported with eval")
  62. return res
  63. def update(self, value):
  64. """
  65. search order for local (i.e., @variable) variables:
  66. scope, key_variable
  67. [('locals', 'local_name'),
  68. ('globals', 'local_name'),
  69. ('locals', 'key'),
  70. ('globals', 'key')]
  71. """
  72. key = self.name
  73. # if it's a variable name (otherwise a constant)
  74. if isinstance(key, string_types):
  75. self.env.swapkey(self.local_name, key, new_value=value)
  76. self.value = value
  77. @property
  78. def is_scalar(self):
  79. return is_scalar(self._value)
  80. @property
  81. def type(self):
  82. try:
  83. # potentially very slow for large, mixed dtype frames
  84. return self._value.values.dtype
  85. except AttributeError:
  86. try:
  87. # ndarray
  88. return self._value.dtype
  89. except AttributeError:
  90. # scalar
  91. return type(self._value)
  92. return_type = type
  93. @property
  94. def raw(self):
  95. return pprint_thing('{0}(name={1!r}, type={2})'
  96. ''.format(self.__class__.__name__, self.name,
  97. self.type))
  98. @property
  99. def is_datetime(self):
  100. try:
  101. t = self.type.type
  102. except AttributeError:
  103. t = self.type
  104. return issubclass(t, (datetime, np.datetime64))
  105. @property
  106. def value(self):
  107. return self._value
  108. @value.setter
  109. def value(self, new_value):
  110. self._value = new_value
  111. @property
  112. def name(self):
  113. return self._name
  114. @name.setter
  115. def name(self, new_name):
  116. self._name = new_name
  117. @property
  118. def ndim(self):
  119. return self._value.ndim
  120. class Constant(Term):
  121. def __init__(self, value, env, side=None, encoding=None):
  122. super(Constant, self).__init__(value, env, side=side,
  123. encoding=encoding)
  124. def _resolve_name(self):
  125. return self._name
  126. @property
  127. def name(self):
  128. return self.value
  129. def __unicode__(self):
  130. # in python 2 str() of float
  131. # can truncate shorter than repr()
  132. return repr(self.name)
  133. _bool_op_map = {'not': '~', 'and': '&', 'or': '|'}
  134. class Op(StringMixin):
  135. """Hold an operator of arbitrary arity
  136. """
  137. def __init__(self, op, operands, *args, **kwargs):
  138. self.op = _bool_op_map.get(op, op)
  139. self.operands = operands
  140. self.encoding = kwargs.get('encoding', None)
  141. def __iter__(self):
  142. return iter(self.operands)
  143. def __unicode__(self):
  144. """Print a generic n-ary operator and its operands using infix
  145. notation"""
  146. # recurse over the operands
  147. parened = ('({0})'.format(pprint_thing(opr))
  148. for opr in self.operands)
  149. return pprint_thing(' {0} '.format(self.op).join(parened))
  150. @property
  151. def return_type(self):
  152. # clobber types to bool if the op is a boolean operator
  153. if self.op in (_cmp_ops_syms + _bool_ops_syms):
  154. return np.bool_
  155. return _result_type_many(*(term.type for term in com.flatten(self)))
  156. @property
  157. def has_invalid_return_type(self):
  158. types = self.operand_types
  159. obj_dtype_set = frozenset([np.dtype('object')])
  160. return self.return_type == object and types - obj_dtype_set
  161. @property
  162. def operand_types(self):
  163. return frozenset(term.type for term in com.flatten(self))
  164. @property
  165. def is_scalar(self):
  166. return all(operand.is_scalar for operand in self.operands)
  167. @property
  168. def is_datetime(self):
  169. try:
  170. t = self.return_type.type
  171. except AttributeError:
  172. t = self.return_type
  173. return issubclass(t, (datetime, np.datetime64))
  174. def _in(x, y):
  175. """Compute the vectorized membership of ``x in y`` if possible, otherwise
  176. use Python.
  177. """
  178. try:
  179. return x.isin(y)
  180. except AttributeError:
  181. if is_list_like(x):
  182. try:
  183. return y.isin(x)
  184. except AttributeError:
  185. pass
  186. return x in y
  187. def _not_in(x, y):
  188. """Compute the vectorized membership of ``x not in y`` if possible,
  189. otherwise use Python.
  190. """
  191. try:
  192. return ~x.isin(y)
  193. except AttributeError:
  194. if is_list_like(x):
  195. try:
  196. return ~y.isin(x)
  197. except AttributeError:
  198. pass
  199. return x not in y
  200. _cmp_ops_syms = '>', '<', '>=', '<=', '==', '!=', 'in', 'not in'
  201. _cmp_ops_funcs = op.gt, op.lt, op.ge, op.le, op.eq, op.ne, _in, _not_in
  202. _cmp_ops_dict = dict(zip(_cmp_ops_syms, _cmp_ops_funcs))
  203. _bool_ops_syms = '&', '|', 'and', 'or'
  204. _bool_ops_funcs = op.and_, op.or_, op.and_, op.or_
  205. _bool_ops_dict = dict(zip(_bool_ops_syms, _bool_ops_funcs))
  206. _arith_ops_syms = '+', '-', '*', '/', '**', '//', '%'
  207. _arith_ops_funcs = (op.add, op.sub, op.mul, op.truediv if PY3 else op.div,
  208. op.pow, op.floordiv, op.mod)
  209. _arith_ops_dict = dict(zip(_arith_ops_syms, _arith_ops_funcs))
  210. _special_case_arith_ops_syms = '**', '//', '%'
  211. _special_case_arith_ops_funcs = op.pow, op.floordiv, op.mod
  212. _special_case_arith_ops_dict = dict(zip(_special_case_arith_ops_syms,
  213. _special_case_arith_ops_funcs))
  214. _binary_ops_dict = {}
  215. for d in (_cmp_ops_dict, _bool_ops_dict, _arith_ops_dict):
  216. _binary_ops_dict.update(d)
  217. def _cast_inplace(terms, acceptable_dtypes, dtype):
  218. """Cast an expression inplace.
  219. Parameters
  220. ----------
  221. terms : Op
  222. The expression that should cast.
  223. acceptable_dtypes : list of acceptable numpy.dtype
  224. Will not cast if term's dtype in this list.
  225. .. versionadded:: 0.19.0
  226. dtype : str or numpy.dtype
  227. The dtype to cast to.
  228. """
  229. dt = np.dtype(dtype)
  230. for term in terms:
  231. if term.type in acceptable_dtypes:
  232. continue
  233. try:
  234. new_value = term.value.astype(dt)
  235. except AttributeError:
  236. new_value = dt.type(term.value)
  237. term.update(new_value)
  238. def is_term(obj):
  239. return isinstance(obj, Term)
  240. class BinOp(Op):
  241. """Hold a binary operator and its operands
  242. Parameters
  243. ----------
  244. op : str
  245. left : Term or Op
  246. right : Term or Op
  247. """
  248. def __init__(self, op, lhs, rhs, **kwargs):
  249. super(BinOp, self).__init__(op, (lhs, rhs))
  250. self.lhs = lhs
  251. self.rhs = rhs
  252. self._disallow_scalar_only_bool_ops()
  253. self.convert_values()
  254. try:
  255. self.func = _binary_ops_dict[op]
  256. except KeyError:
  257. # has to be made a list for python3
  258. keys = list(_binary_ops_dict.keys())
  259. raise ValueError('Invalid binary operator {0!r}, valid'
  260. ' operators are {1}'.format(op, keys))
  261. def __call__(self, env):
  262. """Recursively evaluate an expression in Python space.
  263. Parameters
  264. ----------
  265. env : Scope
  266. Returns
  267. -------
  268. object
  269. The result of an evaluated expression.
  270. """
  271. # handle truediv
  272. if self.op == '/' and env.scope['truediv']:
  273. self.func = op.truediv
  274. # recurse over the left/right nodes
  275. left = self.lhs(env)
  276. right = self.rhs(env)
  277. return self.func(left, right)
  278. def evaluate(self, env, engine, parser, term_type, eval_in_python):
  279. """Evaluate a binary operation *before* being passed to the engine.
  280. Parameters
  281. ----------
  282. env : Scope
  283. engine : str
  284. parser : str
  285. term_type : type
  286. eval_in_python : list
  287. Returns
  288. -------
  289. term_type
  290. The "pre-evaluated" expression as an instance of ``term_type``
  291. """
  292. if engine == 'python':
  293. res = self(env)
  294. else:
  295. # recurse over the left/right nodes
  296. left = self.lhs.evaluate(env, engine=engine, parser=parser,
  297. term_type=term_type,
  298. eval_in_python=eval_in_python)
  299. right = self.rhs.evaluate(env, engine=engine, parser=parser,
  300. term_type=term_type,
  301. eval_in_python=eval_in_python)
  302. # base cases
  303. if self.op in eval_in_python:
  304. res = self.func(left.value, right.value)
  305. else:
  306. res = pd.eval(self, local_dict=env, engine=engine,
  307. parser=parser)
  308. name = env.add_tmp(res)
  309. return term_type(name, env=env)
  310. def convert_values(self):
  311. """Convert datetimes to a comparable value in an expression.
  312. """
  313. def stringify(value):
  314. if self.encoding is not None:
  315. encoder = partial(pprint_thing_encoded,
  316. encoding=self.encoding)
  317. else:
  318. encoder = pprint_thing
  319. return encoder(value)
  320. lhs, rhs = self.lhs, self.rhs
  321. if is_term(lhs) and lhs.is_datetime and is_term(rhs) and rhs.is_scalar:
  322. v = rhs.value
  323. if isinstance(v, (int, float)):
  324. v = stringify(v)
  325. v = pd.Timestamp(_ensure_decoded(v))
  326. if v.tz is not None:
  327. v = v.tz_convert('UTC')
  328. self.rhs.update(v)
  329. if is_term(rhs) and rhs.is_datetime and is_term(lhs) and lhs.is_scalar:
  330. v = lhs.value
  331. if isinstance(v, (int, float)):
  332. v = stringify(v)
  333. v = pd.Timestamp(_ensure_decoded(v))
  334. if v.tz is not None:
  335. v = v.tz_convert('UTC')
  336. self.lhs.update(v)
  337. def _disallow_scalar_only_bool_ops(self):
  338. if ((self.lhs.is_scalar or self.rhs.is_scalar) and
  339. self.op in _bool_ops_dict and
  340. (not (issubclass(self.rhs.return_type, (bool, np.bool_)) and
  341. issubclass(self.lhs.return_type, (bool, np.bool_))))):
  342. raise NotImplementedError("cannot evaluate scalar only bool ops")
  343. def isnumeric(dtype):
  344. return issubclass(np.dtype(dtype).type, np.number)
  345. class Div(BinOp):
  346. """Div operator to special case casting.
  347. Parameters
  348. ----------
  349. lhs, rhs : Term or Op
  350. The Terms or Ops in the ``/`` expression.
  351. truediv : bool
  352. Whether or not to use true division. With Python 3 this happens
  353. regardless of the value of ``truediv``.
  354. """
  355. def __init__(self, lhs, rhs, truediv, *args, **kwargs):
  356. super(Div, self).__init__('/', lhs, rhs, *args, **kwargs)
  357. if not isnumeric(lhs.return_type) or not isnumeric(rhs.return_type):
  358. raise TypeError("unsupported operand type(s) for {0}:"
  359. " '{1}' and '{2}'".format(self.op,
  360. lhs.return_type,
  361. rhs.return_type))
  362. if truediv or PY3:
  363. # do not upcast float32s to float64 un-necessarily
  364. acceptable_dtypes = [np.float32, np.float_]
  365. _cast_inplace(com.flatten(self), acceptable_dtypes, np.float_)
  366. _unary_ops_syms = '+', '-', '~', 'not'
  367. _unary_ops_funcs = op.pos, op.neg, op.invert, op.invert
  368. _unary_ops_dict = dict(zip(_unary_ops_syms, _unary_ops_funcs))
  369. class UnaryOp(Op):
  370. """Hold a unary operator and its operands
  371. Parameters
  372. ----------
  373. op : str
  374. The token used to represent the operator.
  375. operand : Term or Op
  376. The Term or Op operand to the operator.
  377. Raises
  378. ------
  379. ValueError
  380. * If no function associated with the passed operator token is found.
  381. """
  382. def __init__(self, op, operand):
  383. super(UnaryOp, self).__init__(op, (operand,))
  384. self.operand = operand
  385. try:
  386. self.func = _unary_ops_dict[op]
  387. except KeyError:
  388. raise ValueError('Invalid unary operator {0!r}, valid operators '
  389. 'are {1}'.format(op, _unary_ops_syms))
  390. def __call__(self, env):
  391. operand = self.operand(env)
  392. return self.func(operand)
  393. def __unicode__(self):
  394. return pprint_thing('{0}({1})'.format(self.op, self.operand))
  395. @property
  396. def return_type(self):
  397. operand = self.operand
  398. if operand.return_type == np.dtype('bool'):
  399. return np.dtype('bool')
  400. if (isinstance(operand, Op) and
  401. (operand.op in _cmp_ops_dict or operand.op in _bool_ops_dict)):
  402. return np.dtype('bool')
  403. return np.dtype('int')
  404. class MathCall(Op):
  405. def __init__(self, func, args):
  406. super(MathCall, self).__init__(func.name, args)
  407. self.func = func
  408. def __call__(self, env):
  409. operands = [op(env) for op in self.operands]
  410. with np.errstate(all='ignore'):
  411. return self.func.func(*operands)
  412. def __unicode__(self):
  413. operands = map(str, self.operands)
  414. return pprint_thing('{0}({1})'.format(self.op, ','.join(operands)))
  415. class FuncNode(object):
  416. def __init__(self, name):
  417. from pandas.core.computation.check import (_NUMEXPR_INSTALLED,
  418. _NUMEXPR_VERSION)
  419. if name not in _mathops or (
  420. _NUMEXPR_INSTALLED and
  421. _NUMEXPR_VERSION < LooseVersion('2.6.9') and
  422. name in ('floor', 'ceil')
  423. ):
  424. raise ValueError(
  425. "\"{0}\" is not a supported function".format(name))
  426. self.name = name
  427. self.func = getattr(np, name)
  428. def __call__(self, *args):
  429. return MathCall(self, args)