test_eval.py 69 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
7177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924
  1. from distutils.version import LooseVersion
  2. from itertools import product
  3. import operator
  4. import warnings
  5. import numpy as np
  6. from numpy.random import rand, randint, randn
  7. import pytest
  8. from pandas.compat import PY3, reduce
  9. from pandas.errors import PerformanceWarning
  10. import pandas.util._test_decorators as td
  11. from pandas.core.dtypes.common import is_bool, is_list_like, is_scalar
  12. import pandas as pd
  13. from pandas import DataFrame, Panel, Series, date_range
  14. from pandas.core.computation import pytables
  15. from pandas.core.computation.check import _NUMEXPR_VERSION
  16. from pandas.core.computation.engines import NumExprClobberingError, _engines
  17. import pandas.core.computation.expr as expr
  18. from pandas.core.computation.expr import PandasExprVisitor, PythonExprVisitor
  19. from pandas.core.computation.expressions import (
  20. _NUMEXPR_INSTALLED, _USE_NUMEXPR)
  21. from pandas.core.computation.ops import (
  22. _arith_ops_syms, _binary_math_ops, _binary_ops_dict, _bool_ops_syms,
  23. _special_case_arith_ops_syms, _unary_math_ops)
  24. import pandas.util.testing as tm
  25. from pandas.util.testing import (
  26. assert_frame_equal, assert_numpy_array_equal, assert_produces_warning,
  27. assert_series_equal, makeCustomDataframe as mkdf, randbool)
# Operator symbols for which the Series-vs-DataFrame case is skipped in
# ``check_complex_cmp_op`` (these are the boolean operator symbols).
_series_frame_incompatible = _bool_ops_syms
# Membership operators; the checks below expect a TypeError when one of these
# is combined with a scalar right-hand operand.
_scalar_skip = 'in', 'not in'
@pytest.fixture(params=(
    pytest.param(engine,
                 marks=pytest.mark.skipif(
                     engine == 'numexpr' and not _USE_NUMEXPR,
                     reason='numexpr enabled->{enabled}, '
                            'installed->{installed}'.format(
                                enabled=_USE_NUMEXPR,
                                installed=_NUMEXPR_INSTALLED)))
    for engine in _engines))  # noqa
def engine(request):
    """Return each key of ``_engines`` in turn as the engine name.

    The ``'numexpr'`` parameter carries a ``skipif`` mark that triggers when
    ``_USE_NUMEXPR`` is false; the skip reason reports both the enabled and
    the installed flags.
    """
    # NOTE: the generator variable in the decorator is also called ``engine``;
    # it is evaluated at import time, before this function name is bound.
    return request.param
@pytest.fixture(params=expr._parsers)
def parser(request):
    """Return each entry of ``expr._parsers`` in turn as the parser name."""
    return request.param
  44. @pytest.fixture
  45. def ne_lt_2_6_9():
  46. if _NUMEXPR_INSTALLED and _NUMEXPR_VERSION >= LooseVersion('2.6.9'):
  47. pytest.skip("numexpr is >= 2.6.9")
  48. return 'numexpr'
  49. @pytest.fixture
  50. def unary_fns_for_ne():
  51. if _NUMEXPR_INSTALLED:
  52. if _NUMEXPR_VERSION >= LooseVersion('2.6.9'):
  53. return _unary_math_ops
  54. else:
  55. return tuple(x for x in _unary_math_ops
  56. if x not in ("floor", "ceil"))
  57. else:
  58. pytest.skip("numexpr is not present")
  59. def engine_has_neg_frac(engine):
  60. return _engines[engine].has_neg_frac
  61. def _eval_single_bin(lhs, cmp1, rhs, engine):
  62. c = _binary_ops_dict[cmp1]
  63. if engine_has_neg_frac(engine):
  64. try:
  65. return c(lhs, rhs)
  66. except ValueError as e:
  67. if str(e).startswith('negative number cannot be '
  68. 'raised to a fractional power'):
  69. return np.nan
  70. raise
  71. return c(lhs, rhs)
  72. def _series_and_2d_ndarray(lhs, rhs):
  73. return ((isinstance(lhs, Series) and
  74. isinstance(rhs, np.ndarray) and rhs.ndim > 1) or
  75. (isinstance(rhs, Series) and
  76. isinstance(lhs, np.ndarray) and lhs.ndim > 1))
  77. def _series_and_frame(lhs, rhs):
  78. return ((isinstance(lhs, Series) and isinstance(rhs, DataFrame)) or
  79. (isinstance(rhs, Series) and isinstance(lhs, DataFrame)))
  80. def _bool_and_frame(lhs, rhs):
  81. return isinstance(lhs, bool) and isinstance(rhs, pd.core.generic.NDFrame)
  82. def _is_py3_complex_incompat(result, expected):
  83. return (PY3 and isinstance(expected, (complex, np.complexfloating)) and
  84. np.isnan(result))
# Arithmetic operator symbols minus the special-cased ones; the special-cased
# operators are exercised by dedicated checks (modulus, pow, floor division).
_good_arith_ops = set(_arith_ops_syms).difference(_special_case_arith_ops_syms)
  86. @td.skip_if_no_ne
  87. class TestEvalNumexprPandas(object):
  88. @classmethod
  89. def setup_class(cls):
  90. import numexpr as ne
  91. cls.ne = ne
  92. cls.engine = 'numexpr'
  93. cls.parser = 'pandas'
  94. @classmethod
  95. def teardown_class(cls):
  96. del cls.engine, cls.parser
  97. if hasattr(cls, 'ne'):
  98. del cls.ne
  99. def setup_data(self):
  100. nan_df1 = DataFrame(rand(10, 5))
  101. nan_df1[nan_df1 > 0.5] = np.nan
  102. nan_df2 = DataFrame(rand(10, 5))
  103. nan_df2[nan_df2 > 0.5] = np.nan
  104. self.pandas_lhses = (DataFrame(randn(10, 5)), Series(randn(5)),
  105. Series([1, 2, np.nan, np.nan, 5]), nan_df1)
  106. self.pandas_rhses = (DataFrame(randn(10, 5)), Series(randn(5)),
  107. Series([1, 2, np.nan, np.nan, 5]), nan_df2)
  108. self.scalar_lhses = randn(),
  109. self.scalar_rhses = randn(),
  110. self.lhses = self.pandas_lhses + self.scalar_lhses
  111. self.rhses = self.pandas_rhses + self.scalar_rhses
  112. def setup_ops(self):
  113. self.cmp_ops = expr._cmp_ops_syms
  114. self.cmp2_ops = self.cmp_ops[::-1]
  115. self.bin_ops = expr._bool_ops_syms
  116. self.special_case_ops = _special_case_arith_ops_syms
  117. self.arith_ops = _good_arith_ops
  118. self.unary_ops = '-', '~', 'not '
  119. def setup_method(self, method):
  120. self.setup_ops()
  121. self.setup_data()
  122. self.current_engines = filter(lambda x: x != self.engine, _engines)
  123. def teardown_method(self, method):
  124. del self.lhses, self.rhses, self.scalar_rhses, self.scalar_lhses
  125. del self.pandas_rhses, self.pandas_lhses, self.current_engines
  126. @pytest.mark.slow
  127. def test_complex_cmp_ops(self):
  128. cmp_ops = ('!=', '==', '<=', '>=', '<', '>')
  129. cmp2_ops = ('>', '<')
  130. for lhs, cmp1, rhs, binop, cmp2 in product(self.lhses, cmp_ops,
  131. self.rhses, self.bin_ops,
  132. cmp2_ops):
  133. self.check_complex_cmp_op(lhs, cmp1, rhs, binop, cmp2)
  134. def test_simple_cmp_ops(self):
  135. bool_lhses = (DataFrame(randbool(size=(10, 5))),
  136. Series(randbool((5,))), randbool())
  137. bool_rhses = (DataFrame(randbool(size=(10, 5))),
  138. Series(randbool((5,))), randbool())
  139. for lhs, rhs, cmp_op in product(bool_lhses, bool_rhses, self.cmp_ops):
  140. self.check_simple_cmp_op(lhs, cmp_op, rhs)
  141. @pytest.mark.slow
  142. def test_binary_arith_ops(self):
  143. for lhs, op, rhs in product(self.lhses, self.arith_ops, self.rhses):
  144. self.check_binary_arith_op(lhs, op, rhs)
  145. def test_modulus(self):
  146. for lhs, rhs in product(self.lhses, self.rhses):
  147. self.check_modulus(lhs, '%', rhs)
  148. def test_floor_division(self):
  149. for lhs, rhs in product(self.lhses, self.rhses):
  150. self.check_floor_division(lhs, '//', rhs)
  151. @td.skip_if_windows
  152. def test_pow(self):
  153. # odd failure on win32 platform, so skip
  154. for lhs, rhs in product(self.lhses, self.rhses):
  155. self.check_pow(lhs, '**', rhs)
  156. @pytest.mark.slow
  157. def test_single_invert_op(self):
  158. for lhs, op, rhs in product(self.lhses, self.cmp_ops, self.rhses):
  159. self.check_single_invert_op(lhs, op, rhs)
  160. @pytest.mark.slow
  161. def test_compound_invert_op(self):
  162. for lhs, op, rhs in product(self.lhses, self.cmp_ops, self.rhses):
  163. self.check_compound_invert_op(lhs, op, rhs)
  164. @pytest.mark.slow
  165. def test_chained_cmp_op(self):
  166. mids = self.lhses
  167. cmp_ops = '<', '>'
  168. for lhs, cmp1, mid, cmp2, rhs in product(self.lhses, cmp_ops,
  169. mids, cmp_ops, self.rhses):
  170. self.check_chained_cmp_op(lhs, cmp1, mid, cmp2, rhs)
  171. def check_equal(self, result, expected):
  172. if isinstance(result, DataFrame):
  173. tm.assert_frame_equal(result, expected)
  174. elif isinstance(result, Series):
  175. tm.assert_series_equal(result, expected)
  176. elif isinstance(result, np.ndarray):
  177. tm.assert_numpy_array_equal(result, expected)
  178. else:
  179. assert result == expected
    def check_complex_cmp_op(self, lhs, cmp1, rhs, binop, cmp2):
        """Check ``(lhs cmp1 rhs) binop (lhs cmp2 rhs)`` against a direct
        computation.

        The expression string names ``lhs`` and ``rhs``; when no
        ``local_dict`` is passed, ``pd.eval`` resolves them from this
        method's local variables, so the parameter names must not change.
        """
        skip_these = _scalar_skip
        ex = '(lhs {cmp1} rhs) {binop} (lhs {cmp2} rhs)'.format(cmp1=cmp1,
                                                                binop=binop,
                                                                cmp2=cmp2)
        scalar_with_in_notin = (is_scalar(rhs) and (cmp1 in skip_these or
                                                    cmp2 in skip_these))
        if scalar_with_in_notin:
            # membership tests against a scalar must fail, with or without an
            # explicit local_dict
            with pytest.raises(TypeError):
                pd.eval(ex, engine=self.engine, parser=self.parser)
            with pytest.raises(TypeError):
                pd.eval(ex, engine=self.engine, parser=self.parser,
                        local_dict={'lhs': lhs, 'rhs': rhs})
        else:
            # evaluate each comparison directly, then combine them below
            lhs_new = _eval_single_bin(lhs, cmp1, rhs, self.engine)
            rhs_new = _eval_single_bin(lhs, cmp2, rhs, self.engine)
            if (isinstance(lhs_new, Series) and
                    isinstance(rhs_new, DataFrame) and
                    binop in _series_frame_incompatible):
                # Series <boolop> DataFrame is currently not checked at all.
                pass
                # TODO: the code below should be added back when left and right
                # hand side bool ops are fixed.
                #
                # pytest.raises(Exception, pd.eval, ex,
                #               local_dict={'lhs': lhs, 'rhs': rhs},
                #               engine=self.engine, parser=self.parser)
            else:
                expected = _eval_single_bin(
                    lhs_new, binop, rhs_new, self.engine)
                result = pd.eval(ex, engine=self.engine, parser=self.parser)
                self.check_equal(result, expected)
    def check_chained_cmp_op(self, lhs, cmp1, mid, cmp2, rhs):
        """Check that three spellings of a chained comparison agree.

        ``lhs cmp1 mid cmp2 rhs``, ``lhs cmp1 mid and mid cmp2 rhs`` and
        ``(lhs cmp1 mid) & (mid cmp2 rhs)`` are each evaluated with
        ``pd.eval`` and compared with the ``&`` of the two directly computed
        comparisons. The expressions name ``lhs``, ``mid`` and ``rhs``, which
        ``pd.eval`` resolves from this method's locals.
        """

        def check_operands(left, right, cmp_op):
            # direct evaluation of a single comparison
            return _eval_single_bin(left, cmp_op, right, self.engine)

        lhs_new = check_operands(lhs, mid, cmp1)
        rhs_new = check_operands(mid, rhs, cmp2)

        if lhs_new is not None and rhs_new is not None:
            ex1 = 'lhs {0} mid {1} rhs'.format(cmp1, cmp2)
            ex2 = 'lhs {0} mid and mid {1} rhs'.format(cmp1, cmp2)
            ex3 = '(lhs {0} mid) & (mid {1} rhs)'.format(cmp1, cmp2)
            expected = _eval_single_bin(lhs_new, '&', rhs_new, self.engine)

            for ex in (ex1, ex2, ex3):
                result = pd.eval(ex, engine=self.engine,
                                 parser=self.parser)

                tm.assert_almost_equal(result, expected)
    def check_simple_cmp_op(self, lhs, cmp1, rhs):
        """Check ``lhs cmp1 rhs`` against a direct computation.

        For ``in`` / ``not in`` with a right operand that is not list-like, a
        TypeError is expected instead. ``lhs`` and ``rhs`` in the expression
        are resolved by ``pd.eval`` from this method's locals.
        """
        ex = 'lhs {0} rhs'.format(cmp1)
        if cmp1 in ('in', 'not in') and not is_list_like(rhs):
            pytest.raises(TypeError, pd.eval, ex, engine=self.engine,
                          parser=self.parser, local_dict={'lhs': lhs,
                                                          'rhs': rhs})
        else:
            expected = _eval_single_bin(lhs, cmp1, rhs, self.engine)
            result = pd.eval(ex, engine=self.engine, parser=self.parser)
            self.check_equal(result, expected)
    def check_binary_arith_op(self, lhs, arith1, rhs):
        """Check ``lhs op rhs`` and ``lhs op rhs op rhs`` for one operator.

        The single application is compared with a direct computation; the
        double application is handed to ``check_alignment`` together with the
        directly computed first step. ``lhs`` and ``rhs`` in the expressions
        are resolved by ``pd.eval`` from this method's locals.
        """
        ex = 'lhs {0} rhs'.format(arith1)
        result = pd.eval(ex, engine=self.engine, parser=self.parser)
        expected = _eval_single_bin(lhs, arith1, rhs, self.engine)

        tm.assert_almost_equal(result, expected)
        ex = 'lhs {0} rhs {0} rhs'.format(arith1)
        result = pd.eval(ex, engine=self.engine, parser=self.parser)
        nlhs = _eval_single_bin(lhs, arith1, rhs,
                                self.engine)
        self.check_alignment(result, nlhs, rhs, arith1)
    def check_alignment(self, result, nlhs, ghs, op):
        """Compare *result* with numexpr applied to the aligned operands.

        If ``nlhs.align(ghs)`` fails, nothing is checked. Otherwise
        ``nlhs <op> ghs`` is evaluated with ``self.ne.evaluate``; the
        expression refers to the local names ``nlhs`` and ``ghs``, so they
        must keep these names.
        """
        try:
            nlhs, ghs = nlhs.align(ghs)
        except (ValueError, TypeError, AttributeError):
            # ValueError: series frame or frame series align
            # TypeError, AttributeError: series or frame with scalar align
            pass
        else:

            # direct numpy comparison
            expected = self.ne.evaluate('nlhs {0} ghs'.format(op))
            tm.assert_numpy_array_equal(result.values, expected)
  262. # modulus, pow, and floor division require special casing
    def check_modulus(self, lhs, arith1, rhs):
        """Check ``lhs % rhs`` from ``pd.eval`` against Python and numexpr.

        The result is first compared with ``lhs % rhs``. That value is then
        fed through ``self.ne.evaluate`` as ``expected <op> rhs`` (the
        expression refers to the local names ``expected`` and ``rhs``) and
        compared with the result again.
        """
        ex = 'lhs {0} rhs'.format(arith1)
        result = pd.eval(ex, engine=self.engine, parser=self.parser)
        expected = lhs % rhs

        tm.assert_almost_equal(result, expected)
        expected = self.ne.evaluate('expected {0} rhs'.format(arith1))
        if isinstance(result, (DataFrame, Series)):
            tm.assert_almost_equal(result.values, expected)
        else:
            # scalar result: unwrap the numexpr output before comparing
            tm.assert_almost_equal(result, expected.item())
    def check_floor_division(self, lhs, arith1, rhs):
        """Check floor division, which only the ``'python'`` engine accepts.

        With the ``'python'`` engine the result must equal ``lhs // rhs``;
        with any other engine ``pd.eval`` is expected to raise TypeError.
        ``lhs`` and ``rhs`` in the expression are resolved by ``pd.eval`` from
        this method's locals in the ``'python'`` branch.
        """
        ex = 'lhs {0} rhs'.format(arith1)

        if self.engine == 'python':
            res = pd.eval(ex, engine=self.engine, parser=self.parser)
            expected = lhs // rhs
            self.check_equal(res, expected)
        else:
            pytest.raises(TypeError, pd.eval, ex,
                          local_dict={'lhs': lhs, 'rhs': rhs},
                          engine=self.engine, parser=self.parser)
  283. def get_expected_pow_result(self, lhs, rhs):
  284. try:
  285. expected = _eval_single_bin(lhs, '**', rhs, self.engine)
  286. except ValueError as e:
  287. if str(e).startswith('negative number cannot be '
  288. 'raised to a fractional power'):
  289. if self.engine == 'python':
  290. pytest.skip(str(e))
  291. else:
  292. expected = np.nan
  293. else:
  294. raise
  295. return expected
    def check_pow(self, lhs, arith1, rhs):
        """Check ``lhs ** rhs`` and ``(lhs ** rhs) ** rhs``.

        ``lhs`` and ``rhs`` in the expressions are resolved by ``pd.eval``
        from this method's locals.
        """
        ex = 'lhs {0} rhs'.format(arith1)
        expected = self.get_expected_pow_result(lhs, rhs)
        result = pd.eval(ex, engine=self.engine, parser=self.parser)

        if (is_scalar(lhs) and is_scalar(rhs) and
                _is_py3_complex_incompat(result, expected)):
            # scalar case where Python yields a complex value but pd.eval
            # yields NaN: the two are expected to differ
            pytest.raises(AssertionError, tm.assert_numpy_array_equal,
                          result, expected)
        else:
            tm.assert_almost_equal(result, expected)

            # the nested power is only checked when the simple one matched
            ex = '(lhs {0} rhs) {0} rhs'.format(arith1)
            result = pd.eval(ex, engine=self.engine, parser=self.parser)
            expected = self.get_expected_pow_result(
                self.get_expected_pow_result(lhs, rhs), rhs)
            tm.assert_almost_equal(result, expected)
    def check_single_invert_op(self, lhs, cmp1, rhs):
        """Check ``~`` applied to the boolean form of each operand.

        *cmp1* is accepted but not used. The expression ``'~elb'`` refers to
        the local name ``elb``, which ``pd.eval`` resolves from this method's
        locals.
        """
        # simple
        for el in (lhs, rhs):
            try:
                elb = el.astype(bool)
            except AttributeError:
                # operand has no astype (e.g. a plain scalar): wrap it
                elb = np.array([bool(el)])
            expected = ~elb
            result = pd.eval('~elb', engine=self.engine, parser=self.parser)
            tm.assert_almost_equal(expected, result)

            # the other engines must give the same answer
            for engine in self.current_engines:
                tm.assert_almost_equal(result, pd.eval('~elb', engine=engine,
                                                       parser=self.parser))
  324. def check_compound_invert_op(self, lhs, cmp1, rhs):
  325. skip_these = 'in', 'not in'
  326. ex = '~(lhs {0} rhs)'.format(cmp1)
  327. if is_scalar(rhs) and cmp1 in skip_these:
  328. pytest.raises(TypeError, pd.eval, ex, engine=self.engine,
  329. parser=self.parser, local_dict={'lhs': lhs,
  330. 'rhs': rhs})
  331. else:
  332. # compound
  333. if is_scalar(lhs) and is_scalar(rhs):
  334. lhs, rhs = map(lambda x: np.array([x]), (lhs, rhs))
  335. expected = _eval_single_bin(lhs, cmp1, rhs, self.engine)
  336. if is_scalar(expected):
  337. expected = not expected
  338. else:
  339. expected = ~expected
  340. result = pd.eval(ex, engine=self.engine, parser=self.parser)
  341. tm.assert_almost_equal(expected, result)
  342. # make sure the other engines work the same as this one
  343. for engine in self.current_engines:
  344. ev = pd.eval(ex, engine=self.engine, parser=self.parser)
  345. tm.assert_almost_equal(ev, result)
  346. def ex(self, op, var_name='lhs'):
  347. return '{0}{1}'.format(op, var_name)
    def test_frame_invert(self):
        """Check ``~lhs`` for float, int, bool and object DataFrames.

        The expression names ``lhs``, which ``pd.eval`` resolves from this
        method's locals; each section rebinds ``lhs`` before evaluating.
        """
        # NOTE: this local shadows the module-level ``expr`` import inside
        # this method.
        expr = self.ex('~')

        # ~ ##
        # frame
        # float always raises
        lhs = DataFrame(randn(5, 2))
        if self.engine == 'numexpr':
            with pytest.raises(NotImplementedError):
                result = pd.eval(expr, engine=self.engine, parser=self.parser)
        else:
            with pytest.raises(TypeError):
                result = pd.eval(expr, engine=self.engine, parser=self.parser)

        # int raises on numexpr
        lhs = DataFrame(randint(5, size=(5, 2)))
        if self.engine == 'numexpr':
            with pytest.raises(NotImplementedError):
                result = pd.eval(expr, engine=self.engine, parser=self.parser)
        else:
            expect = ~lhs
            result = pd.eval(expr, engine=self.engine, parser=self.parser)
            assert_frame_equal(expect, result)

        # bool always works
        lhs = DataFrame(rand(5, 2) > 0.5)
        expect = ~lhs
        result = pd.eval(expr, engine=self.engine, parser=self.parser)
        assert_frame_equal(expect, result)

        # object raises
        lhs = DataFrame({'b': ['a', 1, 2.0], 'c': rand(3) > 0.5})
        if self.engine == 'numexpr':
            with pytest.raises(ValueError):
                result = pd.eval(expr, engine=self.engine, parser=self.parser)
        else:
            with pytest.raises(TypeError):
                result = pd.eval(expr, engine=self.engine, parser=self.parser)
    def test_series_invert(self):
        """Check ``~lhs`` for float, int, bool and object Series.

        The expression names ``lhs``, which ``pd.eval`` resolves from this
        method's locals; each section rebinds ``lhs`` before evaluating.
        """
        # ~ ####
        # NOTE: this local shadows the module-level ``expr`` import inside
        # this method.
        expr = self.ex('~')

        # series
        # float raises
        lhs = Series(randn(5))
        if self.engine == 'numexpr':
            with pytest.raises(NotImplementedError):
                result = pd.eval(expr, engine=self.engine, parser=self.parser)
        else:
            with pytest.raises(TypeError):
                result = pd.eval(expr, engine=self.engine, parser=self.parser)

        # int raises on numexpr
        lhs = Series(randint(5, size=5))
        if self.engine == 'numexpr':
            with pytest.raises(NotImplementedError):
                result = pd.eval(expr, engine=self.engine, parser=self.parser)
        else:
            expect = ~lhs
            result = pd.eval(expr, engine=self.engine, parser=self.parser)
            assert_series_equal(expect, result)

        # bool always works
        lhs = Series(rand(5) > 0.5)
        expect = ~lhs
        result = pd.eval(expr, engine=self.engine, parser=self.parser)
        assert_series_equal(expect, result)

        # object raises
        lhs = Series(['a', 1, 2.0])
        if self.engine == 'numexpr':
            with pytest.raises(ValueError):
                result = pd.eval(expr, engine=self.engine, parser=self.parser)
        else:
            with pytest.raises(TypeError):
                result = pd.eval(expr, engine=self.engine, parser=self.parser)
    def test_frame_negate(self):
        """Check ``-lhs`` for float, int and bool DataFrames.

        The expression names ``lhs``, which ``pd.eval`` resolves from this
        method's locals; each section rebinds ``lhs`` before evaluating.
        """
        # NOTE: this local shadows the module-level ``expr`` import inside
        # this method.
        expr = self.ex('-')

        # float
        lhs = DataFrame(randn(5, 2))
        expect = -lhs
        result = pd.eval(expr, engine=self.engine, parser=self.parser)
        assert_frame_equal(expect, result)

        # int
        lhs = DataFrame(randint(5, size=(5, 2)))
        expect = -lhs
        result = pd.eval(expr, engine=self.engine, parser=self.parser)
        assert_frame_equal(expect, result)

        # bool doesn't work with numexpr but works elsewhere
        lhs = DataFrame(rand(5, 2) > 0.5)
        if self.engine == 'numexpr':
            with pytest.raises(NotImplementedError):
                result = pd.eval(expr, engine=self.engine, parser=self.parser)
        else:
            expect = -lhs
            result = pd.eval(expr, engine=self.engine, parser=self.parser)
            assert_frame_equal(expect, result)
    def test_series_negate(self):
        """Check ``-lhs`` for float, int and bool Series.

        The expression names ``lhs``, which ``pd.eval`` resolves from this
        method's locals; each section rebinds ``lhs`` before evaluating.
        """
        # NOTE: this local shadows the module-level ``expr`` import inside
        # this method.
        expr = self.ex('-')

        # float
        lhs = Series(randn(5))
        expect = -lhs
        result = pd.eval(expr, engine=self.engine, parser=self.parser)
        assert_series_equal(expect, result)

        # int
        lhs = Series(randint(5, size=5))
        expect = -lhs
        result = pd.eval(expr, engine=self.engine, parser=self.parser)
        assert_series_equal(expect, result)

        # bool doesn't work with numexpr but works elsewhere
        lhs = Series(rand(5) > 0.5)
        if self.engine == 'numexpr':
            with pytest.raises(NotImplementedError):
                result = pd.eval(expr, engine=self.engine, parser=self.parser)
        else:
            expect = -lhs
            result = pd.eval(expr, engine=self.engine, parser=self.parser)
            assert_series_equal(expect, result)
    def test_frame_pos(self):
        """Check that ``+lhs`` returns an equal frame for float, int and bool
        DataFrames.

        The expression names ``lhs``, which ``pd.eval`` resolves from this
        method's locals; each section rebinds ``lhs`` before evaluating.
        """
        # NOTE: this local shadows the module-level ``expr`` import inside
        # this method.
        expr = self.ex('+')

        # float
        lhs = DataFrame(randn(5, 2))
        expect = lhs
        result = pd.eval(expr, engine=self.engine, parser=self.parser)
        assert_frame_equal(expect, result)

        # int
        lhs = DataFrame(randint(5, size=(5, 2)))
        expect = lhs
        result = pd.eval(expr, engine=self.engine, parser=self.parser)
        assert_frame_equal(expect, result)

        # bool (asserted to work for every engine, unlike negation)
        lhs = DataFrame(rand(5, 2) > 0.5)
        expect = lhs
        result = pd.eval(expr, engine=self.engine, parser=self.parser)
        assert_frame_equal(expect, result)
    def test_series_pos(self):
        """Check that ``+lhs`` returns an equal series for float, int and bool
        Series.

        The expression names ``lhs``, which ``pd.eval`` resolves from this
        method's locals; each section rebinds ``lhs`` before evaluating.
        """
        # NOTE: this local shadows the module-level ``expr`` import inside
        # this method.
        expr = self.ex('+')

        # float
        lhs = Series(randn(5))
        expect = lhs
        result = pd.eval(expr, engine=self.engine, parser=self.parser)
        assert_series_equal(expect, result)

        # int
        lhs = Series(randint(5, size=5))
        expect = lhs
        result = pd.eval(expr, engine=self.engine, parser=self.parser)
        assert_series_equal(expect, result)

        # bool (asserted to work for every engine, unlike negation)
        lhs = Series(rand(5) > 0.5)
        expect = lhs
        result = pd.eval(expr, engine=self.engine, parser=self.parser)
        assert_series_equal(expect, result)
  495. def test_scalar_unary(self):
  496. with pytest.raises(TypeError):
  497. pd.eval('~1.0', engine=self.engine, parser=self.parser)
  498. assert pd.eval('-1.0', parser=self.parser,
  499. engine=self.engine) == -1.0
  500. assert pd.eval('+1.0', parser=self.parser,
  501. engine=self.engine) == +1.0
  502. assert pd.eval('~1', parser=self.parser,
  503. engine=self.engine) == ~1
  504. assert pd.eval('-1', parser=self.parser,
  505. engine=self.engine) == -1
  506. assert pd.eval('+1', parser=self.parser,
  507. engine=self.engine) == +1
  508. assert pd.eval('~True', parser=self.parser,
  509. engine=self.engine) == ~True
  510. assert pd.eval('~False', parser=self.parser,
  511. engine=self.engine) == ~False
  512. assert pd.eval('-True', parser=self.parser,
  513. engine=self.engine) == -True
  514. assert pd.eval('-False', parser=self.parser,
  515. engine=self.engine) == -False
  516. assert pd.eval('+True', parser=self.parser,
  517. engine=self.engine) == +True
  518. assert pd.eval('+False', parser=self.parser,
  519. engine=self.engine) == +False
  520. def test_unary_in_array(self):
  521. # GH 11235
  522. assert_numpy_array_equal(
  523. pd.eval('[-True, True, ~True, +True,'
  524. '-False, False, ~False, +False,'
  525. '-37, 37, ~37, +37]'),
  526. np.array([-True, True, ~True, +True,
  527. -False, False, ~False, +False,
  528. -37, 37, ~37, +37], dtype=np.object_))
    def test_disallow_scalar_bool_ops(self):
        """Check that ``and`` / ``or`` expressions involving scalars raise
        NotImplementedError."""
        exprs = '1 or 2', '1 and 2'
        exprs += 'a and b', 'a or b'
        exprs += '1 or 2 and (3 + 2) > 3',
        exprs += '2 * x > 2 or 1 and 2',
        exprs += '2 * df > 3 and 1 or a',

        # These locals look unused (hence the noqa) but the expressions above
        # name them; pd.eval resolves them from this method's locals.
        x, a, b, df = np.random.randn(3), 1, 2, DataFrame(randn(3, 2))  # noqa
        for ex in exprs:
            with pytest.raises(NotImplementedError):
                pd.eval(ex, engine=self.engine, parser=self.parser)
    def test_identical(self):
        """Check that evaluating a bare name returns a value of the same kind.

        Scalars must come back as scalars and 1-element arrays as arrays of
        shape ``(1,)``. The expression ``'x'`` is resolved by ``pd.eval`` from
        this method's locals, so ``x`` is rebound before each evaluation.
        """
        # see gh-10546
        x = 1
        result = pd.eval('x', engine=self.engine, parser=self.parser)
        assert result == 1
        assert is_scalar(result)

        x = 1.5
        result = pd.eval('x', engine=self.engine, parser=self.parser)
        assert result == 1.5
        assert is_scalar(result)

        x = False
        result = pd.eval('x', engine=self.engine, parser=self.parser)
        assert not result
        assert is_bool(result)
        assert is_scalar(result)

        x = np.array([1])
        result = pd.eval('x', engine=self.engine, parser=self.parser)
        tm.assert_numpy_array_equal(result, np.array([1]))
        assert result.shape == (1, )

        x = np.array([1.5])
        result = pd.eval('x', engine=self.engine, parser=self.parser)
        tm.assert_numpy_array_equal(result, np.array([1.5]))
        assert result.shape == (1, )

        x = np.array([False])  # noqa
        result = pd.eval('x', engine=self.engine, parser=self.parser)
        tm.assert_numpy_array_equal(result, np.array([False]))
        assert result.shape == (1, )
    def test_line_continuation(self):
        """Check an expression whose source literal uses a backslash line
        continuation (GH 11149)."""
        # GH 11149
        # The backslash-newline is removed by Python inside the literal, so
        # pd.eval receives a single-line expression.
        exp = """1 + 2 * \
        5 - 1 + 2 """
        result = pd.eval(exp, engine=self.engine, parser=self.parser)
        assert result == 12
  572. def test_float_truncation(self):
  573. # GH 14241
  574. exp = '1000000000.006'
  575. result = pd.eval(exp, engine=self.engine, parser=self.parser)
  576. expected = np.float64(exp)
  577. assert result == expected
  578. df = pd.DataFrame({'A': [1000000000.0009,
  579. 1000000000.0011,
  580. 1000000000.0015]})
  581. cutoff = 1000000000.0006
  582. result = df.query("A < %.4f" % cutoff)
  583. assert result.empty
  584. cutoff = 1000000000.0010
  585. result = df.query("A > %.4f" % cutoff)
  586. expected = df.loc[[1, 2], :]
  587. tm.assert_frame_equal(expected, result)
  588. exact = 1000000000.0011
  589. result = df.query('A == %.4f' % exact)
  590. expected = df.loc[[1], :]
  591. tm.assert_frame_equal(expected, result)
  592. def test_disallow_python_keywords(self):
  593. # GH 18221
  594. df = pd.DataFrame([[0, 0, 0]], columns=['foo', 'bar', 'class'])
  595. msg = "Python keyword not valid identifier in numexpr query"
  596. with pytest.raises(SyntaxError, match=msg):
  597. df.query('class == 0')
  598. df = pd.DataFrame()
  599. df.index.name = 'lambda'
  600. with pytest.raises(SyntaxError, match=msg):
  601. df.query('lambda == 0')
  602. @td.skip_if_no_ne
  603. class TestEvalNumexprPython(TestEvalNumexprPandas):
  604. @classmethod
  605. def setup_class(cls):
  606. super(TestEvalNumexprPython, cls).setup_class()
  607. import numexpr as ne
  608. cls.ne = ne
  609. cls.engine = 'numexpr'
  610. cls.parser = 'python'
  611. def setup_ops(self):
  612. self.cmp_ops = list(filter(lambda x: x not in ('in', 'not in'),
  613. expr._cmp_ops_syms))
  614. self.cmp2_ops = self.cmp_ops[::-1]
  615. self.bin_ops = [s for s in expr._bool_ops_syms
  616. if s not in ('and', 'or')]
  617. self.special_case_ops = _special_case_arith_ops_syms
  618. self.arith_ops = _good_arith_ops
  619. self.unary_ops = '+', '-', '~'
  620. def check_chained_cmp_op(self, lhs, cmp1, mid, cmp2, rhs):
  621. ex1 = 'lhs {0} mid {1} rhs'.format(cmp1, cmp2)
  622. with pytest.raises(NotImplementedError):
  623. pd.eval(ex1, engine=self.engine, parser=self.parser)
  624. class TestEvalPythonPython(TestEvalNumexprPython):
  625. @classmethod
  626. def setup_class(cls):
  627. super(TestEvalPythonPython, cls).setup_class()
  628. cls.engine = 'python'
  629. cls.parser = 'python'
  630. def check_modulus(self, lhs, arith1, rhs):
  631. ex = 'lhs {0} rhs'.format(arith1)
  632. result = pd.eval(ex, engine=self.engine, parser=self.parser)
  633. expected = lhs % rhs
  634. tm.assert_almost_equal(result, expected)
  635. expected = _eval_single_bin(expected, arith1, rhs, self.engine)
  636. tm.assert_almost_equal(result, expected)
  637. def check_alignment(self, result, nlhs, ghs, op):
  638. try:
  639. nlhs, ghs = nlhs.align(ghs)
  640. except (ValueError, TypeError, AttributeError):
  641. # ValueError: series frame or frame series align
  642. # TypeError, AttributeError: series or frame with scalar align
  643. pass
  644. else:
  645. expected = eval('nlhs {0} ghs'.format(op))
  646. tm.assert_almost_equal(result, expected)
  647. class TestEvalPythonPandas(TestEvalPythonPython):
  648. @classmethod
  649. def setup_class(cls):
  650. super(TestEvalPythonPandas, cls).setup_class()
  651. cls.engine = 'python'
  652. cls.parser = 'pandas'
  653. def check_chained_cmp_op(self, lhs, cmp1, mid, cmp2, rhs):
  654. TestEvalNumexprPandas.check_chained_cmp_op(self, lhs, cmp1, mid, cmp2,
  655. rhs)
  656. f = lambda *args, **kwargs: np.random.randn()
  657. # -------------------------------------
  658. # gh-12388: Typecasting rules consistency with python
  659. class TestTypeCasting(object):
  660. @pytest.mark.parametrize('op', ['+', '-', '*', '**', '/'])
  661. # maybe someday... numexpr has too many upcasting rules now
  662. # chain(*(np.sctypes[x] for x in ['uint', 'int', 'float']))
  663. @pytest.mark.parametrize('dt', [np.float32, np.float64])
  664. def test_binop_typecasting(self, engine, parser, op, dt):
  665. df = mkdf(5, 3, data_gen_f=f, dtype=dt)
  666. s = 'df {} 3'.format(op)
  667. res = pd.eval(s, engine=engine, parser=parser)
  668. assert df.values.dtype == dt
  669. assert res.values.dtype == dt
  670. assert_frame_equal(res, eval(s))
  671. s = '3 {} df'.format(op)
  672. res = pd.eval(s, engine=engine, parser=parser)
  673. assert df.values.dtype == dt
  674. assert res.values.dtype == dt
  675. assert_frame_equal(res, eval(s))
  676. # -------------------------------------
  677. # Basic and complex alignment
  678. def _is_datetime(x):
  679. return issubclass(x.dtype.type, np.datetime64)
  680. def should_warn(*args):
  681. not_mono = not any(map(operator.attrgetter('is_monotonic'), args))
  682. only_one_dt = reduce(operator.xor, map(_is_datetime, args))
  683. return not_mono and only_one_dt
  684. class TestAlignment(object):
  685. index_types = 'i', 'u', 'dt'
  686. lhs_index_types = index_types + ('s',) # 'p'
  687. def test_align_nested_unary_op(self, engine, parser):
  688. s = 'df * ~2'
  689. df = mkdf(5, 3, data_gen_f=f)
  690. res = pd.eval(s, engine=engine, parser=parser)
  691. assert_frame_equal(res, df * ~2)
  692. def test_basic_frame_alignment(self, engine, parser):
  693. args = product(self.lhs_index_types, self.index_types,
  694. self.index_types)
  695. with warnings.catch_warnings(record=True):
  696. warnings.simplefilter('always', RuntimeWarning)
  697. for lr_idx_type, rr_idx_type, c_idx_type in args:
  698. df = mkdf(10, 10, data_gen_f=f, r_idx_type=lr_idx_type,
  699. c_idx_type=c_idx_type)
  700. df2 = mkdf(20, 10, data_gen_f=f, r_idx_type=rr_idx_type,
  701. c_idx_type=c_idx_type)
  702. # only warns if not monotonic and not sortable
  703. if should_warn(df.index, df2.index):
  704. with tm.assert_produces_warning(RuntimeWarning):
  705. res = pd.eval('df + df2', engine=engine, parser=parser)
  706. else:
  707. res = pd.eval('df + df2', engine=engine, parser=parser)
  708. assert_frame_equal(res, df + df2)
  709. def test_frame_comparison(self, engine, parser):
  710. args = product(self.lhs_index_types, repeat=2)
  711. for r_idx_type, c_idx_type in args:
  712. df = mkdf(10, 10, data_gen_f=f, r_idx_type=r_idx_type,
  713. c_idx_type=c_idx_type)
  714. res = pd.eval('df < 2', engine=engine, parser=parser)
  715. assert_frame_equal(res, df < 2)
  716. df3 = DataFrame(randn(*df.shape), index=df.index,
  717. columns=df.columns)
  718. res = pd.eval('df < df3', engine=engine, parser=parser)
  719. assert_frame_equal(res, df < df3)
  720. @pytest.mark.slow
  721. def test_medium_complex_frame_alignment(self, engine, parser):
  722. args = product(self.lhs_index_types, self.index_types,
  723. self.index_types, self.index_types)
  724. with warnings.catch_warnings(record=True):
  725. warnings.simplefilter('always', RuntimeWarning)
  726. for r1, c1, r2, c2 in args:
  727. df = mkdf(3, 2, data_gen_f=f, r_idx_type=r1, c_idx_type=c1)
  728. df2 = mkdf(4, 2, data_gen_f=f, r_idx_type=r2, c_idx_type=c2)
  729. df3 = mkdf(5, 2, data_gen_f=f, r_idx_type=r2, c_idx_type=c2)
  730. if should_warn(df.index, df2.index, df3.index):
  731. with tm.assert_produces_warning(RuntimeWarning):
  732. res = pd.eval('df + df2 + df3', engine=engine,
  733. parser=parser)
  734. else:
  735. res = pd.eval('df + df2 + df3',
  736. engine=engine, parser=parser)
  737. assert_frame_equal(res, df + df2 + df3)
  738. def test_basic_frame_series_alignment(self, engine, parser):
  739. def testit(r_idx_type, c_idx_type, index_name):
  740. df = mkdf(10, 10, data_gen_f=f, r_idx_type=r_idx_type,
  741. c_idx_type=c_idx_type)
  742. index = getattr(df, index_name)
  743. s = Series(np.random.randn(5), index[:5])
  744. if should_warn(df.index, s.index):
  745. with tm.assert_produces_warning(RuntimeWarning):
  746. res = pd.eval('df + s', engine=engine, parser=parser)
  747. else:
  748. res = pd.eval('df + s', engine=engine, parser=parser)
  749. if r_idx_type == 'dt' or c_idx_type == 'dt':
  750. expected = df.add(s) if engine == 'numexpr' else df + s
  751. else:
  752. expected = df + s
  753. assert_frame_equal(res, expected)
  754. args = product(self.lhs_index_types, self.index_types,
  755. ('index', 'columns'))
  756. with warnings.catch_warnings(record=True):
  757. warnings.simplefilter('always', RuntimeWarning)
  758. for r_idx_type, c_idx_type, index_name in args:
  759. testit(r_idx_type, c_idx_type, index_name)
  760. def test_basic_series_frame_alignment(self, engine, parser):
  761. def testit(r_idx_type, c_idx_type, index_name):
  762. df = mkdf(10, 7, data_gen_f=f, r_idx_type=r_idx_type,
  763. c_idx_type=c_idx_type)
  764. index = getattr(df, index_name)
  765. s = Series(np.random.randn(5), index[:5])
  766. if should_warn(s.index, df.index):
  767. with tm.assert_produces_warning(RuntimeWarning):
  768. res = pd.eval('s + df', engine=engine, parser=parser)
  769. else:
  770. res = pd.eval('s + df', engine=engine, parser=parser)
  771. if r_idx_type == 'dt' or c_idx_type == 'dt':
  772. expected = df.add(s) if engine == 'numexpr' else s + df
  773. else:
  774. expected = s + df
  775. assert_frame_equal(res, expected)
  776. # only test dt with dt, otherwise weird joins result
  777. args = product(['i', 'u', 's'], ['i', 'u', 's'], ('index', 'columns'))
  778. with warnings.catch_warnings(record=True):
  779. # avoid warning about comparing strings and ints
  780. warnings.simplefilter("ignore", RuntimeWarning)
  781. for r_idx_type, c_idx_type, index_name in args:
  782. testit(r_idx_type, c_idx_type, index_name)
  783. # dt with dt
  784. args = product(['dt'], ['dt'], ('index', 'columns'))
  785. with warnings.catch_warnings(record=True):
  786. # avoid warning about comparing strings and ints
  787. warnings.simplefilter("ignore", RuntimeWarning)
  788. for r_idx_type, c_idx_type, index_name in args:
  789. testit(r_idx_type, c_idx_type, index_name)
  790. def test_series_frame_commutativity(self, engine, parser):
  791. args = product(self.lhs_index_types, self.index_types, ('+', '*'),
  792. ('index', 'columns'))
  793. with warnings.catch_warnings(record=True):
  794. warnings.simplefilter('always', RuntimeWarning)
  795. for r_idx_type, c_idx_type, op, index_name in args:
  796. df = mkdf(10, 10, data_gen_f=f, r_idx_type=r_idx_type,
  797. c_idx_type=c_idx_type)
  798. index = getattr(df, index_name)
  799. s = Series(np.random.randn(5), index[:5])
  800. lhs = 's {0} df'.format(op)
  801. rhs = 'df {0} s'.format(op)
  802. if should_warn(df.index, s.index):
  803. with tm.assert_produces_warning(RuntimeWarning):
  804. a = pd.eval(lhs, engine=engine, parser=parser)
  805. with tm.assert_produces_warning(RuntimeWarning):
  806. b = pd.eval(rhs, engine=engine, parser=parser)
  807. else:
  808. a = pd.eval(lhs, engine=engine, parser=parser)
  809. b = pd.eval(rhs, engine=engine, parser=parser)
  810. if r_idx_type != 'dt' and c_idx_type != 'dt':
  811. if engine == 'numexpr':
  812. assert_frame_equal(a, b)
  813. @pytest.mark.slow
  814. def test_complex_series_frame_alignment(self, engine, parser):
  815. import random
  816. args = product(self.lhs_index_types, self.index_types,
  817. self.index_types, self.index_types)
  818. n = 3
  819. m1 = 5
  820. m2 = 2 * m1
  821. with warnings.catch_warnings(record=True):
  822. warnings.simplefilter('always', RuntimeWarning)
  823. for r1, r2, c1, c2 in args:
  824. index_name = random.choice(['index', 'columns'])
  825. obj_name = random.choice(['df', 'df2'])
  826. df = mkdf(m1, n, data_gen_f=f, r_idx_type=r1, c_idx_type=c1)
  827. df2 = mkdf(m2, n, data_gen_f=f, r_idx_type=r2, c_idx_type=c2)
  828. index = getattr(locals().get(obj_name), index_name)
  829. s = Series(np.random.randn(n), index[:n])
  830. if r2 == 'dt' or c2 == 'dt':
  831. if engine == 'numexpr':
  832. expected2 = df2.add(s)
  833. else:
  834. expected2 = df2 + s
  835. else:
  836. expected2 = df2 + s
  837. if r1 == 'dt' or c1 == 'dt':
  838. if engine == 'numexpr':
  839. expected = expected2.add(df)
  840. else:
  841. expected = expected2 + df
  842. else:
  843. expected = expected2 + df
  844. if should_warn(df2.index, s.index, df.index):
  845. with tm.assert_produces_warning(RuntimeWarning):
  846. res = pd.eval('df2 + s + df', engine=engine,
  847. parser=parser)
  848. else:
  849. res = pd.eval('df2 + s + df', engine=engine, parser=parser)
  850. assert res.shape == expected.shape
  851. assert_frame_equal(res, expected)
  852. def test_performance_warning_for_poor_alignment(self, engine, parser):
  853. df = DataFrame(randn(1000, 10))
  854. s = Series(randn(10000))
  855. if engine == 'numexpr':
  856. seen = PerformanceWarning
  857. else:
  858. seen = False
  859. with assert_produces_warning(seen):
  860. pd.eval('df + s', engine=engine, parser=parser)
  861. s = Series(randn(1000))
  862. with assert_produces_warning(False):
  863. pd.eval('df + s', engine=engine, parser=parser)
  864. df = DataFrame(randn(10, 10000))
  865. s = Series(randn(10000))
  866. with assert_produces_warning(False):
  867. pd.eval('df + s', engine=engine, parser=parser)
  868. df = DataFrame(randn(10, 10))
  869. s = Series(randn(10000))
  870. is_python_engine = engine == 'python'
  871. if not is_python_engine:
  872. wrn = PerformanceWarning
  873. else:
  874. wrn = False
  875. with assert_produces_warning(wrn) as w:
  876. pd.eval('df + s', engine=engine, parser=parser)
  877. if not is_python_engine:
  878. assert len(w) == 1
  879. msg = str(w[0].message)
  880. expected = ("Alignment difference on axis {0} is larger"
  881. " than an order of magnitude on term {1!r}, "
  882. "by more than {2:.4g}; performance may suffer"
  883. "".format(1, 'df', np.log10(s.size - df.shape[1])))
  884. assert msg == expected
  885. # ------------------------------------
  886. # Slightly more complex ops
  887. @td.skip_if_no_ne
  888. class TestOperationsNumExprPandas(object):
  889. @classmethod
  890. def setup_class(cls):
  891. cls.engine = 'numexpr'
  892. cls.parser = 'pandas'
  893. cls.arith_ops = expr._arith_ops_syms + expr._cmp_ops_syms
  894. @classmethod
  895. def teardown_class(cls):
  896. del cls.engine, cls.parser
  897. def eval(self, *args, **kwargs):
  898. kwargs['engine'] = self.engine
  899. kwargs['parser'] = self.parser
  900. kwargs['level'] = kwargs.pop('level', 0) + 1
  901. return pd.eval(*args, **kwargs)
  902. def test_simple_arith_ops(self):
  903. ops = self.arith_ops
  904. for op in filter(lambda x: x != '//', ops):
  905. ex = '1 {0} 1'.format(op)
  906. ex2 = 'x {0} 1'.format(op)
  907. ex3 = '1 {0} (x + 1)'.format(op)
  908. if op in ('in', 'not in'):
  909. pytest.raises(TypeError, pd.eval, ex,
  910. engine=self.engine, parser=self.parser)
  911. else:
  912. expec = _eval_single_bin(1, op, 1, self.engine)
  913. x = self.eval(ex, engine=self.engine, parser=self.parser)
  914. assert x == expec
  915. expec = _eval_single_bin(x, op, 1, self.engine)
  916. y = self.eval(ex2, local_dict={'x': x}, engine=self.engine,
  917. parser=self.parser)
  918. assert y == expec
  919. expec = _eval_single_bin(1, op, x + 1, self.engine)
  920. y = self.eval(ex3, local_dict={'x': x},
  921. engine=self.engine, parser=self.parser)
  922. assert y == expec
  923. def test_simple_bool_ops(self):
  924. for op, lhs, rhs in product(expr._bool_ops_syms, (True, False),
  925. (True, False)):
  926. ex = '{0} {1} {2}'.format(lhs, op, rhs)
  927. res = self.eval(ex)
  928. exp = eval(ex)
  929. assert res == exp
  930. def test_bool_ops_with_constants(self):
  931. for op, lhs, rhs in product(expr._bool_ops_syms, ('True', 'False'),
  932. ('True', 'False')):
  933. ex = '{0} {1} {2}'.format(lhs, op, rhs)
  934. res = self.eval(ex)
  935. exp = eval(ex)
  936. assert res == exp
  937. @pytest.mark.filterwarnings("ignore::FutureWarning")
  938. def test_panel_fails(self):
  939. x = Panel(randn(3, 4, 5))
  940. y = Series(randn(10))
  941. with pytest.raises(NotImplementedError):
  942. self.eval('x + y',
  943. local_dict={'x': x, 'y': y})
  944. def test_4d_ndarray_fails(self):
  945. x = randn(3, 4, 5, 6)
  946. y = Series(randn(10))
  947. with pytest.raises(NotImplementedError):
  948. self.eval('x + y',
  949. local_dict={'x': x, 'y': y})
  950. def test_constant(self):
  951. x = self.eval('1')
  952. assert x == 1
  953. def test_single_variable(self):
  954. df = DataFrame(randn(10, 2))
  955. df2 = self.eval('df', local_dict={'df': df})
  956. assert_frame_equal(df, df2)
  957. def test_truediv(self):
  958. s = np.array([1])
  959. ex = 's / 1'
  960. d = {'s': s} # noqa
  961. if PY3:
  962. res = self.eval(ex, truediv=False)
  963. tm.assert_numpy_array_equal(res, np.array([1.0]))
  964. res = self.eval(ex, truediv=True)
  965. tm.assert_numpy_array_equal(res, np.array([1.0]))
  966. res = self.eval('1 / 2', truediv=True)
  967. expec = 0.5
  968. assert res == expec
  969. res = self.eval('1 / 2', truediv=False)
  970. expec = 0.5
  971. assert res == expec
  972. res = self.eval('s / 2', truediv=False)
  973. expec = 0.5
  974. assert res == expec
  975. res = self.eval('s / 2', truediv=True)
  976. expec = 0.5
  977. assert res == expec
  978. else:
  979. res = self.eval(ex, truediv=False)
  980. tm.assert_numpy_array_equal(res, np.array([1]))
  981. res = self.eval(ex, truediv=True)
  982. tm.assert_numpy_array_equal(res, np.array([1.0]))
  983. res = self.eval('1 / 2', truediv=True)
  984. expec = 0.5
  985. assert res == expec
  986. res = self.eval('1 / 2', truediv=False)
  987. expec = 0
  988. assert res == expec
  989. res = self.eval('s / 2', truediv=False)
  990. expec = 0
  991. assert res == expec
  992. res = self.eval('s / 2', truediv=True)
  993. expec = 0.5
  994. assert res == expec
  995. def test_failing_subscript_with_name_error(self):
  996. df = DataFrame(np.random.randn(5, 3)) # noqa
  997. with pytest.raises(NameError):
  998. self.eval('df[x > 2] > 2')
  999. def test_lhs_expression_subscript(self):
  1000. df = DataFrame(np.random.randn(5, 3))
  1001. result = self.eval('(df + 1)[df > 2]', local_dict={'df': df})
  1002. expected = (df + 1)[df > 2]
  1003. assert_frame_equal(result, expected)
  1004. def test_attr_expression(self):
  1005. df = DataFrame(np.random.randn(5, 3), columns=list('abc'))
  1006. expr1 = 'df.a < df.b'
  1007. expec1 = df.a < df.b
  1008. expr2 = 'df.a + df.b + df.c'
  1009. expec2 = df.a + df.b + df.c
  1010. expr3 = 'df.a + df.b + df.c[df.b < 0]'
  1011. expec3 = df.a + df.b + df.c[df.b < 0]
  1012. exprs = expr1, expr2, expr3
  1013. expecs = expec1, expec2, expec3
  1014. for e, expec in zip(exprs, expecs):
  1015. assert_series_equal(expec, self.eval(e, local_dict={'df': df}))
  1016. def test_assignment_fails(self):
  1017. df = DataFrame(np.random.randn(5, 3), columns=list('abc'))
  1018. df2 = DataFrame(np.random.randn(5, 3))
  1019. expr1 = 'df = df2'
  1020. pytest.raises(ValueError, self.eval, expr1,
  1021. local_dict={'df': df, 'df2': df2})
  1022. def test_assignment_column(self):
  1023. df = DataFrame(np.random.randn(5, 2), columns=list('ab'))
  1024. orig_df = df.copy()
  1025. # multiple assignees
  1026. pytest.raises(SyntaxError, df.eval, 'd c = a + b')
  1027. # invalid assignees
  1028. pytest.raises(SyntaxError, df.eval, 'd,c = a + b')
  1029. pytest.raises(SyntaxError, df.eval, 'Timestamp("20131001") = a + b')
  1030. # single assignment - existing variable
  1031. expected = orig_df.copy()
  1032. expected['a'] = expected['a'] + expected['b']
  1033. df = orig_df.copy()
  1034. df.eval('a = a + b', inplace=True)
  1035. assert_frame_equal(df, expected)
  1036. # single assignment - new variable
  1037. expected = orig_df.copy()
  1038. expected['c'] = expected['a'] + expected['b']
  1039. df = orig_df.copy()
  1040. df.eval('c = a + b', inplace=True)
  1041. assert_frame_equal(df, expected)
  1042. # with a local name overlap
  1043. def f():
  1044. df = orig_df.copy()
  1045. a = 1 # noqa
  1046. df.eval('a = 1 + b', inplace=True)
  1047. return df
  1048. df = f()
  1049. expected = orig_df.copy()
  1050. expected['a'] = 1 + expected['b']
  1051. assert_frame_equal(df, expected)
  1052. df = orig_df.copy()
  1053. def f():
  1054. a = 1 # noqa
  1055. old_a = df.a.copy()
  1056. df.eval('a = a + b', inplace=True)
  1057. result = old_a + df.b
  1058. assert_series_equal(result, df.a, check_names=False)
  1059. assert result.name is None
  1060. f()
  1061. # multiple assignment
  1062. df = orig_df.copy()
  1063. df.eval('c = a + b', inplace=True)
  1064. pytest.raises(SyntaxError, df.eval, 'c = a = b')
  1065. # explicit targets
  1066. df = orig_df.copy()
  1067. self.eval('c = df.a + df.b', local_dict={'df': df},
  1068. target=df, inplace=True)
  1069. expected = orig_df.copy()
  1070. expected['c'] = expected['a'] + expected['b']
  1071. assert_frame_equal(df, expected)
  1072. def test_column_in(self):
  1073. # GH 11235
  1074. df = DataFrame({'a': [11], 'b': [-32]})
  1075. result = df.eval('a in [11, -32]')
  1076. expected = Series([True])
  1077. assert_series_equal(result, expected)
  1078. def assignment_not_inplace(self):
  1079. # see gh-9297
  1080. df = DataFrame(np.random.randn(5, 2), columns=list('ab'))
  1081. actual = df.eval('c = a + b', inplace=False)
  1082. assert actual is not None
  1083. expected = df.copy()
  1084. expected['c'] = expected['a'] + expected['b']
  1085. tm.assert_frame_equal(df, expected)
  1086. def test_multi_line_expression(self):
  1087. # GH 11149
  1088. df = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]})
  1089. expected = df.copy()
  1090. expected['c'] = expected['a'] + expected['b']
  1091. expected['d'] = expected['c'] + expected['b']
  1092. ans = df.eval("""
  1093. c = a + b
  1094. d = c + b""", inplace=True)
  1095. assert_frame_equal(expected, df)
  1096. assert ans is None
  1097. expected['a'] = expected['a'] - 1
  1098. expected['e'] = expected['a'] + 2
  1099. ans = df.eval("""
  1100. a = a - 1
  1101. e = a + 2""", inplace=True)
  1102. assert_frame_equal(expected, df)
  1103. assert ans is None
  1104. # multi-line not valid if not all assignments
  1105. with pytest.raises(ValueError):
  1106. df.eval("""
  1107. a = b + 2
  1108. b - 2""", inplace=False)
  1109. def test_multi_line_expression_not_inplace(self):
  1110. # GH 11149
  1111. df = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]})
  1112. expected = df.copy()
  1113. expected['c'] = expected['a'] + expected['b']
  1114. expected['d'] = expected['c'] + expected['b']
  1115. df = df.eval("""
  1116. c = a + b
  1117. d = c + b""", inplace=False)
  1118. assert_frame_equal(expected, df)
  1119. expected['a'] = expected['a'] - 1
  1120. expected['e'] = expected['a'] + 2
  1121. df = df.eval("""
  1122. a = a - 1
  1123. e = a + 2""", inplace=False)
  1124. assert_frame_equal(expected, df)
  1125. def test_multi_line_expression_local_variable(self):
  1126. # GH 15342
  1127. df = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]})
  1128. expected = df.copy()
  1129. local_var = 7
  1130. expected['c'] = expected['a'] * local_var
  1131. expected['d'] = expected['c'] + local_var
  1132. ans = df.eval("""
  1133. c = a * @local_var
  1134. d = c + @local_var
  1135. """, inplace=True)
  1136. assert_frame_equal(expected, df)
  1137. assert ans is None
  1138. def test_assignment_in_query(self):
  1139. # GH 8664
  1140. df = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]})
  1141. df_orig = df.copy()
  1142. with pytest.raises(ValueError):
  1143. df.query('a = 1')
  1144. assert_frame_equal(df, df_orig)
  1145. def test_query_inplace(self):
  1146. # see gh-11149
  1147. df = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]})
  1148. expected = df.copy()
  1149. expected = expected[expected['a'] == 2]
  1150. df.query('a == 2', inplace=True)
  1151. assert_frame_equal(expected, df)
  1152. df = {}
  1153. expected = {"a": 3}
  1154. self.eval("a = 1 + 2", target=df, inplace=True)
  1155. tm.assert_dict_equal(df, expected)
  1156. @pytest.mark.parametrize("invalid_target", [1, "cat", [1, 2],
  1157. np.array([]), (1, 3)])
  1158. @pytest.mark.filterwarnings("ignore::FutureWarning")
  1159. def test_cannot_item_assign(self, invalid_target):
  1160. msg = "Cannot assign expression output to target"
  1161. expression = "a = 1 + 2"
  1162. with pytest.raises(ValueError, match=msg):
  1163. self.eval(expression, target=invalid_target, inplace=True)
  1164. if hasattr(invalid_target, "copy"):
  1165. with pytest.raises(ValueError, match=msg):
  1166. self.eval(expression, target=invalid_target, inplace=False)
  1167. @pytest.mark.parametrize("invalid_target", [1, "cat", (1, 3)])
  1168. def test_cannot_copy_item(self, invalid_target):
  1169. msg = "Cannot return a copy of the target"
  1170. expression = "a = 1 + 2"
  1171. with pytest.raises(ValueError, match=msg):
  1172. self.eval(expression, target=invalid_target, inplace=False)
  1173. @pytest.mark.parametrize("target", [1, "cat", [1, 2],
  1174. np.array([]), (1, 3), {1: 2}])
  1175. def test_inplace_no_assignment(self, target):
  1176. expression = "1 + 2"
  1177. assert self.eval(expression, target=target, inplace=False) == 3
  1178. msg = "Cannot operate inplace if there is no assignment"
  1179. with pytest.raises(ValueError, match=msg):
  1180. self.eval(expression, target=target, inplace=True)
  1181. def test_basic_period_index_boolean_expression(self):
  1182. df = mkdf(2, 2, data_gen_f=f, c_idx_type='p', r_idx_type='i')
  1183. e = df < 2
  1184. r = self.eval('df < 2', local_dict={'df': df})
  1185. x = df < 2
  1186. assert_frame_equal(r, e)
  1187. assert_frame_equal(x, e)
  1188. def test_basic_period_index_subscript_expression(self):
  1189. df = mkdf(2, 2, data_gen_f=f, c_idx_type='p', r_idx_type='i')
  1190. r = self.eval('df[df < 2 + 3]', local_dict={'df': df})
  1191. e = df[df < 2 + 3]
  1192. assert_frame_equal(r, e)
  1193. def test_nested_period_index_subscript_expression(self):
  1194. df = mkdf(2, 2, data_gen_f=f, c_idx_type='p', r_idx_type='i')
  1195. r = self.eval('df[df[df < 2] < 2] + df * 2', local_dict={'df': df})
  1196. e = df[df[df < 2] < 2] + df * 2
  1197. assert_frame_equal(r, e)
  1198. def test_date_boolean(self):
  1199. df = DataFrame(randn(5, 3))
  1200. df['dates1'] = date_range('1/1/2012', periods=5)
  1201. res = self.eval('df.dates1 < 20130101', local_dict={'df': df},
  1202. engine=self.engine, parser=self.parser)
  1203. expec = df.dates1 < '20130101'
  1204. assert_series_equal(res, expec, check_names=False)
  1205. def test_simple_in_ops(self):
  1206. if self.parser != 'python':
  1207. res = pd.eval('1 in [1, 2]', engine=self.engine,
  1208. parser=self.parser)
  1209. assert res
  1210. res = pd.eval('2 in (1, 2)', engine=self.engine,
  1211. parser=self.parser)
  1212. assert res
  1213. res = pd.eval('3 in (1, 2)', engine=self.engine,
  1214. parser=self.parser)
  1215. assert not res
  1216. res = pd.eval('3 not in (1, 2)', engine=self.engine,
  1217. parser=self.parser)
  1218. assert res
  1219. res = pd.eval('[3] not in (1, 2)', engine=self.engine,
  1220. parser=self.parser)
  1221. assert res
  1222. res = pd.eval('[3] in ([3], 2)', engine=self.engine,
  1223. parser=self.parser)
  1224. assert res
  1225. res = pd.eval('[[3]] in [[[3]], 2]', engine=self.engine,
  1226. parser=self.parser)
  1227. assert res
  1228. res = pd.eval('(3,) in [(3,), 2]', engine=self.engine,
  1229. parser=self.parser)
  1230. assert res
  1231. res = pd.eval('(3,) not in [(3,), 2]', engine=self.engine,
  1232. parser=self.parser)
  1233. assert not res
  1234. res = pd.eval('[(3,)] in [[(3,)], 2]', engine=self.engine,
  1235. parser=self.parser)
  1236. assert res
  1237. else:
  1238. with pytest.raises(NotImplementedError):
  1239. pd.eval('1 in [1, 2]', engine=self.engine, parser=self.parser)
  1240. with pytest.raises(NotImplementedError):
  1241. pd.eval('2 in (1, 2)', engine=self.engine, parser=self.parser)
  1242. with pytest.raises(NotImplementedError):
  1243. pd.eval('3 in (1, 2)', engine=self.engine, parser=self.parser)
  1244. with pytest.raises(NotImplementedError):
  1245. pd.eval('3 not in (1, 2)', engine=self.engine,
  1246. parser=self.parser)
  1247. with pytest.raises(NotImplementedError):
  1248. pd.eval('[(3,)] in (1, 2, [(3,)])', engine=self.engine,
  1249. parser=self.parser)
  1250. with pytest.raises(NotImplementedError):
  1251. pd.eval('[3] not in (1, 2, [[3]])', engine=self.engine,
  1252. parser=self.parser)
  1253. @td.skip_if_no_ne
  1254. class TestOperationsNumExprPython(TestOperationsNumExprPandas):
  1255. @classmethod
  1256. def setup_class(cls):
  1257. super(TestOperationsNumExprPython, cls).setup_class()
  1258. cls.engine = 'numexpr'
  1259. cls.parser = 'python'
  1260. cls.arith_ops = expr._arith_ops_syms + expr._cmp_ops_syms
  1261. cls.arith_ops = filter(lambda x: x not in ('in', 'not in'),
  1262. cls.arith_ops)
  1263. def test_check_many_exprs(self):
  1264. a = 1 # noqa
  1265. expr = ' * '.join('a' * 33)
  1266. expected = 1
  1267. res = pd.eval(expr, engine=self.engine, parser=self.parser)
  1268. assert res == expected
  1269. def test_fails_and(self):
  1270. df = DataFrame(np.random.randn(5, 3))
  1271. pytest.raises(NotImplementedError, pd.eval, 'df > 2 and df > 3',
  1272. local_dict={'df': df}, parser=self.parser,
  1273. engine=self.engine)
  1274. def test_fails_or(self):
  1275. df = DataFrame(np.random.randn(5, 3))
  1276. pytest.raises(NotImplementedError, pd.eval, 'df > 2 or df > 3',
  1277. local_dict={'df': df}, parser=self.parser,
  1278. engine=self.engine)
  1279. def test_fails_not(self):
  1280. df = DataFrame(np.random.randn(5, 3))
  1281. pytest.raises(NotImplementedError, pd.eval, 'not df > 2',
  1282. local_dict={'df': df}, parser=self.parser,
  1283. engine=self.engine)
  1284. def test_fails_ampersand(self):
  1285. df = DataFrame(np.random.randn(5, 3)) # noqa
  1286. ex = '(df + 2)[df > 1] > 0 & (df > 0)'
  1287. with pytest.raises(NotImplementedError):
  1288. pd.eval(ex, parser=self.parser, engine=self.engine)
  1289. def test_fails_pipe(self):
  1290. df = DataFrame(np.random.randn(5, 3)) # noqa
  1291. ex = '(df + 2)[df > 1] > 0 | (df > 0)'
  1292. with pytest.raises(NotImplementedError):
  1293. pd.eval(ex, parser=self.parser, engine=self.engine)
  1294. def test_bool_ops_with_constants(self):
  1295. for op, lhs, rhs in product(expr._bool_ops_syms, ('True', 'False'),
  1296. ('True', 'False')):
  1297. ex = '{0} {1} {2}'.format(lhs, op, rhs)
  1298. if op in ('and', 'or'):
  1299. with pytest.raises(NotImplementedError):
  1300. self.eval(ex)
  1301. else:
  1302. res = self.eval(ex)
  1303. exp = eval(ex)
  1304. assert res == exp
  1305. def test_simple_bool_ops(self):
  1306. for op, lhs, rhs in product(expr._bool_ops_syms, (True, False),
  1307. (True, False)):
  1308. ex = 'lhs {0} rhs'.format(op)
  1309. if op in ('and', 'or'):
  1310. with pytest.raises(NotImplementedError):
  1311. pd.eval(ex, engine=self.engine, parser=self.parser)
  1312. else:
  1313. res = pd.eval(ex, engine=self.engine, parser=self.parser)
  1314. exp = eval(ex)
  1315. assert res == exp
  1316. class TestOperationsPythonPython(TestOperationsNumExprPython):
  1317. @classmethod
  1318. def setup_class(cls):
  1319. super(TestOperationsPythonPython, cls).setup_class()
  1320. cls.engine = cls.parser = 'python'
  1321. cls.arith_ops = expr._arith_ops_syms + expr._cmp_ops_syms
  1322. cls.arith_ops = filter(lambda x: x not in ('in', 'not in'),
  1323. cls.arith_ops)
  1324. class TestOperationsPythonPandas(TestOperationsNumExprPandas):
  1325. @classmethod
  1326. def setup_class(cls):
  1327. super(TestOperationsPythonPandas, cls).setup_class()
  1328. cls.engine = 'python'
  1329. cls.parser = 'pandas'
  1330. cls.arith_ops = expr._arith_ops_syms + expr._cmp_ops_syms
  1331. @td.skip_if_no_ne
  1332. class TestMathPythonPython(object):
  1333. @classmethod
  1334. def setup_class(cls):
  1335. cls.engine = 'python'
  1336. cls.parser = 'pandas'
  1337. cls.unary_fns = _unary_math_ops
  1338. cls.binary_fns = _binary_math_ops
  1339. @classmethod
  1340. def teardown_class(cls):
  1341. del cls.engine, cls.parser
  1342. def eval(self, *args, **kwargs):
  1343. kwargs['engine'] = self.engine
  1344. kwargs['parser'] = self.parser
  1345. kwargs['level'] = kwargs.pop('level', 0) + 1
  1346. return pd.eval(*args, **kwargs)
  1347. def test_unary_functions(self, unary_fns_for_ne):
  1348. df = DataFrame({'a': np.random.randn(10)})
  1349. a = df.a
  1350. for fn in unary_fns_for_ne:
  1351. expr = "{0}(a)".format(fn)
  1352. got = self.eval(expr)
  1353. with np.errstate(all='ignore'):
  1354. expect = getattr(np, fn)(a)
  1355. tm.assert_series_equal(got, expect, check_names=False)
  1356. def test_floor_and_ceil_functions_raise_error(self,
  1357. ne_lt_2_6_9,
  1358. unary_fns_for_ne):
  1359. for fn in ('floor', 'ceil'):
  1360. msg = "\"{0}\" is not a supported function".format(fn)
  1361. with pytest.raises(ValueError, match=msg):
  1362. expr = "{0}(100)".format(fn)
  1363. self.eval(expr)
  1364. def test_binary_functions(self):
  1365. df = DataFrame({'a': np.random.randn(10),
  1366. 'b': np.random.randn(10)})
  1367. a = df.a
  1368. b = df.b
  1369. for fn in self.binary_fns:
  1370. expr = "{0}(a, b)".format(fn)
  1371. got = self.eval(expr)
  1372. with np.errstate(all='ignore'):
  1373. expect = getattr(np, fn)(a, b)
  1374. tm.assert_almost_equal(got, expect, check_names=False)
  1375. def test_df_use_case(self):
  1376. df = DataFrame({'a': np.random.randn(10),
  1377. 'b': np.random.randn(10)})
  1378. df.eval("e = arctan2(sin(a), b)",
  1379. engine=self.engine,
  1380. parser=self.parser, inplace=True)
  1381. got = df.e
  1382. expect = np.arctan2(np.sin(df.a), df.b)
  1383. tm.assert_series_equal(got, expect, check_names=False)
  1384. def test_df_arithmetic_subexpression(self):
  1385. df = DataFrame({'a': np.random.randn(10),
  1386. 'b': np.random.randn(10)})
  1387. df.eval("e = sin(a + b)",
  1388. engine=self.engine,
  1389. parser=self.parser, inplace=True)
  1390. got = df.e
  1391. expect = np.sin(df.a + df.b)
  1392. tm.assert_series_equal(got, expect, check_names=False)
  1393. def check_result_type(self, dtype, expect_dtype):
  1394. df = DataFrame({'a': np.random.randn(10).astype(dtype)})
  1395. assert df.a.dtype == dtype
  1396. df.eval("b = sin(a)",
  1397. engine=self.engine,
  1398. parser=self.parser, inplace=True)
  1399. got = df.b
  1400. expect = np.sin(df.a)
  1401. assert expect.dtype == got.dtype
  1402. assert expect_dtype == got.dtype
  1403. tm.assert_series_equal(got, expect, check_names=False)
  1404. def test_result_types(self):
  1405. self.check_result_type(np.int32, np.float64)
  1406. self.check_result_type(np.int64, np.float64)
  1407. self.check_result_type(np.float32, np.float32)
  1408. self.check_result_type(np.float64, np.float64)
  1409. def test_result_types2(self):
  1410. # xref https://github.com/pandas-dev/pandas/issues/12293
  1411. pytest.skip("unreliable tests on complex128")
  1412. # Did not test complex64 because DataFrame is converting it to
  1413. # complex128. Due to https://github.com/pandas-dev/pandas/issues/10952
  1414. self.check_result_type(np.complex128, np.complex128)
  1415. def test_undefined_func(self):
  1416. df = DataFrame({'a': np.random.randn(10)})
  1417. msg = "\"mysin\" is not a supported function"
  1418. with pytest.raises(ValueError, match=msg):
  1419. df.eval("mysin(a)",
  1420. engine=self.engine,
  1421. parser=self.parser)
  1422. def test_keyword_arg(self):
  1423. df = DataFrame({'a': np.random.randn(10)})
  1424. msg = "Function \"sin\" does not support keyword arguments"
  1425. with pytest.raises(TypeError, match=msg):
  1426. df.eval("sin(x=a)",
  1427. engine=self.engine,
  1428. parser=self.parser)
  1429. class TestMathPythonPandas(TestMathPythonPython):
  1430. @classmethod
  1431. def setup_class(cls):
  1432. super(TestMathPythonPandas, cls).setup_class()
  1433. cls.engine = 'python'
  1434. cls.parser = 'pandas'
  1435. class TestMathNumExprPandas(TestMathPythonPython):
  1436. @classmethod
  1437. def setup_class(cls):
  1438. super(TestMathNumExprPandas, cls).setup_class()
  1439. cls.engine = 'numexpr'
  1440. cls.parser = 'pandas'
  1441. class TestMathNumExprPython(TestMathPythonPython):
  1442. @classmethod
  1443. def setup_class(cls):
  1444. super(TestMathNumExprPython, cls).setup_class()
  1445. cls.engine = 'numexpr'
  1446. cls.parser = 'python'
  1447. _var_s = randn(10)
  1448. class TestScope(object):
  1449. def test_global_scope(self, engine, parser):
  1450. e = '_var_s * 2'
  1451. tm.assert_numpy_array_equal(_var_s * 2, pd.eval(e, engine=engine,
  1452. parser=parser))
  1453. def test_no_new_locals(self, engine, parser):
  1454. x = 1 # noqa
  1455. lcls = locals().copy()
  1456. pd.eval('x + 1', local_dict=lcls, engine=engine, parser=parser)
  1457. lcls2 = locals().copy()
  1458. lcls2.pop('lcls')
  1459. assert lcls == lcls2
  1460. def test_no_new_globals(self, engine, parser):
  1461. x = 1 # noqa
  1462. gbls = globals().copy()
  1463. pd.eval('x + 1', engine=engine, parser=parser)
  1464. gbls2 = globals().copy()
  1465. assert gbls == gbls2
  1466. @td.skip_if_no_ne
  1467. def test_invalid_engine():
  1468. msg = 'Invalid engine \'asdf\' passed'
  1469. with pytest.raises(KeyError, match=msg):
  1470. pd.eval('x + y', local_dict={'x': 1, 'y': 2}, engine='asdf')
  1471. @td.skip_if_no_ne
  1472. def test_invalid_parser():
  1473. msg = 'Invalid parser \'asdf\' passed'
  1474. with pytest.raises(KeyError, match=msg):
  1475. pd.eval('x + y', local_dict={'x': 1, 'y': 2}, parser='asdf')
  1476. _parsers = {'python': PythonExprVisitor, 'pytables': pytables.ExprVisitor,
  1477. 'pandas': PandasExprVisitor}
  1478. @pytest.mark.parametrize('engine', _engines)
  1479. @pytest.mark.parametrize('parser', _parsers)
  1480. def test_disallowed_nodes(engine, parser):
  1481. VisitorClass = _parsers[parser]
  1482. uns_ops = VisitorClass.unsupported_nodes
  1483. inst = VisitorClass('x + 1', engine, parser)
  1484. for ops in uns_ops:
  1485. with pytest.raises(NotImplementedError):
  1486. getattr(inst, ops)()
  1487. def test_syntax_error_exprs(engine, parser):
  1488. e = 's +'
  1489. with pytest.raises(SyntaxError):
  1490. pd.eval(e, engine=engine, parser=parser)
  1491. def test_name_error_exprs(engine, parser):
  1492. e = 's + t'
  1493. with pytest.raises(NameError):
  1494. pd.eval(e, engine=engine, parser=parser)
  1495. def test_invalid_local_variable_reference(engine, parser):
  1496. a, b = 1, 2 # noqa
  1497. exprs = 'a + @b', '@a + b', '@a + @b'
  1498. for _expr in exprs:
  1499. if parser != 'pandas':
  1500. with pytest.raises(SyntaxError, match="The '@' prefix is only"):
  1501. pd.eval(_expr, engine=engine, parser=parser)
  1502. else:
  1503. with pytest.raises(SyntaxError, match="The '@' prefix is not"):
  1504. pd.eval(_expr, engine=engine, parser=parser)
  1505. def test_numexpr_builtin_raises(engine, parser):
  1506. sin, dotted_line = 1, 2
  1507. if engine == 'numexpr':
  1508. msg = 'Variables in expression .+'
  1509. with pytest.raises(NumExprClobberingError, match=msg):
  1510. pd.eval('sin + dotted_line', engine=engine, parser=parser)
  1511. else:
  1512. res = pd.eval('sin + dotted_line', engine=engine, parser=parser)
  1513. assert res == sin + dotted_line
  1514. def test_bad_resolver_raises(engine, parser):
  1515. cannot_resolve = 42, 3.0
  1516. with pytest.raises(TypeError, match='Resolver of type .+'):
  1517. pd.eval('1 + 2', resolvers=cannot_resolve, engine=engine,
  1518. parser=parser)
  1519. def test_empty_string_raises(engine, parser):
  1520. # GH 13139
  1521. with pytest.raises(ValueError, match="expr cannot be an empty string"):
  1522. pd.eval('', engine=engine, parser=parser)
  1523. def test_more_than_one_expression_raises(engine, parser):
  1524. with pytest.raises(SyntaxError, match=("only a single expression "
  1525. "is allowed")):
  1526. pd.eval('1 + 1; 2 + 2', engine=engine, parser=parser)
  1527. @pytest.mark.parametrize('cmp', ('and', 'or'))
  1528. @pytest.mark.parametrize('lhs', (int, float))
  1529. @pytest.mark.parametrize('rhs', (int, float))
  1530. def test_bool_ops_fails_on_scalars(lhs, cmp, rhs, engine, parser):
  1531. gen = {int: lambda: np.random.randint(10), float: np.random.randn}
  1532. mid = gen[lhs]() # noqa
  1533. lhs = gen[lhs]() # noqa
  1534. rhs = gen[rhs]() # noqa
  1535. ex1 = 'lhs {0} mid {1} rhs'.format(cmp, cmp)
  1536. ex2 = 'lhs {0} mid and mid {1} rhs'.format(cmp, cmp)
  1537. ex3 = '(lhs {0} mid) & (mid {1} rhs)'.format(cmp, cmp)
  1538. for ex in (ex1, ex2, ex3):
  1539. with pytest.raises(NotImplementedError):
  1540. pd.eval(ex, engine=engine, parser=parser)
  1541. def test_inf(engine, parser):
  1542. s = 'inf + 1'
  1543. expected = np.inf
  1544. result = pd.eval(s, engine=engine, parser=parser)
  1545. assert result == expected
  1546. def test_negate_lt_eq_le(engine, parser):
  1547. df = pd.DataFrame([[0, 10], [1, 20]], columns=['cat', 'count'])
  1548. expected = df[~(df.cat > 0)]
  1549. result = df.query('~(cat > 0)', engine=engine, parser=parser)
  1550. tm.assert_frame_equal(result, expected)
  1551. if parser == 'python':
  1552. with pytest.raises(NotImplementedError):
  1553. df.query('not (cat > 0)', engine=engine, parser=parser)
  1554. else:
  1555. result = df.query('not (cat > 0)', engine=engine, parser=parser)
  1556. tm.assert_frame_equal(result, expected)
  1557. class TestValidate(object):
  1558. def test_validate_bool_args(self):
  1559. invalid_values = [1, "True", [1, 2, 3], 5.0]
  1560. for value in invalid_values:
  1561. with pytest.raises(ValueError):
  1562. pd.eval("2+2", inplace=value)