test_numeric.py 39 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076
# -*- coding: utf-8 -*-
# Arithmetic tests for DataFrame/Series/Index/Array classes that should
# behave identically.
# Specifically for numeric dtypes
  5. from decimal import Decimal
  6. from itertools import combinations
  7. import operator
  8. import numpy as np
  9. import pytest
  10. from pandas.compat import PY3, Iterable
  11. import pandas as pd
  12. from pandas import Index, Series, Timedelta, TimedeltaIndex
  13. from pandas.core import ops
  14. import pandas.util.testing as tm
  15. # ------------------------------------------------------------------
  16. # Comparisons
  17. class TestNumericComparisons(object):
  18. def test_operator_series_comparison_zerorank(self):
  19. # GH#13006
  20. result = np.float64(0) > pd.Series([1, 2, 3])
  21. expected = 0.0 > pd.Series([1, 2, 3])
  22. tm.assert_series_equal(result, expected)
  23. result = pd.Series([1, 2, 3]) < np.float64(0)
  24. expected = pd.Series([1, 2, 3]) < 0.0
  25. tm.assert_series_equal(result, expected)
  26. result = np.array([0, 1, 2])[0] > pd.Series([0, 1, 2])
  27. expected = 0.0 > pd.Series([1, 2, 3])
  28. tm.assert_series_equal(result, expected)
  29. def test_df_numeric_cmp_dt64_raises(self):
  30. # GH#8932, GH#22163
  31. ts = pd.Timestamp.now()
  32. df = pd.DataFrame({'x': range(5)})
  33. with pytest.raises(TypeError):
  34. df > ts
  35. with pytest.raises(TypeError):
  36. df < ts
  37. with pytest.raises(TypeError):
  38. ts < df
  39. with pytest.raises(TypeError):
  40. ts > df
  41. assert not (df == ts).any().any()
  42. assert (df != ts).all().all()
  43. def test_compare_invalid(self):
  44. # GH#8058
  45. # ops testing
  46. a = pd.Series(np.random.randn(5), name=0)
  47. b = pd.Series(np.random.randn(5))
  48. b.name = pd.Timestamp('2000-01-01')
  49. tm.assert_series_equal(a / b, 1 / (b / a))
  50. # ------------------------------------------------------------------
  51. # Numeric dtypes Arithmetic with Timedelta Scalar
  52. class TestNumericArraylikeArithmeticWithTimedeltaLike(object):
  53. # TODO: also check name retentention
  54. @pytest.mark.parametrize('box_cls', [np.array, pd.Index, pd.Series])
  55. @pytest.mark.parametrize('left', [
  56. pd.RangeIndex(10, 40, 10)] + [cls([10, 20, 30], dtype=dtype)
  57. for dtype in ['i1', 'i2', 'i4', 'i8',
  58. 'u1', 'u2', 'u4', 'u8',
  59. 'f2', 'f4', 'f8']
  60. for cls in [pd.Series, pd.Index]],
  61. ids=lambda x: type(x).__name__ + str(x.dtype))
  62. def test_mul_td64arr(self, left, box_cls):
  63. # GH#22390
  64. right = np.array([1, 2, 3], dtype='m8[s]')
  65. right = box_cls(right)
  66. expected = pd.TimedeltaIndex(['10s', '40s', '90s'])
  67. if isinstance(left, pd.Series) or box_cls is pd.Series:
  68. expected = pd.Series(expected)
  69. result = left * right
  70. tm.assert_equal(result, expected)
  71. result = right * left
  72. tm.assert_equal(result, expected)
  73. # TODO: also check name retentention
  74. @pytest.mark.parametrize('box_cls', [np.array, pd.Index, pd.Series])
  75. @pytest.mark.parametrize('left', [
  76. pd.RangeIndex(10, 40, 10)] + [cls([10, 20, 30], dtype=dtype)
  77. for dtype in ['i1', 'i2', 'i4', 'i8',
  78. 'u1', 'u2', 'u4', 'u8',
  79. 'f2', 'f4', 'f8']
  80. for cls in [pd.Series, pd.Index]],
  81. ids=lambda x: type(x).__name__ + str(x.dtype))
  82. def test_div_td64arr(self, left, box_cls):
  83. # GH#22390
  84. right = np.array([10, 40, 90], dtype='m8[s]')
  85. right = box_cls(right)
  86. expected = pd.TimedeltaIndex(['1s', '2s', '3s'])
  87. if isinstance(left, pd.Series) or box_cls is pd.Series:
  88. expected = pd.Series(expected)
  89. result = right / left
  90. tm.assert_equal(result, expected)
  91. result = right // left
  92. tm.assert_equal(result, expected)
  93. with pytest.raises(TypeError):
  94. left / right
  95. with pytest.raises(TypeError):
  96. left // right
  97. # TODO: de-duplicate with test_numeric_arr_mul_tdscalar
  98. def test_ops_series(self):
  99. # regression test for G#H8813
  100. td = Timedelta('1 day')
  101. other = pd.Series([1, 2])
  102. expected = pd.Series(pd.to_timedelta(['1 day', '2 days']))
  103. tm.assert_series_equal(expected, td * other)
  104. tm.assert_series_equal(expected, other * td)
  105. # TODO: also test non-nanosecond timedelta64 and Tick objects;
  106. # see test_numeric_arr_rdiv_tdscalar for note on these failing
  107. @pytest.mark.parametrize('scalar_td', [
  108. Timedelta(days=1),
  109. Timedelta(days=1).to_timedelta64(),
  110. Timedelta(days=1).to_pytimedelta()],
  111. ids=lambda x: type(x).__name__)
  112. def test_numeric_arr_mul_tdscalar(self, scalar_td, numeric_idx, box):
  113. # GH#19333
  114. index = numeric_idx
  115. expected = pd.timedelta_range('0 days', '4 days')
  116. index = tm.box_expected(index, box)
  117. expected = tm.box_expected(expected, box)
  118. result = index * scalar_td
  119. tm.assert_equal(result, expected)
  120. commute = scalar_td * index
  121. tm.assert_equal(commute, expected)
  122. def test_numeric_arr_rdiv_tdscalar(self, three_days, numeric_idx, box):
  123. index = numeric_idx[1:3]
  124. expected = TimedeltaIndex(['3 Days', '36 Hours'])
  125. index = tm.box_expected(index, box)
  126. expected = tm.box_expected(expected, box)
  127. result = three_days / index
  128. tm.assert_equal(result, expected)
  129. with pytest.raises(TypeError):
  130. index / three_days
  131. @pytest.mark.parametrize('other', [
  132. pd.Timedelta(hours=31),
  133. pd.Timedelta(hours=31).to_pytimedelta(),
  134. pd.Timedelta(hours=31).to_timedelta64(),
  135. pd.Timedelta(hours=31).to_timedelta64().astype('m8[h]'),
  136. np.timedelta64('NaT'),
  137. np.timedelta64('NaT', 'D'),
  138. pd.offsets.Minute(3),
  139. pd.offsets.Second(0)])
  140. def test_add_sub_timedeltalike_invalid(self, numeric_idx, other, box):
  141. left = tm.box_expected(numeric_idx, box)
  142. with pytest.raises(TypeError):
  143. left + other
  144. with pytest.raises(TypeError):
  145. other + left
  146. with pytest.raises(TypeError):
  147. left - other
  148. with pytest.raises(TypeError):
  149. other - left
  150. # ------------------------------------------------------------------
  151. # Arithmetic
  152. class TestDivisionByZero(object):
  153. def test_div_zero(self, zero, numeric_idx):
  154. idx = numeric_idx
  155. expected = pd.Index([np.nan, np.inf, np.inf, np.inf, np.inf],
  156. dtype=np.float64)
  157. result = idx / zero
  158. tm.assert_index_equal(result, expected)
  159. ser_compat = Series(idx).astype('i8') / np.array(zero).astype('i8')
  160. tm.assert_series_equal(ser_compat, Series(result))
  161. def test_floordiv_zero(self, zero, numeric_idx):
  162. idx = numeric_idx
  163. expected = pd.Index([np.nan, np.inf, np.inf, np.inf, np.inf],
  164. dtype=np.float64)
  165. result = idx // zero
  166. tm.assert_index_equal(result, expected)
  167. ser_compat = Series(idx).astype('i8') // np.array(zero).astype('i8')
  168. tm.assert_series_equal(ser_compat, Series(result))
  169. def test_mod_zero(self, zero, numeric_idx):
  170. idx = numeric_idx
  171. expected = pd.Index([np.nan, np.nan, np.nan, np.nan, np.nan],
  172. dtype=np.float64)
  173. result = idx % zero
  174. tm.assert_index_equal(result, expected)
  175. ser_compat = Series(idx).astype('i8') % np.array(zero).astype('i8')
  176. tm.assert_series_equal(ser_compat, Series(result))
  177. def test_divmod_zero(self, zero, numeric_idx):
  178. idx = numeric_idx
  179. exleft = pd.Index([np.nan, np.inf, np.inf, np.inf, np.inf],
  180. dtype=np.float64)
  181. exright = pd.Index([np.nan, np.nan, np.nan, np.nan, np.nan],
  182. dtype=np.float64)
  183. result = divmod(idx, zero)
  184. tm.assert_index_equal(result[0], exleft)
  185. tm.assert_index_equal(result[1], exright)
  186. # ------------------------------------------------------------------
  187. @pytest.mark.parametrize('dtype2', [
  188. np.int64, np.int32, np.int16, np.int8,
  189. np.float64, np.float32, np.float16,
  190. np.uint64, np.uint32, np.uint16, np.uint8])
  191. @pytest.mark.parametrize('dtype1', [np.int64, np.float64, np.uint64])
  192. def test_ser_div_ser(self, dtype1, dtype2):
  193. # no longer do integer div for any ops, but deal with the 0's
  194. first = Series([3, 4, 5, 8], name='first').astype(dtype1)
  195. second = Series([0, 0, 0, 3], name='second').astype(dtype2)
  196. with np.errstate(all='ignore'):
  197. expected = Series(first.values.astype(np.float64) / second.values,
  198. dtype='float64', name=None)
  199. expected.iloc[0:3] = np.inf
  200. result = first / second
  201. tm.assert_series_equal(result, expected)
  202. assert not result.equals(second / first)
  203. def test_rdiv_zero_compat(self):
  204. # GH#8674
  205. zero_array = np.array([0] * 5)
  206. data = np.random.randn(5)
  207. expected = Series([0.] * 5)
  208. result = zero_array / Series(data)
  209. tm.assert_series_equal(result, expected)
  210. result = Series(zero_array) / data
  211. tm.assert_series_equal(result, expected)
  212. result = Series(zero_array) / Series(data)
  213. tm.assert_series_equal(result, expected)
  214. def test_div_zero_inf_signs(self):
  215. # GH#9144, inf signing
  216. ser = Series([-1, 0, 1], name='first')
  217. expected = Series([-np.inf, np.nan, np.inf], name='first')
  218. result = ser / 0
  219. tm.assert_series_equal(result, expected)
  220. def test_rdiv_zero(self):
  221. # GH#9144
  222. ser = Series([-1, 0, 1], name='first')
  223. expected = Series([0.0, np.nan, 0.0], name='first')
  224. result = 0 / ser
  225. tm.assert_series_equal(result, expected)
  226. def test_floordiv_div(self):
  227. # GH#9144
  228. ser = Series([-1, 0, 1], name='first')
  229. result = ser // 0
  230. expected = Series([-np.inf, np.nan, np.inf], name='first')
  231. tm.assert_series_equal(result, expected)
  232. def test_df_div_zero_df(self):
  233. # integer div, but deal with the 0's (GH#9144)
  234. df = pd.DataFrame({'first': [3, 4, 5, 8], 'second': [0, 0, 0, 3]})
  235. result = df / df
  236. first = pd.Series([1.0, 1.0, 1.0, 1.0])
  237. second = pd.Series([np.nan, np.nan, np.nan, 1])
  238. expected = pd.DataFrame({'first': first, 'second': second})
  239. tm.assert_frame_equal(result, expected)
  240. def test_df_div_zero_array(self):
  241. # integer div, but deal with the 0's (GH#9144)
  242. df = pd.DataFrame({'first': [3, 4, 5, 8], 'second': [0, 0, 0, 3]})
  243. first = pd.Series([1.0, 1.0, 1.0, 1.0])
  244. second = pd.Series([np.nan, np.nan, np.nan, 1])
  245. expected = pd.DataFrame({'first': first, 'second': second})
  246. with np.errstate(all='ignore'):
  247. arr = df.values.astype('float') / df.values
  248. result = pd.DataFrame(arr, index=df.index,
  249. columns=df.columns)
  250. tm.assert_frame_equal(result, expected)
  251. def test_df_div_zero_int(self):
  252. # integer div, but deal with the 0's (GH#9144)
  253. df = pd.DataFrame({'first': [3, 4, 5, 8], 'second': [0, 0, 0, 3]})
  254. result = df / 0
  255. expected = pd.DataFrame(np.inf, index=df.index, columns=df.columns)
  256. expected.iloc[0:3, 1] = np.nan
  257. tm.assert_frame_equal(result, expected)
  258. # numpy has a slightly different (wrong) treatment
  259. with np.errstate(all='ignore'):
  260. arr = df.values.astype('float64') / 0
  261. result2 = pd.DataFrame(arr, index=df.index,
  262. columns=df.columns)
  263. tm.assert_frame_equal(result2, expected)
  264. def test_df_div_zero_series_does_not_commute(self):
  265. # integer div, but deal with the 0's (GH#9144)
  266. df = pd.DataFrame(np.random.randn(10, 5))
  267. ser = df[0]
  268. res = ser / df
  269. res2 = df / ser
  270. assert not res.fillna(0).equals(res2.fillna(0))
  271. # ------------------------------------------------------------------
  272. # Mod By Zero
  273. def test_df_mod_zero_df(self):
  274. # GH#3590, modulo as ints
  275. df = pd.DataFrame({'first': [3, 4, 5, 8], 'second': [0, 0, 0, 3]})
  276. # this is technically wrong, as the integer portion is coerced to float
  277. # ###
  278. first = pd.Series([0, 0, 0, 0], dtype='float64')
  279. second = pd.Series([np.nan, np.nan, np.nan, 0])
  280. expected = pd.DataFrame({'first': first, 'second': second})
  281. result = df % df
  282. tm.assert_frame_equal(result, expected)
  283. def test_df_mod_zero_array(self):
  284. # GH#3590, modulo as ints
  285. df = pd.DataFrame({'first': [3, 4, 5, 8], 'second': [0, 0, 0, 3]})
  286. # this is technically wrong, as the integer portion is coerced to float
  287. # ###
  288. first = pd.Series([0, 0, 0, 0], dtype='float64')
  289. second = pd.Series([np.nan, np.nan, np.nan, 0])
  290. expected = pd.DataFrame({'first': first, 'second': second})
  291. # numpy has a slightly different (wrong) treatment
  292. with np.errstate(all='ignore'):
  293. arr = df.values % df.values
  294. result2 = pd.DataFrame(arr, index=df.index,
  295. columns=df.columns, dtype='float64')
  296. result2.iloc[0:3, 1] = np.nan
  297. tm.assert_frame_equal(result2, expected)
  298. def test_df_mod_zero_int(self):
  299. # GH#3590, modulo as ints
  300. df = pd.DataFrame({'first': [3, 4, 5, 8], 'second': [0, 0, 0, 3]})
  301. result = df % 0
  302. expected = pd.DataFrame(np.nan, index=df.index, columns=df.columns)
  303. tm.assert_frame_equal(result, expected)
  304. # numpy has a slightly different (wrong) treatment
  305. with np.errstate(all='ignore'):
  306. arr = df.values.astype('float64') % 0
  307. result2 = pd.DataFrame(arr, index=df.index, columns=df.columns)
  308. tm.assert_frame_equal(result2, expected)
  309. def test_df_mod_zero_series_does_not_commute(self):
  310. # GH#3590, modulo as ints
  311. # not commutative with series
  312. df = pd.DataFrame(np.random.randn(10, 5))
  313. ser = df[0]
  314. res = ser % df
  315. res2 = df % ser
  316. assert not res.fillna(0).equals(res2.fillna(0))
  317. class TestMultiplicationDivision(object):
  318. # __mul__, __rmul__, __div__, __rdiv__, __floordiv__, __rfloordiv__
  319. # for non-timestamp/timedelta/period dtypes
  320. @pytest.mark.parametrize('box', [
  321. pytest.param(pd.Index,
  322. marks=pytest.mark.xfail(reason="Index.__div__ always "
  323. "raises",
  324. raises=TypeError)),
  325. pd.Series,
  326. pd.DataFrame
  327. ], ids=lambda x: x.__name__)
  328. def test_divide_decimal(self, box):
  329. # resolves issue GH#9787
  330. ser = Series([Decimal(10)])
  331. expected = Series([Decimal(5)])
  332. ser = tm.box_expected(ser, box)
  333. expected = tm.box_expected(expected, box)
  334. result = ser / Decimal(2)
  335. tm.assert_equal(result, expected)
  336. result = ser // Decimal(2)
  337. tm.assert_equal(result, expected)
  338. def test_div_equiv_binop(self):
  339. # Test Series.div as well as Series.__div__
  340. # float/integer issue
  341. # GH#7785
  342. first = Series([1, 0], name='first')
  343. second = Series([-0.01, -0.02], name='second')
  344. expected = Series([-0.01, -np.inf])
  345. result = second.div(first)
  346. tm.assert_series_equal(result, expected, check_names=False)
  347. result = second / first
  348. tm.assert_series_equal(result, expected)
  349. def test_div_int(self, numeric_idx):
  350. # truediv under PY3
  351. idx = numeric_idx
  352. result = idx / 1
  353. expected = idx
  354. if PY3:
  355. expected = expected.astype('float64')
  356. tm.assert_index_equal(result, expected)
  357. result = idx / 2
  358. if PY3:
  359. expected = expected.astype('float64')
  360. expected = Index(idx.values / 2)
  361. tm.assert_index_equal(result, expected)
  362. @pytest.mark.parametrize('op', [operator.mul, ops.rmul, operator.floordiv])
  363. def test_mul_int_identity(self, op, numeric_idx, box):
  364. idx = numeric_idx
  365. idx = tm.box_expected(idx, box)
  366. result = op(idx, 1)
  367. tm.assert_equal(result, idx)
  368. def test_mul_int_array(self, numeric_idx):
  369. idx = numeric_idx
  370. didx = idx * idx
  371. result = idx * np.array(5, dtype='int64')
  372. tm.assert_index_equal(result, idx * 5)
  373. arr_dtype = 'uint64' if isinstance(idx, pd.UInt64Index) else 'int64'
  374. result = idx * np.arange(5, dtype=arr_dtype)
  375. tm.assert_index_equal(result, didx)
  376. def test_mul_int_series(self, numeric_idx):
  377. idx = numeric_idx
  378. didx = idx * idx
  379. arr_dtype = 'uint64' if isinstance(idx, pd.UInt64Index) else 'int64'
  380. result = idx * Series(np.arange(5, dtype=arr_dtype))
  381. tm.assert_series_equal(result, Series(didx))
  382. def test_mul_float_series(self, numeric_idx):
  383. idx = numeric_idx
  384. rng5 = np.arange(5, dtype='float64')
  385. result = idx * Series(rng5 + 0.1)
  386. expected = Series(rng5 * (rng5 + 0.1))
  387. tm.assert_series_equal(result, expected)
  388. def test_mul_index(self, numeric_idx):
  389. # in general not true for RangeIndex
  390. idx = numeric_idx
  391. if not isinstance(idx, pd.RangeIndex):
  392. result = idx * idx
  393. tm.assert_index_equal(result, idx ** 2)
  394. def test_mul_datelike_raises(self, numeric_idx):
  395. idx = numeric_idx
  396. with pytest.raises(TypeError):
  397. idx * pd.date_range('20130101', periods=5)
  398. def test_mul_size_mismatch_raises(self, numeric_idx):
  399. idx = numeric_idx
  400. with pytest.raises(ValueError):
  401. idx * idx[0:3]
  402. with pytest.raises(ValueError):
  403. idx * np.array([1, 2])
  404. @pytest.mark.parametrize('op', [operator.pow, ops.rpow])
  405. def test_pow_float(self, op, numeric_idx, box):
  406. # test power calculations both ways, GH#14973
  407. idx = numeric_idx
  408. expected = pd.Float64Index(op(idx.values, 2.0))
  409. idx = tm.box_expected(idx, box)
  410. expected = tm.box_expected(expected, box)
  411. result = op(idx, 2.0)
  412. tm.assert_equal(result, expected)
  413. def test_modulo(self, numeric_idx, box):
  414. # GH#9244
  415. idx = numeric_idx
  416. expected = Index(idx.values % 2)
  417. idx = tm.box_expected(idx, box)
  418. expected = tm.box_expected(expected, box)
  419. result = idx % 2
  420. tm.assert_equal(result, expected)
  421. def test_divmod_scalar(self, numeric_idx):
  422. idx = numeric_idx
  423. result = divmod(idx, 2)
  424. with np.errstate(all='ignore'):
  425. div, mod = divmod(idx.values, 2)
  426. expected = Index(div), Index(mod)
  427. for r, e in zip(result, expected):
  428. tm.assert_index_equal(r, e)
  429. def test_divmod_ndarray(self, numeric_idx):
  430. idx = numeric_idx
  431. other = np.ones(idx.values.shape, dtype=idx.values.dtype) * 2
  432. result = divmod(idx, other)
  433. with np.errstate(all='ignore'):
  434. div, mod = divmod(idx.values, other)
  435. expected = Index(div), Index(mod)
  436. for r, e in zip(result, expected):
  437. tm.assert_index_equal(r, e)
  438. def test_divmod_series(self, numeric_idx):
  439. idx = numeric_idx
  440. other = np.ones(idx.values.shape, dtype=idx.values.dtype) * 2
  441. result = divmod(idx, Series(other))
  442. with np.errstate(all='ignore'):
  443. div, mod = divmod(idx.values, other)
  444. expected = Series(div), Series(mod)
  445. for r, e in zip(result, expected):
  446. tm.assert_series_equal(r, e)
  447. @pytest.mark.parametrize('other', [np.nan, 7, -23, 2.718, -3.14, np.inf])
  448. def test_ops_np_scalar(self, other):
  449. vals = np.random.randn(5, 3)
  450. f = lambda x: pd.DataFrame(x, index=list('ABCDE'),
  451. columns=['jim', 'joe', 'jolie'])
  452. df = f(vals)
  453. tm.assert_frame_equal(df / np.array(other), f(vals / other))
  454. tm.assert_frame_equal(np.array(other) * df, f(vals * other))
  455. tm.assert_frame_equal(df + np.array(other), f(vals + other))
  456. tm.assert_frame_equal(np.array(other) - df, f(other - vals))
  457. # TODO: This came from series.test.test_operators, needs cleanup
  458. def test_operators_frame(self):
  459. # rpow does not work with DataFrame
  460. ts = tm.makeTimeSeries()
  461. ts.name = 'ts'
  462. df = pd.DataFrame({'A': ts})
  463. tm.assert_series_equal(ts + ts, ts + df['A'],
  464. check_names=False)
  465. tm.assert_series_equal(ts ** ts, ts ** df['A'],
  466. check_names=False)
  467. tm.assert_series_equal(ts < ts, ts < df['A'],
  468. check_names=False)
  469. tm.assert_series_equal(ts / ts, ts / df['A'],
  470. check_names=False)
  471. # TODO: this came from tests.series.test_analytics, needs cleannup and
  472. # de-duplication with test_modulo above
  473. def test_modulo2(self):
  474. with np.errstate(all='ignore'):
  475. # GH#3590, modulo as ints
  476. p = pd.DataFrame({'first': [3, 4, 5, 8], 'second': [0, 0, 0, 3]})
  477. result = p['first'] % p['second']
  478. expected = Series(p['first'].values % p['second'].values,
  479. dtype='float64')
  480. expected.iloc[0:3] = np.nan
  481. tm.assert_series_equal(result, expected)
  482. result = p['first'] % 0
  483. expected = Series(np.nan, index=p.index, name='first')
  484. tm.assert_series_equal(result, expected)
  485. p = p.astype('float64')
  486. result = p['first'] % p['second']
  487. expected = Series(p['first'].values % p['second'].values)
  488. tm.assert_series_equal(result, expected)
  489. p = p.astype('float64')
  490. result = p['first'] % p['second']
  491. result2 = p['second'] % p['first']
  492. assert not result.equals(result2)
  493. # GH#9144
  494. s = Series([0, 1])
  495. result = s % 0
  496. expected = Series([np.nan, np.nan])
  497. tm.assert_series_equal(result, expected)
  498. result = 0 % s
  499. expected = Series([np.nan, 0.0])
  500. tm.assert_series_equal(result, expected)
  501. class TestAdditionSubtraction(object):
  502. # __add__, __sub__, __radd__, __rsub__, __iadd__, __isub__
  503. # for non-timestamp/timedelta/period dtypes
  504. # TODO: This came from series.test.test_operators, needs cleanup
  505. def test_arith_ops_df_compat(self):
  506. # GH#1134
  507. s1 = pd.Series([1, 2, 3], index=list('ABC'), name='x')
  508. s2 = pd.Series([2, 2, 2], index=list('ABD'), name='x')
  509. exp = pd.Series([3.0, 4.0, np.nan, np.nan],
  510. index=list('ABCD'), name='x')
  511. tm.assert_series_equal(s1 + s2, exp)
  512. tm.assert_series_equal(s2 + s1, exp)
  513. exp = pd.DataFrame({'x': [3.0, 4.0, np.nan, np.nan]},
  514. index=list('ABCD'))
  515. tm.assert_frame_equal(s1.to_frame() + s2.to_frame(), exp)
  516. tm.assert_frame_equal(s2.to_frame() + s1.to_frame(), exp)
  517. # different length
  518. s3 = pd.Series([1, 2, 3], index=list('ABC'), name='x')
  519. s4 = pd.Series([2, 2, 2, 2], index=list('ABCD'), name='x')
  520. exp = pd.Series([3, 4, 5, np.nan],
  521. index=list('ABCD'), name='x')
  522. tm.assert_series_equal(s3 + s4, exp)
  523. tm.assert_series_equal(s4 + s3, exp)
  524. exp = pd.DataFrame({'x': [3, 4, 5, np.nan]},
  525. index=list('ABCD'))
  526. tm.assert_frame_equal(s3.to_frame() + s4.to_frame(), exp)
  527. tm.assert_frame_equal(s4.to_frame() + s3.to_frame(), exp)
  528. # TODO: This came from series.test.test_operators, needs cleanup
  529. def test_series_frame_radd_bug(self):
  530. # GH#353
  531. vals = pd.Series(tm.rands_array(5, 10))
  532. result = 'foo_' + vals
  533. expected = vals.map(lambda x: 'foo_' + x)
  534. tm.assert_series_equal(result, expected)
  535. frame = pd.DataFrame({'vals': vals})
  536. result = 'foo_' + frame
  537. expected = pd.DataFrame({'vals': vals.map(lambda x: 'foo_' + x)})
  538. tm.assert_frame_equal(result, expected)
  539. ts = tm.makeTimeSeries()
  540. ts.name = 'ts'
  541. # really raise this time
  542. now = pd.Timestamp.now().to_pydatetime()
  543. with pytest.raises(TypeError):
  544. now + ts
  545. with pytest.raises(TypeError):
  546. ts + now
  547. # TODO: This came from series.test.test_operators, needs cleanup
  548. def test_datetime64_with_index(self):
  549. # arithmetic integer ops with an index
  550. ser = pd.Series(np.random.randn(5))
  551. expected = ser - ser.index.to_series()
  552. result = ser - ser.index
  553. tm.assert_series_equal(result, expected)
  554. # GH#4629
  555. # arithmetic datetime64 ops with an index
  556. ser = pd.Series(pd.date_range('20130101', periods=5),
  557. index=pd.date_range('20130101', periods=5))
  558. expected = ser - ser.index.to_series()
  559. result = ser - ser.index
  560. tm.assert_series_equal(result, expected)
  561. with pytest.raises(TypeError):
  562. # GH#18850
  563. result = ser - ser.index.to_period()
  564. df = pd.DataFrame(np.random.randn(5, 2),
  565. index=pd.date_range('20130101', periods=5))
  566. df['date'] = pd.Timestamp('20130102')
  567. df['expected'] = df['date'] - df.index.to_series()
  568. df['result'] = df['date'] - df.index
  569. tm.assert_series_equal(df['result'], df['expected'], check_names=False)
  570. # TODO: taken from tests.frame.test_operators, needs cleanup
  571. def test_frame_operators(self):
  572. seriesd = tm.getSeriesData()
  573. frame = pd.DataFrame(seriesd)
  574. frame2 = pd.DataFrame(seriesd, columns=['D', 'C', 'B', 'A'])
  575. garbage = np.random.random(4)
  576. colSeries = pd.Series(garbage, index=np.array(frame.columns))
  577. idSum = frame + frame
  578. seriesSum = frame + colSeries
  579. for col, series in idSum.items():
  580. for idx, val in series.items():
  581. origVal = frame[col][idx] * 2
  582. if not np.isnan(val):
  583. assert val == origVal
  584. else:
  585. assert np.isnan(origVal)
  586. for col, series in seriesSum.items():
  587. for idx, val in series.items():
  588. origVal = frame[col][idx] + colSeries[col]
  589. if not np.isnan(val):
  590. assert val == origVal
  591. else:
  592. assert np.isnan(origVal)
  593. added = frame2 + frame2
  594. expected = frame2 * 2
  595. tm.assert_frame_equal(added, expected)
  596. df = pd.DataFrame({'a': ['a', None, 'b']})
  597. tm.assert_frame_equal(df + df,
  598. pd.DataFrame({'a': ['aa', np.nan, 'bb']}))
  599. # Test for issue #10181
  600. for dtype in ('float', 'int64'):
  601. frames = [
  602. pd.DataFrame(dtype=dtype),
  603. pd.DataFrame(columns=['A'], dtype=dtype),
  604. pd.DataFrame(index=[0], dtype=dtype),
  605. ]
  606. for df in frames:
  607. assert (df + df).equals(df)
  608. tm.assert_frame_equal(df + df, df)
  609. # TODO: taken from tests.series.test_operators; needs cleanup
  610. def test_series_operators(self):
  611. def _check_op(series, other, op, pos_only=False, check_dtype=True):
  612. left = np.abs(series) if pos_only else series
  613. right = np.abs(other) if pos_only else other
  614. cython_or_numpy = op(left, right)
  615. python = left.combine(right, op)
  616. tm.assert_series_equal(cython_or_numpy, python,
  617. check_dtype=check_dtype)
  618. def check(series, other):
  619. simple_ops = ['add', 'sub', 'mul', 'truediv', 'floordiv', 'mod']
  620. for opname in simple_ops:
  621. _check_op(series, other, getattr(operator, opname))
  622. _check_op(series, other, operator.pow, pos_only=True)
  623. _check_op(series, other, lambda x, y: operator.add(y, x))
  624. _check_op(series, other, lambda x, y: operator.sub(y, x))
  625. _check_op(series, other, lambda x, y: operator.truediv(y, x))
  626. _check_op(series, other, lambda x, y: operator.floordiv(y, x))
  627. _check_op(series, other, lambda x, y: operator.mul(y, x))
  628. _check_op(series, other, lambda x, y: operator.pow(y, x),
  629. pos_only=True)
  630. _check_op(series, other, lambda x, y: operator.mod(y, x))
  631. tser = tm.makeTimeSeries().rename('ts')
  632. check(tser, tser * 2)
  633. check(tser, tser * 0)
  634. check(tser, tser[::2])
  635. check(tser, 5)
  636. def check_comparators(series, other, check_dtype=True):
  637. _check_op(series, other, operator.gt, check_dtype=check_dtype)
  638. _check_op(series, other, operator.ge, check_dtype=check_dtype)
  639. _check_op(series, other, operator.eq, check_dtype=check_dtype)
  640. _check_op(series, other, operator.lt, check_dtype=check_dtype)
  641. _check_op(series, other, operator.le, check_dtype=check_dtype)
  642. check_comparators(tser, 5)
  643. check_comparators(tser, tser + 1, check_dtype=False)
  644. # TODO: taken from tests.series.test_operators; needs cleanup
  def test_divmod(self):
      """Compare ``divmod`` on a Series with ``divmod`` on its raw values.

      For several right-hand operands, each element of the pair returned by
      ``divmod(series, other)`` must be almost equal to the corresponding
      element of ``divmod(series.values, ...)`` and must carry the name and
      index of ``series``.
      """
      def check(series, other):
          results = divmod(series, other)

          needs_padding = (isinstance(other, Iterable) and
                           len(series) != len(other))
          if needs_padding:
              # if the lengths don't match, this is the test where we use
              # `tser[::2]`. Follow every value of `other` with a nan so
              # the plain-array operand has one entry per row of `series`.
              raw_other = []
              for value in other:
                  raw_other.extend([value, np.nan])
          else:
              raw_other = other
          other_np = np.asarray(raw_other)

          # silence numpy floating-point warnings while building the
          # reference values
          with np.errstate(all='ignore'):
              expecteds = divmod(series.values, other_np)

          for actual, wanted in zip(results, expecteds):
              # values, name and index are each checked on their own
              tm.assert_almost_equal(np.asarray(actual), wanted)
              assert actual.name == series.name
              tm.assert_index_equal(actual.index, series.index)

      tser = tm.makeTimeSeries().rename('ts')
      for operand in (tser * 2, tser * 0, tser[::2], 5):
          check(tser, operand)
  class TestUFuncCompat(object):
      """numpy ufuncs and float arithmetic on numeric Index types and Series."""

      @pytest.mark.parametrize('holder', [pd.Int64Index, pd.UInt64Index,
                                          pd.Float64Index, pd.RangeIndex,
                                          pd.Series])
      def test_ufunc_compat(self, holder):
          """``np.sin(holder)`` equals ``np.sin`` of the values, re-boxed."""
          if holder is pd.Series:
              box = pd.Series
          else:
              box = pd.Index

          # RangeIndex is built from start/stop; the others from an array
          idx = (pd.RangeIndex(0, 5) if holder is pd.RangeIndex
                 else holder(np.arange(5, dtype='int64')))

          expected = box(np.sin(np.arange(5, dtype='int64')))
          tm.assert_equal(np.sin(idx), expected)

      @pytest.mark.parametrize('holder', [pd.Int64Index, pd.UInt64Index,
                                          pd.Float64Index, pd.Series])
      def test_ufunc_coercions(self, holder):
          """Float-valued operations yield an ``f8`` result in the same box."""
          box = pd.Series if holder is pd.Series else pd.Index
          idx = holder([1, 2, 3, 4, 5], name='x')

          def assert_float_result(result, values):
              # result must be float64 and of the expected container class,
              # then equal to a Float64Index named 'x' boxed the same way
              assert result.dtype == 'f8' and isinstance(result, box)
              wanted = pd.Float64Index(values, name='x')
              wanted = tm.box_expected(wanted, box)
              tm.assert_equal(result, wanted)

          assert_float_result(np.sqrt(idx),
                              np.sqrt(np.array([1, 2, 3, 4, 5])))
          assert_float_result(np.divide(idx, 2.), [0.5, 1., 1.5, 2., 2.5])

          # _evaluate_numeric_binop
          assert_float_result(idx + 2., [3., 4., 5., 6., 7.])
          assert_float_result(idx - 2., [-1., 0., 1., 2., 3.])
          assert_float_result(idx * 1., [1., 2., 3., 4., 5.])
          assert_float_result(idx / 2., [0.5, 1., 1.5, 2., 2.5])
  class TestObjectDtypeEquivalence(object):
      # Tests that arithmetic operations match operations executed elementwise
      # NOTE(review): `box` is not defined in this class; presumably a
      # fixture supplying the container type -- confirm against conftest.

      @pytest.mark.parametrize('dtype', [None, object])
      def test_numarr_with_dtype_add_nan(self, dtype, box):
          """Adding ``np.nan`` on either side gives an all-nan result."""
          boxed = tm.box_expected(pd.Series([1, 2, 3], dtype=dtype), box)
          all_nan = tm.box_expected(
              pd.Series([np.nan] * 3, dtype=dtype), box)

          tm.assert_equal(np.nan + boxed, all_nan)
          tm.assert_equal(boxed + np.nan, all_nan)

      @pytest.mark.parametrize('dtype', [None, object])
      def test_numarr_with_dtype_add_int(self, dtype, box):
          """Adding ``1`` on either side increments every element."""
          boxed = tm.box_expected(pd.Series([1, 2, 3], dtype=dtype), box)
          incremented = tm.box_expected(
              pd.Series([2, 3, 4], dtype=dtype), box)

          tm.assert_equal(1 + boxed, incremented)
          tm.assert_equal(boxed + 1, incremented)

      # TODO: moved from tests.series.test_operators; needs cleanup
      @pytest.mark.parametrize('op', [operator.add, operator.sub,
                                      operator.mul, operator.truediv,
                                      operator.floordiv])
      def test_operators_reverse_object(self, op):
          """``op(1., object_series)`` matches the float-dtype computation."""
          # GH#56
          obj_ser = pd.Series(np.random.randn(10), index=np.arange(10),
                              dtype=object)

          from_object = op(1., obj_ser)
          from_float = op(1., obj_ser.astype(float))
          tm.assert_series_equal(from_object.astype(float), from_float)
  750. class TestNumericArithmeticUnsorted(object):
  751. # Tests in this class have been moved from type-specific test modules
  752. # but not yet sorted, parametrized, and de-duplicated
  def check_binop(self, ops, scalars, idxs):
      """Assert each op on ``idxs`` matches the same op on ``Int64Index``.

      For every operator in ``ops`` this checks every unordered pair drawn
      from ``idxs`` and every (index, scalar) combination, comparing the
      result with the one obtained after converting the index operands
      via ``pd.Int64Index``.
      """
      for binop in ops:
          # index <op> index
          for left, right in combinations(idxs, 2):
              reference = binop(pd.Int64Index(left), pd.Int64Index(right))
              tm.assert_index_equal(binop(left, right), reference)

          # index <op> scalar
          index_scalar_pairs = ((index, value)
                                for index in idxs
                                for value in scalars)
          for index, value in index_scalar_pairs:
              reference = binop(pd.Int64Index(index), value)
              tm.assert_index_equal(binop(index, value), reference)
  def test_binops(self):
      """Run ``check_binop`` for add, sub, mul, floordiv and truediv."""
      range_indexes = [
          pd.RangeIndex(0, 10, 1),
          pd.RangeIndex(0, 20, 2),
          pd.RangeIndex(-10, 10, 2),
          pd.RangeIndex(5, -5, -1),
      ]
      arithmetic_ops = [
          operator.add,
          operator.sub,
          operator.mul,
          operator.floordiv,
          operator.truediv,
      ]
      self.check_binop(arithmetic_ops, [-1, 1, 2], range_indexes)
  def test_binops_pow(self):
      """Run ``check_binop`` for ``pow`` with positive scalars only."""
      # later versions of numpy don't allow powers of negative integers
      # so test separately
      # https://github.com/numpy/numpy/pull/8127
      range_indexes = [pd.RangeIndex(0, 10, 1), pd.RangeIndex(0, 20, 2)]
      self.check_binop([pow], [1, 2], range_indexes)
  779. # TODO: mod, divmod?
  @pytest.mark.parametrize('op', [operator.add, operator.sub,
                                  operator.mul, operator.floordiv,
                                  operator.truediv, operator.pow])
  def test_arithmetic_with_frame_or_series(self, op):
      """``op(RangeIndex, Series/DataFrame)`` matches the wrapped-index op.

      The expected value is computed by first wrapping the index in the
      same container type as the other operand.
      """
      # check that we return NotImplemented when operating with Series
      # or DataFrame
      index = pd.RangeIndex(5)

      ser = pd.Series(np.random.randn(5))
      expected_ser = op(pd.Series(index), ser)
      tm.assert_series_equal(op(index, ser), expected_ser)

      frame = pd.DataFrame(np.random.randn(2, 5))
      expected_frame = op(pd.DataFrame([index, index]), frame)
      tm.assert_frame_equal(op(index, frame), expected_frame)
  def test_numeric_compat2(self):
      """Exercise the numeric operators of ``RangeIndex``.

      validate that we are handling the RangeIndex overrides to numeric ops
      and returning RangeIndex where possible
      """
      rng = pd.RangeIndex(0, 10, 2)

      # scalar multiplication, addition and subtraction
      tm.assert_index_equal(rng * 2, pd.RangeIndex(0, 20, 4), exact=True)
      tm.assert_index_equal(rng + 2, pd.RangeIndex(2, 12, 2), exact=True)
      tm.assert_index_equal(rng - 2, pd.RangeIndex(-2, 8, 2), exact=True)

      # truediv under PY3
      halved = rng / 2
      expected = pd.RangeIndex(0, 5, 1)
      if PY3:
          expected = expected.astype('float64')
      tm.assert_index_equal(halved, expected, exact=True)

      tm.assert_index_equal(rng / 4, pd.RangeIndex(0, 10, 2) / 4,
                            exact=True)

      tm.assert_index_equal(rng // 1, rng, exact=True)

      # __mul__
      tm.assert_index_equal(rng * rng, Index(rng.values * rng.values),
                            exact=True)

      # __pow__
      big = pd.RangeIndex(0, 1000, 2)
      squared = big ** 2
      expected = big._int64index ** 2
      tm.assert_index_equal(Index(squared.values), expected, exact=True)

      # __floordiv__
      # each entry is (dividend index, divisor, expected result)
      cases_exact = [
          (pd.RangeIndex(0, 1000, 2), 2, pd.RangeIndex(0, 500, 1)),
          (pd.RangeIndex(-99, -201, -3), -3, pd.RangeIndex(33, 67, 1)),
          (pd.RangeIndex(0, 1000, 1), 2,
           pd.RangeIndex(0, 1000, 1)._int64index // 2),
          (pd.RangeIndex(0, 100, 1), 2.0,
           pd.RangeIndex(0, 100, 1)._int64index // 2.0),
          (pd.RangeIndex(0), 50, pd.RangeIndex(0)),
          (pd.RangeIndex(2, 4, 2), 3, pd.RangeIndex(0, 1, 1)),
          (pd.RangeIndex(-5, -10, -6), 4, pd.RangeIndex(-2, -1, 1)),
          (pd.RangeIndex(-100, -200, 3), 2, pd.RangeIndex(0)),
      ]
      for dividend, divisor, quotient in cases_exact:
          tm.assert_index_equal(dividend // divisor, quotient, exact=True)
  @pytest.mark.parametrize('dtype', [np.int64, np.float64])
  @pytest.mark.parametrize('delta', [1, 0, -1])
  def test_addsub_arithmetic(self, dtype, delta):
      """Index add/sub with a scalar or itself matches numpy arithmetic."""
      # GH#8142
      step = dtype(delta)
      index = pd.Index([10, 11, 12], dtype=dtype)

      shifted_up = index + step
      tm.assert_index_equal(
          shifted_up, pd.Index(index.values + step, dtype=dtype))

      # this subtraction used to fail
      shifted_down = index - step
      tm.assert_index_equal(
          shifted_down, pd.Index(index.values - step, dtype=dtype))

      # index-with-index arithmetic
      tm.assert_index_equal(index + index, 2 * index)
      tm.assert_index_equal(index - index, 0 * index)
      assert not (index - index).empty