test_object.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314
  1. # -*- coding: utf-8 -*-
  2. # Arithmetic tests for DataFrame/Series/Index/Array classes that should
  3. # behave identically.
  4. # Specifically for object dtype
  5. from decimal import Decimal
  6. import operator
  7. import numpy as np
  8. import pytest
  9. import pandas as pd
  10. from pandas import Series, Timestamp
  11. from pandas.core import ops
  12. import pandas.util.testing as tm
  13. # ------------------------------------------------------------------
  14. # Comparisons
  class TestObjectComparisons(object):
      """Comparison operators on object-dtype Series, including missing values."""

      def test_comparison_object_numeric_nas(self):
          """Object-dtype numeric data with NaNs compares like its float cast."""
          ser = Series(np.random.randn(10), dtype=object)
          shifted = ser.shift(2)

          # Named ``op_name``/``op_names`` so the module-level ``ops`` import
          # is not shadowed inside this test.
          op_names = ['lt', 'le', 'gt', 'ge', 'eq', 'ne']
          for op_name in op_names:
              func = getattr(operator, op_name)

              result = func(ser, shifted)
              expected = func(ser.astype(float), shifted.astype(float))
              tm.assert_series_equal(result, expected)

      def test_object_comparisons(self):
          """Scalar comparisons against a string Series containing NaN."""
          ser = Series(['a', 'b', np.nan, 'c', 'a'])

          result = ser == 'a'
          expected = Series([True, False, False, False, True])
          tm.assert_series_equal(result, expected)

          result = ser < 'a'
          expected = Series([False, False, False, False, False])
          tm.assert_series_equal(result, expected)

          result = ser != 'a'
          # ``~`` is the logical inverse; unary minus is not a supported
          # operation on NumPy boolean arrays.
          expected = ~(ser == 'a')
          tm.assert_series_equal(result, expected)

      @pytest.mark.parametrize('dtype', [None, object])
      def test_more_na_comparisons(self, dtype):
          """NaN never compares equal, element-wise or against a NaN scalar."""
          left = Series(['a', np.nan, 'c'], dtype=dtype)
          right = Series(['a', np.nan, 'd'], dtype=dtype)

          result = left == right
          expected = Series([True, False, False])
          tm.assert_series_equal(result, expected)

          result = left != right
          expected = Series([False, True, True])
          tm.assert_series_equal(result, expected)

          result = left == np.nan
          expected = Series([False, False, False])
          tm.assert_series_equal(result, expected)

          result = left != np.nan
          expected = Series([True, True, True])
          tm.assert_series_equal(result, expected)
  52. # ------------------------------------------------------------------
  53. # Arithmetic
  54. class TestArithmetic(object):
  55. # TODO: parametrize
  56. def test_pow_ops_object(self):
  57. # GH#22922
  58. # pow is weird with masking & 1, so testing here
  59. a = Series([1, np.nan, 1, np.nan], dtype=object)
  60. b = Series([1, np.nan, np.nan, 1], dtype=object)
  61. result = a ** b
  62. expected = Series(a.values ** b.values, dtype=object)
  63. tm.assert_series_equal(result, expected)
  64. result = b ** a
  65. expected = Series(b.values ** a.values, dtype=object)
  66. tm.assert_series_equal(result, expected)
  67. @pytest.mark.parametrize("op", [operator.add, ops.radd])
  68. @pytest.mark.parametrize("other", ["category", "Int64"])
  69. def test_add_extension_scalar(self, other, box, op):
  70. # GH#22378
  71. # Check that scalars satisfying is_extension_array_dtype(obj)
  72. # do not incorrectly try to dispatch to an ExtensionArray operation
  73. arr = pd.Series(['a', 'b', 'c'])
  74. expected = pd.Series([op(x, other) for x in arr])
  75. arr = tm.box_expected(arr, box)
  76. expected = tm.box_expected(expected, box)
  77. result = op(arr, other)
  78. tm.assert_equal(result, expected)
  79. @pytest.mark.parametrize('box', [
  80. pytest.param(pd.Index,
  81. marks=pytest.mark.xfail(reason="Does not mask nulls",
  82. raises=TypeError)),
  83. pd.Series,
  84. pd.DataFrame
  85. ], ids=lambda x: x.__name__)
  86. def test_objarr_add_str(self, box):
  87. ser = pd.Series(['x', np.nan, 'x'])
  88. expected = pd.Series(['xa', np.nan, 'xa'])
  89. ser = tm.box_expected(ser, box)
  90. expected = tm.box_expected(expected, box)
  91. result = ser + 'a'
  92. tm.assert_equal(result, expected)
  93. @pytest.mark.parametrize('box', [
  94. pytest.param(pd.Index,
  95. marks=pytest.mark.xfail(reason="Does not mask nulls",
  96. raises=TypeError)),
  97. pd.Series,
  98. pd.DataFrame
  99. ], ids=lambda x: x.__name__)
  100. def test_objarr_radd_str(self, box):
  101. ser = pd.Series(['x', np.nan, 'x'])
  102. expected = pd.Series(['ax', np.nan, 'ax'])
  103. ser = tm.box_expected(ser, box)
  104. expected = tm.box_expected(expected, box)
  105. result = 'a' + ser
  106. tm.assert_equal(result, expected)
  107. @pytest.mark.parametrize('data', [
  108. [1, 2, 3],
  109. [1.1, 2.2, 3.3],
  110. [Timestamp('2011-01-01'), Timestamp('2011-01-02'), pd.NaT],
  111. ['x', 'y', 1]])
  112. @pytest.mark.parametrize('dtype', [None, object])
  113. def test_objarr_radd_str_invalid(self, dtype, data, box):
  114. ser = Series(data, dtype=dtype)
  115. ser = tm.box_expected(ser, box)
  116. with pytest.raises(TypeError):
  117. 'foo_' + ser
  @pytest.mark.parametrize('op', [operator.add, ops.radd,
                                  operator.sub, ops.rsub])
  def test_objarr_add_invalid(self, op, box):
      """Add/sub (and their reflected forms) with an integer must raise.

      The operand is the boxed ``tm.makeObjectSeries()`` data; the other
      side is either the scalar ``1`` or a 0-d int64 array.
      """
      # invalid ops
      boxed = tm.makeObjectSeries()
      boxed.name = 'objects'
      boxed = tm.box_expected(boxed, box)

      # NOTE(review): ``Exception`` is very broad; the concrete exception
      # type is not visible from here, so it is left unchanged.
      with pytest.raises(Exception):
          op(boxed, 1)

      zero_dim = np.array(1, dtype=np.int64)
      with pytest.raises(Exception):
          op(boxed, zero_dim)
  129. # TODO: Moved from tests.series.test_operators; needs cleanup
  130. def test_operators_na_handling(self):
  131. ser = Series(['foo', 'bar', 'baz', np.nan])
  132. result = 'prefix_' + ser
  133. expected = pd.Series(['prefix_foo', 'prefix_bar',
  134. 'prefix_baz', np.nan])
  135. tm.assert_series_equal(result, expected)
  136. result = ser + '_suffix'
  137. expected = pd.Series(['foo_suffix', 'bar_suffix',
  138. 'baz_suffix', np.nan])
  139. tm.assert_series_equal(result, expected)
  140. # TODO: parametrize over box
  141. @pytest.mark.parametrize('dtype', [None, object])
  142. def test_series_with_dtype_radd_timedelta(self, dtype):
  143. # note this test is _not_ aimed at timedelta64-dtyped Series
  144. ser = pd.Series([pd.Timedelta('1 days'), pd.Timedelta('2 days'),
  145. pd.Timedelta('3 days')], dtype=dtype)
  146. expected = pd.Series([pd.Timedelta('4 days'), pd.Timedelta('5 days'),
  147. pd.Timedelta('6 days')])
  148. result = pd.Timedelta('3 days') + ser
  149. tm.assert_series_equal(result, expected)
  150. result = ser + pd.Timedelta('3 days')
  151. tm.assert_series_equal(result, expected)
  # TODO: cleanup & parametrize over box
  def test_mixed_timezone_series_ops_object(self):
      """Arithmetic on object-dtype Series of Timestamps / Timedeltas.

      GH#13043: covers Series holding Timestamps with different time zones
      and a Timedelta Series explicitly created with ``dtype=object``.
      """
      # GH#13043
      one_day = pd.Timedelta('1 days')
      stamps = pd.Series([pd.Timestamp('2015-01-01', tz='US/Eastern'),
                          pd.Timestamp('2015-01-01', tz='Asia/Tokyo')],
                         name='xxx')
      assert stamps.dtype == object

      next_day = pd.Series([pd.Timestamp('2015-01-02', tz='US/Eastern'),
                            pd.Timestamp('2015-01-02', tz='Asia/Tokyo')],
                           name='xxx')
      tm.assert_series_equal(stamps + one_day, next_day)
      tm.assert_series_equal(one_day + stamps, next_day)

      # object series & object series
      later = pd.Series([pd.Timestamp('2015-01-03', tz='US/Eastern'),
                         pd.Timestamp('2015-01-05', tz='Asia/Tokyo')],
                        name='xxx')
      assert later.dtype == object

      gaps = pd.Series([pd.Timedelta('2 days'), pd.Timedelta('4 days')],
                       name='xxx')
      tm.assert_series_equal(later - stamps, gaps)
      tm.assert_series_equal(stamps - later, -gaps)

      durations = pd.Series([pd.Timedelta('01:00:00'),
                             pd.Timedelta('02:00:00')],
                            name='xxx', dtype=object)
      assert durations.dtype == object

      half_hour = pd.Timedelta('00:30:00')
      shifted = pd.Series([pd.Timedelta('01:30:00'),
                           pd.Timedelta('02:30:00')],
                          name='xxx')
      tm.assert_series_equal(durations + half_hour, shifted)
      tm.assert_series_equal(half_hour + durations, shifted)
  180. # TODO: cleanup & parametrize over box
  181. def test_iadd_preserves_name(self):
  182. # GH#17067, GH#19723 __iadd__ and __isub__ should preserve index name
  183. ser = pd.Series([1, 2, 3])
  184. ser.index.name = 'foo'
  185. ser.index += 1
  186. assert ser.index.name == "foo"
  187. ser.index -= 1
  188. assert ser.index.name == "foo"
  189. def test_add_string(self):
  190. # from bug report
  191. index = pd.Index(['a', 'b', 'c'])
  192. index2 = index + 'foo'
  193. assert 'a' not in index2
  194. assert 'afoo' in index2
  195. def test_iadd_string(self):
  196. index = pd.Index(['a', 'b', 'c'])
  197. # doesn't fail test unless there is a check before `+=`
  198. assert 'a' in index
  199. index += '_x'
  200. assert 'a_x' in index
  def test_add(self):
      """Index addition with an Index, a list and a string scalar."""
      strings = tm.makeStringIndex(100)
      doubled = pd.Index(strings.values * 2)

      tm.assert_index_equal(strings + strings, doubled)
      tm.assert_index_equal(strings + strings.tolist(), doubled)
      tm.assert_index_equal(strings.tolist() + strings, doubled)

      # test add and radd
      letters = pd.Index(list('abc'))

      suffixed = pd.Index(['a1', 'b1', 'c1'])
      tm.assert_index_equal(letters + '1', suffixed)

      prefixed = pd.Index(['1a', '1b', '1c'])
      tm.assert_index_equal('1' + letters, prefixed)
  def test_sub_fail(self):
      """Subtraction involving the string index raises TypeError."""
      index = tm.makeStringIndex(100)

      # Each callable performs one subtraction that is expected to fail;
      # they run in the listed order.
      attempts = [
          lambda: index - 'a',
          lambda: index - index,
          lambda: index - index.tolist(),
          lambda: index.tolist() - index,
      ]
      for attempt in attempts:
          with pytest.raises(TypeError):
              attempt()
  223. def test_sub_object(self):
  224. # GH#19369
  225. index = pd.Index([Decimal(1), Decimal(2)])
  226. expected = pd.Index([Decimal(0), Decimal(1)])
  227. result = index - Decimal(1)
  228. tm.assert_index_equal(result, expected)
  229. result = index - pd.Index([Decimal(1), Decimal(1)])
  230. tm.assert_index_equal(result, expected)
  231. with pytest.raises(TypeError):
  232. index - 'foo'
  233. with pytest.raises(TypeError):
  234. index - np.array([2, 'foo'])
  235. def test_rsub_object(self):
  236. # GH#19369
  237. index = pd.Index([Decimal(1), Decimal(2)])
  238. expected = pd.Index([Decimal(1), Decimal(0)])
  239. result = Decimal(2) - index
  240. tm.assert_index_equal(result, expected)
  241. result = np.array([Decimal(2), Decimal(2)]) - index
  242. tm.assert_index_equal(result, expected)
  243. with pytest.raises(TypeError):
  244. 'foo' - index
  245. with pytest.raises(TypeError):
  246. np.array([True, pd.Timestamp.now()]) - index