test_coercion.py 35 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939
  1. # -*- coding: utf-8 -*-
  2. import itertools
  3. import numpy as np
  4. import pytest
  5. import pandas.compat as compat
  6. import pandas as pd
  7. import pandas.util.testing as tm
  8. ###############################################################
  9. # Index / Series common tests which may trigger dtype coercions
  10. ###############################################################
  11. @pytest.fixture(autouse=True, scope='class')
  12. def check_comprehensiveness(request):
  13. # Iterate over combination of dtype, method and klass
  14. # and ensure that each are contained within a collected test
  15. cls = request.cls
  16. combos = itertools.product(cls.klasses, cls.dtypes, [cls.method])
  17. def has_test(combo):
  18. klass, dtype, method = combo
  19. cls_funcs = request.node.session.items
  20. return any(klass in x.name and dtype in x.name and
  21. method in x.name for x in cls_funcs)
  22. for combo in combos:
  23. if not has_test(combo):
  24. msg = 'test method is not defined: {0}, {1}'
  25. raise AssertionError(msg.format(cls.__name__, combo))
  26. yield
  27. class CoercionBase(object):
  28. klasses = ['index', 'series']
  29. dtypes = ['object', 'int64', 'float64', 'complex128', 'bool',
  30. 'datetime64', 'datetime64tz', 'timedelta64', 'period']
  31. @property
  32. def method(self):
  33. raise NotImplementedError(self)
  34. def _assert(self, left, right, dtype):
  35. # explicitly check dtype to avoid any unexpected result
  36. if isinstance(left, pd.Series):
  37. tm.assert_series_equal(left, right)
  38. elif isinstance(left, pd.Index):
  39. tm.assert_index_equal(left, right)
  40. else:
  41. raise NotImplementedError
  42. assert left.dtype == dtype
  43. assert right.dtype == dtype
  44. class TestSetitemCoercion(CoercionBase):
  45. method = 'setitem'
  46. def _assert_setitem_series_conversion(self, original_series, loc_value,
  47. expected_series, expected_dtype):
  48. """ test series value's coercion triggered by assignment """
  49. temp = original_series.copy()
  50. temp[1] = loc_value
  51. tm.assert_series_equal(temp, expected_series)
  52. # check dtype explicitly for sure
  53. assert temp.dtype == expected_dtype
  54. # .loc works different rule, temporary disable
  55. # temp = original_series.copy()
  56. # temp.loc[1] = loc_value
  57. # tm.assert_series_equal(temp, expected_series)
  58. @pytest.mark.parametrize("val,exp_dtype", [
  59. (1, np.object),
  60. (1.1, np.object),
  61. (1 + 1j, np.object),
  62. (True, np.object)])
  63. def test_setitem_series_object(self, val, exp_dtype):
  64. obj = pd.Series(list('abcd'))
  65. assert obj.dtype == np.object
  66. exp = pd.Series(['a', val, 'c', 'd'])
  67. self._assert_setitem_series_conversion(obj, val, exp, exp_dtype)
  68. @pytest.mark.parametrize("val,exp_dtype", [
  69. (1, np.int64),
  70. (1.1, np.float64),
  71. (1 + 1j, np.complex128),
  72. (True, np.object)])
  73. def test_setitem_series_int64(self, val, exp_dtype):
  74. obj = pd.Series([1, 2, 3, 4])
  75. assert obj.dtype == np.int64
  76. if exp_dtype is np.float64:
  77. exp = pd.Series([1, 1, 3, 4])
  78. self._assert_setitem_series_conversion(obj, 1.1, exp, np.int64)
  79. pytest.xfail("GH12747 The result must be float")
  80. exp = pd.Series([1, val, 3, 4])
  81. self._assert_setitem_series_conversion(obj, val, exp, exp_dtype)
  82. @pytest.mark.parametrize("val,exp_dtype", [
  83. (np.int32(1), np.int8),
  84. (np.int16(2**9), np.int16)])
  85. def test_setitem_series_int8(self, val, exp_dtype):
  86. obj = pd.Series([1, 2, 3, 4], dtype=np.int8)
  87. assert obj.dtype == np.int8
  88. if exp_dtype is np.int16:
  89. exp = pd.Series([1, 0, 3, 4], dtype=np.int8)
  90. self._assert_setitem_series_conversion(obj, val, exp, np.int8)
  91. pytest.xfail("BUG: it must be Series([1, 1, 3, 4], dtype=np.int16")
  92. exp = pd.Series([1, val, 3, 4], dtype=np.int8)
  93. self._assert_setitem_series_conversion(obj, val, exp, exp_dtype)
  94. @pytest.mark.parametrize("val,exp_dtype", [
  95. (1, np.float64),
  96. (1.1, np.float64),
  97. (1 + 1j, np.complex128),
  98. (True, np.object)])
  99. def test_setitem_series_float64(self, val, exp_dtype):
  100. obj = pd.Series([1.1, 2.2, 3.3, 4.4])
  101. assert obj.dtype == np.float64
  102. exp = pd.Series([1.1, val, 3.3, 4.4])
  103. self._assert_setitem_series_conversion(obj, val, exp, exp_dtype)
  104. @pytest.mark.parametrize("val,exp_dtype", [
  105. (1, np.complex128),
  106. (1.1, np.complex128),
  107. (1 + 1j, np.complex128),
  108. (True, np.object)])
  109. def test_setitem_series_complex128(self, val, exp_dtype):
  110. obj = pd.Series([1 + 1j, 2 + 2j, 3 + 3j, 4 + 4j])
  111. assert obj.dtype == np.complex128
  112. exp = pd.Series([1 + 1j, val, 3 + 3j, 4 + 4j])
  113. self._assert_setitem_series_conversion(obj, val, exp, exp_dtype)
  114. @pytest.mark.parametrize("val,exp_dtype", [
  115. (1, np.int64),
  116. (3, np.int64),
  117. (1.1, np.float64),
  118. (1 + 1j, np.complex128),
  119. (True, np.bool)])
  120. def test_setitem_series_bool(self, val, exp_dtype):
  121. obj = pd.Series([True, False, True, False])
  122. assert obj.dtype == np.bool
  123. if exp_dtype is np.int64:
  124. exp = pd.Series([True, True, True, False])
  125. self._assert_setitem_series_conversion(obj, val, exp, np.bool)
  126. pytest.xfail("TODO_GH12747 The result must be int")
  127. elif exp_dtype is np.float64:
  128. exp = pd.Series([True, True, True, False])
  129. self._assert_setitem_series_conversion(obj, val, exp, np.bool)
  130. pytest.xfail("TODO_GH12747 The result must be float")
  131. elif exp_dtype is np.complex128:
  132. exp = pd.Series([True, True, True, False])
  133. self._assert_setitem_series_conversion(obj, val, exp, np.bool)
  134. pytest.xfail("TODO_GH12747 The result must be complex")
  135. exp = pd.Series([True, val, True, False])
  136. self._assert_setitem_series_conversion(obj, val, exp, exp_dtype)
  137. @pytest.mark.parametrize("val,exp_dtype", [
  138. (pd.Timestamp('2012-01-01'), 'datetime64[ns]'),
  139. (1, np.object),
  140. ('x', np.object)])
  141. def test_setitem_series_datetime64(self, val, exp_dtype):
  142. obj = pd.Series([pd.Timestamp('2011-01-01'),
  143. pd.Timestamp('2011-01-02'),
  144. pd.Timestamp('2011-01-03'),
  145. pd.Timestamp('2011-01-04')])
  146. assert obj.dtype == 'datetime64[ns]'
  147. exp = pd.Series([pd.Timestamp('2011-01-01'),
  148. val,
  149. pd.Timestamp('2011-01-03'),
  150. pd.Timestamp('2011-01-04')])
  151. self._assert_setitem_series_conversion(obj, val, exp, exp_dtype)
  152. @pytest.mark.parametrize("val,exp_dtype", [
  153. (pd.Timestamp('2012-01-01', tz='US/Eastern'),
  154. 'datetime64[ns, US/Eastern]'),
  155. (pd.Timestamp('2012-01-01', tz='US/Pacific'), np.object),
  156. (pd.Timestamp('2012-01-01'), np.object),
  157. (1, np.object)])
  158. def test_setitem_series_datetime64tz(self, val, exp_dtype):
  159. tz = 'US/Eastern'
  160. obj = pd.Series([pd.Timestamp('2011-01-01', tz=tz),
  161. pd.Timestamp('2011-01-02', tz=tz),
  162. pd.Timestamp('2011-01-03', tz=tz),
  163. pd.Timestamp('2011-01-04', tz=tz)])
  164. assert obj.dtype == 'datetime64[ns, US/Eastern]'
  165. exp = pd.Series([pd.Timestamp('2011-01-01', tz=tz),
  166. val,
  167. pd.Timestamp('2011-01-03', tz=tz),
  168. pd.Timestamp('2011-01-04', tz=tz)])
  169. self._assert_setitem_series_conversion(obj, val, exp, exp_dtype)
  170. @pytest.mark.parametrize("val,exp_dtype", [
  171. (pd.Timedelta('12 day'), 'timedelta64[ns]'),
  172. (1, np.object),
  173. ('x', np.object)])
  174. def test_setitem_series_timedelta64(self, val, exp_dtype):
  175. obj = pd.Series([pd.Timedelta('1 day'),
  176. pd.Timedelta('2 day'),
  177. pd.Timedelta('3 day'),
  178. pd.Timedelta('4 day')])
  179. assert obj.dtype == 'timedelta64[ns]'
  180. exp = pd.Series([pd.Timedelta('1 day'),
  181. val,
  182. pd.Timedelta('3 day'),
  183. pd.Timedelta('4 day')])
  184. self._assert_setitem_series_conversion(obj, val, exp, exp_dtype)
  185. def _assert_setitem_index_conversion(self, original_series, loc_key,
  186. expected_index, expected_dtype):
  187. """ test index's coercion triggered by assign key """
  188. temp = original_series.copy()
  189. temp[loc_key] = 5
  190. exp = pd.Series([1, 2, 3, 4, 5], index=expected_index)
  191. tm.assert_series_equal(temp, exp)
  192. # check dtype explicitly for sure
  193. assert temp.index.dtype == expected_dtype
  194. temp = original_series.copy()
  195. temp.loc[loc_key] = 5
  196. exp = pd.Series([1, 2, 3, 4, 5], index=expected_index)
  197. tm.assert_series_equal(temp, exp)
  198. # check dtype explicitly for sure
  199. assert temp.index.dtype == expected_dtype
  200. @pytest.mark.parametrize("val,exp_dtype", [
  201. ('x', np.object),
  202. (5, IndexError),
  203. (1.1, np.object)])
  204. def test_setitem_index_object(self, val, exp_dtype):
  205. obj = pd.Series([1, 2, 3, 4], index=list('abcd'))
  206. assert obj.index.dtype == np.object
  207. if exp_dtype is IndexError:
  208. temp = obj.copy()
  209. with pytest.raises(exp_dtype):
  210. temp[5] = 5
  211. else:
  212. exp_index = pd.Index(list('abcd') + [val])
  213. self._assert_setitem_index_conversion(obj, val, exp_index,
  214. exp_dtype)
  215. @pytest.mark.parametrize("val,exp_dtype", [
  216. (5, np.int64),
  217. (1.1, np.float64),
  218. ('x', np.object)])
  219. def test_setitem_index_int64(self, val, exp_dtype):
  220. obj = pd.Series([1, 2, 3, 4])
  221. assert obj.index.dtype == np.int64
  222. exp_index = pd.Index([0, 1, 2, 3, val])
  223. self._assert_setitem_index_conversion(obj, val, exp_index, exp_dtype)
  224. @pytest.mark.parametrize("val,exp_dtype", [
  225. (5, IndexError),
  226. (5.1, np.float64),
  227. ('x', np.object)])
  228. def test_setitem_index_float64(self, val, exp_dtype):
  229. obj = pd.Series([1, 2, 3, 4], index=[1.1, 2.1, 3.1, 4.1])
  230. assert obj.index.dtype == np.float64
  231. if exp_dtype is IndexError:
  232. # float + int -> int
  233. temp = obj.copy()
  234. with pytest.raises(exp_dtype):
  235. temp[5] = 5
  236. pytest.xfail("TODO_GH12747 The result must be float")
  237. exp_index = pd.Index([1.1, 2.1, 3.1, 4.1, val])
  238. self._assert_setitem_index_conversion(obj, val, exp_index, exp_dtype)
  239. def test_setitem_series_period(self):
  240. pass
  241. def test_setitem_index_complex128(self):
  242. pass
  243. def test_setitem_index_bool(self):
  244. pass
  245. def test_setitem_index_datetime64(self):
  246. pass
  247. def test_setitem_index_datetime64tz(self):
  248. pass
  249. def test_setitem_index_timedelta64(self):
  250. pass
  251. def test_setitem_index_period(self):
  252. pass
  253. class TestInsertIndexCoercion(CoercionBase):
  254. klasses = ['index']
  255. method = 'insert'
  256. def _assert_insert_conversion(self, original, value,
  257. expected, expected_dtype):
  258. """ test coercion triggered by insert """
  259. target = original.copy()
  260. res = target.insert(1, value)
  261. tm.assert_index_equal(res, expected)
  262. assert res.dtype == expected_dtype
  263. @pytest.mark.parametrize("insert, coerced_val, coerced_dtype", [
  264. (1, 1, np.object),
  265. (1.1, 1.1, np.object),
  266. (False, False, np.object),
  267. ('x', 'x', np.object)])
  268. def test_insert_index_object(self, insert, coerced_val, coerced_dtype):
  269. obj = pd.Index(list('abcd'))
  270. assert obj.dtype == np.object
  271. exp = pd.Index(['a', coerced_val, 'b', 'c', 'd'])
  272. self._assert_insert_conversion(obj, insert, exp, coerced_dtype)
  273. @pytest.mark.parametrize("insert, coerced_val, coerced_dtype", [
  274. (1, 1, np.int64),
  275. (1.1, 1.1, np.float64),
  276. (False, 0, np.int64),
  277. ('x', 'x', np.object)])
  278. def test_insert_index_int64(self, insert, coerced_val, coerced_dtype):
  279. obj = pd.Int64Index([1, 2, 3, 4])
  280. assert obj.dtype == np.int64
  281. exp = pd.Index([1, coerced_val, 2, 3, 4])
  282. self._assert_insert_conversion(obj, insert, exp, coerced_dtype)
  283. @pytest.mark.parametrize("insert, coerced_val, coerced_dtype", [
  284. (1, 1., np.float64),
  285. (1.1, 1.1, np.float64),
  286. (False, 0., np.float64),
  287. ('x', 'x', np.object)])
  288. def test_insert_index_float64(self, insert, coerced_val, coerced_dtype):
  289. obj = pd.Float64Index([1., 2., 3., 4.])
  290. assert obj.dtype == np.float64
  291. exp = pd.Index([1., coerced_val, 2., 3., 4.])
  292. self._assert_insert_conversion(obj, insert, exp, coerced_dtype)
  293. @pytest.mark.parametrize('fill_val,exp_dtype', [
  294. (pd.Timestamp('2012-01-01'), 'datetime64[ns]'),
  295. (pd.Timestamp('2012-01-01', tz='US/Eastern'),
  296. 'datetime64[ns, US/Eastern]')],
  297. ids=['datetime64', 'datetime64tz'])
  298. def test_insert_index_datetimes(self, fill_val, exp_dtype):
  299. obj = pd.DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03',
  300. '2011-01-04'], tz=fill_val.tz)
  301. assert obj.dtype == exp_dtype
  302. exp = pd.DatetimeIndex(['2011-01-01', fill_val.date(), '2011-01-02',
  303. '2011-01-03', '2011-01-04'], tz=fill_val.tz)
  304. self._assert_insert_conversion(obj, fill_val, exp, exp_dtype)
  305. msg = "Passed item and index have different timezone"
  306. if fill_val.tz:
  307. with pytest.raises(ValueError, match=msg):
  308. obj.insert(1, pd.Timestamp('2012-01-01'))
  309. with pytest.raises(ValueError, match=msg):
  310. obj.insert(1, pd.Timestamp('2012-01-01', tz='Asia/Tokyo'))
  311. msg = "cannot insert DatetimeIndex with incompatible label"
  312. with pytest.raises(TypeError, match=msg):
  313. obj.insert(1, 1)
  314. pytest.xfail("ToDo: must coerce to object")
  315. def test_insert_index_timedelta64(self):
  316. obj = pd.TimedeltaIndex(['1 day', '2 day', '3 day', '4 day'])
  317. assert obj.dtype == 'timedelta64[ns]'
  318. # timedelta64 + timedelta64 => timedelta64
  319. exp = pd.TimedeltaIndex(['1 day', '10 day', '2 day', '3 day', '4 day'])
  320. self._assert_insert_conversion(obj, pd.Timedelta('10 day'),
  321. exp, 'timedelta64[ns]')
  322. # ToDo: must coerce to object
  323. msg = "cannot insert TimedeltaIndex with incompatible label"
  324. with pytest.raises(TypeError, match=msg):
  325. obj.insert(1, pd.Timestamp('2012-01-01'))
  326. # ToDo: must coerce to object
  327. msg = "cannot insert TimedeltaIndex with incompatible label"
  328. with pytest.raises(TypeError, match=msg):
  329. obj.insert(1, 1)
  330. @pytest.mark.parametrize("insert, coerced_val, coerced_dtype", [
  331. (pd.Period('2012-01', freq='M'), '2012-01', 'period[M]'),
  332. (pd.Timestamp('2012-01-01'), pd.Timestamp('2012-01-01'), np.object),
  333. (1, 1, np.object),
  334. ('x', 'x', np.object)])
  335. def test_insert_index_period(self, insert, coerced_val, coerced_dtype):
  336. obj = pd.PeriodIndex(['2011-01', '2011-02', '2011-03', '2011-04'],
  337. freq='M')
  338. assert obj.dtype == 'period[M]'
  339. if isinstance(insert, pd.Period):
  340. index_type = pd.PeriodIndex
  341. else:
  342. index_type = pd.Index
  343. exp = index_type([pd.Period('2011-01', freq='M'),
  344. coerced_val,
  345. pd.Period('2011-02', freq='M'),
  346. pd.Period('2011-03', freq='M'),
  347. pd.Period('2011-04', freq='M')], freq='M')
  348. self._assert_insert_conversion(obj, insert, exp, coerced_dtype)
  349. def test_insert_index_complex128(self):
  350. pass
  351. def test_insert_index_bool(self):
  352. pass
  353. class TestWhereCoercion(CoercionBase):
  354. method = 'where'
  355. def _assert_where_conversion(self, original, cond, values,
  356. expected, expected_dtype):
  357. """ test coercion triggered by where """
  358. target = original.copy()
  359. res = target.where(cond, values)
  360. self._assert(res, expected, expected_dtype)
  361. @pytest.mark.parametrize("klass", [pd.Series, pd.Index],
  362. ids=['series', 'index'])
  363. @pytest.mark.parametrize("fill_val,exp_dtype", [
  364. (1, np.object),
  365. (1.1, np.object),
  366. (1 + 1j, np.object),
  367. (True, np.object)])
  368. def test_where_object(self, klass, fill_val, exp_dtype):
  369. obj = klass(list('abcd'))
  370. assert obj.dtype == np.object
  371. cond = klass([True, False, True, False])
  372. if fill_val is True and klass is pd.Series:
  373. ret_val = 1
  374. else:
  375. ret_val = fill_val
  376. exp = klass(['a', ret_val, 'c', ret_val])
  377. self._assert_where_conversion(obj, cond, fill_val, exp, exp_dtype)
  378. if fill_val is True:
  379. values = klass([True, False, True, True])
  380. else:
  381. values = klass(fill_val * x for x in [5, 6, 7, 8])
  382. exp = klass(['a', values[1], 'c', values[3]])
  383. self._assert_where_conversion(obj, cond, values, exp, exp_dtype)
  384. @pytest.mark.parametrize("klass", [pd.Series, pd.Index],
  385. ids=['series', 'index'])
  386. @pytest.mark.parametrize("fill_val,exp_dtype", [
  387. (1, np.int64),
  388. (1.1, np.float64),
  389. (1 + 1j, np.complex128),
  390. (True, np.object)])
  391. def test_where_int64(self, klass, fill_val, exp_dtype):
  392. if klass is pd.Index and exp_dtype is np.complex128:
  393. pytest.skip("Complex Index not supported")
  394. obj = klass([1, 2, 3, 4])
  395. assert obj.dtype == np.int64
  396. cond = klass([True, False, True, False])
  397. exp = klass([1, fill_val, 3, fill_val])
  398. self._assert_where_conversion(obj, cond, fill_val, exp, exp_dtype)
  399. if fill_val is True:
  400. values = klass([True, False, True, True])
  401. else:
  402. values = klass(x * fill_val for x in [5, 6, 7, 8])
  403. exp = klass([1, values[1], 3, values[3]])
  404. self._assert_where_conversion(obj, cond, values, exp, exp_dtype)
  405. @pytest.mark.parametrize("klass", [pd.Series, pd.Index],
  406. ids=['series', 'index'])
  407. @pytest.mark.parametrize("fill_val, exp_dtype", [
  408. (1, np.float64),
  409. (1.1, np.float64),
  410. (1 + 1j, np.complex128),
  411. (True, np.object)])
  412. def test_where_float64(self, klass, fill_val, exp_dtype):
  413. if klass is pd.Index and exp_dtype is np.complex128:
  414. pytest.skip("Complex Index not supported")
  415. obj = klass([1.1, 2.2, 3.3, 4.4])
  416. assert obj.dtype == np.float64
  417. cond = klass([True, False, True, False])
  418. exp = klass([1.1, fill_val, 3.3, fill_val])
  419. self._assert_where_conversion(obj, cond, fill_val, exp, exp_dtype)
  420. if fill_val is True:
  421. values = klass([True, False, True, True])
  422. else:
  423. values = klass(x * fill_val for x in [5, 6, 7, 8])
  424. exp = klass([1.1, values[1], 3.3, values[3]])
  425. self._assert_where_conversion(obj, cond, values, exp, exp_dtype)
  426. @pytest.mark.parametrize("fill_val,exp_dtype", [
  427. (1, np.complex128),
  428. (1.1, np.complex128),
  429. (1 + 1j, np.complex128),
  430. (True, np.object)])
  431. def test_where_series_complex128(self, fill_val, exp_dtype):
  432. obj = pd.Series([1 + 1j, 2 + 2j, 3 + 3j, 4 + 4j])
  433. assert obj.dtype == np.complex128
  434. cond = pd.Series([True, False, True, False])
  435. exp = pd.Series([1 + 1j, fill_val, 3 + 3j, fill_val])
  436. self._assert_where_conversion(obj, cond, fill_val, exp, exp_dtype)
  437. if fill_val is True:
  438. values = pd.Series([True, False, True, True])
  439. else:
  440. values = pd.Series(x * fill_val for x in [5, 6, 7, 8])
  441. exp = pd.Series([1 + 1j, values[1], 3 + 3j, values[3]])
  442. self._assert_where_conversion(obj, cond, values, exp, exp_dtype)
  443. @pytest.mark.parametrize("fill_val,exp_dtype", [
  444. (1, np.object),
  445. (1.1, np.object),
  446. (1 + 1j, np.object),
  447. (True, np.bool)])
  448. def test_where_series_bool(self, fill_val, exp_dtype):
  449. obj = pd.Series([True, False, True, False])
  450. assert obj.dtype == np.bool
  451. cond = pd.Series([True, False, True, False])
  452. exp = pd.Series([True, fill_val, True, fill_val])
  453. self._assert_where_conversion(obj, cond, fill_val, exp, exp_dtype)
  454. if fill_val is True:
  455. values = pd.Series([True, False, True, True])
  456. else:
  457. values = pd.Series(x * fill_val for x in [5, 6, 7, 8])
  458. exp = pd.Series([True, values[1], True, values[3]])
  459. self._assert_where_conversion(obj, cond, values, exp, exp_dtype)
  460. @pytest.mark.parametrize("fill_val,exp_dtype", [
  461. (pd.Timestamp('2012-01-01'), 'datetime64[ns]'),
  462. (pd.Timestamp('2012-01-01', tz='US/Eastern'), np.object)],
  463. ids=['datetime64', 'datetime64tz'])
  464. def test_where_series_datetime64(self, fill_val, exp_dtype):
  465. obj = pd.Series([pd.Timestamp('2011-01-01'),
  466. pd.Timestamp('2011-01-02'),
  467. pd.Timestamp('2011-01-03'),
  468. pd.Timestamp('2011-01-04')])
  469. assert obj.dtype == 'datetime64[ns]'
  470. cond = pd.Series([True, False, True, False])
  471. exp = pd.Series([pd.Timestamp('2011-01-01'), fill_val,
  472. pd.Timestamp('2011-01-03'), fill_val])
  473. self._assert_where_conversion(obj, cond, fill_val, exp, exp_dtype)
  474. values = pd.Series(pd.date_range(fill_val, periods=4))
  475. if fill_val.tz:
  476. exp = pd.Series([pd.Timestamp('2011-01-01'),
  477. pd.Timestamp('2012-01-02 00:00', tz='US/Eastern'),
  478. pd.Timestamp('2011-01-03'),
  479. pd.Timestamp('2012-01-04 00:00',
  480. tz='US/Eastern')])
  481. self._assert_where_conversion(obj, cond, values, exp, exp_dtype)
  482. exp = pd.Series([pd.Timestamp('2011-01-01'), values[1],
  483. pd.Timestamp('2011-01-03'), values[3]])
  484. self._assert_where_conversion(obj, cond, values, exp, exp_dtype)
  485. def test_where_index_datetime(self):
  486. fill_val = pd.Timestamp('2012-01-01')
  487. exp_dtype = 'datetime64[ns]'
  488. obj = pd.Index([pd.Timestamp('2011-01-01'),
  489. pd.Timestamp('2011-01-02'),
  490. pd.Timestamp('2011-01-03'),
  491. pd.Timestamp('2011-01-04')])
  492. assert obj.dtype == 'datetime64[ns]'
  493. cond = pd.Index([True, False, True, False])
  494. msg = ("Index\\(\\.\\.\\.\\) must be called with a collection "
  495. "of some kind")
  496. with pytest.raises(TypeError, match=msg):
  497. obj.where(cond, fill_val)
  498. values = pd.Index(pd.date_range(fill_val, periods=4))
  499. exp = pd.Index([pd.Timestamp('2011-01-01'),
  500. pd.Timestamp('2012-01-02'),
  501. pd.Timestamp('2011-01-03'),
  502. pd.Timestamp('2012-01-04')])
  503. self._assert_where_conversion(obj, cond, values, exp, exp_dtype)
  504. @pytest.mark.xfail(
  505. reason="GH 22839: do not ignore timezone, must be object")
  506. def test_where_index_datetimetz(self):
  507. fill_val = pd.Timestamp('2012-01-01', tz='US/Eastern')
  508. exp_dtype = np.object
  509. obj = pd.Index([pd.Timestamp('2011-01-01'),
  510. pd.Timestamp('2011-01-02'),
  511. pd.Timestamp('2011-01-03'),
  512. pd.Timestamp('2011-01-04')])
  513. assert obj.dtype == 'datetime64[ns]'
  514. cond = pd.Index([True, False, True, False])
  515. msg = ("Index\\(\\.\\.\\.\\) must be called with a collection "
  516. "of some kind")
  517. with pytest.raises(TypeError, match=msg):
  518. obj.where(cond, fill_val)
  519. values = pd.Index(pd.date_range(fill_val, periods=4))
  520. exp = pd.Index([pd.Timestamp('2011-01-01'),
  521. pd.Timestamp('2012-01-02', tz='US/Eastern'),
  522. pd.Timestamp('2011-01-03'),
  523. pd.Timestamp('2012-01-04', tz='US/Eastern')],
  524. dtype=exp_dtype)
  525. self._assert_where_conversion(obj, cond, values, exp, exp_dtype)
  526. def test_where_index_complex128(self):
  527. pass
  528. def test_where_index_bool(self):
  529. pass
  530. def test_where_series_datetime64tz(self):
  531. pass
  532. def test_where_series_timedelta64(self):
  533. pass
  534. def test_where_series_period(self):
  535. pass
  536. def test_where_index_datetime64tz(self):
  537. pass
  538. def test_where_index_timedelta64(self):
  539. pass
  540. def test_where_index_period(self):
  541. pass
  542. class TestFillnaSeriesCoercion(CoercionBase):
  543. # not indexing, but place here for consisntency
  544. method = 'fillna'
  545. def test_has_comprehensive_tests(self):
  546. pass
  547. def _assert_fillna_conversion(self, original, value,
  548. expected, expected_dtype):
  549. """ test coercion triggered by fillna """
  550. target = original.copy()
  551. res = target.fillna(value)
  552. self._assert(res, expected, expected_dtype)
  553. @pytest.mark.parametrize("klass", [pd.Series, pd.Index],
  554. ids=['series', 'index'])
  555. @pytest.mark.parametrize("fill_val, fill_dtype", [
  556. (1, np.object),
  557. (1.1, np.object),
  558. (1 + 1j, np.object),
  559. (True, np.object)])
  560. def test_fillna_object(self, klass, fill_val, fill_dtype):
  561. obj = klass(['a', np.nan, 'c', 'd'])
  562. assert obj.dtype == np.object
  563. exp = klass(['a', fill_val, 'c', 'd'])
  564. self._assert_fillna_conversion(obj, fill_val, exp, fill_dtype)
  565. @pytest.mark.parametrize("klass", [pd.Series, pd.Index],
  566. ids=['series', 'index'])
  567. @pytest.mark.parametrize("fill_val,fill_dtype", [
  568. (1, np.float64),
  569. (1.1, np.float64),
  570. (1 + 1j, np.complex128),
  571. (True, np.object)])
  572. def test_fillna_float64(self, klass, fill_val, fill_dtype):
  573. obj = klass([1.1, np.nan, 3.3, 4.4])
  574. assert obj.dtype == np.float64
  575. exp = klass([1.1, fill_val, 3.3, 4.4])
  576. # float + complex -> we don't support a complex Index
  577. # complex for Series,
  578. # object for Index
  579. if fill_dtype == np.complex128 and klass == pd.Index:
  580. fill_dtype = np.object
  581. self._assert_fillna_conversion(obj, fill_val, exp, fill_dtype)
  582. @pytest.mark.parametrize("fill_val,fill_dtype", [
  583. (1, np.complex128),
  584. (1.1, np.complex128),
  585. (1 + 1j, np.complex128),
  586. (True, np.object)])
  587. def test_fillna_series_complex128(self, fill_val, fill_dtype):
  588. obj = pd.Series([1 + 1j, np.nan, 3 + 3j, 4 + 4j])
  589. assert obj.dtype == np.complex128
  590. exp = pd.Series([1 + 1j, fill_val, 3 + 3j, 4 + 4j])
  591. self._assert_fillna_conversion(obj, fill_val, exp, fill_dtype)
  592. @pytest.mark.parametrize("klass", [pd.Series, pd.Index],
  593. ids=['series', 'index'])
  594. @pytest.mark.parametrize("fill_val,fill_dtype", [
  595. (pd.Timestamp('2012-01-01'), 'datetime64[ns]'),
  596. (pd.Timestamp('2012-01-01', tz='US/Eastern'), np.object),
  597. (1, np.object), ('x', np.object)],
  598. ids=['datetime64', 'datetime64tz', 'object', 'object'])
  599. def test_fillna_datetime(self, klass, fill_val, fill_dtype):
  600. obj = klass([pd.Timestamp('2011-01-01'),
  601. pd.NaT,
  602. pd.Timestamp('2011-01-03'),
  603. pd.Timestamp('2011-01-04')])
  604. assert obj.dtype == 'datetime64[ns]'
  605. exp = klass([pd.Timestamp('2011-01-01'),
  606. fill_val,
  607. pd.Timestamp('2011-01-03'),
  608. pd.Timestamp('2011-01-04')])
  609. self._assert_fillna_conversion(obj, fill_val, exp, fill_dtype)
  610. @pytest.mark.parametrize("klass", [pd.Series, pd.Index])
  611. @pytest.mark.parametrize("fill_val,fill_dtype", [
  612. (pd.Timestamp('2012-01-01', tz='US/Eastern'),
  613. 'datetime64[ns, US/Eastern]'),
  614. (pd.Timestamp('2012-01-01'), np.object),
  615. (pd.Timestamp('2012-01-01', tz='Asia/Tokyo'), np.object),
  616. (1, np.object),
  617. ('x', np.object)])
  618. def test_fillna_datetime64tz(self, klass, fill_val, fill_dtype):
  619. tz = 'US/Eastern'
  620. obj = klass([pd.Timestamp('2011-01-01', tz=tz),
  621. pd.NaT,
  622. pd.Timestamp('2011-01-03', tz=tz),
  623. pd.Timestamp('2011-01-04', tz=tz)])
  624. assert obj.dtype == 'datetime64[ns, US/Eastern]'
  625. exp = klass([pd.Timestamp('2011-01-01', tz=tz),
  626. fill_val,
  627. pd.Timestamp('2011-01-03', tz=tz),
  628. pd.Timestamp('2011-01-04', tz=tz)])
  629. self._assert_fillna_conversion(obj, fill_val, exp, fill_dtype)
  630. def test_fillna_series_int64(self):
  631. pass
  632. def test_fillna_index_int64(self):
  633. pass
  634. def test_fillna_series_bool(self):
  635. pass
  636. def test_fillna_index_bool(self):
  637. pass
  638. def test_fillna_series_timedelta64(self):
  639. pass
  640. def test_fillna_series_period(self):
  641. pass
  642. def test_fillna_index_timedelta64(self):
  643. pass
  644. def test_fillna_index_period(self):
  645. pass
  class TestReplaceSeriesCoercion(CoercionBase):
      """Check the dtype a Series ends up with after ``Series.replace``.

      ``rep`` maps a dtype name to a pair of sample values of that dtype.
      Each test builds a two-element Series from ``rep[from_key]``, replaces
      every value with its positional counterpart from ``rep[to_key]`` and
      compares the outcome with the expected Series.
      """

      klasses = ['series']
      method = 'replace'

      # dtype name -> two sample values of that dtype
      rep = {}
      rep['object'] = ['a', 'b']
      rep['int64'] = [4, 5]
      rep['float64'] = [1.1, 2.2]
      rep['complex128'] = [1 + 1j, 2 + 2j]
      rep['bool'] = [True, False]
      rep['datetime64[ns]'] = [pd.Timestamp('2011-01-01'),
                               pd.Timestamp('2011-01-03')]

      for tz in ['UTC', 'US/Eastern']:
          # to test tz => different tz replacement
          key = 'datetime64[ns, {0}]'.format(tz)
          rep[key] = [pd.Timestamp('2011-01-01', tz=tz),
                      pd.Timestamp('2011-01-03', tz=tz)]

      rep['timedelta64[ns]'] = [pd.Timedelta('1 day'),
                                pd.Timedelta('2 day')]

      def _make_replacer(self, how, from_key, to_key):
          """Pair each ``rep[from_key]`` value with its ``rep[to_key]`` value.

          Parameters
          ----------
          how : {'dict', 'series'}
              Container type of the returned replacer.
          from_key, to_key : str
              Keys of ``rep`` naming the source and target sample values.

          Returns
          -------
          dict or pd.Series
              Mapping from source values to target values, suitable as the
              argument of ``Series.replace``.

          Raises
          ------
          ValueError
              If ``how`` is neither ``'dict'`` nor ``'series'``.
          """
          if how == 'dict':
              return dict(zip(self.rep[from_key], self.rep[to_key]))
          if how == 'series':
              return pd.Series(self.rep[to_key], index=self.rep[from_key])
          raise ValueError("unsupported replacer kind: {0!r}".format(how))

      @pytest.mark.parametrize('how', ['dict', 'series'])
      @pytest.mark.parametrize('to_key', [
          'object', 'int64', 'float64', 'complex128', 'bool',
          'datetime64[ns]', 'datetime64[ns, UTC]',
          'datetime64[ns, US/Eastern]', 'timedelta64[ns]'
      ], ids=['object', 'int64', 'float64', 'complex128', 'bool',
              'datetime64', 'datetime64tz', 'datetime64tz', 'timedelta64'])
      @pytest.mark.parametrize('from_key', [
          'object', 'int64', 'float64', 'complex128', 'bool',
          'datetime64[ns]', 'datetime64[ns, UTC]',
          'datetime64[ns, US/Eastern]', 'timedelta64[ns]']
      )
      def test_replace_series(self, how, to_key, from_key):
          """Replace ``from_key`` values by ``to_key`` values, check the dtype.

          Parameters
          ----------
          how : {'dict', 'series'}
              Kind of replacer handed to ``Series.replace``.
          to_key, from_key : str
              Keys of ``rep`` for the replacement and the original values.
          """
          if from_key == 'bool' and how == 'series' and compat.PY3:
              # doesn't work in PY3, though ...dict_from_bool works fine
              pytest.skip("doesn't work as in PY3")

          index = pd.Index([3, 4], name='xxx')
          obj = pd.Series(self.rep[from_key], index=index, name='yyy')
          assert obj.dtype == from_key

          if (from_key.startswith('datetime') and
                  to_key.startswith('datetime')):
              # covered by test_replace_series_datetime_datetime
              return
          elif from_key in ['datetime64[ns, US/Eastern]',
                            'datetime64[ns, UTC]']:
              # covered by test_replace_series_datetime_tz
              return

          result = obj.replace(self._make_replacer(how, from_key, to_key))

          # ``to_key == 'int64'`` rather than ``to_key in ('int64')``: the
          # latter is a substring test against the *string* 'int64'.
          keeps_from_dtype = (
              (from_key == 'float64' and to_key == 'int64') or
              (from_key == 'complex128' and to_key in ('int64', 'float64')))

          if keeps_from_dtype:
              if compat.is_platform_32bit() or compat.is_platform_windows():
                  pytest.skip("32-bit platform buggy: {0} -> {1}".format(
                      from_key, to_key))

              # Expected: do not downcast by replacement
              exp = pd.Series(self.rep[to_key], index=index,
                              name='yyy', dtype=from_key)
          else:
              exp = pd.Series(self.rep[to_key], index=index, name='yyy')
              assert exp.dtype == to_key

          tm.assert_series_equal(result, exp)

      # TODO(jbrockmendel) commented out to only have a single xfail printed
      # @pytest.mark.parametrize('how', ['dict', 'series'])
      # @pytest.mark.parametrize('to_key', ['timedelta64[ns]', 'bool', 'object',
      #                                     'complex128', 'float64', 'int64'])
      # @pytest.mark.parametrize('from_key', ['datetime64[ns, UTC]',
      #                                       'datetime64[ns, US/Eastern]'])
      # def test_replace_series_datetime_tz(self, how, to_key, from_key):
      @pytest.mark.xfail(reason='GH #18376, tzawareness-compat bug '
                         'in BlockManager.replace_list')
      def test_replace_series_datetime_tz(self):
          """Replace tz-aware datetimes by timedeltas (expected to fail).

          Only the US/Eastern -> timedelta64[ns] case with a Series replacer
          is exercised; the wider parametrization is kept commented out above.
          """
          how = 'series'
          from_key = 'datetime64[ns, US/Eastern]'
          to_key = 'timedelta64[ns]'

          index = pd.Index([3, 4], name='xxx')
          obj = pd.Series(self.rep[from_key], index=index, name='yyy')
          assert obj.dtype == from_key

          result = obj.replace(self._make_replacer(how, from_key, to_key))

          exp = pd.Series(self.rep[to_key], index=index, name='yyy')
          assert exp.dtype == to_key

          tm.assert_series_equal(result, exp)

      # TODO(jreback) commented out to only have a single xfail printed
      # @pytest.mark.parametrize('how', ['dict', 'series'])
      # @pytest.mark.parametrize('to_key', [
      #     'datetime64[ns]', 'datetime64[ns, UTC]',
      #     'datetime64[ns, US/Eastern]'])
      # @pytest.mark.parametrize('from_key', [
      #     'datetime64[ns]', 'datetime64[ns, UTC]',
      #     'datetime64[ns, US/Eastern]'])
      # def test_replace_series_datetime_datetime(self, how, to_key, from_key):
      @pytest.mark.xfail(reason="different tz, "
                         "currently mask_missing raises SystemError",
                         strict=False)
      def test_replace_series_datetime_datetime(self):
          """Replace datetimes by datetimes (non-strict xfail).

          Only the naive datetime64[ns] -> datetime64[ns] case with a dict
          replacer is exercised; the wider parametrization is kept commented
          out above.
          """
          how = 'dict'
          to_key = 'datetime64[ns]'
          from_key = 'datetime64[ns]'

          index = pd.Index([3, 4], name='xxx')
          obj = pd.Series(self.rep[from_key], index=index, name='yyy')
          assert obj.dtype == from_key

          result = obj.replace(self._make_replacer(how, from_key, to_key))

          exp = pd.Series(self.rep[to_key], index=index, name='yyy')
          assert exp.dtype == to_key

          tm.assert_series_equal(result, exp)

      def test_replace_series_period(self):
          """Placeholder: replace coercion for a period Series is not tested."""