test_alter_index.py 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564
  1. # coding=utf-8
  2. # pylint: disable-msg=E1101,W0612
  3. from datetime import datetime
  4. import numpy as np
  5. from numpy import nan
  6. import pytest
  7. import pandas.compat as compat
  8. from pandas.compat import lrange, range
  9. import pandas as pd
  10. from pandas import Categorical, Series, date_range, isna
  11. import pandas.util.testing as tm
  12. from pandas.util.testing import assert_series_equal
  13. @pytest.mark.parametrize(
  14. 'first_slice,second_slice', [
  15. [[2, None], [None, -5]],
  16. [[None, 0], [None, -5]],
  17. [[None, -5], [None, 0]],
  18. [[None, 0], [None, 0]]
  19. ])
  20. @pytest.mark.parametrize('fill', [None, -1])
  21. def test_align(test_data, first_slice, second_slice, join_type, fill):
  22. a = test_data.ts[slice(*first_slice)]
  23. b = test_data.ts[slice(*second_slice)]
  24. aa, ab = a.align(b, join=join_type, fill_value=fill)
  25. join_index = a.index.join(b.index, how=join_type)
  26. if fill is not None:
  27. diff_a = aa.index.difference(join_index)
  28. diff_b = ab.index.difference(join_index)
  29. if len(diff_a) > 0:
  30. assert (aa.reindex(diff_a) == fill).all()
  31. if len(diff_b) > 0:
  32. assert (ab.reindex(diff_b) == fill).all()
  33. ea = a.reindex(join_index)
  34. eb = b.reindex(join_index)
  35. if fill is not None:
  36. ea = ea.fillna(fill)
  37. eb = eb.fillna(fill)
  38. assert_series_equal(aa, ea)
  39. assert_series_equal(ab, eb)
  40. assert aa.name == 'ts'
  41. assert ea.name == 'ts'
  42. assert ab.name == 'ts'
  43. assert eb.name == 'ts'
  44. @pytest.mark.parametrize(
  45. 'first_slice,second_slice', [
  46. [[2, None], [None, -5]],
  47. [[None, 0], [None, -5]],
  48. [[None, -5], [None, 0]],
  49. [[None, 0], [None, 0]]
  50. ])
  51. @pytest.mark.parametrize('method', ['pad', 'bfill'])
  52. @pytest.mark.parametrize('limit', [None, 1])
  53. def test_align_fill_method(test_data,
  54. first_slice, second_slice,
  55. join_type, method, limit):
  56. a = test_data.ts[slice(*first_slice)]
  57. b = test_data.ts[slice(*second_slice)]
  58. aa, ab = a.align(b, join=join_type, method=method, limit=limit)
  59. join_index = a.index.join(b.index, how=join_type)
  60. ea = a.reindex(join_index)
  61. eb = b.reindex(join_index)
  62. ea = ea.fillna(method=method, limit=limit)
  63. eb = eb.fillna(method=method, limit=limit)
  64. assert_series_equal(aa, ea)
  65. assert_series_equal(ab, eb)
  66. def test_align_nocopy(test_data):
  67. b = test_data.ts[:5].copy()
  68. # do copy
  69. a = test_data.ts.copy()
  70. ra, _ = a.align(b, join='left')
  71. ra[:5] = 5
  72. assert not (a[:5] == 5).any()
  73. # do not copy
  74. a = test_data.ts.copy()
  75. ra, _ = a.align(b, join='left', copy=False)
  76. ra[:5] = 5
  77. assert (a[:5] == 5).all()
  78. # do copy
  79. a = test_data.ts.copy()
  80. b = test_data.ts[:5].copy()
  81. _, rb = a.align(b, join='right')
  82. rb[:3] = 5
  83. assert not (b[:3] == 5).any()
  84. # do not copy
  85. a = test_data.ts.copy()
  86. b = test_data.ts[:5].copy()
  87. _, rb = a.align(b, join='right', copy=False)
  88. rb[:2] = 5
  89. assert (b[:2] == 5).all()
  90. def test_align_same_index(test_data):
  91. a, b = test_data.ts.align(test_data.ts, copy=False)
  92. assert a.index is test_data.ts.index
  93. assert b.index is test_data.ts.index
  94. a, b = test_data.ts.align(test_data.ts, copy=True)
  95. assert a.index is not test_data.ts.index
  96. assert b.index is not test_data.ts.index
  97. def test_align_multiindex():
  98. # GH 10665
  99. midx = pd.MultiIndex.from_product([range(2), range(3), range(2)],
  100. names=('a', 'b', 'c'))
  101. idx = pd.Index(range(2), name='b')
  102. s1 = pd.Series(np.arange(12, dtype='int64'), index=midx)
  103. s2 = pd.Series(np.arange(2, dtype='int64'), index=idx)
  104. # these must be the same results (but flipped)
  105. res1l, res1r = s1.align(s2, join='left')
  106. res2l, res2r = s2.align(s1, join='right')
  107. expl = s1
  108. tm.assert_series_equal(expl, res1l)
  109. tm.assert_series_equal(expl, res2r)
  110. expr = pd.Series([0, 0, 1, 1, np.nan, np.nan] * 2, index=midx)
  111. tm.assert_series_equal(expr, res1r)
  112. tm.assert_series_equal(expr, res2l)
  113. res1l, res1r = s1.align(s2, join='right')
  114. res2l, res2r = s2.align(s1, join='left')
  115. exp_idx = pd.MultiIndex.from_product([range(2), range(2), range(2)],
  116. names=('a', 'b', 'c'))
  117. expl = pd.Series([0, 1, 2, 3, 6, 7, 8, 9], index=exp_idx)
  118. tm.assert_series_equal(expl, res1l)
  119. tm.assert_series_equal(expl, res2r)
  120. expr = pd.Series([0, 0, 1, 1] * 2, index=exp_idx)
  121. tm.assert_series_equal(expr, res1r)
  122. tm.assert_series_equal(expr, res2l)
  123. def test_reindex(test_data):
  124. identity = test_data.series.reindex(test_data.series.index)
  125. # __array_interface__ is not defined for older numpies
  126. # and on some pythons
  127. try:
  128. assert np.may_share_memory(test_data.series.index, identity.index)
  129. except AttributeError:
  130. pass
  131. assert identity.index.is_(test_data.series.index)
  132. assert identity.index.identical(test_data.series.index)
  133. subIndex = test_data.series.index[10:20]
  134. subSeries = test_data.series.reindex(subIndex)
  135. for idx, val in compat.iteritems(subSeries):
  136. assert val == test_data.series[idx]
  137. subIndex2 = test_data.ts.index[10:20]
  138. subTS = test_data.ts.reindex(subIndex2)
  139. for idx, val in compat.iteritems(subTS):
  140. assert val == test_data.ts[idx]
  141. stuffSeries = test_data.ts.reindex(subIndex)
  142. assert np.isnan(stuffSeries).all()
  143. # This is extremely important for the Cython code to not screw up
  144. nonContigIndex = test_data.ts.index[::2]
  145. subNonContig = test_data.ts.reindex(nonContigIndex)
  146. for idx, val in compat.iteritems(subNonContig):
  147. assert val == test_data.ts[idx]
  148. # return a copy the same index here
  149. result = test_data.ts.reindex()
  150. assert not (result is test_data.ts)
  151. def test_reindex_nan():
  152. ts = Series([2, 3, 5, 7], index=[1, 4, nan, 8])
  153. i, j = [nan, 1, nan, 8, 4, nan], [2, 0, 2, 3, 1, 2]
  154. assert_series_equal(ts.reindex(i), ts.iloc[j])
  155. ts.index = ts.index.astype('object')
  156. # reindex coerces index.dtype to float, loc/iloc doesn't
  157. assert_series_equal(ts.reindex(i), ts.iloc[j], check_index_type=False)
  158. def test_reindex_series_add_nat():
  159. rng = date_range('1/1/2000 00:00:00', periods=10, freq='10s')
  160. series = Series(rng)
  161. result = series.reindex(lrange(15))
  162. assert np.issubdtype(result.dtype, np.dtype('M8[ns]'))
  163. mask = result.isna()
  164. assert mask[-5:].all()
  165. assert not mask[:-5].any()
  166. def test_reindex_with_datetimes():
  167. rng = date_range('1/1/2000', periods=20)
  168. ts = Series(np.random.randn(20), index=rng)
  169. result = ts.reindex(list(ts.index[5:10]))
  170. expected = ts[5:10]
  171. tm.assert_series_equal(result, expected)
  172. result = ts[list(ts.index[5:10])]
  173. tm.assert_series_equal(result, expected)
  174. def test_reindex_corner(test_data):
  175. # (don't forget to fix this) I think it's fixed
  176. test_data.empty.reindex(test_data.ts.index, method='pad') # it works
  177. # corner case: pad empty series
  178. reindexed = test_data.empty.reindex(test_data.ts.index, method='pad')
  179. # pass non-Index
  180. reindexed = test_data.ts.reindex(list(test_data.ts.index))
  181. assert_series_equal(test_data.ts, reindexed)
  182. # bad fill method
  183. ts = test_data.ts[::2]
  184. msg = (r"Invalid fill method\. Expecting pad \(ffill\), backfill"
  185. r" \(bfill\) or nearest\. Got foo")
  186. with pytest.raises(ValueError, match=msg):
  187. ts.reindex(test_data.ts.index, method='foo')
  188. def test_reindex_pad():
  189. s = Series(np.arange(10), dtype='int64')
  190. s2 = s[::2]
  191. reindexed = s2.reindex(s.index, method='pad')
  192. reindexed2 = s2.reindex(s.index, method='ffill')
  193. assert_series_equal(reindexed, reindexed2)
  194. expected = Series([0, 0, 2, 2, 4, 4, 6, 6, 8, 8], index=np.arange(10))
  195. assert_series_equal(reindexed, expected)
  196. # GH4604
  197. s = Series([1, 2, 3, 4, 5], index=['a', 'b', 'c', 'd', 'e'])
  198. new_index = ['a', 'g', 'c', 'f']
  199. expected = Series([1, 1, 3, 3], index=new_index)
  200. # this changes dtype because the ffill happens after
  201. result = s.reindex(new_index).ffill()
  202. assert_series_equal(result, expected.astype('float64'))
  203. result = s.reindex(new_index).ffill(downcast='infer')
  204. assert_series_equal(result, expected)
  205. expected = Series([1, 5, 3, 5], index=new_index)
  206. result = s.reindex(new_index, method='ffill')
  207. assert_series_equal(result, expected)
  208. # inference of new dtype
  209. s = Series([True, False, False, True], index=list('abcd'))
  210. new_index = 'agc'
  211. result = s.reindex(list(new_index)).ffill()
  212. expected = Series([True, True, False], index=list(new_index))
  213. assert_series_equal(result, expected)
  214. # GH4618 shifted series downcasting
  215. s = Series(False, index=lrange(0, 5))
  216. result = s.shift(1).fillna(method='bfill')
  217. expected = Series(False, index=lrange(0, 5))
  218. assert_series_equal(result, expected)
  219. def test_reindex_nearest():
  220. s = Series(np.arange(10, dtype='int64'))
  221. target = [0.1, 0.9, 1.5, 2.0]
  222. actual = s.reindex(target, method='nearest')
  223. expected = Series(np.around(target).astype('int64'), target)
  224. assert_series_equal(expected, actual)
  225. actual = s.reindex_like(actual, method='nearest')
  226. assert_series_equal(expected, actual)
  227. actual = s.reindex_like(actual, method='nearest', tolerance=1)
  228. assert_series_equal(expected, actual)
  229. actual = s.reindex_like(actual, method='nearest',
  230. tolerance=[1, 2, 3, 4])
  231. assert_series_equal(expected, actual)
  232. actual = s.reindex(target, method='nearest', tolerance=0.2)
  233. expected = Series([0, 1, np.nan, 2], target)
  234. assert_series_equal(expected, actual)
  235. actual = s.reindex(target, method='nearest',
  236. tolerance=[0.3, 0.01, 0.4, 3])
  237. expected = Series([0, np.nan, np.nan, 2], target)
  238. assert_series_equal(expected, actual)
  239. def test_reindex_backfill():
  240. pass
  241. def test_reindex_int(test_data):
  242. ts = test_data.ts[::2]
  243. int_ts = Series(np.zeros(len(ts), dtype=int), index=ts.index)
  244. # this should work fine
  245. reindexed_int = int_ts.reindex(test_data.ts.index)
  246. # if NaNs introduced
  247. assert reindexed_int.dtype == np.float_
  248. # NO NaNs introduced
  249. reindexed_int = int_ts.reindex(int_ts.index[::2])
  250. assert reindexed_int.dtype == np.int_
  251. def test_reindex_bool(test_data):
  252. # A series other than float, int, string, or object
  253. ts = test_data.ts[::2]
  254. bool_ts = Series(np.zeros(len(ts), dtype=bool), index=ts.index)
  255. # this should work fine
  256. reindexed_bool = bool_ts.reindex(test_data.ts.index)
  257. # if NaNs introduced
  258. assert reindexed_bool.dtype == np.object_
  259. # NO NaNs introduced
  260. reindexed_bool = bool_ts.reindex(bool_ts.index[::2])
  261. assert reindexed_bool.dtype == np.bool_
  262. def test_reindex_bool_pad(test_data):
  263. # fail
  264. ts = test_data.ts[5:]
  265. bool_ts = Series(np.zeros(len(ts), dtype=bool), index=ts.index)
  266. filled_bool = bool_ts.reindex(test_data.ts.index, method='pad')
  267. assert isna(filled_bool[:5]).all()
  268. def test_reindex_categorical():
  269. index = date_range('20000101', periods=3)
  270. # reindexing to an invalid Categorical
  271. s = Series(['a', 'b', 'c'], dtype='category')
  272. result = s.reindex(index)
  273. expected = Series(Categorical(values=[np.nan, np.nan, np.nan],
  274. categories=['a', 'b', 'c']))
  275. expected.index = index
  276. tm.assert_series_equal(result, expected)
  277. # partial reindexing
  278. expected = Series(Categorical(values=['b', 'c'], categories=['a', 'b',
  279. 'c']))
  280. expected.index = [1, 2]
  281. result = s.reindex([1, 2])
  282. tm.assert_series_equal(result, expected)
  283. expected = Series(Categorical(
  284. values=['c', np.nan], categories=['a', 'b', 'c']))
  285. expected.index = [2, 3]
  286. result = s.reindex([2, 3])
  287. tm.assert_series_equal(result, expected)
  288. def test_reindex_like(test_data):
  289. other = test_data.ts[::2]
  290. assert_series_equal(test_data.ts.reindex(other.index),
  291. test_data.ts.reindex_like(other))
  292. # GH 7179
  293. day1 = datetime(2013, 3, 5)
  294. day2 = datetime(2013, 5, 5)
  295. day3 = datetime(2014, 3, 5)
  296. series1 = Series([5, None, None], [day1, day2, day3])
  297. series2 = Series([None, None], [day1, day3])
  298. result = series1.reindex_like(series2, method='pad')
  299. expected = Series([5, np.nan], index=[day1, day3])
  300. assert_series_equal(result, expected)
  301. def test_reindex_fill_value():
  302. # -----------------------------------------------------------
  303. # floats
  304. floats = Series([1., 2., 3.])
  305. result = floats.reindex([1, 2, 3])
  306. expected = Series([2., 3., np.nan], index=[1, 2, 3])
  307. assert_series_equal(result, expected)
  308. result = floats.reindex([1, 2, 3], fill_value=0)
  309. expected = Series([2., 3., 0], index=[1, 2, 3])
  310. assert_series_equal(result, expected)
  311. # -----------------------------------------------------------
  312. # ints
  313. ints = Series([1, 2, 3])
  314. result = ints.reindex([1, 2, 3])
  315. expected = Series([2., 3., np.nan], index=[1, 2, 3])
  316. assert_series_equal(result, expected)
  317. # don't upcast
  318. result = ints.reindex([1, 2, 3], fill_value=0)
  319. expected = Series([2, 3, 0], index=[1, 2, 3])
  320. assert issubclass(result.dtype.type, np.integer)
  321. assert_series_equal(result, expected)
  322. # -----------------------------------------------------------
  323. # objects
  324. objects = Series([1, 2, 3], dtype=object)
  325. result = objects.reindex([1, 2, 3])
  326. expected = Series([2, 3, np.nan], index=[1, 2, 3], dtype=object)
  327. assert_series_equal(result, expected)
  328. result = objects.reindex([1, 2, 3], fill_value='foo')
  329. expected = Series([2, 3, 'foo'], index=[1, 2, 3], dtype=object)
  330. assert_series_equal(result, expected)
  331. # ------------------------------------------------------------
  332. # bools
  333. bools = Series([True, False, True])
  334. result = bools.reindex([1, 2, 3])
  335. expected = Series([False, True, np.nan], index=[1, 2, 3], dtype=object)
  336. assert_series_equal(result, expected)
  337. result = bools.reindex([1, 2, 3], fill_value=False)
  338. expected = Series([False, True, False], index=[1, 2, 3])
  339. assert_series_equal(result, expected)
  340. def test_reindex_datetimeindexes_tz_naive_and_aware():
  341. # GH 8306
  342. idx = date_range('20131101', tz='America/Chicago', periods=7)
  343. newidx = date_range('20131103', periods=10, freq='H')
  344. s = Series(range(7), index=idx)
  345. with pytest.raises(TypeError):
  346. s.reindex(newidx, method='ffill')
  347. def test_reindex_empty_series_tz_dtype():
  348. # GH 20869
  349. result = Series(dtype='datetime64[ns, UTC]').reindex([0, 1])
  350. expected = Series([pd.NaT] * 2, dtype='datetime64[ns, UTC]')
  351. tm.assert_equal(result, expected)
  352. def test_rename():
  353. # GH 17407
  354. s = Series(range(1, 6), index=pd.Index(range(2, 7), name='IntIndex'))
  355. result = s.rename(str)
  356. expected = s.rename(lambda i: str(i))
  357. assert_series_equal(result, expected)
  358. assert result.name == expected.name
  359. @pytest.mark.parametrize(
  360. 'data, index, drop_labels,'
  361. ' axis, expected_data, expected_index',
  362. [
  363. # Unique Index
  364. ([1, 2], ['one', 'two'], ['two'],
  365. 0, [1], ['one']),
  366. ([1, 2], ['one', 'two'], ['two'],
  367. 'rows', [1], ['one']),
  368. ([1, 1, 2], ['one', 'two', 'one'], ['two'],
  369. 0, [1, 2], ['one', 'one']),
  370. # GH 5248 Non-Unique Index
  371. ([1, 1, 2], ['one', 'two', 'one'], 'two',
  372. 0, [1, 2], ['one', 'one']),
  373. ([1, 1, 2], ['one', 'two', 'one'], ['one'],
  374. 0, [1], ['two']),
  375. ([1, 1, 2], ['one', 'two', 'one'], 'one',
  376. 0, [1], ['two'])])
  377. def test_drop_unique_and_non_unique_index(data, index, axis, drop_labels,
  378. expected_data, expected_index):
  379. s = Series(data=data, index=index)
  380. result = s.drop(drop_labels, axis=axis)
  381. expected = Series(data=expected_data, index=expected_index)
  382. tm.assert_series_equal(result, expected)
  383. @pytest.mark.parametrize(
  384. 'data, index, drop_labels,'
  385. ' axis, error_type, error_desc',
  386. [
  387. # single string/tuple-like
  388. (range(3), list('abc'), 'bc',
  389. 0, KeyError, 'not found in axis'),
  390. # bad axis
  391. (range(3), list('abc'), ('a',),
  392. 0, KeyError, 'not found in axis'),
  393. (range(3), list('abc'), 'one',
  394. 'columns', ValueError, 'No axis named columns')])
  395. def test_drop_exception_raised(data, index, drop_labels,
  396. axis, error_type, error_desc):
  397. with pytest.raises(error_type, match=error_desc):
  398. Series(data, index=index).drop(drop_labels, axis=axis)
  399. def test_drop_with_ignore_errors():
  400. # errors='ignore'
  401. s = Series(range(3), index=list('abc'))
  402. result = s.drop('bc', errors='ignore')
  403. tm.assert_series_equal(result, s)
  404. result = s.drop(['a', 'd'], errors='ignore')
  405. expected = s.iloc[1:]
  406. tm.assert_series_equal(result, expected)
  407. # GH 8522
  408. s = Series([2, 3], index=[True, False])
  409. assert s.index.is_object()
  410. result = s.drop(True)
  411. expected = Series([3], index=[False])
  412. tm.assert_series_equal(result, expected)
  413. @pytest.mark.parametrize('index', [[1, 2, 3], [1, 1, 3]])
  414. @pytest.mark.parametrize('drop_labels', [[], [1], [3]])
  415. def test_drop_empty_list(index, drop_labels):
  416. # GH 21494
  417. expected_index = [i for i in index if i not in drop_labels]
  418. series = pd.Series(index=index).drop(drop_labels)
  419. tm.assert_series_equal(series, pd.Series(index=expected_index))
  420. @pytest.mark.parametrize('data, index, drop_labels', [
  421. (None, [1, 2, 3], [1, 4]),
  422. (None, [1, 2, 2], [1, 4]),
  423. ([2, 3], [0, 1], [False, True])
  424. ])
  425. def test_drop_non_empty_list(data, index, drop_labels):
  426. # GH 21494 and GH 16877
  427. with pytest.raises(KeyError, match='not found in axis'):
  428. pd.Series(data=data, index=index).drop(drop_labels)