test_axis_select_reindex.py 44 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159
  1. # -*- coding: utf-8 -*-
  2. from __future__ import print_function
  3. from datetime import datetime
  4. import numpy as np
  5. import pytest
  6. from pandas.compat import lrange, lzip, u
  7. from pandas.errors import PerformanceWarning
  8. import pandas as pd
  9. from pandas import (
  10. Categorical, DataFrame, Index, MultiIndex, Series, compat, date_range,
  11. isna)
  12. from pandas.tests.frame.common import TestData
  13. import pandas.util.testing as tm
  14. from pandas.util.testing import assert_frame_equal
  15. class TestDataFrameSelectReindex(TestData):
  16. # These are specific reindex-based tests; other indexing tests should go in
  17. # test_indexing
  18. def test_drop_names(self):
  19. df = DataFrame([[1, 2, 3], [3, 4, 5], [5, 6, 7]],
  20. index=['a', 'b', 'c'],
  21. columns=['d', 'e', 'f'])
  22. df.index.name, df.columns.name = 'first', 'second'
  23. df_dropped_b = df.drop('b')
  24. df_dropped_e = df.drop('e', axis=1)
  25. df_inplace_b, df_inplace_e = df.copy(), df.copy()
  26. df_inplace_b.drop('b', inplace=True)
  27. df_inplace_e.drop('e', axis=1, inplace=True)
  28. for obj in (df_dropped_b, df_dropped_e, df_inplace_b, df_inplace_e):
  29. assert obj.index.name == 'first'
  30. assert obj.columns.name == 'second'
  31. assert list(df.columns) == ['d', 'e', 'f']
  32. pytest.raises(KeyError, df.drop, ['g'])
  33. pytest.raises(KeyError, df.drop, ['g'], 1)
  34. # errors = 'ignore'
  35. dropped = df.drop(['g'], errors='ignore')
  36. expected = Index(['a', 'b', 'c'], name='first')
  37. tm.assert_index_equal(dropped.index, expected)
  38. dropped = df.drop(['b', 'g'], errors='ignore')
  39. expected = Index(['a', 'c'], name='first')
  40. tm.assert_index_equal(dropped.index, expected)
  41. dropped = df.drop(['g'], axis=1, errors='ignore')
  42. expected = Index(['d', 'e', 'f'], name='second')
  43. tm.assert_index_equal(dropped.columns, expected)
  44. dropped = df.drop(['d', 'g'], axis=1, errors='ignore')
  45. expected = Index(['e', 'f'], name='second')
  46. tm.assert_index_equal(dropped.columns, expected)
  47. # GH 16398
  48. dropped = df.drop([], errors='ignore')
  49. expected = Index(['a', 'b', 'c'], name='first')
  50. tm.assert_index_equal(dropped.index, expected)
  51. def test_drop_col_still_multiindex(self):
  52. arrays = [['a', 'b', 'c', 'top'],
  53. ['', '', '', 'OD'],
  54. ['', '', '', 'wx']]
  55. tuples = sorted(zip(*arrays))
  56. index = MultiIndex.from_tuples(tuples)
  57. df = DataFrame(np.random.randn(3, 4), columns=index)
  58. del df[('a', '', '')]
  59. assert(isinstance(df.columns, MultiIndex))
  60. def test_drop(self):
  61. simple = DataFrame({"A": [1, 2, 3, 4], "B": [0, 1, 2, 3]})
  62. assert_frame_equal(simple.drop("A", axis=1), simple[['B']])
  63. assert_frame_equal(simple.drop(["A", "B"], axis='columns'),
  64. simple[[]])
  65. assert_frame_equal(simple.drop([0, 1, 3], axis=0), simple.loc[[2], :])
  66. assert_frame_equal(simple.drop(
  67. [0, 3], axis='index'), simple.loc[[1, 2], :])
  68. pytest.raises(KeyError, simple.drop, 5)
  69. pytest.raises(KeyError, simple.drop, 'C', 1)
  70. pytest.raises(KeyError, simple.drop, [1, 5])
  71. pytest.raises(KeyError, simple.drop, ['A', 'C'], 1)
  72. # errors = 'ignore'
  73. assert_frame_equal(simple.drop(5, errors='ignore'), simple)
  74. assert_frame_equal(simple.drop([0, 5], errors='ignore'),
  75. simple.loc[[1, 2, 3], :])
  76. assert_frame_equal(simple.drop('C', axis=1, errors='ignore'), simple)
  77. assert_frame_equal(simple.drop(['A', 'C'], axis=1, errors='ignore'),
  78. simple[['B']])
  79. # non-unique - wheee!
  80. nu_df = DataFrame(lzip(range(3), range(-3, 1), list('abc')),
  81. columns=['a', 'a', 'b'])
  82. assert_frame_equal(nu_df.drop('a', axis=1), nu_df[['b']])
  83. assert_frame_equal(nu_df.drop('b', axis='columns'), nu_df['a'])
  84. assert_frame_equal(nu_df.drop([]), nu_df) # GH 16398
  85. nu_df = nu_df.set_index(pd.Index(['X', 'Y', 'X']))
  86. nu_df.columns = list('abc')
  87. assert_frame_equal(nu_df.drop('X', axis='rows'), nu_df.loc[["Y"], :])
  88. assert_frame_equal(nu_df.drop(['X', 'Y'], axis=0), nu_df.loc[[], :])
  89. # inplace cache issue
  90. # GH 5628
  91. df = pd.DataFrame(np.random.randn(10, 3), columns=list('abc'))
  92. expected = df[~(df.b > 0)]
  93. df.drop(labels=df[df.b > 0].index, inplace=True)
  94. assert_frame_equal(df, expected)
  95. def test_drop_multiindex_not_lexsorted(self):
  96. # GH 11640
  97. # define the lexsorted version
  98. lexsorted_mi = MultiIndex.from_tuples(
  99. [('a', ''), ('b1', 'c1'), ('b2', 'c2')], names=['b', 'c'])
  100. lexsorted_df = DataFrame([[1, 3, 4]], columns=lexsorted_mi)
  101. assert lexsorted_df.columns.is_lexsorted()
  102. # define the non-lexsorted version
  103. not_lexsorted_df = DataFrame(columns=['a', 'b', 'c', 'd'],
  104. data=[[1, 'b1', 'c1', 3],
  105. [1, 'b2', 'c2', 4]])
  106. not_lexsorted_df = not_lexsorted_df.pivot_table(
  107. index='a', columns=['b', 'c'], values='d')
  108. not_lexsorted_df = not_lexsorted_df.reset_index()
  109. assert not not_lexsorted_df.columns.is_lexsorted()
  110. # compare the results
  111. tm.assert_frame_equal(lexsorted_df, not_lexsorted_df)
  112. expected = lexsorted_df.drop('a', axis=1)
  113. with tm.assert_produces_warning(PerformanceWarning):
  114. result = not_lexsorted_df.drop('a', axis=1)
  115. tm.assert_frame_equal(result, expected)
  116. def test_drop_api_equivalence(self):
  117. # equivalence of the labels/axis and index/columns API's (GH12392)
  118. df = DataFrame([[1, 2, 3], [3, 4, 5], [5, 6, 7]],
  119. index=['a', 'b', 'c'],
  120. columns=['d', 'e', 'f'])
  121. res1 = df.drop('a')
  122. res2 = df.drop(index='a')
  123. tm.assert_frame_equal(res1, res2)
  124. res1 = df.drop('d', 1)
  125. res2 = df.drop(columns='d')
  126. tm.assert_frame_equal(res1, res2)
  127. res1 = df.drop(labels='e', axis=1)
  128. res2 = df.drop(columns='e')
  129. tm.assert_frame_equal(res1, res2)
  130. res1 = df.drop(['a'], axis=0)
  131. res2 = df.drop(index=['a'])
  132. tm.assert_frame_equal(res1, res2)
  133. res1 = df.drop(['a'], axis=0).drop(['d'], axis=1)
  134. res2 = df.drop(index=['a'], columns=['d'])
  135. tm.assert_frame_equal(res1, res2)
  136. with pytest.raises(ValueError):
  137. df.drop(labels='a', index='b')
  138. with pytest.raises(ValueError):
  139. df.drop(labels='a', columns='b')
  140. with pytest.raises(ValueError):
  141. df.drop(axis=1)
  142. def test_merge_join_different_levels(self):
  143. # GH 9455
  144. # first dataframe
  145. df1 = DataFrame(columns=['a', 'b'], data=[[1, 11], [0, 22]])
  146. # second dataframe
  147. columns = MultiIndex.from_tuples([('a', ''), ('c', 'c1')])
  148. df2 = DataFrame(columns=columns, data=[[1, 33], [0, 44]])
  149. # merge
  150. columns = ['a', 'b', ('c', 'c1')]
  151. expected = DataFrame(columns=columns, data=[[1, 11, 33], [0, 22, 44]])
  152. with tm.assert_produces_warning(UserWarning):
  153. result = pd.merge(df1, df2, on='a')
  154. tm.assert_frame_equal(result, expected)
  155. # join, see discussion in GH 12219
  156. columns = ['a', 'b', ('a', ''), ('c', 'c1')]
  157. expected = DataFrame(columns=columns,
  158. data=[[1, 11, 0, 44], [0, 22, 1, 33]])
  159. with tm.assert_produces_warning(UserWarning):
  160. result = df1.join(df2, on='a')
  161. tm.assert_frame_equal(result, expected)
  162. def test_reindex(self):
  163. newFrame = self.frame.reindex(self.ts1.index)
  164. for col in newFrame.columns:
  165. for idx, val in compat.iteritems(newFrame[col]):
  166. if idx in self.frame.index:
  167. if np.isnan(val):
  168. assert np.isnan(self.frame[col][idx])
  169. else:
  170. assert val == self.frame[col][idx]
  171. else:
  172. assert np.isnan(val)
  173. for col, series in compat.iteritems(newFrame):
  174. assert tm.equalContents(series.index, newFrame.index)
  175. emptyFrame = self.frame.reindex(Index([]))
  176. assert len(emptyFrame.index) == 0
  177. # Cython code should be unit-tested directly
  178. nonContigFrame = self.frame.reindex(self.ts1.index[::2])
  179. for col in nonContigFrame.columns:
  180. for idx, val in compat.iteritems(nonContigFrame[col]):
  181. if idx in self.frame.index:
  182. if np.isnan(val):
  183. assert np.isnan(self.frame[col][idx])
  184. else:
  185. assert val == self.frame[col][idx]
  186. else:
  187. assert np.isnan(val)
  188. for col, series in compat.iteritems(nonContigFrame):
  189. assert tm.equalContents(series.index, nonContigFrame.index)
  190. # corner cases
  191. # Same index, copies values but not index if copy=False
  192. newFrame = self.frame.reindex(self.frame.index, copy=False)
  193. assert newFrame.index is self.frame.index
  194. # length zero
  195. newFrame = self.frame.reindex([])
  196. assert newFrame.empty
  197. assert len(newFrame.columns) == len(self.frame.columns)
  198. # length zero with columns reindexed with non-empty index
  199. newFrame = self.frame.reindex([])
  200. newFrame = newFrame.reindex(self.frame.index)
  201. assert len(newFrame.index) == len(self.frame.index)
  202. assert len(newFrame.columns) == len(self.frame.columns)
  203. # pass non-Index
  204. newFrame = self.frame.reindex(list(self.ts1.index))
  205. tm.assert_index_equal(newFrame.index, self.ts1.index)
  206. # copy with no axes
  207. result = self.frame.reindex()
  208. assert_frame_equal(result, self.frame)
  209. assert result is not self.frame
  210. def test_reindex_nan(self):
  211. df = pd.DataFrame([[1, 2], [3, 5], [7, 11], [9, 23]],
  212. index=[2, np.nan, 1, 5],
  213. columns=['joe', 'jim'])
  214. i, j = [np.nan, 5, 5, np.nan, 1, 2, np.nan], [1, 3, 3, 1, 2, 0, 1]
  215. assert_frame_equal(df.reindex(i), df.iloc[j])
  216. df.index = df.index.astype('object')
  217. assert_frame_equal(df.reindex(i), df.iloc[j], check_index_type=False)
  218. # GH10388
  219. df = pd.DataFrame({'other': ['a', 'b', np.nan, 'c'],
  220. 'date': ['2015-03-22', np.nan,
  221. '2012-01-08', np.nan],
  222. 'amount': [2, 3, 4, 5]})
  223. df['date'] = pd.to_datetime(df.date)
  224. df['delta'] = (pd.to_datetime('2015-06-18') - df['date']).shift(1)
  225. left = df.set_index(['delta', 'other', 'date']).reset_index()
  226. right = df.reindex(columns=['delta', 'other', 'date', 'amount'])
  227. assert_frame_equal(left, right)
  228. def test_reindex_name_remains(self):
  229. s = Series(np.random.rand(10))
  230. df = DataFrame(s, index=np.arange(len(s)))
  231. i = Series(np.arange(10), name='iname')
  232. df = df.reindex(i)
  233. assert df.index.name == 'iname'
  234. df = df.reindex(Index(np.arange(10), name='tmpname'))
  235. assert df.index.name == 'tmpname'
  236. s = Series(np.random.rand(10))
  237. df = DataFrame(s.T, index=np.arange(len(s)))
  238. i = Series(np.arange(10), name='iname')
  239. df = df.reindex(columns=i)
  240. assert df.columns.name == 'iname'
  241. def test_reindex_int(self):
  242. smaller = self.intframe.reindex(self.intframe.index[::2])
  243. assert smaller['A'].dtype == np.int64
  244. bigger = smaller.reindex(self.intframe.index)
  245. assert bigger['A'].dtype == np.float64
  246. smaller = self.intframe.reindex(columns=['A', 'B'])
  247. assert smaller['A'].dtype == np.int64
  248. def test_reindex_like(self):
  249. other = self.frame.reindex(index=self.frame.index[:10],
  250. columns=['C', 'B'])
  251. assert_frame_equal(other, self.frame.reindex_like(other))
  252. def test_reindex_columns(self):
  253. new_frame = self.frame.reindex(columns=['A', 'B', 'E'])
  254. tm.assert_series_equal(new_frame['B'], self.frame['B'])
  255. assert np.isnan(new_frame['E']).all()
  256. assert 'C' not in new_frame
  257. # Length zero
  258. new_frame = self.frame.reindex(columns=[])
  259. assert new_frame.empty
  260. def test_reindex_columns_method(self):
  261. # GH 14992, reindexing over columns ignored method
  262. df = DataFrame(data=[[11, 12, 13], [21, 22, 23], [31, 32, 33]],
  263. index=[1, 2, 4],
  264. columns=[1, 2, 4],
  265. dtype=float)
  266. # default method
  267. result = df.reindex(columns=range(6))
  268. expected = DataFrame(data=[[np.nan, 11, 12, np.nan, 13, np.nan],
  269. [np.nan, 21, 22, np.nan, 23, np.nan],
  270. [np.nan, 31, 32, np.nan, 33, np.nan]],
  271. index=[1, 2, 4],
  272. columns=range(6),
  273. dtype=float)
  274. assert_frame_equal(result, expected)
  275. # method='ffill'
  276. result = df.reindex(columns=range(6), method='ffill')
  277. expected = DataFrame(data=[[np.nan, 11, 12, 12, 13, 13],
  278. [np.nan, 21, 22, 22, 23, 23],
  279. [np.nan, 31, 32, 32, 33, 33]],
  280. index=[1, 2, 4],
  281. columns=range(6),
  282. dtype=float)
  283. assert_frame_equal(result, expected)
  284. # method='bfill'
  285. result = df.reindex(columns=range(6), method='bfill')
  286. expected = DataFrame(data=[[11, 11, 12, 13, 13, np.nan],
  287. [21, 21, 22, 23, 23, np.nan],
  288. [31, 31, 32, 33, 33, np.nan]],
  289. index=[1, 2, 4],
  290. columns=range(6),
  291. dtype=float)
  292. assert_frame_equal(result, expected)
  293. def test_reindex_axes(self):
  294. # GH 3317, reindexing by both axes loses freq of the index
  295. df = DataFrame(np.ones((3, 3)),
  296. index=[datetime(2012, 1, 1),
  297. datetime(2012, 1, 2),
  298. datetime(2012, 1, 3)],
  299. columns=['a', 'b', 'c'])
  300. time_freq = date_range('2012-01-01', '2012-01-03', freq='d')
  301. some_cols = ['a', 'b']
  302. index_freq = df.reindex(index=time_freq).index.freq
  303. both_freq = df.reindex(index=time_freq, columns=some_cols).index.freq
  304. seq_freq = df.reindex(index=time_freq).reindex(
  305. columns=some_cols).index.freq
  306. assert index_freq == both_freq
  307. assert index_freq == seq_freq
  308. def test_reindex_fill_value(self):
  309. df = DataFrame(np.random.randn(10, 4))
  310. # axis=0
  311. result = df.reindex(lrange(15))
  312. assert np.isnan(result.values[-5:]).all()
  313. result = df.reindex(lrange(15), fill_value=0)
  314. expected = df.reindex(lrange(15)).fillna(0)
  315. assert_frame_equal(result, expected)
  316. # axis=1
  317. result = df.reindex(columns=lrange(5), fill_value=0.)
  318. expected = df.copy()
  319. expected[4] = 0.
  320. assert_frame_equal(result, expected)
  321. result = df.reindex(columns=lrange(5), fill_value=0)
  322. expected = df.copy()
  323. expected[4] = 0
  324. assert_frame_equal(result, expected)
  325. result = df.reindex(columns=lrange(5), fill_value='foo')
  326. expected = df.copy()
  327. expected[4] = 'foo'
  328. assert_frame_equal(result, expected)
  329. # reindex_axis
  330. with tm.assert_produces_warning(FutureWarning):
  331. result = df.reindex_axis(lrange(15), fill_value=0., axis=0)
  332. expected = df.reindex(lrange(15)).fillna(0)
  333. assert_frame_equal(result, expected)
  334. with tm.assert_produces_warning(FutureWarning):
  335. result = df.reindex_axis(lrange(5), fill_value=0., axis=1)
  336. expected = df.reindex(columns=lrange(5)).fillna(0)
  337. assert_frame_equal(result, expected)
  338. # other dtypes
  339. df['foo'] = 'foo'
  340. result = df.reindex(lrange(15), fill_value=0)
  341. expected = df.reindex(lrange(15)).fillna(0)
  342. assert_frame_equal(result, expected)
  343. def test_reindex_dups(self):
  344. # GH4746, reindex on duplicate index error messages
  345. arr = np.random.randn(10)
  346. df = DataFrame(arr, index=[1, 2, 3, 4, 5, 1, 2, 3, 4, 5])
  347. # set index is ok
  348. result = df.copy()
  349. result.index = list(range(len(df)))
  350. expected = DataFrame(arr, index=list(range(len(df))))
  351. assert_frame_equal(result, expected)
  352. # reindex fails
  353. pytest.raises(ValueError, df.reindex, index=list(range(len(df))))
  354. def test_reindex_axis_style(self):
  355. # https://github.com/pandas-dev/pandas/issues/12392
  356. df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
  357. expected = pd.DataFrame({"A": [1, 2, np.nan], "B": [4, 5, np.nan]},
  358. index=[0, 1, 3])
  359. result = df.reindex([0, 1, 3])
  360. assert_frame_equal(result, expected)
  361. result = df.reindex([0, 1, 3], axis=0)
  362. assert_frame_equal(result, expected)
  363. result = df.reindex([0, 1, 3], axis='index')
  364. assert_frame_equal(result, expected)
  365. def test_reindex_positional_warns(self):
  366. # https://github.com/pandas-dev/pandas/issues/12392
  367. df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
  368. expected = pd.DataFrame({"A": [1., 2], 'B': [4., 5],
  369. "C": [np.nan, np.nan]})
  370. with tm.assert_produces_warning(FutureWarning):
  371. result = df.reindex([0, 1], ['A', 'B', 'C'])
  372. assert_frame_equal(result, expected)
  373. def test_reindex_axis_style_raises(self):
  374. # https://github.com/pandas-dev/pandas/issues/12392
  375. df = pd.DataFrame({"A": [1, 2, 3], 'B': [4, 5, 6]})
  376. with pytest.raises(TypeError, match="Cannot specify both 'axis'"):
  377. df.reindex([0, 1], ['A'], axis=1)
  378. with pytest.raises(TypeError, match="Cannot specify both 'axis'"):
  379. df.reindex([0, 1], ['A'], axis='index')
  380. with pytest.raises(TypeError, match="Cannot specify both 'axis'"):
  381. df.reindex(index=[0, 1], axis='index')
  382. with pytest.raises(TypeError, match="Cannot specify both 'axis'"):
  383. df.reindex(index=[0, 1], axis='columns')
  384. with pytest.raises(TypeError, match="Cannot specify both 'axis'"):
  385. df.reindex(columns=[0, 1], axis='columns')
  386. with pytest.raises(TypeError, match="Cannot specify both 'axis'"):
  387. df.reindex(index=[0, 1], columns=[0, 1], axis='columns')
  388. with pytest.raises(TypeError, match='Cannot specify all'):
  389. df.reindex([0, 1], [0], ['A'])
  390. # Mixing styles
  391. with pytest.raises(TypeError, match="Cannot specify both 'axis'"):
  392. df.reindex(index=[0, 1], axis='index')
  393. with pytest.raises(TypeError, match="Cannot specify both 'axis'"):
  394. df.reindex(index=[0, 1], axis='columns')
  395. # Duplicates
  396. with pytest.raises(TypeError, match="multiple values"):
  397. df.reindex([0, 1], labels=[0, 1])
  398. def test_reindex_single_named_indexer(self):
  399. # https://github.com/pandas-dev/pandas/issues/12392
  400. df = pd.DataFrame({"A": [1, 2, 3], "B": [1, 2, 3]})
  401. result = df.reindex([0, 1], columns=['A'])
  402. expected = pd.DataFrame({"A": [1, 2]})
  403. assert_frame_equal(result, expected)
  404. def test_reindex_api_equivalence(self):
  405. # https://github.com/pandas-dev/pandas/issues/12392
  406. # equivalence of the labels/axis and index/columns API's
  407. df = DataFrame([[1, 2, 3], [3, 4, 5], [5, 6, 7]],
  408. index=['a', 'b', 'c'],
  409. columns=['d', 'e', 'f'])
  410. res1 = df.reindex(['b', 'a'])
  411. res2 = df.reindex(index=['b', 'a'])
  412. res3 = df.reindex(labels=['b', 'a'])
  413. res4 = df.reindex(labels=['b', 'a'], axis=0)
  414. res5 = df.reindex(['b', 'a'], axis=0)
  415. for res in [res2, res3, res4, res5]:
  416. tm.assert_frame_equal(res1, res)
  417. res1 = df.reindex(columns=['e', 'd'])
  418. res2 = df.reindex(['e', 'd'], axis=1)
  419. res3 = df.reindex(labels=['e', 'd'], axis=1)
  420. for res in [res2, res3]:
  421. tm.assert_frame_equal(res1, res)
  422. with tm.assert_produces_warning(FutureWarning) as m:
  423. res1 = df.reindex(['b', 'a'], ['e', 'd'])
  424. assert 'reindex' in str(m[0].message)
  425. res2 = df.reindex(columns=['e', 'd'], index=['b', 'a'])
  426. res3 = df.reindex(labels=['b', 'a'], axis=0).reindex(labels=['e', 'd'],
  427. axis=1)
  428. for res in [res2, res3]:
  429. tm.assert_frame_equal(res1, res)
  430. def test_align(self):
  431. af, bf = self.frame.align(self.frame)
  432. assert af._data is not self.frame._data
  433. af, bf = self.frame.align(self.frame, copy=False)
  434. assert af._data is self.frame._data
  435. # axis = 0
  436. other = self.frame.iloc[:-5, :3]
  437. af, bf = self.frame.align(other, axis=0, fill_value=-1)
  438. tm.assert_index_equal(bf.columns, other.columns)
  439. # test fill value
  440. join_idx = self.frame.index.join(other.index)
  441. diff_a = self.frame.index.difference(join_idx)
  442. diff_b = other.index.difference(join_idx)
  443. diff_a_vals = af.reindex(diff_a).values
  444. diff_b_vals = bf.reindex(diff_b).values
  445. assert (diff_a_vals == -1).all()
  446. af, bf = self.frame.align(other, join='right', axis=0)
  447. tm.assert_index_equal(bf.columns, other.columns)
  448. tm.assert_index_equal(bf.index, other.index)
  449. tm.assert_index_equal(af.index, other.index)
  450. # axis = 1
  451. other = self.frame.iloc[:-5, :3].copy()
  452. af, bf = self.frame.align(other, axis=1)
  453. tm.assert_index_equal(bf.columns, self.frame.columns)
  454. tm.assert_index_equal(bf.index, other.index)
  455. # test fill value
  456. join_idx = self.frame.index.join(other.index)
  457. diff_a = self.frame.index.difference(join_idx)
  458. diff_b = other.index.difference(join_idx)
  459. diff_a_vals = af.reindex(diff_a).values
  460. # TODO(wesm): unused?
  461. diff_b_vals = bf.reindex(diff_b).values # noqa
  462. assert (diff_a_vals == -1).all()
  463. af, bf = self.frame.align(other, join='inner', axis=1)
  464. tm.assert_index_equal(bf.columns, other.columns)
  465. af, bf = self.frame.align(other, join='inner', axis=1, method='pad')
  466. tm.assert_index_equal(bf.columns, other.columns)
  467. # test other non-float types
  468. af, bf = self.intframe.align(other, join='inner', axis=1, method='pad')
  469. tm.assert_index_equal(bf.columns, other.columns)
  470. af, bf = self.mixed_frame.align(self.mixed_frame,
  471. join='inner', axis=1, method='pad')
  472. tm.assert_index_equal(bf.columns, self.mixed_frame.columns)
  473. af, bf = self.frame.align(other.iloc[:, 0], join='inner', axis=1,
  474. method=None, fill_value=None)
  475. tm.assert_index_equal(bf.index, Index([]))
  476. af, bf = self.frame.align(other.iloc[:, 0], join='inner', axis=1,
  477. method=None, fill_value=0)
  478. tm.assert_index_equal(bf.index, Index([]))
  479. # mixed floats/ints
  480. af, bf = self.mixed_float.align(other.iloc[:, 0], join='inner', axis=1,
  481. method=None, fill_value=0)
  482. tm.assert_index_equal(bf.index, Index([]))
  483. af, bf = self.mixed_int.align(other.iloc[:, 0], join='inner', axis=1,
  484. method=None, fill_value=0)
  485. tm.assert_index_equal(bf.index, Index([]))
  486. # Try to align DataFrame to Series along bad axis
  487. with pytest.raises(ValueError):
  488. self.frame.align(af.iloc[0, :3], join='inner', axis=2)
  489. # align dataframe to series with broadcast or not
  490. idx = self.frame.index
  491. s = Series(range(len(idx)), index=idx)
  492. left, right = self.frame.align(s, axis=0)
  493. tm.assert_index_equal(left.index, self.frame.index)
  494. tm.assert_index_equal(right.index, self.frame.index)
  495. assert isinstance(right, Series)
  496. left, right = self.frame.align(s, broadcast_axis=1)
  497. tm.assert_index_equal(left.index, self.frame.index)
  498. expected = {c: s for c in self.frame.columns}
  499. expected = DataFrame(expected, index=self.frame.index,
  500. columns=self.frame.columns)
  501. tm.assert_frame_equal(right, expected)
  502. # see gh-9558
  503. df = DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]})
  504. result = df[df['a'] == 2]
  505. expected = DataFrame([[2, 5]], index=[1], columns=['a', 'b'])
  506. tm.assert_frame_equal(result, expected)
  507. result = df.where(df['a'] == 2, 0)
  508. expected = DataFrame({'a': [0, 2, 0], 'b': [0, 5, 0]})
  509. tm.assert_frame_equal(result, expected)
  510. def _check_align(self, a, b, axis, fill_axis, how, method, limit=None):
  511. aa, ab = a.align(b, axis=axis, join=how, method=method, limit=limit,
  512. fill_axis=fill_axis)
  513. join_index, join_columns = None, None
  514. ea, eb = a, b
  515. if axis is None or axis == 0:
  516. join_index = a.index.join(b.index, how=how)
  517. ea = ea.reindex(index=join_index)
  518. eb = eb.reindex(index=join_index)
  519. if axis is None or axis == 1:
  520. join_columns = a.columns.join(b.columns, how=how)
  521. ea = ea.reindex(columns=join_columns)
  522. eb = eb.reindex(columns=join_columns)
  523. ea = ea.fillna(axis=fill_axis, method=method, limit=limit)
  524. eb = eb.fillna(axis=fill_axis, method=method, limit=limit)
  525. assert_frame_equal(aa, ea)
  526. assert_frame_equal(ab, eb)
  527. @pytest.mark.parametrize('meth', ['pad', 'bfill'])
  528. @pytest.mark.parametrize('ax', [0, 1, None])
  529. @pytest.mark.parametrize('fax', [0, 1])
  530. @pytest.mark.parametrize('how', ['inner', 'outer', 'left', 'right'])
  531. def test_align_fill_method(self, how, meth, ax, fax):
  532. self._check_align_fill(how, meth, ax, fax)
  533. def _check_align_fill(self, kind, meth, ax, fax):
  534. left = self.frame.iloc[0:4, :10]
  535. right = self.frame.iloc[2:, 6:]
  536. empty = self.frame.iloc[:0, :0]
  537. self._check_align(left, right, axis=ax, fill_axis=fax,
  538. how=kind, method=meth)
  539. self._check_align(left, right, axis=ax, fill_axis=fax,
  540. how=kind, method=meth, limit=1)
  541. # empty left
  542. self._check_align(empty, right, axis=ax, fill_axis=fax,
  543. how=kind, method=meth)
  544. self._check_align(empty, right, axis=ax, fill_axis=fax,
  545. how=kind, method=meth, limit=1)
  546. # empty right
  547. self._check_align(left, empty, axis=ax, fill_axis=fax,
  548. how=kind, method=meth)
  549. self._check_align(left, empty, axis=ax, fill_axis=fax,
  550. how=kind, method=meth, limit=1)
  551. # both empty
  552. self._check_align(empty, empty, axis=ax, fill_axis=fax,
  553. how=kind, method=meth)
  554. self._check_align(empty, empty, axis=ax, fill_axis=fax,
  555. how=kind, method=meth, limit=1)
  556. def test_align_int_fill_bug(self):
  557. # GH #910
  558. X = np.arange(10 * 10, dtype='float64').reshape(10, 10)
  559. Y = np.ones((10, 1), dtype=int)
  560. df1 = DataFrame(X)
  561. df1['0.X'] = Y.squeeze()
  562. df2 = df1.astype(float)
  563. result = df1 - df1.mean()
  564. expected = df2 - df2.mean()
  565. assert_frame_equal(result, expected)
  566. def test_align_multiindex(self):
  567. # GH 10665
  568. # same test cases as test_align_multiindex in test_series.py
  569. midx = pd.MultiIndex.from_product([range(2), range(3), range(2)],
  570. names=('a', 'b', 'c'))
  571. idx = pd.Index(range(2), name='b')
  572. df1 = pd.DataFrame(np.arange(12, dtype='int64'), index=midx)
  573. df2 = pd.DataFrame(np.arange(2, dtype='int64'), index=idx)
  574. # these must be the same results (but flipped)
  575. res1l, res1r = df1.align(df2, join='left')
  576. res2l, res2r = df2.align(df1, join='right')
  577. expl = df1
  578. assert_frame_equal(expl, res1l)
  579. assert_frame_equal(expl, res2r)
  580. expr = pd.DataFrame([0, 0, 1, 1, np.nan, np.nan] * 2, index=midx)
  581. assert_frame_equal(expr, res1r)
  582. assert_frame_equal(expr, res2l)
  583. res1l, res1r = df1.align(df2, join='right')
  584. res2l, res2r = df2.align(df1, join='left')
  585. exp_idx = pd.MultiIndex.from_product([range(2), range(2), range(2)],
  586. names=('a', 'b', 'c'))
  587. expl = pd.DataFrame([0, 1, 2, 3, 6, 7, 8, 9], index=exp_idx)
  588. assert_frame_equal(expl, res1l)
  589. assert_frame_equal(expl, res2r)
  590. expr = pd.DataFrame([0, 0, 1, 1] * 2, index=exp_idx)
  591. assert_frame_equal(expr, res1r)
  592. assert_frame_equal(expr, res2l)
  593. def test_align_series_combinations(self):
  594. df = pd.DataFrame({'a': [1, 3, 5],
  595. 'b': [1, 3, 5]}, index=list('ACE'))
  596. s = pd.Series([1, 2, 4], index=list('ABD'), name='x')
  597. # frame + series
  598. res1, res2 = df.align(s, axis=0)
  599. exp1 = pd.DataFrame({'a': [1, np.nan, 3, np.nan, 5],
  600. 'b': [1, np.nan, 3, np.nan, 5]},
  601. index=list('ABCDE'))
  602. exp2 = pd.Series([1, 2, np.nan, 4, np.nan],
  603. index=list('ABCDE'), name='x')
  604. tm.assert_frame_equal(res1, exp1)
  605. tm.assert_series_equal(res2, exp2)
  606. # series + frame
  607. res1, res2 = s.align(df)
  608. tm.assert_series_equal(res1, exp2)
  609. tm.assert_frame_equal(res2, exp1)
  def test_filter(self):
      """Exercise ``filter`` with items, like, regex and invalid arguments."""
      # Items: 'E' is requested but must not show up in the result, with
      # the default axis and with the column axis spelled out.
      for axis_kwargs in ({}, {'axis': 'columns'}):
          subset = self.frame.filter(['A', 'B', 'E'], **axis_kwargs)
          assert len(subset.columns) == 2
          assert 'E' not in subset

      # Other axis
      first_rows = self.frame.index[0:4]
      tm.assert_frame_equal(self.frame.filter(first_rows, axis='index'),
                            self.frame.reindex(index=first_rows))

      # like
      widened = self.frame.copy()
      widened['AA'] = 1
      subset = widened.filter(like='A')
      assert len(subset.columns) == 2
      assert 'AA' in subset

      # like with ints in column names
      int_named = DataFrame(0., index=[0, 1, 2], columns=[0, 1, '_A', '_B'])
      assert len(int_named.filter(like='_').columns) == 2

      # regex with ints in column names
      # from PR #10384
      int_named = DataFrame(0., index=[0, 1, 2],
                            columns=['A1', 1, 'B', 2, 'C'])
      digits_only = DataFrame(
          0., index=[0, 1, 2], columns=pd.Index([1, 2], dtype=object))
      tm.assert_frame_equal(int_named.filter(regex='^[0-9]+$'), digits_only)

      # shouldn't remove anything
      all_digits = DataFrame(0., index=[0, 1, 2], columns=[0, '0', 1, '1'])
      tm.assert_frame_equal(all_digits.filter(regex='^[0-9]+$'), all_digits)

      # pass in None
      for kwargs in ({}, {'items': None}, {'axis': 1}):
          with pytest.raises(TypeError, match='Must pass'):
              self.frame.filter(**kwargs)

      # test mutually exclusive arguments
      conflicting_kwargs = [
          dict(items=['one', 'three'], regex='e$', like='bbi'),
          dict(items=['one', 'three'], regex='e$', axis=1),
          dict(items=['one', 'three'], regex='e$'),
          dict(items=['one', 'three'], like='bbi', axis=0),
          dict(items=['one', 'three'], like='bbi'),
      ]
      for kwargs in conflicting_kwargs:
          with pytest.raises(TypeError, match='mutually exclusive'):
              self.frame.filter(**kwargs)

      # objects
      assert 'foo' in self.mixed_frame.filter(like='foo')

      # unicode columns, won't ascii-encode
      renamed = self.frame.rename(columns={'B': u('\u2202')})
      assert 'C' in renamed.filter(like='C')
  669. def test_filter_regex_search(self):
  670. fcopy = self.frame.copy()
  671. fcopy['AA'] = 1
  672. # regex
  673. filtered = fcopy.filter(regex='[A]+')
  674. assert len(filtered.columns) == 2
  675. assert 'AA' in filtered
  676. # doesn't have to be at beginning
  677. df = DataFrame({'aBBa': [1, 2],
  678. 'BBaBB': [1, 2],
  679. 'aCCa': [1, 2],
  680. 'aCCaBB': [1, 2]})
  681. result = df.filter(regex='BB')
  682. exp = df[[x for x in df.columns if 'BB' in x]]
  683. assert_frame_equal(result, exp)
  @pytest.mark.parametrize('name,expected', [
      ('a', DataFrame({u'a': [1, 2]})),
      (u'a', DataFrame({u'a': [1, 2]})),
      (u'あ', DataFrame({u'あ': [3, 4]}))
  ])
  def test_filter_unicode(self, name, expected):
      """``like`` and ``regex`` select the same column for each *name*."""
      # GH13101
      source = DataFrame({u'a': [1, 2], u'あ': [3, 4]})

      by_like = source.filter(like=name)
      assert_frame_equal(by_like, expected)

      by_regex = source.filter(regex=name)
      assert_frame_equal(by_regex, expected)
  @pytest.mark.parametrize('name', ['a', u'a'])
  def test_filter_bytestring(self, name):
      """Filter a frame with bytes column labels using a text *name*."""
      # GH13101
      source = DataFrame({b'a': [1, 2], b'b': [3, 4]})
      only_a = DataFrame({b'a': [1, 2]})

      by_like = source.filter(like=name)
      assert_frame_equal(by_like, only_a)

      by_regex = source.filter(regex=name)
      assert_frame_equal(by_regex, only_a)
  701. def test_filter_corner(self):
  702. empty = DataFrame()
  703. result = empty.filter([])
  704. assert_frame_equal(result, empty)
  705. result = empty.filter(like='foo')
  706. assert_frame_equal(result, empty)
  def test_select(self):
      """Check ``select`` warns and that ``loc`` picks the same labels."""
      # deprecated: gh-12410
      def has_weekday_two(stamp):
          return stamp.weekday() == 2

      wanted = self.tsframe.index[
          [has_weekday_two(stamp) for stamp in self.tsframe.index]]
      expected_weekdays = self.tsframe.reindex(index=wanted)

      # Both ``select`` calls run inside the warning check.
      with tm.assert_produces_warning(FutureWarning,
                                      check_stacklevel=False):
          by_rows = self.tsframe.select(has_weekday_two, axis=0)
          assert_frame_equal(by_rows, expected_weekdays)

          by_cols = self.frame.select(lambda x: x in ('B', 'D'), axis=1)
          expected_cols = self.frame.reindex(columns=['B', 'D'])
          assert_frame_equal(by_cols, expected_cols, check_names=False)

      # replacement
      row_mask = self.tsframe.index.weekday == 2
      by_rows = self.tsframe.loc(axis=0)[row_mask]
      assert_frame_equal(by_rows, expected_weekdays)

      def is_b_or_d(label):
          return label in ['B', 'D']

      col_mask = self.frame.columns.map(is_b_or_d)
      by_cols = self.frame.loc(axis=1)[(col_mask)]
      expected_cols = self.frame.reindex(columns=['B', 'D'])
      assert_frame_equal(by_cols, expected_cols, check_names=False)

      # doc example
      df = DataFrame({'A': [1, 2, 3]}, index=['foo', 'bar', 'baz'])

      def is_bar_or_baz(label):
          return label in ['bar', 'baz']

      with tm.assert_produces_warning(FutureWarning):
          via_select = df.select(is_bar_or_baz)
      via_loc = df.loc[df.index.map(is_bar_or_baz)]
      assert_frame_equal(via_loc, via_select, check_names=False)
  def test_take(self):
      """Compare positional ``take`` with label-based selection per axis."""
      # homogeneous
      positions = [3, 1, 2, 0]
      df = self.frame
      assert_frame_equal(df.take(positions, axis=0),
                         df.reindex(df.index.take(positions)))

      # axis = 1
      assert_frame_equal(df.take(positions, axis=1),
                         df.loc[:, ['D', 'B', 'C', 'A']],
                         check_names=False)

      # negative indices
      positions = [2, 1, -1]
      by_label = df.reindex(df.index.take(positions))
      assert_frame_equal(df.take(positions, axis=0), by_label)

      # Passing ``convert`` (either value) must raise a FutureWarning.
      for convert in (True, False):
          with tm.assert_produces_warning(FutureWarning):
              taken = df.take(positions, convert=convert, axis=0)
              assert_frame_equal(taken, by_label)

      # axis = 1
      assert_frame_equal(df.take(positions, axis=1),
                         df.loc[:, ['C', 'B', 'D']],
                         check_names=False)

      # illegal indices
      illegal = [
          ([3, 1, 2, 30], 0),
          ([3, 1, 2, -31], 0),
          ([3, 1, 2, 5], 1),
          ([3, 1, 2, -5], 1),
      ]
      for bad_positions, axis in illegal:
          with pytest.raises(IndexError):
              df.take(bad_positions, axis=axis)

      # mixed-dtype
      positions = [4, 1, 2, 0, 3]
      df = self.mixed_frame
      assert_frame_equal(df.take(positions, axis=0),
                         df.reindex(df.index.take(positions)))

      # axis = 1
      assert_frame_equal(df.take(positions, axis=1),
                         df.loc[:, ['foo', 'B', 'C', 'A', 'D']])

      # negative indices
      positions = [4, 1, -2]
      assert_frame_equal(df.take(positions, axis=0),
                         df.reindex(df.index.take(positions)))

      # axis = 1
      assert_frame_equal(df.take(positions, axis=1),
                         df.loc[:, ['foo', 'B', 'D']])

      # by dtype
      positions = [1, 2, 0, 3]
      for df in (self.mixed_float, self.mixed_int):
          assert_frame_equal(df.take(positions, axis=0),
                             df.reindex(df.index.take(positions)))

          # axis = 1
          assert_frame_equal(df.take(positions, axis=1),
                             df.loc[:, ['B', 'C', 'A', 'D']])
  def test_reindex_boolean(self):
      """Reindexing a bool frame onto missing labels gives object values."""
      bools = DataFrame(np.ones((10, 2), dtype=bool),
                        index=np.arange(0, 20, 2),
                        columns=[0, 2])

      # Row label 1 is not in the even-numbered original index.
      by_rows = bools.reindex(np.arange(10))
      assert by_rows.values.dtype == np.object_
      assert isna(by_rows[0][1])

      # Column 1 is not among the original columns [0, 2].
      by_cols = bools.reindex(columns=lrange(3))
      assert by_cols.values.dtype == np.object_
      assert isna(by_cols[1]).all()
  806. def test_reindex_objects(self):
  807. reindexed = self.mixed_frame.reindex(columns=['foo', 'A', 'B'])
  808. assert 'foo' in reindexed
  809. reindexed = self.mixed_frame.reindex(columns=['A', 'B'])
  810. assert 'foo' not in reindexed
  811. def test_reindex_corner(self):
  812. index = Index(['a', 'b', 'c'])
  813. dm = self.empty.reindex(index=[1, 2, 3])
  814. reindexed = dm.reindex(columns=index)
  815. tm.assert_index_equal(reindexed.columns, index)
  816. # ints are weird
  817. smaller = self.intframe.reindex(columns=['A', 'B', 'E'])
  818. assert smaller['E'].dtype == np.float64
  def test_reindex_axis(self):
      """``reindex_axis`` must warn and agree with ``reindex``."""
      columns = ['A', 'B', 'E']
      with tm.assert_produces_warning(FutureWarning) as caught:
          via_axis = self.intframe.reindex_axis(columns, axis=1)
          assert 'reindex' in str(caught[0].message)
      via_reindex = self.intframe.reindex(columns=columns)
      assert_frame_equal(via_axis, via_reindex)

      rows = self.intframe.index[0:5]
      with tm.assert_produces_warning(FutureWarning) as caught:
          via_axis = self.intframe.reindex_axis(rows, axis=0)
          assert 'reindex' in str(caught[0].message)
      via_reindex = self.intframe.reindex(index=rows)
      assert_frame_equal(via_axis, via_reindex)

      # axis=2 is expected to be rejected
      with pytest.raises(ValueError):
          self.intframe.reindex_axis(rows, axis=2)

      # no-op case
      same_columns = self.frame.columns.copy()
      with tm.assert_produces_warning(FutureWarning) as caught:
          unchanged = self.frame.reindex_axis(same_columns, axis=1)
          assert 'reindex' in str(caught[0].message)
      assert_frame_equal(unchanged, self.frame)
  839. def test_reindex_with_nans(self):
  840. df = DataFrame([[1, 2], [3, 4], [np.nan, np.nan], [7, 8], [9, 10]],
  841. columns=['a', 'b'],
  842. index=[100.0, 101.0, np.nan, 102.0, 103.0])
  843. result = df.reindex(index=[101.0, 102.0, 103.0])
  844. expected = df.iloc[[1, 3, 4]]
  845. assert_frame_equal(result, expected)
  846. result = df.reindex(index=[103.0])
  847. expected = df.iloc[[4]]
  848. assert_frame_equal(result, expected)
  849. result = df.reindex(index=[101.0])
  850. expected = df.iloc[[1]]
  851. assert_frame_equal(result, expected)
  def test_reindex_multi(self):
      """Reindexing both axes in one call equals doing it axis by axis."""
      # (frame, row labels, column labels); frames are built in the same
      # order as before so the random draws happen in the same sequence.
      cases = [
          (DataFrame(np.random.randn(3, 3)), lrange(4), lrange(4)),
          (DataFrame(np.random.randint(0, 10, (3, 3))), lrange(4), lrange(4)),
          (DataFrame(np.random.randint(0, 10, (3, 3))), lrange(2), lrange(2)),
          (DataFrame(np.random.randn(5, 3) + 1j, columns=['a', 'b', 'c']),
           [0, 1], ['a', 'b']),
      ]
      for df, rows, cols in cases:
          both_axes = df.reindex(index=rows, columns=cols)
          one_by_one = df.reindex(rows).reindex(columns=cols)
          assert_frame_equal(both_axes, one_by_one)
  869. def test_reindex_multi_categorical_time(self):
  870. # https://github.com/pandas-dev/pandas/issues/21390
  871. midx = pd.MultiIndex.from_product(
  872. [Categorical(['a', 'b', 'c']),
  873. Categorical(date_range("2012-01-01", periods=3, freq='H'))])
  874. df = pd.DataFrame({'a': range(len(midx))}, index=midx)
  875. df2 = df.iloc[[0, 1, 2, 3, 4, 5, 6, 8]]
  876. result = df2.reindex(midx)
  877. expected = pd.DataFrame(
  878. {'a': [0, 1, 2, 3, 4, 5, 6, np.nan, 8]}, index=midx)
  879. assert_frame_equal(result, expected)
  # Shared rows for the parametrized frames below.
  data = [[1, 2, 3], [1, 2, 3]]

  @pytest.mark.parametrize('actual', [
      DataFrame(data=data, index=['a', 'a']),
      DataFrame(data=data, index=['a', 'b']),
      DataFrame(data=data, index=['a', 'b']).set_index([0, 1]),
      DataFrame(data=data, index=['a', 'a']).set_index([0, 1])
  ])
  def test_raise_on_drop_duplicate_index(self, actual):
      """Dropping label 'c' raises unless ``errors='ignore'`` is passed."""
      # issue 19186
      if isinstance(actual.index, MultiIndex):
          level = 0
      else:
          level = None

      with pytest.raises(KeyError):
          actual.drop('c', level=level, axis=0)
      with pytest.raises(KeyError):
          actual.T.drop('c', level=level, axis=1)

      # With errors='ignore' the frame must come back unchanged.
      kept_rows = actual.drop('c', axis=0, level=level,
                              errors='ignore')
      assert_frame_equal(kept_rows, actual)

      kept_cols = actual.T.drop('c', axis=1, level=level,
                                errors='ignore')
      assert_frame_equal(kept_cols.T, actual)
  @pytest.mark.parametrize('index', [[1, 2, 3], [1, 1, 2]])
  @pytest.mark.parametrize('drop_labels', [[], [1], [2]])
  def test_drop_empty_list(self, index, drop_labels):
      """Drop labels that are either empty or all present in *index*."""
      # GH 21494
      remaining = [label for label in index if label not in drop_labels]
      dropped = pd.DataFrame(index=index).drop(drop_labels)
      expected = pd.DataFrame(index=remaining)
      tm.assert_frame_equal(dropped, expected)
  @pytest.mark.parametrize('index', [[1, 2, 3], [1, 2, 2]])
  @pytest.mark.parametrize('drop_labels', [[1, 4], [4, 5]])
  def test_drop_non_empty_list(self, index, drop_labels):
      """Dropping a list containing a label not in *index* raises KeyError."""
      # GH 21494
      frame = pd.DataFrame(index=index)
      with pytest.raises(KeyError, match='not found in axis'):
          frame.drop(drop_labels)