test_categorical.py 26 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717
  1. # -*- coding: utf-8 -*-
  2. import numpy as np
  3. import pytest
  4. import pandas.compat as compat
  5. from pandas.core.dtypes.common import is_categorical_dtype
  6. from pandas.core.dtypes.dtypes import CategoricalDtype
  7. import pandas as pd
  8. from pandas import (
  9. Categorical, CategoricalIndex, DataFrame, Index, Interval, Series,
  10. Timestamp)
  11. from pandas.api.types import CategoricalDtype as CDT
  12. from pandas.util import testing as tm
  13. from pandas.util.testing import assert_frame_equal, assert_series_equal
  14. class TestCategoricalIndex(object):
  def setup_method(self, method):
      """Create the categorical-indexed frames shared by the tests.

      ``df`` and ``df2`` are indexed by the labels 'aabbca' with categories
      'cab' and 'cabe'; ``df3`` and ``df4`` are indexed by
      [1, 1, 2, 1, 3, 2] with categories [3, 2, 1], ordered and unordered.
      """
      def build(labels, dtype):
          # 'A' holds 0..5 as int64; 'B' is cast to ``dtype`` and then
          # promoted to the index.
          frame = DataFrame({'A': np.arange(6, dtype='int64'),
                             'B': Series(labels).astype(dtype)})
          return frame.set_index('B')

      letters = list('aabbca')
      numbers = [1, 1, 2, 1, 3, 2]

      self.df = build(letters, CDT(list('cab')))
      self.df2 = build(letters, CDT(list('cabe')))
      self.df3 = build(numbers, CDT([3, 2, 1], ordered=True))
      self.df4 = build(numbers, CDT([3, 2, 1], ordered=False))
  def test_loc_scalar(self):
      """Scalar ``.loc`` lookup and assignment on a CategoricalIndex."""
      def indexed(values, labels):
          # Expected frame: 'A' values indexed by categorical labels 'B'.
          index_col = Series(list(labels)).astype(CDT(list('cab')))
          return DataFrame({'A': values, 'B': index_col}).set_index('B')

      result = self.df.loc['a']
      expected = indexed([0, 1, 5], 'aaa')
      assert_frame_equal(result, expected)

      df = self.df.copy()
      df.loc['a'] = 20
      expected = indexed([20, 20, 2, 3, 4, 20], 'aabbca')
      assert_frame_equal(df, expected)

      # value not in the categories
      with pytest.raises(KeyError):
          df.loc['d']

      # assigning through a label that is not a category raises TypeError,
      # whether or not a column (existing or new) is also given
      with pytest.raises(TypeError):
          df.loc['d'] = 10

      with pytest.raises(TypeError):
          df.loc['d', 'A'] = 10

      with pytest.raises(TypeError):
          df.loc['d', 'C'] = 10
  55. def test_getitem_scalar(self):
  56. cats = Categorical([Timestamp('12-31-1999'),
  57. Timestamp('12-31-2000')])
  58. s = Series([1, 2], index=cats)
  59. expected = s.iloc[0]
  60. result = s[cats[0]]
  61. assert result == expected
  62. def test_slicing_directly(self):
  63. cat = Categorical(["a", "b", "c", "d", "a", "b", "c"])
  64. sliced = cat[3]
  65. assert sliced == "d"
  66. sliced = cat[3:5]
  67. expected = Categorical(["d", "a"], categories=['a', 'b', 'c', 'd'])
  68. tm.assert_numpy_array_equal(sliced._codes, expected._codes)
  69. tm.assert_index_equal(sliced.categories, expected.categories)
  70. def test_slicing(self):
  71. cat = Series(Categorical([1, 2, 3, 4]))
  72. reversed = cat[::-1]
  73. exp = np.array([4, 3, 2, 1], dtype=np.int64)
  74. tm.assert_numpy_array_equal(reversed.__array__(), exp)
  75. df = DataFrame({'value': (np.arange(100) + 1).astype('int64')})
  76. df['D'] = pd.cut(df.value, bins=[0, 25, 50, 75, 100])
  77. expected = Series([11, Interval(0, 25)], index=['value', 'D'], name=10)
  78. result = df.iloc[10]
  79. tm.assert_series_equal(result, expected)
  80. expected = DataFrame({'value': np.arange(11, 21).astype('int64')},
  81. index=np.arange(10, 20).astype('int64'))
  82. expected['D'] = pd.cut(expected.value, bins=[0, 25, 50, 75, 100])
  83. result = df.iloc[10:20]
  84. tm.assert_frame_equal(result, expected)
  85. expected = Series([9, Interval(0, 25)], index=['value', 'D'], name=8)
  86. result = df.loc[8]
  87. tm.assert_series_equal(result, expected)
  def test_slicing_and_getting_ops(self):
      """Exercise the getters of a frame that has a categorical column.

      For each access style the four result shapes are checked:
      a sub-frame, a single column, a single row and a single value.
      """
      cats = Categorical(
          ["a", "c", "b", "c", "c", "c", "c"], categories=["a", "b", "c"])
      idx = Index(["h", "i", "j", "k", "l", "m", "n"])
      values = [1, 2, 3, 4, 5, 6, 7]
      df = DataFrame({"cats": cats, "values": values}, index=idx)

      # expected sub-frame for 2:4,: | "j":"k",:
      exp_df = DataFrame(
          {"cats": Categorical(["b", "c"], categories=["a", "b", "c"]),
           "values": [3, 4]},
          index=Index(["j", "k"]))
      # expected column for :,"cats" | :,0
      exp_col = Series(cats, index=idx, name='cats')
      # expected row for "j",: | 2,:
      exp_row = Series(["b", 3], index=["cats", "values"], dtype="object",
                       name="j")
      # expected value for "j","cats" | 2,0
      exp_val = "b"

      def check_frame(res, exp):
          # frame result: equal to ``exp`` and still categorical in "cats"
          tm.assert_frame_equal(res, exp)
          assert is_categorical_dtype(res["cats"])

      def check_row(res):
          # row result: object Series whose "cats" entry is a plain string
          tm.assert_series_equal(res, exp_row)
          assert isinstance(res["cats"], compat.string_types)

      def check_col(res):
          # column result: the categorical Series itself
          tm.assert_series_equal(res, exp_col)
          assert is_categorical_dtype(res)

      # iloc
      check_frame(df.iloc[2:4, :], exp_df)
      check_row(df.iloc[2, :])
      check_col(df.iloc[:, 0])
      assert df.iloc[2, 0] == exp_val

      # loc
      check_frame(df.loc["j":"k", :], exp_df)
      check_row(df.loc["j", :])
      check_col(df.loc[:, "cats"])
      assert df.loc["j", "cats"] == exp_val

      # loc again (this section is labelled "ix" in the original comments);
      # the scalar lookup names the column through df.columns
      check_frame(df.loc["j":"k", :], exp_df)
      check_row(df.loc["j", :])
      check_col(df.loc[:, "cats"])
      assert df.loc["j", df.columns[0]] == exp_val

      # iat
      assert df.iat[2, 0] == exp_val

      # at
      assert df.at["j", "cats"] == exp_val

      # fancy indexing
      exp_fancy = df.iloc[[2]]
      tm.assert_frame_equal(df[df["cats"] == "b"], exp_fancy)
      tm.assert_frame_equal(df[df["values"] == 3], exp_fancy)

      # at, once more (labelled "get_value" in the original comments)
      assert df.at["j", "cats"] == exp_val

      # iloc with an int, a slice, or a sequence of integers
      check_row(df.iloc[2])
      check_frame(df.iloc[slice(2, 4)], exp_df)
      check_frame(df.iloc[[2, 3]], exp_df)
      check_col(df.iloc[:, 0])
      # selecting both columns gives back the whole frame
      check_frame(df.iloc[:, slice(0, 2)], df)
      check_frame(df.iloc[:, [0, 1]], df)
  196. def test_slicing_doc_examples(self):
  197. # GH 7918
  198. cats = Categorical(["a", "b", "b", "b", "c", "c", "c"],
  199. categories=["a", "b", "c"])
  200. idx = Index(["h", "i", "j", "k", "l", "m", "n", ])
  201. values = [1, 2, 2, 2, 3, 4, 5]
  202. df = DataFrame({"cats": cats, "values": values}, index=idx)
  203. result = df.iloc[2:4, :]
  204. expected = DataFrame(
  205. {"cats": Categorical(['b', 'b'], categories=['a', 'b', 'c']),
  206. "values": [2, 2]}, index=['j', 'k'])
  207. tm.assert_frame_equal(result, expected)
  208. result = df.iloc[2:4, :].dtypes
  209. expected = Series(['category', 'int64'], ['cats', 'values'])
  210. tm.assert_series_equal(result, expected)
  211. result = df.loc["h":"j", "cats"]
  212. expected = Series(Categorical(['a', 'b', 'b'],
  213. categories=['a', 'b', 'c']),
  214. index=['h', 'i', 'j'], name='cats')
  215. tm.assert_series_equal(result, expected)
  216. result = df.loc["h":"j", df.columns[0:1]]
  217. expected = DataFrame({'cats': Categorical(['a', 'b', 'b'],
  218. categories=['a', 'b', 'c'])},
  219. index=['h', 'i', 'j'])
  220. tm.assert_frame_equal(result, expected)
  221. def test_getitem_category_type(self):
  222. # GH 14580
  223. # test iloc() on Series with Categorical data
  224. s = Series([1, 2, 3]).astype('category')
  225. # get slice
  226. result = s.iloc[0:2]
  227. expected = Series([1, 2]).astype(CategoricalDtype([1, 2, 3]))
  228. tm.assert_series_equal(result, expected)
  229. # get list of indexes
  230. result = s.iloc[[0, 1]]
  231. expected = Series([1, 2]).astype(CategoricalDtype([1, 2, 3]))
  232. tm.assert_series_equal(result, expected)
  233. # get boolean array
  234. result = s.iloc[[True, False, False]]
  235. expected = Series([1]).astype(CategoricalDtype([1, 2, 3]))
  236. tm.assert_series_equal(result, expected)
  def test_loc_listlike(self):
      """List-like ``.loc`` selection on a CategoricalIndex."""
      def abe_index():
          # index expected when selecting ['a', 'b', 'e'] from ``df2``
          return CategoricalIndex(
              list('aaabbe'), categories=list('cabe'), name='B')

      # list of labels
      result = self.df.loc[['c', 'a']]
      expected = self.df.iloc[[4, 0, 1, 5]]
      assert_frame_equal(result, expected, check_index_type=True)

      result = self.df2.loc[['a', 'b', 'e']]
      expected = DataFrame({'A': [0, 1, 5, 2, 3, np.nan]},
                           index=abe_index())
      assert_frame_equal(result, expected, check_index_type=True)

      # element in the categories but not in the values
      with pytest.raises(KeyError):
          self.df2.loc['e']

      # assign is ok
      df = self.df2.copy()
      df.loc['e'] = 20
      result = df.loc[['a', 'b', 'e']]
      expected = DataFrame({'A': [0, 1, 5, 2, 3, 20]}, index=abe_index())
      assert_frame_equal(result, expected)

      # a fresh copy gives the same result as the first selection
      df = self.df2.copy()
      result = df.loc[['a', 'b', 'e']]
      expected = DataFrame({'A': [0, 1, 5, 2, 3, np.nan]},
                           index=abe_index())
      assert_frame_equal(result, expected, check_index_type=True)

      # not all labels in the categories
      with pytest.raises(KeyError):
          self.df2.loc[['a', 'd']]
  def test_loc_listlike_dtypes(self):
      """List-like ``.loc`` on CategoricalIndex: unique, duplicated, unused."""
      # GH 11586
      # message expected whenever a requested label is not a category
      msg = ('a list-indexer must only include values '
             'that are in the categories')

      # unique categories and codes
      index = CategoricalIndex(['a', 'b', 'c'])
      df = DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}, index=index)

      # unique slice
      res = df.loc[['a', 'b']]
      exp = DataFrame(
          {'A': [1, 2], 'B': [4, 5]},
          index=CategoricalIndex(['a', 'b'], categories=index.categories))
      tm.assert_frame_equal(res, exp, check_index_type=True)

      # duplicated slice
      res = df.loc[['a', 'a', 'b']]
      exp = DataFrame(
          {'A': [1, 1, 2], 'B': [4, 4, 5]},
          index=CategoricalIndex(['a', 'a', 'b'],
                                 categories=index.categories))
      tm.assert_frame_equal(res, exp, check_index_type=True)

      with pytest.raises(KeyError, match=msg):
          df.loc[['a', 'x']]

      # duplicated categories and codes
      index = CategoricalIndex(['a', 'b', 'a'])
      df = DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}, index=index)

      # unique slice
      res = df.loc[['a', 'b']]
      exp = DataFrame({'A': [1, 3, 2], 'B': [4, 6, 5]},
                      index=CategoricalIndex(['a', 'a', 'b']))
      tm.assert_frame_equal(res, exp, check_index_type=True)

      # duplicated slice
      res = df.loc[['a', 'a', 'b']]
      exp = DataFrame({'A': [1, 3, 1, 3, 2], 'B': [4, 6, 4, 6, 5]},
                      index=CategoricalIndex(['a', 'a', 'a', 'a', 'b']))
      tm.assert_frame_equal(res, exp, check_index_type=True)

      with pytest.raises(KeyError, match=msg):
          df.loc[['a', 'x']]

      # contains unused category
      all_cats = list('abcde')
      index = CategoricalIndex(['a', 'b', 'a', 'c'], categories=all_cats)
      df = DataFrame({'A': [1, 2, 3, 4], 'B': [5, 6, 7, 8]}, index=index)

      res = df.loc[['a', 'b']]
      exp = DataFrame(
          {'A': [1, 3, 2], 'B': [5, 7, 6]},
          index=CategoricalIndex(['a', 'a', 'b'], categories=all_cats))
      tm.assert_frame_equal(res, exp, check_index_type=True)

      # 'e' is a category with no rows, so its row is expected as NaN
      res = df.loc[['a', 'e']]
      exp = DataFrame(
          {'A': [1, 3, np.nan], 'B': [5, 7, np.nan]},
          index=CategoricalIndex(['a', 'a', 'e'], categories=all_cats))
      tm.assert_frame_equal(res, exp, check_index_type=True)

      # duplicated slice
      res = df.loc[['a', 'a', 'b']]
      exp = DataFrame(
          {'A': [1, 3, 1, 3, 2], 'B': [5, 7, 5, 7, 6]},
          index=CategoricalIndex(['a', 'a', 'a', 'a', 'b'],
                                 categories=all_cats))
      tm.assert_frame_equal(res, exp, check_index_type=True)

      with pytest.raises(KeyError, match=msg):
          df.loc[['a', 'x']]
  331. def test_get_indexer_array(self):
  332. arr = np.array([Timestamp('1999-12-31 00:00:00'),
  333. Timestamp('2000-12-31 00:00:00')], dtype=object)
  334. cats = [Timestamp('1999-12-31 00:00:00'),
  335. Timestamp('2000-12-31 00:00:00')]
  336. ci = CategoricalIndex(cats,
  337. categories=cats,
  338. ordered=False, dtype='category')
  339. result = ci.get_indexer(arr)
  340. expected = np.array([0, 1], dtype='intp')
  341. tm.assert_numpy_array_equal(result, expected)
  342. def test_get_indexer_same_categories_same_order(self):
  343. ci = CategoricalIndex(['a', 'b'], categories=['a', 'b'])
  344. result = ci.get_indexer(CategoricalIndex(['b', 'b'],
  345. categories=['a', 'b']))
  346. expected = np.array([1, 1], dtype='intp')
  347. tm.assert_numpy_array_equal(result, expected)
  348. def test_get_indexer_same_categories_different_order(self):
  349. # https://github.com/pandas-dev/pandas/issues/19551
  350. ci = CategoricalIndex(['a', 'b'], categories=['a', 'b'])
  351. result = ci.get_indexer(CategoricalIndex(['b', 'b'],
  352. categories=['b', 'a']))
  353. expected = np.array([1, 1], dtype='intp')
  354. tm.assert_numpy_array_equal(result, expected)
  def test_getitem_with_listlike(self):
      """Select every dummy column of a categorical by a list of labels."""
      # GH 16115
      cats = Categorical([Timestamp('12-31-1999'),
                          Timestamp('12-31-2000')])
      dummies = pd.get_dummies(cats)

      # index the frame with a plain list of all its column labels
      all_columns = list(dummies.columns)
      result = dummies[all_columns]

      expected = DataFrame([[1, 0], [0, 1]], dtype='uint8',
                           index=[0, 1], columns=cats)
      assert_frame_equal(result, expected)
  364. def test_setitem_listlike(self):
  365. # GH 9469
  366. # properly coerce the input indexers
  367. np.random.seed(1)
  368. c = Categorical(np.random.randint(0, 5, size=150000).astype(
  369. np.int8)).add_categories([-1000])
  370. indexer = np.array([100000]).astype(np.int64)
  371. c[indexer] = -1000
  372. # we are asserting the code result here
  373. # which maps to the -1000 category
  374. result = c.codes[np.array([100000]).astype(np.int64)]
  375. tm.assert_numpy_array_equal(result, np.array([5], dtype='int8'))
  376. def test_ix_categorical_index(self):
  377. # GH 12531
  378. df = DataFrame(np.random.randn(3, 3),
  379. index=list('ABC'), columns=list('XYZ'))
  380. cdf = df.copy()
  381. cdf.index = CategoricalIndex(df.index)
  382. cdf.columns = CategoricalIndex(df.columns)
  383. expect = Series(df.loc['A', :], index=cdf.columns, name='A')
  384. assert_series_equal(cdf.loc['A', :], expect)
  385. expect = Series(df.loc[:, 'X'], index=cdf.index, name='X')
  386. assert_series_equal(cdf.loc[:, 'X'], expect)
  387. exp_index = CategoricalIndex(list('AB'), categories=['A', 'B', 'C'])
  388. expect = DataFrame(df.loc[['A', 'B'], :], columns=cdf.columns,
  389. index=exp_index)
  390. assert_frame_equal(cdf.loc[['A', 'B'], :], expect)
  391. exp_columns = CategoricalIndex(list('XY'),
  392. categories=['X', 'Y', 'Z'])
  393. expect = DataFrame(df.loc[:, ['X', 'Y']], index=cdf.index,
  394. columns=exp_columns)
  395. assert_frame_equal(cdf.loc[:, ['X', 'Y']], expect)
  396. # non-unique
  397. df = DataFrame(np.random.randn(3, 3),
  398. index=list('ABA'), columns=list('XYX'))
  399. cdf = df.copy()
  400. cdf.index = CategoricalIndex(df.index)
  401. cdf.columns = CategoricalIndex(df.columns)
  402. exp_index = CategoricalIndex(list('AA'), categories=['A', 'B'])
  403. expect = DataFrame(df.loc['A', :], columns=cdf.columns,
  404. index=exp_index)
  405. assert_frame_equal(cdf.loc['A', :], expect)
  406. exp_columns = CategoricalIndex(list('XX'), categories=['X', 'Y'])
  407. expect = DataFrame(df.loc[:, 'X'], index=cdf.index,
  408. columns=exp_columns)
  409. assert_frame_equal(cdf.loc[:, 'X'], expect)
  410. expect = DataFrame(df.loc[['A', 'B'], :], columns=cdf.columns,
  411. index=CategoricalIndex(list('AAB')))
  412. assert_frame_equal(cdf.loc[['A', 'B'], :], expect)
  413. expect = DataFrame(df.loc[:, ['X', 'Y']], index=cdf.index,
  414. columns=CategoricalIndex(list('XXY')))
  415. assert_frame_equal(cdf.loc[:, ['X', 'Y']], expect)
  416. def test_read_only_source(self):
  417. # GH 10043
  418. rw_array = np.eye(10)
  419. rw_df = DataFrame(rw_array)
  420. ro_array = np.eye(10)
  421. ro_array.setflags(write=False)
  422. ro_df = DataFrame(ro_array)
  423. assert_frame_equal(rw_df.iloc[[1, 2, 3]], ro_df.iloc[[1, 2, 3]])
  424. assert_frame_equal(rw_df.iloc[[1]], ro_df.iloc[[1]])
  425. assert_series_equal(rw_df.iloc[1], ro_df.iloc[1])
  426. assert_frame_equal(rw_df.iloc[1:3], ro_df.iloc[1:3])
  427. assert_frame_equal(rw_df.loc[[1, 2, 3]], ro_df.loc[[1, 2, 3]])
  428. assert_frame_equal(rw_df.loc[[1]], ro_df.loc[[1]])
  429. assert_series_equal(rw_df.loc[1], ro_df.loc[1])
  430. assert_frame_equal(rw_df.loc[1:3], ro_df.loc[1:3])
  def test_reindexing(self):
      """``reindex`` on a CategoricalIndex with list and Categorical targets."""
      def expected_frame(values, labels, dtype=None):
          # 'A' values indexed by 'B'; 'B' is cast to ``dtype`` only when
          # one is given, otherwise it stays a regular index of labels.
          index_col = Series(list(labels))
          if dtype is not None:
              index_col = index_col.astype(dtype)
          return DataFrame({'A': values, 'B': index_col}).set_index('B')

      def check_list_targets():
          # reindexing with a plain list converts to a regular index
          result = self.df2.reindex(['a', 'b', 'e'])
          expected = expected_frame([0, 1, 5, 2, 3, np.nan], 'aaabbe')
          assert_frame_equal(result, expected, check_index_type=True)

          result = self.df2.reindex(['a', 'b'])
          expected = expected_frame([0, 1, 5, 2, 3], 'aaabb')
          assert_frame_equal(result, expected, check_index_type=True)

          result = self.df2.reindex(['e'])
          expected = expected_frame([np.nan], 'e')
          assert_frame_equal(result, expected, check_index_type=True)

      # reindexing
      # convert to a regular index
      check_list_targets()

      result = self.df2.reindex(['d'])
      expected = expected_frame([np.nan], 'd')
      assert_frame_equal(result, expected, check_index_type=True)

      # since we are actually reindexing with a Categorical
      # then return a Categorical
      cats = list('cabe')

      result = self.df2.reindex(Categorical(['a', 'd'], categories=cats))
      expected = expected_frame([0, 1, 5, np.nan], 'aaad', CDT(cats))
      assert_frame_equal(result, expected, check_index_type=True)

      result = self.df2.reindex(Categorical(['a'], categories=cats))
      expected = expected_frame([0, 1, 5], 'aaa', CDT(cats))
      assert_frame_equal(result, expected, check_index_type=True)

      # the list-target cases are run a second time, as in the original
      check_list_targets()

      # give back the type of categorical that we received
      result = self.df2.reindex(Categorical(
          ['a', 'd'], categories=cats, ordered=True))
      expected = expected_frame([0, 1, 5, np.nan], 'aaad',
                                CDT(cats, ordered=True))
      assert_frame_equal(result, expected, check_index_type=True)

      result = self.df2.reindex(Categorical(
          ['a', 'd'], categories=['a', 'd']))
      expected = expected_frame([0, 1, 5, np.nan], 'aaad',
                                CDT(['a', 'd']))
      assert_frame_equal(result, expected, check_index_type=True)

      # passed duplicate indexers are not allowed
      with pytest.raises(ValueError):
          self.df2.reindex(['a', 'a'])

      # args NotImplemented ATM
      with pytest.raises(NotImplementedError):
          self.df2.reindex(['a'], method='ffill')
      with pytest.raises(NotImplementedError):
          self.df2.reindex(['a'], level=1)
      with pytest.raises(NotImplementedError):
          self.df2.reindex(['a'], limit=2)
  498. def test_loc_slice(self):
  499. # slicing
  500. # not implemented ATM
  501. # GH9748
  502. pytest.raises(TypeError, lambda: self.df.loc[1:5])
  503. # result = df.loc[1:5]
  504. # expected = df.iloc[[1,2,3,4]]
  505. # assert_frame_equal(result, expected)
  506. def test_boolean_selection(self):
  507. df3 = self.df3
  508. df4 = self.df4
  509. result = df3[df3.index == 'a']
  510. expected = df3.iloc[[]]
  511. assert_frame_equal(result, expected)
  512. result = df4[df4.index == 'a']
  513. expected = df4.iloc[[]]
  514. assert_frame_equal(result, expected)
  515. result = df3[df3.index == 1]
  516. expected = df3.iloc[[0, 1, 3]]
  517. assert_frame_equal(result, expected)
  518. result = df4[df4.index == 1]
  519. expected = df4.iloc[[0, 1, 3]]
  520. assert_frame_equal(result, expected)
  521. # since we have an ordered categorical
  522. # CategoricalIndex([1, 1, 2, 1, 3, 2],
  523. # categories=[3, 2, 1],
  524. # ordered=True,
  525. # name=u'B')
  526. result = df3[df3.index < 2]
  527. expected = df3.iloc[[4]]
  528. assert_frame_equal(result, expected)
  529. result = df3[df3.index > 1]
  530. expected = df3.iloc[[]]
  531. assert_frame_equal(result, expected)
  532. # unordered
  533. # cannot be compared
  534. # CategoricalIndex([1, 1, 2, 1, 3, 2],
  535. # categories=[3, 2, 1],
  536. # ordered=False,
  537. # name=u'B')
  538. pytest.raises(TypeError, lambda: df4[df4.index < 2])
  539. pytest.raises(TypeError, lambda: df4[df4.index > 1])
  540. def test_indexing_with_category(self):
  541. # https://github.com/pandas-dev/pandas/issues/12564
  542. # consistent result if comparing as Dataframe
  543. cat = DataFrame({'A': ['foo', 'bar', 'baz']})
  544. exp = DataFrame({'A': [True, False, False]})
  545. res = (cat[['A']] == 'foo')
  546. tm.assert_frame_equal(res, exp)
  547. cat['A'] = cat['A'].astype('category')
  548. res = (cat[['A']] == 'foo')
  549. tm.assert_frame_equal(res, exp)
  550. def test_map_with_dict_or_series(self):
  551. orig_values = ['a', 'B', 1, 'a']
  552. new_values = ['one', 2, 3.0, 'one']
  553. cur_index = pd.CategoricalIndex(orig_values, name='XXX')
  554. expected = pd.CategoricalIndex(new_values,
  555. name='XXX', categories=[3.0, 2, 'one'])
  556. mapper = pd.Series(new_values[:-1], index=orig_values[:-1])
  557. output = cur_index.map(mapper)
  558. # Order of categories in output can be different
  559. tm.assert_index_equal(expected, output)
  560. mapper = {o: n for o, n in
  561. zip(orig_values[:-1], new_values[:-1])}
  562. output = cur_index.map(mapper)
  563. # Order of categories in output can be different
  564. tm.assert_index_equal(expected, output)