test_indexing.py 9.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264
  1. # -*- coding: utf-8 -*-
  2. import numpy as np
  3. import pytest
  4. import pandas as pd
  5. from pandas import Categorical, CategoricalIndex, Index, PeriodIndex, Series
  6. import pandas.core.common as com
  7. from pandas.tests.arrays.categorical.common import TestCategorical
  8. import pandas.util.testing as tm
  9. class TestCategoricalIndexingWithFactor(TestCategorical):
  10. def test_getitem(self):
  11. assert self.factor[0] == 'a'
  12. assert self.factor[-1] == 'c'
  13. subf = self.factor[[0, 1, 2]]
  14. tm.assert_numpy_array_equal(subf._codes,
  15. np.array([0, 1, 1], dtype=np.int8))
  16. subf = self.factor[np.asarray(self.factor) == 'c']
  17. tm.assert_numpy_array_equal(subf._codes,
  18. np.array([2, 2, 2], dtype=np.int8))
  19. def test_setitem(self):
  20. # int/positional
  21. c = self.factor.copy()
  22. c[0] = 'b'
  23. assert c[0] == 'b'
  24. c[-1] = 'a'
  25. assert c[-1] == 'a'
  26. # boolean
  27. c = self.factor.copy()
  28. indexer = np.zeros(len(c), dtype='bool')
  29. indexer[0] = True
  30. indexer[-1] = True
  31. c[indexer] = 'c'
  32. expected = Categorical(['c', 'b', 'b', 'a', 'a', 'c', 'c', 'c'],
  33. ordered=True)
  34. tm.assert_categorical_equal(c, expected)
  35. @pytest.mark.parametrize('other', [
  36. pd.Categorical(['b', 'a']),
  37. pd.Categorical(['b', 'a'], categories=['b', 'a']),
  38. ])
  39. def test_setitem_same_but_unordered(self, other):
  40. # GH-24142
  41. target = pd.Categorical(['a', 'b'], categories=['a', 'b'])
  42. mask = np.array([True, False])
  43. target[mask] = other[mask]
  44. expected = pd.Categorical(['b', 'b'], categories=['a', 'b'])
  45. tm.assert_categorical_equal(target, expected)
  46. @pytest.mark.parametrize('other', [
  47. pd.Categorical(['b', 'a'], categories=['b', 'a', 'c']),
  48. pd.Categorical(['b', 'a'], categories=['a', 'b', 'c']),
  49. pd.Categorical(['a', 'a'], categories=['a']),
  50. pd.Categorical(['b', 'b'], categories=['b']),
  51. ])
  52. def test_setitem_different_unordered_raises(self, other):
  53. # GH-24142
  54. target = pd.Categorical(['a', 'b'], categories=['a', 'b'])
  55. mask = np.array([True, False])
  56. with pytest.raises(ValueError):
  57. target[mask] = other[mask]
  58. @pytest.mark.parametrize('other', [
  59. pd.Categorical(['b', 'a']),
  60. pd.Categorical(['b', 'a'], categories=['b', 'a'], ordered=True),
  61. pd.Categorical(['b', 'a'], categories=['a', 'b', 'c'], ordered=True),
  62. ])
  63. def test_setitem_same_ordered_rasies(self, other):
  64. # Gh-24142
  65. target = pd.Categorical(['a', 'b'], categories=['a', 'b'],
  66. ordered=True)
  67. mask = np.array([True, False])
  68. with pytest.raises(ValueError):
  69. target[mask] = other[mask]
  70. class TestCategoricalIndexing(object):
  71. def test_getitem_listlike(self):
  72. # GH 9469
  73. # properly coerce the input indexers
  74. np.random.seed(1)
  75. c = Categorical(np.random.randint(0, 5, size=150000).astype(np.int8))
  76. result = c.codes[np.array([100000]).astype(np.int64)]
  77. expected = c[np.array([100000]).astype(np.int64)].codes
  78. tm.assert_numpy_array_equal(result, expected)
  79. def test_periodindex(self):
  80. idx1 = PeriodIndex(['2014-01', '2014-01', '2014-02', '2014-02',
  81. '2014-03', '2014-03'], freq='M')
  82. cat1 = Categorical(idx1)
  83. str(cat1)
  84. exp_arr = np.array([0, 0, 1, 1, 2, 2], dtype=np.int8)
  85. exp_idx = PeriodIndex(['2014-01', '2014-02', '2014-03'], freq='M')
  86. tm.assert_numpy_array_equal(cat1._codes, exp_arr)
  87. tm.assert_index_equal(cat1.categories, exp_idx)
  88. idx2 = PeriodIndex(['2014-03', '2014-03', '2014-02', '2014-01',
  89. '2014-03', '2014-01'], freq='M')
  90. cat2 = Categorical(idx2, ordered=True)
  91. str(cat2)
  92. exp_arr = np.array([2, 2, 1, 0, 2, 0], dtype=np.int8)
  93. exp_idx2 = PeriodIndex(['2014-01', '2014-02', '2014-03'], freq='M')
  94. tm.assert_numpy_array_equal(cat2._codes, exp_arr)
  95. tm.assert_index_equal(cat2.categories, exp_idx2)
  96. idx3 = PeriodIndex(['2013-12', '2013-11', '2013-10', '2013-09',
  97. '2013-08', '2013-07', '2013-05'], freq='M')
  98. cat3 = Categorical(idx3, ordered=True)
  99. exp_arr = np.array([6, 5, 4, 3, 2, 1, 0], dtype=np.int8)
  100. exp_idx = PeriodIndex(['2013-05', '2013-07', '2013-08', '2013-09',
  101. '2013-10', '2013-11', '2013-12'], freq='M')
  102. tm.assert_numpy_array_equal(cat3._codes, exp_arr)
  103. tm.assert_index_equal(cat3.categories, exp_idx)
  104. def test_categories_assigments(self):
  105. s = Categorical(["a", "b", "c", "a"])
  106. exp = np.array([1, 2, 3, 1], dtype=np.int64)
  107. s.categories = [1, 2, 3]
  108. tm.assert_numpy_array_equal(s.__array__(), exp)
  109. tm.assert_index_equal(s.categories, Index([1, 2, 3]))
  110. # lengthen
  111. with pytest.raises(ValueError):
  112. s.categories = [1, 2, 3, 4]
  113. # shorten
  114. with pytest.raises(ValueError):
  115. s.categories = [1, 2]
  116. # Combinations of sorted/unique:
  117. @pytest.mark.parametrize("idx_values", [[1, 2, 3, 4], [1, 3, 2, 4],
  118. [1, 3, 3, 4], [1, 2, 2, 4]])
  119. # Combinations of missing/unique
  120. @pytest.mark.parametrize("key_values", [[1, 2], [1, 5], [1, 1], [5, 5]])
  121. @pytest.mark.parametrize("key_class", [Categorical, CategoricalIndex])
  122. def test_get_indexer_non_unique(self, idx_values, key_values, key_class):
  123. # GH 21448
  124. key = key_class(key_values, categories=range(1, 5))
  125. # Test for flat index and CategoricalIndex with same/different cats:
  126. for dtype in None, 'category', key.dtype:
  127. idx = Index(idx_values, dtype=dtype)
  128. expected, exp_miss = idx.get_indexer_non_unique(key_values)
  129. result, res_miss = idx.get_indexer_non_unique(key)
  130. tm.assert_numpy_array_equal(expected, result)
  131. tm.assert_numpy_array_equal(exp_miss, res_miss)
  132. def test_where_unobserved_nan(self):
  133. ser = pd.Series(pd.Categorical(['a', 'b']))
  134. result = ser.where([True, False])
  135. expected = pd.Series(pd.Categorical(['a', None],
  136. categories=['a', 'b']))
  137. tm.assert_series_equal(result, expected)
  138. # all NA
  139. ser = pd.Series(pd.Categorical(['a', 'b']))
  140. result = ser.where([False, False])
  141. expected = pd.Series(pd.Categorical([None, None],
  142. categories=['a', 'b']))
  143. tm.assert_series_equal(result, expected)
  144. def test_where_unobserved_categories(self):
  145. ser = pd.Series(
  146. Categorical(['a', 'b', 'c'], categories=['d', 'c', 'b', 'a'])
  147. )
  148. result = ser.where([True, True, False], other='b')
  149. expected = pd.Series(
  150. Categorical(['a', 'b', 'b'], categories=ser.cat.categories)
  151. )
  152. tm.assert_series_equal(result, expected)
  153. def test_where_other_categorical(self):
  154. ser = pd.Series(
  155. Categorical(['a', 'b', 'c'], categories=['d', 'c', 'b', 'a'])
  156. )
  157. other = Categorical(['b', 'c', 'a'], categories=['a', 'c', 'b', 'd'])
  158. result = ser.where([True, False, True], other)
  159. expected = pd.Series(Categorical(['a', 'c', 'c'], dtype=ser.dtype))
  160. tm.assert_series_equal(result, expected)
  161. def test_where_warns(self):
  162. ser = pd.Series(Categorical(['a', 'b', 'c']))
  163. with tm.assert_produces_warning(FutureWarning):
  164. result = ser.where([True, False, True], 'd')
  165. expected = pd.Series(np.array(['a', 'd', 'c'], dtype='object'))
  166. tm.assert_series_equal(result, expected)
  167. def test_where_ordered_differs_rasies(self):
  168. ser = pd.Series(
  169. Categorical(['a', 'b', 'c'], categories=['d', 'c', 'b', 'a'],
  170. ordered=True)
  171. )
  172. other = Categorical(['b', 'c', 'a'], categories=['a', 'c', 'b', 'd'],
  173. ordered=True)
  174. with tm.assert_produces_warning(FutureWarning):
  175. result = ser.where([True, False, True], other)
  176. expected = pd.Series(np.array(['a', 'c', 'c'], dtype=object))
  177. tm.assert_series_equal(result, expected)
  178. @pytest.mark.parametrize("index", [True, False])
  179. def test_mask_with_boolean(index):
  180. s = Series(range(3))
  181. idx = Categorical([True, False, True])
  182. if index:
  183. idx = CategoricalIndex(idx)
  184. assert com.is_bool_indexer(idx)
  185. result = s[idx]
  186. expected = s[idx.astype('object')]
  187. tm.assert_series_equal(result, expected)
  188. @pytest.mark.parametrize("index", [True, False])
  189. def test_mask_with_boolean_raises(index):
  190. s = Series(range(3))
  191. idx = Categorical([True, False, None])
  192. if index:
  193. idx = CategoricalIndex(idx)
  194. with pytest.raises(ValueError, match='NA / NaN'):
  195. s[idx]
  196. @pytest.fixture
  197. def non_coercible_categorical(monkeypatch):
  198. """
  199. Monkeypatch Categorical.__array__ to ensure no implicit conversion.
  200. Raises
  201. ------
  202. ValueError
  203. When Categorical.__array__ is called.
  204. """
  205. # TODO(Categorical): identify other places where this may be
  206. # useful and move to a conftest.py
  207. def array(self, dtype=None):
  208. raise ValueError("I cannot be converted.")
  209. with monkeypatch.context() as m:
  210. m.setattr(Categorical, "__array__", array)
  211. yield
  212. def test_series_at(non_coercible_categorical):
  213. arr = Categorical(['a', 'b', 'c'])
  214. ser = Series(arr)
  215. result = ser.at[0]
  216. assert result == 'a'