test_common.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343
  1. """
  2. Collection of tests asserting things that should be true for
  3. any index subclass. Makes use of the `indices` fixture defined
  4. in pandas/tests/indexes/conftest.py.
  5. """
  6. import numpy as np
  7. import pytest
  8. from pandas._libs.tslibs import iNaT
  9. from pandas.core.dtypes.common import needs_i8_conversion
  10. import pandas as pd
  11. from pandas import CategoricalIndex, MultiIndex, RangeIndex, compat
  12. import pandas.util.testing as tm
  13. class TestCommon(object):
  def test_droplevel(self, indices):
      """Check ``droplevel`` on a flat (non-Multi) index.

      Dropping an empty list of levels must give an index equal to the
      original, dropping the index's own name must raise ``ValueError``,
      and dropping an unknown level must raise ``KeyError``.
      """
      # GH 21115
      if isinstance(indices, MultiIndex):
          # Tested separately in test_multi.py; report a skip rather than a
          # silent pass so the test report shows nothing was checked here.
          pytest.skip("multiindex handled in test_multi.py")

      assert indices.droplevel([]).equals(indices)

      for level in indices.name, [indices.name]:
          if isinstance(indices.name, tuple) and level is indices.name:
              # GH 21121 : droplevel with tuple name
              continue
          with pytest.raises(ValueError):
              indices.droplevel(level)

      for level in 'wrong', ['wrong']:
          with pytest.raises(KeyError):
              indices.droplevel(level)
  def test_constructor_non_hashable_name(self, indices):
      """Renaming a flat index to a non-hashable name raises ``TypeError``."""
      # GH 20527
      if isinstance(indices, MultiIndex):
          pytest.skip("multiindex handled in test_multi.py")

      unhashable_name = [['1']]
      expected_msg = "Index.name must be a hashable type"

      # Exercise both renaming entry points, .rename() first and then
      # .set_names(); each takes the new name under its own keyword.
      renamers = (
          (indices.rename, 'name'),
          (indices.set_names, 'names'),
      )
      for renamer, keyword in renamers:
          with pytest.raises(TypeError, match=expected_msg):
              renamer(**{keyword: unhashable_name})
  def test_constructor_unwraps_index(self, indices):
      """An index rebuilt from itself must have ``_data`` equal to the original's."""
      if isinstance(indices, MultiIndex):
          # pytest.skip() raises on its own; no explicit ``raise`` is needed.
          pytest.skip("MultiIndex has no ._data")

      a = indices
      # Pass the index to its own class constructor.
      b = type(a)(a)
      tm.assert_equal(a._data, b._data)
  @pytest.mark.parametrize("itm", [101, "no_int"])
  # FutureWarning from non-tuple sequence of nd indexing
  @pytest.mark.filterwarnings("ignore::FutureWarning")
  def test_getitem_error(self, indices, itm):
      """Indexing with each parametrized invalid key raises ``IndexError``."""
      invalid_key = itm
      with pytest.raises(IndexError):
          indices[invalid_key]
  @pytest.mark.parametrize(
      'fname, sname, expected_name',
      [
          ('A', 'A', 'A'),
          ('A', 'B', None),
          ('A', None, None),
          (None, 'B', None),
          (None, None, None),
      ])
  def test_corner_union(self, indices, fname, sname, expected_name):
      """Check the resulting name of ``union`` for full and empty operands.

      Per the parametrized table, the union keeps a name only when both
      operands carry the same one; otherwise the result is unnamed.
      """
      # GH 9943 9862
      # Do not test MultiIndex or repeats
      if isinstance(indices, MultiIndex) or not indices.is_unique:
          pytest.skip("Not for MultiIndex or repeated indices")

      def full():
          # A fresh copy of the whole index.
          return indices.copy()

      def empty():
          # The index with all of its own labels dropped.
          return indices.drop(indices)

      # (first operand, second operand, expected result) builders for
      # copy.union(copy), copy.union(empty), empty.union(copy) and
      # empty.union(empty), in that order.
      cases = (
          (full, full, full),
          (full, empty, full),
          (empty, full, full),
          (empty, empty, empty),
      )
      for make_first, make_second, make_expected in cases:
          first = make_first().set_names(fname)
          second = make_second().set_names(sname)
          union = first.union(second)
          expected = make_expected().set_names(expected_name)
          tm.assert_index_equal(union, expected)
  def test_to_flat_index(self, indices):
      """``to_flat_index`` on a flat index must return an equal index."""
      # 22866
      if isinstance(indices, MultiIndex):
          pytest.skip("Separate expectation for MultiIndex")

      tm.assert_index_equal(indices.to_flat_index(), indices)
  def test_wrong_number_names(self, indices):
      """Assigning this three-element list to ``names`` must raise ``ValueError``."""
      candidate_names = ["apple", "banana", "carrot"]
      with pytest.raises(ValueError, match="^Length"):
          indices.names = candidate_names
  def test_set_name_methods(self, indices):
      """Check ``set_names`` and in-place ``rename`` on a flat index.

      ``set_names`` must return a renamed index and leave the original's
      name untouched. ``rename(..., inplace=True)`` must return ``None``
      and update both ``name`` and ``names``, including for a tuple name.
      """
      new_name = "This is the new name for this index"

      # don't test a MultiIndex here (it is tested separately)
      if isinstance(indices, MultiIndex):
          pytest.skip('Skip check for MultiIndex')

      # Work on a copy: the in-place renames below are never undone, so
      # running them on the fixture object itself would leave it named
      # ('A', 'B') for any later test that receives the same object
      # (presumably fixture instances are shared -- confirm in conftest).
      indices = indices.copy()

      original_name = indices.name
      new_ind = indices.set_names([new_name])
      assert new_ind.name == new_name
      assert indices.name == original_name

      res = indices.rename(new_name, inplace=True)
      # should return None
      assert res is None
      assert indices.name == new_name
      assert indices.names == [new_name]

      # NOTE: a check that ``set_names("a")`` raises TypeError matching
      # "list-like" (even with the right length) is currently disabled.
      with pytest.raises(ValueError, match="Level must be None"):
          indices.set_names("a", level=0)

      # rename in place just leaves tuples and other containers alone
      name = ('A', 'B')
      indices.rename(name, inplace=True)
      assert indices.name == name
      assert indices.names == [name]
  def test_dtype_str(self, indices):
      """``dtype_str`` must be a string equal to ``str(indices.dtype)``."""
      dtype_text = indices.dtype_str
      expected_text = str(indices.dtype)
      assert isinstance(dtype_text, compat.string_types)
      assert dtype_text == expected_text
  def test_hash_error(self, indices):
      """Hashing an index must raise ``TypeError`` naming the index class."""
      class_name = type(indices).__name__
      expected_msg = "unhashable type: %r" % class_name
      with pytest.raises(TypeError, match=expected_msg):
          hash(indices)
  def test_copy_and_deepcopy(self, indices):
      """``copy``/``deepcopy`` give distinct but equal indexes; ``copy`` can rename."""
      from copy import copy, deepcopy

      if isinstance(indices, MultiIndex):
          pytest.skip('Skip check for MultiIndex')

      for duplicate in (copy, deepcopy):
          cloned = duplicate(indices)
          # A new object, but with equal contents.
          assert cloned is not indices
          assert cloned.equals(indices)

      renamed_copy = indices.copy(deep=True, name="banana")
      assert renamed_copy.name == "banana"
  def test_unique(self, indices):
      """``unique`` must match ``drop_duplicates`` for valid levels and reject bad ones."""
      # don't test a MultiIndex here (as its tested separated)
      # don't test a CategoricalIndex because categories change (GH 18291)
      if isinstance(indices, (MultiIndex, CategoricalIndex)):
          pytest.skip('Skip check for MultiIndex/CategoricalIndex')

      # GH 17896
      deduplicated = indices.drop_duplicates()
      valid_levels = (0, indices.name, None)
      for level in valid_levels:
          tm.assert_index_equal(indices.unique(level=level), deduplicated)

      # An unknown position or label must be rejected.
      for bad_level in (3, 'wrong'):
          with pytest.raises((IndexError, KeyError)):
              indices.unique(level=bad_level)
  def test_get_unique_index(self, indices):
      """Check ``_get_unique_index`` with and without a missing value.

      First an index made of five repeats of the first element is reduced
      and compared with the one-element index. Then, for indexes that can
      hold NA, the first repeat is replaced by a missing value and both
      ``dropna`` settings are checked.
      """
      # MultiIndex tested separately
      if not len(indices) or isinstance(indices, MultiIndex):
          pytest.skip('Skip check for empty Index and MultiIndex')

      repeat_first = [0] * 5
      idx = indices[repeat_first]
      idx_unique = indices[[0]]

      # We test against `idx_unique`, so first we make sure it's unique
      # and doesn't contain nans.
      assert idx_unique.is_unique is True
      try:
          assert idx_unique.hasnans is False
      except NotImplementedError:
          pass

      for dropna in (False, True):
          tm.assert_index_equal(idx._get_unique_index(dropna=dropna),
                                idx_unique)

      # nans:
      if not indices._can_hold_na:
          pytest.skip('Skip na-check if index cannot hold na')

      # Pick the raw values and the matching missing-value marker.
      if needs_i8_conversion(indices):
          raw_values, missing = indices.asi8, iNaT
      else:
          raw_values, missing = indices.values, np.nan
      vals = raw_values[repeat_first]
      vals[0] = missing

      # The first two entries are the missing value and one real value.
      vals_unique = vals[:2]
      idx_nan = indices._shallow_copy(vals)
      idx_unique_nan = indices._shallow_copy(vals_unique)
      assert idx_unique_nan.is_unique is True

      assert idx_nan.dtype == indices.dtype
      assert idx_unique_nan.dtype == indices.dtype

      expected_by_dropna = (
          (False, idx_unique_nan),
          (True, idx_unique),
      )
      for dropna, expected in expected_by_dropna:
          for candidate in (idx_nan, idx_unique_nan):
              result = candidate._get_unique_index(dropna=dropna)
              tm.assert_index_equal(result, expected)
  def test_sort(self, indices):
      """Calling ``sort`` on an index must raise ``TypeError``."""
      sorter = indices.sort
      with pytest.raises(TypeError):
          sorter()
  def test_mutability(self, indices):
      """Item assignment on a non-empty index must raise ``TypeError``."""
      if not len(indices):
          pytest.skip('Skip check for empty Index')

      # Read the element first so only the assignment runs under raises().
      first_item = indices[0]
      with pytest.raises(TypeError):
          indices[0] = first_item
  199. def test_view(self, indices):
  200. assert indices.view().name == indices.name
  201. def test_compat(self, indices):
  202. assert indices.tolist() == list(indices)
  def test_searchsorted_monotonic(self, indices):
      """Check ``_searchsorted_monotonic`` (and ``searchsorted``) on the first value.

      ``_searchsorted_monotonic`` is checked for monotonic indexes of either
      direction, ``searchsorted`` only for increasing ones, and a
      non-monotonic index must raise ``ValueError``.
      """
      # GH17271
      # not implemented for tuple searches in MultiIndex
      # or Intervals searches in IntervalIndex
      if isinstance(indices, (MultiIndex, pd.IntervalIndex)):
          pytest.skip('Skip check for MultiIndex/IntervalIndex')

      # nothing to test if the index is empty
      if indices.empty:
          pytest.skip('Skip check for empty Index')
      value = indices[0]

      # determine the expected results (handle dupes for 'right')
      expected_left = 0
      expected_right = (indices == value).argmin()
      if expected_right == 0:
          # all values are the same, expected_right should be length
          expected_right = len(indices)

      increasing = indices.is_monotonic_increasing
      if not increasing and not indices.is_monotonic_decreasing:
          # non-monotonic should raise.
          with pytest.raises(ValueError):
              indices._searchsorted_monotonic(value, side='left')
          return

      expected_by_side = (('left', expected_left), ('right', expected_right))

      # test _searchsorted_monotonic in all cases
      for side, expected in expected_by_side:
          assert expected == indices._searchsorted_monotonic(value, side=side)

      # test searchsorted only for increasing
      if increasing:
          for side, expected in expected_by_side:
              assert expected == indices.searchsorted(value, side=side)
  def test_pickle(self, indices):
      """An index named 'foo' must equal its pickle round-trip.

      The index is renamed in place for the duration of the check and its
      original name is put back afterwards.
      """
      original_name, indices.name = indices.name, 'foo'
      try:
          unpickled = tm.round_trip_pickle(indices)
          assert indices.equals(unpickled)
      finally:
          # Restore the name even if the round trip or the assertion fails,
          # so the renamed object is not left behind.
          indices.name = original_name
  @pytest.mark.parametrize('keep', ['first', 'last', False])
  def test_duplicated(self, indices, keep):
      """Cross-check ``Index.duplicated`` against ``Series.duplicated``.

      A duplicate-free version of the index is resampled with repetition;
      the duplication flags of the sampled positions (via ``Series``) are
      the expected flags for the resampled index.
      """
      if not len(indices) or isinstance(indices, (MultiIndex, RangeIndex)):
          # MultiIndex tested separately in:
          # tests/indexes/multi/test_unique_and_duplicates
          pytest.skip('Skip check for empty Index, MultiIndex, RangeIndex')

      index_cls = type(indices)

      base = index_cls(indices)
      if base.has_duplicates:
          # We are testing the duplicated-method here, so we need to know
          # exactly which indices are duplicate and how (for the result).
          # This is not possible if "base" has duplicates already, which we
          # therefore remove. This is seemingly circular, as drop_duplicates
          # invokes duplicated, but in the end, it all works out because we
          # cross-check with Series.duplicated, which is tested separately.
          base = base.drop_duplicates()

      size = len(base)
      repeats = 10
      # Random positions into the duplicate-free index, with repetition.
      duplicated_selection = np.random.choice(size, repeats * size)
      flags = pd.Series(duplicated_selection).duplicated(keep=keep)
      expected = flags.values

      resampled = index_cls(base.values[duplicated_selection])
      result = resampled.duplicated(keep=keep)
      tm.assert_numpy_array_equal(result, expected)
  def test_has_duplicates(self, indices):
      """An index of five copies of one element is non-unique and has duplicates."""
      if not len(indices) or isinstance(indices, (MultiIndex, RangeIndex)):
          # MultiIndex tested separately in:
          # tests/indexes/multi/test_unique_and_duplicates.
          # RangeIndex is unique by definition.
          pytest.skip('Skip check for empty Index, MultiIndex, '
                      'and RangeIndex')

      index_cls = type(indices)
      repeated_first = [indices[0]] * 5
      idx = index_cls(repeated_first)
      assert idx.is_unique is False
      assert idx.has_duplicates is True
  275. assert idx.has_duplicates is True