test_loc.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378
  1. import itertools
  2. from warnings import catch_warnings
  3. import numpy as np
  4. import pytest
  5. import pandas as pd
  6. from pandas import DataFrame, Index, MultiIndex, Series
  7. from pandas.util import testing as tm
  8. @pytest.fixture
  9. def single_level_multiindex():
  10. """single level MultiIndex"""
  11. return MultiIndex(levels=[['foo', 'bar', 'baz', 'qux']],
  12. codes=[[0, 1, 2, 3]], names=['first'])
  13. @pytest.fixture
  14. def frame_random_data_integer_multi_index():
  15. levels = [[0, 1], [0, 1, 2]]
  16. codes = [[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]]
  17. index = MultiIndex(levels=levels, codes=codes)
  18. return DataFrame(np.random.randn(6, 2), index=index)
  19. @pytest.mark.filterwarnings("ignore:\\n.ix:DeprecationWarning")
  20. class TestMultiIndexLoc(object):
  21. def test_loc_getitem_series(self):
  22. # GH14730
  23. # passing a series as a key with a MultiIndex
  24. index = MultiIndex.from_product([[1, 2, 3], ['A', 'B', 'C']])
  25. x = Series(index=index, data=range(9), dtype=np.float64)
  26. y = Series([1, 3])
  27. expected = Series(
  28. data=[0, 1, 2, 6, 7, 8],
  29. index=MultiIndex.from_product([[1, 3], ['A', 'B', 'C']]),
  30. dtype=np.float64)
  31. result = x.loc[y]
  32. tm.assert_series_equal(result, expected)
  33. result = x.loc[[1, 3]]
  34. tm.assert_series_equal(result, expected)
  35. # GH15424
  36. y1 = Series([1, 3], index=[1, 2])
  37. result = x.loc[y1]
  38. tm.assert_series_equal(result, expected)
  39. empty = Series(data=[], dtype=np.float64)
  40. expected = Series([], index=MultiIndex(
  41. levels=index.levels, codes=[[], []], dtype=np.float64))
  42. result = x.loc[empty]
  43. tm.assert_series_equal(result, expected)
  44. def test_loc_getitem_array(self):
  45. # GH15434
  46. # passing an array as a key with a MultiIndex
  47. index = MultiIndex.from_product([[1, 2, 3], ['A', 'B', 'C']])
  48. x = Series(index=index, data=range(9), dtype=np.float64)
  49. y = np.array([1, 3])
  50. expected = Series(
  51. data=[0, 1, 2, 6, 7, 8],
  52. index=MultiIndex.from_product([[1, 3], ['A', 'B', 'C']]),
  53. dtype=np.float64)
  54. result = x.loc[y]
  55. tm.assert_series_equal(result, expected)
  56. # empty array:
  57. empty = np.array([])
  58. expected = Series([], index=MultiIndex(
  59. levels=index.levels, codes=[[], []], dtype=np.float64))
  60. result = x.loc[empty]
  61. tm.assert_series_equal(result, expected)
  62. # 0-dim array (scalar):
  63. scalar = np.int64(1)
  64. expected = Series(
  65. data=[0, 1, 2],
  66. index=['A', 'B', 'C'],
  67. dtype=np.float64)
  68. result = x.loc[scalar]
  69. tm.assert_series_equal(result, expected)
  70. def test_loc_multiindex(self):
  71. mi_labels = DataFrame(np.random.randn(3, 3),
  72. columns=[['i', 'i', 'j'], ['A', 'A', 'B']],
  73. index=[['i', 'i', 'j'], ['X', 'X', 'Y']])
  74. mi_int = DataFrame(np.random.randn(3, 3),
  75. columns=[[2, 2, 4], [6, 8, 10]],
  76. index=[[4, 4, 8], [8, 10, 12]])
  77. # the first row
  78. rs = mi_labels.loc['i']
  79. with catch_warnings(record=True):
  80. xp = mi_labels.ix['i']
  81. tm.assert_frame_equal(rs, xp)
  82. # 2nd (last) columns
  83. rs = mi_labels.loc[:, 'j']
  84. with catch_warnings(record=True):
  85. xp = mi_labels.ix[:, 'j']
  86. tm.assert_frame_equal(rs, xp)
  87. # corner column
  88. rs = mi_labels.loc['j'].loc[:, 'j']
  89. with catch_warnings(record=True):
  90. xp = mi_labels.ix['j'].ix[:, 'j']
  91. tm.assert_frame_equal(rs, xp)
  92. # with a tuple
  93. rs = mi_labels.loc[('i', 'X')]
  94. with catch_warnings(record=True):
  95. xp = mi_labels.ix[('i', 'X')]
  96. tm.assert_frame_equal(rs, xp)
  97. rs = mi_int.loc[4]
  98. with catch_warnings(record=True):
  99. xp = mi_int.ix[4]
  100. tm.assert_frame_equal(rs, xp)
  101. # missing label
  102. pytest.raises(KeyError, lambda: mi_int.loc[2])
  103. with catch_warnings(record=True):
  104. # GH 21593
  105. pytest.raises(KeyError, lambda: mi_int.ix[2])
  106. def test_loc_multiindex_indexer_none(self):
  107. # GH6788
  108. # multi-index indexer is None (meaning take all)
  109. attributes = ['Attribute' + str(i) for i in range(1)]
  110. attribute_values = ['Value' + str(i) for i in range(5)]
  111. index = MultiIndex.from_product([attributes, attribute_values])
  112. df = 0.1 * np.random.randn(10, 1 * 5) + 0.5
  113. df = DataFrame(df, columns=index)
  114. result = df[attributes]
  115. tm.assert_frame_equal(result, df)
  116. # GH 7349
  117. # loc with a multi-index seems to be doing fallback
  118. df = DataFrame(np.arange(12).reshape(-1, 1),
  119. index=MultiIndex.from_product([[1, 2, 3, 4],
  120. [1, 2, 3]]))
  121. expected = df.loc[([1, 2], ), :]
  122. result = df.loc[[1, 2]]
  123. tm.assert_frame_equal(result, expected)
  124. def test_loc_multiindex_incomplete(self):
  125. # GH 7399
  126. # incomplete indexers
  127. s = Series(np.arange(15, dtype='int64'),
  128. MultiIndex.from_product([range(5), ['a', 'b', 'c']]))
  129. expected = s.loc[:, 'a':'c']
  130. result = s.loc[0:4, 'a':'c']
  131. tm.assert_series_equal(result, expected)
  132. tm.assert_series_equal(result, expected)
  133. result = s.loc[:4, 'a':'c']
  134. tm.assert_series_equal(result, expected)
  135. tm.assert_series_equal(result, expected)
  136. result = s.loc[0:, 'a':'c']
  137. tm.assert_series_equal(result, expected)
  138. tm.assert_series_equal(result, expected)
  139. # GH 7400
  140. # multiindexer gettitem with list of indexers skips wrong element
  141. s = Series(np.arange(15, dtype='int64'),
  142. MultiIndex.from_product([range(5), ['a', 'b', 'c']]))
  143. expected = s.iloc[[6, 7, 8, 12, 13, 14]]
  144. result = s.loc[2:4:2, 'a':'c']
  145. tm.assert_series_equal(result, expected)
  146. def test_get_loc_single_level(self, single_level_multiindex):
  147. single_level = single_level_multiindex
  148. s = Series(np.random.randn(len(single_level)),
  149. index=single_level)
  150. for k in single_level.values:
  151. s[k]
  152. def test_loc_getitem_int_slice(self):
  153. # GH 3053
  154. # loc should treat integer slices like label slices
  155. index = MultiIndex.from_tuples([t for t in itertools.product(
  156. [6, 7, 8], ['a', 'b'])])
  157. df = DataFrame(np.random.randn(6, 6), index, index)
  158. result = df.loc[6:8, :]
  159. expected = df
  160. tm.assert_frame_equal(result, expected)
  161. index = MultiIndex.from_tuples([t
  162. for t in itertools.product(
  163. [10, 20, 30], ['a', 'b'])])
  164. df = DataFrame(np.random.randn(6, 6), index, index)
  165. result = df.loc[20:30, :]
  166. expected = df.iloc[2:]
  167. tm.assert_frame_equal(result, expected)
  168. # doc examples
  169. result = df.loc[10, :]
  170. expected = df.iloc[0:2]
  171. expected.index = ['a', 'b']
  172. tm.assert_frame_equal(result, expected)
  173. result = df.loc[:, 10]
  174. # expected = df.ix[:,10] (this fails)
  175. expected = df[10]
  176. tm.assert_frame_equal(result, expected)
  177. @pytest.mark.parametrize(
  178. 'indexer_type_1',
  179. (list, tuple, set, slice, np.ndarray, Series, Index))
  180. @pytest.mark.parametrize(
  181. 'indexer_type_2',
  182. (list, tuple, set, slice, np.ndarray, Series, Index))
  183. def test_loc_getitem_nested_indexer(self, indexer_type_1, indexer_type_2):
  184. # GH #19686
  185. # .loc should work with nested indexers which can be
  186. # any list-like objects (see `pandas.api.types.is_list_like`) or slices
  187. def convert_nested_indexer(indexer_type, keys):
  188. if indexer_type == np.ndarray:
  189. return np.array(keys)
  190. if indexer_type == slice:
  191. return slice(*keys)
  192. return indexer_type(keys)
  193. a = [10, 20, 30]
  194. b = [1, 2, 3]
  195. index = MultiIndex.from_product([a, b])
  196. df = DataFrame(
  197. np.arange(len(index), dtype='int64'),
  198. index=index, columns=['Data'])
  199. keys = ([10, 20], [2, 3])
  200. types = (indexer_type_1, indexer_type_2)
  201. # check indexers with all the combinations of nested objects
  202. # of all the valid types
  203. indexer = tuple(
  204. convert_nested_indexer(indexer_type, k)
  205. for indexer_type, k in zip(types, keys))
  206. result = df.loc[indexer, 'Data']
  207. expected = Series(
  208. [1, 2, 4, 5], name='Data',
  209. index=MultiIndex.from_product(keys))
  210. tm.assert_series_equal(result, expected)
  211. @pytest.mark.parametrize('indexer, is_level1, expected_error', [
  212. ([], False, None), # empty ok
  213. (['A'], False, None),
  214. (['A', 'D'], False, None),
  215. (['D'], False, r"\['D'\] not in index"), # not any values found
  216. (pd.IndexSlice[:, ['foo']], True, None),
  217. (pd.IndexSlice[:, ['foo', 'bah']], True, None)
  218. ])
  219. def test_loc_getitem_duplicates_multiindex_missing_indexers(indexer, is_level1,
  220. expected_error):
  221. # GH 7866
  222. # multi-index slicing with missing indexers
  223. idx = MultiIndex.from_product([['A', 'B', 'C'],
  224. ['foo', 'bar', 'baz']],
  225. names=['one', 'two'])
  226. s = Series(np.arange(9, dtype='int64'), index=idx).sort_index()
  227. if indexer == []:
  228. expected = s.iloc[[]]
  229. elif is_level1:
  230. expected = Series([0, 3, 6], index=MultiIndex.from_product(
  231. [['A', 'B', 'C'], ['foo']], names=['one', 'two'])).sort_index()
  232. else:
  233. exp_idx = MultiIndex.from_product([['A'], ['foo', 'bar', 'baz']],
  234. names=['one', 'two'])
  235. expected = Series(np.arange(3, dtype='int64'),
  236. index=exp_idx).sort_index()
  237. if expected_error is not None:
  238. with pytest.raises(KeyError, match=expected_error):
  239. s.loc[indexer]
  240. else:
  241. result = s.loc[indexer]
  242. tm.assert_series_equal(result, expected)
  243. @pytest.mark.filterwarnings("ignore:\\n.ix:DeprecationWarning")
  244. @pytest.mark.parametrize('indexer', [
  245. lambda s: s.loc[[(2000, 3, 10), (2000, 3, 13)]],
  246. lambda s: s.ix[[(2000, 3, 10), (2000, 3, 13)]]
  247. ])
  248. def test_series_loc_getitem_fancy(
  249. multiindex_year_month_day_dataframe_random_data, indexer):
  250. s = multiindex_year_month_day_dataframe_random_data['A']
  251. expected = s.reindex(s.index[49:51])
  252. result = indexer(s)
  253. tm.assert_series_equal(result, expected)
  254. @pytest.mark.parametrize('columns_indexer', [
  255. ([], slice(None)),
  256. (['foo'], [])
  257. ])
  258. def test_loc_getitem_duplicates_multiindex_empty_indexer(columns_indexer):
  259. # GH 8737
  260. # empty indexer
  261. multi_index = MultiIndex.from_product((['foo', 'bar', 'baz'],
  262. ['alpha', 'beta']))
  263. df = DataFrame(np.random.randn(5, 6), index=range(5), columns=multi_index)
  264. df = df.sort_index(level=0, axis=1)
  265. expected = DataFrame(index=range(5), columns=multi_index.reindex([])[0])
  266. result = df.loc[:, columns_indexer]
  267. tm.assert_frame_equal(result, expected)
  268. def test_loc_getitem_duplicates_multiindex_non_scalar_type_object():
  269. # regression from < 0.14.0
  270. # GH 7914
  271. df = DataFrame([[np.mean, np.median], ['mean', 'median']],
  272. columns=MultiIndex.from_tuples([('functs', 'mean'),
  273. ('functs', 'median')]),
  274. index=['function', 'name'])
  275. result = df.loc['function', ('functs', 'mean')]
  276. expected = np.mean
  277. assert result == expected
  278. def test_loc_getitem_tuple_plus_slice():
  279. # GH 671
  280. df = DataFrame({'a': np.arange(10),
  281. 'b': np.arange(10),
  282. 'c': np.random.randn(10),
  283. 'd': np.random.randn(10)}
  284. ).set_index(['a', 'b'])
  285. expected = df.loc[0, 0]
  286. result = df.loc[(0, 0), :]
  287. tm.assert_series_equal(result, expected)
  288. def test_loc_getitem_int(frame_random_data_integer_multi_index):
  289. df = frame_random_data_integer_multi_index
  290. result = df.loc[1]
  291. expected = df[-3:]
  292. expected.index = expected.index.droplevel(0)
  293. tm.assert_frame_equal(result, expected)
  294. def test_loc_getitem_int_raises_exception(
  295. frame_random_data_integer_multi_index):
  296. df = frame_random_data_integer_multi_index
  297. with pytest.raises(KeyError, match=r"^3L?$"):
  298. df.loc[3]
  299. def test_loc_getitem_lowerdim_corner(multiindex_dataframe_random_data):
  300. df = multiindex_dataframe_random_data
  301. # test setup - check key not in dataframe
  302. with pytest.raises(KeyError, match=r"^11L?$"):
  303. df.loc[('bar', 'three'), 'B']
  304. # in theory should be inserting in a sorted space????
  305. df.loc[('bar', 'three'), 'B'] = 0
  306. expected = 0
  307. result = df.sort_index().loc[('bar', 'three'), 'B']
  308. assert result == expected