# test_getitem.py
import numpy as np
import pytest

from pandas.compat import u, zip

from pandas import DataFrame, Index, MultiIndex, Series
from pandas.core.indexing import IndexingError
from pandas.util import testing as tm
# ----------------------------------------------------------------------------
# test indexing of Series with multi-level Index
# ----------------------------------------------------------------------------
  10. @pytest.mark.parametrize('access_method', [lambda s, x: s[:, x],
  11. lambda s, x: s.loc[:, x],
  12. lambda s, x: s.xs(x, level=1)])
  13. @pytest.mark.parametrize('level1_value, expected', [
  14. (0, Series([1], index=[0])),
  15. (1, Series([2, 3], index=[1, 2]))
  16. ])
  17. def test_series_getitem_multiindex(access_method, level1_value, expected):
  18. # GH 6018
  19. # series regression getitem with a multi-index
  20. s = Series([1, 2, 3])
  21. s.index = MultiIndex.from_tuples([(0, 0), (1, 1), (2, 1)])
  22. result = access_method(s, level1_value)
  23. tm.assert_series_equal(result, expected)
  24. @pytest.mark.parametrize('level0_value', ['D', 'A'])
  25. def test_series_getitem_duplicates_multiindex(level0_value):
  26. # GH 5725 the 'A' happens to be a valid Timestamp so the doesn't raise
  27. # the appropriate error, only in PY3 of course!
  28. index = MultiIndex(levels=[[level0_value, 'B', 'C'],
  29. [0, 26, 27, 37, 57, 67, 75, 82]],
  30. codes=[[0, 0, 0, 1, 2, 2, 2, 2, 2, 2],
  31. [1, 3, 4, 6, 0, 2, 2, 3, 5, 7]],
  32. names=['tag', 'day'])
  33. arr = np.random.randn(len(index), 1)
  34. df = DataFrame(arr, index=index, columns=['val'])
  35. # confirm indexing on missing value raises KeyError
  36. if level0_value != 'A':
  37. with pytest.raises(KeyError, match=r"^'A'$"):
  38. df.val['A']
  39. with pytest.raises(KeyError, match=r"^'X'$"):
  40. df.val['X']
  41. result = df.val[level0_value]
  42. expected = Series(arr.ravel()[0:3], name='val', index=Index(
  43. [26, 37, 57], name='day'))
  44. tm.assert_series_equal(result, expected)
  45. @pytest.mark.parametrize('indexer', [
  46. lambda s: s[2000, 3],
  47. lambda s: s.loc[2000, 3]
  48. ])
  49. def test_series_getitem(
  50. multiindex_year_month_day_dataframe_random_data, indexer):
  51. s = multiindex_year_month_day_dataframe_random_data['A']
  52. expected = s.reindex(s.index[42:65])
  53. expected.index = expected.index.droplevel(0).droplevel(0)
  54. result = indexer(s)
  55. tm.assert_series_equal(result, expected)
  56. @pytest.mark.parametrize('indexer', [
  57. lambda s: s[2000, 3, 10],
  58. lambda s: s.loc[2000, 3, 10]
  59. ])
  60. def test_series_getitem_returns_scalar(
  61. multiindex_year_month_day_dataframe_random_data, indexer):
  62. s = multiindex_year_month_day_dataframe_random_data['A']
  63. expected = s.iloc[49]
  64. result = indexer(s)
  65. assert result == expected
  66. @pytest.mark.parametrize('indexer,expected_error,expected_error_msg', [
  67. (lambda s: s.__getitem__((2000, 3, 4)), KeyError, r"^356L?$"),
  68. (lambda s: s[(2000, 3, 4)], KeyError, r"^356L?$"),
  69. (lambda s: s.loc[(2000, 3, 4)], IndexingError, 'Too many indexers'),
  70. (lambda s: s.__getitem__(len(s)), IndexError, 'index out of bounds'),
  71. (lambda s: s[len(s)], IndexError, 'index out of bounds'),
  72. (lambda s: s.iloc[len(s)], IndexError,
  73. 'single positional indexer is out-of-bounds')
  74. ])
  75. def test_series_getitem_indexing_errors(
  76. multiindex_year_month_day_dataframe_random_data, indexer,
  77. expected_error, expected_error_msg):
  78. s = multiindex_year_month_day_dataframe_random_data['A']
  79. with pytest.raises(expected_error, match=expected_error_msg):
  80. indexer(s)
  81. def test_series_getitem_corner_generator(
  82. multiindex_year_month_day_dataframe_random_data):
  83. s = multiindex_year_month_day_dataframe_random_data['A']
  84. result = s[(x > 0 for x in s)]
  85. expected = s[s > 0]
  86. tm.assert_series_equal(result, expected)
# ----------------------------------------------------------------------------
# test indexing of DataFrame with multi-level Index
# ----------------------------------------------------------------------------
  90. def test_getitem_simple(multiindex_dataframe_random_data):
  91. df = multiindex_dataframe_random_data.T
  92. expected = df.values[:, 0]
  93. result = df['foo', 'one'].values
  94. tm.assert_almost_equal(result, expected)
  95. @pytest.mark.parametrize('indexer,expected_error_msg', [
  96. (lambda df: df[('foo', 'four')], r"^\('foo', 'four'\)$"),
  97. (lambda df: df['foobar'], r"^'foobar'$")
  98. ])
  99. def test_frame_getitem_simple_key_error(
  100. multiindex_dataframe_random_data, indexer, expected_error_msg):
  101. df = multiindex_dataframe_random_data.T
  102. with pytest.raises(KeyError, match=expected_error_msg):
  103. indexer(df)
  104. def test_frame_getitem_multicolumn_empty_level():
  105. df = DataFrame({'a': ['1', '2', '3'], 'b': ['2', '3', '4']})
  106. df.columns = [['level1 item1', 'level1 item2'], ['', 'level2 item2'],
  107. ['level3 item1', 'level3 item2']]
  108. result = df['level1 item1']
  109. expected = DataFrame([['1'], ['2'], ['3']], index=df.index,
  110. columns=['level3 item1'])
  111. tm.assert_frame_equal(result, expected)
  112. @pytest.mark.parametrize('indexer,expected_slice', [
  113. (lambda df: df['foo'], slice(3)),
  114. (lambda df: df['bar'], slice(3, 5)),
  115. (lambda df: df.loc[:, 'bar'], slice(3, 5))
  116. ])
  117. def test_frame_getitem_toplevel(
  118. multiindex_dataframe_random_data, indexer, expected_slice):
  119. df = multiindex_dataframe_random_data.T
  120. expected = df.reindex(columns=df.columns[expected_slice])
  121. expected.columns = expected.columns.droplevel(0)
  122. result = indexer(df)
  123. tm.assert_frame_equal(result, expected)
  124. @pytest.mark.parametrize('unicode_strings', [True, False])
  125. def test_frame_mixed_depth_get(unicode_strings):
  126. # If unicode_strings is True, the column labels in dataframe
  127. # construction will use unicode strings in Python 2 (pull request
  128. # #17099).
  129. arrays = [['a', 'top', 'top', 'routine1', 'routine1', 'routine2'],
  130. ['', 'OD', 'OD', 'result1', 'result2', 'result1'],
  131. ['', 'wx', 'wy', '', '', '']]
  132. if unicode_strings:
  133. arrays = [[u(s) for s in arr] for arr in arrays]
  134. tuples = sorted(zip(*arrays))
  135. index = MultiIndex.from_tuples(tuples)
  136. df = DataFrame(np.random.randn(4, 6), columns=index)
  137. result = df['a']
  138. expected = df['a', '', ''].rename('a')
  139. tm.assert_series_equal(result, expected)
  140. result = df['routine1', 'result1']
  141. expected = df['routine1', 'result1', '']
  142. expected = expected.rename(('routine1', 'result1'))
  143. tm.assert_series_equal(result, expected)
# ----------------------------------------------------------------------------
# test indexing of DataFrame with multi-level Index with duplicates
# ----------------------------------------------------------------------------
  147. @pytest.fixture
  148. def dataframe_with_duplicate_index():
  149. """Fixture for DataFrame used in tests for gh-4145 and gh-4146"""
  150. data = [['a', 'd', 'e', 'c', 'f', 'b'],
  151. [1, 4, 5, 3, 6, 2],
  152. [1, 4, 5, 3, 6, 2]]
  153. index = ['h1', 'h3', 'h5']
  154. columns = MultiIndex(
  155. levels=[['A', 'B'], ['A1', 'A2', 'B1', 'B2']],
  156. codes=[[0, 0, 0, 1, 1, 1], [0, 3, 3, 0, 1, 2]],
  157. names=['main', 'sub'])
  158. return DataFrame(data, index=index, columns=columns)
  159. @pytest.mark.parametrize('indexer', [
  160. lambda df: df[('A', 'A1')],
  161. lambda df: df.loc[:, ('A', 'A1')]
  162. ])
  163. def test_frame_mi_access(dataframe_with_duplicate_index, indexer):
  164. # GH 4145
  165. df = dataframe_with_duplicate_index
  166. index = Index(['h1', 'h3', 'h5'])
  167. columns = MultiIndex.from_tuples([('A', 'A1')], names=['main', 'sub'])
  168. expected = DataFrame([['a', 1, 1]], index=columns, columns=index).T
  169. result = indexer(df)
  170. tm.assert_frame_equal(result, expected)
  171. def test_frame_mi_access_returns_series(dataframe_with_duplicate_index):
  172. # GH 4146, not returning a block manager when selecting a unique index
  173. # from a duplicate index
  174. # as of 4879, this returns a Series (which is similar to what happens
  175. # with a non-unique)
  176. df = dataframe_with_duplicate_index
  177. expected = Series(['a', 1, 1], index=['h1', 'h3', 'h5'], name='A1')
  178. result = df['A']['A1']
  179. tm.assert_series_equal(result, expected)
  180. def test_frame_mi_access_returns_frame(dataframe_with_duplicate_index):
  181. # selecting a non_unique from the 2nd level
  182. df = dataframe_with_duplicate_index
  183. expected = DataFrame([['d', 4, 4], ['e', 5, 5]],
  184. index=Index(['B2', 'B2'], name='sub'),
  185. columns=['h1', 'h3', 'h5'], ).T
  186. result = df['A']['B2']
  187. tm.assert_frame_equal(result, expected)