"""Tests for per-axis / per-level slicing of MultiIndex-ed pandas objects."""
from warnings import catch_warnings

import numpy as np
import pytest

from pandas.errors import UnsortedIndexError

import pandas as pd
from pandas import DataFrame, Index, MultiIndex, Series, Timestamp
from pandas.core.indexing import _non_reducing_slice
from pandas.tests.indexing.common import _mklbl
from pandas.util import testing as tm


@pytest.mark.filterwarnings("ignore:\\n.ix:DeprecationWarning")
class TestMultiIndexSlicers(object):
    """Exercise ``.loc`` / ``.xs`` / ``IndexSlice`` on MultiIndex axes."""

    def test_per_axis_per_level_getitem(self):
        """Read through ``.loc`` with one indexer per MultiIndex level."""

        # GH6134
        # example test case
        ix = MultiIndex.from_product([_mklbl('A', 5), _mklbl('B', 7),
                                      _mklbl('C', 4), _mklbl('D', 2)])
        # len(ix) is the number of index entries (one value per row)
        df = DataFrame(np.arange(len(ix)), index=ix)

        result = df.loc[(slice('A1', 'A3'), slice(None), ['C1', 'C3']), :]
        expected = df.loc[[(a, b, c, d) for a, b, c, d in df.index.values
                           if a in ('A1', 'A2', 'A3') and
                           c in ('C1', 'C3')]]
        tm.assert_frame_equal(result, expected)

        expected = df.loc[[(a, b, c, d) for a, b, c, d in df.index.values
                           if a in ('A1', 'A2', 'A3') and
                           c in ('C1', 'C2', 'C3')]]
        result = df.loc[(slice('A1', 'A3'), slice(None),
                         slice('C1', 'C3')), :]
        tm.assert_frame_equal(result, expected)

        # test multi-index slicing with per axis and per index controls
        index = MultiIndex.from_tuples([('A', 1), ('A', 2),
                                        ('A', 3), ('B', 1)],
                                       names=['one', 'two'])
        columns = MultiIndex.from_tuples([('a', 'foo'), ('a', 'bar'),
                                          ('b', 'foo'), ('b', 'bah')],
                                         names=['lvl0', 'lvl1'])

        df = DataFrame(np.arange(16, dtype='int64').reshape(4, 4),
                       index=index, columns=columns)
        df = df.sort_index(axis=0).sort_index(axis=1)

        # identity
        result = df.loc[(slice(None), slice(None)), :]
        tm.assert_frame_equal(result, df)
        result = df.loc[(slice(None), slice(None)),
                        (slice(None), slice(None))]
        tm.assert_frame_equal(result, df)
        result = df.loc[:, (slice(None), slice(None))]
        tm.assert_frame_equal(result, df)

        # index
        result = df.loc[(slice(None), [1]), :]
        expected = df.iloc[[0, 3]]
        tm.assert_frame_equal(result, expected)

        result = df.loc[(slice(None), 1), :]
        expected = df.iloc[[0, 3]]
        tm.assert_frame_equal(result, expected)

        # columns
        result = df.loc[:, (slice(None), ['foo'])]
        expected = df.iloc[:, [1, 3]]
        tm.assert_frame_equal(result, expected)

        # both
        result = df.loc[(slice(None), 1), (slice(None), ['foo'])]
        expected = df.iloc[[0, 3], [1, 3]]
        tm.assert_frame_equal(result, expected)

        result = df.loc['A', 'a']
        expected = DataFrame(dict(bar=[1, 5, 9], foo=[0, 4, 8]),
                             index=Index([1, 2, 3], name='two'),
                             columns=Index(['bar', 'foo'], name='lvl1'))
        tm.assert_frame_equal(result, expected)

        result = df.loc[(slice(None), [1, 2]), :]
        expected = df.iloc[[0, 1, 3]]
        tm.assert_frame_equal(result, expected)

        # multi-level series
        s = Series(np.arange(len(ix)), index=ix)
        result = s.loc['A1':'A3', :, ['C1', 'C3']]
        expected = s.loc[[(a, b, c, d) for a, b, c, d in s.index.values
                          if a in ('A1', 'A2', 'A3') and
                          c in ('C1', 'C3')]]
        tm.assert_series_equal(result, expected)

        # boolean indexers
        result = df.loc[(slice(None), df.loc[:, ('a', 'bar')] > 5), :]
        expected = df.iloc[[2, 3]]
        tm.assert_frame_equal(result, expected)

        with pytest.raises(ValueError):
            df.loc[(slice(None), np.array([True, False])), :]

        # ambiguous cases
        # these can be multiply interpreted (e.g. in this case
        # as df.loc[slice(None), [1]] as well)
        with pytest.raises(KeyError):
            df.loc[slice(None), [1]]

        result = df.loc[(slice(None), [1]), :]
        expected = df.iloc[[0, 3]]
        tm.assert_frame_equal(result, expected)

        # not lexsorted
        assert df.index.lexsort_depth == 2
        df = df.sort_index(level=1, axis=0)
        assert df.index.lexsort_depth == 0

        msg = ('MultiIndex slicing requires the index to be '
               r'lexsorted: slicing on levels \[1\], lexsort depth 0')
        with pytest.raises(UnsortedIndexError, match=msg):
            df.loc[(slice(None), slice('bar')), :]

        # GH 16734: not sorted, but no real slicing
        result = df.loc[(slice(None), df.loc[:, ('a', 'bar')] > 5), :]
        tm.assert_frame_equal(result, df.iloc[[1, 3], :])

    def test_multiindex_slicers_non_unique(self):
        """Slice a MultiIndex that contains duplicate entries."""

        # GH 7106
        # non-unique mi index support
        df = (DataFrame(dict(A=['foo', 'foo', 'foo', 'foo'],
                             B=['a', 'a', 'a', 'a'],
                             C=[1, 2, 1, 3],
                             D=[1, 2, 3, 4]))
              .set_index(['A', 'B', 'C']).sort_index())
        assert not df.index.is_unique
        expected = (DataFrame(dict(A=['foo', 'foo'], B=['a', 'a'],
                                   C=[1, 1], D=[1, 3]))
                    .set_index(['A', 'B', 'C']).sort_index())
        result = df.loc[(slice(None), slice(None), 1), :]
        tm.assert_frame_equal(result, expected)

        # this is equivalent of an xs expression
        result = df.xs(1, level=2, drop_level=False)
        tm.assert_frame_equal(result, expected)

        df = (DataFrame(dict(A=['foo', 'foo', 'foo', 'foo'],
                             B=['a', 'a', 'a', 'a'],
                             C=[1, 2, 1, 2],
                             D=[1, 2, 3, 4]))
              .set_index(['A', 'B', 'C']).sort_index())
        assert not df.index.is_unique
        expected = (DataFrame(dict(A=['foo', 'foo'], B=['a', 'a'],
                                   C=[1, 1], D=[1, 3]))
                    .set_index(['A', 'B', 'C']).sort_index())
        result = df.loc[(slice(None), slice(None), 1), :]
        assert not result.index.is_unique
        tm.assert_frame_equal(result, expected)

        # GH12896
        # numpy-implementation dependent bug
        ints = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 12, 13, 14, 14, 16,
                17, 18, 19, 200000, 200000]
        n = len(ints)
        idx = MultiIndex.from_arrays([['a'] * n, ints])
        result = Series([1] * n, index=idx)
        result = result.sort_index()
        result = result.loc[(slice(None), slice(100000))]
        # the two trailing 200000 entries lie beyond the slice stop
        expected = Series([1] * (n - 2), index=idx[:-2]).sort_index()
        tm.assert_series_equal(result, expected)

    def test_multiindex_slicers_datetimelike(self):
        """Slice a datetime level with Timestamps and with strings."""

        # GH 7429
        # buggy/inconsistent behavior when slicing with datetime-like
        import datetime
        dates = [datetime.datetime(2012, 1, 1, 12, 12, 12) +
                 datetime.timedelta(days=i) for i in range(6)]
        freq = [1, 2]
        index = MultiIndex.from_product(
            [dates, freq], names=['date', 'frequency'])

        df = DataFrame(
            np.arange(6 * 2 * 4, dtype='int64').reshape(-1, 4),
            index=index, columns=list('ABCD'))

        # multi-axis slicing
        idx = pd.IndexSlice
        expected = df.iloc[[0, 2, 4], [0, 1]]
        result = df.loc[(slice(Timestamp('2012-01-01 12:12:12'),
                               Timestamp('2012-01-03 12:12:12')),
                         slice(1, 1)), slice('A', 'B')]
        tm.assert_frame_equal(result, expected)

        result = df.loc[(idx[Timestamp('2012-01-01 12:12:12'):
                             Timestamp('2012-01-03 12:12:12')],
                         idx[1:1]), slice('A', 'B')]
        tm.assert_frame_equal(result, expected)

        result = df.loc[(slice(Timestamp('2012-01-01 12:12:12'),
                               Timestamp('2012-01-03 12:12:12')), 1),
                        slice('A', 'B')]
        tm.assert_frame_equal(result, expected)

        # with strings
        result = df.loc[(slice('2012-01-01 12:12:12',
                               '2012-01-03 12:12:12'),
                         slice(1, 1)), slice('A', 'B')]
        tm.assert_frame_equal(result, expected)

        result = df.loc[(idx['2012-01-01 12:12:12':'2012-01-03 12:12:12'],
                         1), idx['A', 'B']]
        tm.assert_frame_equal(result, expected)

    def test_multiindex_slicers_edges(self):
        """Check open-ended and partially matching slices per level."""

        # GH 8132
        # various edge cases
        df = DataFrame(
            {'A': ['A0'] * 5 + ['A1'] * 5 + ['A2'] * 5,
             'B': ['B0', 'B0', 'B1', 'B1', 'B2'] * 3,
             'DATE': ["2013-06-11", "2013-07-02", "2013-07-09",
                      "2013-07-30", "2013-08-06", "2013-06-11",
                      "2013-07-02", "2013-07-09", "2013-07-30",
                      "2013-08-06", "2013-09-03", "2013-10-01",
                      "2013-07-09", "2013-08-06", "2013-09-03"],
             'VALUES': [22, 35, 14, 9, 4, 40, 18, 4, 2, 5, 1, 2, 3, 4, 2]})

        df['DATE'] = pd.to_datetime(df['DATE'])
        df1 = df.set_index(['A', 'B', 'DATE'])
        df1 = df1.sort_index()

        # A1 - Get all values under "A0" and "A1"
        result = df1.loc[(slice('A1')), :]
        expected = df1.iloc[0:10]
        tm.assert_frame_equal(result, expected)

        # A2 - Get all values from the start to "A2"
        result = df1.loc[(slice('A2')), :]
        expected = df1
        tm.assert_frame_equal(result, expected)

        # A3 - Get all values under "B1" or "B2"
        result = df1.loc[(slice(None), slice('B1', 'B2')), :]
        expected = df1.iloc[[2, 3, 4, 7, 8, 9, 12, 13, 14]]
        tm.assert_frame_equal(result, expected)

        # A4 - Get all values between 2013-07-02 and 2013-07-09
        result = df1.loc[(slice(None), slice(None),
                          slice('20130702', '20130709')), :]
        expected = df1.iloc[[1, 2, 6, 7, 12]]
        tm.assert_frame_equal(result, expected)

        # B1 - Get all values in B0 that are also under A0, A1 and A2
        result = df1.loc[(slice('A2'), slice('B0')), :]
        expected = df1.iloc[[0, 1, 5, 6, 10, 11]]
        tm.assert_frame_equal(result, expected)

        # B2 - Get all values in B0, B1 and B2 (similar to what #2 is doing
        # for the As)
        result = df1.loc[(slice(None), slice('B2')), :]
        expected = df1
        tm.assert_frame_equal(result, expected)

        # B3 - Get all values from B1 to B2 and up to 2013-08-06
        result = df1.loc[(slice(None), slice('B1', 'B2'),
                          slice('2013-08-06')), :]
        expected = df1.iloc[[2, 3, 4, 7, 8, 9, 12, 13]]
        tm.assert_frame_equal(result, expected)

        # B4 - Same as A4 but the start of the date slice is not a key.
        #      shows indexing on a partial selection slice
        result = df1.loc[(slice(None), slice(None),
                          slice('20130701', '20130709')), :]
        expected = df1.iloc[[1, 2, 6, 7, 12]]
        tm.assert_frame_equal(result, expected)

    def test_per_axis_per_level_doc_examples(self):
        """Run the slicer examples taken from the advanced indexing docs."""

        # test index maker
        idx = pd.IndexSlice

        # from indexing.rst / advanced
        index = MultiIndex.from_product([_mklbl('A', 4), _mklbl('B', 2),
                                         _mklbl('C', 4), _mklbl('D', 2)])
        columns = MultiIndex.from_tuples([('a', 'foo'), ('a', 'bar'),
                                          ('b', 'foo'), ('b', 'bah')],
                                         names=['lvl0', 'lvl1'])
        df = DataFrame(np.arange(len(index) * len(columns), dtype='int64')
                       .reshape((len(index), len(columns))),
                       index=index, columns=columns)
        result = df.loc[(slice('A1', 'A3'), slice(None), ['C1', 'C3']), :]
        expected = df.loc[[(a, b, c, d) for a, b, c, d in df.index.values
                           if a in ('A1', 'A2', 'A3') and
                           c in ('C1', 'C3')]]
        tm.assert_frame_equal(result, expected)
        result = df.loc[idx['A1':'A3', :, ['C1', 'C3']], :]
        tm.assert_frame_equal(result, expected)

        result = df.loc[(slice(None), slice(None), ['C1', 'C3']), :]
        expected = df.loc[[(a, b, c, d) for a, b, c, d in df.index.values
                           if c in ('C1', 'C3')]]
        tm.assert_frame_equal(result, expected)
        result = df.loc[idx[:, :, ['C1', 'C3']], :]
        tm.assert_frame_equal(result, expected)

        # not sorted
        with pytest.raises(UnsortedIndexError):
            df.loc['A1', ('a', slice('foo'))]

        # GH 16734: not sorted, but no real slicing
        tm.assert_frame_equal(df.loc['A1', (slice(None), 'foo')],
                              df.loc['A1'].iloc[:, [0, 2]])

        df = df.sort_index(axis=1)

        # slicing (smoke checks only: these must simply not raise)
        df.loc['A1', (slice(None), 'foo')]
        df.loc[(slice(None), slice(None), ['C1', 'C3']), (slice(None), 'foo')]

        # setitem
        df.loc(axis=0)[:, :, ['C1', 'C3']] = -10
        # read the same selection back so the assignment is verified
        assert (df.loc(axis=0)[:, :, ['C1', 'C3']] == -10).all().all()

    def test_loc_axis_arguments(self):
        """Check ``.loc(axis=...)`` for valid and invalid axis values."""

        index = MultiIndex.from_product([_mklbl('A', 4), _mklbl('B', 2),
                                         _mklbl('C', 4), _mklbl('D', 2)])
        columns = MultiIndex.from_tuples([('a', 'foo'), ('a', 'bar'),
                                          ('b', 'foo'), ('b', 'bah')],
                                         names=['lvl0', 'lvl1'])
        df = DataFrame(np.arange(len(index) * len(columns), dtype='int64')
                       .reshape((len(index), len(columns))),
                       index=index,
                       columns=columns).sort_index().sort_index(axis=1)

        # axis 0
        result = df.loc(axis=0)['A1':'A3', :, ['C1', 'C3']]
        expected = df.loc[[(a, b, c, d) for a, b, c, d in df.index.values
                           if a in ('A1', 'A2', 'A3') and
                           c in ('C1', 'C3')]]
        tm.assert_frame_equal(result, expected)

        result = df.loc(axis='index')[:, :, ['C1', 'C3']]
        expected = df.loc[[(a, b, c, d) for a, b, c, d in df.index.values
                           if c in ('C1', 'C3')]]
        tm.assert_frame_equal(result, expected)

        # axis 1
        result = df.loc(axis=1)[:, 'foo']
        expected = df.loc[:, (slice(None), 'foo')]
        tm.assert_frame_equal(result, expected)

        result = df.loc(axis='columns')[:, 'foo']
        expected = df.loc[:, (slice(None), 'foo')]
        tm.assert_frame_equal(result, expected)

        # invalid axis
        with pytest.raises(ValueError):
            df.loc(axis=-1)[:, :, ['C1', 'C3']]

        with pytest.raises(ValueError):
            df.loc(axis=2)[:, :, ['C1', 'C3']]

        with pytest.raises(ValueError):
            df.loc(axis='foo')[:, :, ['C1', 'C3']]

    def test_per_axis_per_level_setitem(self):
        """Assign through ``.loc`` with one indexer per MultiIndex level."""

        # test index maker
        idx = pd.IndexSlice

        # test multi-index slicing with per axis and per index controls
        index = MultiIndex.from_tuples([('A', 1), ('A', 2),
                                        ('A', 3), ('B', 1)],
                                       names=['one', 'two'])
        columns = MultiIndex.from_tuples([('a', 'foo'), ('a', 'bar'),
                                          ('b', 'foo'), ('b', 'bah')],
                                         names=['lvl0', 'lvl1'])

        df_orig = DataFrame(
            np.arange(16, dtype='int64').reshape(4, 4),
            index=index, columns=columns)
        df_orig = df_orig.sort_index(axis=0).sort_index(axis=1)

        # identity
        df = df_orig.copy()
        df.loc[(slice(None), slice(None)), :] = 100
        expected = df_orig.copy()
        expected.iloc[:, :] = 100
        tm.assert_frame_equal(df, expected)

        df = df_orig.copy()
        df.loc(axis=0)[:, :] = 100
        expected = df_orig.copy()
        expected.iloc[:, :] = 100
        tm.assert_frame_equal(df, expected)

        df = df_orig.copy()
        df.loc[(slice(None), slice(None)), (slice(None), slice(None))] = 100
        expected = df_orig.copy()
        expected.iloc[:, :] = 100
        tm.assert_frame_equal(df, expected)

        df = df_orig.copy()
        df.loc[:, (slice(None), slice(None))] = 100
        expected = df_orig.copy()
        expected.iloc[:, :] = 100
        tm.assert_frame_equal(df, expected)

        # index
        df = df_orig.copy()
        df.loc[(slice(None), [1]), :] = 100
        expected = df_orig.copy()
        expected.iloc[[0, 3]] = 100
        tm.assert_frame_equal(df, expected)

        df = df_orig.copy()
        df.loc[(slice(None), 1), :] = 100
        expected = df_orig.copy()
        expected.iloc[[0, 3]] = 100
        tm.assert_frame_equal(df, expected)

        df = df_orig.copy()
        df.loc(axis=0)[:, 1] = 100
        expected = df_orig.copy()
        expected.iloc[[0, 3]] = 100
        tm.assert_frame_equal(df, expected)

        # columns
        df = df_orig.copy()
        df.loc[:, (slice(None), ['foo'])] = 100
        expected = df_orig.copy()
        expected.iloc[:, [1, 3]] = 100
        tm.assert_frame_equal(df, expected)

        # both
        df = df_orig.copy()
        df.loc[(slice(None), 1), (slice(None), ['foo'])] = 100
        expected = df_orig.copy()
        expected.iloc[[0, 3], [1, 3]] = 100
        tm.assert_frame_equal(df, expected)

        df = df_orig.copy()
        df.loc[idx[:, 1], idx[:, ['foo']]] = 100
        expected = df_orig.copy()
        expected.iloc[[0, 3], [1, 3]] = 100
        tm.assert_frame_equal(df, expected)

        df = df_orig.copy()
        df.loc['A', 'a'] = 100
        expected = df_orig.copy()
        expected.iloc[0:3, 0:2] = 100
        tm.assert_frame_equal(df, expected)

        # setting with a list-like
        df = df_orig.copy()
        df.loc[(slice(None), 1), (slice(None), ['foo'])] = np.array(
            [[100, 100], [100, 100]], dtype='int64')
        expected = df_orig.copy()
        expected.iloc[[0, 3], [1, 3]] = 100
        tm.assert_frame_equal(df, expected)

        # not enough values
        df = df_orig.copy()

        # NOTE(review): the ragged literal below may already raise ValueError
        # inside np.array, before pandas sees it - confirm which layer this
        # case is meant to exercise.
        with pytest.raises(ValueError):
            df.loc[(slice(None), 1), (slice(None), ['foo'])] = np.array(
                [[100], [100, 100]], dtype='int64')

        with pytest.raises(ValueError):
            df.loc[(slice(None), 1), (slice(None), ['foo'])] = np.array(
                [100, 100, 100, 100], dtype='int64')

        # with an alignable rhs
        df = df_orig.copy()
        df.loc[(slice(None), 1), (slice(None), ['foo'])] = df.loc[(slice(
            None), 1), (slice(None), ['foo'])] * 5
        expected = df_orig.copy()
        expected.iloc[[0, 3], [1, 3]] = expected.iloc[[0, 3], [1, 3]] * 5
        tm.assert_frame_equal(df, expected)

        df = df_orig.copy()
        df.loc[(slice(None), 1), (slice(None), ['foo'])] *= df.loc[(slice(
            None), 1), (slice(None), ['foo'])]
        expected = df_orig.copy()
        expected.iloc[[0, 3], [1, 3]] *= expected.iloc[[0, 3], [1, 3]]
        tm.assert_frame_equal(df, expected)

        # rhs carries an extra ('c', 'bah') column that is absent from the
        # assignment target; the expected frame is the same as without it
        rhs = df_orig.loc[(slice(None), 1), (slice(None), ['foo'])].copy()
        rhs.loc[:, ('c', 'bah')] = 10
        df = df_orig.copy()
        df.loc[(slice(None), 1), (slice(None), ['foo'])] *= rhs
        expected = df_orig.copy()
        expected.iloc[[0, 3], [1, 3]] *= expected.iloc[[0, 3], [1, 3]]
        tm.assert_frame_equal(df, expected)

    def test_multiindex_label_slicing_with_negative_step(self):
        """Compare negative-step label slices with positional slices."""
        s = Series(np.arange(20),
                   MultiIndex.from_product([list('abcde'), np.arange(4)]))
        SLC = pd.IndexSlice

        def assert_slices_equivalent(l_slc, i_slc):
            # label slice via .loc, [] and .ix must equal the iloc slice
            tm.assert_series_equal(s.loc[l_slc], s.iloc[i_slc])
            tm.assert_series_equal(s[l_slc], s.iloc[i_slc])
            with catch_warnings(record=True):
                tm.assert_series_equal(s.ix[l_slc], s.iloc[i_slc])

        assert_slices_equivalent(SLC[::-1], SLC[::-1])

        assert_slices_equivalent(SLC['d'::-1], SLC[15::-1])
        assert_slices_equivalent(SLC[('d', )::-1], SLC[15::-1])

        assert_slices_equivalent(SLC[:'d':-1], SLC[:11:-1])
        assert_slices_equivalent(SLC[:('d', ):-1], SLC[:11:-1])

        assert_slices_equivalent(SLC['d':'b':-1], SLC[15:3:-1])
        assert_slices_equivalent(SLC[('d', ):'b':-1], SLC[15:3:-1])
        assert_slices_equivalent(SLC['d':('b', ):-1], SLC[15:3:-1])
        assert_slices_equivalent(SLC[('d', ):('b', ):-1], SLC[15:3:-1])
        assert_slices_equivalent(SLC['b':'d':-1], SLC[:0])

        assert_slices_equivalent(SLC[('c', 2)::-1], SLC[10::-1])
        assert_slices_equivalent(SLC[:('c', 2):-1], SLC[:9:-1])
        assert_slices_equivalent(SLC[('e', 0):('c', 2):-1], SLC[16:9:-1])

    def test_multiindex_slice_first_level(self):
        """Select a first-level key from an already sliced frame."""
        # GH 12697
        freq = ['a', 'b', 'c', 'd']
        idx = MultiIndex.from_product([freq, np.arange(500)])
        df = DataFrame(list(range(2000)), index=idx, columns=['Test'])
        df_slice = df.loc[pd.IndexSlice[:, 30:70], :]
        result = df_slice.loc['a']
        expected = DataFrame(list(range(30, 71)),
                             columns=['Test'], index=range(30, 71))
        tm.assert_frame_equal(result, expected)
        result = df_slice.loc['d']
        expected = DataFrame(list(range(1530, 1571)),
                             columns=['Test'], index=range(30, 71))
        tm.assert_frame_equal(result, expected)

    def test_int_series_slicing(
            self, multiindex_year_month_day_dataframe_random_data):
        """Slice and assign with integer positions on the fixture data."""
        ymd = multiindex_year_month_day_dataframe_random_data
        s = ymd['A']
        result = s[5:]
        expected = s.reindex(s.index[5:])
        tm.assert_series_equal(result, expected)

        exp = ymd['A'].copy()
        s[5:] = 0
        exp.values[5:] = 0
        tm.assert_numpy_array_equal(s.values, exp.values)

        result = ymd[5:]
        expected = ymd.reindex(s.index[5:])
        tm.assert_frame_equal(result, expected)

    def test_non_reducing_slice_on_multiindex(self):
        """Use ``_non_reducing_slice`` output as a ``.loc`` indexer."""
        # GH 19861
        dic = {
            ('a', 'd'): [1, 4],
            ('a', 'c'): [2, 3],
            ('b', 'c'): [3, 2],
            ('b', 'd'): [4, 1]
        }
        df = pd.DataFrame(dic, index=[0, 1])
        idx = pd.IndexSlice
        slice_ = idx[:, idx['b', 'd']]
        tslice_ = _non_reducing_slice(slice_)

        result = df.loc[tslice_]
        expected = pd.DataFrame({('b', 'd'): [4, 1]})
        tm.assert_frame_equal(result, expected)