123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378 |
- import itertools
- from warnings import catch_warnings
- import numpy as np
- import pytest
- import pandas as pd
- from pandas import DataFrame, Index, MultiIndex, Series
- from pandas.util import testing as tm
@pytest.fixture
def single_level_multiindex():
    """Return a one-level MultiIndex named 'first' over four labels."""
    labels = ['foo', 'bar', 'baz', 'qux']
    # one code per label, in order: 0, 1, 2, 3
    positions = list(range(len(labels)))
    return MultiIndex(levels=[labels], codes=[positions], names=['first'])
@pytest.fixture
def frame_random_data_integer_multi_index():
    """Return a 6x2 random frame on a two-level integer MultiIndex."""
    outer_codes = [0, 0, 0, 1, 1, 1]
    inner_codes = [0, 1, 2, 0, 1, 2]
    mi = MultiIndex(levels=[[0, 1], [0, 1, 2]],
                    codes=[outer_codes, inner_codes])
    values = np.random.randn(6, 2)
    return DataFrame(values, index=mi)
@pytest.mark.filterwarnings("ignore:\\n.ix:DeprecationWarning")
class TestMultiIndexLoc(object):
    """Tests for label-based ``.loc`` indexing on MultiIndex-ed objects.

    Some tests compare ``.loc`` against the ``.ix`` indexer, hence the
    class-level filter that silences its DeprecationWarning.
    """

    def test_loc_getitem_series(self):
        """A Series used as the ``.loc`` key selects on the first level."""
        # GH14730
        # passing a series as a key with a MultiIndex
        index = MultiIndex.from_product([[1, 2, 3], ['A', 'B', 'C']])
        x = Series(index=index, data=range(9), dtype=np.float64)
        y = Series([1, 3])
        expected = Series(
            data=[0, 1, 2, 6, 7, 8],
            index=MultiIndex.from_product([[1, 3], ['A', 'B', 'C']]),
            dtype=np.float64)
        result = x.loc[y]
        tm.assert_series_equal(result, expected)

        result = x.loc[[1, 3]]
        tm.assert_series_equal(result, expected)

        # GH15424
        # the key carries a non-default index; the expected selection is
        # still the one given by its values
        y1 = Series([1, 3], index=[1, 2])
        result = x.loc[y1]
        tm.assert_series_equal(result, expected)

        empty = Series(data=[], dtype=np.float64)
        # dtype is an argument of the Series, not of the MultiIndex
        expected = Series(
            [],
            index=MultiIndex(levels=index.levels, codes=[[], []]),
            dtype=np.float64)
        result = x.loc[empty]
        tm.assert_series_equal(result, expected)

    def test_loc_getitem_array(self):
        """A NumPy array (or NumPy scalar) works as the ``.loc`` key."""
        # GH15434
        # passing an array as a key with a MultiIndex
        index = MultiIndex.from_product([[1, 2, 3], ['A', 'B', 'C']])
        x = Series(index=index, data=range(9), dtype=np.float64)
        y = np.array([1, 3])
        expected = Series(
            data=[0, 1, 2, 6, 7, 8],
            index=MultiIndex.from_product([[1, 3], ['A', 'B', 'C']]),
            dtype=np.float64)
        result = x.loc[y]
        tm.assert_series_equal(result, expected)

        # empty array:
        empty = np.array([])
        # dtype is an argument of the Series, not of the MultiIndex
        expected = Series(
            [],
            index=MultiIndex(levels=index.levels, codes=[[], []]),
            dtype=np.float64)
        result = x.loc[empty]
        tm.assert_series_equal(result, expected)

        # 0-dim array (scalar):
        scalar = np.int64(1)
        expected = Series(
            data=[0, 1, 2],
            index=['A', 'B', 'C'],
            dtype=np.float64)
        result = x.loc[scalar]
        tm.assert_series_equal(result, expected)

    def test_loc_multiindex(self):
        """``.loc`` and ``.ix`` agree on frames with MultiIndex axes."""
        mi_labels = DataFrame(np.random.randn(3, 3),
                              columns=[['i', 'i', 'j'], ['A', 'A', 'B']],
                              index=[['i', 'i', 'j'], ['X', 'X', 'Y']])

        mi_int = DataFrame(np.random.randn(3, 3),
                           columns=[[2, 2, 4], [6, 8, 10]],
                           index=[[4, 4, 8], [8, 10, 12]])

        # the first row
        rs = mi_labels.loc['i']
        with catch_warnings(record=True):
            xp = mi_labels.ix['i']
        tm.assert_frame_equal(rs, xp)

        # 2nd (last) columns
        rs = mi_labels.loc[:, 'j']
        with catch_warnings(record=True):
            xp = mi_labels.ix[:, 'j']
        tm.assert_frame_equal(rs, xp)

        # corner column
        rs = mi_labels.loc['j'].loc[:, 'j']
        with catch_warnings(record=True):
            xp = mi_labels.ix['j'].ix[:, 'j']
        tm.assert_frame_equal(rs, xp)

        # with a tuple
        rs = mi_labels.loc[('i', 'X')]
        with catch_warnings(record=True):
            xp = mi_labels.ix[('i', 'X')]
        tm.assert_frame_equal(rs, xp)

        rs = mi_int.loc[4]
        with catch_warnings(record=True):
            xp = mi_int.ix[4]
        tm.assert_frame_equal(rs, xp)

        # missing label
        with pytest.raises(KeyError):
            mi_int.loc[2]
        with catch_warnings(record=True):
            # GH 21593
            with pytest.raises(KeyError):
                mi_int.ix[2]

    def test_loc_multiindex_indexer_none(self):
        """Selecting a whole first level returns every matching entry."""
        # GH6788
        # multi-index indexer is None (meaning take all)
        attributes = ['Attribute' + str(i) for i in range(1)]
        attribute_values = ['Value' + str(i) for i in range(5)]

        index = MultiIndex.from_product([attributes, attribute_values])
        df = 0.1 * np.random.randn(10, 1 * 5) + 0.5
        df = DataFrame(df, columns=index)
        result = df[attributes]
        tm.assert_frame_equal(result, df)

        # GH 7349
        # loc with a multi-index seems to be doing fallback
        df = DataFrame(np.arange(12).reshape(-1, 1),
                       index=MultiIndex.from_product([[1, 2, 3, 4],
                                                      [1, 2, 3]]))

        expected = df.loc[([1, 2], ), :]
        result = df.loc[[1, 2]]
        tm.assert_frame_equal(result, expected)

    def test_loc_multiindex_incomplete(self):
        """Slices with open or partial bounds select the expected rows."""
        # GH 7399
        # incomplete indexers
        s = Series(np.arange(15, dtype='int64'),
                   MultiIndex.from_product([range(5), ['a', 'b', 'c']]))
        expected = s.loc[:, 'a':'c']

        result = s.loc[0:4, 'a':'c']
        tm.assert_series_equal(result, expected)

        result = s.loc[:4, 'a':'c']
        tm.assert_series_equal(result, expected)

        result = s.loc[0:, 'a':'c']
        tm.assert_series_equal(result, expected)

        # GH 7400
        # multiindexer getitem with list of indexers skips wrong element
        s = Series(np.arange(15, dtype='int64'),
                   MultiIndex.from_product([range(5), ['a', 'b', 'c']]))
        expected = s.iloc[[6, 7, 8, 12, 13, 14]]
        result = s.loc[2:4:2, 'a':'c']
        tm.assert_series_equal(result, expected)

    def test_get_loc_single_level(self, single_level_multiindex):
        """Smoke test: every key of a one-level MultiIndex can be looked up."""
        single_level = single_level_multiindex
        s = Series(np.random.randn(len(single_level)),
                   index=single_level)
        for k in single_level.values:
            # no assertion: the lookup only has to succeed
            s[k]

    def test_loc_getitem_int_slice(self):
        """Integer slices passed to ``.loc`` are treated as label slices."""
        # GH 3053
        # loc should treat integer slices like label slices

        index = MultiIndex.from_tuples(
            list(itertools.product([6, 7, 8], ['a', 'b'])))
        df = DataFrame(np.random.randn(6, 6), index, index)
        result = df.loc[6:8, :]
        expected = df
        tm.assert_frame_equal(result, expected)

        index = MultiIndex.from_tuples(
            list(itertools.product([10, 20, 30], ['a', 'b'])))
        df = DataFrame(np.random.randn(6, 6), index, index)
        result = df.loc[20:30, :]
        expected = df.iloc[2:]
        tm.assert_frame_equal(result, expected)

        # doc examples
        result = df.loc[10, :]
        expected = df.iloc[0:2]
        expected.index = ['a', 'b']
        tm.assert_frame_equal(result, expected)

        result = df.loc[:, 10]
        # note: using df.ix[:, 10] as the expectation fails, so compare
        # against plain column selection instead
        expected = df[10]
        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize(
        'indexer_type_1',
        (list, tuple, set, slice, np.ndarray, Series, Index))
    @pytest.mark.parametrize(
        'indexer_type_2',
        (list, tuple, set, slice, np.ndarray, Series, Index))
    def test_loc_getitem_nested_indexer(self, indexer_type_1, indexer_type_2):
        """``.loc`` accepts every pairing of list-like or slice indexers."""
        # GH #19686
        # .loc should work with nested indexers which can be
        # any list-like objects (see `pandas.api.types.is_list_like`) or slices

        def convert_nested_indexer(indexer_type, keys):
            """Build an indexer of ``indexer_type`` out of ``keys``."""
            if indexer_type is np.ndarray:
                # ndarray cannot be built from its type object directly
                return np.array(keys)
            if indexer_type is slice:
                # the two keys become the slice's start and stop
                return slice(*keys)
            return indexer_type(keys)

        a = [10, 20, 30]
        b = [1, 2, 3]
        index = MultiIndex.from_product([a, b])
        df = DataFrame(
            np.arange(len(index), dtype='int64'),
            index=index, columns=['Data'])

        keys = ([10, 20], [2, 3])
        types = (indexer_type_1, indexer_type_2)

        # check indexers with all the combinations of nested objects
        # of all the valid types
        indexer = tuple(
            convert_nested_indexer(indexer_type, k)
            for indexer_type, k in zip(types, keys))

        result = df.loc[indexer, 'Data']
        expected = Series(
            [1, 2, 4, 5], name='Data',
            index=MultiIndex.from_product(keys))

        tm.assert_series_equal(result, expected)
@pytest.mark.parametrize('indexer, is_level1, expected_error', [
    ([], False, None),  # empty ok
    (['A'], False, None),
    (['A', 'D'], False, None),
    (['D'], False, r"\['D'\] not in index"),  # not any values found
    (pd.IndexSlice[:, ['foo']], True, None),
    (pd.IndexSlice[:, ['foo', 'bah']], True, None)
])
def test_loc_getitem_duplicates_multiindex_missing_indexers(indexer, is_level1,
                                                            expected_error):
    """Check ``.loc`` on a MultiIndex Series when some labels are missing.

    Either a KeyError matching ``expected_error`` is required, or the
    selection has to equal the expectation built for the given indexer.
    """
    # GH 7866
    # multi-index slicing with missing indexers
    outer = ['A', 'B', 'C']
    inner = ['foo', 'bar', 'baz']
    level_names = ['one', 'two']
    full_index = MultiIndex.from_product([outer, inner], names=level_names)
    ser = Series(np.arange(9, dtype='int64'), index=full_index).sort_index()

    # failure case first: nothing to compare against
    if expected_error is not None:
        with pytest.raises(KeyError, match=expected_error):
            ser.loc[indexer]
        return

    if indexer == []:
        wanted = ser.iloc[[]]
    elif is_level1:
        foo_index = MultiIndex.from_product([outer, ['foo']],
                                            names=level_names)
        wanted = Series([0, 3, 6], index=foo_index).sort_index()
    else:
        a_index = MultiIndex.from_product([['A'], inner], names=level_names)
        wanted = Series(np.arange(3, dtype='int64'),
                        index=a_index).sort_index()

    tm.assert_series_equal(ser.loc[indexer], wanted)
@pytest.mark.filterwarnings("ignore:\\n.ix:DeprecationWarning")
@pytest.mark.parametrize('indexer', [
    lambda s: s.loc[[(2000, 3, 10), (2000, 3, 13)]],
    lambda s: s.ix[[(2000, 3, 10), (2000, 3, 13)]]
])
def test_series_loc_getitem_fancy(
        multiindex_year_month_day_dataframe_random_data, indexer):
    """Selecting two full tuples must match reindexing on index[49:51]."""
    column_a = multiindex_year_month_day_dataframe_random_data['A']
    wanted_labels = column_a.index[49:51]
    wanted = column_a.reindex(wanted_labels)
    selected = indexer(column_a)
    tm.assert_series_equal(selected, wanted)
@pytest.mark.parametrize('columns_indexer', [
    ([], slice(None)),
    (['foo'], [])
])
def test_loc_getitem_duplicates_multiindex_empty_indexer(columns_indexer):
    """An empty list in a column indexer must select no columns at all."""
    # GH 8737
    # empty indexer
    rows = range(5)
    cols = MultiIndex.from_product((['foo', 'bar', 'baz'],
                                    ['alpha', 'beta']))
    frame = DataFrame(np.random.randn(5, 6), index=rows, columns=cols)
    frame = frame.sort_index(level=0, axis=1)

    # reindex returns a tuple; its first item is the (empty) new index
    no_columns = cols.reindex([])[0]
    wanted = DataFrame(index=rows, columns=no_columns)

    selected = frame.loc[:, columns_indexer]
    tm.assert_frame_equal(selected, wanted)
def test_loc_getitem_duplicates_multiindex_non_scalar_type_object():
    """A cell holding a function object is returned unchanged by ``.loc``."""
    # regression from < 0.14.0
    # GH 7914
    column_keys = [('functs', 'mean'), ('functs', 'median')]
    cells = [[np.mean, np.median], ['mean', 'median']]
    frame = DataFrame(cells,
                      columns=MultiIndex.from_tuples(column_keys),
                      index=['function', 'name'])
    fetched = frame.loc['function', ('functs', 'mean')]
    assert fetched == np.mean
def test_loc_getitem_tuple_plus_slice():
    """``df.loc[(0, 0), :]`` must equal ``df.loc[0, 0]`` on a 2-level index."""
    # GH 671
    columns = {'a': np.arange(10),
               'b': np.arange(10),
               'c': np.random.randn(10),
               'd': np.random.randn(10)}
    frame = DataFrame(columns)
    frame = frame.set_index(['a', 'b'])

    wanted = frame.loc[0, 0]
    selected = frame.loc[(0, 0), :]
    tm.assert_series_equal(selected, wanted)
def test_loc_getitem_int(frame_random_data_integer_multi_index):
    """``.loc[1]`` must equal the last three rows with level 0 dropped."""
    frame = frame_random_data_integer_multi_index
    selected = frame.loc[1]

    wanted = frame[-3:]
    # selecting on the first level is expected to drop that level
    wanted.index = wanted.index.droplevel(0)
    tm.assert_frame_equal(selected, wanted)
def test_loc_getitem_int_raises_exception(
        frame_random_data_integer_multi_index):
    """Looking up the label 3 via ``.loc`` must raise a KeyError."""
    frame = frame_random_data_integer_multi_index
    missing_label = 3
    with pytest.raises(KeyError, match=r"^3L?$"):
        frame.loc[missing_label]
def test_loc_getitem_lowerdim_corner(multiindex_dataframe_random_data):
    """A key that first raises KeyError is readable after it is assigned."""
    frame = multiindex_dataframe_random_data
    row_key = ('bar', 'three')

    # test setup - check key not in dataframe
    with pytest.raises(KeyError, match=r"^11L?$"):
        frame.loc[row_key, 'B']

    # in theory should be inserting in a sorted space????
    frame.loc[row_key, 'B'] = 0

    fetched = frame.sort_index().loc[row_key, 'B']
    assert fetched == 0
|