123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767 |
- """ test label based indexing with loc """
- from warnings import catch_warnings, filterwarnings
- import numpy as np
- import pytest
- from pandas.compat import PY2, StringIO, lrange
- import pandas as pd
- from pandas import DataFrame, Series, Timestamp, date_range
- from pandas.api.types import is_scalar
- from pandas.tests.indexing.common import Base
- from pandas.util import testing as tm
- class TestLoc(Base):
- def test_loc_getitem_dups(self):
- # GH 5678
- # repeated gettitems on a dup index returning a ndarray
- df = DataFrame(
- np.random.random_sample((20, 5)),
- index=['ABCDE' [x % 5] for x in range(20)])
- expected = df.loc['A', 0]
- result = df.loc[:, 0].loc['A']
- tm.assert_series_equal(result, expected)
- def test_loc_getitem_dups2(self):
- # GH4726
- # dup indexing with iloc/loc
- df = DataFrame([[1, 2, 'foo', 'bar', Timestamp('20130101')]],
- columns=['a', 'a', 'a', 'a', 'a'], index=[1])
- expected = Series([1, 2, 'foo', 'bar', Timestamp('20130101')],
- index=['a', 'a', 'a', 'a', 'a'], name=1)
- result = df.iloc[0]
- tm.assert_series_equal(result, expected)
- result = df.loc[1]
- tm.assert_series_equal(result, expected)
- def test_loc_setitem_dups(self):
- # GH 6541
- df_orig = DataFrame(
- {'me': list('rttti'),
- 'foo': list('aaade'),
- 'bar': np.arange(5, dtype='float64') * 1.34 + 2,
- 'bar2': np.arange(5, dtype='float64') * -.34 + 2}).set_index('me')
- indexer = tuple(['r', ['bar', 'bar2']])
- df = df_orig.copy()
- df.loc[indexer] *= 2.0
- tm.assert_series_equal(df.loc[indexer], 2.0 * df_orig.loc[indexer])
- indexer = tuple(['r', 'bar'])
- df = df_orig.copy()
- df.loc[indexer] *= 2.0
- assert df.loc[indexer] == 2.0 * df_orig.loc[indexer]
- indexer = tuple(['t', ['bar', 'bar2']])
- df = df_orig.copy()
- df.loc[indexer] *= 2.0
- tm.assert_frame_equal(df.loc[indexer], 2.0 * df_orig.loc[indexer])
- def test_loc_setitem_slice(self):
- # GH10503
- # assigning the same type should not change the type
- df1 = DataFrame({'a': [0, 1, 1],
- 'b': Series([100, 200, 300], dtype='uint32')})
- ix = df1['a'] == 1
- newb1 = df1.loc[ix, 'b'] + 1
- df1.loc[ix, 'b'] = newb1
- expected = DataFrame({'a': [0, 1, 1],
- 'b': Series([100, 201, 301], dtype='uint32')})
- tm.assert_frame_equal(df1, expected)
- # assigning a new type should get the inferred type
- df2 = DataFrame({'a': [0, 1, 1], 'b': [100, 200, 300]},
- dtype='uint64')
- ix = df1['a'] == 1
- newb2 = df2.loc[ix, 'b']
- df1.loc[ix, 'b'] = newb2
- expected = DataFrame({'a': [0, 1, 1], 'b': [100, 200, 300]},
- dtype='uint64')
- tm.assert_frame_equal(df2, expected)
- def test_loc_getitem_int(self):
- # int label
- self.check_result('int label', 'loc', 2, 'ix', 2,
- typs=['ints', 'uints'], axes=0)
- self.check_result('int label', 'loc', 3, 'ix', 3,
- typs=['ints', 'uints'], axes=1)
- self.check_result('int label', 'loc', 4, 'ix', 4,
- typs=['ints', 'uints'], axes=2)
- self.check_result('int label', 'loc', 2, 'ix', 2,
- typs=['label'], fails=KeyError)
- def test_loc_getitem_label(self):
- # label
- self.check_result('label', 'loc', 'c', 'ix', 'c', typs=['labels'],
- axes=0)
- self.check_result('label', 'loc', 'null', 'ix', 'null', typs=['mixed'],
- axes=0)
- self.check_result('label', 'loc', 8, 'ix', 8, typs=['mixed'], axes=0)
- self.check_result('label', 'loc', Timestamp('20130102'), 'ix', 1,
- typs=['ts'], axes=0)
- self.check_result('label', 'loc', 'c', 'ix', 'c', typs=['empty'],
- fails=KeyError)
- def test_loc_getitem_label_out_of_range(self):
- # out of range label
- self.check_result('label range', 'loc', 'f', 'ix', 'f',
- typs=['ints', 'uints', 'labels', 'mixed', 'ts'],
- fails=KeyError)
- self.check_result('label range', 'loc', 'f', 'ix', 'f',
- typs=['floats'], fails=KeyError)
- self.check_result('label range', 'loc', 20, 'ix', 20,
- typs=['ints', 'uints', 'mixed'], fails=KeyError)
- self.check_result('label range', 'loc', 20, 'ix', 20,
- typs=['labels'], fails=TypeError)
- self.check_result('label range', 'loc', 20, 'ix', 20, typs=['ts'],
- axes=0, fails=TypeError)
- self.check_result('label range', 'loc', 20, 'ix', 20, typs=['floats'],
- axes=0, fails=KeyError)
- def test_loc_getitem_label_list(self):
- # list of labels
- self.check_result('list lbl', 'loc', [0, 2, 4], 'ix', [0, 2, 4],
- typs=['ints', 'uints'], axes=0)
- self.check_result('list lbl', 'loc', [3, 6, 9], 'ix', [3, 6, 9],
- typs=['ints', 'uints'], axes=1)
- self.check_result('list lbl', 'loc', [4, 8, 12], 'ix', [4, 8, 12],
- typs=['ints', 'uints'], axes=2)
- self.check_result('list lbl', 'loc', ['a', 'b', 'd'], 'ix',
- ['a', 'b', 'd'], typs=['labels'], axes=0)
- self.check_result('list lbl', 'loc', ['A', 'B', 'C'], 'ix',
- ['A', 'B', 'C'], typs=['labels'], axes=1)
- self.check_result('list lbl', 'loc', ['Z', 'Y', 'W'], 'ix',
- ['Z', 'Y', 'W'], typs=['labels'], axes=2)
- self.check_result('list lbl', 'loc', [2, 8, 'null'], 'ix',
- [2, 8, 'null'], typs=['mixed'], axes=0)
- self.check_result('list lbl', 'loc',
- [Timestamp('20130102'), Timestamp('20130103')], 'ix',
- [Timestamp('20130102'), Timestamp('20130103')],
- typs=['ts'], axes=0)
- @pytest.mark.skipif(PY2, reason=("Catching warnings unreliable with "
- "Python 2 (GH #20770)"))
- def test_loc_getitem_label_list_with_missing(self):
- self.check_result('list lbl', 'loc', [0, 1, 2], 'indexer', [0, 1, 2],
- typs=['empty'], fails=KeyError)
- with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
- self.check_result('list lbl', 'loc', [0, 2, 10], 'ix', [0, 2, 10],
- typs=['ints', 'uints', 'floats'],
- axes=0, fails=KeyError)
- with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
- self.check_result('list lbl', 'loc', [3, 6, 7], 'ix', [3, 6, 7],
- typs=['ints', 'uints', 'floats'],
- axes=1, fails=KeyError)
- with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
- self.check_result('list lbl', 'loc', [4, 8, 10], 'ix', [4, 8, 10],
- typs=['ints', 'uints', 'floats'],
- axes=2, fails=KeyError)
- # GH 17758 - MultiIndex and missing keys
- with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
- self.check_result('list lbl', 'loc', [(1, 3), (1, 4), (2, 5)],
- 'ix', [(1, 3), (1, 4), (2, 5)],
- typs=['multi'],
- axes=0)
- def test_getitem_label_list_with_missing(self):
- s = Series(range(3), index=['a', 'b', 'c'])
- # consistency
- with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
- s[['a', 'd']]
- s = Series(range(3))
- with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
- s[[0, 3]]
- def test_loc_getitem_label_list_fails(self):
- # fails
- self.check_result('list lbl', 'loc', [20, 30, 40], 'ix', [20, 30, 40],
- typs=['ints', 'uints'], axes=1, fails=KeyError)
- self.check_result('list lbl', 'loc', [20, 30, 40], 'ix', [20, 30, 40],
- typs=['ints', 'uints'], axes=2, fails=KeyError)
- def test_loc_getitem_label_array_like(self):
- # array like
- self.check_result('array like', 'loc', Series(index=[0, 2, 4]).index,
- 'ix', [0, 2, 4], typs=['ints', 'uints'], axes=0)
- self.check_result('array like', 'loc', Series(index=[3, 6, 9]).index,
- 'ix', [3, 6, 9], typs=['ints', 'uints'], axes=1)
- self.check_result('array like', 'loc', Series(index=[4, 8, 12]).index,
- 'ix', [4, 8, 12], typs=['ints', 'uints'], axes=2)
- def test_loc_getitem_bool(self):
- # boolean indexers
- b = [True, False, True, False]
- self.check_result('bool', 'loc', b, 'ix', b,
- typs=['ints', 'uints', 'labels',
- 'mixed', 'ts', 'floats'])
- self.check_result('bool', 'loc', b, 'ix', b, typs=['empty'],
- fails=KeyError)
- def test_loc_getitem_int_slice(self):
- # ok
- self.check_result('int slice2', 'loc', slice(2, 4), 'ix', [2, 4],
- typs=['ints', 'uints'], axes=0)
- self.check_result('int slice2', 'loc', slice(3, 6), 'ix', [3, 6],
- typs=['ints', 'uints'], axes=1)
- self.check_result('int slice2', 'loc', slice(4, 8), 'ix', [4, 8],
- typs=['ints', 'uints'], axes=2)
- def test_loc_to_fail(self):
- # GH3449
- df = DataFrame(np.random.random((3, 3)),
- index=['a', 'b', 'c'],
- columns=['e', 'f', 'g'])
- # raise a KeyError?
- pytest.raises(KeyError, df.loc.__getitem__,
- tuple([[1, 2], [1, 2]]))
- # GH 7496
- # loc should not fallback
- s = Series()
- s.loc[1] = 1
- s.loc['a'] = 2
- pytest.raises(KeyError, lambda: s.loc[-1])
- pytest.raises(KeyError, lambda: s.loc[[-1, -2]])
- pytest.raises(KeyError, lambda: s.loc[['4']])
- s.loc[-1] = 3
- with tm.assert_produces_warning(FutureWarning,
- check_stacklevel=False):
- result = s.loc[[-1, -2]]
- expected = Series([3, np.nan], index=[-1, -2])
- tm.assert_series_equal(result, expected)
- s['a'] = 2
- pytest.raises(KeyError, lambda: s.loc[[-2]])
- del s['a']
- def f():
- s.loc[[-2]] = 0
- pytest.raises(KeyError, f)
- # inconsistency between .loc[values] and .loc[values,:]
- # GH 7999
- df = DataFrame([['a'], ['b']], index=[1, 2], columns=['value'])
- def f():
- df.loc[[3], :]
- pytest.raises(KeyError, f)
- def f():
- df.loc[[3]]
- pytest.raises(KeyError, f)
- def test_loc_getitem_list_with_fail(self):
- # 15747
- # should KeyError if *any* missing labels
- s = Series([1, 2, 3])
- s.loc[[2]]
- with pytest.raises(KeyError):
- s.loc[[3]]
- # a non-match and a match
- with tm.assert_produces_warning(FutureWarning):
- expected = s.loc[[2, 3]]
- result = s.reindex([2, 3])
- tm.assert_series_equal(result, expected)
- def test_loc_getitem_label_slice(self):
- # label slices (with ints)
- self.check_result('lab slice', 'loc', slice(1, 3),
- 'ix', slice(1, 3),
- typs=['labels', 'mixed', 'empty', 'ts', 'floats'],
- fails=TypeError)
- # real label slices
- self.check_result('lab slice', 'loc', slice('a', 'c'),
- 'ix', slice('a', 'c'), typs=['labels'], axes=0)
- self.check_result('lab slice', 'loc', slice('A', 'C'),
- 'ix', slice('A', 'C'), typs=['labels'], axes=1)
- self.check_result('lab slice', 'loc', slice('W', 'Z'),
- 'ix', slice('W', 'Z'), typs=['labels'], axes=2)
- self.check_result('ts slice', 'loc', slice('20130102', '20130104'),
- 'ix', slice('20130102', '20130104'),
- typs=['ts'], axes=0)
- self.check_result('ts slice', 'loc', slice('20130102', '20130104'),
- 'ix', slice('20130102', '20130104'),
- typs=['ts'], axes=1, fails=TypeError)
- self.check_result('ts slice', 'loc', slice('20130102', '20130104'),
- 'ix', slice('20130102', '20130104'),
- typs=['ts'], axes=2, fails=TypeError)
- # GH 14316
- self.check_result('ts slice rev', 'loc', slice('20130104', '20130102'),
- 'indexer', [0, 1, 2], typs=['ts_rev'], axes=0)
- self.check_result('mixed slice', 'loc', slice(2, 8), 'ix', slice(2, 8),
- typs=['mixed'], axes=0, fails=TypeError)
- self.check_result('mixed slice', 'loc', slice(2, 8), 'ix', slice(2, 8),
- typs=['mixed'], axes=1, fails=KeyError)
- self.check_result('mixed slice', 'loc', slice(2, 8), 'ix', slice(2, 8),
- typs=['mixed'], axes=2, fails=KeyError)
- self.check_result('mixed slice', 'loc', slice(2, 4, 2), 'ix', slice(
- 2, 4, 2), typs=['mixed'], axes=0, fails=TypeError)
- def test_loc_index(self):
- # gh-17131
- # a boolean index should index like a boolean numpy array
- df = DataFrame(
- np.random.random(size=(5, 10)),
- index=["alpha_0", "alpha_1", "alpha_2", "beta_0", "beta_1"])
- mask = df.index.map(lambda x: "alpha" in x)
- expected = df.loc[np.array(mask)]
- result = df.loc[mask]
- tm.assert_frame_equal(result, expected)
- result = df.loc[mask.values]
- tm.assert_frame_equal(result, expected)
- def test_loc_general(self):
- df = DataFrame(
- np.random.rand(4, 4), columns=['A', 'B', 'C', 'D'],
- index=['A', 'B', 'C', 'D'])
- # want this to work
- result = df.loc[:, "A":"B"].iloc[0:2, :]
- assert (result.columns == ['A', 'B']).all()
- assert (result.index == ['A', 'B']).all()
- # mixed type
- result = DataFrame({'a': [Timestamp('20130101')], 'b': [1]}).iloc[0]
- expected = Series([Timestamp('20130101'), 1], index=['a', 'b'], name=0)
- tm.assert_series_equal(result, expected)
- assert result.dtype == object
- def test_loc_setitem_consistency(self):
- # GH 6149
- # coerce similarly for setitem and loc when rows have a null-slice
- expected = DataFrame({'date': Series(0, index=range(5),
- dtype=np.int64),
- 'val': Series(range(5), dtype=np.int64)})
- df = DataFrame({'date': date_range('2000-01-01', '2000-01-5'),
- 'val': Series(
- range(5), dtype=np.int64)})
- df.loc[:, 'date'] = 0
- tm.assert_frame_equal(df, expected)
- df = DataFrame({'date': date_range('2000-01-01', '2000-01-5'),
- 'val': Series(range(5), dtype=np.int64)})
- df.loc[:, 'date'] = np.array(0, dtype=np.int64)
- tm.assert_frame_equal(df, expected)
- df = DataFrame({'date': date_range('2000-01-01', '2000-01-5'),
- 'val': Series(range(5), dtype=np.int64)})
- df.loc[:, 'date'] = np.array([0, 0, 0, 0, 0], dtype=np.int64)
- tm.assert_frame_equal(df, expected)
- expected = DataFrame({'date': Series('foo', index=range(5)),
- 'val': Series(range(5), dtype=np.int64)})
- df = DataFrame({'date': date_range('2000-01-01', '2000-01-5'),
- 'val': Series(range(5), dtype=np.int64)})
- df.loc[:, 'date'] = 'foo'
- tm.assert_frame_equal(df, expected)
- expected = DataFrame({'date': Series(1.0, index=range(5)),
- 'val': Series(range(5), dtype=np.int64)})
- df = DataFrame({'date': date_range('2000-01-01', '2000-01-5'),
- 'val': Series(range(5), dtype=np.int64)})
- df.loc[:, 'date'] = 1.0
- tm.assert_frame_equal(df, expected)
- # GH 15494
- # setting on frame with single row
- df = DataFrame({'date': Series([Timestamp('20180101')])})
- df.loc[:, 'date'] = 'string'
- expected = DataFrame({'date': Series(['string'])})
- tm.assert_frame_equal(df, expected)
- def test_loc_setitem_consistency_empty(self):
- # empty (essentially noops)
- expected = DataFrame(columns=['x', 'y'])
- expected['x'] = expected['x'].astype(np.int64)
- df = DataFrame(columns=['x', 'y'])
- df.loc[:, 'x'] = 1
- tm.assert_frame_equal(df, expected)
- df = DataFrame(columns=['x', 'y'])
- df['x'] = 1
- tm.assert_frame_equal(df, expected)
- def test_loc_setitem_consistency_slice_column_len(self):
- # .loc[:,column] setting with slice == len of the column
- # GH10408
- data = """Level_0,,,Respondent,Respondent,Respondent,OtherCat,OtherCat
- Level_1,,,Something,StartDate,EndDate,Yes/No,SomethingElse
- Region,Site,RespondentID,,,,,
- Region_1,Site_1,3987227376,A,5/25/2015 10:59,5/25/2015 11:22,Yes,
- Region_1,Site_1,3980680971,A,5/21/2015 9:40,5/21/2015 9:52,Yes,Yes
- Region_1,Site_2,3977723249,A,5/20/2015 8:27,5/20/2015 8:41,Yes,
- Region_1,Site_2,3977723089,A,5/20/2015 8:33,5/20/2015 9:09,Yes,No"""
- df = pd.read_csv(StringIO(data), header=[0, 1], index_col=[0, 1, 2])
- df.loc[:, ('Respondent', 'StartDate')] = pd.to_datetime(df.loc[:, (
- 'Respondent', 'StartDate')])
- df.loc[:, ('Respondent', 'EndDate')] = pd.to_datetime(df.loc[:, (
- 'Respondent', 'EndDate')])
- df.loc[:, ('Respondent', 'Duration')] = df.loc[:, (
- 'Respondent', 'EndDate')] - df.loc[:, ('Respondent', 'StartDate')]
- df.loc[:, ('Respondent', 'Duration')] = df.loc[:, (
- 'Respondent', 'Duration')].astype('timedelta64[s]')
- expected = Series([1380, 720, 840, 2160.], index=df.index,
- name=('Respondent', 'Duration'))
- tm.assert_series_equal(df[('Respondent', 'Duration')], expected)
- def test_loc_setitem_frame(self):
- df = self.frame_labels
- result = df.iloc[0, 0]
- df.loc['a', 'A'] = 1
- result = df.loc['a', 'A']
- assert result == 1
- result = df.iloc[0, 0]
- assert result == 1
- df.loc[:, 'B':'D'] = 0
- expected = df.loc[:, 'B':'D']
- result = df.iloc[:, 1:]
- tm.assert_frame_equal(result, expected)
- # GH 6254
- # setting issue
- df = DataFrame(index=[3, 5, 4], columns=['A'])
- df.loc[[4, 3, 5], 'A'] = np.array([1, 2, 3], dtype='int64')
- expected = DataFrame(dict(A=Series(
- [1, 2, 3], index=[4, 3, 5]))).reindex(index=[3, 5, 4])
- tm.assert_frame_equal(df, expected)
- # GH 6252
- # setting with an empty frame
- keys1 = ['@' + str(i) for i in range(5)]
- val1 = np.arange(5, dtype='int64')
- keys2 = ['@' + str(i) for i in range(4)]
- val2 = np.arange(4, dtype='int64')
- index = list(set(keys1).union(keys2))
- df = DataFrame(index=index)
- df['A'] = np.nan
- df.loc[keys1, 'A'] = val1
- df['B'] = np.nan
- df.loc[keys2, 'B'] = val2
- expected = DataFrame(dict(A=Series(val1, index=keys1), B=Series(
- val2, index=keys2))).reindex(index=index)
- tm.assert_frame_equal(df, expected)
- # GH 8669
- # invalid coercion of nan -> int
- df = DataFrame({'A': [1, 2, 3], 'B': np.nan})
- df.loc[df.B > df.A, 'B'] = df.A
- expected = DataFrame({'A': [1, 2, 3], 'B': np.nan})
- tm.assert_frame_equal(df, expected)
- # GH 6546
- # setting with mixed labels
- df = DataFrame({1: [1, 2], 2: [3, 4], 'a': ['a', 'b']})
- result = df.loc[0, [1, 2]]
- expected = Series([1, 3], index=[1, 2], dtype=object, name=0)
- tm.assert_series_equal(result, expected)
- expected = DataFrame({1: [5, 2], 2: [6, 4], 'a': ['a', 'b']})
- df.loc[0, [1, 2]] = [5, 6]
- tm.assert_frame_equal(df, expected)
- def test_loc_setitem_frame_multiples(self):
- # multiple setting
- df = DataFrame({'A': ['foo', 'bar', 'baz'],
- 'B': Series(
- range(3), dtype=np.int64)})
- rhs = df.loc[1:2]
- rhs.index = df.index[0:2]
- df.loc[0:1] = rhs
- expected = DataFrame({'A': ['bar', 'baz', 'baz'],
- 'B': Series(
- [1, 2, 2], dtype=np.int64)})
- tm.assert_frame_equal(df, expected)
- # multiple setting with frame on rhs (with M8)
- df = DataFrame({'date': date_range('2000-01-01', '2000-01-5'),
- 'val': Series(
- range(5), dtype=np.int64)})
- expected = DataFrame({'date': [Timestamp('20000101'), Timestamp(
- '20000102'), Timestamp('20000101'), Timestamp('20000102'),
- Timestamp('20000103')],
- 'val': Series(
- [0, 1, 0, 1, 2], dtype=np.int64)})
- rhs = df.loc[0:2]
- rhs.index = df.index[2:5]
- df.loc[2:4] = rhs
- tm.assert_frame_equal(df, expected)
- @pytest.mark.parametrize(
- 'indexer', [['A'], slice(None, 'A', None), np.array(['A'])])
- @pytest.mark.parametrize(
- 'value', [['Z'], np.array(['Z'])])
- def test_loc_setitem_with_scalar_index(self, indexer, value):
- # GH #19474
- # assigning like "df.loc[0, ['A']] = ['Z']" should be evaluated
- # elementwisely, not using "setter('A', ['Z'])".
- df = pd.DataFrame([[1, 2], [3, 4]], columns=['A', 'B'])
- df.loc[0, indexer] = value
- result = df.loc[0, 'A']
- assert is_scalar(result) and result == 'Z'
- def test_loc_coerceion(self):
- # 12411
- df = DataFrame({'date': [Timestamp('20130101').tz_localize('UTC'),
- pd.NaT]})
- expected = df.dtypes
- result = df.iloc[[0]]
- tm.assert_series_equal(result.dtypes, expected)
- result = df.iloc[[1]]
- tm.assert_series_equal(result.dtypes, expected)
- # 12045
- import datetime
- df = DataFrame({'date': [datetime.datetime(2012, 1, 1),
- datetime.datetime(1012, 1, 2)]})
- expected = df.dtypes
- result = df.iloc[[0]]
- tm.assert_series_equal(result.dtypes, expected)
- result = df.iloc[[1]]
- tm.assert_series_equal(result.dtypes, expected)
- # 11594
- df = DataFrame({'text': ['some words'] + [None] * 9})
- expected = df.dtypes
- result = df.iloc[0:2]
- tm.assert_series_equal(result.dtypes, expected)
- result = df.iloc[3:]
- tm.assert_series_equal(result.dtypes, expected)
- def test_loc_non_unique(self):
- # GH3659
- # non-unique indexer with loc slice
- # https://groups.google.com/forum/?fromgroups#!topic/pydata/zTm2No0crYs
- # these are going to raise because the we are non monotonic
- df = DataFrame({'A': [1, 2, 3, 4, 5, 6],
- 'B': [3, 4, 5, 6, 7, 8]}, index=[0, 1, 0, 1, 2, 3])
- pytest.raises(KeyError, df.loc.__getitem__,
- tuple([slice(1, None)]))
- pytest.raises(KeyError, df.loc.__getitem__,
- tuple([slice(0, None)]))
- pytest.raises(KeyError, df.loc.__getitem__, tuple([slice(1, 2)]))
- # monotonic are ok
- df = DataFrame({'A': [1, 2, 3, 4, 5, 6],
- 'B': [3, 4, 5, 6, 7, 8]},
- index=[0, 1, 0, 1, 2, 3]).sort_index(axis=0)
- result = df.loc[1:]
- expected = DataFrame({'A': [2, 4, 5, 6], 'B': [4, 6, 7, 8]},
- index=[1, 1, 2, 3])
- tm.assert_frame_equal(result, expected)
- result = df.loc[0:]
- tm.assert_frame_equal(result, df)
- result = df.loc[1:2]
- expected = DataFrame({'A': [2, 4, 5], 'B': [4, 6, 7]},
- index=[1, 1, 2])
- tm.assert_frame_equal(result, expected)
- def test_loc_non_unique_memory_error(self):
- # GH 4280
- # non_unique index with a large selection triggers a memory error
- columns = list('ABCDEFG')
- def gen_test(l, l2):
- return pd.concat([
- DataFrame(np.random.randn(l, len(columns)),
- index=lrange(l), columns=columns),
- DataFrame(np.ones((l2, len(columns))),
- index=[0] * l2, columns=columns)])
- def gen_expected(df, mask):
- len_mask = len(mask)
- return pd.concat([df.take([0]),
- DataFrame(np.ones((len_mask, len(columns))),
- index=[0] * len_mask,
- columns=columns),
- df.take(mask[1:])])
- df = gen_test(900, 100)
- assert df.index.is_unique is False
- mask = np.arange(100)
- result = df.loc[mask]
- expected = gen_expected(df, mask)
- tm.assert_frame_equal(result, expected)
- df = gen_test(900000, 100000)
- assert df.index.is_unique is False
- mask = np.arange(100000)
- result = df.loc[mask]
- expected = gen_expected(df, mask)
- tm.assert_frame_equal(result, expected)
- def test_loc_name(self):
- # GH 3880
- df = DataFrame([[1, 1], [1, 1]])
- df.index.name = 'index_name'
- result = df.iloc[[0, 1]].index.name
- assert result == 'index_name'
- with catch_warnings(record=True):
- filterwarnings("ignore", "\\n.ix", DeprecationWarning)
- result = df.ix[[0, 1]].index.name
- assert result == 'index_name'
- result = df.loc[[0, 1]].index.name
- assert result == 'index_name'
- def test_loc_empty_list_indexer_is_ok(self):
- from pandas.util.testing import makeCustomDataframe as mkdf
- df = mkdf(5, 2)
- # vertical empty
- tm.assert_frame_equal(df.loc[:, []], df.iloc[:, :0],
- check_index_type=True, check_column_type=True)
- # horizontal empty
- tm.assert_frame_equal(df.loc[[], :], df.iloc[:0, :],
- check_index_type=True, check_column_type=True)
- # horizontal empty
- tm.assert_frame_equal(df.loc[[]], df.iloc[:0, :],
- check_index_type=True,
- check_column_type=True)
- def test_identity_slice_returns_new_object(self):
- # GH13873
- original_df = DataFrame({'a': [1, 2, 3]})
- sliced_df = original_df.loc[:]
- assert sliced_df is not original_df
- assert original_df[:] is not original_df
- # should be a shallow copy
- original_df['a'] = [4, 4, 4]
- assert (sliced_df['a'] == 4).all()
- # These should not return copies
- assert original_df is original_df.loc[:, :]
- df = DataFrame(np.random.randn(10, 4))
- assert df[0] is df.loc[:, 0]
- # Same tests for Series
- original_series = Series([1, 2, 3, 4, 5, 6])
- sliced_series = original_series.loc[:]
- assert sliced_series is not original_series
- assert original_series[:] is not original_series
- original_series[:3] = [7, 8, 9]
- assert all(sliced_series[:3] == [7, 8, 9])
- def test_loc_uint64(self):
- # GH20722
- # Test whether loc accept uint64 max value as index.
- s = pd.Series([1, 2],
- index=[np.iinfo('uint64').max - 1,
- np.iinfo('uint64').max])
- result = s.loc[np.iinfo('uint64').max - 1]
- expected = s.iloc[0]
- assert result == expected
- result = s.loc[[np.iinfo('uint64').max - 1]]
- expected = s.iloc[[0]]
- tm.assert_series_equal(result, expected)
- result = s.loc[[np.iinfo('uint64').max - 1,
- np.iinfo('uint64').max]]
- tm.assert_series_equal(result, s)
- def test_loc_setitem_empty_append(self):
- # GH6173, various appends to an empty dataframe
- data = [1, 2, 3]
- expected = DataFrame({'x': data, 'y': [None] * len(data)})
- # appends to fit length of data
- df = DataFrame(columns=['x', 'y'])
- df.loc[:, 'x'] = data
- tm.assert_frame_equal(df, expected)
- # only appends one value
- expected = DataFrame({'x': [1.0], 'y': [np.nan]})
- df = DataFrame(columns=['x', 'y'],
- dtype=np.float)
- df.loc[0, 'x'] = expected.loc[0, 'x']
- tm.assert_frame_equal(df, expected)
- def test_loc_setitem_empty_append_raises(self):
- # GH6173, various appends to an empty dataframe
- data = [1, 2]
- df = DataFrame(columns=['x', 'y'])
- msg = (r"None of \[Int64Index\(\[0, 1\], dtype='int64'\)\] "
- r"are in the \[index\]")
- with pytest.raises(KeyError, match=msg):
- df.loc[[0, 1], 'x'] = data
- msg = "cannot copy sequence with size 2 to array axis with dimension 0"
- with pytest.raises(ValueError, match=msg):
- df.loc[0:2, 'x'] = data
|