# Tests for DataFrame item-cache updating and chained-assignment detection.
- import numpy as np
- import pytest
- import pandas as pd
- from pandas import (
- DataFrame, Series, Timestamp, compat, date_range, option_context)
- from pandas.core import common as com
- from pandas.util import testing as tm
class TestCaching(object):
    """Tests that cached column lookups stay in sync with their DataFrame."""

    def test_slice_consolidate_invalidate_item_cache(self):
        """A chained ``.iloc`` write is visible through the frame (#3970)."""
        # this is chained assignment, but will 'work'
        with option_context('chained_assignment', None):
            # #3970
            df = DataFrame({"aa": list(range(5)), "bb": [2.2] * 5})

            # Creates a second float block
            df["cc"] = 0.0

            # Bare lookup on purpose: caches a reference to the 'bb' series
            df["bb"]

            # repr machinery triggers consolidation
            repr(df)

            # Assignment to wrong series
            df['bb'].iloc[0] = 0.17
            df._clear_item_cache()
            tm.assert_almost_equal(df['bb'][0], 0.17)

    def test_setitem_cache_updating(self):
        """Writes through ``.loc`` and chained slices update the frame.

        Covers GH 5424 (setting a new row label through ``.loc``) and
        GH 7084 (slice assignment on a column inside a loop).
        """
        # GH 5424
        cont = ['one', 'two', 'three', 'four', 'five', 'six', 'seven']

        # True looks the value up before the write ("ref the cache");
        # False writes without that prior lookup.  Both must give the
        # same result.
        for do_ref in [True, False]:
            df = DataFrame({'a': cont,
                            "b": cont[3:] + cont[:3],
                            'c': np.arange(7)})

            # ref the cache
            if do_ref:
                df.loc[0, "c"]

            # set it; label 7 is not in the default 0..6 index
            df.loc[7, 'c'] = 1

            assert df.loc[0, 'c'] == 0.0
            assert df.loc[7, 'c'] == 1.0

        # GH 7084
        # not updating cache on series setting with slices
        def make_frame(value):
            # Single column 'A' over the three days 5/7/2014 - 5/9/2014.
            return DataFrame({'A': [value] * 3},
                             index=date_range('5/7/2014', '5/9/2014'))

        expected = make_frame(600)
        df = DataFrame({'C': ['A', 'A', 'A'], 'D': [100, 200, 300]})
        six = Timestamp('5/7/2014')
        eix = Timestamp('5/9/2014')

        # loop through df to update out
        out = make_frame(0)
        for _, row in df.iterrows():
            out.loc[six:eix, row['C']] = out.loc[six:eix, row['C']] + row['D']

        tm.assert_frame_equal(out, expected)
        tm.assert_series_equal(out['A'], expected['A'])

        # try via a chain indexing
        # this actually works
        out = make_frame(0)
        for _, row in df.iterrows():
            v = out[row['C']][six:eix] + row['D']
            out[row['C']][six:eix] = v

        tm.assert_frame_equal(out, expected)
        tm.assert_series_equal(out['A'], expected['A'])

        # same update again, written as an in-place add through .loc
        out = make_frame(0)
        for _, row in df.iterrows():
            out.loc[six:eix, row['C']] += row['D']

        tm.assert_frame_equal(out, expected)
        tm.assert_series_equal(out['A'], expected['A'])
- class TestChaining(object):
- def test_setitem_chained_setfault(self):
- # GH6026
- data = ['right', 'left', 'left', 'left', 'right', 'left', 'timeout']
- mdata = ['right', 'left', 'left', 'left', 'right', 'left', 'none']
- df = DataFrame({'response': np.array(data)})
- mask = df.response == 'timeout'
- df.response[mask] = 'none'
- tm.assert_frame_equal(df, DataFrame({'response': mdata}))
- recarray = np.rec.fromarrays([data], names=['response'])
- df = DataFrame(recarray)
- mask = df.response == 'timeout'
- df.response[mask] = 'none'
- tm.assert_frame_equal(df, DataFrame({'response': mdata}))
- df = DataFrame({'response': data, 'response1': data})
- mask = df.response == 'timeout'
- df.response[mask] = 'none'
- tm.assert_frame_equal(df, DataFrame({'response': mdata,
- 'response1': data}))
- # GH 6056
- expected = DataFrame(dict(A=[np.nan, 'bar', 'bah', 'foo', 'bar']))
- df = DataFrame(dict(A=np.array(['foo', 'bar', 'bah', 'foo', 'bar'])))
- df['A'].iloc[0] = np.nan
- result = df.head()
- tm.assert_frame_equal(result, expected)
- df = DataFrame(dict(A=np.array(['foo', 'bar', 'bah', 'foo', 'bar'])))
- df.A.iloc[0] = np.nan
- result = df.head()
- tm.assert_frame_equal(result, expected)
- def test_detect_chained_assignment(self):
- pd.set_option('chained_assignment', 'raise')
- # work with the chain
- expected = DataFrame([[-5, 1], [-6, 3]], columns=list('AB'))
- df = DataFrame(np.arange(4).reshape(2, 2),
- columns=list('AB'), dtype='int64')
- assert df._is_copy is None
- df['A'][0] = -5
- df['A'][1] = -6
- tm.assert_frame_equal(df, expected)
- # test with the chaining
- df = DataFrame({'A': Series(range(2), dtype='int64'),
- 'B': np.array(np.arange(2, 4), dtype=np.float64)})
- assert df._is_copy is None
- with pytest.raises(com.SettingWithCopyError):
- df['A'][0] = -5
- with pytest.raises(com.SettingWithCopyError):
- df['A'][1] = np.nan
- assert df['A']._is_copy is None
- # Using a copy (the chain), fails
- df = DataFrame({'A': Series(range(2), dtype='int64'),
- 'B': np.array(np.arange(2, 4), dtype=np.float64)})
- with pytest.raises(com.SettingWithCopyError):
- df.loc[0]['A'] = -5
- # Doc example
- df = DataFrame({'a': ['one', 'one', 'two', 'three',
- 'two', 'one', 'six'],
- 'c': Series(range(7), dtype='int64')})
- assert df._is_copy is None
- with pytest.raises(com.SettingWithCopyError):
- indexer = df.a.str.startswith('o')
- df[indexer]['c'] = 42
- expected = DataFrame({'A': [111, 'bbb', 'ccc'], 'B': [1, 2, 3]})
- df = DataFrame({'A': ['aaa', 'bbb', 'ccc'], 'B': [1, 2, 3]})
- with pytest.raises(com.SettingWithCopyError):
- df['A'][0] = 111
- with pytest.raises(com.SettingWithCopyError):
- df.loc[0]['A'] = 111
- df.loc[0, 'A'] = 111
- tm.assert_frame_equal(df, expected)
- # gh-5475: Make sure that is_copy is picked up reconstruction
- df = DataFrame({"A": [1, 2]})
- assert df._is_copy is None
- with tm.ensure_clean('__tmp__pickle') as path:
- df.to_pickle(path)
- df2 = pd.read_pickle(path)
- df2["B"] = df2["A"]
- df2["B"] = df2["A"]
- # gh-5597: a spurious raise as we are setting the entire column here
- from string import ascii_letters as letters
- def random_text(nobs=100):
- df = []
- for i in range(nobs):
- idx = np.random.randint(len(letters), size=2)
- idx.sort()
- df.append([letters[idx[0]:idx[1]]])
- return DataFrame(df, columns=['letters'])
- df = random_text(100000)
- # Always a copy
- x = df.iloc[[0, 1, 2]]
- assert x._is_copy is not None
- x = df.iloc[[0, 1, 2, 4]]
- assert x._is_copy is not None
- # Explicitly copy
- indexer = df.letters.apply(lambda x: len(x) > 10)
- df = df.loc[indexer].copy()
- assert df._is_copy is None
- df['letters'] = df['letters'].apply(str.lower)
- # Implicitly take
- df = random_text(100000)
- indexer = df.letters.apply(lambda x: len(x) > 10)
- df = df.loc[indexer]
- assert df._is_copy is not None
- df['letters'] = df['letters'].apply(str.lower)
- # Implicitly take 2
- df = random_text(100000)
- indexer = df.letters.apply(lambda x: len(x) > 10)
- df = df.loc[indexer]
- assert df._is_copy is not None
- df.loc[:, 'letters'] = df['letters'].apply(str.lower)
- # Should be ok even though it's a copy!
- assert df._is_copy is None
- df['letters'] = df['letters'].apply(str.lower)
- assert df._is_copy is None
- df = random_text(100000)
- indexer = df.letters.apply(lambda x: len(x) > 10)
- df.loc[indexer, 'letters'] = (
- df.loc[indexer, 'letters'].apply(str.lower))
- # an identical take, so no copy
- df = DataFrame({'a': [1]}).dropna()
- assert df._is_copy is None
- df['a'] += 1
- df = DataFrame(np.random.randn(10, 4))
- s = df.iloc[:, 0].sort_values()
- tm.assert_series_equal(s, df.iloc[:, 0].sort_values())
- tm.assert_series_equal(s, df[0].sort_values())
- # see gh-6025: false positives
- df = DataFrame({'column1': ['a', 'a', 'a'], 'column2': [4, 8, 9]})
- str(df)
- df['column1'] = df['column1'] + 'b'
- str(df)
- df = df[df['column2'] != 8]
- str(df)
- df['column1'] = df['column1'] + 'c'
- str(df)
- # from SO:
- # http://stackoverflow.com/questions/24054495/potential-bug-setting-value-for-undefined-column-using-iloc
- df = DataFrame(np.arange(0, 9), columns=['count'])
- df['group'] = 'b'
- with pytest.raises(com.SettingWithCopyError):
- df.iloc[0:5]['group'] = 'a'
- # Mixed type setting but same dtype & changing dtype
- df = DataFrame(dict(A=date_range('20130101', periods=5),
- B=np.random.randn(5),
- C=np.arange(5, dtype='int64'),
- D=list('abcde')))
- with pytest.raises(com.SettingWithCopyError):
- df.loc[2]['D'] = 'foo'
- with pytest.raises(com.SettingWithCopyError):
- df.loc[2]['C'] = 'foo'
- with pytest.raises(com.SettingWithCopyError):
- df['C'][2] = 'foo'
- def test_setting_with_copy_bug(self):
- # operating on a copy
- df = DataFrame({'a': list(range(4)),
- 'b': list('ab..'),
- 'c': ['a', 'b', np.nan, 'd']})
- mask = pd.isna(df.c)
- def f():
- df[['c']][mask] = df[['b']][mask]
- pytest.raises(com.SettingWithCopyError, f)
- # invalid warning as we are returning a new object
- # GH 8730
- df1 = DataFrame({'x': Series(['a', 'b', 'c']),
- 'y': Series(['d', 'e', 'f'])})
- df2 = df1[['x']]
- # this should not raise
- df2['y'] = ['g', 'h', 'i']
- def test_detect_chained_assignment_warnings(self):
- with option_context("chained_assignment", "warn"):
- df = DataFrame({"A": ["aaa", "bbb", "ccc"], "B": [1, 2, 3]})
- with tm.assert_produces_warning(com.SettingWithCopyWarning):
- df.loc[0]["A"] = 111
- def test_detect_chained_assignment_warnings_filter_and_dupe_cols(self):
- # xref gh-13017.
- with option_context("chained_assignment", "warn"):
- df = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, -9]],
- columns=["a", "a", "c"])
- with tm.assert_produces_warning(com.SettingWithCopyWarning):
- df.c.loc[df.c > 0] = None
- expected = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, -9]],
- columns=["a", "a", "c"])
- tm.assert_frame_equal(df, expected)
- def test_chained_getitem_with_lists(self):
- # GH6394
- # Regression in chained getitem indexing with embedded list-like from
- # 0.12
- def check(result, expected):
- tm.assert_numpy_array_equal(result, expected)
- assert isinstance(result, np.ndarray)
- df = DataFrame({'A': 5 * [np.zeros(3)], 'B': 5 * [np.ones(3)]})
- expected = df['A'].iloc[2]
- result = df.loc[2, 'A']
- check(result, expected)
- result2 = df.iloc[2]['A']
- check(result2, expected)
- result3 = df['A'].loc[2]
- check(result3, expected)
- result4 = df['A'].iloc[2]
- check(result4, expected)
- @pytest.mark.filterwarnings("ignore::DeprecationWarning")
- @pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning")
- def test_cache_updating(self):
- # GH 4939, make sure to update the cache on setitem
- df = tm.makeDataFrame()
- df['A'] # cache series
- df.ix["Hello Friend"] = df.ix[0]
- assert "Hello Friend" in df['A'].index
- assert "Hello Friend" in df['B'].index
- panel = tm.makePanel()
- panel.ix[0] # get first item into cache
- panel.ix[:, :, 'A+1'] = panel.ix[:, :, 'A'] + 1
- assert "A+1" in panel.ix[0].columns
- assert "A+1" in panel.ix[1].columns
- # 10264
- df = DataFrame(np.zeros((5, 5), dtype='int64'), columns=[
- 'a', 'b', 'c', 'd', 'e'], index=range(5))
- df['f'] = 0
- df.f.values[3] = 1
- # TODO(wesm): unused?
- # y = df.iloc[np.arange(2, len(df))]
- df.f.values[3] = 2
- expected = DataFrame(np.zeros((5, 6), dtype='int64'), columns=[
- 'a', 'b', 'c', 'd', 'e', 'f'], index=range(5))
- expected.at[3, 'f'] = 2
- tm.assert_frame_equal(df, expected)
- expected = Series([0, 0, 0, 2, 0], name='f')
- tm.assert_series_equal(df.f, expected)
- def test_deprecate_is_copy(self):
- # GH18801
- df = DataFrame({"A": [1, 2, 3]})
- with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
- # getter
- df.is_copy
- with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
- # setter
- df.is_copy = "test deprecated is_copy"
|