123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296 |
- # coding=utf-8
- # pylint: disable-msg=E1101,W0612
- import numpy as np
- import pytest
- import pandas as pd
- import pandas.util.testing as tm
- from .common import TestData
class TestSeriesReplace(TestData):
    """Tests for ``Series.replace``.

    Covers scalar, list-like and dict-like ``to_replace`` arguments,
    in-place replacement, the pad behaviour used when no value is given,
    and the errors raised for malformed or incomparable arguments.
    """

    @staticmethod
    def _check_replace_on_object_series():
        """Check list, dict and in-place replacement on an object Series.

        Shared by ``test_replace`` and ``test_replace2`` so the scenario
        is maintained in a single place.
        """
        N = 100
        ser = pd.Series(np.fabs(np.random.randn(N)), tm.makeDateIndex(N),
                        dtype=object)
        ser[:5] = np.nan
        ser[6:10] = 'foo'
        ser[20:30] = 'bar'

        # replace list with a single value
        rs = ser.replace([np.nan, 'foo', 'bar'], -1)

        assert (rs[:5] == -1).all()
        assert (rs[6:10] == -1).all()
        assert (rs[20:30] == -1).all()
        # the source series still holds its NaNs
        assert (pd.isna(ser[:5])).all()

        # replace with different values
        rs = ser.replace({np.nan: -1, 'foo': -2, 'bar': -3})

        assert (rs[:5] == -1).all()
        assert (rs[6:10] == -2).all()
        assert (rs[20:30] == -3).all()
        assert (pd.isna(ser[:5])).all()

        # replace with different values with 2 lists
        rs2 = ser.replace([np.nan, 'foo', 'bar'], [-1, -2, -3])
        tm.assert_series_equal(rs, rs2)

        # replace inplace
        ser.replace([np.nan, 'foo', 'bar'], -1, inplace=True)

        assert (ser[:5] == -1).all()
        assert (ser[6:10] == -1).all()
        assert (ser[20:30] == -1).all()

    def test_replace(self):
        """Replace NaN, zero and inf values, then check malformed input."""
        N = 100
        ser = pd.Series(np.random.randn(N))
        ser[0:4] = np.nan
        ser[6:10] = 0

        # replace list with a single value
        ser.replace([np.nan], -1, inplace=True)

        exp = ser.fillna(-1)
        tm.assert_series_equal(ser, exp)

        rs = ser.replace(0., np.nan)
        ser[ser == 0.] = np.nan
        tm.assert_series_equal(rs, ser)

        # list, dict and in-place replacement on an object-dtype series
        self._check_replace_on_object_series()

        ser = pd.Series([np.nan, 0, np.inf])
        tm.assert_series_equal(ser.replace(np.nan, 0), ser.fillna(0))

        ser = pd.Series([np.nan, 0, 'foo', 'bar', np.inf, None, pd.NaT])
        tm.assert_series_equal(ser.replace(np.nan, 0), ser.fillna(0))
        filled = ser.copy()
        filled[4] = 0
        tm.assert_series_equal(ser.replace(np.inf, 0), filled)

        ser = pd.Series(self.ts.index)
        tm.assert_series_equal(ser.replace(np.nan, 0), ser.fillna(0))

        # malformed
        msg = r"Replacement lists must match in length\. Expecting 3 got 2"
        with pytest.raises(ValueError, match=msg):
            ser.replace([1, 2, 3], [np.nan, 0])

        # make sure that we aren't just masking a TypeError because bools don't
        # implement indexing
        with pytest.raises(TypeError, match='Cannot compare types .+'):
            ser.replace([1, 2], [np.nan, 0])

        ser = pd.Series([0, 1, 2, 3, 4])
        result = ser.replace([0, 1, 2, 3, 4], [4, 3, 2, 1, 0])
        tm.assert_series_equal(result, pd.Series([4, 3, 2, 1, 0]))

    def test_replace_gh5319(self):
        """Replace without a value, and replace Timestamp / NaT entries."""
        # API change from 0.12?
        # GH 5319
        ser = pd.Series([0, np.nan, 2, 3, 4])
        expected = ser.ffill()
        result = ser.replace([np.nan])
        tm.assert_series_equal(result, expected)

        ser = pd.Series([0, np.nan, 2, 3, 4])
        expected = ser.ffill()
        result = ser.replace(np.nan)
        tm.assert_series_equal(result, expected)

        # GH 5797
        ser = pd.Series(pd.date_range('20130101', periods=5))
        expected = ser.copy()
        expected.loc[2] = pd.Timestamp('20120101')
        result = ser.replace({pd.Timestamp('20130103'):
                              pd.Timestamp('20120101')})
        tm.assert_series_equal(result, expected)
        result = ser.replace(pd.Timestamp('20130103'),
                             pd.Timestamp('20120101'))
        tm.assert_series_equal(result, expected)

        # GH 11792: Test with replacing NaT in a list with tz data
        ts = pd.Timestamp('2015/01/01', tz='UTC')
        s = pd.Series([pd.NaT, ts])
        result = s.replace([np.nan, pd.NaT], pd.Timestamp.min)
        expected = pd.Series([pd.Timestamp.min, ts], dtype=object)
        tm.assert_series_equal(expected, result)

    def test_replace_with_single_list(self):
        """Replace with only a list of targets, plus a bad ``method``."""
        ser = pd.Series([0, 1, 2, 3, 4])
        result = ser.replace([1, 2, 3])
        tm.assert_series_equal(result, pd.Series([0, 0, 0, 0, 4]))

        s = ser.copy()
        s.replace([1, 2, 3], inplace=True)
        tm.assert_series_equal(s, pd.Series([0, 0, 0, 0, 4]))

        # make sure things don't get corrupted when fillna call fails
        s = ser.copy()
        msg = (r"Invalid fill method\. Expecting pad \(ffill\) or backfill"
               r" \(bfill\)\. Got crash_cymbal")
        with pytest.raises(ValueError, match=msg):
            s.replace([1, 2, 3], inplace=True, method='crash_cymbal')
        tm.assert_series_equal(s, ser)

    def test_replace_with_empty_list(self):
        """Empty-list target is a no-op; list replacement values raise."""
        # GH 21977
        s = pd.Series([[1], [2, 3], [], np.nan, [4]])
        # compare against a copy so that an accidental in-place mutation
        # of ``s`` cannot make the assertion pass trivially
        expected = s.copy()
        result = s.replace([], np.nan)
        tm.assert_series_equal(result, expected)

        # GH 19266
        with pytest.raises(ValueError, match="cannot assign mismatch"):
            s.replace({np.nan: []})
        with pytest.raises(ValueError, match="cannot assign mismatch"):
            s.replace({np.nan: ['dummy', 'alt']})

    def test_replace_mixed_types(self):
        """Replace int64 values with floats, strings, Timestamps, bools."""
        s = pd.Series(np.arange(5), dtype='int64')

        def check_replace(to_rep, val, expected):
            # verify both the returned copy and the in-place variant
            sc = s.copy()
            r = s.replace(to_rep, val)
            sc.replace(to_rep, val, inplace=True)
            tm.assert_series_equal(expected, r)
            tm.assert_series_equal(expected, sc)

        # MUST upcast to float
        e = pd.Series([0., 1., 2., 3., 4.])
        tr, v = [3], [3.0]
        check_replace(tr, v, e)

        # MUST upcast to float
        e = pd.Series([0, 1, 2, 3.5, 4])
        tr, v = [3], [3.5]
        check_replace(tr, v, e)

        # casts to object
        e = pd.Series([0, 1, 2, 3.5, 'a'])
        tr, v = [3, 4], [3.5, 'a']
        check_replace(tr, v, e)

        # again casts to object
        e = pd.Series([0, 1, 2, 3.5, pd.Timestamp('20130101')])
        tr, v = [3, 4], [3.5, pd.Timestamp('20130101')]
        check_replace(tr, v, e)

        # casts to object
        e = pd.Series([0, 1, 2, 3.5, True], dtype='object')
        tr, v = [3, 4], [3.5, True]
        check_replace(tr, v, e)

        # test an object with dates + floats + integers + strings
        dr = pd.date_range('1/1/2001', '1/10/2001',
                           freq='D').to_series().reset_index(drop=True)
        result = dr.astype(object).replace(
            [dr[0], dr[1], dr[2]], [1.0, 2, 'a'])
        expected = pd.Series([1.0, 2, 'a'] + dr[3:].tolist(), dtype=object)
        tm.assert_series_equal(result, expected)

    def test_replace_bool_with_string_no_op(self):
        """A string target that is absent leaves a bool Series unchanged."""
        # nonexistent elements
        s = pd.Series([True, False, True])
        # snapshot first so the comparison is not against ``s`` itself
        expected = s.copy()
        result = s.replace('fun', 'in-the-sun')
        tm.assert_series_equal(expected, result)

    def test_replace_bool_with_string(self):
        """Replace ``True`` in a bool Series with a string."""
        s = pd.Series([True, False, True])
        result = s.replace(True, '2u')
        expected = pd.Series(['2u', False, '2u'])
        tm.assert_series_equal(expected, result)

    def test_replace_bool_with_bool(self):
        """Replace ``True`` with ``False`` in a bool Series."""
        s = pd.Series([True, False, True])
        result = s.replace(True, False)
        expected = pd.Series([False] * len(s))
        tm.assert_series_equal(expected, result)

    def test_replace_with_dict_with_bool_keys(self):
        """A dict mixing str and bool keys raises on a bool Series."""
        s = pd.Series([True, False, True])
        with pytest.raises(TypeError, match='Cannot compare types .+'):
            s.replace({'asdf': 'asdb', True: 'yes'})

    def test_replace2(self):
        """List, dict and in-place replacement on an object Series."""
        self._check_replace_on_object_series()

    def test_replace_with_empty_dictlike(self):
        """An empty dict or empty Series as target is a no-op."""
        # GH 15289
        s = pd.Series(list('abcd'))
        # snapshot first so the comparison is not against ``s`` itself
        expected = s.copy()
        tm.assert_series_equal(expected, s.replace(dict()))
        tm.assert_series_equal(expected, s.replace(pd.Series([])))

    def test_replace_string_with_number(self):
        """A string target does not match integer values."""
        # GH 15743
        s = pd.Series([1, 2, 3])
        result = s.replace('2', np.nan)
        expected = pd.Series([1, 2, 3])
        tm.assert_series_equal(expected, result)

    def test_replace_replacer_equals_replacement(self):
        """Swapping two values through a dict swaps them exactly once."""
        # GH 20656
        # make sure all replacers are matching against original values
        s = pd.Series(['a', 'b'])
        expected = pd.Series(['b', 'a'])
        result = s.replace({'a': 'b', 'b': 'a'})
        tm.assert_series_equal(expected, result)

    def test_replace_unicode_with_number(self):
        """A unicode-literal target does not match integer values."""
        # GH 15743
        s = pd.Series([1, 2, 3])
        result = s.replace(u'2', np.nan)
        expected = pd.Series([1, 2, 3])
        tm.assert_series_equal(expected, result)

    def test_replace_mixed_types_with_string(self):
        """Replace an int and a str target in a mixed Series with NaN."""
        # Testing mixed
        s = pd.Series([1, 2, 3, '4', 4, 5])
        result = s.replace([2, '4'], np.nan)
        expected = pd.Series([1, np.nan, 3, np.nan, 4, 5])
        tm.assert_series_equal(expected, result)

    def test_replace_with_no_overflowerror(self):
        """Replace to and from numeric strings too large for int64."""
        # GH 25616
        # casts to object without Exception from OverflowError
        s = pd.Series([0, 1, 2, 3, 4])
        result = s.replace([3], ['100000000000000000000'])
        expected = pd.Series([0, 1, 2, '100000000000000000000', 4])
        tm.assert_series_equal(result, expected)

        s = pd.Series([0, '100000000000000000000',
                       '100000000000000000001'])
        result = s.replace(['100000000000000000000'], [1])
        expected = pd.Series([0, 1, '100000000000000000001'])
        tm.assert_series_equal(result, expected)
|