123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374 |
- # coding=utf-8
- # pylint: disable-msg=E1101,W0612
- from datetime import datetime, timedelta
- from distutils.version import LooseVersion
- import numpy as np
- from numpy import nan
- import pytest
- import pytz
- from pandas._libs.tslib import iNaT
- from pandas.compat import range
- from pandas.errors import PerformanceWarning
- import pandas.util._test_decorators as td
- import pandas as pd
- from pandas import (
- Categorical, DataFrame, Index, IntervalIndex, MultiIndex, NaT, Series,
- Timestamp, date_range, isna)
- from pandas.core.series import remove_na
- import pandas.util.testing as tm
- from pandas.util.testing import assert_frame_equal, assert_series_equal
- try:
- import scipy
- _is_scipy_ge_0190 = (LooseVersion(scipy.__version__) >=
- LooseVersion('0.19.0'))
- except ImportError:
- _is_scipy_ge_0190 = False
- def _skip_if_no_pchip():
- try:
- from scipy.interpolate import pchip_interpolate # noqa
- except ImportError:
- import pytest
- pytest.skip('scipy.interpolate.pchip missing')
- def _skip_if_no_akima():
- try:
- from scipy.interpolate import Akima1DInterpolator # noqa
- except ImportError:
- import pytest
- pytest.skip('scipy.interpolate.Akima1DInterpolator missing')
- def _simple_ts(start, end, freq='D'):
- rng = date_range(start, end, freq=freq)
- return Series(np.random.randn(len(rng)), index=rng)
- class TestSeriesMissingData():
- def test_remove_na_deprecation(self):
- # see gh-16971
- with tm.assert_produces_warning(FutureWarning):
- remove_na(Series([]))
- def test_timedelta_fillna(self):
- # GH 3371
- s = Series([Timestamp('20130101'), Timestamp('20130101'),
- Timestamp('20130102'), Timestamp('20130103 9:01:01')])
- td = s.diff()
- # reg fillna
- with tm.assert_produces_warning(FutureWarning):
- result = td.fillna(0)
- expected = Series([timedelta(0), timedelta(0), timedelta(1),
- timedelta(days=1, seconds=9 * 3600 + 60 + 1)])
- assert_series_equal(result, expected)
- # interpreted as seconds, deprecated
- with tm.assert_produces_warning(FutureWarning):
- result = td.fillna(1)
- expected = Series([timedelta(seconds=1),
- timedelta(0), timedelta(1),
- timedelta(days=1, seconds=9 * 3600 + 60 + 1)])
- assert_series_equal(result, expected)
- result = td.fillna(timedelta(days=1, seconds=1))
- expected = Series([timedelta(days=1, seconds=1), timedelta(0),
- timedelta(1),
- timedelta(days=1, seconds=9 * 3600 + 60 + 1)])
- assert_series_equal(result, expected)
- result = td.fillna(np.timedelta64(int(1e9)))
- expected = Series([timedelta(seconds=1), timedelta(0), timedelta(1),
- timedelta(days=1, seconds=9 * 3600 + 60 + 1)])
- assert_series_equal(result, expected)
- result = td.fillna(NaT)
- expected = Series([NaT, timedelta(0), timedelta(1),
- timedelta(days=1, seconds=9 * 3600 + 60 + 1)],
- dtype='m8[ns]')
- assert_series_equal(result, expected)
- # ffill
- td[2] = np.nan
- result = td.ffill()
- with tm.assert_produces_warning(FutureWarning):
- expected = td.fillna(0)
- expected[0] = np.nan
- assert_series_equal(result, expected)
- # bfill
- td[2] = np.nan
- result = td.bfill()
- with tm.assert_produces_warning(FutureWarning):
- expected = td.fillna(0)
- expected[2] = timedelta(days=1, seconds=9 * 3600 + 60 + 1)
- assert_series_equal(result, expected)
- def test_datetime64_fillna(self):
- s = Series([Timestamp('20130101'), Timestamp('20130101'), Timestamp(
- '20130102'), Timestamp('20130103 9:01:01')])
- s[2] = np.nan
- # reg fillna
- result = s.fillna(Timestamp('20130104'))
- expected = Series([Timestamp('20130101'), Timestamp(
- '20130101'), Timestamp('20130104'), Timestamp('20130103 9:01:01')])
- assert_series_equal(result, expected)
- result = s.fillna(NaT)
- expected = s
- assert_series_equal(result, expected)
- # ffill
- result = s.ffill()
- expected = Series([Timestamp('20130101'), Timestamp(
- '20130101'), Timestamp('20130101'), Timestamp('20130103 9:01:01')])
- assert_series_equal(result, expected)
- # bfill
- result = s.bfill()
- expected = Series([Timestamp('20130101'), Timestamp('20130101'),
- Timestamp('20130103 9:01:01'), Timestamp(
- '20130103 9:01:01')])
- assert_series_equal(result, expected)
- # GH 6587
- # make sure that we are treating as integer when filling
- # this also tests inference of a datetime-like with NaT's
- s = Series([pd.NaT, pd.NaT, '2013-08-05 15:30:00.000001'])
- expected = Series(
- ['2013-08-05 15:30:00.000001', '2013-08-05 15:30:00.000001',
- '2013-08-05 15:30:00.000001'], dtype='M8[ns]')
- result = s.fillna(method='backfill')
- assert_series_equal(result, expected)
- def test_datetime64_tz_fillna(self):
- for tz in ['US/Eastern', 'Asia/Tokyo']:
- # DatetimeBlock
- s = Series([Timestamp('2011-01-01 10:00'), pd.NaT,
- Timestamp('2011-01-03 10:00'), pd.NaT])
- null_loc = pd.Series([False, True, False, True])
- result = s.fillna(pd.Timestamp('2011-01-02 10:00'))
- expected = Series([Timestamp('2011-01-01 10:00'),
- Timestamp('2011-01-02 10:00'),
- Timestamp('2011-01-03 10:00'),
- Timestamp('2011-01-02 10:00')])
- tm.assert_series_equal(expected, result)
- # check s is not changed
- tm.assert_series_equal(pd.isna(s), null_loc)
- result = s.fillna(pd.Timestamp('2011-01-02 10:00', tz=tz))
- expected = Series([Timestamp('2011-01-01 10:00'),
- Timestamp('2011-01-02 10:00', tz=tz),
- Timestamp('2011-01-03 10:00'),
- Timestamp('2011-01-02 10:00', tz=tz)])
- tm.assert_series_equal(expected, result)
- tm.assert_series_equal(pd.isna(s), null_loc)
- result = s.fillna('AAA')
- expected = Series([Timestamp('2011-01-01 10:00'), 'AAA',
- Timestamp('2011-01-03 10:00'), 'AAA'],
- dtype=object)
- tm.assert_series_equal(expected, result)
- tm.assert_series_equal(pd.isna(s), null_loc)
- result = s.fillna({1: pd.Timestamp('2011-01-02 10:00', tz=tz),
- 3: pd.Timestamp('2011-01-04 10:00')})
- expected = Series([Timestamp('2011-01-01 10:00'),
- Timestamp('2011-01-02 10:00', tz=tz),
- Timestamp('2011-01-03 10:00'),
- Timestamp('2011-01-04 10:00')])
- tm.assert_series_equal(expected, result)
- tm.assert_series_equal(pd.isna(s), null_loc)
- result = s.fillna({1: pd.Timestamp('2011-01-02 10:00'),
- 3: pd.Timestamp('2011-01-04 10:00')})
- expected = Series([Timestamp('2011-01-01 10:00'),
- Timestamp('2011-01-02 10:00'),
- Timestamp('2011-01-03 10:00'),
- Timestamp('2011-01-04 10:00')])
- tm.assert_series_equal(expected, result)
- tm.assert_series_equal(pd.isna(s), null_loc)
- # DatetimeBlockTZ
- idx = pd.DatetimeIndex(['2011-01-01 10:00', pd.NaT,
- '2011-01-03 10:00', pd.NaT], tz=tz)
- s = pd.Series(idx)
- assert s.dtype == 'datetime64[ns, {0}]'.format(tz)
- tm.assert_series_equal(pd.isna(s), null_loc)
- result = s.fillna(pd.Timestamp('2011-01-02 10:00'))
- expected = Series([Timestamp('2011-01-01 10:00', tz=tz),
- Timestamp('2011-01-02 10:00'),
- Timestamp('2011-01-03 10:00', tz=tz),
- Timestamp('2011-01-02 10:00')])
- tm.assert_series_equal(expected, result)
- tm.assert_series_equal(pd.isna(s), null_loc)
- result = s.fillna(pd.Timestamp('2011-01-02 10:00', tz=tz))
- idx = pd.DatetimeIndex(['2011-01-01 10:00', '2011-01-02 10:00',
- '2011-01-03 10:00', '2011-01-02 10:00'],
- tz=tz)
- expected = Series(idx)
- tm.assert_series_equal(expected, result)
- tm.assert_series_equal(pd.isna(s), null_loc)
- result = s.fillna(pd.Timestamp('2011-01-02 10:00',
- tz=tz).to_pydatetime())
- idx = pd.DatetimeIndex(['2011-01-01 10:00', '2011-01-02 10:00',
- '2011-01-03 10:00', '2011-01-02 10:00'],
- tz=tz)
- expected = Series(idx)
- tm.assert_series_equal(expected, result)
- tm.assert_series_equal(pd.isna(s), null_loc)
- result = s.fillna('AAA')
- expected = Series([Timestamp('2011-01-01 10:00', tz=tz), 'AAA',
- Timestamp('2011-01-03 10:00', tz=tz), 'AAA'],
- dtype=object)
- tm.assert_series_equal(expected, result)
- tm.assert_series_equal(pd.isna(s), null_loc)
- result = s.fillna({1: pd.Timestamp('2011-01-02 10:00', tz=tz),
- 3: pd.Timestamp('2011-01-04 10:00')})
- expected = Series([Timestamp('2011-01-01 10:00', tz=tz),
- Timestamp('2011-01-02 10:00', tz=tz),
- Timestamp('2011-01-03 10:00', tz=tz),
- Timestamp('2011-01-04 10:00')])
- tm.assert_series_equal(expected, result)
- tm.assert_series_equal(pd.isna(s), null_loc)
- result = s.fillna({1: pd.Timestamp('2011-01-02 10:00', tz=tz),
- 3: pd.Timestamp('2011-01-04 10:00', tz=tz)})
- expected = Series([Timestamp('2011-01-01 10:00', tz=tz),
- Timestamp('2011-01-02 10:00', tz=tz),
- Timestamp('2011-01-03 10:00', tz=tz),
- Timestamp('2011-01-04 10:00', tz=tz)])
- tm.assert_series_equal(expected, result)
- tm.assert_series_equal(pd.isna(s), null_loc)
- # filling with a naive/other zone, coerce to object
- result = s.fillna(Timestamp('20130101'))
- expected = Series([Timestamp('2011-01-01 10:00', tz=tz),
- Timestamp('2013-01-01'),
- Timestamp('2011-01-03 10:00', tz=tz),
- Timestamp('2013-01-01')])
- tm.assert_series_equal(expected, result)
- tm.assert_series_equal(pd.isna(s), null_loc)
- result = s.fillna(Timestamp('20130101', tz='US/Pacific'))
- expected = Series([Timestamp('2011-01-01 10:00', tz=tz),
- Timestamp('2013-01-01', tz='US/Pacific'),
- Timestamp('2011-01-03 10:00', tz=tz),
- Timestamp('2013-01-01', tz='US/Pacific')])
- tm.assert_series_equal(expected, result)
- tm.assert_series_equal(pd.isna(s), null_loc)
- # with timezone
- # GH 15855
- df = pd.Series([pd.Timestamp('2012-11-11 00:00:00+01:00'), pd.NaT])
- exp = pd.Series([pd.Timestamp('2012-11-11 00:00:00+01:00'),
- pd.Timestamp('2012-11-11 00:00:00+01:00')])
- assert_series_equal(df.fillna(method='pad'), exp)
- df = pd.Series([pd.NaT, pd.Timestamp('2012-11-11 00:00:00+01:00')])
- exp = pd.Series([pd.Timestamp('2012-11-11 00:00:00+01:00'),
- pd.Timestamp('2012-11-11 00:00:00+01:00')])
- assert_series_equal(df.fillna(method='bfill'), exp)
- def test_fillna_consistency(self):
- # GH 16402
- # fillna with a tz aware to a tz-naive, should result in object
- s = Series([Timestamp('20130101'), pd.NaT])
- result = s.fillna(Timestamp('20130101', tz='US/Eastern'))
- expected = Series([Timestamp('20130101'),
- Timestamp('2013-01-01', tz='US/Eastern')],
- dtype='object')
- assert_series_equal(result, expected)
- # where (we ignore the errors=)
- result = s.where([True, False],
- Timestamp('20130101', tz='US/Eastern'),
- errors='ignore')
- assert_series_equal(result, expected)
- result = s.where([True, False],
- Timestamp('20130101', tz='US/Eastern'),
- errors='ignore')
- assert_series_equal(result, expected)
- # with a non-datetime
- result = s.fillna('foo')
- expected = Series([Timestamp('20130101'),
- 'foo'])
- assert_series_equal(result, expected)
- # assignment
- s2 = s.copy()
- s2[1] = 'foo'
- assert_series_equal(s2, expected)
- def test_datetime64tz_fillna_round_issue(self):
- # GH 14872
- data = pd.Series([pd.NaT, pd.NaT,
- datetime(2016, 12, 12, 22, 24, 6, 100001,
- tzinfo=pytz.utc)])
- filled = data.fillna(method='bfill')
- expected = pd.Series([datetime(2016, 12, 12, 22, 24, 6,
- 100001, tzinfo=pytz.utc),
- datetime(2016, 12, 12, 22, 24, 6,
- 100001, tzinfo=pytz.utc),
- datetime(2016, 12, 12, 22, 24, 6,
- 100001, tzinfo=pytz.utc)])
- assert_series_equal(filled, expected)
- def test_fillna_downcast(self):
- # GH 15277
- # infer int64 from float64
- s = pd.Series([1., np.nan])
- result = s.fillna(0, downcast='infer')
- expected = pd.Series([1, 0])
- assert_series_equal(result, expected)
- # infer int64 from float64 when fillna value is a dict
- s = pd.Series([1., np.nan])
- result = s.fillna({1: 0}, downcast='infer')
- expected = pd.Series([1, 0])
- assert_series_equal(result, expected)
- def test_fillna_int(self):
- s = Series(np.random.randint(-100, 100, 50))
- s.fillna(method='ffill', inplace=True)
- assert_series_equal(s.fillna(method='ffill', inplace=False), s)
- def test_fillna_raise(self):
- s = Series(np.random.randint(-100, 100, 50))
- msg = ('"value" parameter must be a scalar or dict, but you passed a'
- ' "list"')
- with pytest.raises(TypeError, match=msg):
- s.fillna([1, 2])
- msg = ('"value" parameter must be a scalar or dict, but you passed a'
- ' "tuple"')
- with pytest.raises(TypeError, match=msg):
- s.fillna((1, 2))
- # related GH 9217, make sure limit is an int and greater than 0
- s = Series([1, 2, 3, None])
- msg = (r"Cannot specify both 'value' and 'method'\.|"
- r"Limit must be greater than 0|"
- "Limit must be an integer")
- for limit in [-1, 0, 1., 2.]:
- for method in ['backfill', 'bfill', 'pad', 'ffill', None]:
- with pytest.raises(ValueError, match=msg):
- s.fillna(1, limit=limit, method=method)
- def test_categorical_nan_equality(self):
- cat = Series(Categorical(["a", "b", "c", np.nan]))
- exp = Series([True, True, True, False])
- res = (cat == cat)
- tm.assert_series_equal(res, exp)
- def test_categorical_nan_handling(self):
- # NaNs are represented as -1 in labels
- s = Series(Categorical(["a", "b", np.nan, "a"]))
- tm.assert_index_equal(s.cat.categories, Index(["a", "b"]))
- tm.assert_numpy_array_equal(s.values.codes,
- np.array([0, 1, -1, 0], dtype=np.int8))
- @pytest.mark.parametrize('fill_value, expected_output', [
- ('a', ['a', 'a', 'b', 'a', 'a']),
- ({1: 'a', 3: 'b', 4: 'b'}, ['a', 'a', 'b', 'b', 'b']),
- ({1: 'a'}, ['a', 'a', 'b', np.nan, np.nan]),
- ({1: 'a', 3: 'b'}, ['a', 'a', 'b', 'b', np.nan]),
- (Series('a'), ['a', np.nan, 'b', np.nan, np.nan]),
- (Series('a', index=[1]), ['a', 'a', 'b', np.nan, np.nan]),
- (Series({1: 'a', 3: 'b'}), ['a', 'a', 'b', 'b', np.nan]),
- (Series(['a', 'b'], index=[3, 4]), ['a', np.nan, 'b', 'a', 'b'])
- ])
- def test_fillna_categorical(self, fill_value, expected_output):
- # GH 17033
- # Test fillna for a Categorical series
- data = ['a', np.nan, 'b', np.nan, np.nan]
- s = Series(Categorical(data, categories=['a', 'b']))
- exp = Series(Categorical(expected_output, categories=['a', 'b']))
- tm.assert_series_equal(s.fillna(fill_value), exp)
- def test_fillna_categorical_raise(self):
- data = ['a', np.nan, 'b', np.nan, np.nan]
- s = Series(Categorical(data, categories=['a', 'b']))
- with pytest.raises(ValueError,
- match="fill value must be in categories"):
- s.fillna('d')
- with pytest.raises(ValueError,
- match="fill value must be in categories"):
- s.fillna(Series('d'))
- with pytest.raises(ValueError,
- match="fill value must be in categories"):
- s.fillna({1: 'd', 3: 'a'})
- msg = ('"value" parameter must be a scalar or '
- 'dict, but you passed a "list"')
- with pytest.raises(TypeError, match=msg):
- s.fillna(['a', 'b'])
- msg = ('"value" parameter must be a scalar or '
- 'dict, but you passed a "tuple"')
- with pytest.raises(TypeError, match=msg):
- s.fillna(('a', 'b'))
- msg = ('"value" parameter must be a scalar, dict '
- 'or Series, but you passed a "DataFrame"')
- with pytest.raises(TypeError, match=msg):
- s.fillna(DataFrame({1: ['a'], 3: ['b']}))
- def test_fillna_nat(self):
- series = Series([0, 1, 2, iNaT], dtype='M8[ns]')
- filled = series.fillna(method='pad')
- filled2 = series.fillna(value=series.values[2])
- expected = series.copy()
- expected.values[3] = expected.values[2]
- assert_series_equal(filled, expected)
- assert_series_equal(filled2, expected)
- df = DataFrame({'A': series})
- filled = df.fillna(method='pad')
- filled2 = df.fillna(value=series.values[2])
- expected = DataFrame({'A': expected})
- assert_frame_equal(filled, expected)
- assert_frame_equal(filled2, expected)
- series = Series([iNaT, 0, 1, 2], dtype='M8[ns]')
- filled = series.fillna(method='bfill')
- filled2 = series.fillna(value=series[1])
- expected = series.copy()
- expected[0] = expected[1]
- assert_series_equal(filled, expected)
- assert_series_equal(filled2, expected)
- df = DataFrame({'A': series})
- filled = df.fillna(method='bfill')
- filled2 = df.fillna(value=series[1])
- expected = DataFrame({'A': expected})
- assert_frame_equal(filled, expected)
- assert_frame_equal(filled2, expected)
- def test_isna_for_inf(self):
- s = Series(['a', np.inf, np.nan, 1.0])
- with pd.option_context('mode.use_inf_as_na', True):
- r = s.isna()
- dr = s.dropna()
- e = Series([False, True, True, False])
- de = Series(['a', 1.0], index=[0, 3])
- tm.assert_series_equal(r, e)
- tm.assert_series_equal(dr, de)
- def test_isnull_for_inf_deprecated(self):
- # gh-17115
- s = Series(['a', np.inf, np.nan, 1.0])
- with pd.option_context('mode.use_inf_as_null', True):
- r = s.isna()
- dr = s.dropna()
- e = Series([False, True, True, False])
- de = Series(['a', 1.0], index=[0, 3])
- tm.assert_series_equal(r, e)
- tm.assert_series_equal(dr, de)
- def test_fillna(self, datetime_series):
- ts = Series([0., 1., 2., 3., 4.], index=tm.makeDateIndex(5))
- tm.assert_series_equal(ts, ts.fillna(method='ffill'))
- ts[2] = np.NaN
- exp = Series([0., 1., 1., 3., 4.], index=ts.index)
- tm.assert_series_equal(ts.fillna(method='ffill'), exp)
- exp = Series([0., 1., 3., 3., 4.], index=ts.index)
- tm.assert_series_equal(ts.fillna(method='backfill'), exp)
- exp = Series([0., 1., 5., 3., 4.], index=ts.index)
- tm.assert_series_equal(ts.fillna(value=5), exp)
- msg = "Must specify a fill 'value' or 'method'"
- with pytest.raises(ValueError, match=msg):
- ts.fillna()
- msg = "Cannot specify both 'value' and 'method'"
- with pytest.raises(ValueError, match=msg):
- datetime_series.fillna(value=0, method='ffill')
- # GH 5703
- s1 = Series([np.nan])
- s2 = Series([1])
- result = s1.fillna(s2)
- expected = Series([1.])
- assert_series_equal(result, expected)
- result = s1.fillna({})
- assert_series_equal(result, s1)
- result = s1.fillna(Series(()))
- assert_series_equal(result, s1)
- result = s2.fillna(s1)
- assert_series_equal(result, s2)
- result = s1.fillna({0: 1})
- assert_series_equal(result, expected)
- result = s1.fillna({1: 1})
- assert_series_equal(result, Series([np.nan]))
- result = s1.fillna({0: 1, 1: 1})
- assert_series_equal(result, expected)
- result = s1.fillna(Series({0: 1, 1: 1}))
- assert_series_equal(result, expected)
- result = s1.fillna(Series({0: 1, 1: 1}, index=[4, 5]))
- assert_series_equal(result, s1)
- s1 = Series([0, 1, 2], list('abc'))
- s2 = Series([0, np.nan, 2], list('bac'))
- result = s2.fillna(s1)
- expected = Series([0, 0, 2.], list('bac'))
- assert_series_equal(result, expected)
- # limit
- s = Series(np.nan, index=[0, 1, 2])
- result = s.fillna(999, limit=1)
- expected = Series([999, np.nan, np.nan], index=[0, 1, 2])
- assert_series_equal(result, expected)
- result = s.fillna(999, limit=2)
- expected = Series([999, 999, np.nan], index=[0, 1, 2])
- assert_series_equal(result, expected)
- # GH 9043
- # make sure a string representation of int/float values can be filled
- # correctly without raising errors or being converted
- vals = ['0', '1.5', '-0.3']
- for val in vals:
- s = Series([0, 1, np.nan, np.nan, 4], dtype='float64')
- result = s.fillna(val)
- expected = Series([0, 1, val, val, 4], dtype='object')
- assert_series_equal(result, expected)
- def test_fillna_bug(self):
- x = Series([nan, 1., nan, 3., nan], ['z', 'a', 'b', 'c', 'd'])
- filled = x.fillna(method='ffill')
- expected = Series([nan, 1., 1., 3., 3.], x.index)
- assert_series_equal(filled, expected)
- filled = x.fillna(method='bfill')
- expected = Series([1., 1., 3., 3., nan], x.index)
- assert_series_equal(filled, expected)
- def test_fillna_inplace(self):
- x = Series([nan, 1., nan, 3., nan], ['z', 'a', 'b', 'c', 'd'])
- y = x.copy()
- y.fillna(value=0, inplace=True)
- expected = x.fillna(value=0)
- assert_series_equal(y, expected)
- def test_fillna_invalid_method(self, datetime_series):
- try:
- datetime_series.fillna(method='ffil')
- except ValueError as inst:
- assert 'ffil' in str(inst)
- def test_ffill(self):
- ts = Series([0., 1., 2., 3., 4.], index=tm.makeDateIndex(5))
- ts[2] = np.NaN
- assert_series_equal(ts.ffill(), ts.fillna(method='ffill'))
- def test_ffill_mixed_dtypes_without_missing_data(self):
- # GH14956
- series = pd.Series([datetime(2015, 1, 1, tzinfo=pytz.utc), 1])
- result = series.ffill()
- assert_series_equal(series, result)
- def test_bfill(self):
- ts = Series([0., 1., 2., 3., 4.], index=tm.makeDateIndex(5))
- ts[2] = np.NaN
- assert_series_equal(ts.bfill(), ts.fillna(method='bfill'))
- def test_timedelta64_nan(self):
- td = Series([timedelta(days=i) for i in range(10)])
- # nan ops on timedeltas
- td1 = td.copy()
- td1[0] = np.nan
- assert isna(td1[0])
- assert td1[0].value == iNaT
- td1[0] = td[0]
- assert not isna(td1[0])
- td1[1] = iNaT
- assert isna(td1[1])
- assert td1[1].value == iNaT
- td1[1] = td[1]
- assert not isna(td1[1])
- td1[2] = NaT
- assert isna(td1[2])
- assert td1[2].value == iNaT
- td1[2] = td[2]
- assert not isna(td1[2])
- # boolean setting
- # this doesn't work, not sure numpy even supports it
- # result = td[(td>np.timedelta64(timedelta(days=3))) &
- # td<np.timedelta64(timedelta(days=7)))] = np.nan
- # assert isna(result).sum() == 7
- # NumPy limitiation =(
- # def test_logical_range_select(self):
- # np.random.seed(12345)
- # selector = -0.5 <= datetime_series <= 0.5
- # expected = (datetime_series >= -0.5) & (datetime_series <= 0.5)
- # assert_series_equal(selector, expected)
- def test_dropna_empty(self):
- s = Series([])
- assert len(s.dropna()) == 0
- s.dropna(inplace=True)
- assert len(s) == 0
- # invalid axis
- msg = r"No axis named 1 for object type <(class|type) 'type'>"
- with pytest.raises(ValueError, match=msg):
- s.dropna(axis=1)
- def test_datetime64_tz_dropna(self):
- # DatetimeBlock
- s = Series([Timestamp('2011-01-01 10:00'), pd.NaT, Timestamp(
- '2011-01-03 10:00'), pd.NaT])
- result = s.dropna()
- expected = Series([Timestamp('2011-01-01 10:00'),
- Timestamp('2011-01-03 10:00')], index=[0, 2])
- tm.assert_series_equal(result, expected)
- # DatetimeBlockTZ
- idx = pd.DatetimeIndex(['2011-01-01 10:00', pd.NaT,
- '2011-01-03 10:00', pd.NaT],
- tz='Asia/Tokyo')
- s = pd.Series(idx)
- assert s.dtype == 'datetime64[ns, Asia/Tokyo]'
- result = s.dropna()
- expected = Series([Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'),
- Timestamp('2011-01-03 10:00', tz='Asia/Tokyo')],
- index=[0, 2])
- assert result.dtype == 'datetime64[ns, Asia/Tokyo]'
- tm.assert_series_equal(result, expected)
- def test_dropna_no_nan(self):
- for s in [Series([1, 2, 3], name='x'), Series(
- [False, True, False], name='x')]:
- result = s.dropna()
- tm.assert_series_equal(result, s)
- assert result is not s
- s2 = s.copy()
- s2.dropna(inplace=True)
- tm.assert_series_equal(s2, s)
- def test_dropna_intervals(self):
- s = Series([np.nan, 1, 2, 3], IntervalIndex.from_arrays(
- [np.nan, 0, 1, 2],
- [np.nan, 1, 2, 3]))
- result = s.dropna()
- expected = s.iloc[1:]
- assert_series_equal(result, expected)
- def test_valid(self, datetime_series):
- ts = datetime_series.copy()
- ts[::2] = np.NaN
- result = ts.dropna()
- assert len(result) == ts.count()
- tm.assert_series_equal(result, ts[1::2])
- tm.assert_series_equal(result, ts[pd.notna(ts)])
- def test_isna(self):
- ser = Series([0, 5.4, 3, nan, -0.001])
- expected = Series([False, False, False, True, False])
- tm.assert_series_equal(ser.isna(), expected)
- ser = Series(["hi", "", nan])
- expected = Series([False, False, True])
- tm.assert_series_equal(ser.isna(), expected)
- def test_notna(self):
- ser = Series([0, 5.4, 3, nan, -0.001])
- expected = Series([True, True, True, False, True])
- tm.assert_series_equal(ser.notna(), expected)
- ser = Series(["hi", "", nan])
- expected = Series([True, True, False])
- tm.assert_series_equal(ser.notna(), expected)
- def test_pad_nan(self):
- x = Series([np.nan, 1., np.nan, 3., np.nan], ['z', 'a', 'b', 'c', 'd'],
- dtype=float)
- x.fillna(method='pad', inplace=True)
- expected = Series([np.nan, 1.0, 1.0, 3.0, 3.0],
- ['z', 'a', 'b', 'c', 'd'], dtype=float)
- assert_series_equal(x[1:], expected[1:])
- assert np.isnan(x[0]), np.isnan(expected[0])
- def test_pad_require_monotonicity(self):
- rng = date_range('1/1/2000', '3/1/2000', freq='B')
- # neither monotonic increasing or decreasing
- rng2 = rng[[1, 0, 2]]
- msg = "index must be monotonic increasing or decreasing"
- with pytest.raises(ValueError, match=msg):
- rng2.get_indexer(rng, method='pad')
- def test_dropna_preserve_name(self, datetime_series):
- datetime_series[:5] = np.nan
- result = datetime_series.dropna()
- assert result.name == datetime_series.name
- name = datetime_series.name
- ts = datetime_series.copy()
- ts.dropna(inplace=True)
- assert ts.name == name
- def test_fill_value_when_combine_const(self):
- # GH12723
- s = Series([0, 1, np.nan, 3, 4, 5])
- exp = s.fillna(0).add(2)
- res = s.add(2, fill_value=0)
- assert_series_equal(res, exp)
- def test_series_fillna_limit(self):
- index = np.arange(10)
- s = Series(np.random.randn(10), index=index)
- result = s[:2].reindex(index)
- result = result.fillna(method='pad', limit=5)
- expected = s[:2].reindex(index).fillna(method='pad')
- expected[-3:] = np.nan
- assert_series_equal(result, expected)
- result = s[-2:].reindex(index)
- result = result.fillna(method='bfill', limit=5)
- expected = s[-2:].reindex(index).fillna(method='backfill')
- expected[:3] = np.nan
- assert_series_equal(result, expected)
- def test_sparse_series_fillna_limit(self):
- index = np.arange(10)
- s = Series(np.random.randn(10), index=index)
- ss = s[:2].reindex(index).to_sparse()
- # TODO: what is this test doing? why are result an expected
- # the same call to fillna?
- with tm.assert_produces_warning(PerformanceWarning):
- # TODO: release-note fillna performance warning
- result = ss.fillna(method='pad', limit=5)
- expected = ss.fillna(method='pad', limit=5)
- expected = expected.to_dense()
- expected[-3:] = np.nan
- expected = expected.to_sparse()
- assert_series_equal(result, expected)
- ss = s[-2:].reindex(index).to_sparse()
- with tm.assert_produces_warning(PerformanceWarning):
- result = ss.fillna(method='backfill', limit=5)
- expected = ss.fillna(method='backfill')
- expected = expected.to_dense()
- expected[:3] = np.nan
- expected = expected.to_sparse()
- assert_series_equal(result, expected)
- def test_sparse_series_pad_backfill_limit(self):
- index = np.arange(10)
- s = Series(np.random.randn(10), index=index)
- s = s.to_sparse()
- result = s[:2].reindex(index, method='pad', limit=5)
- with tm.assert_produces_warning(PerformanceWarning):
- expected = s[:2].reindex(index).fillna(method='pad')
- expected = expected.to_dense()
- expected[-3:] = np.nan
- expected = expected.to_sparse()
- assert_series_equal(result, expected)
- result = s[-2:].reindex(index, method='backfill', limit=5)
- with tm.assert_produces_warning(PerformanceWarning):
- expected = s[-2:].reindex(index).fillna(method='backfill')
- expected = expected.to_dense()
- expected[:3] = np.nan
- expected = expected.to_sparse()
- assert_series_equal(result, expected)
- def test_series_pad_backfill_limit(self):
- index = np.arange(10)
- s = Series(np.random.randn(10), index=index)
- result = s[:2].reindex(index, method='pad', limit=5)
- expected = s[:2].reindex(index).fillna(method='pad')
- expected[-3:] = np.nan
- assert_series_equal(result, expected)
- result = s[-2:].reindex(index, method='backfill', limit=5)
- expected = s[-2:].reindex(index).fillna(method='backfill')
- expected[:3] = np.nan
- assert_series_equal(result, expected)
- class TestSeriesInterpolateData():
- def test_interpolate(self, datetime_series, string_series):
- ts = Series(np.arange(len(datetime_series), dtype=float),
- datetime_series.index)
- ts_copy = ts.copy()
- ts_copy[5:10] = np.NaN
- linear_interp = ts_copy.interpolate(method='linear')
- tm.assert_series_equal(linear_interp, ts)
- ord_ts = Series([d.toordinal() for d in datetime_series.index],
- index=datetime_series.index).astype(float)
- ord_ts_copy = ord_ts.copy()
- ord_ts_copy[5:10] = np.NaN
- time_interp = ord_ts_copy.interpolate(method='time')
- tm.assert_series_equal(time_interp, ord_ts)
- # try time interpolation on a non-TimeSeries
- # Only raises ValueError if there are NaNs.
- non_ts = string_series.copy()
- non_ts[0] = np.NaN
- msg = ("time-weighted interpolation only works on Series or DataFrames"
- " with a DatetimeIndex")
- with pytest.raises(ValueError, match=msg):
- non_ts.interpolate(method='time')
- @td.skip_if_no_scipy
- def test_interpolate_pchip(self):
- _skip_if_no_pchip()
- ser = Series(np.sort(np.random.uniform(size=100)))
- # interpolate at new_index
- new_index = ser.index.union(Index([49.25, 49.5, 49.75, 50.25, 50.5,
- 50.75]))
- interp_s = ser.reindex(new_index).interpolate(method='pchip')
- # does not blow up, GH5977
- interp_s[49:51]
- @td.skip_if_no_scipy
- def test_interpolate_akima(self):
- _skip_if_no_akima()
- ser = Series([10, 11, 12, 13])
- expected = Series([11.00, 11.25, 11.50, 11.75,
- 12.00, 12.25, 12.50, 12.75, 13.00],
- index=Index([1.0, 1.25, 1.5, 1.75,
- 2.0, 2.25, 2.5, 2.75, 3.0]))
- # interpolate at new_index
- new_index = ser.index.union(Index([1.25, 1.5, 1.75, 2.25, 2.5, 2.75]))
- interp_s = ser.reindex(new_index).interpolate(method='akima')
- assert_series_equal(interp_s[1:3], expected)
- @td.skip_if_no_scipy
- def test_interpolate_piecewise_polynomial(self):
- ser = Series([10, 11, 12, 13])
- expected = Series([11.00, 11.25, 11.50, 11.75,
- 12.00, 12.25, 12.50, 12.75, 13.00],
- index=Index([1.0, 1.25, 1.5, 1.75,
- 2.0, 2.25, 2.5, 2.75, 3.0]))
- # interpolate at new_index
- new_index = ser.index.union(Index([1.25, 1.5, 1.75, 2.25, 2.5, 2.75]))
- interp_s = ser.reindex(new_index).interpolate(
- method='piecewise_polynomial')
- assert_series_equal(interp_s[1:3], expected)
- @td.skip_if_no_scipy
- def test_interpolate_from_derivatives(self):
- ser = Series([10, 11, 12, 13])
- expected = Series([11.00, 11.25, 11.50, 11.75,
- 12.00, 12.25, 12.50, 12.75, 13.00],
- index=Index([1.0, 1.25, 1.5, 1.75,
- 2.0, 2.25, 2.5, 2.75, 3.0]))
- # interpolate at new_index
- new_index = ser.index.union(Index([1.25, 1.5, 1.75, 2.25, 2.5, 2.75]))
- interp_s = ser.reindex(new_index).interpolate(
- method='from_derivatives')
- assert_series_equal(interp_s[1:3], expected)
- @pytest.mark.parametrize("kwargs", [
- {},
- pytest.param({'method': 'polynomial', 'order': 1},
- marks=td.skip_if_no_scipy)
- ])
- def test_interpolate_corners(self, kwargs):
- s = Series([np.nan, np.nan])
- assert_series_equal(s.interpolate(**kwargs), s)
- s = Series([]).interpolate()
- assert_series_equal(s.interpolate(**kwargs), s)
- def test_interpolate_index_values(self):
- s = Series(np.nan, index=np.sort(np.random.rand(30)))
- s[::3] = np.random.randn(10)
- vals = s.index.values.astype(float)
- result = s.interpolate(method='index')
- expected = s.copy()
- bad = isna(expected.values)
- good = ~bad
- expected = Series(np.interp(vals[bad], vals[good],
- s.values[good]),
- index=s.index[bad])
- assert_series_equal(result[bad], expected)
- # 'values' is synonymous with 'index' for the method kwarg
- other_result = s.interpolate(method='values')
- assert_series_equal(other_result, result)
- assert_series_equal(other_result[bad], expected)
- def test_interpolate_non_ts(self):
- s = Series([1, 3, np.nan, np.nan, np.nan, 11])
- msg = ("time-weighted interpolation only works on Series or DataFrames"
- " with a DatetimeIndex")
- with pytest.raises(ValueError, match=msg):
- s.interpolate(method='time')
- @pytest.mark.parametrize("kwargs", [
- {},
- pytest.param({'method': 'polynomial', 'order': 1},
- marks=td.skip_if_no_scipy)
- ])
- def test_nan_interpolate(self, kwargs):
- s = Series([0, 1, np.nan, 3])
- result = s.interpolate(**kwargs)
- expected = Series([0., 1., 2., 3.])
- assert_series_equal(result, expected)
- def test_nan_irregular_index(self):
- s = Series([1, 2, np.nan, 4], index=[1, 3, 5, 9])
- result = s.interpolate()
- expected = Series([1., 2., 3., 4.], index=[1, 3, 5, 9])
- assert_series_equal(result, expected)
- def test_nan_str_index(self):
- s = Series([0, 1, 2, np.nan], index=list('abcd'))
- result = s.interpolate()
- expected = Series([0., 1., 2., 2.], index=list('abcd'))
- assert_series_equal(result, expected)
- @td.skip_if_no_scipy
- def test_interp_quad(self):
- sq = Series([1, 4, np.nan, 16], index=[1, 2, 3, 4])
- result = sq.interpolate(method='quadratic')
- expected = Series([1., 4., 9., 16.], index=[1, 2, 3, 4])
- assert_series_equal(result, expected)
- @td.skip_if_no_scipy
- def test_interp_scipy_basic(self):
- s = Series([1, 3, np.nan, 12, np.nan, 25])
- # slinear
- expected = Series([1., 3., 7.5, 12., 18.5, 25.])
- result = s.interpolate(method='slinear')
- assert_series_equal(result, expected)
- result = s.interpolate(method='slinear', downcast='infer')
- assert_series_equal(result, expected)
- # nearest
- expected = Series([1, 3, 3, 12, 12, 25])
- result = s.interpolate(method='nearest')
- assert_series_equal(result, expected.astype('float'))
- result = s.interpolate(method='nearest', downcast='infer')
- assert_series_equal(result, expected)
- # zero
- expected = Series([1, 3, 3, 12, 12, 25])
- result = s.interpolate(method='zero')
- assert_series_equal(result, expected.astype('float'))
- result = s.interpolate(method='zero', downcast='infer')
- assert_series_equal(result, expected)
- # quadratic
- # GH #15662.
- # new cubic and quadratic interpolation algorithms from scipy 0.19.0.
- # previously `splmake` was used. See scipy/scipy#6710
- if _is_scipy_ge_0190:
- expected = Series([1, 3., 6.823529, 12., 18.058824, 25.])
- else:
- expected = Series([1, 3., 6.769231, 12., 18.230769, 25.])
- result = s.interpolate(method='quadratic')
- assert_series_equal(result, expected)
- result = s.interpolate(method='quadratic', downcast='infer')
- assert_series_equal(result, expected)
- # cubic
- expected = Series([1., 3., 6.8, 12., 18.2, 25.])
- result = s.interpolate(method='cubic')
- assert_series_equal(result, expected)
- def test_interp_limit(self):
- s = Series([1, 3, np.nan, np.nan, np.nan, 11])
- expected = Series([1., 3., 5., 7., np.nan, 11.])
- result = s.interpolate(method='linear', limit=2)
- assert_series_equal(result, expected)
- # GH 9217, make sure limit is an int and greater than 0
- methods = ['linear', 'time', 'index', 'values', 'nearest', 'zero',
- 'slinear', 'quadratic', 'cubic', 'barycentric', 'krogh',
- 'polynomial', 'spline', 'piecewise_polynomial', None,
- 'from_derivatives', 'pchip', 'akima']
- s = pd.Series([1, 2, np.nan, np.nan, 5])
- msg = (r"Limit must be greater than 0|"
- "time-weighted interpolation only works on Series or"
- r" DataFrames with a DatetimeIndex|"
- r"invalid method '(polynomial|spline|None)' to interpolate|"
- "Limit must be an integer")
- for limit in [-1, 0, 1., 2.]:
- for method in methods:
- with pytest.raises(ValueError, match=msg):
- s.interpolate(limit=limit, method=method)
- def test_interp_limit_forward(self):
- s = Series([1, 3, np.nan, np.nan, np.nan, 11])
- # Provide 'forward' (the default) explicitly here.
- expected = Series([1., 3., 5., 7., np.nan, 11.])
- result = s.interpolate(method='linear', limit=2,
- limit_direction='forward')
- assert_series_equal(result, expected)
- result = s.interpolate(method='linear', limit=2,
- limit_direction='FORWARD')
- assert_series_equal(result, expected)
- def test_interp_unlimited(self):
- # these test are for issue #16282 default Limit=None is unlimited
- s = Series([np.nan, 1., 3., np.nan, np.nan, np.nan, 11., np.nan])
- expected = Series([1., 1., 3., 5., 7., 9., 11., 11.])
- result = s.interpolate(method='linear',
- limit_direction='both')
- assert_series_equal(result, expected)
- expected = Series([np.nan, 1., 3., 5., 7., 9., 11., 11.])
- result = s.interpolate(method='linear',
- limit_direction='forward')
- assert_series_equal(result, expected)
- expected = Series([1., 1., 3., 5., 7., 9., 11., np.nan])
- result = s.interpolate(method='linear',
- limit_direction='backward')
- assert_series_equal(result, expected)
- def test_interp_limit_bad_direction(self):
- s = Series([1, 3, np.nan, np.nan, np.nan, 11])
- msg = (r"Invalid limit_direction: expecting one of \['forward',"
- r" 'backward', 'both'\], got 'abc'")
- with pytest.raises(ValueError, match=msg):
- s.interpolate(method='linear', limit=2, limit_direction='abc')
- # raises an error even if no limit is specified.
- with pytest.raises(ValueError, match=msg):
- s.interpolate(method='linear', limit_direction='abc')
- # limit_area introduced GH #16284
- def test_interp_limit_area(self):
- # These tests are for issue #9218 -- fill NaNs in both directions.
- s = Series([nan, nan, 3, nan, nan, nan, 7, nan, nan])
- expected = Series([nan, nan, 3., 4., 5., 6., 7., nan, nan])
- result = s.interpolate(method='linear', limit_area='inside')
- assert_series_equal(result, expected)
- expected = Series([nan, nan, 3., 4., nan, nan, 7., nan, nan])
- result = s.interpolate(method='linear', limit_area='inside',
- limit=1)
- expected = Series([nan, nan, 3., 4., nan, 6., 7., nan, nan])
- result = s.interpolate(method='linear', limit_area='inside',
- limit_direction='both', limit=1)
- assert_series_equal(result, expected)
- expected = Series([nan, nan, 3., nan, nan, nan, 7., 7., 7.])
- result = s.interpolate(method='linear', limit_area='outside')
- assert_series_equal(result, expected)
- expected = Series([nan, nan, 3., nan, nan, nan, 7., 7., nan])
- result = s.interpolate(method='linear', limit_area='outside',
- limit=1)
- expected = Series([nan, 3., 3., nan, nan, nan, 7., 7., nan])
- result = s.interpolate(method='linear', limit_area='outside',
- limit_direction='both', limit=1)
- assert_series_equal(result, expected)
- expected = Series([3., 3., 3., nan, nan, nan, 7., nan, nan])
- result = s.interpolate(method='linear', limit_area='outside',
- direction='backward')
- # raises an error even if limit type is wrong.
- msg = (r"Invalid limit_area: expecting one of \['inside', 'outside'\],"
- " got abc")
- with pytest.raises(ValueError, match=msg):
- s.interpolate(method='linear', limit_area='abc')
- def test_interp_limit_direction(self):
- # These tests are for issue #9218 -- fill NaNs in both directions.
- s = Series([1, 3, np.nan, np.nan, np.nan, 11])
- expected = Series([1., 3., np.nan, 7., 9., 11.])
- result = s.interpolate(method='linear', limit=2,
- limit_direction='backward')
- assert_series_equal(result, expected)
- expected = Series([1., 3., 5., np.nan, 9., 11.])
- result = s.interpolate(method='linear', limit=1,
- limit_direction='both')
- assert_series_equal(result, expected)
- # Check that this works on a longer series of nans.
- s = Series([1, 3, np.nan, np.nan, np.nan, 7, 9, np.nan, np.nan, 12,
- np.nan])
- expected = Series([1., 3., 4., 5., 6., 7., 9., 10., 11., 12., 12.])
- result = s.interpolate(method='linear', limit=2,
- limit_direction='both')
- assert_series_equal(result, expected)
- expected = Series([1., 3., 4., np.nan, 6., 7., 9., 10., 11., 12., 12.])
- result = s.interpolate(method='linear', limit=1,
- limit_direction='both')
- assert_series_equal(result, expected)
- def test_interp_limit_to_ends(self):
- # These test are for issue #10420 -- flow back to beginning.
- s = Series([np.nan, np.nan, 5, 7, 9, np.nan])
- expected = Series([5., 5., 5., 7., 9., np.nan])
- result = s.interpolate(method='linear', limit=2,
- limit_direction='backward')
- assert_series_equal(result, expected)
- expected = Series([5., 5., 5., 7., 9., 9.])
- result = s.interpolate(method='linear', limit=2,
- limit_direction='both')
- assert_series_equal(result, expected)
- def test_interp_limit_before_ends(self):
- # These test are for issue #11115 -- limit ends properly.
- s = Series([np.nan, np.nan, 5, 7, np.nan, np.nan])
- expected = Series([np.nan, np.nan, 5., 7., 7., np.nan])
- result = s.interpolate(method='linear', limit=1,
- limit_direction='forward')
- assert_series_equal(result, expected)
- expected = Series([np.nan, 5., 5., 7., np.nan, np.nan])
- result = s.interpolate(method='linear', limit=1,
- limit_direction='backward')
- assert_series_equal(result, expected)
- expected = Series([np.nan, 5., 5., 7., 7., np.nan])
- result = s.interpolate(method='linear', limit=1,
- limit_direction='both')
- assert_series_equal(result, expected)
- @td.skip_if_no_scipy
- def test_interp_all_good(self):
- s = Series([1, 2, 3])
- result = s.interpolate(method='polynomial', order=1)
- assert_series_equal(result, s)
- # non-scipy
- result = s.interpolate()
- assert_series_equal(result, s)
- @pytest.mark.parametrize("check_scipy", [
- False,
- pytest.param(True, marks=td.skip_if_no_scipy)
- ])
- def test_interp_multiIndex(self, check_scipy):
- idx = MultiIndex.from_tuples([(0, 'a'), (1, 'b'), (2, 'c')])
- s = Series([1, 2, np.nan], index=idx)
- expected = s.copy()
- expected.loc[2] = 2
- result = s.interpolate()
- assert_series_equal(result, expected)
- msg = "Only `method=linear` interpolation is supported on MultiIndexes"
- if check_scipy:
- with pytest.raises(ValueError, match=msg):
- s.interpolate(method='polynomial', order=1)
- @td.skip_if_no_scipy
- def test_interp_nonmono_raise(self):
- s = Series([1, np.nan, 3], index=[0, 2, 1])
- msg = "krogh interpolation requires that the index be monotonic"
- with pytest.raises(ValueError, match=msg):
- s.interpolate(method='krogh')
- @td.skip_if_no_scipy
- def test_interp_datetime64(self):
- df = Series([1, np.nan, 3], index=date_range('1/1/2000', periods=3))
- result = df.interpolate(method='nearest')
- expected = Series([1., 1., 3.],
- index=date_range('1/1/2000', periods=3))
- assert_series_equal(result, expected)
- def test_interp_limit_no_nans(self):
- # GH 7173
- s = pd.Series([1., 2., 3.])
- result = s.interpolate(limit=1)
- expected = s
- assert_series_equal(result, expected)
- @td.skip_if_no_scipy
- @pytest.mark.parametrize("method", ['polynomial', 'spline'])
- def test_no_order(self, method):
- s = Series([0, 1, np.nan, 3])
- msg = "invalid method '{}' to interpolate".format(method)
- with pytest.raises(ValueError, match=msg):
- s.interpolate(method=method)
- @td.skip_if_no_scipy
- def test_spline(self):
- s = Series([1, 2, np.nan, 4, 5, np.nan, 7])
- result = s.interpolate(method='spline', order=1)
- expected = Series([1., 2., 3., 4., 5., 6., 7.])
- assert_series_equal(result, expected)
- @td.skip_if_no('scipy', min_version='0.15')
- def test_spline_extrapolate(self):
- s = Series([1, 2, 3, 4, np.nan, 6, np.nan])
- result3 = s.interpolate(method='spline', order=1, ext=3)
- expected3 = Series([1., 2., 3., 4., 5., 6., 6.])
- assert_series_equal(result3, expected3)
- result1 = s.interpolate(method='spline', order=1, ext=0)
- expected1 = Series([1., 2., 3., 4., 5., 6., 7.])
- assert_series_equal(result1, expected1)
- @td.skip_if_no_scipy
- def test_spline_smooth(self):
- s = Series([1, 2, np.nan, 4, 5.1, np.nan, 7])
- assert (s.interpolate(method='spline', order=3, s=0)[5] !=
- s.interpolate(method='spline', order=3)[5])
- @td.skip_if_no_scipy
- def test_spline_interpolation(self):
- s = Series(np.arange(10) ** 2)
- s[np.random.randint(0, 9, 3)] = np.nan
- result1 = s.interpolate(method='spline', order=1)
- expected1 = s.interpolate(method='spline', order=1)
- assert_series_equal(result1, expected1)
- @td.skip_if_no_scipy
- def test_spline_error(self):
- # see gh-10633
- s = pd.Series(np.arange(10) ** 2)
- s[np.random.randint(0, 9, 3)] = np.nan
- msg = "invalid method 'spline' to interpolate"
- with pytest.raises(ValueError, match=msg):
- s.interpolate(method='spline')
- msg = "order needs to be specified and greater than 0"
- with pytest.raises(ValueError, match=msg):
- s.interpolate(method='spline', order=0)
- def test_interp_timedelta64(self):
- # GH 6424
- df = Series([1, np.nan, 3],
- index=pd.to_timedelta([1, 2, 3]))
- result = df.interpolate(method='time')
- expected = Series([1., 2., 3.],
- index=pd.to_timedelta([1, 2, 3]))
- assert_series_equal(result, expected)
- # test for non uniform spacing
- df = Series([1, np.nan, 3],
- index=pd.to_timedelta([1, 2, 4]))
- result = df.interpolate(method='time')
- expected = Series([1., 1.666667, 3.],
- index=pd.to_timedelta([1, 2, 4]))
- assert_series_equal(result, expected)
- def test_series_interpolate_method_values(self):
- # #1646
- ts = _simple_ts('1/1/2000', '1/20/2000')
- ts[::2] = np.nan
- result = ts.interpolate(method='values')
- exp = ts.interpolate()
- assert_series_equal(result, exp)
- def test_series_interpolate_intraday(self):
- # #1698
- index = pd.date_range('1/1/2012', periods=4, freq='12D')
- ts = pd.Series([0, 12, 24, 36], index)
- new_index = index.append(index + pd.DateOffset(days=1)).sort_values()
- exp = ts.reindex(new_index).interpolate(method='time')
- index = pd.date_range('1/1/2012', periods=4, freq='12H')
- ts = pd.Series([0, 12, 24, 36], index)
- new_index = index.append(index + pd.DateOffset(hours=1)).sort_values()
- result = ts.reindex(new_index).interpolate(method='time')
- tm.assert_numpy_array_equal(result.values, exp.values)
- def test_nonzero_warning(self):
- # GH 24048
- ser = pd.Series([1, 0, 3, 4])
- with tm.assert_produces_warning(FutureWarning):
- ser.nonzero()
|