# -*- coding: utf-8 -*- from __future__ import print_function import datetime from distutils.version import LooseVersion import dateutil import numpy as np import pytest from pandas.compat import lrange import pandas.util._test_decorators as td import pandas as pd from pandas import Categorical, DataFrame, Series, Timestamp, date_range from pandas.tests.frame.common import TestData, _check_mixed_float import pandas.util.testing as tm from pandas.util.testing import assert_frame_equal, assert_series_equal try: import scipy _is_scipy_ge_0190 = (LooseVersion(scipy.__version__) >= LooseVersion('0.19.0')) except ImportError: _is_scipy_ge_0190 = False def _skip_if_no_pchip(): try: from scipy.interpolate import pchip_interpolate # noqa except ImportError: import pytest pytest.skip('scipy.interpolate.pchip missing') class TestDataFrameMissingData(TestData): def test_dropEmptyRows(self): N = len(self.frame.index) mat = np.random.randn(N) mat[:5] = np.nan frame = DataFrame({'foo': mat}, index=self.frame.index) original = Series(mat, index=self.frame.index, name='foo') expected = original.dropna() inplace_frame1, inplace_frame2 = frame.copy(), frame.copy() smaller_frame = frame.dropna(how='all') # check that original was preserved assert_series_equal(frame['foo'], original) inplace_frame1.dropna(how='all', inplace=True) assert_series_equal(smaller_frame['foo'], expected) assert_series_equal(inplace_frame1['foo'], expected) smaller_frame = frame.dropna(how='all', subset=['foo']) inplace_frame2.dropna(how='all', subset=['foo'], inplace=True) assert_series_equal(smaller_frame['foo'], expected) assert_series_equal(inplace_frame2['foo'], expected) def test_dropIncompleteRows(self): N = len(self.frame.index) mat = np.random.randn(N) mat[:5] = np.nan frame = DataFrame({'foo': mat}, index=self.frame.index) frame['bar'] = 5 original = Series(mat, index=self.frame.index, name='foo') inp_frame1, inp_frame2 = frame.copy(), frame.copy() smaller_frame = frame.dropna() assert_series_equal(frame['foo'], original) inp_frame1.dropna(inplace=True) exp = Series(mat[5:], index=self.frame.index[5:], name='foo') tm.assert_series_equal(smaller_frame['foo'], exp) tm.assert_series_equal(inp_frame1['foo'], exp) samesize_frame = frame.dropna(subset=['bar']) assert_series_equal(frame['foo'], original) assert (frame['bar'] == 5).all() inp_frame2.dropna(subset=['bar'], inplace=True) tm.assert_index_equal(samesize_frame.index, self.frame.index) tm.assert_index_equal(inp_frame2.index, self.frame.index) def test_dropna(self): df = DataFrame(np.random.randn(6, 4)) df[2][:2] = np.nan dropped = df.dropna(axis=1) expected = df.loc[:, [0, 1, 3]] inp = df.copy() inp.dropna(axis=1, inplace=True) assert_frame_equal(dropped, expected) assert_frame_equal(inp, expected) dropped = df.dropna(axis=0) expected = df.loc[lrange(2, 6)] inp = df.copy() inp.dropna(axis=0, inplace=True) assert_frame_equal(dropped, expected) assert_frame_equal(inp, expected) # threshold dropped = df.dropna(axis=1, thresh=5) expected = df.loc[:, [0, 1, 3]] inp = df.copy() inp.dropna(axis=1, thresh=5, inplace=True) assert_frame_equal(dropped, expected) assert_frame_equal(inp, expected) dropped = df.dropna(axis=0, thresh=4) expected = df.loc[lrange(2, 6)] inp = df.copy() inp.dropna(axis=0, thresh=4, inplace=True) assert_frame_equal(dropped, expected) assert_frame_equal(inp, expected) dropped = df.dropna(axis=1, thresh=4) assert_frame_equal(dropped, df) dropped = df.dropna(axis=1, thresh=3) assert_frame_equal(dropped, df) # subset dropped = df.dropna(axis=0, subset=[0, 1, 3]) inp = df.copy() inp.dropna(axis=0, subset=[0, 1, 3], inplace=True) assert_frame_equal(dropped, df) assert_frame_equal(inp, df) # all dropped = df.dropna(axis=1, how='all') assert_frame_equal(dropped, df) df[2] = np.nan dropped = df.dropna(axis=1, how='all') expected = df.loc[:, [0, 1, 3]] assert_frame_equal(dropped, expected) # bad input pytest.raises(ValueError, df.dropna, axis=3) def test_drop_and_dropna_caching(self): # tst that cacher updates original = Series([1, 2, np.nan], name='A') expected = Series([1, 2], dtype=original.dtype, name='A') df = pd.DataFrame({'A': original.values.copy()}) df2 = df.copy() df['A'].dropna() assert_series_equal(df['A'], original) df['A'].dropna(inplace=True) assert_series_equal(df['A'], expected) df2['A'].drop([1]) assert_series_equal(df2['A'], original) df2['A'].drop([1], inplace=True) assert_series_equal(df2['A'], original.drop([1])) def test_dropna_corner(self): # bad input pytest.raises(ValueError, self.frame.dropna, how='foo') pytest.raises(TypeError, self.frame.dropna, how=None) # non-existent column - 8303 pytest.raises(KeyError, self.frame.dropna, subset=['A', 'X']) def test_dropna_multiple_axes(self): df = DataFrame([[1, np.nan, 2, 3], [4, np.nan, 5, 6], [np.nan, np.nan, np.nan, np.nan], [7, np.nan, 8, 9]]) cp = df.copy() # GH20987 with tm.assert_produces_warning(FutureWarning): result = df.dropna(how='all', axis=[0, 1]) with tm.assert_produces_warning(FutureWarning): result2 = df.dropna(how='all', axis=(0, 1)) expected = df.dropna(how='all').dropna(how='all', axis=1) assert_frame_equal(result, expected) assert_frame_equal(result2, expected) assert_frame_equal(df, cp) inp = df.copy() with tm.assert_produces_warning(FutureWarning): inp.dropna(how='all', axis=(0, 1), inplace=True) assert_frame_equal(inp, expected) def test_dropna_tz_aware_datetime(self): # GH13407 df = DataFrame() dt1 = datetime.datetime(2015, 1, 1, tzinfo=dateutil.tz.tzutc()) dt2 = datetime.datetime(2015, 2, 2, tzinfo=dateutil.tz.tzutc()) df['Time'] = [dt1] result = df.dropna(axis=0) expected = DataFrame({'Time': [dt1]}) assert_frame_equal(result, expected) # Ex2 df = DataFrame({'Time': [dt1, None, np.nan, dt2]}) result = df.dropna(axis=0) expected = DataFrame([dt1, dt2], columns=['Time'], index=[0, 3]) assert_frame_equal(result, expected) def test_fillna(self): tf = self.tsframe tf.loc[tf.index[:5], 'A'] = np.nan tf.loc[tf.index[-5:], 'A'] = np.nan zero_filled = self.tsframe.fillna(0) assert (zero_filled.loc[zero_filled.index[:5], 'A'] == 0).all() padded = self.tsframe.fillna(method='pad') assert np.isnan(padded.loc[padded.index[:5], 'A']).all() assert (padded.loc[padded.index[-5:], 'A'] == padded.loc[padded.index[-5], 'A']).all() # mixed type mf = self.mixed_frame mf.loc[mf.index[5:20], 'foo'] = np.nan mf.loc[mf.index[-10:], 'A'] = np.nan result = self.mixed_frame.fillna(value=0) result = self.mixed_frame.fillna(method='pad') pytest.raises(ValueError, self.tsframe.fillna) pytest.raises(ValueError, self.tsframe.fillna, 5, method='ffill') # mixed numeric (but no float16) mf = self.mixed_float.reindex(columns=['A', 'B', 'D']) mf.loc[mf.index[-10:], 'A'] = np.nan result = mf.fillna(value=0) _check_mixed_float(result, dtype=dict(C=None)) result = mf.fillna(method='pad') _check_mixed_float(result, dtype=dict(C=None)) # empty frame (GH #2778) df = DataFrame(columns=['x']) for m in ['pad', 'backfill']: df.x.fillna(method=m, inplace=True) df.x.fillna(method=m) # with different dtype (GH3386) df = DataFrame([['a', 'a', np.nan, 'a'], [ 'b', 'b', np.nan, 'b'], ['c', 'c', np.nan, 'c']]) result = df.fillna({2: 'foo'}) expected = DataFrame([['a', 'a', 'foo', 'a'], ['b', 'b', 'foo', 'b'], ['c', 'c', 'foo', 'c']]) assert_frame_equal(result, expected) df.fillna({2: 'foo'}, inplace=True) assert_frame_equal(df, expected) # limit and value df = DataFrame(np.random.randn(10, 3)) df.iloc[2:7, 0] = np.nan df.iloc[3:5, 2] = np.nan expected = df.copy() expected.iloc[2, 0] = 999 expected.iloc[3, 2] = 999 result = df.fillna(999, limit=1) assert_frame_equal(result, expected) # with datelike # GH 6344 df = DataFrame({ 'Date': [pd.NaT, Timestamp("2014-1-1")], 'Date2': [Timestamp("2013-1-1"), pd.NaT] }) expected = df.copy() expected['Date'] = expected['Date'].fillna( df.loc[df.index[0], 'Date2']) result = df.fillna(value={'Date': df['Date2']}) assert_frame_equal(result, expected) # with timezone # GH 15855 df = pd.DataFrame({'A': [pd.Timestamp('2012-11-11 00:00:00+01:00'), pd.NaT]}) exp = pd.DataFrame({'A': [pd.Timestamp('2012-11-11 00:00:00+01:00'), pd.Timestamp('2012-11-11 00:00:00+01:00')]}) assert_frame_equal(df.fillna(method='pad'), exp) df = pd.DataFrame({'A': [pd.NaT, pd.Timestamp('2012-11-11 00:00:00+01:00')]}) exp = pd.DataFrame({'A': [pd.Timestamp('2012-11-11 00:00:00+01:00'), pd.Timestamp('2012-11-11 00:00:00+01:00')]}) assert_frame_equal(df.fillna(method='bfill'), exp) # with timezone in another column # GH 15522 df = pd.DataFrame({'A': pd.date_range('20130101', periods=4, tz='US/Eastern'), 'B': [1, 2, np.nan, np.nan]}) result = df.fillna(method='pad') expected = pd.DataFrame({'A': pd.date_range('20130101', periods=4, tz='US/Eastern'), 'B': [1., 2., 2., 2.]}) assert_frame_equal(result, expected) def test_na_actions_categorical(self): cat = Categorical([1, 2, 3, np.nan], categories=[1, 2, 3]) vals = ["a", "b", np.nan, "d"] df = DataFrame({"cats": cat, "vals": vals}) cat2 = Categorical([1, 2, 3, 3], categories=[1, 2, 3]) vals2 = ["a", "b", "b", "d"] df_exp_fill = DataFrame({"cats": cat2, "vals": vals2}) cat3 = Categorical([1, 2, 3], categories=[1, 2, 3]) vals3 = ["a", "b", np.nan] df_exp_drop_cats = DataFrame({"cats": cat3, "vals": vals3}) cat4 = Categorical([1, 2], categories=[1, 2, 3]) vals4 = ["a", "b"] df_exp_drop_all = DataFrame({"cats": cat4, "vals": vals4}) # fillna res = df.fillna(value={"cats": 3, "vals": "b"}) tm.assert_frame_equal(res, df_exp_fill) with pytest.raises(ValueError, match=("fill value must " "be in categories")): df.fillna(value={"cats": 4, "vals": "c"}) res = df.fillna(method='pad') tm.assert_frame_equal(res, df_exp_fill) # dropna res = df.dropna(subset=["cats"]) tm.assert_frame_equal(res, df_exp_drop_cats) res = df.dropna() tm.assert_frame_equal(res, df_exp_drop_all) # make sure that fillna takes missing values into account c = Categorical([np.nan, "b", np.nan], categories=["a", "b"]) df = pd.DataFrame({"cats": c, "vals": [1, 2, 3]}) cat_exp = Categorical(["a", "b", "a"], categories=["a", "b"]) df_exp = DataFrame({"cats": cat_exp, "vals": [1, 2, 3]}) res = df.fillna("a") tm.assert_frame_equal(res, df_exp) def test_fillna_categorical_nan(self): # GH 14021 # np.nan should always be a valid filler cat = Categorical([np.nan, 2, np.nan]) val = Categorical([np.nan, np.nan, np.nan]) df = DataFrame({"cats": cat, "vals": val}) res = df.fillna(df.median()) v_exp = [np.nan, np.nan, np.nan] df_exp = DataFrame({"cats": [2, 2, 2], "vals": v_exp}, dtype='category') tm.assert_frame_equal(res, df_exp) result = df.cats.fillna(np.nan) tm.assert_series_equal(result, df.cats) result = df.vals.fillna(np.nan) tm.assert_series_equal(result, df.vals) idx = pd.DatetimeIndex(['2011-01-01 09:00', '2016-01-01 23:45', '2011-01-01 09:00', pd.NaT, pd.NaT]) df = DataFrame({'a': Categorical(idx)}) tm.assert_frame_equal(df.fillna(value=pd.NaT), df) idx = pd.PeriodIndex(['2011-01', '2011-01', '2011-01', pd.NaT, pd.NaT], freq='M') df = DataFrame({'a': Categorical(idx)}) tm.assert_frame_equal(df.fillna(value=pd.NaT), df) idx = pd.TimedeltaIndex(['1 days', '2 days', '1 days', pd.NaT, pd.NaT]) df = DataFrame({'a': Categorical(idx)}) tm.assert_frame_equal(df.fillna(value=pd.NaT), df) def test_fillna_downcast(self): # GH 15277 # infer int64 from float64 df = pd.DataFrame({'a': [1., np.nan]}) result = df.fillna(0, downcast='infer') expected = pd.DataFrame({'a': [1, 0]}) assert_frame_equal(result, expected) # infer int64 from float64 when fillna value is a dict df = pd.DataFrame({'a': [1., np.nan]}) result = df.fillna({'a': 0}, downcast='infer') expected = pd.DataFrame({'a': [1, 0]}) assert_frame_equal(result, expected) def test_fillna_dtype_conversion(self): # make sure that fillna on an empty frame works df = DataFrame(index=["A", "B", "C"], columns=[1, 2, 3, 4, 5]) result = df.get_dtype_counts().sort_values() expected = Series({'object': 5}) assert_series_equal(result, expected) result = df.fillna(1) expected = DataFrame(1, index=["A", "B", "C"], columns=[1, 2, 3, 4, 5]) result = result.get_dtype_counts().sort_values() expected = Series({'int64': 5}) assert_series_equal(result, expected) # empty block df = DataFrame(index=lrange(3), columns=['A', 'B'], dtype='float64') result = df.fillna('nan') expected = DataFrame('nan', index=lrange(3), columns=['A', 'B']) assert_frame_equal(result, expected) # equiv of replace df = DataFrame(dict(A=[1, np.nan], B=[1., 2.])) for v in ['', 1, np.nan, 1.0]: expected = df.replace(np.nan, v) result = df.fillna(v) assert_frame_equal(result, expected) def test_fillna_datetime_columns(self): # GH 7095 df = pd.DataFrame({'A': [-1, -2, np.nan], 'B': date_range('20130101', periods=3), 'C': ['foo', 'bar', None], 'D': ['foo2', 'bar2', None]}, index=date_range('20130110', periods=3)) result = df.fillna('?') expected = pd.DataFrame({'A': [-1, -2, '?'], 'B': date_range('20130101', periods=3), 'C': ['foo', 'bar', '?'], 'D': ['foo2', 'bar2', '?']}, index=date_range('20130110', periods=3)) tm.assert_frame_equal(result, expected) df = pd.DataFrame({'A': [-1, -2, np.nan], 'B': [pd.Timestamp('2013-01-01'), pd.Timestamp('2013-01-02'), pd.NaT], 'C': ['foo', 'bar', None], 'D': ['foo2', 'bar2', None]}, index=date_range('20130110', periods=3)) result = df.fillna('?') expected = pd.DataFrame({'A': [-1, -2, '?'], 'B': [pd.Timestamp('2013-01-01'), pd.Timestamp('2013-01-02'), '?'], 'C': ['foo', 'bar', '?'], 'D': ['foo2', 'bar2', '?']}, index=pd.date_range('20130110', periods=3)) tm.assert_frame_equal(result, expected) def test_ffill(self): self.tsframe['A'][:5] = np.nan self.tsframe['A'][-5:] = np.nan assert_frame_equal(self.tsframe.ffill(), self.tsframe.fillna(method='ffill')) def test_bfill(self): self.tsframe['A'][:5] = np.nan self.tsframe['A'][-5:] = np.nan assert_frame_equal(self.tsframe.bfill(), self.tsframe.fillna(method='bfill')) def test_frame_pad_backfill_limit(self): index = np.arange(10) df = DataFrame(np.random.randn(10, 4), index=index) result = df[:2].reindex(index, method='pad', limit=5) expected = df[:2].reindex(index).fillna(method='pad') expected.values[-3:] = np.nan tm.assert_frame_equal(result, expected) result = df[-2:].reindex(index, method='backfill', limit=5) expected = df[-2:].reindex(index).fillna(method='backfill') expected.values[:3] = np.nan tm.assert_frame_equal(result, expected) def test_frame_fillna_limit(self): index = np.arange(10) df = DataFrame(np.random.randn(10, 4), index=index) result = df[:2].reindex(index) result = result.fillna(method='pad', limit=5) expected = df[:2].reindex(index).fillna(method='pad') expected.values[-3:] = np.nan tm.assert_frame_equal(result, expected) result = df[-2:].reindex(index) result = result.fillna(method='backfill', limit=5) expected = df[-2:].reindex(index).fillna(method='backfill') expected.values[:3] = np.nan tm.assert_frame_equal(result, expected) def test_fillna_skip_certain_blocks(self): # don't try to fill boolean, int blocks df = DataFrame(np.random.randn(10, 4).astype(int)) # it works! df.fillna(np.nan) def test_fillna_inplace(self): df = DataFrame(np.random.randn(10, 4)) df[1][:4] = np.nan df[3][-4:] = np.nan expected = df.fillna(value=0) assert expected is not df df.fillna(value=0, inplace=True) tm.assert_frame_equal(df, expected) expected = df.fillna(value={0: 0}, inplace=True) assert expected is None df[1][:4] = np.nan df[3][-4:] = np.nan expected = df.fillna(method='ffill') assert expected is not df df.fillna(method='ffill', inplace=True) tm.assert_frame_equal(df, expected) def test_fillna_dict_series(self): df = DataFrame({'a': [np.nan, 1, 2, np.nan, np.nan], 'b': [1, 2, 3, np.nan, np.nan], 'c': [np.nan, 1, 2, 3, 4]}) result = df.fillna({'a': 0, 'b': 5}) expected = df.copy() expected['a'] = expected['a'].fillna(0) expected['b'] = expected['b'].fillna(5) assert_frame_equal(result, expected) # it works result = df.fillna({'a': 0, 'b': 5, 'd': 7}) # Series treated same as dict result = df.fillna(df.max()) expected = df.fillna(df.max().to_dict()) assert_frame_equal(result, expected) # disable this for now with pytest.raises(NotImplementedError, match='column by column'): df.fillna(df.max(1), axis=1) def test_fillna_dataframe(self): # GH 8377 df = DataFrame({'a': [np.nan, 1, 2, np.nan, np.nan], 'b': [1, 2, 3, np.nan, np.nan], 'c': [np.nan, 1, 2, 3, 4]}, index=list('VWXYZ')) # df2 may have different index and columns df2 = DataFrame({'a': [np.nan, 10, 20, 30, 40], 'b': [50, 60, 70, 80, 90], 'foo': ['bar'] * 5}, index=list('VWXuZ')) result = df.fillna(df2) # only those columns and indices which are shared get filled expected = DataFrame({'a': [np.nan, 1, 2, np.nan, 40], 'b': [1, 2, 3, np.nan, 90], 'c': [np.nan, 1, 2, 3, 4]}, index=list('VWXYZ')) assert_frame_equal(result, expected) def test_fillna_columns(self): df = DataFrame(np.random.randn(10, 10)) df.values[:, ::2] = np.nan result = df.fillna(method='ffill', axis=1) expected = df.T.fillna(method='pad').T assert_frame_equal(result, expected) df.insert(6, 'foo', 5) result = df.fillna(method='ffill', axis=1) expected = df.astype(float).fillna(method='ffill', axis=1) assert_frame_equal(result, expected) def test_fillna_invalid_method(self): with pytest.raises(ValueError, match='ffil'): self.frame.fillna(method='ffil') def test_fillna_invalid_value(self): # list pytest.raises(TypeError, self.frame.fillna, [1, 2]) # tuple pytest.raises(TypeError, self.frame.fillna, (1, 2)) # frame with series pytest.raises(TypeError, self.frame.iloc[:, 0].fillna, self.frame) def test_fillna_col_reordering(self): cols = ["COL." + str(i) for i in range(5, 0, -1)] data = np.random.rand(20, 5) df = DataFrame(index=lrange(20), columns=cols, data=data) filled = df.fillna(method='ffill') assert df.columns.tolist() == filled.columns.tolist() def test_fill_corner(self): mf = self.mixed_frame mf.loc[mf.index[5:20], 'foo'] = np.nan mf.loc[mf.index[-10:], 'A'] = np.nan filled = self.mixed_frame.fillna(value=0) assert (filled.loc[filled.index[5:20], 'foo'] == 0).all() del self.mixed_frame['foo'] empty_float = self.frame.reindex(columns=[]) # TODO(wesm): unused? result = empty_float.fillna(value=0) # noqa def test_fill_value_when_combine_const(self): # GH12723 dat = np.array([0, 1, np.nan, 3, 4, 5], dtype='float') df = DataFrame({'foo': dat}, index=range(6)) exp = df.fillna(0).add(2) res = df.add(2, fill_value=0) assert_frame_equal(res, exp) class TestDataFrameInterpolate(TestData): def test_interp_basic(self): df = DataFrame({'A': [1, 2, np.nan, 4], 'B': [1, 4, 9, np.nan], 'C': [1, 2, 3, 5], 'D': list('abcd')}) expected = DataFrame({'A': [1., 2., 3., 4.], 'B': [1., 4., 9., 9.], 'C': [1, 2, 3, 5], 'D': list('abcd')}) result = df.interpolate() assert_frame_equal(result, expected) result = df.set_index('C').interpolate() expected = df.set_index('C') expected.loc[3, 'A'] = 3 expected.loc[5, 'B'] = 9 assert_frame_equal(result, expected) def test_interp_bad_method(self): df = DataFrame({'A': [1, 2, np.nan, 4], 'B': [1, 4, 9, np.nan], 'C': [1, 2, 3, 5], 'D': list('abcd')}) with pytest.raises(ValueError): df.interpolate(method='not_a_method') def test_interp_combo(self): df = DataFrame({'A': [1., 2., np.nan, 4.], 'B': [1, 4, 9, np.nan], 'C': [1, 2, 3, 5], 'D': list('abcd')}) result = df['A'].interpolate() expected = Series([1., 2., 3., 4.], name='A') assert_series_equal(result, expected) result = df['A'].interpolate(downcast='infer') expected = Series([1, 2, 3, 4], name='A') assert_series_equal(result, expected) def test_interp_nan_idx(self): df = DataFrame({'A': [1, 2, np.nan, 4], 'B': [np.nan, 2, 3, 4]}) df = df.set_index('A') with pytest.raises(NotImplementedError): df.interpolate(method='values') @td.skip_if_no_scipy def test_interp_various(self): df = DataFrame({'A': [1, 2, np.nan, 4, 5, np.nan, 7], 'C': [1, 2, 3, 5, 8, 13, 21]}) df = df.set_index('C') expected = df.copy() result = df.interpolate(method='polynomial', order=1) expected.A.loc[3] = 2.66666667 expected.A.loc[13] = 5.76923076 assert_frame_equal(result, expected) result = df.interpolate(method='cubic') # GH #15662. # new cubic and quadratic interpolation algorithms from scipy 0.19.0. # previously `splmake` was used. See scipy/scipy#6710 if _is_scipy_ge_0190: expected.A.loc[3] = 2.81547781 expected.A.loc[13] = 5.52964175 else: expected.A.loc[3] = 2.81621174 expected.A.loc[13] = 5.64146581 assert_frame_equal(result, expected) result = df.interpolate(method='nearest') expected.A.loc[3] = 2 expected.A.loc[13] = 5 assert_frame_equal(result, expected, check_dtype=False) result = df.interpolate(method='quadratic') if _is_scipy_ge_0190: expected.A.loc[3] = 2.82150771 expected.A.loc[13] = 6.12648668 else: expected.A.loc[3] = 2.82533638 expected.A.loc[13] = 6.02817974 assert_frame_equal(result, expected) result = df.interpolate(method='slinear') expected.A.loc[3] = 2.66666667 expected.A.loc[13] = 5.76923077 assert_frame_equal(result, expected) result = df.interpolate(method='zero') expected.A.loc[3] = 2. expected.A.loc[13] = 5 assert_frame_equal(result, expected, check_dtype=False) @td.skip_if_no_scipy def test_interp_alt_scipy(self): df = DataFrame({'A': [1, 2, np.nan, 4, 5, np.nan, 7], 'C': [1, 2, 3, 5, 8, 13, 21]}) result = df.interpolate(method='barycentric') expected = df.copy() expected.loc[2, 'A'] = 3 expected.loc[5, 'A'] = 6 assert_frame_equal(result, expected) result = df.interpolate(method='barycentric', downcast='infer') assert_frame_equal(result, expected.astype(np.int64)) result = df.interpolate(method='krogh') expectedk = df.copy() expectedk['A'] = expected['A'] assert_frame_equal(result, expectedk) _skip_if_no_pchip() import scipy result = df.interpolate(method='pchip') expected.loc[2, 'A'] = 3 if LooseVersion(scipy.__version__) >= LooseVersion('0.17.0'): expected.loc[5, 'A'] = 6.0 else: expected.loc[5, 'A'] = 6.125 assert_frame_equal(result, expected) def test_interp_rowwise(self): df = DataFrame({0: [1, 2, np.nan, 4], 1: [2, 3, 4, np.nan], 2: [np.nan, 4, 5, 6], 3: [4, np.nan, 6, 7], 4: [1, 2, 3, 4]}) result = df.interpolate(axis=1) expected = df.copy() expected.loc[3, 1] = 5 expected.loc[0, 2] = 3 expected.loc[1, 3] = 3 expected[4] = expected[4].astype(np.float64) assert_frame_equal(result, expected) result = df.interpolate(axis=1, method='values') assert_frame_equal(result, expected) result = df.interpolate(axis=0) expected = df.interpolate() assert_frame_equal(result, expected) def test_rowwise_alt(self): df = DataFrame({0: [0, .5, 1., np.nan, 4, 8, np.nan, np.nan, 64], 1: [1, 2, 3, 4, 3, 2, 1, 0, -1]}) df.interpolate(axis=0) @pytest.mark.parametrize("check_scipy", [ False, pytest.param(True, marks=td.skip_if_no_scipy) ]) def test_interp_leading_nans(self, check_scipy): df = DataFrame({"A": [np.nan, np.nan, .5, .25, 0], "B": [np.nan, -3, -3.5, np.nan, -4]}) result = df.interpolate() expected = df.copy() expected['B'].loc[3] = -3.75 assert_frame_equal(result, expected) if check_scipy: result = df.interpolate(method='polynomial', order=1) assert_frame_equal(result, expected) def test_interp_raise_on_only_mixed(self): df = DataFrame({'A': [1, 2, np.nan, 4], 'B': ['a', 'b', 'c', 'd'], 'C': [np.nan, 2, 5, 7], 'D': [np.nan, np.nan, 9, 9], 'E': [1, 2, 3, 4]}) with pytest.raises(TypeError): df.interpolate(axis=1) def test_interp_raise_on_all_object_dtype(self): # GH 22985 df = DataFrame({ 'A': [1, 2, 3], 'B': [4, 5, 6]}, dtype='object') msg = ("Cannot interpolate with all object-dtype columns " "in the DataFrame. Try setting at least one " "column to a numeric dtype.") with pytest.raises(TypeError, match=msg): df.interpolate() def test_interp_inplace(self): df = DataFrame({'a': [1., 2., np.nan, 4.]}) expected = DataFrame({'a': [1., 2., 3., 4.]}) result = df.copy() result['a'].interpolate(inplace=True) assert_frame_equal(result, expected) result = df.copy() result['a'].interpolate(inplace=True, downcast='infer') assert_frame_equal(result, expected.astype('int64')) def test_interp_inplace_row(self): # GH 10395 result = DataFrame({'a': [1., 2., 3., 4.], 'b': [np.nan, 2., 3., 4.], 'c': [3, 2, 2, 2]}) expected = result.interpolate(method='linear', axis=1, inplace=False) result.interpolate(method='linear', axis=1, inplace=True) assert_frame_equal(result, expected) def test_interp_ignore_all_good(self): # GH df = DataFrame({'A': [1, 2, np.nan, 4], 'B': [1, 2, 3, 4], 'C': [1., 2., np.nan, 4.], 'D': [1., 2., 3., 4.]}) expected = DataFrame({'A': np.array( [1, 2, 3, 4], dtype='float64'), 'B': np.array( [1, 2, 3, 4], dtype='int64'), 'C': np.array( [1., 2., 3, 4.], dtype='float64'), 'D': np.array( [1., 2., 3., 4.], dtype='float64')}) result = df.interpolate(downcast=None) assert_frame_equal(result, expected) # all good result = df[['B', 'D']].interpolate(downcast=None) assert_frame_equal(result, df[['B', 'D']])