test_missing.py 53 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374
  1. # coding=utf-8
  2. # pylint: disable-msg=E1101,W0612
  3. from datetime import datetime, timedelta
  4. from distutils.version import LooseVersion
  5. import numpy as np
  6. from numpy import nan
  7. import pytest
  8. import pytz
  9. from pandas._libs.tslib import iNaT
  10. from pandas.compat import range
  11. from pandas.errors import PerformanceWarning
  12. import pandas.util._test_decorators as td
  13. import pandas as pd
  14. from pandas import (
  15. Categorical, DataFrame, Index, IntervalIndex, MultiIndex, NaT, Series,
  16. Timestamp, date_range, isna)
  17. from pandas.core.series import remove_na
  18. import pandas.util.testing as tm
  19. from pandas.util.testing import assert_frame_equal, assert_series_equal
  # Record whether the installed SciPy (if any) is at least version 0.19.0.
  try:
      import scipy
  except ImportError:
      # SciPy cannot be imported, so the version requirement is not met.
      _is_scipy_ge_0190 = False
  else:
      _is_scipy_ge_0190 = (LooseVersion(scipy.__version__) >=
                           LooseVersion('0.19.0'))
  def _skip_if_no_pchip():
      """Skip the calling test if ``pchip_interpolate`` cannot be imported."""
      try:
          from scipy.interpolate import pchip_interpolate  # noqa
      except ImportError:
          pchip_available = False
      else:
          pchip_available = True

      if not pchip_available:
          import pytest
          pytest.skip('scipy.interpolate.pchip missing')
  def _skip_if_no_akima():
      """Skip the calling test if ``Akima1DInterpolator`` cannot be imported."""
      try:
          from scipy.interpolate import Akima1DInterpolator  # noqa
      except ImportError:
          akima_available = False
      else:
          akima_available = True

      if not akima_available:
          import pytest
          pytest.skip('scipy.interpolate.Akima1DInterpolator missing')
  def _simple_ts(start, end, freq='D'):
      """Return a Series of random normal values over a date range.

      The index is ``date_range(start, end, freq=freq)``; one value is drawn
      with ``np.random.randn`` for every entry of that index.
      """
      index = date_range(start, end, freq=freq)
      values = np.random.randn(len(index))
      return Series(values, index=index)
  41. class TestSeriesMissingData():
  def test_remove_na_deprecation(self):
      """``remove_na`` must emit a ``FutureWarning`` when called."""
      # see gh-16971
      expect_future_warning = tm.assert_produces_warning(FutureWarning)
      with expect_future_warning:
          remove_na(Series([]))
  def test_timedelta_fillna(self):
      """Exercise ``fillna``, ``ffill`` and ``bfill`` on a timedelta Series.

      The Series under test is the ``diff()`` of four timestamps.  Filling
      with a plain integer is expected to emit a ``FutureWarning``; filling
      with ``timedelta``, ``np.timedelta64`` and ``NaT`` values is checked
      without any warning assertion.
      """
      # GH 3371
      s = Series([Timestamp('20130101'), Timestamp('20130101'),
                  Timestamp('20130102'), Timestamp('20130103 9:01:01')])
      # NOTE: this local name shadows the module-level ``td`` import
      # (pandas.util._test_decorators) for the rest of this method.
      td = s.diff()

      # reg fillna
      with tm.assert_produces_warning(FutureWarning):
          result = td.fillna(0)
      expected = Series([timedelta(0), timedelta(0), timedelta(1),
                         timedelta(days=1, seconds=9 * 3600 + 60 + 1)])
      assert_series_equal(result, expected)

      # interpreted as seconds, deprecated
      with tm.assert_produces_warning(FutureWarning):
          result = td.fillna(1)
      expected = Series([timedelta(seconds=1),
                         timedelta(0), timedelta(1),
                         timedelta(days=1, seconds=9 * 3600 + 60 + 1)])
      assert_series_equal(result, expected)

      # fill with an explicit timedelta
      result = td.fillna(timedelta(days=1, seconds=1))
      expected = Series([timedelta(days=1, seconds=1), timedelta(0),
                         timedelta(1),
                         timedelta(days=1, seconds=9 * 3600 + 60 + 1)])
      assert_series_equal(result, expected)

      # fill with a numpy timedelta64; the expected value is one second
      result = td.fillna(np.timedelta64(int(1e9)))
      expected = Series([timedelta(seconds=1), timedelta(0), timedelta(1),
                         timedelta(days=1, seconds=9 * 3600 + 60 + 1)])
      assert_series_equal(result, expected)

      # filling with NaT keeps the first entry missing
      result = td.fillna(NaT)
      expected = Series([NaT, timedelta(0), timedelta(1),
                         timedelta(days=1, seconds=9 * 3600 + 60 + 1)],
                        dtype='m8[ns]')
      assert_series_equal(result, expected)

      # ffill
      td[2] = np.nan
      result = td.ffill()
      with tm.assert_produces_warning(FutureWarning):
          expected = td.fillna(0)
      # the leading entry has nothing before it to pad from
      expected[0] = np.nan
      assert_series_equal(result, expected)

      # bfill
      td[2] = np.nan
      result = td.bfill()
      with tm.assert_produces_warning(FutureWarning):
          expected = td.fillna(0)
      # position 2 is back-filled from the last entry
      expected[2] = timedelta(days=1, seconds=9 * 3600 + 60 + 1)
      assert_series_equal(result, expected)
  def test_datetime64_fillna(self):
      """Check value, pad and backfill filling on a datetime64 Series."""
      day1 = Timestamp('20130101')
      day3 = Timestamp('20130103 9:01:01')

      s = Series([day1, Timestamp('20130101'), Timestamp('20130102'), day3])
      s[2] = np.nan

      # reg fillna
      filled = s.fillna(Timestamp('20130104'))
      wanted = Series([day1, day1, Timestamp('20130104'), day3])
      assert_series_equal(filled, wanted)

      # filling with NaT is compared against the unfilled series
      filled = s.fillna(NaT)
      assert_series_equal(filled, s)

      # ffill
      filled = s.ffill()
      wanted = Series([day1, day1, day1, day3])
      assert_series_equal(filled, wanted)

      # bfill
      filled = s.bfill()
      wanted = Series([day1, day1, day3, day3])
      assert_series_equal(filled, wanted)

      # GH 6587
      # make sure that we are treating as integer when filling
      # this also tests inference of a datetime-like with NaT's
      stamp = '2013-08-05 15:30:00.000001'
      s = Series([pd.NaT, pd.NaT, stamp])
      wanted = Series([stamp, stamp, stamp], dtype='M8[ns]')
      filled = s.fillna(method='backfill')
      assert_series_equal(filled, wanted)
  def test_datetime64_tz_fillna(self):
      """Fill naive and tz-aware datetime Series with assorted fill values.

      For each time zone the naive series and then the tz-aware series are
      filled with naive/aware timestamps, a string and per-index dicts.
      After every fill the source series is re-checked to confirm it still
      has its missing values.
      """
      null_loc = pd.Series([False, True, False, True])

      def check(expected, result):
          # compare the filled result, then check ``s`` is not changed
          tm.assert_series_equal(expected, result)
          tm.assert_series_equal(pd.isna(s), null_loc)

      for tz in ['US/Eastern', 'Asia/Tokyo']:
          # DatetimeBlock
          s = Series([Timestamp('2011-01-01 10:00'), pd.NaT,
                      Timestamp('2011-01-03 10:00'), pd.NaT])

          result = s.fillna(pd.Timestamp('2011-01-02 10:00'))
          expected = Series([Timestamp('2011-01-01 10:00'),
                             Timestamp('2011-01-02 10:00'),
                             Timestamp('2011-01-03 10:00'),
                             Timestamp('2011-01-02 10:00')])
          check(expected, result)

          result = s.fillna(pd.Timestamp('2011-01-02 10:00', tz=tz))
          expected = Series([Timestamp('2011-01-01 10:00'),
                             Timestamp('2011-01-02 10:00', tz=tz),
                             Timestamp('2011-01-03 10:00'),
                             Timestamp('2011-01-02 10:00', tz=tz)])
          check(expected, result)

          result = s.fillna('AAA')
          expected = Series([Timestamp('2011-01-01 10:00'), 'AAA',
                             Timestamp('2011-01-03 10:00'), 'AAA'],
                            dtype=object)
          check(expected, result)

          result = s.fillna({1: pd.Timestamp('2011-01-02 10:00', tz=tz),
                             3: pd.Timestamp('2011-01-04 10:00')})
          expected = Series([Timestamp('2011-01-01 10:00'),
                             Timestamp('2011-01-02 10:00', tz=tz),
                             Timestamp('2011-01-03 10:00'),
                             Timestamp('2011-01-04 10:00')])
          check(expected, result)

          result = s.fillna({1: pd.Timestamp('2011-01-02 10:00'),
                             3: pd.Timestamp('2011-01-04 10:00')})
          expected = Series([Timestamp('2011-01-01 10:00'),
                             Timestamp('2011-01-02 10:00'),
                             Timestamp('2011-01-03 10:00'),
                             Timestamp('2011-01-04 10:00')])
          check(expected, result)

          # DatetimeBlockTZ
          idx = pd.DatetimeIndex(['2011-01-01 10:00', pd.NaT,
                                  '2011-01-03 10:00', pd.NaT], tz=tz)
          s = pd.Series(idx)
          assert s.dtype == 'datetime64[ns, {0}]'.format(tz)
          tm.assert_series_equal(pd.isna(s), null_loc)

          result = s.fillna(pd.Timestamp('2011-01-02 10:00'))
          expected = Series([Timestamp('2011-01-01 10:00', tz=tz),
                             Timestamp('2011-01-02 10:00'),
                             Timestamp('2011-01-03 10:00', tz=tz),
                             Timestamp('2011-01-02 10:00')])
          check(expected, result)

          result = s.fillna(pd.Timestamp('2011-01-02 10:00', tz=tz))
          idx = pd.DatetimeIndex(['2011-01-01 10:00', '2011-01-02 10:00',
                                  '2011-01-03 10:00', '2011-01-02 10:00'],
                                 tz=tz)
          expected = Series(idx)
          check(expected, result)

          result = s.fillna(pd.Timestamp('2011-01-02 10:00',
                                         tz=tz).to_pydatetime())
          idx = pd.DatetimeIndex(['2011-01-01 10:00', '2011-01-02 10:00',
                                  '2011-01-03 10:00', '2011-01-02 10:00'],
                                 tz=tz)
          expected = Series(idx)
          check(expected, result)

          result = s.fillna('AAA')
          expected = Series([Timestamp('2011-01-01 10:00', tz=tz), 'AAA',
                             Timestamp('2011-01-03 10:00', tz=tz), 'AAA'],
                            dtype=object)
          check(expected, result)

          result = s.fillna({1: pd.Timestamp('2011-01-02 10:00', tz=tz),
                             3: pd.Timestamp('2011-01-04 10:00')})
          expected = Series([Timestamp('2011-01-01 10:00', tz=tz),
                             Timestamp('2011-01-02 10:00', tz=tz),
                             Timestamp('2011-01-03 10:00', tz=tz),
                             Timestamp('2011-01-04 10:00')])
          check(expected, result)

          result = s.fillna({1: pd.Timestamp('2011-01-02 10:00', tz=tz),
                             3: pd.Timestamp('2011-01-04 10:00', tz=tz)})
          expected = Series([Timestamp('2011-01-01 10:00', tz=tz),
                             Timestamp('2011-01-02 10:00', tz=tz),
                             Timestamp('2011-01-03 10:00', tz=tz),
                             Timestamp('2011-01-04 10:00', tz=tz)])
          check(expected, result)

          # filling with a naive/other zone, coerce to object
          result = s.fillna(Timestamp('20130101'))
          expected = Series([Timestamp('2011-01-01 10:00', tz=tz),
                             Timestamp('2013-01-01'),
                             Timestamp('2011-01-03 10:00', tz=tz),
                             Timestamp('2013-01-01')])
          check(expected, result)

          result = s.fillna(Timestamp('20130101', tz='US/Pacific'))
          expected = Series([Timestamp('2011-01-01 10:00', tz=tz),
                             Timestamp('2013-01-01', tz='US/Pacific'),
                             Timestamp('2011-01-03 10:00', tz=tz),
                             Timestamp('2013-01-01', tz='US/Pacific')])
          check(expected, result)

      # with timezone
      # GH 15855
      aware = pd.Timestamp('2012-11-11 00:00:00+01:00')

      df = pd.Series([aware, pd.NaT])
      exp = pd.Series([pd.Timestamp('2012-11-11 00:00:00+01:00'),
                       pd.Timestamp('2012-11-11 00:00:00+01:00')])
      assert_series_equal(df.fillna(method='pad'), exp)

      df = pd.Series([pd.NaT, aware])
      exp = pd.Series([pd.Timestamp('2012-11-11 00:00:00+01:00'),
                       pd.Timestamp('2012-11-11 00:00:00+01:00')])
      assert_series_equal(df.fillna(method='bfill'), exp)
  def test_fillna_consistency(self):
      """Filling a tz-naive datetime Series with other kinds gives object.

      Covers ``fillna`` with a tz-aware timestamp, the equivalent ``where``
      call, ``fillna`` with a string, and item assignment of a string.
      """
      # GH 16402
      # fillna with a tz aware to a tz-naive, should result in object
      s = Series([Timestamp('20130101'), pd.NaT])

      result = s.fillna(Timestamp('20130101', tz='US/Eastern'))
      expected = Series([Timestamp('20130101'),
                         Timestamp('2013-01-01', tz='US/Eastern')],
                        dtype='object')
      assert_series_equal(result, expected)

      # where (we ignore the errors=)
      # This check used to appear twice, byte-for-byte; one copy is enough.
      result = s.where([True, False],
                       Timestamp('20130101', tz='US/Eastern'),
                       errors='ignore')
      assert_series_equal(result, expected)

      # with a non-datetime
      result = s.fillna('foo')
      expected = Series([Timestamp('20130101'),
                         'foo'])
      assert_series_equal(result, expected)

      # assignment
      s2 = s.copy()
      s2[1] = 'foo'
      assert_series_equal(s2, expected)
  def test_datetime64tz_fillna_round_issue(self):
      """Back-filling a UTC datetime must keep its microsecond value."""
      # GH 14872
      stamp = datetime(2016, 12, 12, 22, 24, 6, 100001, tzinfo=pytz.utc)

      data = pd.Series([pd.NaT, pd.NaT, stamp])
      filled = data.fillna(method='bfill')

      # every position is expected to hold the single non-missing value
      expected = pd.Series([stamp, stamp, stamp])
      assert_series_equal(filled, expected)
  def test_fillna_downcast(self):
      """``downcast='infer'`` turns a filled float Series into integers."""
      # GH 15277
      # infer int64 from float64, first with a scalar fill value and then
      # when fillna value is a dict
      for fill_value in (0, {1: 0}):
          floats = pd.Series([1., np.nan])
          result = floats.fillna(fill_value, downcast='infer')
          expected = pd.Series([1, 0])
          assert_series_equal(result, expected)
  def test_fillna_int(self):
      """Forward-filling an integer Series in place or not gives the same."""
      ints = Series(np.random.randint(-100, 100, 50))
      ints.fillna(method='ffill', inplace=True)
      refilled = ints.fillna(method='ffill', inplace=False)
      assert_series_equal(refilled, ints)
  def test_fillna_raise(self):
      """Invalid ``fillna`` arguments must raise with the expected message."""
      ints = Series(np.random.randint(-100, 100, 50))

      # a list is not an accepted fill value
      list_msg = ('"value" parameter must be a scalar or dict, but you passed a'
                  ' "list"')
      with pytest.raises(TypeError, match=list_msg):
          ints.fillna([1, 2])

      # neither is a tuple
      tuple_msg = ('"value" parameter must be a scalar or dict, but you passed a'
                   ' "tuple"')
      with pytest.raises(TypeError, match=tuple_msg):
          ints.fillna((1, 2))

      # related GH 9217, make sure limit is an int and greater than 0
      s = Series([1, 2, 3, None])
      msg = (r"Cannot specify both 'value' and 'method'\.|"
             r"Limit must be greater than 0|"
             "Limit must be an integer")
      methods = ['backfill', 'bfill', 'pad', 'ffill', None]
      for limit in [-1, 0, 1., 2.]:
          for method in methods:
              with pytest.raises(ValueError, match=msg):
                  s.fillna(1, limit=limit, method=method)
  317. def test_categorical_nan_equality(self):
  318. cat = Series(Categorical(["a", "b", "c", np.nan]))
  319. exp = Series([True, True, True, False])
  320. res = (cat == cat)
  321. tm.assert_series_equal(res, exp)
  322. def test_categorical_nan_handling(self):
  323. # NaNs are represented as -1 in labels
  324. s = Series(Categorical(["a", "b", np.nan, "a"]))
  325. tm.assert_index_equal(s.cat.categories, Index(["a", "b"]))
  326. tm.assert_numpy_array_equal(s.values.codes,
  327. np.array([0, 1, -1, 0], dtype=np.int8))
  328. @pytest.mark.parametrize('fill_value, expected_output', [
  329. ('a', ['a', 'a', 'b', 'a', 'a']),
  330. ({1: 'a', 3: 'b', 4: 'b'}, ['a', 'a', 'b', 'b', 'b']),
  331. ({1: 'a'}, ['a', 'a', 'b', np.nan, np.nan]),
  332. ({1: 'a', 3: 'b'}, ['a', 'a', 'b', 'b', np.nan]),
  333. (Series('a'), ['a', np.nan, 'b', np.nan, np.nan]),
  334. (Series('a', index=[1]), ['a', 'a', 'b', np.nan, np.nan]),
  335. (Series({1: 'a', 3: 'b'}), ['a', 'a', 'b', 'b', np.nan]),
  336. (Series(['a', 'b'], index=[3, 4]), ['a', np.nan, 'b', 'a', 'b'])
  337. ])
  338. def test_fillna_categorical(self, fill_value, expected_output):
  339. # GH 17033
  340. # Test fillna for a Categorical series
  341. data = ['a', np.nan, 'b', np.nan, np.nan]
  342. s = Series(Categorical(data, categories=['a', 'b']))
  343. exp = Series(Categorical(expected_output, categories=['a', 'b']))
  344. tm.assert_series_equal(s.fillna(fill_value), exp)
  def test_fillna_categorical_raise(self):
      """Bad fill values for a categorical Series raise clear errors."""
      data = ['a', np.nan, 'b', np.nan, np.nan]
      s = Series(Categorical(data, categories=['a', 'b']))

      # fill values outside the categories: scalar, Series and dict forms
      for outside in ('d', Series('d'), {1: 'd', 3: 'a'}):
          with pytest.raises(ValueError,
                             match="fill value must be in categories"):
              s.fillna(outside)

      # container types that are not accepted as a fill value
      list_msg = ('"value" parameter must be a scalar or '
                  'dict, but you passed a "list"')
      with pytest.raises(TypeError, match=list_msg):
          s.fillna(['a', 'b'])

      tuple_msg = ('"value" parameter must be a scalar or '
                   'dict, but you passed a "tuple"')
      with pytest.raises(TypeError, match=tuple_msg):
          s.fillna(('a', 'b'))

      frame_msg = ('"value" parameter must be a scalar, dict '
                   'or Series, but you passed a "DataFrame"')
      with pytest.raises(TypeError, match=frame_msg):
          s.fillna(DataFrame({1: ['a'], 3: ['b']}))
  def test_fillna_nat(self):
      """Fill an ``iNaT`` entry in datetime64 data by method and by value.

      Each case is run on a Series and on a one-column DataFrame built from
      it, once with the missing entry last (pad) and once with it first
      (bfill).
      """
      # missing entry in the last position: pad and fill-by-value agree
      series = Series([0, 1, 2, iNaT], dtype='M8[ns]')

      filled = series.fillna(method='pad')
      filled2 = series.fillna(value=series.values[2])

      expected = series.copy()
      # written through ``.values`` rather than via Series item assignment
      expected.values[3] = expected.values[2]

      assert_series_equal(filled, expected)
      assert_series_equal(filled2, expected)

      # same check through a DataFrame; ``expected`` is rebound to a frame
      df = DataFrame({'A': series})
      filled = df.fillna(method='pad')
      filled2 = df.fillna(value=series.values[2])
      expected = DataFrame({'A': expected})
      assert_frame_equal(filled, expected)
      assert_frame_equal(filled2, expected)

      # missing entry in the first position: bfill and fill-by-value agree
      series = Series([iNaT, 0, 1, 2], dtype='M8[ns]')

      filled = series.fillna(method='bfill')
      filled2 = series.fillna(value=series[1])

      expected = series.copy()
      expected[0] = expected[1]

      assert_series_equal(filled, expected)
      assert_series_equal(filled2, expected)

      df = DataFrame({'A': series})
      filled = df.fillna(method='bfill')
      filled2 = df.fillna(value=series[1])
      expected = DataFrame({'A': expected})
      assert_frame_equal(filled, expected)
      assert_frame_equal(filled2, expected)
  def test_isna_for_inf(self):
      """With ``mode.use_inf_as_na`` set, ``inf`` is treated as missing."""
      s = Series(['a', np.inf, np.nan, 1.0])

      with pd.option_context('mode.use_inf_as_na', True):
          mask = s.isna()
          kept = s.dropna()

      wanted_mask = Series([False, True, True, False])
      wanted_kept = Series(['a', 1.0], index=[0, 3])
      tm.assert_series_equal(mask, wanted_mask)
      tm.assert_series_equal(kept, wanted_kept)
  def test_isnull_for_inf_deprecated(self):
      """Same check as for ``use_inf_as_na``, via ``use_inf_as_null``."""
      # gh-17115
      s = Series(['a', np.inf, np.nan, 1.0])

      with pd.option_context('mode.use_inf_as_null', True):
          mask = s.isna()
          kept = s.dropna()

      wanted_mask = Series([False, True, True, False])
      wanted_kept = Series(['a', 1.0], index=[0, 3])
      tm.assert_series_equal(mask, wanted_mask)
      tm.assert_series_equal(kept, wanted_kept)
  def test_fillna(self, datetime_series):
      """Cover the main ``Series.fillna`` paths on float data.

      Checks method-based and value-based filling, the argument validation
      errors, filling from dicts and other Series (GH 5703), the ``limit``
      argument, and filling with numeric-looking strings (GH 9043).

      Parameters
      ----------
      datetime_series : Series
          Fixture; only used for the "both value and method" error check.
      """
      ts = Series([0., 1., 2., 3., 4.], index=tm.makeDateIndex(5))

      # nothing is missing yet, so a forward fill changes nothing
      tm.assert_series_equal(ts, ts.fillna(method='ffill'))

      # np.nan rather than the np.NaN alias, which NumPy 2.0 removed
      ts[2] = np.nan

      exp = Series([0., 1., 1., 3., 4.], index=ts.index)
      tm.assert_series_equal(ts.fillna(method='ffill'), exp)

      exp = Series([0., 1., 3., 3., 4.], index=ts.index)
      tm.assert_series_equal(ts.fillna(method='backfill'), exp)

      exp = Series([0., 1., 5., 3., 4.], index=ts.index)
      tm.assert_series_equal(ts.fillna(value=5), exp)

      msg = "Must specify a fill 'value' or 'method'"
      with pytest.raises(ValueError, match=msg):
          ts.fillna()

      msg = "Cannot specify both 'value' and 'method'"
      with pytest.raises(ValueError, match=msg):
          datetime_series.fillna(value=0, method='ffill')

      # GH 5703
      s1 = Series([np.nan])
      s2 = Series([1])
      result = s1.fillna(s2)
      expected = Series([1.])
      assert_series_equal(result, expected)
      result = s1.fillna({})
      assert_series_equal(result, s1)
      result = s1.fillna(Series(()))
      assert_series_equal(result, s1)
      result = s2.fillna(s1)
      assert_series_equal(result, s2)
      result = s1.fillna({0: 1})
      assert_series_equal(result, expected)
      result = s1.fillna({1: 1})
      assert_series_equal(result, Series([np.nan]))
      result = s1.fillna({0: 1, 1: 1})
      assert_series_equal(result, expected)
      result = s1.fillna(Series({0: 1, 1: 1}))
      assert_series_equal(result, expected)
      result = s1.fillna(Series({0: 1, 1: 1}, index=[4, 5]))
      assert_series_equal(result, s1)

      # the fill Series has the same labels in a different order
      s1 = Series([0, 1, 2], list('abc'))
      s2 = Series([0, np.nan, 2], list('bac'))
      result = s2.fillna(s1)
      expected = Series([0, 0, 2.], list('bac'))
      assert_series_equal(result, expected)

      # limit
      s = Series(np.nan, index=[0, 1, 2])
      result = s.fillna(999, limit=1)
      expected = Series([999, np.nan, np.nan], index=[0, 1, 2])
      assert_series_equal(result, expected)

      result = s.fillna(999, limit=2)
      expected = Series([999, 999, np.nan], index=[0, 1, 2])
      assert_series_equal(result, expected)

      # GH 9043
      # make sure a string representation of int/float values can be filled
      # correctly without raising errors or being converted
      vals = ['0', '1.5', '-0.3']
      for val in vals:
          s = Series([0, 1, np.nan, np.nan, 4], dtype='float64')
          result = s.fillna(val)
          expected = Series([0, 1, val, val, 4], dtype='object')
          assert_series_equal(result, expected)
  def test_fillna_bug(self):
      """Forward and backward fill leave the uncovered edge entry missing."""
      labels = ['z', 'a', 'b', 'c', 'd']
      x = Series([nan, 1., nan, 3., nan], labels)

      # ffill: the leading NaN has nothing to copy from
      forward = x.fillna(method='ffill')
      assert_series_equal(forward, Series([nan, 1., 1., 3., 3.], x.index))

      # bfill: the trailing NaN has nothing to copy from
      backward = x.fillna(method='bfill')
      assert_series_equal(backward, Series([1., 1., 3., 3., nan], x.index))
  483. def test_fillna_inplace(self):
  484. x = Series([nan, 1., nan, 3., nan], ['z', 'a', 'b', 'c', 'd'])
  485. y = x.copy()
  486. y.fillna(value=0, inplace=True)
  487. expected = x.fillna(value=0)
  488. assert_series_equal(y, expected)
  489. def test_fillna_invalid_method(self, datetime_series):
  490. try:
  491. datetime_series.fillna(method='ffil')
  492. except ValueError as inst:
  493. assert 'ffil' in str(inst)
  def test_ffill(self):
      """``Series.ffill()`` gives the same as ``fillna(method='ffill')``."""
      ts = Series([0., 1., 2., 3., 4.], index=tm.makeDateIndex(5))
      # np.nan rather than the np.NaN alias, which NumPy 2.0 removed
      ts[2] = np.nan
      assert_series_equal(ts.ffill(), ts.fillna(method='ffill'))
  def test_ffill_mixed_dtypes_without_missing_data(self):
      """``ffill`` on mixed data with nothing missing returns equal data."""
      # GH14956
      mixed = pd.Series([datetime(2015, 1, 1, tzinfo=pytz.utc), 1])
      filled = mixed.ffill()
      assert_series_equal(mixed, filled)
  def test_bfill(self):
      """``Series.bfill()`` gives the same as ``fillna(method='bfill')``."""
      ts = Series([0., 1., 2., 3., 4.], index=tm.makeDateIndex(5))
      # np.nan rather than the np.NaN alias, which NumPy 2.0 removed
      ts[2] = np.nan
      assert_series_equal(ts.bfill(), ts.fillna(method='bfill'))
  def test_timedelta64_nan(self):
      """Assigning NaN, iNaT or NaT into a timedelta Series marks it missing.

      Each marker is written to its own position, checked, and then the
      original value is restored and checked to be non-missing again.
      """
      td = Series([timedelta(days=i) for i in range(10)])

      # nan ops on timedeltas
      td1 = td.copy()
      for pos, marker in enumerate([np.nan, iNaT, NaT]):
          td1[pos] = marker
          assert isna(td1[pos])
          assert td1[pos].value == iNaT

          td1[pos] = td[pos]
          assert not isna(td1[pos])

      # boolean setting
      # this doesn't work, not sure numpy even supports it
      # result = td[(td>np.timedelta64(timedelta(days=3))) &
      # td<np.timedelta64(timedelta(days=7)))] = np.nan
      # assert isna(result).sum() == 7
  531. # NumPy limitation =(
  532. # def test_logical_range_select(self):
  533. # np.random.seed(12345)
  534. # selector = -0.5 <= datetime_series <= 0.5
  535. # expected = (datetime_series >= -0.5) & (datetime_series <= 0.5)
  536. # assert_series_equal(selector, expected)
  def test_dropna_empty(self):
      """``dropna`` on an empty Series stays empty; ``axis=1`` is rejected."""
      empty = Series([])

      assert len(empty.dropna()) == 0
      empty.dropna(inplace=True)
      assert len(empty) == 0

      # invalid axis
      msg = r"No axis named 1 for object type <(class|type) 'type'>"
      with pytest.raises(ValueError, match=msg):
          empty.dropna(axis=1)
  def test_datetime64_tz_dropna(self):
      """``dropna`` removes NaT from naive and tz-aware datetime Series."""
      # DatetimeBlock
      naive = Series([Timestamp('2011-01-01 10:00'), pd.NaT,
                      Timestamp('2011-01-03 10:00'), pd.NaT])
      dropped = naive.dropna()
      wanted = Series([Timestamp('2011-01-01 10:00'),
                       Timestamp('2011-01-03 10:00')], index=[0, 2])
      tm.assert_series_equal(dropped, wanted)

      # DatetimeBlockTZ
      idx = pd.DatetimeIndex(['2011-01-01 10:00', pd.NaT,
                              '2011-01-03 10:00', pd.NaT],
                             tz='Asia/Tokyo')
      aware = pd.Series(idx)
      assert aware.dtype == 'datetime64[ns, Asia/Tokyo]'

      dropped = aware.dropna()
      wanted = Series([Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'),
                       Timestamp('2011-01-03 10:00', tz='Asia/Tokyo')],
                      index=[0, 2])
      # the tz-aware dtype must survive the drop
      assert dropped.dtype == 'datetime64[ns, Asia/Tokyo]'
      tm.assert_series_equal(dropped, wanted)
  566. def test_dropna_no_nan(self):
  567. for s in [Series([1, 2, 3], name='x'), Series(
  568. [False, True, False], name='x')]:
  569. result = s.dropna()
  570. tm.assert_series_equal(result, s)
  571. assert result is not s
  572. s2 = s.copy()
  573. s2.dropna(inplace=True)
  574. tm.assert_series_equal(s2, s)
  575. def test_dropna_intervals(self):
  576. s = Series([np.nan, 1, 2, 3], IntervalIndex.from_arrays(
  577. [np.nan, 0, 1, 2],
  578. [np.nan, 1, 2, 3]))
  579. result = s.dropna()
  580. expected = s.iloc[1:]
  581. assert_series_equal(result, expected)
  582. def test_valid(self, datetime_series):
  583. ts = datetime_series.copy()
  584. ts[::2] = np.NaN
  585. result = ts.dropna()
  586. assert len(result) == ts.count()
  587. tm.assert_series_equal(result, ts[1::2])
  588. tm.assert_series_equal(result, ts[pd.notna(ts)])
  589. def test_isna(self):
  590. ser = Series([0, 5.4, 3, nan, -0.001])
  591. expected = Series([False, False, False, True, False])
  592. tm.assert_series_equal(ser.isna(), expected)
  593. ser = Series(["hi", "", nan])
  594. expected = Series([False, False, True])
  595. tm.assert_series_equal(ser.isna(), expected)
  596. def test_notna(self):
  597. ser = Series([0, 5.4, 3, nan, -0.001])
  598. expected = Series([True, True, True, False, True])
  599. tm.assert_series_equal(ser.notna(), expected)
  600. ser = Series(["hi", "", nan])
  601. expected = Series([True, True, False])
  602. tm.assert_series_equal(ser.notna(), expected)
  603. def test_pad_nan(self):
  604. x = Series([np.nan, 1., np.nan, 3., np.nan], ['z', 'a', 'b', 'c', 'd'],
  605. dtype=float)
  606. x.fillna(method='pad', inplace=True)
  607. expected = Series([np.nan, 1.0, 1.0, 3.0, 3.0],
  608. ['z', 'a', 'b', 'c', 'd'], dtype=float)
  609. assert_series_equal(x[1:], expected[1:])
  610. assert np.isnan(x[0]), np.isnan(expected[0])
  611. def test_pad_require_monotonicity(self):
  612. rng = date_range('1/1/2000', '3/1/2000', freq='B')
  613. # neither monotonic increasing or decreasing
  614. rng2 = rng[[1, 0, 2]]
  615. msg = "index must be monotonic increasing or decreasing"
  616. with pytest.raises(ValueError, match=msg):
  617. rng2.get_indexer(rng, method='pad')
  618. def test_dropna_preserve_name(self, datetime_series):
  619. datetime_series[:5] = np.nan
  620. result = datetime_series.dropna()
  621. assert result.name == datetime_series.name
  622. name = datetime_series.name
  623. ts = datetime_series.copy()
  624. ts.dropna(inplace=True)
  625. assert ts.name == name
  626. def test_fill_value_when_combine_const(self):
  627. # GH12723
  628. s = Series([0, 1, np.nan, 3, 4, 5])
  629. exp = s.fillna(0).add(2)
  630. res = s.add(2, fill_value=0)
  631. assert_series_equal(res, exp)
  def test_series_fillna_limit(self):
      """``limit=5`` caps how far pad and backfill propagate a value."""
      index = np.arange(10)
      s = Series(np.random.randn(10), index=index)

      # pad: only the first two values are known
      head = s[:2].reindex(index)
      limited = head.fillna(method='pad', limit=5)
      unlimited = s[:2].reindex(index).fillna(method='pad')
      # the last three entries are beyond the limit and stay missing
      unlimited[-3:] = np.nan
      assert_series_equal(limited, unlimited)

      # backfill: only the last two values are known
      tail = s[-2:].reindex(index)
      limited = tail.fillna(method='bfill', limit=5)
      unlimited = s[-2:].reindex(index).fillna(method='backfill')
      # the first three entries are beyond the limit and stay missing
      unlimited[:3] = np.nan
      assert_series_equal(limited, unlimited)
  645. def test_sparse_series_fillna_limit(self):
  646. index = np.arange(10)
  647. s = Series(np.random.randn(10), index=index)
  648. ss = s[:2].reindex(index).to_sparse()
  649. # TODO: what is this test doing? why are result an expected
  650. # the same call to fillna?
  651. with tm.assert_produces_warning(PerformanceWarning):
  652. # TODO: release-note fillna performance warning
  653. result = ss.fillna(method='pad', limit=5)
  654. expected = ss.fillna(method='pad', limit=5)
  655. expected = expected.to_dense()
  656. expected[-3:] = np.nan
  657. expected = expected.to_sparse()
  658. assert_series_equal(result, expected)
  659. ss = s[-2:].reindex(index).to_sparse()
  660. with tm.assert_produces_warning(PerformanceWarning):
  661. result = ss.fillna(method='backfill', limit=5)
  662. expected = ss.fillna(method='backfill')
  663. expected = expected.to_dense()
  664. expected[:3] = np.nan
  665. expected = expected.to_sparse()
  666. assert_series_equal(result, expected)
  667. def test_sparse_series_pad_backfill_limit(self):
  668. index = np.arange(10)
  669. s = Series(np.random.randn(10), index=index)
  670. s = s.to_sparse()
  671. result = s[:2].reindex(index, method='pad', limit=5)
  672. with tm.assert_produces_warning(PerformanceWarning):
  673. expected = s[:2].reindex(index).fillna(method='pad')
  674. expected = expected.to_dense()
  675. expected[-3:] = np.nan
  676. expected = expected.to_sparse()
  677. assert_series_equal(result, expected)
  678. result = s[-2:].reindex(index, method='backfill', limit=5)
  679. with tm.assert_produces_warning(PerformanceWarning):
  680. expected = s[-2:].reindex(index).fillna(method='backfill')
  681. expected = expected.to_dense()
  682. expected[:3] = np.nan
  683. expected = expected.to_sparse()
  684. assert_series_equal(result, expected)
  685. def test_series_pad_backfill_limit(self):
  686. index = np.arange(10)
  687. s = Series(np.random.randn(10), index=index)
  688. result = s[:2].reindex(index, method='pad', limit=5)
  689. expected = s[:2].reindex(index).fillna(method='pad')
  690. expected[-3:] = np.nan
  691. assert_series_equal(result, expected)
  692. result = s[-2:].reindex(index, method='backfill', limit=5)
  693. expected = s[-2:].reindex(index).fillna(method='backfill')
  694. expected[:3] = np.nan
  695. assert_series_equal(result, expected)
  696. class TestSeriesInterpolateData():
  697. def test_interpolate(self, datetime_series, string_series):
  698. ts = Series(np.arange(len(datetime_series), dtype=float),
  699. datetime_series.index)
  700. ts_copy = ts.copy()
  701. ts_copy[5:10] = np.NaN
  702. linear_interp = ts_copy.interpolate(method='linear')
  703. tm.assert_series_equal(linear_interp, ts)
  704. ord_ts = Series([d.toordinal() for d in datetime_series.index],
  705. index=datetime_series.index).astype(float)
  706. ord_ts_copy = ord_ts.copy()
  707. ord_ts_copy[5:10] = np.NaN
  708. time_interp = ord_ts_copy.interpolate(method='time')
  709. tm.assert_series_equal(time_interp, ord_ts)
  710. # try time interpolation on a non-TimeSeries
  711. # Only raises ValueError if there are NaNs.
  712. non_ts = string_series.copy()
  713. non_ts[0] = np.NaN
  714. msg = ("time-weighted interpolation only works on Series or DataFrames"
  715. " with a DatetimeIndex")
  716. with pytest.raises(ValueError, match=msg):
  717. non_ts.interpolate(method='time')
  718. @td.skip_if_no_scipy
  719. def test_interpolate_pchip(self):
  720. _skip_if_no_pchip()
  721. ser = Series(np.sort(np.random.uniform(size=100)))
  722. # interpolate at new_index
  723. new_index = ser.index.union(Index([49.25, 49.5, 49.75, 50.25, 50.5,
  724. 50.75]))
  725. interp_s = ser.reindex(new_index).interpolate(method='pchip')
  726. # does not blow up, GH5977
  727. interp_s[49:51]
  728. @td.skip_if_no_scipy
  729. def test_interpolate_akima(self):
  730. _skip_if_no_akima()
  731. ser = Series([10, 11, 12, 13])
  732. expected = Series([11.00, 11.25, 11.50, 11.75,
  733. 12.00, 12.25, 12.50, 12.75, 13.00],
  734. index=Index([1.0, 1.25, 1.5, 1.75,
  735. 2.0, 2.25, 2.5, 2.75, 3.0]))
  736. # interpolate at new_index
  737. new_index = ser.index.union(Index([1.25, 1.5, 1.75, 2.25, 2.5, 2.75]))
  738. interp_s = ser.reindex(new_index).interpolate(method='akima')
  739. assert_series_equal(interp_s[1:3], expected)
  740. @td.skip_if_no_scipy
  741. def test_interpolate_piecewise_polynomial(self):
  742. ser = Series([10, 11, 12, 13])
  743. expected = Series([11.00, 11.25, 11.50, 11.75,
  744. 12.00, 12.25, 12.50, 12.75, 13.00],
  745. index=Index([1.0, 1.25, 1.5, 1.75,
  746. 2.0, 2.25, 2.5, 2.75, 3.0]))
  747. # interpolate at new_index
  748. new_index = ser.index.union(Index([1.25, 1.5, 1.75, 2.25, 2.5, 2.75]))
  749. interp_s = ser.reindex(new_index).interpolate(
  750. method='piecewise_polynomial')
  751. assert_series_equal(interp_s[1:3], expected)
  752. @td.skip_if_no_scipy
  753. def test_interpolate_from_derivatives(self):
  754. ser = Series([10, 11, 12, 13])
  755. expected = Series([11.00, 11.25, 11.50, 11.75,
  756. 12.00, 12.25, 12.50, 12.75, 13.00],
  757. index=Index([1.0, 1.25, 1.5, 1.75,
  758. 2.0, 2.25, 2.5, 2.75, 3.0]))
  759. # interpolate at new_index
  760. new_index = ser.index.union(Index([1.25, 1.5, 1.75, 2.25, 2.5, 2.75]))
  761. interp_s = ser.reindex(new_index).interpolate(
  762. method='from_derivatives')
  763. assert_series_equal(interp_s[1:3], expected)
  764. @pytest.mark.parametrize("kwargs", [
  765. {},
  766. pytest.param({'method': 'polynomial', 'order': 1},
  767. marks=td.skip_if_no_scipy)
  768. ])
  769. def test_interpolate_corners(self, kwargs):
  770. s = Series([np.nan, np.nan])
  771. assert_series_equal(s.interpolate(**kwargs), s)
  772. s = Series([]).interpolate()
  773. assert_series_equal(s.interpolate(**kwargs), s)
  774. def test_interpolate_index_values(self):
  775. s = Series(np.nan, index=np.sort(np.random.rand(30)))
  776. s[::3] = np.random.randn(10)
  777. vals = s.index.values.astype(float)
  778. result = s.interpolate(method='index')
  779. expected = s.copy()
  780. bad = isna(expected.values)
  781. good = ~bad
  782. expected = Series(np.interp(vals[bad], vals[good],
  783. s.values[good]),
  784. index=s.index[bad])
  785. assert_series_equal(result[bad], expected)
  786. # 'values' is synonymous with 'index' for the method kwarg
  787. other_result = s.interpolate(method='values')
  788. assert_series_equal(other_result, result)
  789. assert_series_equal(other_result[bad], expected)
  790. def test_interpolate_non_ts(self):
  791. s = Series([1, 3, np.nan, np.nan, np.nan, 11])
  792. msg = ("time-weighted interpolation only works on Series or DataFrames"
  793. " with a DatetimeIndex")
  794. with pytest.raises(ValueError, match=msg):
  795. s.interpolate(method='time')
  796. @pytest.mark.parametrize("kwargs", [
  797. {},
  798. pytest.param({'method': 'polynomial', 'order': 1},
  799. marks=td.skip_if_no_scipy)
  800. ])
  801. def test_nan_interpolate(self, kwargs):
  802. s = Series([0, 1, np.nan, 3])
  803. result = s.interpolate(**kwargs)
  804. expected = Series([0., 1., 2., 3.])
  805. assert_series_equal(result, expected)
  806. def test_nan_irregular_index(self):
  807. s = Series([1, 2, np.nan, 4], index=[1, 3, 5, 9])
  808. result = s.interpolate()
  809. expected = Series([1., 2., 3., 4.], index=[1, 3, 5, 9])
  810. assert_series_equal(result, expected)
  811. def test_nan_str_index(self):
  812. s = Series([0, 1, 2, np.nan], index=list('abcd'))
  813. result = s.interpolate()
  814. expected = Series([0., 1., 2., 2.], index=list('abcd'))
  815. assert_series_equal(result, expected)
  816. @td.skip_if_no_scipy
  817. def test_interp_quad(self):
  818. sq = Series([1, 4, np.nan, 16], index=[1, 2, 3, 4])
  819. result = sq.interpolate(method='quadratic')
  820. expected = Series([1., 4., 9., 16.], index=[1, 2, 3, 4])
  821. assert_series_equal(result, expected)
  822. @td.skip_if_no_scipy
  823. def test_interp_scipy_basic(self):
  824. s = Series([1, 3, np.nan, 12, np.nan, 25])
  825. # slinear
  826. expected = Series([1., 3., 7.5, 12., 18.5, 25.])
  827. result = s.interpolate(method='slinear')
  828. assert_series_equal(result, expected)
  829. result = s.interpolate(method='slinear', downcast='infer')
  830. assert_series_equal(result, expected)
  831. # nearest
  832. expected = Series([1, 3, 3, 12, 12, 25])
  833. result = s.interpolate(method='nearest')
  834. assert_series_equal(result, expected.astype('float'))
  835. result = s.interpolate(method='nearest', downcast='infer')
  836. assert_series_equal(result, expected)
  837. # zero
  838. expected = Series([1, 3, 3, 12, 12, 25])
  839. result = s.interpolate(method='zero')
  840. assert_series_equal(result, expected.astype('float'))
  841. result = s.interpolate(method='zero', downcast='infer')
  842. assert_series_equal(result, expected)
  843. # quadratic
  844. # GH #15662.
  845. # new cubic and quadratic interpolation algorithms from scipy 0.19.0.
  846. # previously `splmake` was used. See scipy/scipy#6710
  847. if _is_scipy_ge_0190:
  848. expected = Series([1, 3., 6.823529, 12., 18.058824, 25.])
  849. else:
  850. expected = Series([1, 3., 6.769231, 12., 18.230769, 25.])
  851. result = s.interpolate(method='quadratic')
  852. assert_series_equal(result, expected)
  853. result = s.interpolate(method='quadratic', downcast='infer')
  854. assert_series_equal(result, expected)
  855. # cubic
  856. expected = Series([1., 3., 6.8, 12., 18.2, 25.])
  857. result = s.interpolate(method='cubic')
  858. assert_series_equal(result, expected)
  859. def test_interp_limit(self):
  860. s = Series([1, 3, np.nan, np.nan, np.nan, 11])
  861. expected = Series([1., 3., 5., 7., np.nan, 11.])
  862. result = s.interpolate(method='linear', limit=2)
  863. assert_series_equal(result, expected)
  864. # GH 9217, make sure limit is an int and greater than 0
  865. methods = ['linear', 'time', 'index', 'values', 'nearest', 'zero',
  866. 'slinear', 'quadratic', 'cubic', 'barycentric', 'krogh',
  867. 'polynomial', 'spline', 'piecewise_polynomial', None,
  868. 'from_derivatives', 'pchip', 'akima']
  869. s = pd.Series([1, 2, np.nan, np.nan, 5])
  870. msg = (r"Limit must be greater than 0|"
  871. "time-weighted interpolation only works on Series or"
  872. r" DataFrames with a DatetimeIndex|"
  873. r"invalid method '(polynomial|spline|None)' to interpolate|"
  874. "Limit must be an integer")
  875. for limit in [-1, 0, 1., 2.]:
  876. for method in methods:
  877. with pytest.raises(ValueError, match=msg):
  878. s.interpolate(limit=limit, method=method)
  879. def test_interp_limit_forward(self):
  880. s = Series([1, 3, np.nan, np.nan, np.nan, 11])
  881. # Provide 'forward' (the default) explicitly here.
  882. expected = Series([1., 3., 5., 7., np.nan, 11.])
  883. result = s.interpolate(method='linear', limit=2,
  884. limit_direction='forward')
  885. assert_series_equal(result, expected)
  886. result = s.interpolate(method='linear', limit=2,
  887. limit_direction='FORWARD')
  888. assert_series_equal(result, expected)
  889. def test_interp_unlimited(self):
  890. # these test are for issue #16282 default Limit=None is unlimited
  891. s = Series([np.nan, 1., 3., np.nan, np.nan, np.nan, 11., np.nan])
  892. expected = Series([1., 1., 3., 5., 7., 9., 11., 11.])
  893. result = s.interpolate(method='linear',
  894. limit_direction='both')
  895. assert_series_equal(result, expected)
  896. expected = Series([np.nan, 1., 3., 5., 7., 9., 11., 11.])
  897. result = s.interpolate(method='linear',
  898. limit_direction='forward')
  899. assert_series_equal(result, expected)
  900. expected = Series([1., 1., 3., 5., 7., 9., 11., np.nan])
  901. result = s.interpolate(method='linear',
  902. limit_direction='backward')
  903. assert_series_equal(result, expected)
  904. def test_interp_limit_bad_direction(self):
  905. s = Series([1, 3, np.nan, np.nan, np.nan, 11])
  906. msg = (r"Invalid limit_direction: expecting one of \['forward',"
  907. r" 'backward', 'both'\], got 'abc'")
  908. with pytest.raises(ValueError, match=msg):
  909. s.interpolate(method='linear', limit=2, limit_direction='abc')
  910. # raises an error even if no limit is specified.
  911. with pytest.raises(ValueError, match=msg):
  912. s.interpolate(method='linear', limit_direction='abc')
  913. # limit_area introduced GH #16284
  914. def test_interp_limit_area(self):
  915. # These tests are for issue #9218 -- fill NaNs in both directions.
  916. s = Series([nan, nan, 3, nan, nan, nan, 7, nan, nan])
  917. expected = Series([nan, nan, 3., 4., 5., 6., 7., nan, nan])
  918. result = s.interpolate(method='linear', limit_area='inside')
  919. assert_series_equal(result, expected)
  920. expected = Series([nan, nan, 3., 4., nan, nan, 7., nan, nan])
  921. result = s.interpolate(method='linear', limit_area='inside',
  922. limit=1)
  923. expected = Series([nan, nan, 3., 4., nan, 6., 7., nan, nan])
  924. result = s.interpolate(method='linear', limit_area='inside',
  925. limit_direction='both', limit=1)
  926. assert_series_equal(result, expected)
  927. expected = Series([nan, nan, 3., nan, nan, nan, 7., 7., 7.])
  928. result = s.interpolate(method='linear', limit_area='outside')
  929. assert_series_equal(result, expected)
  930. expected = Series([nan, nan, 3., nan, nan, nan, 7., 7., nan])
  931. result = s.interpolate(method='linear', limit_area='outside',
  932. limit=1)
  933. expected = Series([nan, 3., 3., nan, nan, nan, 7., 7., nan])
  934. result = s.interpolate(method='linear', limit_area='outside',
  935. limit_direction='both', limit=1)
  936. assert_series_equal(result, expected)
  937. expected = Series([3., 3., 3., nan, nan, nan, 7., nan, nan])
  938. result = s.interpolate(method='linear', limit_area='outside',
  939. direction='backward')
  940. # raises an error even if limit type is wrong.
  941. msg = (r"Invalid limit_area: expecting one of \['inside', 'outside'\],"
  942. " got abc")
  943. with pytest.raises(ValueError, match=msg):
  944. s.interpolate(method='linear', limit_area='abc')
  945. def test_interp_limit_direction(self):
  946. # These tests are for issue #9218 -- fill NaNs in both directions.
  947. s = Series([1, 3, np.nan, np.nan, np.nan, 11])
  948. expected = Series([1., 3., np.nan, 7., 9., 11.])
  949. result = s.interpolate(method='linear', limit=2,
  950. limit_direction='backward')
  951. assert_series_equal(result, expected)
  952. expected = Series([1., 3., 5., np.nan, 9., 11.])
  953. result = s.interpolate(method='linear', limit=1,
  954. limit_direction='both')
  955. assert_series_equal(result, expected)
  956. # Check that this works on a longer series of nans.
  957. s = Series([1, 3, np.nan, np.nan, np.nan, 7, 9, np.nan, np.nan, 12,
  958. np.nan])
  959. expected = Series([1., 3., 4., 5., 6., 7., 9., 10., 11., 12., 12.])
  960. result = s.interpolate(method='linear', limit=2,
  961. limit_direction='both')
  962. assert_series_equal(result, expected)
  963. expected = Series([1., 3., 4., np.nan, 6., 7., 9., 10., 11., 12., 12.])
  964. result = s.interpolate(method='linear', limit=1,
  965. limit_direction='both')
  966. assert_series_equal(result, expected)
  967. def test_interp_limit_to_ends(self):
  968. # These test are for issue #10420 -- flow back to beginning.
  969. s = Series([np.nan, np.nan, 5, 7, 9, np.nan])
  970. expected = Series([5., 5., 5., 7., 9., np.nan])
  971. result = s.interpolate(method='linear', limit=2,
  972. limit_direction='backward')
  973. assert_series_equal(result, expected)
  974. expected = Series([5., 5., 5., 7., 9., 9.])
  975. result = s.interpolate(method='linear', limit=2,
  976. limit_direction='both')
  977. assert_series_equal(result, expected)
  978. def test_interp_limit_before_ends(self):
  979. # These test are for issue #11115 -- limit ends properly.
  980. s = Series([np.nan, np.nan, 5, 7, np.nan, np.nan])
  981. expected = Series([np.nan, np.nan, 5., 7., 7., np.nan])
  982. result = s.interpolate(method='linear', limit=1,
  983. limit_direction='forward')
  984. assert_series_equal(result, expected)
  985. expected = Series([np.nan, 5., 5., 7., np.nan, np.nan])
  986. result = s.interpolate(method='linear', limit=1,
  987. limit_direction='backward')
  988. assert_series_equal(result, expected)
  989. expected = Series([np.nan, 5., 5., 7., 7., np.nan])
  990. result = s.interpolate(method='linear', limit=1,
  991. limit_direction='both')
  992. assert_series_equal(result, expected)
  993. @td.skip_if_no_scipy
  994. def test_interp_all_good(self):
  995. s = Series([1, 2, 3])
  996. result = s.interpolate(method='polynomial', order=1)
  997. assert_series_equal(result, s)
  998. # non-scipy
  999. result = s.interpolate()
  1000. assert_series_equal(result, s)
  1001. @pytest.mark.parametrize("check_scipy", [
  1002. False,
  1003. pytest.param(True, marks=td.skip_if_no_scipy)
  1004. ])
  1005. def test_interp_multiIndex(self, check_scipy):
  1006. idx = MultiIndex.from_tuples([(0, 'a'), (1, 'b'), (2, 'c')])
  1007. s = Series([1, 2, np.nan], index=idx)
  1008. expected = s.copy()
  1009. expected.loc[2] = 2
  1010. result = s.interpolate()
  1011. assert_series_equal(result, expected)
  1012. msg = "Only `method=linear` interpolation is supported on MultiIndexes"
  1013. if check_scipy:
  1014. with pytest.raises(ValueError, match=msg):
  1015. s.interpolate(method='polynomial', order=1)
  1016. @td.skip_if_no_scipy
  1017. def test_interp_nonmono_raise(self):
  1018. s = Series([1, np.nan, 3], index=[0, 2, 1])
  1019. msg = "krogh interpolation requires that the index be monotonic"
  1020. with pytest.raises(ValueError, match=msg):
  1021. s.interpolate(method='krogh')
  1022. @td.skip_if_no_scipy
  1023. def test_interp_datetime64(self):
  1024. df = Series([1, np.nan, 3], index=date_range('1/1/2000', periods=3))
  1025. result = df.interpolate(method='nearest')
  1026. expected = Series([1., 1., 3.],
  1027. index=date_range('1/1/2000', periods=3))
  1028. assert_series_equal(result, expected)
  1029. def test_interp_limit_no_nans(self):
  1030. # GH 7173
  1031. s = pd.Series([1., 2., 3.])
  1032. result = s.interpolate(limit=1)
  1033. expected = s
  1034. assert_series_equal(result, expected)
  1035. @td.skip_if_no_scipy
  1036. @pytest.mark.parametrize("method", ['polynomial', 'spline'])
  1037. def test_no_order(self, method):
  1038. s = Series([0, 1, np.nan, 3])
  1039. msg = "invalid method '{}' to interpolate".format(method)
  1040. with pytest.raises(ValueError, match=msg):
  1041. s.interpolate(method=method)
  1042. @td.skip_if_no_scipy
  1043. def test_spline(self):
  1044. s = Series([1, 2, np.nan, 4, 5, np.nan, 7])
  1045. result = s.interpolate(method='spline', order=1)
  1046. expected = Series([1., 2., 3., 4., 5., 6., 7.])
  1047. assert_series_equal(result, expected)
  1048. @td.skip_if_no('scipy', min_version='0.15')
  1049. def test_spline_extrapolate(self):
  1050. s = Series([1, 2, 3, 4, np.nan, 6, np.nan])
  1051. result3 = s.interpolate(method='spline', order=1, ext=3)
  1052. expected3 = Series([1., 2., 3., 4., 5., 6., 6.])
  1053. assert_series_equal(result3, expected3)
  1054. result1 = s.interpolate(method='spline', order=1, ext=0)
  1055. expected1 = Series([1., 2., 3., 4., 5., 6., 7.])
  1056. assert_series_equal(result1, expected1)
  1057. @td.skip_if_no_scipy
  1058. def test_spline_smooth(self):
  1059. s = Series([1, 2, np.nan, 4, 5.1, np.nan, 7])
  1060. assert (s.interpolate(method='spline', order=3, s=0)[5] !=
  1061. s.interpolate(method='spline', order=3)[5])
  1062. @td.skip_if_no_scipy
  1063. def test_spline_interpolation(self):
  1064. s = Series(np.arange(10) ** 2)
  1065. s[np.random.randint(0, 9, 3)] = np.nan
  1066. result1 = s.interpolate(method='spline', order=1)
  1067. expected1 = s.interpolate(method='spline', order=1)
  1068. assert_series_equal(result1, expected1)
  1069. @td.skip_if_no_scipy
  1070. def test_spline_error(self):
  1071. # see gh-10633
  1072. s = pd.Series(np.arange(10) ** 2)
  1073. s[np.random.randint(0, 9, 3)] = np.nan
  1074. msg = "invalid method 'spline' to interpolate"
  1075. with pytest.raises(ValueError, match=msg):
  1076. s.interpolate(method='spline')
  1077. msg = "order needs to be specified and greater than 0"
  1078. with pytest.raises(ValueError, match=msg):
  1079. s.interpolate(method='spline', order=0)
  1080. def test_interp_timedelta64(self):
  1081. # GH 6424
  1082. df = Series([1, np.nan, 3],
  1083. index=pd.to_timedelta([1, 2, 3]))
  1084. result = df.interpolate(method='time')
  1085. expected = Series([1., 2., 3.],
  1086. index=pd.to_timedelta([1, 2, 3]))
  1087. assert_series_equal(result, expected)
  1088. # test for non uniform spacing
  1089. df = Series([1, np.nan, 3],
  1090. index=pd.to_timedelta([1, 2, 4]))
  1091. result = df.interpolate(method='time')
  1092. expected = Series([1., 1.666667, 3.],
  1093. index=pd.to_timedelta([1, 2, 4]))
  1094. assert_series_equal(result, expected)
  1095. def test_series_interpolate_method_values(self):
  1096. # #1646
  1097. ts = _simple_ts('1/1/2000', '1/20/2000')
  1098. ts[::2] = np.nan
  1099. result = ts.interpolate(method='values')
  1100. exp = ts.interpolate()
  1101. assert_series_equal(result, exp)
  1102. def test_series_interpolate_intraday(self):
  1103. # #1698
  1104. index = pd.date_range('1/1/2012', periods=4, freq='12D')
  1105. ts = pd.Series([0, 12, 24, 36], index)
  1106. new_index = index.append(index + pd.DateOffset(days=1)).sort_values()
  1107. exp = ts.reindex(new_index).interpolate(method='time')
  1108. index = pd.date_range('1/1/2012', periods=4, freq='12H')
  1109. ts = pd.Series([0, 12, 24, 36], index)
  1110. new_index = index.append(index + pd.DateOffset(hours=1)).sort_values()
  1111. result = ts.reindex(new_index).interpolate(method='time')
  1112. tm.assert_numpy_array_equal(result.values, exp.values)
  1113. def test_nonzero_warning(self):
  1114. # GH 24048
  1115. ser = pd.Series([1, 0, 3, 4])
  1116. with tm.assert_produces_warning(FutureWarning):
  1117. ser.nonzero()