test_timeseries.py 38 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099
  1. # coding=utf-8
  2. # pylint: disable-msg=E1101,W0612
  3. from datetime import datetime, time, timedelta
  4. import numpy as np
  5. import pytest
  6. from pandas._libs.tslib import iNaT
  7. from pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime
  8. from pandas.compat import StringIO, lrange, product
  9. from pandas.errors import NullFrequencyError
  10. import pandas.util._test_decorators as td
  11. import pandas as pd
  12. from pandas import (
  13. DataFrame, Index, NaT, Series, Timestamp, concat, date_range, offsets,
  14. timedelta_range, to_datetime)
  15. from pandas.core.indexes.datetimes import DatetimeIndex
  16. from pandas.core.indexes.timedeltas import TimedeltaIndex
  17. from pandas.tests.series.common import TestData
  18. import pandas.util.testing as tm
  19. from pandas.util.testing import (
  20. assert_almost_equal, assert_frame_equal, assert_series_equal)
  21. from pandas.tseries.offsets import BDay, BMonthEnd
  22. def _simple_ts(start, end, freq='D'):
  23. rng = date_range(start, end, freq=freq)
  24. return Series(np.random.randn(len(rng)), index=rng)
  def assert_range_equal(left, right):
      """Assert that two indexes are equal and agree on ``freq`` and ``tz``."""
      assert left.equals(right)
      # ``equals`` is followed by explicit checks of both attributes
      for attr in ('freq', 'tz'):
          assert getattr(left, attr) == getattr(right, attr)
  29. class TestTimeSeries(TestData):
  def test_shift(self):
      """Check ``Series.shift`` on datetime, period, integer and tz data."""
      ts = self.ts

      # positional shift: index is untouched, values move by one slot
      moved = ts.shift(1)
      restored = moved.shift(-1)
      tm.assert_index_equal(moved.index, ts.index)
      tm.assert_index_equal(restored.index, ts.index)
      tm.assert_numpy_array_equal(restored.dropna().values, ts.values[:-1])

      # shifting by an offset object round-trips; a zero shift is a no-op
      bday = BDay()
      restored = ts.shift(1, freq=bday).shift(-1, freq=bday)
      assert_series_equal(restored, ts)
      assert_series_equal(ts.shift(0, freq=bday), ts)

      # same round trip using the string alias
      restored = ts.shift(1, freq='B').shift(-1, freq='B')
      assert_series_equal(restored, ts)

      # corner case
      assert_series_equal(ts.shift(0), ts)

      # Shifting with PeriodIndex
      pser = tm.makePeriodSeries()
      moved = pser.shift(1)
      restored = moved.shift(-1)
      tm.assert_index_equal(moved.index, pser.index)
      tm.assert_index_equal(restored.index, pser.index)
      tm.assert_numpy_array_equal(restored.dropna().values,
                                  pser.values[:-1])

      by_alias = pser.shift(1, 'B')
      by_offset = pser.shift(1, BDay())
      assert_series_equal(by_alias, by_offset)
      assert_series_equal(pser, by_alias.shift(-1, 'B'))

      msg = "Given freq D does not match PeriodIndex freq B"
      with pytest.raises(ValueError, match=msg):
          pser.shift(freq='D')

      # legacy support
      kw_alias = pser.shift(1, freq='B')
      assert_series_equal(by_alias, kw_alias)
      kw_offset = pser.shift(1, freq=BDay())
      assert_series_equal(kw_offset, kw_alias)

      # 32-bit taking
      # GH 8129
      dates = date_range('2000-01-01', periods=5)
      wanted = Series([np.nan, 0, 1, 2, 3], index=dates)
      for dtype in ('int32', 'int64'):
          ints = Series(np.arange(5, dtype=dtype), index=dates)
          # the shift amount is itself a numpy integer of that dtype
          step = ints.iloc[1]
          assert_series_equal(ints.shift(periods=step), wanted)

      # xref 8260
      # with tz
      eastern = Series(date_range('2000-01-01 09:00:00', periods=5,
                                  tz='US/Eastern'), name='foo')
      delta = eastern - eastern.shift()
      wanted = Series(TimedeltaIndex(['NaT'] + ['1 days'] * 4), name='foo')
      assert_series_equal(delta, wanted)

      # incompat tz
      cet = Series(date_range('2000-01-01 09:00:00', periods=5,
                              tz='CET'), name='foo')
      msg = ("DatetimeArray subtraction must have the same timezones or no"
             " timezones")
      with pytest.raises(TypeError, match=msg):
          eastern - cet
  def test_shift2(self):
      """Check frequency-based shifts and the error for a freq-less index."""
      hourly = Series(np.random.randn(5),
                      index=date_range('1/1/2000', periods=5, freq='H'))

      shifted = hourly.shift(1, freq='5T')
      tm.assert_index_equal(shifted.index, hourly.index.shift(1, freq='5T'))

      # GH #1063, multiple of same base
      shifted = hourly.shift(1, freq='4H')
      tm.assert_index_equal(shifted.index, hourly.index + offsets.Hour(4))

      irregular = DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-04'])
      msg = "Cannot shift with no freq"
      with pytest.raises(NullFrequencyError, match=msg):
          irregular.shift(1)
  def test_shift_fill_value(self):
      """Check that ``fill_value`` fills the slots vacated by ``shift``."""
      # GH #24128
      hours = date_range('1/1/2000', periods=5, freq='H')
      ts = Series([1.0, 2.0, 3.0, 4.0, 5.0], index=hours)

      # check that fill value works
      cases = [(1, [0.0, 1.0, 2.0, 3.0, 4.0]),
               (2, [0.0, 0.0, 1.0, 2.0, 3.0])]
      for periods, values in cases:
          shifted = ts.shift(periods, fill_value=0.0)
          tm.assert_series_equal(shifted, Series(values, index=hours))

      # the dtype is unchanged when an integer Series gets an integer fill
      ints = pd.Series([1, 2, 3])
      assert ints.shift(2, fill_value=0).dtype == ints.dtype
  def test_categorical_shift_fill_value(self):
      """Check ``fill_value`` handling when shifting categorical data."""
      cats = pd.Series(['a', 'b', 'c', 'd'], dtype="category")

      wanted = pd.Series(pd.Categorical(['a', 'a', 'b', 'c'],
                                        categories=['a', 'b', 'c', 'd'],
                                        ordered=False))
      tm.assert_equal(cats.shift(1, fill_value='a'), wanted)

      # check for incorrect fill_value
      msg = "'fill_value=f' is not present in this Categorical's categories"
      with pytest.raises(ValueError, match=msg):
          cats.shift(1, fill_value='f')
  def test_shift_dst(self):
      """Check that shifting tz-aware (US/Eastern) data keeps its dtype."""
      # GH 13926
      tz_dtype = 'datetime64[ns, US/Eastern]'
      dates = date_range('2016-11-06', freq='H', periods=10, tz='US/Eastern')
      ser = Series(dates)
      as_objects = dates.astype(object).values.tolist()

      unchanged = ser.shift(0)
      tm.assert_series_equal(unchanged, ser)
      assert unchanged.dtype == tz_dtype

      cases = [(1, [NaT] + as_objects[:9]),
               (-2, as_objects[2:] + [NaT, NaT])]
      for periods, values in cases:
          shifted = ser.shift(periods)
          tm.assert_series_equal(shifted, Series(values))
          assert shifted.dtype == tz_dtype

      # shifting by the full length or more leaves only NaT
      for periods in (10, -10, 20, -20):
          shifted = ser.shift(periods)
          all_nat = Series([NaT] * 10, dtype=tz_dtype)
          tm.assert_series_equal(shifted, all_nat)
          assert shifted.dtype == tz_dtype
  def test_tshift(self):
      """Check ``Series.tshift`` for period, datetime and inferred-freq data."""
      # PeriodIndex
      pser = tm.makePeriodSeries()
      forward = pser.tshift(1)
      assert_series_equal(forward.tshift(-1), pser)

      for freq in ('B', BDay()):
          assert_series_equal(forward, pser.tshift(freq=freq))

      msg = "Given freq M does not match PeriodIndex freq B"
      with pytest.raises(ValueError, match=msg):
          pser.tshift(freq='M')

      # DatetimeIndex
      ts = self.ts
      forward = ts.tshift(1)
      assert_series_equal(ts, forward.tshift(-1))
      assert_series_equal(forward, ts.tshift(freq=ts.index.freq))

      # rebuild the index from a plain array so the Series is created
      # without an explicit freq
      inferred = Series(ts.values, Index(np.asarray(ts.index)), name='ts')
      forward = inferred.tshift(1)
      backward = forward.tshift(-1)
      assert_series_equal(forward, ts.tshift(1))
      assert_series_equal(backward, inferred)

      irregular = ts[[0, 5, 7]]
      msg = "Freq was not given and was not set in the index"
      with pytest.raises(ValueError, match=msg):
          irregular.tshift()
  def test_truncate(self):
      """Check ``Series.truncate`` with present, missing and empty bounds."""
      bday = BDay()
      full = self.ts
      ts = full[::3]

      # bounds that are in ``ts`` and neighbours that are only in ``full``
      start, end = full.index[3], full.index[6]
      start_missing, end_missing = full.index[2], full.index[7]

      # neither specified
      assert_series_equal(ts.truncate(), ts)

      # both specified
      wanted = ts[1:3]
      assert_series_equal(ts.truncate(start, end), wanted)
      assert_series_equal(ts.truncate(start_missing, end_missing), wanted)

      # start specified
      wanted = ts[1:]
      assert_series_equal(ts.truncate(before=start), wanted)
      assert_series_equal(ts.truncate(before=start_missing), wanted)

      # end specified
      wanted = ts[:3]
      assert_series_equal(ts.truncate(after=end), wanted)
      assert_series_equal(ts.truncate(after=end_missing), wanted)

      # corner case, empty series returned
      before_first = full.index[0] - bday
      after_last = full.index[-1] + bday
      assert len(ts.truncate(after=before_first)) == 0
      assert len(ts.truncate(before=after_last)) == 0

      msg = "Truncate: 1999-12-31 00:00:00 must be after 2000-02-14 00:00:00"
      with pytest.raises(ValueError, match=msg):
          ts.truncate(before=after_last, after=before_first)
  def test_truncate_nonsortedindex(self):
      """Check that ``truncate`` raises when the index is not sorted."""
      # GH 17935
      msg = 'truncate requires a sorted index'

      unsorted_ints = pd.Series(['a', 'b', 'c', 'd', 'e'],
                                index=[5, 3, 2, 9, 0])
      with pytest.raises(ValueError, match=msg):
          unsorted_ints.truncate(before=3, after=9)

      weeks = pd.date_range('2011-01-01', '2012-01-01', freq='W')
      weekly = pd.Series(np.random.randn(len(weeks)), index=weeks)
      with pytest.raises(ValueError, match=msg):
          weekly.sort_values(ascending=False).truncate(before='2011-11',
                                                       after='2011-12')
  def test_asfreq(self):
      """Check ``Series.asfreq`` round trips, empty input and ``fill_value``."""
      month_ends = [datetime(2009, 10, 30), datetime(2009, 11, 30),
                    datetime(2009, 12, 31)]
      ts = Series([0., 1., 2.], index=month_ends)

      # business-daily and back to business-month-end restores the input
      assert_round_trip = tm.assert_series_equal
      assert_round_trip(ts.asfreq('B').asfreq('BM'), ts)
      assert_round_trip(ts.asfreq('B', method='pad').asfreq('BM'), ts)
      assert_round_trip(ts.asfreq(BDay()).asfreq(BMonthEnd()), ts)

      empty = ts[:0].asfreq('M')
      assert len(empty) == 0
      assert empty is not ts

      daily = ts.asfreq('D', fill_value=-1)
      counts = daily.value_counts().sort_index()
      wanted = Series([60, 1, 1, 1],
                      index=[-1.0, 2.0, 1.0, 0.0]).sort_index()
      tm.assert_series_equal(counts, wanted)
  def test_asfreq_datetimeindex_empty_series(self):
      """Check ``asfreq`` gives the same index with and without data."""
      # GH 14320
      without_data = Series(index=pd.DatetimeIndex(["2016-09-29 11:00"]))
      expected = without_data.asfreq('H')

      with_data = Series(index=pd.DatetimeIndex(["2016-09-29 11:00"]),
                         data=[3])
      result = with_data.asfreq('H')

      tm.assert_index_equal(expected.index, result.index)
  def test_diff(self):
      """Check ``Series.diff`` on float, int, datetime and timedelta data."""
      ts = self.ts

      # Just run the function
      ts.diff()

      # int dtype
      big = 10000000000000000
      int_diff = Series([big, big + 1]).diff()
      assert int_diff[1] == 1

      # neg n
      assert_series_equal(ts.diff(-1), ts - ts.shift(-1))

      # 0
      assert_series_equal(ts.diff(0), ts - ts)

      # datetime diff (GH3100)
      dates = Series(date_range('20130102', periods=5))
      manual = dates - dates.shift(1)
      builtin = dates.diff()
      assert_series_equal(manual, builtin)

      # timedelta diff
      assert_series_equal(manual - manual.shift(1), builtin.diff())

      # with tz
      aware = Series(
          date_range('2000-01-01 09:00:00', periods=5,
                     tz='US/Eastern'), name='foo')
      wanted = Series(TimedeltaIndex(['NaT'] + ['1 days'] * 4), name='foo')
      assert_series_equal(aware.diff(), wanted)
  def test_pct_change(self):
      """Check ``pct_change`` against a manual ratio of shifted values."""
      ts = self.ts

      # no filling at all
      assert_series_equal(ts.pct_change(fill_method=None),
                          ts / ts.shift(1) - 1)

      # two periods; the expectation is computed on pad-filled data
      padded = ts.fillna(method='pad')
      assert_series_equal(ts.pct_change(2), padded / padded.shift(2) - 1)

      # explicit back-fill with a limit
      backfilled = ts.fillna(method='bfill', limit=1)
      assert_series_equal(ts.pct_change(fill_method='bfill', limit=1),
                          backfilled / backfilled.shift(1) - 1)

      # frequency-based change, aligned back onto the original index
      padded = ts.fillna(method='pad')
      wanted = (padded / padded.shift(freq='5D') - 1).reindex_like(padded)
      assert_series_equal(ts.pct_change(freq='5D'), wanted)
  def test_pct_change_shift_over_nas(self):
      """Check ``pct_change`` with default arguments on data holding a NaN."""
      with_gap = Series([1., 1.5, np.nan, 2.5, 3.])
      wanted = Series([np.nan, 0.5, 0., 2.5 / 1.5 - 1, .2])
      assert_series_equal(with_gap.pct_change(), wanted)
  @pytest.mark.parametrize("freq, periods, fill_method, limit",
                           [('5B', 5, None, None),
                            ('3B', 3, None, None),
                            ('3B', 3, 'bfill', None),
                            ('7B', 7, 'pad', 1),
                            ('7B', 7, 'bfill', 3),
                            ('14B', 14, None, None)])
  def test_pct_change_periods_freq(self, freq, periods, fill_method, limit):
      """Check ``pct_change(freq=...)`` matches ``pct_change(periods)``."""
      # GH 7292
      fill_kwargs = dict(fill_method=fill_method, limit=limit)

      # the fixture series first, then a value-less series on the same index
      for series in (self.ts, Series(index=self.ts.index)):
          by_freq = series.pct_change(freq=freq, **fill_kwargs)
          by_periods = series.pct_change(periods, **fill_kwargs)
          assert_series_equal(by_freq, by_periods)
  def test_autocorr(self):
      """Check ``autocorr`` defaults to lag 1 and agrees with ``corr``."""
      ts = self.ts

      def check(first, second):
          # corr() with lag needs Series of at least length 2
          if len(ts) <= 2:
              assert np.isnan(first)
              assert np.isnan(second)
          else:
              assert first == second

      # Default call versus an explicit lag of one
      check(ts.autocorr(), ts.autocorr(lag=1))

      # Choose a random lag between 1 and length of Series - 2
      # and compare the result with the Series corr() function
      lag = 1 + np.random.randint(max(1, len(ts) - 2))
      via_corr = ts.corr(ts.shift(lag))
      via_autocorr = ts.autocorr(lag=lag)
      check(via_corr, via_autocorr)
  def test_first_last_valid(self):
      """Check ``first_valid_index`` / ``last_valid_index`` edge cases."""
      ts = self.ts.copy()

      # blank out the head, then the tail
      ts[:5] = np.nan
      assert ts.first_valid_index() == ts.index[5]

      ts[-5:] = np.nan
      assert ts.last_valid_index() == ts.index[-6]

      # nothing valid at all
      ts[:] = np.nan
      assert ts.last_valid_index() is None
      assert ts.first_valid_index() is None

      # empty series, with and without an explicit index (GH12800)
      for blank in (Series([], index=[]), Series()):
          assert blank.last_valid_index() is None
          assert blank.first_valid_index() is None

      # GH20499: its preserves freq with holes
      ts.index = date_range("20110101", periods=len(ts), freq="B")
      ts.iloc[1] = 1
      ts.iloc[-2] = 1
      first, last = ts.first_valid_index(), ts.last_valid_index()
      assert first == ts.index[1]
      assert last == ts.index[-2]
      assert ts.first_valid_index().freq == ts.index.freq
      assert ts.last_valid_index().freq == ts.index.freq
  def test_mpl_compat_hack(self):
      """Check ``ser[:, np.newaxis]`` matches the same index on ``.values``."""
      ts = self.ts
      from_series = ts[:, np.newaxis]
      from_values = ts.values[:, np.newaxis]
      assert_almost_equal(from_series, from_values)
  def test_timeseries_coercion(self):
      """Check an object-dtype index of dates becomes a DatetimeIndex."""
      dates = tm.makeDateIndex(10000)
      values = np.random.randn(len(dates))
      ser = Series(values, dates.astype(object))

      assert ser.index.is_all_dates
      assert isinstance(ser.index, DatetimeIndex)
  def test_contiguous_boolean_preserve_freq(self):
      """Check a contiguous boolean mask keeps ``freq``; a gap drops it."""
      bdays = date_range('1/1/2000', '3/1/2000', freq='B')

      keep = np.zeros(len(bdays), dtype=bool)
      keep[10:20] = True

      # one unbroken run of True selects the same entries as a slice
      sliced = bdays[10:20]
      assert sliced.freq is not None
      assert_range_equal(bdays[keep], sliced)

      # an extra, detached True breaks the regular spacing
      keep[22] = True
      assert bdays[keep].freq is None
  def test_to_datetime_unit(self):
      """Check ``to_datetime`` with ``unit`` on ints, floats, NaT and errors."""
      epoch = 1370745748
      base = Timestamp('2013-06-09 02:42:28')

      def stamps(secs):
          # timestamps expected for ``epoch + t`` seconds, t in ``secs``
          return [base + timedelta(seconds=t) for t in secs]

      def raw(secs):
          return [epoch + t for t in secs]

      # integer seconds
      ints = Series(raw(range(20)))
      assert_series_equal(to_datetime(ints, unit='s'),
                          Series(stamps(range(20))))

      # float seconds
      floats = Series(raw(range(20))).astype(float)
      assert_series_equal(to_datetime(floats, unit='s'),
                          Series(stamps(range(20))))

      # trailing iNaT, integer and float flavours
      ints_nat = Series(raw(range(20)) + [iNaT])
      assert_series_equal(to_datetime(ints_nat, unit='s'),
                          Series(stamps(range(20)) + [NaT]))

      floats_nat = Series(raw(range(20)) + [iNaT]).astype(float)
      assert_series_equal(to_datetime(floats_nat, unit='s'),
                          Series(stamps(range(20)) + [NaT]))

      # GH13834
      fractions = np.arange(0, 2, .25)
      fractional = Series(raw(fractions) + [iNaT]).astype(float)
      assert_series_equal(to_datetime(fractional, unit='s'),
                          Series(stamps(fractions) + [NaT]))

      # trailing NaN appended through concat
      with_nan = concat([Series(raw(range(20))).astype(float),
                         Series([np.nan])],
                        ignore_index=True)
      assert_series_equal(to_datetime(with_nan, unit='s'),
                          Series(stamps(range(20)) + [NaT]))

      # list input mixing numbers with several spellings of missing
      converted = to_datetime([1, 2, 'NaT', pd.NaT, np.nan], unit='D')
      wanted = DatetimeIndex([Timestamp('1970-01-02'),
                              Timestamp('1970-01-03')] + ['NaT'] * 3)
      tm.assert_index_equal(converted, wanted)

      msg = "non convertible value foo with the unit 'D'"
      with pytest.raises(ValueError, match=msg):
          to_datetime([1, 2, 'foo'], unit='D')
      msg = "cannot convert input 111111111 with the unit 'D'"
      with pytest.raises(OutOfBoundsDatetime, match=msg):
          to_datetime([1, 2, 111111111], unit='D')

      # coerce we can process
      wanted = DatetimeIndex([Timestamp('1970-01-02'),
                              Timestamp('1970-01-03')] + ['NaT'] * 1)
      for bad in ('foo', 111111111):
          coerced = to_datetime([1, 2, bad], unit='D', errors='coerce')
          tm.assert_index_equal(coerced, wanted)
  455. def test_series_ctor_datetime64(self):
  456. rng = date_range('1/1/2000 00:00:00', '1/1/2000 1:59:50', freq='10s')
  457. dates = np.asarray(rng)
  458. series = Series(dates)
  459. assert np.issubdtype(series.dtype, np.dtype('M8[ns]'))
  460. def test_series_repr_nat(self):
  461. series = Series([0, 1000, 2000, iNaT], dtype='M8[ns]')
  462. result = repr(series)
  463. expected = ('0 1970-01-01 00:00:00.000000\n'
  464. '1 1970-01-01 00:00:00.000001\n'
  465. '2 1970-01-01 00:00:00.000002\n'
  466. '3 NaT\n'
  467. 'dtype: datetime64[ns]')
  468. assert result == expected
  469. def test_asfreq_keep_index_name(self):
  470. # GH #9854
  471. index_name = 'bar'
  472. index = pd.date_range('20130101', periods=20, name=index_name)
  473. df = pd.DataFrame([x for x in range(20)], columns=['foo'], index=index)
  474. assert index_name == df.index.name
  475. assert index_name == df.asfreq('10D').index.name
  def test_promote_datetime_date(self):
      """Check an index of ``datetime.date`` aligns with a DatetimeIndex."""
      rng = date_range('1/1/2000', periods=20)
      ts = Series(np.random.randn(20), index=rng)

      tail = ts[5:]
      dated = tail.copy()
      dated.index = [stamp.date() for stamp in dated.index]

      # addition aligns the two indexes in either operand order
      wanted = ts + ts[5:]
      assert_series_equal(ts + dated, wanted)
      assert_series_equal(dated + ts, wanted)

      # test asfreq
      assert_series_equal(dated.asfreq('4H', method='ffill'),
                          ts[5:].asfreq('4H', method='ffill'))

      # indexer lookup gives the same positions for both index flavours
      tm.assert_numpy_array_equal(rng.get_indexer(dated.index),
                                  rng.get_indexer(tail.index))
  494. def test_asfreq_normalize(self):
  495. rng = date_range('1/1/2000 09:30', periods=20)
  496. norm = date_range('1/1/2000', periods=20)
  497. vals = np.random.randn(20)
  498. ts = Series(vals, index=rng)
  499. result = ts.asfreq('D', normalize=True)
  500. norm = date_range('1/1/2000', periods=20)
  501. expected = Series(vals, index=norm)
  502. assert_series_equal(result, expected)
  503. vals = np.random.randn(20, 3)
  504. ts = DataFrame(vals, index=rng)
  505. result = ts.asfreq('D', normalize=True)
  506. expected = DataFrame(vals, index=norm)
  507. assert_frame_equal(result, expected)
  def test_first_subset(self):
      """Check ``Series.first`` with day and month offsets."""
      half_daily = _simple_ts('1/1/2000', '1/1/2010', freq='12h')
      assert len(half_daily.first('10d')) == 20

      daily = _simple_ts('1/1/2000', '1/1/2010')
      assert len(daily.first('10d')) == 10

      assert_series_equal(daily.first('3M'), daily[:'3/31/2000'])
      assert_series_equal(daily.first('21D'), daily[:21])

      # empty input stays empty
      nothing = daily[:0]
      assert_series_equal(nothing.first('3M'), daily[:0])
  def test_first_raises(self):
      """Check ``Series.first`` raises TypeError without a DatetimeIndex."""
      # GH20725
      letters = pd.Series('a b c'.split())
      expected_msg = "'first' only supports a DatetimeIndex index"
      with pytest.raises(TypeError, match=expected_msg):
          letters.first('1D')
  def test_last_subset(self):
      """Check ``Series.last`` with day offsets and empty input."""
      half_daily = _simple_ts('1/1/2000', '1/1/2010', freq='12h')
      assert len(half_daily.last('10d')) == 20

      daily = _simple_ts('1/1/2000', '1/1/2010')
      assert len(daily.last('10d')) == 10

      # the same selection spelled as a label slice and a positional slice
      assert_series_equal(daily.last('21D'), daily['12/12/2009':])
      assert_series_equal(daily.last('21D'), daily[-21:])

      # empty input stays empty
      nothing = daily[:0]
      assert_series_equal(nothing.last('3M'), daily[:0])
  def test_last_raises(self):
      """Check ``Series.last`` raises TypeError without a DatetimeIndex."""
      # GH20725
      letters = pd.Series('a b c'.split())
      expected_msg = "'last' only supports a DatetimeIndex index"
      with pytest.raises(TypeError, match=expected_msg):
          letters.last('1D')
  def test_format_pre_1900_dates(self):
      """Smoke test: format and repr an index that starts before 1900."""
      years = date_range('1/1/1850', '1/1/1950', freq='A-DEC')
      # only checks that neither call raises
      years.format()
      repr(Series(1, index=years))
  def test_at_time(self):
      """Check ``at_time`` and ``time``-object indexing on Series/DataFrame."""
      rng = date_range('1/1/2000', '1/5/2000', freq='5min')
      ts = Series(np.random.randn(len(rng)), index=rng)

      # selecting by a Timestamp keeps only entries with that time of day
      probe = rng[1]
      picked = ts.at_time(probe)
      for field in ('hour', 'minute', 'second'):
          assert (getattr(picked.index, field) == getattr(probe, field)).all()

      # string and ``datetime.time`` arguments are equivalent
      assert_series_equal(ts.at_time('9:30'), ts.at_time(time(9, 30)))

      df = DataFrame(np.random.randn(len(rng), 3), index=rng)
      half_past_nine = (rng.hour == 9) & (rng.minute == 30)

      # indexing directly with a ``time`` object
      from_series = ts[time(9, 30)]
      from_frame = df.loc[time(9, 30)]
      assert_series_equal(from_series, ts[half_past_nine])
      tm.assert_frame_equal(from_frame, df[half_past_nine])

      last_day = df.loc['1/4/2000':]
      tm.assert_frame_equal(last_day.loc[time(9, 30)], from_frame[-1:])

      # midnight, everything
      rng = date_range('1/1/2000', '1/31/2000')
      ts = Series(np.random.randn(len(rng)), index=rng)
      assert_series_equal(ts.at_time(time(0, 0)), ts)

      # time doesn't exist
      rng = date_range('1/1/2012', freq='23Min', periods=384)
      ts = Series(np.random.randn(len(rng)), rng)
      assert len(ts.at_time('16:00')) == 0
  def test_at_time_raises(self):
      """Check ``at_time`` raises TypeError without a DatetimeIndex."""
      # GH20725
      letters = pd.Series('a b c'.split())
      expected_msg = "Index must be DatetimeIndex"
      with pytest.raises(TypeError, match=expected_msg):
          letters.at_time('00:00')
  593. def test_between(self):
  594. series = Series(date_range('1/1/2000', periods=10))
  595. left, right = series[[2, 7]]
  596. result = series.between(left, right)
  597. expected = (series >= left) & (series <= right)
  598. assert_series_equal(result, expected)
  def test_between_time(self):
      """Check ``between_time`` for every endpoint-inclusion combination."""
      rng = date_range('1/1/2000', '1/5/2000', freq='5min')
      ts = Series(np.random.randn(len(rng)), index=rng)
      stime, etime = time(0, 0), time(1, 0)

      for inc_start, inc_end in product([True, False], [True, False]):
          filtered = ts.between_time(stime, etime, inc_start, inc_end)

          # count with both ends included, reduced per excluded endpoint
          exp_len = 13 * 4 + 1
          exp_len -= 0 if inc_start else 5
          exp_len -= 0 if inc_end else 4
          assert len(filtered) == exp_len

          for stamp in filtered.index:
              t = stamp.time()
              if inc_start:
                  assert t >= stime
              else:
                  assert t > stime

              if inc_end:
                  assert t <= etime
              else:
                  assert t < etime

      # string arguments give the same selection as ``time`` objects
      assert_series_equal(ts.between_time('00:00', '01:00'),
                          ts.between_time(stime, etime))

      # across midnight
      rng = date_range('1/1/2000', '1/5/2000', freq='5min')
      ts = Series(np.random.randn(len(rng)), index=rng)
      stime, etime = time(22, 0), time(9, 0)

      for inc_start, inc_end in product([True, False], [True, False]):
          filtered = ts.between_time(stime, etime, inc_start, inc_end)

          exp_len = (12 * 11 + 1) * 4 + 1
          exp_len -= 0 if inc_start else 4
          exp_len -= 0 if inc_end else 4
          assert len(filtered) == exp_len

          for stamp in filtered.index:
              t = stamp.time()
              if inc_start:
                  assert (t >= stime) or (t <= etime)
              else:
                  assert (t > stime) or (t <= etime)

              if inc_end:
                  assert (t <= etime) or (t >= stime)
              else:
                  assert (t < etime) or (t >= stime)
  def test_between_time_raises(self):
      """Check ``between_time`` raises TypeError without a DatetimeIndex."""
      # GH20725
      letters = pd.Series('a b c'.split())
      expected_msg = "Index must be DatetimeIndex"
      with pytest.raises(TypeError, match=expected_msg):
          letters.between_time(start_time='00:00', end_time='12:00')
  def test_between_time_types(self):
      """Check that ``datetime`` bounds are rejected by between_time APIs."""
      # GH11818
      rng = date_range('1/1/2000', '1/5/2000', freq='5min')
      lower = datetime(2010, 1, 2, 1)
      upper = datetime(2010, 1, 2, 5)
      msg = (r"Cannot convert arg \[datetime\.datetime\(2010, 1, 2, 1, 0\)\]"
             " to a time")

      # index-level entry point
      with pytest.raises(ValueError, match=msg):
          rng.indexer_between_time(lower, upper)

      # DataFrame
      frame = DataFrame({'A': 0}, index=rng)
      with pytest.raises(ValueError, match=msg):
          frame.between_time(lower, upper)

      # Series
      series = Series(0, index=rng)
      with pytest.raises(ValueError, match=msg):
          series.between_time(lower, upper)
  @td.skip_if_has_locale
  def test_between_time_formats(self):
      """Check ``between_time`` accepts several time string spellings."""
      # GH11818
      rng = date_range('1/1/2000', '1/5/2000', freq='5min')
      frame = DataFrame(np.random.randn(len(rng), 2), index=rng)

      # every pair spells the same 2:00 - 2:30 window
      spellings = [("2:00", "2:30"), ("0200", "0230"), ("2:00am", "2:30am"),
                   ("0200am", "0230am"), ("2:00:00", "2:30:00"),
                   ("020000", "023000"), ("2:00:00am", "2:30:00am"),
                   ("020000am", "023000am")]
      expected_length = 28

      for start, end in spellings:
          assert len(frame.between_time(start, end)) == expected_length
  def test_between_time_axis(self):
      """Check the ``axis`` argument of ``Series.between_time``."""
      # issue 8839
      rng = date_range('1/1/2000', periods=100, freq='10min')
      ts = Series(np.random.randn(len(rng)), index=rng)
      stime = '08:00:00'
      etime = '09:00:00'
      expected_length = 7

      # default axis and explicit axis=0 select the same rows
      assert len(ts.between_time(stime, etime)) == expected_length
      assert len(ts.between_time(stime, etime, axis=0)) == expected_length

      # axis=1 is rejected for a Series
      msg = r"No axis named 1 for object type <(class|type) 'type'>"
      with pytest.raises(ValueError, match=msg):
          ts.between_time(stime, etime, axis=1)
  695. def test_to_period(self):
  696. from pandas.core.indexes.period import period_range
  697. ts = _simple_ts('1/1/2000', '1/1/2001')
  698. pts = ts.to_period()
  699. exp = ts.copy()
  700. exp.index = period_range('1/1/2000', '1/1/2001')
  701. assert_series_equal(pts, exp)
  702. pts = ts.to_period('M')
  703. exp.index = exp.index.asfreq('M')
  704. tm.assert_index_equal(pts.index, exp.index.asfreq('M'))
  705. assert_series_equal(pts, exp)
  706. # GH 7606 without freq
  707. idx = DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03',
  708. '2011-01-04'])
  709. exp_idx = pd.PeriodIndex(['2011-01-01', '2011-01-02', '2011-01-03',
  710. '2011-01-04'], freq='D')
  711. s = Series(np.random.randn(4), index=idx)
  712. expected = s.copy()
  713. expected.index = exp_idx
  714. assert_series_equal(s.to_period(), expected)
  715. df = DataFrame(np.random.randn(4, 4), index=idx, columns=idx)
  716. expected = df.copy()
  717. expected.index = exp_idx
  718. assert_frame_equal(df.to_period(), expected)
  719. expected = df.copy()
  720. expected.columns = exp_idx
  721. assert_frame_equal(df.to_period(axis=1), expected)
  722. def test_groupby_count_dateparseerror(self):
  723. dr = date_range(start='1/1/2012', freq='5min', periods=10)
  724. # BAD Example, datetimes first
  725. s = Series(np.arange(10), index=[dr, lrange(10)])
  726. grouped = s.groupby(lambda x: x[1] % 2 == 0)
  727. result = grouped.count()
  728. s = Series(np.arange(10), index=[lrange(10), dr])
  729. grouped = s.groupby(lambda x: x[0] % 2 == 0)
  730. expected = grouped.count()
  731. assert_series_equal(result, expected)
  732. def test_to_csv_numpy_16_bug(self):
  733. frame = DataFrame({'a': date_range('1/1/2000', periods=10)})
  734. buf = StringIO()
  735. frame.to_csv(buf)
  736. result = buf.getvalue()
  737. assert '2000-01-01' in result
  738. def test_series_map_box_timedelta(self):
  739. # GH 11349
  740. s = Series(timedelta_range('1 day 1 s', periods=5, freq='h'))
  741. def f(x):
  742. return x.total_seconds()
  743. s.map(f)
  744. s.apply(f)
  745. DataFrame(s).applymap(f)
  746. def test_asfreq_resample_set_correct_freq(self):
  747. # GH5613
  748. # we test if .asfreq() and .resample() set the correct value for .freq
  749. df = pd.DataFrame({'date': ["2012-01-01", "2012-01-02", "2012-01-03"],
  750. 'col': [1, 2, 3]})
  751. df = df.set_index(pd.to_datetime(df.date))
  752. # testing the settings before calling .asfreq() and .resample()
  753. assert df.index.freq is None
  754. assert df.index.inferred_freq == 'D'
  755. # does .asfreq() set .freq correctly?
  756. assert df.asfreq('D').index.freq == 'D'
  757. # does .resample() set .freq correctly?
  758. assert df.resample('D').asfreq().index.freq == 'D'
  759. def test_pickle(self):
  760. # GH4606
  761. p = tm.round_trip_pickle(NaT)
  762. assert p is NaT
  763. idx = pd.to_datetime(['2013-01-01', NaT, '2014-01-06'])
  764. idx_p = tm.round_trip_pickle(idx)
  765. assert idx_p[0] == idx[0]
  766. assert idx_p[1] is NaT
  767. assert idx_p[2] == idx[2]
  768. # GH11002
  769. # don't infer freq
  770. idx = date_range('1750-1-1', '2050-1-1', freq='7D')
  771. idx_p = tm.round_trip_pickle(idx)
  772. tm.assert_index_equal(idx, idx_p)
  773. @pytest.mark.parametrize('tz', [None, 'Asia/Tokyo', 'US/Eastern'])
  774. def test_setops_preserve_freq(self, tz):
  775. rng = date_range('1/1/2000', '1/1/2002', name='idx', tz=tz)
  776. result = rng[:50].union(rng[50:100])
  777. assert result.name == rng.name
  778. assert result.freq == rng.freq
  779. assert result.tz == rng.tz
  780. result = rng[:50].union(rng[30:100])
  781. assert result.name == rng.name
  782. assert result.freq == rng.freq
  783. assert result.tz == rng.tz
  784. result = rng[:50].union(rng[60:100])
  785. assert result.name == rng.name
  786. assert result.freq is None
  787. assert result.tz == rng.tz
  788. result = rng[:50].intersection(rng[25:75])
  789. assert result.name == rng.name
  790. assert result.freqstr == 'D'
  791. assert result.tz == rng.tz
  792. nofreq = DatetimeIndex(list(rng[25:75]), name='other')
  793. result = rng[:50].union(nofreq)
  794. assert result.name is None
  795. assert result.freq == rng.freq
  796. assert result.tz == rng.tz
  797. result = rng[:50].intersection(nofreq)
  798. assert result.name is None
  799. assert result.freq == rng.freq
  800. assert result.tz == rng.tz
  801. def test_from_M8_structured(self):
  802. dates = [(datetime(2012, 9, 9, 0, 0), datetime(2012, 9, 8, 15, 10))]
  803. arr = np.array(dates,
  804. dtype=[('Date', 'M8[us]'), ('Forecasting', 'M8[us]')])
  805. df = DataFrame(arr)
  806. assert df['Date'][0] == dates[0][0]
  807. assert df['Forecasting'][0] == dates[0][1]
  808. s = Series(arr['Date'])
  809. assert isinstance(s[0], Timestamp)
  810. assert s[0] == dates[0][0]
  811. with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
  812. s = Series.from_array(arr['Date'], Index([0]))
  813. assert s[0] == dates[0][0]
  814. def test_get_level_values_box(self):
  815. from pandas import MultiIndex
  816. dates = date_range('1/1/2000', periods=4)
  817. levels = [dates, [0, 1]]
  818. codes = [[0, 0, 1, 1, 2, 2, 3, 3], [0, 1, 0, 1, 0, 1, 0, 1]]
  819. index = MultiIndex(levels=levels, codes=codes)
  820. assert isinstance(index.get_level_values(0)[0], Timestamp)
  821. def test_view_tz(self):
  822. # GH#24024
  823. ser = pd.Series(pd.date_range('2000', periods=4, tz='US/Central'))
  824. result = ser.view("i8")
  825. expected = pd.Series([946706400000000000,
  826. 946792800000000000,
  827. 946879200000000000,
  828. 946965600000000000])
  829. tm.assert_series_equal(result, expected)
  830. def test_asarray_tz_naive(self):
  831. # This shouldn't produce a warning.
  832. ser = pd.Series(pd.date_range('2000', periods=2))
  833. expected = np.array(['2000-01-01', '2000-01-02'], dtype='M8[ns]')
  834. with tm.assert_produces_warning(None):
  835. result = np.asarray(ser)
  836. tm.assert_numpy_array_equal(result, expected)
  837. # optionally, object
  838. with tm.assert_produces_warning(None):
  839. result = np.asarray(ser, dtype=object)
  840. expected = np.array([pd.Timestamp('2000-01-01'),
  841. pd.Timestamp('2000-01-02')])
  842. tm.assert_numpy_array_equal(result, expected)
  843. def test_asarray_tz_aware(self):
  844. tz = 'US/Central'
  845. ser = pd.Series(pd.date_range('2000', periods=2, tz=tz))
  846. expected = np.array(['2000-01-01T06', '2000-01-02T06'], dtype='M8[ns]')
  847. # We warn by default and return an ndarray[M8[ns]]
  848. with tm.assert_produces_warning(FutureWarning):
  849. result = np.asarray(ser)
  850. tm.assert_numpy_array_equal(result, expected)
  851. # Old behavior with no warning
  852. with tm.assert_produces_warning(None):
  853. result = np.asarray(ser, dtype="M8[ns]")
  854. tm.assert_numpy_array_equal(result, expected)
  855. # Future behavior with no warning
  856. expected = np.array([pd.Timestamp("2000-01-01", tz=tz),
  857. pd.Timestamp("2000-01-02", tz=tz)])
  858. with tm.assert_produces_warning(None):
  859. result = np.asarray(ser, dtype=object)
  860. tm.assert_numpy_array_equal(result, expected)