test_period_index.py 33 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759
  1. from datetime import datetime, timedelta
  2. import dateutil
  3. import numpy as np
  4. import pytest
  5. import pytz
  6. from pandas._libs.tslibs.ccalendar import DAYS, MONTHS
  7. from pandas._libs.tslibs.period import IncompatibleFrequency
  8. from pandas.compat import lrange, range, zip
  9. import pandas as pd
  10. from pandas import DataFrame, Series, Timestamp
  11. from pandas.core.indexes.datetimes import date_range
  12. from pandas.core.indexes.period import Period, PeriodIndex, period_range
  13. from pandas.core.resample import _get_period_range_edges
  14. import pandas.util.testing as tm
  15. from pandas.util.testing import (
  16. assert_almost_equal, assert_frame_equal, assert_series_equal)
  17. import pandas.tseries.offsets as offsets
  @pytest.fixture()
  def _index_factory():
      # Index constructor exposed to fixtures for this module: period_range.
      factory = period_range
      return factory
  @pytest.fixture
  def _series_name():
      # Series name exposed to fixtures for this module.
      name = 'pi'
      return name
  24. class TestPeriodIndex(object):
  @pytest.mark.parametrize('freq', ['2D', '1H', '2H'])
  @pytest.mark.parametrize('kind', ['period', None, 'timestamp'])
  def test_asfreq(self, series_and_frame, freq, kind):
      # GH 12884, 15944
      # .asfreq() on the resampler must return a PeriodIndex, except when
      # kind='timestamp' is requested.
      obj = series_and_frame
      stamped = obj.to_timestamp()
      if kind == 'timestamp':
          expected = stamped.resample(freq).asfreq()
      else:
          first = obj.index[0].to_timestamp(how='start')
          # Start of the period following the last one; closed='left' below
          # excludes that end point itself.
          stop = (obj.index[-1] + obj.index.freq).to_timestamp(how='start')
          grid = date_range(start=first, end=stop, freq=freq, closed='left')
          expected = stamped.reindex(grid).to_period(freq)

      result = obj.resample(freq, kind=kind).asfreq()
      assert_almost_equal(result, expected)
  def test_asfreq_fill_value(self, series):
      # test for fill value during resampling, issue 3715
      # Checked for the Series and for the one-column frame built from it.
      s = series
      first = s.index[0].to_timestamp(how='start')
      last = (s.index[-1]).to_timestamp(how='start')
      hourly = date_range(first, last, freq='1H')
      expected = s.to_timestamp().reindex(hourly, fill_value=4.0)
      result = s.resample('1H', kind='timestamp').asfreq(fill_value=4.0)
      assert_series_equal(result, expected)

      frame = s.to_frame('value')
      first = frame.index[0].to_timestamp(how='start')
      last = (frame.index[-1]).to_timestamp(how='start')
      hourly = date_range(first, last, freq='1H')
      expected = frame.to_timestamp().reindex(hourly, fill_value=3.0)
      result = frame.resample('1H', kind='timestamp').asfreq(fill_value=3.0)
      assert_frame_equal(result, expected)
  @pytest.mark.parametrize('freq', ['H', '12H', '2D', 'W'])
  @pytest.mark.parametrize('kind', [None, 'period', 'timestamp'])
  def test_selection(self, index, freq, kind):
      # This is a bug, these should be implemented
      # GH 14008
      positions = np.arange(len(index), dtype=np.int64)
      multi = pd.MultiIndex.from_arrays([positions, index],
                                        names=['v', 'd'])
      df = DataFrame({'date': index, 'a': positions}, index=multi)

      # Selecting the period values through a column (on=) or through an
      # index level (level=) is expected to raise NotImplementedError.
      with pytest.raises(NotImplementedError):
          df.resample(freq, on='date', kind=kind)
      with pytest.raises(NotImplementedError):
          df.resample(freq, level='d', kind=kind)
  @pytest.mark.parametrize('month', MONTHS)
  @pytest.mark.parametrize('meth', ['ffill', 'bfill'])
  @pytest.mark.parametrize('conv', ['start', 'end'])
  @pytest.mark.parametrize('targ', ['D', 'B', 'M'])
  def test_annual_upsample_cases(self, targ, conv, meth, month,
                                 simple_period_range_series):
      # The filled upsample of an annual series must survive a round trip
      # through timestamps (to_timestamp -> asfreq -> to_period).
      annual_freq = 'A-%s' % month
      ts = simple_period_range_series('1/1/1990', '12/31/1991',
                                      freq=annual_freq)

      resampler = ts.resample(targ, convention=conv)
      result = getattr(resampler, meth)()

      stamped = result.to_timestamp(targ, how=conv)
      expected = stamped.asfreq(targ, meth).to_period()
      assert_series_equal(result, expected)
  def test_basic_downsample(self, simple_period_range_series):
      # Monthly -> annual mean must equal the mean grouped by index year.
      ts = simple_period_range_series('1/1/1990', '6/30/1995', freq='M')
      result = ts.resample('a-dec').mean()

      yearly = ts.groupby(ts.index.year).mean()
      yearly.index = period_range('1/1/1990', '6/30/1995', freq='a-dec')
      assert_series_equal(result, yearly)

      # this is ok
      for alias in ('a-dec', 'a'):
          assert_series_equal(ts.resample(alias).mean(), result)
  def test_not_subperiod(self, simple_period_range_series):
      # These are incompatible period rules for resampling
      ts = simple_period_range_series('1/1/1990', '6/30/1995', freq='w-wed')
      for rule in ('a-dec', 'q-mar', 'M', 'w-thu'):
          with pytest.raises(ValueError):
              ts.resample(rule).mean()
  @pytest.mark.parametrize('freq', ['D', '2D'])
  def test_basic_upsample(self, freq, simple_period_range_series):
      # Annual means forward-filled to ``freq`` (convention='end') must match
      # the same fill performed on the timestamp representation.
      monthly = simple_period_range_series('1/1/1990', '6/30/1995', freq='M')
      annual = monthly.resample('a-dec').mean()

      resampled = annual.resample(freq, convention='end').ffill()

      stamped = annual.to_timestamp(freq, how='end')
      expected = stamped.asfreq(freq, 'ffill').to_period(freq)
      assert_series_equal(resampled, expected)
  def test_upsample_with_limit(self):
      # ffill(limit=2) on an annual -> monthly upsample must agree with a
      # forward-fill reindex using the same limit.
      years = period_range('1/1/2000', periods=5, freq='A')
      ts = Series(np.random.randn(len(years)), years)

      result = ts.resample('M', convention='end').ffill(limit=2)

      monthly = ts.asfreq('M')
      expected = monthly.reindex(result.index, method='ffill', limit=2)
      assert_series_equal(result, expected)
  def test_annual_upsample(self, simple_period_range_series):
      ts = simple_period_range_series('1/1/1990', '12/31/1995', freq='A-DEC')
      df = DataFrame({'a': ts})
      # Frame-level and column-level upsampling must agree.
      frame_filled = df.resample('D').ffill()
      column_filled = df['a'].resample('D').ffill()
      assert_series_equal(frame_filled['a'], column_filled)

      years = period_range('2000', '2003', freq='A-DEC')
      ts = Series([1, 2, 3, 4], index=years)
      result = ts.resample('M').ffill()

      months = period_range('2000-01', '2003-12', freq='M')
      at_start = ts.asfreq('M', how='start')
      expected = at_start.reindex(months, method='ffill')
      assert_series_equal(result, expected)
  @pytest.mark.parametrize('month', MONTHS)
  @pytest.mark.parametrize('target', ['D', 'B', 'M'])
  @pytest.mark.parametrize('convention', ['start', 'end'])
  def test_quarterly_upsample(self, month, target, convention,
                              simple_period_range_series):
      # Forward-filled upsampling of a quarterly series must survive a round
      # trip through timestamps (to_timestamp -> asfreq -> to_period).
      freq = 'Q-{month}'.format(month=month)
      ts = simple_period_range_series('1/1/1990', '12/31/1995', freq=freq)

      result = ts.resample(target, convention=convention).ffill()

      stamped = result.to_timestamp(target, how=convention)
      expected = stamped.asfreq(target, 'ffill').to_period()
      assert_series_equal(result, expected)
  @pytest.mark.parametrize('target', ['D', 'B'])
  @pytest.mark.parametrize('convention', ['start', 'end'])
  def test_monthly_upsample(self, target, convention,
                            simple_period_range_series):
      # Forward-filled upsampling of a monthly series must survive a round
      # trip through timestamps (to_timestamp -> asfreq -> to_period).
      ts = simple_period_range_series('1/1/1990', '12/31/1995', freq='M')

      result = ts.resample(target, convention=convention).ffill()

      stamped = result.to_timestamp(target, how=convention)
      expected = stamped.asfreq(target, 'ffill').to_period()
      assert_series_equal(result, expected)
  def test_resample_basic(self):
      # GH3609
      seconds = date_range('20130101', freq='s', periods=100, name='idx')
      s = Series(range(100), index=seconds, dtype='float')
      s[10:30] = np.nan

      minutes = PeriodIndex([Period('2013-01-01 00:00', 'T'),
                             Period('2013-01-01 00:01', 'T')], name='idx')
      expected = Series([34.5, 79.5], index=minutes)

      # Same answer whether the data is converted to periods first or
      # resampled directly from timestamps with kind='period'.
      result = s.to_period().resample('T', kind='period').mean()
      assert_series_equal(result, expected)
      result2 = s.resample('T', kind='period').mean()
      assert_series_equal(result2, expected)
  @pytest.mark.parametrize('freq,expected_vals', [('M', [31, 29, 31, 9]),
                                                  ('2M', [31 + 29, 31 + 9])])
  def test_resample_count(self, freq, expected_vals):
      # GH12774
      # 100 periods starting at 2000, counted per ``freq`` bin.
      source_index = pd.period_range(start='2000', periods=100)
      series = Series(1, index=source_index)
      result = series.resample(freq).count()

      bins = pd.period_range(start='2000', freq=freq,
                             periods=len(expected_vals))
      expected = Series(expected_vals, index=bins)
      assert_series_equal(result, expected)
  def test_resample_same_freq(self, resample_method):
      # GH12770
      # Resampling monthly data to 'M' must return the input unchanged.
      monthly = pd.period_range(start='2000', periods=3, freq='M')
      series = Series(range(3), index=monthly)
      expected = series

      resampler = series.resample('M')
      result = getattr(resampler, resample_method)()
      assert_series_equal(result, expected)
  def test_resample_incompat_freq(self):
      # Monthly periods resampled to 'W' must raise IncompatibleFrequency.
      with pytest.raises(IncompatibleFrequency):
          monthly = pd.period_range(start='2000', periods=3, freq='M')
          series = Series(range(3), index=monthly)
          series.resample('W').mean()
  def test_with_local_timezone_pytz(self):
      # see gh-5430
      local_timezone = pytz.timezone('America/Los_Angeles')

      start = datetime(year=2013, month=11, day=1, hour=0, minute=0,
                       tzinfo=pytz.utc)
      # 1 day later
      end = datetime(year=2013, month=11, day=2, hour=0, minute=0,
                     tzinfo=pytz.utc)

      hourly_utc = pd.date_range(start, end, freq='H')
      series = Series(1, index=hourly_utc).tz_convert(local_timezone)
      result = series.resample('D', kind='period').mean()

      # Create the expected series
      # Index is moved back a day with the timezone conversion from UTC to
      # Pacific
      utc_days = pd.period_range(start=start, end=end, freq='D')
      expected_index = utc_days - offsets.Day()
      expected = Series(1, index=expected_index)
      assert_series_equal(result, expected)
  def test_resample_with_pytz(self):
      # GH 13238
      hourly = pd.date_range('2017-01-01', periods=48, freq="H",
                             tz="US/Eastern")
      s = Series(2, index=hourly)
      result = s.resample("D").mean()

      days = pd.DatetimeIndex(['2017-01-01', '2017-01-02'],
                              tz="US/Eastern")
      expected = Series(2, index=days)
      assert_series_equal(result, expected)
      # Especially assert that the timezone is LMT for pytz
      assert result.index.tz == pytz.timezone('US/Eastern')
  def test_with_local_timezone_dateutil(self):
      # see gh-5430
      local_timezone = 'dateutil/America/Los_Angeles'

      start = datetime(year=2013, month=11, day=1, hour=0, minute=0,
                       tzinfo=dateutil.tz.tzutc())
      # 1 day later
      end = datetime(year=2013, month=11, day=2, hour=0, minute=0,
                     tzinfo=dateutil.tz.tzutc())

      hourly_utc = pd.date_range(start, end, freq='H', name='idx')
      series = Series(1, index=hourly_utc).tz_convert(local_timezone)
      result = series.resample('D', kind='period').mean()

      # Create the expected series
      # Index is moved back a day with the timezone conversion from UTC to
      # Pacific
      utc_days = pd.period_range(start=start, end=end, freq='D', name='idx')
      expected_index = utc_days - offsets.Day()
      expected = Series(1, index=expected_index)
      assert_series_equal(result, expected)
  def test_resample_nonexistent_time_bin_edge(self):
      # GH 19375
      quarter_hours = date_range('2017-03-12', '2017-03-12 1:45:00',
                                 freq='15T')
      naive = Series(np.zeros(len(quarter_hours)), index=quarter_hours)
      expected = naive.tz_localize('US/Pacific')
      result = expected.resample('900S').mean()
      tm.assert_series_equal(result, expected)

      # GH 23742
      hourly = date_range(start='2017-10-10', end='2017-10-20', freq='1H')
      hourly = hourly.tz_localize('UTC').tz_convert('America/Sao_Paulo')
      df = DataFrame(data=list(range(len(hourly))), index=hourly)
      grouped = df.groupby(pd.Grouper(freq='1D'))
      result = grouped.count()
      expected = date_range(start='2017-10-09', end='2017-10-20', freq='D',
                            tz="America/Sao_Paulo",
                            nonexistent='shift_forward', closed='left')
      tm.assert_index_equal(result.index, expected)
  def test_resample_ambiguous_time_bin_edge(self):
      # GH 10117
      # Resampling at the index's own 30-minute frequency must return the
      # input unchanged.
      half_hours = pd.date_range("2014-10-25 22:00:00", "2014-10-26 00:30:00",
                                 freq="30T", tz="Europe/London")
      expected = Series(np.zeros(len(half_hours)), index=half_hours)
      resampler = expected.resample('30T')
      result = resampler.mean()
      tm.assert_series_equal(result, expected)
  def test_fill_method_and_how_upsample(self):
      # GH2073
      quarters = date_range('2010-01-01', periods=9, freq='Q')
      s = Series(np.arange(9, dtype='int64'), index=quarters)

      last = s.resample('M').ffill()

      # Re-resampling the filled monthly data with last() must change nothing.
      refilled = s.resample('M').ffill()
      both = refilled.resample('M').last().astype('int64')
      assert_series_equal(last, both)
  @pytest.mark.parametrize('day', DAYS)
  @pytest.mark.parametrize('target', ['D', 'B'])
  @pytest.mark.parametrize('convention', ['start', 'end'])
  def test_weekly_upsample(self, day, target, convention,
                           simple_period_range_series):
      # Forward-filled upsampling of a weekly series must survive a round
      # trip through timestamps (to_timestamp -> asfreq -> to_period).
      freq = 'W-{day}'.format(day=day)
      ts = simple_period_range_series('1/1/1990', '12/31/1995', freq=freq)

      result = ts.resample(target, convention=convention).ffill()

      stamped = result.to_timestamp(target, how=convention)
      expected = stamped.asfreq(target, 'ffill').to_period()
      assert_series_equal(result, expected)
  def test_resample_to_timestamps(self, simple_period_range_series):
      # kind='timestamp' must match converting to timestamps first and then
      # resampling.
      ts = simple_period_range_series('1/1/1990', '12/31/1995', freq='M')

      result = ts.resample('A-DEC', kind='timestamp').mean()

      stamped = ts.to_timestamp(how='start')
      expected = stamped.resample('A-DEC').mean()
      assert_series_equal(result, expected)
  def test_resample_to_quarterly(self, simple_period_range_series):
      # Annual -> quarterly with the same anchor month, for every month.
      for month in MONTHS:
          quarterly_freq = 'Q-%s' % month
          ts = simple_period_range_series(
              '1990', '1992', freq='A-%s' % month)
          quar_ts = ts.resample(quarterly_freq).ffill()

          stamps = ts.to_timestamp('D', how='start')
          range_start = ts.index[0].asfreq('D', 'start')
          range_end = ts.index[-1].asfreq('D', 'end')
          qdates = period_range(range_start, range_end, freq=quarterly_freq)

          expected = stamps.reindex(qdates.to_timestamp('D', 's'),
                                    method='ffill')
          expected.index = qdates

          assert_series_equal(quar_ts, expected)

      # conforms, but different month
      ts = simple_period_range_series('1990', '1992', freq='A-JUN')

      for how in ['start', 'end']:
          result = ts.resample('Q-MAR', convention=how).ffill()
          converted = ts.asfreq('Q-MAR', how=how)
          expected = converted.reindex(result.index, method='ffill')

          # .to_timestamp('D')
          # expected = expected.resample('Q-MAR').ffill()

          assert_series_equal(result, expected)
  def test_resample_fill_missing(self):
      # Forward-filling a gappy annual series must match the same fill done
      # on timestamps and converted back to periods.
      years = PeriodIndex([2000, 2005, 2007, 2009], freq='A')
      s = Series(np.random.randn(4), index=years)

      stamps = s.to_timestamp()
      filled = s.resample('A').ffill()

      stamp_filled = stamps.resample('A').ffill()
      expected = stamp_filled.to_period('A')
      assert_series_equal(filled, expected)
  def test_cant_fill_missing_dups(self):
      # Forward-filling over an index with duplicate periods must raise.
      years = PeriodIndex([2000, 2005, 2005, 2007, 2007], freq='A')
      s = Series(np.random.randn(5), index=years)
      with pytest.raises(Exception):
          s.resample('A').ffill()
  @pytest.mark.parametrize('freq', ['5min'])
  @pytest.mark.parametrize('kind', ['period', None, 'timestamp'])
  def test_resample_5minute(self, freq, kind):
      minutes = period_range('1/1/2000', '1/5/2000', freq='T')
      ts = Series(np.random.randn(len(minutes)), index=minutes)

      # Reference result comes from the timestamp path; it is converted back
      # to periods unless timestamps were explicitly requested.
      expected = ts.to_timestamp().resample(freq).mean()
      wants_periods = kind != 'timestamp'
      if wants_periods:
          expected = expected.to_period(freq)

      result = ts.resample(freq, kind=kind).mean()
      assert_series_equal(result, expected)
  def test_upsample_daily_business_daily(self, simple_period_range_series):
      # Business-day periods -> calendar-day periods.
      ts = simple_period_range_series('1/1/2000', '2/1/2000', freq='B')

      result = ts.resample('D').asfreq()
      days = period_range('1/3/2000', '2/1/2000')
      expected = ts.asfreq('D').reindex(days)
      assert_series_equal(result, expected)

      # Series built with the fixture's default freq -> hourly periods.
      ts = simple_period_range_series('1/1/2000', '2/1/2000')
      result = ts.resample('H', convention='s').asfreq()
      exp_rng = period_range('1/1/2000', '2/1/2000 23:00', freq='H')
      hourly = ts.asfreq('H', how='s')
      expected = hourly.reindex(exp_rng)
      assert_series_equal(result, expected)
  def test_resample_irregular_sparse(self):
      five_minutes = date_range(start='1/1/2012', freq='5min', periods=1000)
      s = Series(np.array(100), index=five_minutes)
      # subset the data.
      subset = s[:'2012-01-04 06:55']

      # Bin sizes computed on the subset must equal those of the full
      # series restricted to the subset's bins.
      result = subset.resample('10min').apply(len)
      full_counts = s.resample('10min').apply(len)
      expected = full_counts.loc[result.index]
      assert_series_equal(result, expected)
  def test_resample_weekly_all_na(self):
      wednesdays = date_range('1/1/2000', periods=10, freq='W-WED')
      ts = Series(np.random.randn(len(wednesdays)), index=wednesdays)

      # Wednesday stamps resampled onto a Thursday grid: every slot is NaN.
      result = ts.resample('W-THU').asfreq()
      assert result.isna().all()

      filled = ts.resample('W-THU').asfreq().ffill()
      result = filled[:-1]
      expected = ts.asfreq('W-THU').ffill()
      assert_series_equal(result, expected)
  def test_resample_tz_localized(self):
      days = date_range(start='2012-4-13', end='2012-5-1')
      ts = Series(lrange(len(days)), days)

      ts_utc = ts.tz_localize('UTC')
      ts_local = ts_utc.tz_convert('America/Los_Angeles')

      result = ts_local.resample('W').mean()

      # Same data with the tz stripped, resampled, then re-localized.
      ts_local_naive = ts_local.copy()
      ts_local_naive.index = [x.replace(tzinfo=None)
                              for x in ts_local_naive.index.to_pydatetime()]
      naive_weekly = ts_local_naive.resample('W').mean()
      exp = naive_weekly.tz_localize('America/Los_Angeles')

      assert_series_equal(result, exp)

      # it works
      ts_local.resample('D').mean()

      # #2245
      idx = date_range('2001-09-20 15:59', '2001-09-20 16:00', freq='T',
                       tz='Australia/Sydney')
      s = Series([1, 2], index=idx)

      result = s.resample('D', closed='right', label='right').mean()
      ex_index = date_range('2001-09-21', periods=1, freq='D',
                            tz='Australia/Sydney')
      expected = Series([1.5], index=ex_index)
      assert_series_equal(result, expected)

      # for good measure
      result = s.resample('D', kind='period').mean()
      ex_index = period_range('2001-09-20', periods=1, freq='D')
      expected = Series([1.5], index=ex_index)
      assert_series_equal(result, expected)

      # GH 6397
      # comparing an offset that doesn't propagate tz's
      rng = date_range('1/1/2011', periods=20000, freq='H')
      rng = rng.tz_localize('EST')
      ts = DataFrame(index=rng)
      ts['first'] = np.random.randn(len(rng))
      ts['second'] = np.cumsum(np.random.randn(len(rng)))

      columns = ['first', 'second']
      expected = DataFrame(
          {'first': ts.resample('A').sum()['first'],
           'second': ts.resample('A').mean()['second']},
          columns=columns)
      aggregated = ts.resample('A').agg({'first': np.sum,
                                         'second': np.mean})
      result = aggregated.reindex(columns=['first', 'second'])
      assert_frame_equal(result, expected)
  def test_closed_left_corner(self):
      # #1465
      minutes = date_range(start='1/1/2012 9:30', freq='1min', periods=21)
      s = Series(np.random.randn(21), index=minutes)
      s[0] = np.nan
      # The same series without its leading NaN observation.
      tail = s[1:]

      result = s.resample('10min', closed='left', label='right').mean()
      exp = tail.resample('10min', closed='left', label='right').mean()
      assert_series_equal(result, exp)

      result = s.resample('10min', closed='left', label='left').mean()
      exp = tail.resample('10min', closed='left', label='left').mean()

      ex_index = date_range(start='1/1/2012 9:30', freq='10min', periods=3)

      tm.assert_index_equal(result.index, ex_index)
      assert_series_equal(result, exp)
  def test_quarterly_resampling(self):
      # Quarterly -> annual mean must match the timestamp-based computation.
      quarters = period_range('2000Q1', periods=10, freq='Q-DEC')
      ts = Series(np.arange(10), index=quarters)

      result = ts.resample('A').mean()

      stamped_mean = ts.to_timestamp().resample('A').mean()
      exp = stamped_mean.to_period()
      assert_series_equal(result, exp)
  def test_resample_weekly_bug_1726(self):
      # 8/6/12 is a Monday
      ind = date_range(start="8/6/2012", end="8/26/2012", freq="D")
      columns = ['open', 'high', 'low', 'close', 'vol']
      # One row per day; every column in a row holds the row number.
      data = [[x] * 5 for x in range(len(ind))]
      df = DataFrame(data, columns=columns, index=ind)

      # it works!
      df.resample('W-MON', closed='left', label='left').first()
  def test_resample_with_dst_time_change(self):
      # GH 15549
      utc_index = pd.DatetimeIndex([1457537600000000000,
                                    1458059600000000000]).tz_localize("UTC")
      index = utc_index.tz_convert('America/Chicago')
      df = pd.DataFrame([1, 2], index=index)

      resampler = df.resample('12h', closed='right', label='right')
      result = resampler.last().ffill()

      # Expected labels: the UTC offset changes from -06:00 to -05:00 part
      # way through the list.
      expected_index_values = ['2016-03-09 12:00:00-06:00',
                               '2016-03-10 00:00:00-06:00',
                               '2016-03-10 12:00:00-06:00',
                               '2016-03-11 00:00:00-06:00',
                               '2016-03-11 12:00:00-06:00',
                               '2016-03-12 00:00:00-06:00',
                               '2016-03-12 12:00:00-06:00',
                               '2016-03-13 00:00:00-06:00',
                               '2016-03-13 13:00:00-05:00',
                               '2016-03-14 01:00:00-05:00',
                               '2016-03-14 13:00:00-05:00',
                               '2016-03-15 01:00:00-05:00',
                               '2016-03-15 13:00:00-05:00']
      as_utc = pd.to_datetime(expected_index_values, utc=True)
      index = as_utc.tz_convert('America/Chicago')
      # Twelve forward-filled 1.0 rows followed by the final 2.0.
      values = [1.0] * 12 + [2.0]
      expected = pd.DataFrame(values, index=index)
      assert_frame_equal(result, expected)
  def test_resample_bms_2752(self):
      # GH2753
      # Resampling to "BMS", and then resampling that result to "B", must
      # both start at Timestamp('20000103').
      #
      # The Series is built from an index alone, so dtype is passed
      # explicitly: the default dtype of a data-less Series is not the same
      # across pandas versions.
      business_days = pd.bdate_range('20000101', '20000201')
      foo = Series(index=business_days, dtype=np.float64)
      res1 = foo.resample("BMS").mean()
      res2 = foo.resample("BMS").mean().resample("B").mean()
      assert res1.index[0] == Timestamp('20000103')
      assert res1.index[0] == res2.index[0]
  452. # def test_monthly_convention_span(self):
  453. # rng = period_range('2000-01', periods=3, freq='M')
  454. # ts = Series(np.arange(3), index=rng)
  455. # # hacky way to get same thing
  456. # exp_index = period_range('2000-01-01', '2000-03-31', freq='D')
  457. # expected = ts.asfreq('D', how='end').reindex(exp_index)
  458. # expected = expected.fillna(method='bfill')
  459. # result = ts.resample('D', convention='span').mean()
  460. # assert_series_equal(result, expected)
  def test_default_right_closed_label(self):
      # For these target frequencies the defaults must be equivalent to
      # closed='right', label='right'.
      end_freq = ['D', 'Q', 'M', 'D']
      end_types = ['M', 'A', 'Q', 'W']

      for from_freq, to_freq in zip(end_freq, end_types):
          idx = date_range(start='8/15/2012', periods=100, freq=from_freq)
          df = DataFrame(np.random.randn(len(idx), 2), idx)

          resampled = df.resample(to_freq).mean()
          explicit = df.resample(to_freq, closed='right',
                                 label='right').mean()
          assert_frame_equal(resampled, explicit)
  def test_default_left_closed_label(self):
      # For these target frequencies the defaults must be equivalent to
      # closed='left', label='left'.
      others = ['MS', 'AS', 'QS', 'D', 'H']
      others_freq = ['D', 'Q', 'M', 'H', 'T']

      for from_freq, to_freq in zip(others_freq, others):
          idx = date_range(start='8/15/2012', periods=100, freq=from_freq)
          df = DataFrame(np.random.randn(len(idx), 2), idx)

          resampled = df.resample(to_freq).mean()
          explicit = df.resample(to_freq, closed='left',
                                 label='left').mean()
          assert_frame_equal(resampled, explicit)
  def test_all_values_single_bin(self):
      # 2070
      months = period_range(start="2012-01-01", end="2012-12-31", freq="M")
      s = Series(np.random.randn(len(months)), index=months)

      # Twelve months of 2012 resampled to annual: the first bin's mean must
      # equal the mean of the whole series.
      annual = s.resample("A").mean()
      tm.assert_almost_equal(annual[0], s.mean())
  def test_evenly_divisible_with_no_extra_bins(self):
      # 4076
      # when the frequency is evenly divisible, sometimes extra bins
      df = DataFrame(np.random.randn(9, 3),
                     index=date_range('2000-1-1', periods=9))
      result = df.resample('5D').mean()
      first_bin = df.iloc[0:5].mean()
      second_bin = df.iloc[5:].mean()
      expected = pd.concat([first_bin, second_bin], axis=1).T
      expected.index = [Timestamp('2000-1-1'), Timestamp('2000-1-6')]
      assert_frame_equal(result, expected)

      # Two records per day for 28 days, resampled into four 7-day bins.
      index = date_range(start='2001-5-4', periods=28)
      key1_row = {'REST_KEY': 1, 'DLY_TRN_QT': 80, 'DLY_SLS_AMT': 90,
                  'COOP_DLY_TRN_QT': 30, 'COOP_DLY_SLS_AMT': 20}
      key2_row = {'REST_KEY': 2, 'DLY_TRN_QT': 70, 'DLY_SLS_AMT': 10,
                  'COOP_DLY_TRN_QT': 50, 'COOP_DLY_SLS_AMT': 20}
      df = DataFrame([key1_row] * 28 + [key2_row] * 28,
                     index=index.append(index)).sort_index()

      index = date_range('2001-5-4', periods=4, freq='7D')
      count_row = {'REST_KEY': 14, 'DLY_TRN_QT': 14, 'DLY_SLS_AMT': 14,
                   'COOP_DLY_TRN_QT': 14, 'COOP_DLY_SLS_AMT': 14}
      expected = DataFrame([count_row] * 4, index=index)
      result = df.resample('7D').count()
      assert_frame_equal(result, expected)

      sum_row = {'REST_KEY': 21, 'DLY_TRN_QT': 1050, 'DLY_SLS_AMT': 700,
                 'COOP_DLY_TRN_QT': 560, 'COOP_DLY_SLS_AMT': 280}
      expected = DataFrame([sum_row] * 4, index=index)
      result = df.resample('7D').sum()
      assert_frame_equal(result, expected)
  @pytest.mark.parametrize('kind', ['period', None, 'timestamp'])
  @pytest.mark.parametrize('agg_arg', ['mean', {'value': 'mean'}, ['mean']])
  def test_loffset_returns_datetimeindex(self, frame, kind, agg_arg):
      # make sure passing loffset returns DatetimeIndex in all cases
      # basic method taken from Base.test_resample_loffset_arg_type()
      df = frame
      expected_means = [df.values[i:i + 2].mean()
                        for i in range(0, len(df.values), 2)]
      # ``periods`` is a count: use integer division so that an int, not a
      # float, is handed to period_range.
      expected_index = period_range(
          df.index[0], periods=len(df.index) // 2, freq='2D')

      # loffset coerces PeriodIndex to DateTimeIndex
      expected_index = expected_index.to_timestamp()
      expected_index += timedelta(hours=2)
      expected = DataFrame({'value': expected_means}, index=expected_index)

      result_agg = df.resample('2D', loffset='2H', kind=kind).agg(agg_arg)
      # Passing ``how=`` is expected to emit a FutureWarning.
      with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
          result_how = df.resample('2D', how=agg_arg, loffset='2H',
                                   kind=kind)
      # A list aggregation produces (column, function) MultiIndex columns.
      if isinstance(agg_arg, list):
          expected.columns = pd.MultiIndex.from_tuples([('value', 'mean')])
      assert_frame_equal(result_agg, expected)
      assert_frame_equal(result_how, expected)
  @pytest.mark.parametrize('freq, period_mult', [('H', 24), ('12H', 2)])
  @pytest.mark.parametrize('kind', [None, 'period'])
  def test_upsampling_ohlc(self, freq, period_mult, kind):
      # GH 13083
      pi = period_range(start='2000', freq='D', periods=10)
      s = Series(range(len(pi)), index=pi)
      stamp_ohlc = s.to_timestamp().resample(freq).ohlc()
      expected = stamp_ohlc.to_period(freq)

      # timestamp-based resampling doesn't include all sub-periods
      # of the last original period, so extend accordingly:
      n_sub_periods = period_mult * len(pi)
      new_index = period_range(start='2000', freq=freq,
                               periods=n_sub_periods)
      expected = expected.reindex(new_index)

      result = s.resample(freq, kind=kind).ohlc()
      assert_frame_equal(result, expected)
  @pytest.mark.parametrize('periods, values',
                           [([pd.NaT, '1970-01-01 00:00:00', pd.NaT,
                              '1970-01-01 00:00:02', '1970-01-01 00:00:03'],
                             [2, 3, 5, 7, 11]),
                            ([pd.NaT, pd.NaT, '1970-01-01 00:00:00', pd.NaT,
                              pd.NaT, pd.NaT, '1970-01-01 00:00:02',
                              '1970-01-01 00:00:03', pd.NaT, pd.NaT],
                             [1, 2, 3, 5, 6, 8, 7, 11, 12, 13])])
  @pytest.mark.parametrize('freq, expected_values',
                           [('1s', [3, np.nan, 7, 11]),
                            ('2s', [3, int((7 + 11) / 2)]),
                            ('3s', [int((3 + 7) / 2), 11])])
  def test_resample_with_nat(self, periods, values, freq, expected_values):
      # GH 13224
      # Rows indexed by NaT must not contribute to any bin. In both
      # parametrized inputs the non-NaT rows carry the values 3, 7 and 11
      # at seconds 0, 2 and 3.
      # ``np.nan`` is used in the expected values: the ``np.NaN`` alias is
      # not available in NumPy 2.0 and later.
      index = PeriodIndex(periods, freq='S')
      frame = DataFrame(values, index=index)
      expected_index = period_range('1970-01-01 00:00:00',
                                    periods=len(expected_values), freq=freq)
      expected = DataFrame(expected_values, index=expected_index)
      result = frame.resample(freq).mean()
      assert_frame_equal(result, expected)
  def test_resample_with_only_nat(self):
      # GH 13224
      # An index made only of NaT must resample to an empty frame.
      pi = PeriodIndex([pd.NaT] * 3, freq='S')
      frame = DataFrame([2, 3, 5], index=pi)

      empty_index = PeriodIndex(data=[], freq=pi.freq)
      expected = DataFrame([], index=empty_index)

      resampler = frame.resample('1s')
      result = resampler.mean()
      assert_frame_equal(result, expected)
  @pytest.mark.parametrize('start,end,start_freq,end_freq,base', [
      ('19910905', '19910909 03:00', 'H', '24H', 10),
      ('19910905', '19910909 12:00', 'H', '24H', 10),
      ('19910905', '19910909 23:00', 'H', '24H', 10),
      ('19910905 10:00', '19910909', 'H', '24H', 10),
      ('19910905 10:00', '19910909 10:00', 'H', '24H', 10),
      ('19910905', '19910909 10:00', 'H', '24H', 10),
      ('19910905 12:00', '19910909', 'H', '24H', 10),
      ('19910905 12:00', '19910909 03:00', 'H', '24H', 10),
      ('19910905 12:00', '19910909 12:00', 'H', '24H', 10),
      ('19910905 12:00', '19910909 12:00', 'H', '24H', 34),
      ('19910905 12:00', '19910909 12:00', 'H', '17H', 10),
      ('19910905 12:00', '19910909 12:00', 'H', '17H', 3),
      ('19910905 12:00', '19910909 1:00', 'H', 'M', 3),
      ('19910905', '19910913 06:00', '2H', '24H', 10),
      ('19910905', '19910905 01:39', 'Min', '5Min', 3),
      ('19910905', '19910905 03:18', '2Min', '5Min', 3),
  ])
  def test_resample_with_non_zero_base(self, start, end, start_freq,
                                       end_freq, base):
      # GH 23882
      # Period resampling with ``base`` must match the timestamp path.
      source_index = pd.period_range(start, end, freq=start_freq)
      s = pd.Series(0, index=source_index)
      s = s + np.arange(len(s))

      result = s.resample(end_freq, base=base).mean()
      result = result.to_timestamp(end_freq)
      # to_timestamp casts 24H -> D
      if end_freq == '24H':
          result = result.asfreq(end_freq)

      stamped = s.to_timestamp()
      expected = stamped.resample(end_freq, base=base).mean()
      assert_series_equal(result, expected)
  @pytest.mark.parametrize('first,last,offset,exp_first,exp_last', [
      ('19910905', '19920406', 'D', '19910905', '19920406'),
      ('19910905 00:00', '19920406 06:00', 'D', '19910905', '19920406'),
      ('19910905 06:00', '19920406 06:00', 'H', '19910905 06:00',
       '19920406 06:00'),
      ('19910906', '19920406', 'M', '1991-09', '1992-04'),
      ('19910831', '19920430', 'M', '1991-08', '1992-04'),
      ('1991-08', '1992-04', 'M', '1991-08', '1992-04'),
  ])
  def test_get_period_range_edges(self, first, last, offset,
                                  exp_first, exp_last):
      # _get_period_range_edges must return the expected (first, last)
      # periods at the frequency given by ``offset``.
      first = pd.Period(first)
      last = pd.Period(last)

      expected = (pd.Period(exp_first, freq=offset),
                  pd.Period(exp_last, freq=offset))

      offset = pd.tseries.frequencies.to_offset(offset)
      result = _get_period_range_edges(first, last, offset)

      assert result == expected