# test_datetime_index.py (~50 KB) -- resample tests over a DatetimeIndex.
  1. from datetime import datetime, timedelta
  2. from functools import partial
  3. from warnings import catch_warnings, simplefilter
  4. import numpy as np
  5. import pytest
  6. import pytz
  7. from pandas.compat import StringIO, range
  8. from pandas.errors import UnsupportedFunctionCall
  9. import pandas as pd
  10. from pandas import DataFrame, Panel, Series, Timedelta, Timestamp, isna, notna
  11. from pandas.core.indexes.datetimes import date_range
  12. from pandas.core.indexes.period import Period, period_range
  13. from pandas.core.resample import (
  14. DatetimeIndex, TimeGrouper, _get_timestamp_range_edges)
  15. import pandas.util.testing as tm
  16. from pandas.util.testing import (
  17. assert_almost_equal, assert_frame_equal, assert_series_equal)
  18. import pandas.tseries.offsets as offsets
  19. from pandas.tseries.offsets import BDay, Minute
@pytest.fixture()
def _index_factory():
    """Index constructor used by the shared resample fixtures."""
    return date_range
@pytest.fixture
def _index_freq():
    """Default frequency for the index built by the shared fixtures."""
    return 'Min'
@pytest.fixture
def _static_values(index):
    """Random values aligned one-to-one with the ``index`` fixture."""
    return np.random.rand(len(index))
  29. def test_custom_grouper(index):
  30. dti = index
  31. s = Series(np.array([1] * len(dti)), index=dti, dtype='int64')
  32. b = TimeGrouper(Minute(5))
  33. g = s.groupby(b)
  34. # check all cython functions work
  35. funcs = ['add', 'mean', 'prod', 'ohlc', 'min', 'max', 'var']
  36. for f in funcs:
  37. g._cython_agg_general(f)
  38. b = TimeGrouper(Minute(5), closed='right', label='right')
  39. g = s.groupby(b)
  40. # check all cython functions work
  41. funcs = ['add', 'mean', 'prod', 'ohlc', 'min', 'max', 'var']
  42. for f in funcs:
  43. g._cython_agg_general(f)
  44. assert g.ngroups == 2593
  45. assert notna(g.mean()).all()
  46. # construct expected val
  47. arr = [1] + [5] * 2592
  48. idx = dti[0:-1:5]
  49. idx = idx.append(dti[-1:])
  50. expect = Series(arr, index=idx)
  51. # GH2763 - return in put dtype if we can
  52. result = g.agg(np.sum)
  53. assert_series_equal(result, expect)
  54. df = DataFrame(np.random.rand(len(dti), 10),
  55. index=dti, dtype='float64')
  56. r = df.groupby(b).agg(np.sum)
  57. assert len(r.columns) == 10
  58. assert len(r.index) == 2593
  59. @pytest.mark.parametrize(
  60. '_index_start,_index_end,_index_name',
  61. [('1/1/2000 00:00:00', '1/1/2000 00:13:00', 'index')])
  62. @pytest.mark.parametrize('closed, expected', [
  63. ('right',
  64. lambda s: Series(
  65. [s[0], s[1:6].mean(), s[6:11].mean(), s[11:].mean()],
  66. index=date_range(
  67. '1/1/2000', periods=4, freq='5min', name='index'))),
  68. ('left',
  69. lambda s: Series(
  70. [s[:5].mean(), s[5:10].mean(), s[10:].mean()],
  71. index=date_range(
  72. '1/1/2000 00:05', periods=3, freq='5min', name='index'))
  73. )
  74. ])
  75. def test_resample_basic(series, closed, expected):
  76. s = series
  77. expected = expected(s)
  78. result = s.resample('5min', closed=closed, label='right').mean()
  79. assert_series_equal(result, expected)
  80. def test_resample_integerarray():
  81. # GH 25580, resample on IntegerArray
  82. ts = pd.Series(range(9),
  83. index=pd.date_range('1/1/2000', periods=9, freq='T'),
  84. dtype='Int64')
  85. result = ts.resample('3T').sum()
  86. expected = Series([3, 12, 21],
  87. index=pd.date_range('1/1/2000', periods=3, freq='3T'),
  88. dtype="Int64")
  89. assert_series_equal(result, expected)
  90. def test_resample_basic_grouper(series):
  91. s = series
  92. result = s.resample('5Min').last()
  93. grouper = TimeGrouper(Minute(5), closed='left', label='left')
  94. expected = s.groupby(grouper).agg(lambda x: x[-1])
  95. assert_series_equal(result, expected)
  96. @pytest.mark.parametrize(
  97. '_index_start,_index_end,_index_name',
  98. [('1/1/2000 00:00:00', '1/1/2000 00:13:00', 'index')])
  99. @pytest.mark.parametrize('kwargs', [
  100. dict(label='righttt'),
  101. dict(closed='righttt'),
  102. dict(convention='starttt')
  103. ])
  104. def test_resample_string_kwargs(series, kwargs):
  105. # see gh-19303
  106. # Check that wrong keyword argument strings raise an error
  107. with pytest.raises(ValueError, match='Unsupported value'):
  108. series.resample('5min', **kwargs)
  109. @pytest.mark.parametrize(
  110. '_index_start,_index_end,_index_name',
  111. [('1/1/2000 00:00:00', '1/1/2000 00:13:00', 'index')])
  112. def test_resample_how(series, downsample_method):
  113. if downsample_method == 'ohlc':
  114. pytest.skip('covered by test_resample_how_ohlc')
  115. s = series
  116. grouplist = np.ones_like(s)
  117. grouplist[0] = 0
  118. grouplist[1:6] = 1
  119. grouplist[6:11] = 2
  120. grouplist[11:] = 3
  121. expected = s.groupby(grouplist).agg(downsample_method)
  122. expected.index = date_range(
  123. '1/1/2000', periods=4, freq='5min', name='index')
  124. result = getattr(s.resample(
  125. '5min', closed='right', label='right'), downsample_method)()
  126. assert_series_equal(result, expected)
  127. @pytest.mark.parametrize(
  128. '_index_start,_index_end,_index_name',
  129. [('1/1/2000 00:00:00', '1/1/2000 00:13:00', 'index')])
  130. def test_resample_how_ohlc(series):
  131. s = series
  132. grouplist = np.ones_like(s)
  133. grouplist[0] = 0
  134. grouplist[1:6] = 1
  135. grouplist[6:11] = 2
  136. grouplist[11:] = 3
  137. def _ohlc(group):
  138. if isna(group).all():
  139. return np.repeat(np.nan, 4)
  140. return [group[0], group.max(), group.min(), group[-1]]
  141. expected = DataFrame(
  142. s.groupby(grouplist).agg(_ohlc).values.tolist(),
  143. index=date_range('1/1/2000', periods=4, freq='5min', name='index'),
  144. columns=['open', 'high', 'low', 'close'])
  145. result = s.resample('5min', closed='right', label='right').ohlc()
  146. assert_frame_equal(result, expected)
  147. @pytest.mark.parametrize(
  148. 'func', ['min', 'max', 'sum', 'prod', 'mean', 'var', 'std'])
  149. def test_numpy_compat(func):
  150. # see gh-12811
  151. s = Series([1, 2, 3, 4, 5], index=date_range(
  152. '20130101', periods=5, freq='s'))
  153. r = s.resample('2s')
  154. msg = "numpy operations are not valid with resample"
  155. with pytest.raises(UnsupportedFunctionCall, match=msg):
  156. getattr(r, func)(func, 1, 2, 3)
  157. with pytest.raises(UnsupportedFunctionCall, match=msg):
  158. getattr(r, func)(axis=1)
  159. def test_resample_how_callables():
  160. # GH#7929
  161. data = np.arange(5, dtype=np.int64)
  162. ind = date_range(start='2014-01-01', periods=len(data), freq='d')
  163. df = DataFrame({"A": data, "B": data}, index=ind)
  164. def fn(x, a=1):
  165. return str(type(x))
  166. class FnClass(object):
  167. def __call__(self, x):
  168. return str(type(x))
  169. df_standard = df.resample("M").apply(fn)
  170. df_lambda = df.resample("M").apply(lambda x: str(type(x)))
  171. df_partial = df.resample("M").apply(partial(fn))
  172. df_partial2 = df.resample("M").apply(partial(fn, a=2))
  173. df_class = df.resample("M").apply(FnClass())
  174. assert_frame_equal(df_standard, df_lambda)
  175. assert_frame_equal(df_standard, df_partial)
  176. assert_frame_equal(df_standard, df_partial2)
  177. assert_frame_equal(df_standard, df_class)
  178. def test_resample_rounding():
  179. # GH 8371
  180. # odd results when rounding is needed
  181. data = """date,time,value
  182. 11-08-2014,00:00:01.093,1
  183. 11-08-2014,00:00:02.159,1
  184. 11-08-2014,00:00:02.667,1
  185. 11-08-2014,00:00:03.175,1
  186. 11-08-2014,00:00:07.058,1
  187. 11-08-2014,00:00:07.362,1
  188. 11-08-2014,00:00:08.324,1
  189. 11-08-2014,00:00:08.830,1
  190. 11-08-2014,00:00:08.982,1
  191. 11-08-2014,00:00:09.815,1
  192. 11-08-2014,00:00:10.540,1
  193. 11-08-2014,00:00:11.061,1
  194. 11-08-2014,00:00:11.617,1
  195. 11-08-2014,00:00:13.607,1
  196. 11-08-2014,00:00:14.535,1
  197. 11-08-2014,00:00:15.525,1
  198. 11-08-2014,00:00:17.960,1
  199. 11-08-2014,00:00:20.674,1
  200. 11-08-2014,00:00:21.191,1"""
  201. df = pd.read_csv(StringIO(data), parse_dates={'timestamp': [
  202. 'date', 'time']}, index_col='timestamp')
  203. df.index.name = None
  204. result = df.resample('6s').sum()
  205. expected = DataFrame({'value': [
  206. 4, 9, 4, 2
  207. ]}, index=date_range('2014-11-08', freq='6s', periods=4))
  208. assert_frame_equal(result, expected)
  209. result = df.resample('7s').sum()
  210. expected = DataFrame({'value': [
  211. 4, 10, 4, 1
  212. ]}, index=date_range('2014-11-08', freq='7s', periods=4))
  213. assert_frame_equal(result, expected)
  214. result = df.resample('11s').sum()
  215. expected = DataFrame({'value': [
  216. 11, 8
  217. ]}, index=date_range('2014-11-08', freq='11s', periods=2))
  218. assert_frame_equal(result, expected)
  219. result = df.resample('13s').sum()
  220. expected = DataFrame({'value': [
  221. 13, 6
  222. ]}, index=date_range('2014-11-08', freq='13s', periods=2))
  223. assert_frame_equal(result, expected)
  224. result = df.resample('17s').sum()
  225. expected = DataFrame({'value': [
  226. 16, 3
  227. ]}, index=date_range('2014-11-08', freq='17s', periods=2))
  228. assert_frame_equal(result, expected)
  229. def test_resample_basic_from_daily():
  230. # from daily
  231. dti = date_range(start=datetime(2005, 1, 1),
  232. end=datetime(2005, 1, 10), freq='D', name='index')
  233. s = Series(np.random.rand(len(dti)), dti)
  234. # to weekly
  235. result = s.resample('w-sun').last()
  236. assert len(result) == 3
  237. assert (result.index.dayofweek == [6, 6, 6]).all()
  238. assert result.iloc[0] == s['1/2/2005']
  239. assert result.iloc[1] == s['1/9/2005']
  240. assert result.iloc[2] == s.iloc[-1]
  241. result = s.resample('W-MON').last()
  242. assert len(result) == 2
  243. assert (result.index.dayofweek == [0, 0]).all()
  244. assert result.iloc[0] == s['1/3/2005']
  245. assert result.iloc[1] == s['1/10/2005']
  246. result = s.resample('W-TUE').last()
  247. assert len(result) == 2
  248. assert (result.index.dayofweek == [1, 1]).all()
  249. assert result.iloc[0] == s['1/4/2005']
  250. assert result.iloc[1] == s['1/10/2005']
  251. result = s.resample('W-WED').last()
  252. assert len(result) == 2
  253. assert (result.index.dayofweek == [2, 2]).all()
  254. assert result.iloc[0] == s['1/5/2005']
  255. assert result.iloc[1] == s['1/10/2005']
  256. result = s.resample('W-THU').last()
  257. assert len(result) == 2
  258. assert (result.index.dayofweek == [3, 3]).all()
  259. assert result.iloc[0] == s['1/6/2005']
  260. assert result.iloc[1] == s['1/10/2005']
  261. result = s.resample('W-FRI').last()
  262. assert len(result) == 2
  263. assert (result.index.dayofweek == [4, 4]).all()
  264. assert result.iloc[0] == s['1/7/2005']
  265. assert result.iloc[1] == s['1/10/2005']
  266. # to biz day
  267. result = s.resample('B').last()
  268. assert len(result) == 7
  269. assert (result.index.dayofweek == [4, 0, 1, 2, 3, 4, 0]).all()
  270. assert result.iloc[0] == s['1/2/2005']
  271. assert result.iloc[1] == s['1/3/2005']
  272. assert result.iloc[5] == s['1/9/2005']
  273. assert result.index.name == 'index'
  274. def test_resample_upsampling_picked_but_not_correct():
  275. # Test for issue #3020
  276. dates = date_range('01-Jan-2014', '05-Jan-2014', freq='D')
  277. series = Series(1, index=dates)
  278. result = series.resample('D').mean()
  279. assert result.index[0] == dates[0]
  280. # GH 5955
  281. # incorrect deciding to upsample when the axis frequency matches the
  282. # resample frequency
  283. s = Series(np.arange(1., 6), index=[datetime(
  284. 1975, 1, i, 12, 0) for i in range(1, 6)])
  285. expected = Series(np.arange(1., 6), index=date_range(
  286. '19750101', periods=5, freq='D'))
  287. result = s.resample('D').count()
  288. assert_series_equal(result, Series(1, index=expected.index))
  289. result1 = s.resample('D').sum()
  290. result2 = s.resample('D').mean()
  291. assert_series_equal(result1, expected)
  292. assert_series_equal(result2, expected)
  293. def test_resample_frame_basic():
  294. df = tm.makeTimeDataFrame()
  295. b = TimeGrouper('M')
  296. g = df.groupby(b)
  297. # check all cython functions work
  298. funcs = ['add', 'mean', 'prod', 'min', 'max', 'var']
  299. for f in funcs:
  300. g._cython_agg_general(f)
  301. result = df.resample('A').mean()
  302. assert_series_equal(result['A'], df['A'].resample('A').mean())
  303. result = df.resample('M').mean()
  304. assert_series_equal(result['A'], df['A'].resample('M').mean())
  305. df.resample('M', kind='period').mean()
  306. df.resample('W-WED', kind='period').mean()
  307. @pytest.mark.parametrize('loffset', [timedelta(minutes=1),
  308. '1min', Minute(1),
  309. np.timedelta64(1, 'm')])
  310. def test_resample_loffset(loffset):
  311. # GH 7687
  312. rng = date_range('1/1/2000 00:00:00', '1/1/2000 00:13:00', freq='min')
  313. s = Series(np.random.randn(14), index=rng)
  314. result = s.resample('5min', closed='right', label='right',
  315. loffset=loffset).mean()
  316. idx = date_range('1/1/2000', periods=4, freq='5min')
  317. expected = Series([s[0], s[1:6].mean(), s[6:11].mean(), s[11:].mean()],
  318. index=idx + timedelta(minutes=1))
  319. assert_series_equal(result, expected)
  320. assert result.index.freq == Minute(5)
  321. # from daily
  322. dti = date_range(start=datetime(2005, 1, 1),
  323. end=datetime(2005, 1, 10), freq='D')
  324. ser = Series(np.random.rand(len(dti)), dti)
  325. # to weekly
  326. result = ser.resample('w-sun').last()
  327. business_day_offset = BDay()
  328. expected = ser.resample('w-sun', loffset=-business_day_offset).last()
  329. assert result.index[0] - business_day_offset == expected.index[0]
  330. def test_resample_loffset_upsample():
  331. # GH 20744
  332. rng = date_range('1/1/2000 00:00:00', '1/1/2000 00:13:00', freq='min')
  333. s = Series(np.random.randn(14), index=rng)
  334. result = s.resample('5min', closed='right', label='right',
  335. loffset=timedelta(minutes=1)).ffill()
  336. idx = date_range('1/1/2000', periods=4, freq='5min')
  337. expected = Series([s[0], s[5], s[10], s[-1]],
  338. index=idx + timedelta(minutes=1))
  339. assert_series_equal(result, expected)
  340. def test_resample_loffset_count():
  341. # GH 12725
  342. start_time = '1/1/2000 00:00:00'
  343. rng = date_range(start_time, periods=100, freq='S')
  344. ts = Series(np.random.randn(len(rng)), index=rng)
  345. result = ts.resample('10S', loffset='1s').count()
  346. expected_index = (
  347. date_range(start_time, periods=10, freq='10S') +
  348. timedelta(seconds=1)
  349. )
  350. expected = Series(10, index=expected_index)
  351. assert_series_equal(result, expected)
  352. # Same issue should apply to .size() since it goes through
  353. # same code path
  354. result = ts.resample('10S', loffset='1s').size()
  355. assert_series_equal(result, expected)
  356. def test_resample_upsample():
  357. # from daily
  358. dti = date_range(start=datetime(2005, 1, 1),
  359. end=datetime(2005, 1, 10), freq='D', name='index')
  360. s = Series(np.random.rand(len(dti)), dti)
  361. # to minutely, by padding
  362. result = s.resample('Min').pad()
  363. assert len(result) == 12961
  364. assert result[0] == s[0]
  365. assert result[-1] == s[-1]
  366. assert result.index.name == 'index'
  367. def test_resample_how_method():
  368. # GH9915
  369. s = Series([11, 22],
  370. index=[Timestamp('2015-03-31 21:48:52.672000'),
  371. Timestamp('2015-03-31 21:49:52.739000')])
  372. expected = Series([11, np.NaN, np.NaN, np.NaN, np.NaN, np.NaN, 22],
  373. index=[Timestamp('2015-03-31 21:48:50'),
  374. Timestamp('2015-03-31 21:49:00'),
  375. Timestamp('2015-03-31 21:49:10'),
  376. Timestamp('2015-03-31 21:49:20'),
  377. Timestamp('2015-03-31 21:49:30'),
  378. Timestamp('2015-03-31 21:49:40'),
  379. Timestamp('2015-03-31 21:49:50')])
  380. assert_series_equal(s.resample("10S").mean(), expected)
  381. def test_resample_extra_index_point():
  382. # GH#9756
  383. index = date_range(start='20150101', end='20150331', freq='BM')
  384. expected = DataFrame({'A': Series([21, 41, 63], index=index)})
  385. index = date_range(start='20150101', end='20150331', freq='B')
  386. df = DataFrame(
  387. {'A': Series(range(len(index)), index=index)}, dtype='int64')
  388. result = df.resample('BM').last()
  389. assert_frame_equal(result, expected)
  390. def test_upsample_with_limit():
  391. rng = date_range('1/1/2000', periods=3, freq='5t')
  392. ts = Series(np.random.randn(len(rng)), rng)
  393. result = ts.resample('t').ffill(limit=2)
  394. expected = ts.reindex(result.index, method='ffill', limit=2)
  395. assert_series_equal(result, expected)
  396. def test_nearest_upsample_with_limit():
  397. rng = date_range('1/1/2000', periods=3, freq='5t')
  398. ts = Series(np.random.randn(len(rng)), rng)
  399. result = ts.resample('t').nearest(limit=2)
  400. expected = ts.reindex(result.index, method='nearest', limit=2)
  401. assert_series_equal(result, expected)
  402. def test_resample_ohlc(series):
  403. s = series
  404. grouper = TimeGrouper(Minute(5))
  405. expect = s.groupby(grouper).agg(lambda x: x[-1])
  406. result = s.resample('5Min').ohlc()
  407. assert len(result) == len(expect)
  408. assert len(result.columns) == 4
  409. xs = result.iloc[-2]
  410. assert xs['open'] == s[-6]
  411. assert xs['high'] == s[-6:-1].max()
  412. assert xs['low'] == s[-6:-1].min()
  413. assert xs['close'] == s[-2]
  414. xs = result.iloc[0]
  415. assert xs['open'] == s[0]
  416. assert xs['high'] == s[:5].max()
  417. assert xs['low'] == s[:5].min()
  418. assert xs['close'] == s[4]
  419. def test_resample_ohlc_result():
  420. # GH 12332
  421. index = pd.date_range('1-1-2000', '2-15-2000', freq='h')
  422. index = index.union(pd.date_range('4-15-2000', '5-15-2000', freq='h'))
  423. s = Series(range(len(index)), index=index)
  424. a = s.loc[:'4-15-2000'].resample('30T').ohlc()
  425. assert isinstance(a, DataFrame)
  426. b = s.loc[:'4-14-2000'].resample('30T').ohlc()
  427. assert isinstance(b, DataFrame)
  428. # GH12348
  429. # raising on odd period
  430. rng = date_range('2013-12-30', '2014-01-07')
  431. index = rng.drop([Timestamp('2014-01-01'),
  432. Timestamp('2013-12-31'),
  433. Timestamp('2014-01-04'),
  434. Timestamp('2014-01-05')])
  435. df = DataFrame(data=np.arange(len(index)), index=index)
  436. result = df.resample('B').mean()
  437. expected = df.reindex(index=date_range(rng[0], rng[-1], freq='B'))
  438. assert_frame_equal(result, expected)
def test_resample_ohlc_dataframe():
    # Frame-level ohlc equals per-column ohlc concatenated with the column
    # names as the outer key level.
    df = (
        DataFrame({
            'PRICE': {
                Timestamp('2011-01-06 10:59:05', tz=None): 24990,
                Timestamp('2011-01-06 12:43:33', tz=None): 25499,
                Timestamp('2011-01-06 12:54:09', tz=None): 25499},
            'VOLUME': {
                Timestamp('2011-01-06 10:59:05', tz=None): 1500000000,
                Timestamp('2011-01-06 12:43:33', tz=None): 5000000000,
                Timestamp('2011-01-06 12:54:09', tz=None): 100000000}})
    ).reindex(['VOLUME', 'PRICE'], axis=1)
    res = df.resample('H').ohlc()
    exp = pd.concat([df['VOLUME'].resample('H').ohlc(),
                     df['PRICE'].resample('H').ohlc()],
                    axis=1,
                    keys=['VOLUME', 'PRICE'])
    assert_frame_equal(exp, res)

    # With MultiIndex columns, ohlc appends its own level below the
    # existing ones.
    df.columns = [['a', 'b'], ['c', 'd']]
    res = df.resample('H').ohlc()
    exp.columns = pd.MultiIndex.from_tuples([
        ('a', 'c', 'open'), ('a', 'c', 'high'), ('a', 'c', 'low'),
        ('a', 'c', 'close'), ('b', 'd', 'open'), ('b', 'd', 'high'),
        ('b', 'd', 'low'), ('b', 'd', 'close')])
    assert_frame_equal(exp, res)
    # dupe columns fail atm
    # df.columns = ['PRICE', 'PRICE']
def test_resample_dup_index():
    # GH 4812
    # dup columns with resample raising
    # Resampling along axis=1 (columns are monthly Periods) with duplicate
    # row labels must match the equivalent column-wise groupby.
    df = DataFrame(np.random.randn(4, 12), index=[2000, 2000, 2000, 2000],
                   columns=[Period(year=2000, month=i + 1, freq='M')
                            for i in range(12)])
    df.iloc[3, :] = np.nan
    result = df.resample('Q', axis=1).mean()
    # map each month to its quarter index 0..3
    expected = df.groupby(lambda x: int((x.month - 1) / 3), axis=1).mean()
    expected.columns = [
        Period(year=2000, quarter=i + 1, freq='Q') for i in range(4)]
    assert_frame_equal(result, expected)
  478. def test_resample_reresample():
  479. dti = date_range(start=datetime(2005, 1, 1),
  480. end=datetime(2005, 1, 10), freq='D')
  481. s = Series(np.random.rand(len(dti)), dti)
  482. bs = s.resample('B', closed='right', label='right').mean()
  483. result = bs.resample('8H').mean()
  484. assert len(result) == 22
  485. assert isinstance(result.index.freq, offsets.DateOffset)
  486. assert result.index.freq == offsets.Hour(8)
  487. def test_resample_timestamp_to_period(simple_date_range_series):
  488. ts = simple_date_range_series('1/1/1990', '1/1/2000')
  489. result = ts.resample('A-DEC', kind='period').mean()
  490. expected = ts.resample('A-DEC').mean()
  491. expected.index = period_range('1990', '2000', freq='a-dec')
  492. assert_series_equal(result, expected)
  493. result = ts.resample('A-JUN', kind='period').mean()
  494. expected = ts.resample('A-JUN').mean()
  495. expected.index = period_range('1990', '2000', freq='a-jun')
  496. assert_series_equal(result, expected)
  497. result = ts.resample('M', kind='period').mean()
  498. expected = ts.resample('M').mean()
  499. expected.index = period_range('1990-01', '2000-01', freq='M')
  500. assert_series_equal(result, expected)
  501. result = ts.resample('M', kind='period').mean()
  502. expected = ts.resample('M').mean()
  503. expected.index = period_range('1990-01', '2000-01', freq='M')
  504. assert_series_equal(result, expected)
  505. def test_ohlc_5min():
  506. def _ohlc(group):
  507. if isna(group).all():
  508. return np.repeat(np.nan, 4)
  509. return [group[0], group.max(), group.min(), group[-1]]
  510. rng = date_range('1/1/2000 00:00:00', '1/1/2000 5:59:50', freq='10s')
  511. ts = Series(np.random.randn(len(rng)), index=rng)
  512. resampled = ts.resample('5min', closed='right',
  513. label='right').ohlc()
  514. assert (resampled.loc['1/1/2000 00:00'] == ts[0]).all()
  515. exp = _ohlc(ts[1:31])
  516. assert (resampled.loc['1/1/2000 00:05'] == exp).all()
  517. exp = _ohlc(ts['1/1/2000 5:55:01':])
  518. assert (resampled.loc['1/1/2000 6:00:00'] == exp).all()
  519. def test_downsample_non_unique():
  520. rng = date_range('1/1/2000', '2/29/2000')
  521. rng2 = rng.repeat(5).values
  522. ts = Series(np.random.randn(len(rng2)), index=rng2)
  523. result = ts.resample('M').mean()
  524. expected = ts.groupby(lambda x: x.month).mean()
  525. assert len(result) == 2
  526. assert_almost_equal(result[0], expected[1])
  527. assert_almost_equal(result[1], expected[2])
  528. def test_asfreq_non_unique():
  529. # GH #1077
  530. rng = date_range('1/1/2000', '2/29/2000')
  531. rng2 = rng.repeat(2).values
  532. ts = Series(np.random.randn(len(rng2)), index=rng2)
  533. msg = 'cannot reindex from a duplicate axis'
  534. with pytest.raises(Exception, match=msg):
  535. ts.asfreq('B')
  536. def test_resample_axis1():
  537. rng = date_range('1/1/2000', '2/29/2000')
  538. df = DataFrame(np.random.randn(3, len(rng)), columns=rng,
  539. index=['a', 'b', 'c'])
  540. result = df.resample('M', axis=1).mean()
  541. expected = df.T.resample('M').mean().T
  542. tm.assert_frame_equal(result, expected)
def test_resample_panel():
    # Resample a Panel along the major axis and compare with applying
    # DataFrame resample to each item.  Panel is deprecated, so the whole
    # body runs under a suppressed FutureWarning.
    rng = date_range('1/1/2000', '6/30/2000')
    n = len(rng)
    with catch_warnings(record=True):
        simplefilter("ignore", FutureWarning)
        panel = Panel(np.random.randn(3, n, 5),
                      items=['one', 'two', 'three'],
                      major_axis=rng,
                      minor_axis=['a', 'b', 'c', 'd', 'e'])

        result = panel.resample('M', axis=1).mean()

        def p_apply(panel, f):
            # apply f to each item's DataFrame and rebuild a Panel
            result = {}
            for item in panel.items:
                result[item] = f(panel[item])
            return Panel(result, items=panel.items)

        expected = p_apply(panel, lambda x: x.resample('M').mean())
        tm.assert_panel_equal(result, expected)

        # same check along the minor axis (axis=2) after swapping axes
        panel2 = panel.swapaxes(1, 2)
        result = panel2.resample('M', axis=2).mean()
        expected = p_apply(panel2,
                           lambda x: x.resample('M', axis=1).mean())
        tm.assert_panel_equal(result, expected)
@pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning")
def test_resample_panel_numpy():
    # resample(...).apply with a positional-axis lambda must match the
    # built-in .mean() along the same axis (Panel API, deprecated).
    rng = date_range('1/1/2000', '6/30/2000')
    n = len(rng)
    with catch_warnings(record=True):
        panel = Panel(np.random.randn(3, n, 5),
                      items=['one', 'two', 'three'],
                      major_axis=rng,
                      minor_axis=['a', 'b', 'c', 'd', 'e'])

        result = panel.resample('M', axis=1).apply(lambda x: x.mean(1))
        expected = panel.resample('M', axis=1).mean()
        tm.assert_panel_equal(result, expected)

        # repeat along the minor axis after swapping axes
        panel = panel.swapaxes(1, 2)
        result = panel.resample('M', axis=2).apply(lambda x: x.mean(2))
        expected = panel.resample('M', axis=2).mean()
        tm.assert_panel_equal(result, expected)
  581. def test_resample_anchored_ticks():
  582. # If a fixed delta (5 minute, 4 hour) evenly divides a day, we should
  583. # "anchor" the origin at midnight so we get regular intervals rather
  584. # than starting from the first timestamp which might start in the
  585. # middle of a desired interval
  586. rng = date_range('1/1/2000 04:00:00', periods=86400, freq='s')
  587. ts = Series(np.random.randn(len(rng)), index=rng)
  588. ts[:2] = np.nan # so results are the same
  589. freqs = ['t', '5t', '15t', '30t', '4h', '12h']
  590. for freq in freqs:
  591. result = ts[2:].resample(freq, closed='left', label='left').mean()
  592. expected = ts.resample(freq, closed='left', label='left').mean()
  593. assert_series_equal(result, expected)
  594. def test_resample_single_group():
  595. mysum = lambda x: x.sum()
  596. rng = date_range('2000-1-1', '2000-2-10', freq='D')
  597. ts = Series(np.random.randn(len(rng)), index=rng)
  598. assert_series_equal(ts.resample('M').sum(),
  599. ts.resample('M').apply(mysum))
  600. rng = date_range('2000-1-1', '2000-1-10', freq='D')
  601. ts = Series(np.random.randn(len(rng)), index=rng)
  602. assert_series_equal(ts.resample('M').sum(),
  603. ts.resample('M').apply(mysum))
  604. # GH 3849
  605. s = Series([30.1, 31.6], index=[Timestamp('20070915 15:30:00'),
  606. Timestamp('20070915 15:40:00')])
  607. expected = Series([0.75], index=[Timestamp('20070915')])
  608. result = s.resample('D').apply(lambda x: np.std(x))
  609. assert_series_equal(result, expected)
  610. def test_resample_base():
  611. rng = date_range('1/1/2000 00:00:00', '1/1/2000 02:00', freq='s')
  612. ts = Series(np.random.randn(len(rng)), index=rng)
  613. resampled = ts.resample('5min', base=2).mean()
  614. exp_rng = date_range('12/31/1999 23:57:00', '1/1/2000 01:57',
  615. freq='5min')
  616. tm.assert_index_equal(resampled.index, exp_rng)
  617. def test_resample_daily_anchored():
  618. rng = date_range('1/1/2000 0:00:00', periods=10000, freq='T')
  619. ts = Series(np.random.randn(len(rng)), index=rng)
  620. ts[:2] = np.nan # so results are the same
  621. result = ts[2:].resample('D', closed='left', label='left').mean()
  622. expected = ts.resample('D', closed='left', label='left').mean()
  623. assert_series_equal(result, expected)
  624. def test_resample_to_period_monthly_buglet():
  625. # GH #1259
  626. rng = date_range('1/1/2000', '12/31/2000')
  627. ts = Series(np.random.randn(len(rng)), index=rng)
  628. result = ts.resample('M', kind='period').mean()
  629. exp_index = period_range('Jan-2000', 'Dec-2000', freq='M')
  630. tm.assert_index_equal(result.index, exp_index)
  631. def test_period_with_agg():
  632. # aggregate a period resampler with a lambda
  633. s2 = Series(np.random.randint(0, 5, 50),
  634. index=pd.period_range('2012-01-01', freq='H', periods=50),
  635. dtype='float64')
  636. expected = s2.to_timestamp().resample('D').mean().to_period()
  637. result = s2.resample('D').agg(lambda x: x.mean())
  638. assert_series_equal(result, expected)
  639. def test_resample_segfault():
  640. # GH 8573
  641. # segfaulting in older versions
  642. all_wins_and_wagers = [
  643. (1, datetime(2013, 10, 1, 16, 20), 1, 0),
  644. (2, datetime(2013, 10, 1, 16, 10), 1, 0),
  645. (2, datetime(2013, 10, 1, 18, 15), 1, 0),
  646. (2, datetime(2013, 10, 1, 16, 10, 31), 1, 0)]
  647. df = DataFrame.from_records(all_wins_and_wagers,
  648. columns=("ID", "timestamp", "A", "B")
  649. ).set_index("timestamp")
  650. result = df.groupby("ID").resample("5min").sum()
  651. expected = df.groupby("ID").apply(lambda x: x.resample("5min").sum())
  652. assert_frame_equal(result, expected)
  653. def test_resample_dtype_preservation():
  654. # GH 12202
  655. # validation tests for dtype preservation
  656. df = DataFrame({'date': pd.date_range(start='2016-01-01',
  657. periods=4, freq='W'),
  658. 'group': [1, 1, 2, 2],
  659. 'val': Series([5, 6, 7, 8],
  660. dtype='int32')}
  661. ).set_index('date')
  662. result = df.resample('1D').ffill()
  663. assert result.val.dtype == np.int32
  664. result = df.groupby('group').resample('1D').ffill()
  665. assert result.val.dtype == np.int32
  666. def test_resample_dtype_coerceion():
  667. pytest.importorskip('scipy.interpolate')
  668. # GH 16361
  669. df = {"a": [1, 3, 1, 4]}
  670. df = DataFrame(df, index=pd.date_range("2017-01-01", "2017-01-04"))
  671. expected = (df.astype("float64")
  672. .resample("H")
  673. .mean()
  674. ["a"]
  675. .interpolate("cubic")
  676. )
  677. result = df.resample("H")["a"].mean().interpolate("cubic")
  678. tm.assert_series_equal(result, expected)
  679. result = df.resample("H").mean()["a"].interpolate("cubic")
  680. tm.assert_series_equal(result, expected)
  681. def test_weekly_resample_buglet():
  682. # #1327
  683. rng = date_range('1/1/2000', freq='B', periods=20)
  684. ts = Series(np.random.randn(len(rng)), index=rng)
  685. resampled = ts.resample('W').mean()
  686. expected = ts.resample('W-SUN').mean()
  687. assert_series_equal(resampled, expected)
  688. def test_monthly_resample_error():
  689. # #1451
  690. dates = date_range('4/16/2012 20:00', periods=5000, freq='h')
  691. ts = Series(np.random.randn(len(dates)), index=dates)
  692. # it works!
  693. ts.resample('M')
  694. def test_nanosecond_resample_error():
  695. # GH 12307 - Values falls after last bin when
  696. # Resampling using pd.tseries.offsets.Nano as period
  697. start = 1443707890427
  698. exp_start = 1443707890400
  699. indx = pd.date_range(
  700. start=pd.to_datetime(start),
  701. periods=10,
  702. freq='100n'
  703. )
  704. ts = Series(range(len(indx)), index=indx)
  705. r = ts.resample(pd.tseries.offsets.Nano(100))
  706. result = r.agg('mean')
  707. exp_indx = pd.date_range(
  708. start=pd.to_datetime(exp_start),
  709. periods=10,
  710. freq='100n'
  711. )
  712. exp = Series(range(len(exp_indx)), index=exp_indx)
  713. assert_series_equal(result, exp)
  714. def test_resample_anchored_intraday(simple_date_range_series):
  715. # #1471, #1458
  716. rng = date_range('1/1/2012', '4/1/2012', freq='100min')
  717. df = DataFrame(rng.month, index=rng)
  718. result = df.resample('M').mean()
  719. expected = df.resample(
  720. 'M', kind='period').mean().to_timestamp(how='end')
  721. expected.index += Timedelta(1, 'ns') - Timedelta(1, 'D')
  722. tm.assert_frame_equal(result, expected)
  723. result = df.resample('M', closed='left').mean()
  724. exp = df.tshift(1, freq='D').resample('M', kind='period').mean()
  725. exp = exp.to_timestamp(how='end')
  726. exp.index = exp.index + Timedelta(1, 'ns') - Timedelta(1, 'D')
  727. tm.assert_frame_equal(result, exp)
  728. rng = date_range('1/1/2012', '4/1/2012', freq='100min')
  729. df = DataFrame(rng.month, index=rng)
  730. result = df.resample('Q').mean()
  731. expected = df.resample(
  732. 'Q', kind='period').mean().to_timestamp(how='end')
  733. expected.index += Timedelta(1, 'ns') - Timedelta(1, 'D')
  734. tm.assert_frame_equal(result, expected)
  735. result = df.resample('Q', closed='left').mean()
  736. expected = df.tshift(1, freq='D').resample('Q', kind='period',
  737. closed='left').mean()
  738. expected = expected.to_timestamp(how='end')
  739. expected.index += Timedelta(1, 'ns') - Timedelta(1, 'D')
  740. tm.assert_frame_equal(result, expected)
  741. ts = simple_date_range_series('2012-04-29 23:00', '2012-04-30 5:00',
  742. freq='h')
  743. resampled = ts.resample('M').mean()
  744. assert len(resampled) == 1
  745. def test_resample_anchored_monthstart(simple_date_range_series):
  746. ts = simple_date_range_series('1/1/2000', '12/31/2002')
  747. freqs = ['MS', 'BMS', 'QS-MAR', 'AS-DEC', 'AS-JUN']
  748. for freq in freqs:
  749. ts.resample(freq).mean()
  750. def test_resample_anchored_multiday():
  751. # When resampling a range spanning multiple days, ensure that the
  752. # start date gets used to determine the offset. Fixes issue where
  753. # a one day period is not a multiple of the frequency.
  754. #
  755. # See: https://github.com/pandas-dev/pandas/issues/8683
  756. index = pd.date_range(
  757. '2014-10-14 23:06:23.206', periods=3, freq='400L'
  758. ) | pd.date_range(
  759. '2014-10-15 23:00:00', periods=2, freq='2200L')
  760. s = Series(np.random.randn(5), index=index)
  761. # Ensure left closing works
  762. result = s.resample('2200L').mean()
  763. assert result.index[-1] == Timestamp('2014-10-15 23:00:02.000')
  764. # Ensure right closing works
  765. result = s.resample('2200L', label='right').mean()
  766. assert result.index[-1] == Timestamp('2014-10-15 23:00:04.200')
  767. def test_corner_cases(simple_period_range_series,
  768. simple_date_range_series):
  769. # miscellaneous test coverage
  770. rng = date_range('1/1/2000', periods=12, freq='t')
  771. ts = Series(np.random.randn(len(rng)), index=rng)
  772. result = ts.resample('5t', closed='right', label='left').mean()
  773. ex_index = date_range('1999-12-31 23:55', periods=4, freq='5t')
  774. tm.assert_index_equal(result.index, ex_index)
  775. len0pts = simple_period_range_series(
  776. '2007-01', '2010-05', freq='M')[:0]
  777. # it works
  778. result = len0pts.resample('A-DEC').mean()
  779. assert len(result) == 0
  780. # resample to periods
  781. ts = simple_date_range_series(
  782. '2000-04-28', '2000-04-30 11:00', freq='h')
  783. result = ts.resample('M', kind='period').mean()
  784. assert len(result) == 1
  785. assert result.index[0] == Period('2000-04', freq='M')
  786. def test_anchored_lowercase_buglet():
  787. dates = date_range('4/16/2012 20:00', periods=50000, freq='s')
  788. ts = Series(np.random.randn(len(dates)), index=dates)
  789. # it works!
  790. ts.resample('d').mean()
  791. def test_upsample_apply_functions():
  792. # #1596
  793. rng = pd.date_range('2012-06-12', periods=4, freq='h')
  794. ts = Series(np.random.randn(len(rng)), index=rng)
  795. result = ts.resample('20min').aggregate(['mean', 'sum'])
  796. assert isinstance(result, DataFrame)
  797. def test_resample_not_monotonic():
  798. rng = pd.date_range('2012-06-12', periods=200, freq='h')
  799. ts = Series(np.random.randn(len(rng)), index=rng)
  800. ts = ts.take(np.random.permutation(len(ts)))
  801. result = ts.resample('D').sum()
  802. exp = ts.sort_index().resample('D').sum()
  803. assert_series_equal(result, exp)
  804. def test_resample_median_bug_1688():
  805. for dtype in ['int64', 'int32', 'float64', 'float32']:
  806. df = DataFrame([1, 2], index=[datetime(2012, 1, 1, 0, 0, 0),
  807. datetime(2012, 1, 1, 0, 5, 0)],
  808. dtype=dtype)
  809. result = df.resample("T").apply(lambda x: x.mean())
  810. exp = df.asfreq('T')
  811. tm.assert_frame_equal(result, exp)
  812. result = df.resample("T").median()
  813. exp = df.asfreq('T')
  814. tm.assert_frame_equal(result, exp)
  815. def test_how_lambda_functions(simple_date_range_series):
  816. ts = simple_date_range_series('1/1/2000', '4/1/2000')
  817. result = ts.resample('M').apply(lambda x: x.mean())
  818. exp = ts.resample('M').mean()
  819. tm.assert_series_equal(result, exp)
  820. foo_exp = ts.resample('M').mean()
  821. foo_exp.name = 'foo'
  822. bar_exp = ts.resample('M').std()
  823. bar_exp.name = 'bar'
  824. result = ts.resample('M').apply(
  825. [lambda x: x.mean(), lambda x: x.std(ddof=1)])
  826. result.columns = ['foo', 'bar']
  827. tm.assert_series_equal(result['foo'], foo_exp)
  828. tm.assert_series_equal(result['bar'], bar_exp)
  829. # this is a MI Series, so comparing the names of the results
  830. # doesn't make sense
  831. result = ts.resample('M').aggregate({'foo': lambda x: x.mean(),
  832. 'bar': lambda x: x.std(ddof=1)})
  833. tm.assert_series_equal(result['foo'], foo_exp, check_names=False)
  834. tm.assert_series_equal(result['bar'], bar_exp, check_names=False)
  835. def test_resample_unequal_times():
  836. # #1772
  837. start = datetime(1999, 3, 1, 5)
  838. # end hour is less than start
  839. end = datetime(2012, 7, 31, 4)
  840. bad_ind = date_range(start, end, freq="30min")
  841. df = DataFrame({'close': 1}, index=bad_ind)
  842. # it works!
  843. df.resample('AS').sum()
  844. def test_resample_consistency():
  845. # GH 6418
  846. # resample with bfill / limit / reindex consistency
  847. i30 = pd.date_range('2002-02-02', periods=4, freq='30T')
  848. s = Series(np.arange(4.), index=i30)
  849. s[2] = np.NaN
  850. # Upsample by factor 3 with reindex() and resample() methods:
  851. i10 = pd.date_range(i30[0], i30[-1], freq='10T')
  852. s10 = s.reindex(index=i10, method='bfill')
  853. s10_2 = s.reindex(index=i10, method='bfill', limit=2)
  854. rl = s.reindex_like(s10, method='bfill', limit=2)
  855. r10_2 = s.resample('10Min').bfill(limit=2)
  856. r10 = s.resample('10Min').bfill()
  857. # s10_2, r10, r10_2, rl should all be equal
  858. assert_series_equal(s10_2, r10)
  859. assert_series_equal(s10_2, r10_2)
  860. assert_series_equal(s10_2, rl)
  861. def test_resample_timegrouper():
  862. # GH 7227
  863. dates1 = [datetime(2014, 10, 1), datetime(2014, 9, 3),
  864. datetime(2014, 11, 5), datetime(2014, 9, 5),
  865. datetime(2014, 10, 8), datetime(2014, 7, 15)]
  866. dates2 = dates1[:2] + [pd.NaT] + dates1[2:4] + [pd.NaT] + dates1[4:]
  867. dates3 = [pd.NaT] + dates1 + [pd.NaT]
  868. for dates in [dates1, dates2, dates3]:
  869. df = DataFrame(dict(A=dates, B=np.arange(len(dates))))
  870. result = df.set_index('A').resample('M').count()
  871. exp_idx = pd.DatetimeIndex(['2014-07-31', '2014-08-31',
  872. '2014-09-30',
  873. '2014-10-31', '2014-11-30'],
  874. freq='M', name='A')
  875. expected = DataFrame({'B': [1, 0, 2, 2, 1]}, index=exp_idx)
  876. assert_frame_equal(result, expected)
  877. result = df.groupby(pd.Grouper(freq='M', key='A')).count()
  878. assert_frame_equal(result, expected)
  879. df = DataFrame(dict(A=dates, B=np.arange(len(dates)), C=np.arange(
  880. len(dates))))
  881. result = df.set_index('A').resample('M').count()
  882. expected = DataFrame({'B': [1, 0, 2, 2, 1], 'C': [1, 0, 2, 2, 1]},
  883. index=exp_idx, columns=['B', 'C'])
  884. assert_frame_equal(result, expected)
  885. result = df.groupby(pd.Grouper(freq='M', key='A')).count()
  886. assert_frame_equal(result, expected)
  887. def test_resample_nunique():
  888. # GH 12352
  889. df = DataFrame({
  890. 'ID': {Timestamp('2015-06-05 00:00:00'): '0010100903',
  891. Timestamp('2015-06-08 00:00:00'): '0010150847'},
  892. 'DATE': {Timestamp('2015-06-05 00:00:00'): '2015-06-05',
  893. Timestamp('2015-06-08 00:00:00'): '2015-06-08'}})
  894. r = df.resample('D')
  895. g = df.groupby(pd.Grouper(freq='D'))
  896. expected = df.groupby(pd.Grouper(freq='D')).ID.apply(lambda x:
  897. x.nunique())
  898. assert expected.name == 'ID'
  899. for t in [r, g]:
  900. result = r.ID.nunique()
  901. assert_series_equal(result, expected)
  902. result = df.ID.resample('D').nunique()
  903. assert_series_equal(result, expected)
  904. result = df.ID.groupby(pd.Grouper(freq='D')).nunique()
  905. assert_series_equal(result, expected)
  906. def test_resample_nunique_with_date_gap():
  907. # GH 13453
  908. index = pd.date_range('1-1-2000', '2-15-2000', freq='h')
  909. index2 = pd.date_range('4-15-2000', '5-15-2000', freq='h')
  910. index3 = index.append(index2)
  911. s = Series(range(len(index3)), index=index3, dtype='int64')
  912. r = s.resample('M')
  913. # Since all elements are unique, these should all be the same
  914. results = [
  915. r.count(),
  916. r.nunique(),
  917. r.agg(Series.nunique),
  918. r.agg('nunique')
  919. ]
  920. assert_series_equal(results[0], results[1])
  921. assert_series_equal(results[0], results[2])
  922. assert_series_equal(results[0], results[3])
  923. @pytest.mark.parametrize('n', [10000, 100000])
  924. @pytest.mark.parametrize('k', [10, 100, 1000])
  925. def test_resample_group_info(n, k):
  926. # GH10914
  927. # use a fixed seed to always have the same uniques
  928. prng = np.random.RandomState(1234)
  929. dr = date_range(start='2015-08-27', periods=n // 10, freq='T')
  930. ts = Series(prng.randint(0, n // k, n).astype('int64'),
  931. index=prng.choice(dr, n))
  932. left = ts.resample('30T').nunique()
  933. ix = date_range(start=ts.index.min(), end=ts.index.max(),
  934. freq='30T')
  935. vals = ts.values
  936. bins = np.searchsorted(ix.values, ts.index, side='right')
  937. sorter = np.lexsort((vals, bins))
  938. vals, bins = vals[sorter], bins[sorter]
  939. mask = np.r_[True, vals[1:] != vals[:-1]]
  940. mask |= np.r_[True, bins[1:] != bins[:-1]]
  941. arr = np.bincount(bins[mask] - 1,
  942. minlength=len(ix)).astype('int64', copy=False)
  943. right = Series(arr, index=ix)
  944. assert_series_equal(left, right)
  945. def test_resample_size():
  946. n = 10000
  947. dr = date_range('2015-09-19', periods=n, freq='T')
  948. ts = Series(np.random.randn(n), index=np.random.choice(dr, n))
  949. left = ts.resample('7T').size()
  950. ix = date_range(start=left.index.min(), end=ts.index.max(), freq='7T')
  951. bins = np.searchsorted(ix.values, ts.index.values, side='right')
  952. val = np.bincount(bins, minlength=len(ix) + 1)[1:].astype('int64',
  953. copy=False)
  954. right = Series(val, index=ix)
  955. assert_series_equal(left, right)
  956. def test_resample_across_dst():
  957. # The test resamples a DatetimeIndex with values before and after a
  958. # DST change
  959. # Issue: 14682
  960. # The DatetimeIndex we will start with
  961. # (note that DST happens at 03:00+02:00 -> 02:00+01:00)
  962. # 2016-10-30 02:23:00+02:00, 2016-10-30 02:23:00+01:00
  963. df1 = DataFrame([1477786980, 1477790580], columns=['ts'])
  964. dti1 = DatetimeIndex(pd.to_datetime(df1.ts, unit='s')
  965. .dt.tz_localize('UTC')
  966. .dt.tz_convert('Europe/Madrid'))
  967. # The expected DatetimeIndex after resampling.
  968. # 2016-10-30 02:00:00+02:00, 2016-10-30 02:00:00+01:00
  969. df2 = DataFrame([1477785600, 1477789200], columns=['ts'])
  970. dti2 = DatetimeIndex(pd.to_datetime(df2.ts, unit='s')
  971. .dt.tz_localize('UTC')
  972. .dt.tz_convert('Europe/Madrid'))
  973. df = DataFrame([5, 5], index=dti1)
  974. result = df.resample(rule='H').sum()
  975. expected = DataFrame([5, 5], index=dti2)
  976. assert_frame_equal(result, expected)
  977. def test_groupby_with_dst_time_change():
  978. # GH 24972
  979. index = pd.DatetimeIndex([1478064900001000000, 1480037118776792000],
  980. tz='UTC').tz_convert('America/Chicago')
  981. df = pd.DataFrame([1, 2], index=index)
  982. result = df.groupby(pd.Grouper(freq='1d')).last()
  983. expected_index_values = pd.date_range('2016-11-02', '2016-11-24',
  984. freq='d', tz='America/Chicago')
  985. index = pd.DatetimeIndex(expected_index_values)
  986. expected = pd.DataFrame([1.0] + ([np.nan] * 21) + [2.0], index=index)
  987. assert_frame_equal(result, expected)
  988. def test_resample_dst_anchor():
  989. # 5172
  990. dti = DatetimeIndex([datetime(2012, 11, 4, 23)], tz='US/Eastern')
  991. df = DataFrame([5], index=dti)
  992. assert_frame_equal(df.resample(rule='D').sum(),
  993. DataFrame([5], index=df.index.normalize()))
  994. df.resample(rule='MS').sum()
  995. assert_frame_equal(
  996. df.resample(rule='MS').sum(),
  997. DataFrame([5], index=DatetimeIndex([datetime(2012, 11, 1)],
  998. tz='US/Eastern')))
  999. dti = date_range('2013-09-30', '2013-11-02', freq='30Min',
  1000. tz='Europe/Paris')
  1001. values = range(dti.size)
  1002. df = DataFrame({"a": values,
  1003. "b": values,
  1004. "c": values}, index=dti, dtype='int64')
  1005. how = {"a": "min", "b": "max", "c": "count"}
  1006. assert_frame_equal(
  1007. df.resample("W-MON").agg(how)[["a", "b", "c"]],
  1008. DataFrame({"a": [0, 48, 384, 720, 1056, 1394],
  1009. "b": [47, 383, 719, 1055, 1393, 1586],
  1010. "c": [48, 336, 336, 336, 338, 193]},
  1011. index=date_range('9/30/2013', '11/4/2013',
  1012. freq='W-MON', tz='Europe/Paris')),
  1013. 'W-MON Frequency')
  1014. assert_frame_equal(
  1015. df.resample("2W-MON").agg(how)[["a", "b", "c"]],
  1016. DataFrame({"a": [0, 48, 720, 1394],
  1017. "b": [47, 719, 1393, 1586],
  1018. "c": [48, 672, 674, 193]},
  1019. index=date_range('9/30/2013', '11/11/2013',
  1020. freq='2W-MON', tz='Europe/Paris')),
  1021. '2W-MON Frequency')
  1022. assert_frame_equal(
  1023. df.resample("MS").agg(how)[["a", "b", "c"]],
  1024. DataFrame({"a": [0, 48, 1538],
  1025. "b": [47, 1537, 1586],
  1026. "c": [48, 1490, 49]},
  1027. index=date_range('9/1/2013', '11/1/2013',
  1028. freq='MS', tz='Europe/Paris')),
  1029. 'MS Frequency')
  1030. assert_frame_equal(
  1031. df.resample("2MS").agg(how)[["a", "b", "c"]],
  1032. DataFrame({"a": [0, 1538],
  1033. "b": [1537, 1586],
  1034. "c": [1538, 49]},
  1035. index=date_range('9/1/2013', '11/1/2013',
  1036. freq='2MS', tz='Europe/Paris')),
  1037. '2MS Frequency')
  1038. df_daily = df['10/26/2013':'10/29/2013']
  1039. assert_frame_equal(
  1040. df_daily.resample("D").agg({"a": "min", "b": "max", "c": "count"})
  1041. [["a", "b", "c"]],
  1042. DataFrame({"a": [1248, 1296, 1346, 1394],
  1043. "b": [1295, 1345, 1393, 1441],
  1044. "c": [48, 50, 48, 48]},
  1045. index=date_range('10/26/2013', '10/29/2013',
  1046. freq='D', tz='Europe/Paris')),
  1047. 'D Frequency')
  1048. def test_downsample_across_dst():
  1049. # GH 8531
  1050. tz = pytz.timezone('Europe/Berlin')
  1051. dt = datetime(2014, 10, 26)
  1052. dates = date_range(tz.localize(dt), periods=4, freq='2H')
  1053. result = Series(5, index=dates).resample('H').mean()
  1054. expected = Series([5., np.nan] * 3 + [5.],
  1055. index=date_range(tz.localize(dt), periods=7,
  1056. freq='H'))
  1057. tm.assert_series_equal(result, expected)
  1058. def test_downsample_across_dst_weekly():
  1059. # GH 9119, GH 21459
  1060. df = DataFrame(index=DatetimeIndex([
  1061. '2017-03-25', '2017-03-26', '2017-03-27',
  1062. '2017-03-28', '2017-03-29'
  1063. ], tz='Europe/Amsterdam'),
  1064. data=[11, 12, 13, 14, 15])
  1065. result = df.resample('1W').sum()
  1066. expected = DataFrame([23, 42], index=pd.DatetimeIndex([
  1067. '2017-03-26', '2017-04-02'
  1068. ], tz='Europe/Amsterdam'))
  1069. tm.assert_frame_equal(result, expected)
  1070. idx = pd.date_range("2013-04-01", "2013-05-01", tz='Europe/London',
  1071. freq='H')
  1072. s = Series(index=idx)
  1073. result = s.resample('W').mean()
  1074. expected = Series(index=pd.date_range(
  1075. '2013-04-07', freq='W', periods=5, tz='Europe/London'
  1076. ))
  1077. tm.assert_series_equal(result, expected)
  1078. def test_resample_with_nat():
  1079. # GH 13020
  1080. index = DatetimeIndex([pd.NaT,
  1081. '1970-01-01 00:00:00',
  1082. pd.NaT,
  1083. '1970-01-01 00:00:01',
  1084. '1970-01-01 00:00:02'])
  1085. frame = DataFrame([2, 3, 5, 7, 11], index=index)
  1086. index_1s = DatetimeIndex(['1970-01-01 00:00:00',
  1087. '1970-01-01 00:00:01',
  1088. '1970-01-01 00:00:02'])
  1089. frame_1s = DataFrame([3, 7, 11], index=index_1s)
  1090. assert_frame_equal(frame.resample('1s').mean(), frame_1s)
  1091. index_2s = DatetimeIndex(['1970-01-01 00:00:00',
  1092. '1970-01-01 00:00:02'])
  1093. frame_2s = DataFrame([5, 11], index=index_2s)
  1094. assert_frame_equal(frame.resample('2s').mean(), frame_2s)
  1095. index_3s = DatetimeIndex(['1970-01-01 00:00:00'])
  1096. frame_3s = DataFrame([7], index=index_3s)
  1097. assert_frame_equal(frame.resample('3s').mean(), frame_3s)
  1098. assert_frame_equal(frame.resample('60s').mean(), frame_3s)
  1099. def test_resample_datetime_values():
  1100. # GH 13119
  1101. # check that datetime dtype is preserved when NaT values are
  1102. # introduced by the resampling
  1103. dates = [datetime(2016, 1, 15), datetime(2016, 1, 19)]
  1104. df = DataFrame({'timestamp': dates}, index=dates)
  1105. exp = Series([datetime(2016, 1, 15), pd.NaT, datetime(2016, 1, 19)],
  1106. index=date_range('2016-01-15', periods=3, freq='2D'),
  1107. name='timestamp')
  1108. res = df.resample('2D').first()['timestamp']
  1109. tm.assert_series_equal(res, exp)
  1110. res = df['timestamp'].resample('2D').first()
  1111. tm.assert_series_equal(res, exp)
  1112. def test_resample_apply_with_additional_args(series):
  1113. # GH 14615
  1114. def f(data, add_arg):
  1115. return np.mean(data) * add_arg
  1116. multiplier = 10
  1117. result = series.resample('D').apply(f, multiplier)
  1118. expected = series.resample('D').mean().multiply(multiplier)
  1119. tm.assert_series_equal(result, expected)
  1120. # Testing as kwarg
  1121. result = series.resample('D').apply(f, add_arg=multiplier)
  1122. expected = series.resample('D').mean().multiply(multiplier)
  1123. tm.assert_series_equal(result, expected)
  1124. # Testing dataframe
  1125. df = pd.DataFrame({"A": 1, "B": 2},
  1126. index=pd.date_range('2017', periods=10))
  1127. result = df.groupby("A").resample("D").agg(f, multiplier)
  1128. expected = df.groupby("A").resample('D').mean().multiply(multiplier)
  1129. assert_frame_equal(result, expected)
  1130. @pytest.mark.parametrize('k', [1, 2, 3])
  1131. @pytest.mark.parametrize('n1, freq1, n2, freq2', [
  1132. (30, 'S', 0.5, 'Min'),
  1133. (60, 'S', 1, 'Min'),
  1134. (3600, 'S', 1, 'H'),
  1135. (60, 'Min', 1, 'H'),
  1136. (21600, 'S', 0.25, 'D'),
  1137. (86400, 'S', 1, 'D'),
  1138. (43200, 'S', 0.5, 'D'),
  1139. (1440, 'Min', 1, 'D'),
  1140. (12, 'H', 0.5, 'D'),
  1141. (24, 'H', 1, 'D'),
  1142. ])
  1143. def test_resample_equivalent_offsets(n1, freq1, n2, freq2, k):
  1144. # GH 24127
  1145. n1_ = n1 * k
  1146. n2_ = n2 * k
  1147. s = pd.Series(0, index=pd.date_range('19910905 13:00',
  1148. '19911005 07:00',
  1149. freq=freq1))
  1150. s = s + range(len(s))
  1151. result1 = s.resample(str(n1_) + freq1).mean()
  1152. result2 = s.resample(str(n2_) + freq2).mean()
  1153. assert_series_equal(result1, result2)
  1154. @pytest.mark.parametrize('first,last,offset,exp_first,exp_last', [
  1155. ('19910905', '19920406', 'D', '19910905', '19920407'),
  1156. ('19910905 00:00', '19920406 06:00', 'D', '19910905', '19920407'),
  1157. ('19910905 06:00', '19920406 06:00', 'H', '19910905 06:00',
  1158. '19920406 07:00'),
  1159. ('19910906', '19920406', 'M', '19910831', '19920430'),
  1160. ('19910831', '19920430', 'M', '19910831', '19920531'),
  1161. ('1991-08', '1992-04', 'M', '19910831', '19920531'),
  1162. ])
  1163. def test_get_timestamp_range_edges(first, last, offset,
  1164. exp_first, exp_last):
  1165. first = pd.Period(first)
  1166. first = first.to_timestamp(first.freq)
  1167. last = pd.Period(last)
  1168. last = last.to_timestamp(last.freq)
  1169. exp_first = pd.Timestamp(exp_first, freq=offset)
  1170. exp_last = pd.Timestamp(exp_last, freq=offset)
  1171. offset = pd.tseries.frequencies.to_offset(offset)
  1172. result = _get_timestamp_range_edges(first, last, offset)
  1173. expected = (exp_first, exp_last)
  1174. assert result == expected