test_resampler_grouper.py 8.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260
  1. # pylint: disable=E1101
  2. from textwrap import dedent
  3. import numpy as np
  4. from pandas.compat import range
  5. import pandas as pd
  6. from pandas import DataFrame, Series, Timestamp
  7. from pandas.core.indexes.datetimes import date_range
  8. import pandas.util.testing as tm
  9. from pandas.util.testing import assert_frame_equal, assert_series_equal
  # Shared fixture: 40 rows on a one-second DatetimeIndex starting 2000-01-01.
  # Column 'A' carries the group labels (20 ones, 12 twos, 8 threes) and
  # column 'B' counts 0..39.
  test_frame = DataFrame(
      data={
          'A': [1] * 20 + [2] * 12 + [3] * 8,
          'B': np.arange(40),
      },
      index=date_range('1/1/2000', periods=40, freq='s'),
  )
  def test_tab_complete_ipython6_warning(ip):
      """Check that tab-completing on a Resampler raises no warning.

      ``ip`` is presumably an IPython shell fixture supplied by the test
      harness (confirm against conftest); the snippet below is executed in
      it so that the name ``rs`` exists when completions are requested.
      """
      from IPython.core.completer import provisionalcompleter

      setup_source = dedent("""\
      import pandas.util.testing as tm
      s = tm.makeTimeSeries()
      rs = s.resample("D")
      """)
      ip.run_code(setup_source)

      # Enumerate the completions for ``rs.`` while asserting that nothing
      # warns; the generator has to be consumed for the completer to run.
      with tm.assert_produces_warning(None), provisionalcompleter('ignore'):
          list(ip.Completer.completions('rs.', 1))
  def test_deferred_with_groupby():
      """Deferred groupby-resample ops equal an explicit per-group apply.

      GH 12486: ``groupby(...).resample(...)`` followed by ``asfreq`` or
      ``ffill`` is compared with running the same resample on every group
      through ``groupby(...).apply``.
      """
      rows = [
          ['2010-01-01', 'A', 2], ['2010-01-02', 'A', 3],
          ['2010-01-05', 'A', 8], ['2010-01-10', 'A', 7],
          ['2010-01-13', 'A', 3], ['2010-01-01', 'B', 5],
          ['2010-01-03', 'B', 2], ['2010-01-04', 'B', 1],
          ['2010-01-11', 'B', 7], ['2010-01-14', 'B', 3],
      ]
      scores = DataFrame(rows, columns=['date', 'id', 'score'])
      scores.date = pd.to_datetime(scores.date)

      def daily_asfreq(group):
          # Reference path: index each group by date, then resample it.
          return group.set_index('date').resample('D').asfreq()

      expected = scores.groupby('id').apply(daily_asfreq)
      by_id = scores.set_index('date').groupby('id')
      result = by_id.resample('D').asfreq()
      assert_frame_equal(result, expected)

      # Second scenario: weekly observations forward-filled to daily.
      weekly = DataFrame({
          'date': pd.date_range(start='2016-01-01', periods=4, freq='W'),
          'group': [1, 1, 2, 2],
          'val': [5, 6, 7, 8],
      }).set_index('date')

      def daily_ffill(group):
          return group.resample('1D').ffill()

      expected = weekly.groupby('group').apply(daily_ffill)
      result = weekly.groupby('group').resample('1D').ffill()
      assert_frame_equal(result, expected)
  def test_getitem():
      """Selecting column ``B`` gives the same mean wherever it is selected.

      The column is picked after ``resample``, before ``resample`` and after
      the aggregation; each variant is compared with a per-group apply.
      """
      grouped = test_frame.groupby('A')
      expected = grouped.B.apply(lambda ser: ser.resample('2s').mean())

      # Thunks keep the evaluate-then-compare order of the three variants.
      variants = (
          lambda: grouped.resample('2s').B.mean(),
          lambda: grouped.B.resample('2s').mean(),
          lambda: grouped.resample('2s').mean().B,
      )
      for compute in variants:
          assert_series_equal(compute(), expected)
  def test_getitem_multiple():
      """Repeated column selection on one groupby-resampler stays correct.

      GH 13174: multiple calls after selection caused an issue with
      aliasing, so the same selection and count are performed twice.
      """
      records = [{'id': 1, 'buyer': 'A'}, {'id': 2, 'buyer': 'B'}]
      frame = DataFrame(records,
                        index=pd.date_range('2016-01-01', periods=2))
      resampler = frame.groupby('id').resample('1D')

      expected_index = pd.MultiIndex.from_tuples(
          [(1, Timestamp('2016-01-01')),
           (2, Timestamp('2016-01-02'))],
          names=['id', None])
      expected = Series([1, 1], index=expected_index, name='buyer')

      # The second pass is the actual regression check.
      for _ in (0, 1):
          assert_series_equal(resampler['buyer'].count(), expected)
  def test_groupby_resample_on_api_with_getitem():
      """``resample(on=...)`` after groupby supports column selection.

      GH 17813: resampling on the ``date`` column via ``on=`` must give the
      same sums as first moving ``date`` into the index.
      """
      frame = pd.DataFrame({
          'id': list('aabbb'),
          'date': pd.date_range('1-1-2016', periods=5),
          'data': 1,
      })

      via_index = frame.set_index('date').groupby('id').resample('2D')
      exp = via_index['data'].sum()

      via_on = frame.groupby('id').resample('2D', on='date')
      result = via_on['data'].sum()

      assert_series_equal(result, exp)
  def test_nearest():
      """Upsampling with ``nearest`` takes each value from the closest sample.

      GH 17496. A three-point, one-minute-spaced series is resampled to
      20-second steps and compared with the fully spelled-out expectation.
      """
      # 'min' and '20s' denote the same offsets as the 'T' and '20S' aliases
      # but match the lowercase 's' spelling used by the other tests here and
      # are unaffected by the deprecation of the older alias spellings.
      minutely = pd.date_range('1/1/2000', periods=3, freq='min')
      result = Series(range(3), index=minutely).resample('20s').nearest()

      expected_index = pd.DatetimeIndex(
          ['2000-01-01 00:00:00', '2000-01-01 00:00:20',
           '2000-01-01 00:00:40', '2000-01-01 00:01:00',
           '2000-01-01 00:01:20', '2000-01-01 00:01:40',
           '2000-01-01 00:02:00'],
          dtype='datetime64[ns]',
          freq='20s')
      expected = Series([0, 0, 1, 1, 1, 2, 2], index=expected_index)

      assert_series_equal(result, expected)
  def test_methods():
      """Every groupby-resample method equals applying it group by group."""
      g = test_frame.groupby('A')
      r = g.resample('2s')

      def per_group(name, **kwargs):
          # Reference result: resample each group on its own, call ``name``.
          return g.apply(
              lambda grp: getattr(grp.resample('2s'), name)(**kwargs))

      reductions = ('first', 'last', 'median', 'sem', 'sum', 'mean',
                    'min', 'max')
      for name in reductions:
          assert_frame_equal(getattr(r, name)(), per_group(name))

      # size is compared as a Series, unlike the frame-valued reductions
      assert_series_equal(r.size(), per_group('size'))

      assert_frame_equal(r.count(), per_group('count'))

      # series only
      result = r.B.nunique()
      expected = g.B.apply(lambda col: col.resample('2s').nunique())
      assert_series_equal(result, expected)

      for name in ('nearest', 'backfill', 'ffill', 'asfreq'):
          assert_frame_equal(getattr(r, name)(), per_group(name))

      assert_frame_equal(r.ohlc(), per_group('ohlc'))

      for name in ('std', 'var'):
          assert_frame_equal(getattr(r, name)(ddof=1),
                             per_group(name, ddof=1))
  def test_apply():
      """``apply`` with a resampling callable agrees with the direct ``sum``."""
      by_a = test_frame.groupby('A')
      resampler = by_a.resample('2s')

      # reduction
      expected = by_a.resample('2s').sum()

      def resample_sum(frame):
          return frame.resample('2s').sum()

      # Route 1: apply on the groupby-resampler itself.
      assert_frame_equal(resampler.apply(resample_sum), expected)

      def resample_apply_sum(frame):
          return frame.resample('2s').apply(lambda col: col.sum())

      # Route 2: apply on the plain groupby, resampling inside the callable.
      assert_frame_equal(by_a.apply(resample_apply_sum), expected)
  144. def test_apply_with_mutated_index():
  145. # GH 15169
  146. index = pd.date_range('1-1-2015', '12-31-15', freq='D')
  147. df = DataFrame(data={'col1': np.random.rand(len(index))}, index=index)
  148. def f(x):
  149. s = Series([1, 2], index=['a', 'b'])
  150. return s
  151. expected = df.groupby(pd.Grouper(freq='M')).apply(f)
  152. result = df.resample('M').apply(f)
  153. assert_frame_equal(result, expected)
  154. # A case for series
  155. expected = df['col1'].groupby(pd.Grouper(freq='M')).apply(f)
  156. result = df['col1'].resample('M').apply(f)
  157. assert_series_equal(result, expected)
  def test_resample_groupby_with_label():
      """Groupby-resample honours ``label='left'`` for weekly bins (GH 13235)."""
      every_other_day = date_range('2000-01-01', freq='2D', periods=5)
      frame = DataFrame(
          index=every_other_day,
          data={'col0': [0, 0, 1, 1, 2], 'col1': [1, 1, 1, 1, 1]},
      )
      result = frame.groupby('col0').resample('1W', label='left').sum()

      # Expected index: (group key, left edge of the weekly bin).
      group_keys = np.array([0, 0, 1, 2])
      bin_labels = pd.to_datetime(np.array(['1999-12-26', '2000-01-02',
                                            '2000-01-02', '2000-01-02']))
      mindex = pd.MultiIndex.from_arrays([group_keys, bin_labels],
                                         names=['col0', None])
      expected = DataFrame(
          data={'col0': [0, 0, 2, 2], 'col1': [1, 1, 2, 1]},
          index=mindex,
      )

      assert_frame_equal(result, expected)
  def test_consistency_with_window():
      """Groupby-resample and groupby-rolling index their results alike.

      Both results must carry a two-level index whose first level holds the
      group keys 1, 2, 3 under the name 'A'.
      """
      # consistent return values with window
      df = test_frame

      # pd.Index(..., dtype='int64') builds the same integer index as
      # pd.Int64Index without depending on that class, which pandas has
      # deprecated and later removed.
      expected = pd.Index([1, 2, 3], dtype='int64', name='A')

      result = df.groupby('A').resample('2s').mean()
      assert result.index.nlevels == 2
      tm.assert_index_equal(result.index.levels[0], expected)

      result = df.groupby('A').rolling(20).mean()
      assert result.index.nlevels == 2
      tm.assert_index_equal(result.index.levels[0], expected)
  def test_median_duplicate_columns():
      """Resampled median works when all column labels are identical.

      GH 14233: the result for a frame with columns 'a', 'a', 'a' is compared
      with the same data under unique labels.
      """
      duplicated = DataFrame(
          np.random.randn(20, 3),
          columns=list('aaa'),
          index=pd.date_range('2012-01-01', periods=20, freq='s'),
      )
      relabelled = duplicated.copy()
      relabelled.columns = list('abc')

      expected = relabelled.resample('5s').median()
      result = duplicated.resample('5s').median()

      # Only the values are under test, so align the labels before comparing.
      expected.columns = result.columns
      assert_frame_equal(result, expected)