123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260 |
- # pylint: disable=E1101
- from textwrap import dedent
- import numpy as np
- from pandas.compat import range
- import pandas as pd
- from pandas import DataFrame, Series, Timestamp
- from pandas.core.indexes.datetimes import date_range
- import pandas.util.testing as tm
- from pandas.util.testing import assert_frame_equal, assert_series_equal
# Shared fixture for the groupby/resample tests below: 40 rows on a
# one-second DatetimeIndex starting at 2000-01-01.  Column 'A' is the
# grouping key (20 ones, 12 twos, 8 threes); column 'B' counts 0..39.
test_frame = DataFrame(
    data={'A': [1] * 20 + [2] * 12 + [3] * 8,
          'B': np.arange(40)},
    index=date_range(start='1/1/2000', periods=40, freq='s'))
def test_tab_complete_ipython6_warning(ip):
    """Completing attributes of a resampler must not produce a warning.

    ``ip`` is presumably an IPython shell fixture provided by the test
    suite (TODO confirm); only ``ip.run_code`` and ``ip.Completer`` are
    used here.
    """
    from IPython.core.completer import provisionalcompleter

    setup_code = dedent("""\
    import pandas.util.testing as tm
    s = tm.makeTimeSeries()
    rs = s.resample("D")
    """)
    ip.run_code(setup_code)

    # ``list`` forces the completions to be generated while the
    # warning check is active.
    with tm.assert_produces_warning(None), provisionalcompleter('ignore'):
        list(ip.Completer.completions('rs.', 1))
def test_deferred_with_groupby():
    """``groupby(...).resample(...)`` must match resampling inside ``apply``."""
    # GH 12486
    # support deferred resample ops with groupby
    records = [
        ['2010-01-01', 'A', 2], ['2010-01-02', 'A', 3],
        ['2010-01-05', 'A', 8], ['2010-01-10', 'A', 7],
        ['2010-01-13', 'A', 3], ['2010-01-01', 'B', 5],
        ['2010-01-03', 'B', 2], ['2010-01-04', 'B', 1],
        ['2010-01-11', 'B', 7], ['2010-01-14', 'B', 3],
    ]
    df = DataFrame(records, columns=['date', 'id', 'score'])
    df['date'] = pd.to_datetime(df['date'])

    def daily_asfreq(group):
        # reference path: index each group by date, then resample it
        by_date = group.set_index('date')
        return by_date.resample('D').asfreq()

    expected = df.groupby('id').apply(daily_asfreq)
    by_date = df.set_index('date')
    result = by_date.groupby('id').resample('D').asfreq()
    assert_frame_equal(result, expected)

    # same comparison for a frame that is already date-indexed, using ffill
    weekly = pd.date_range(start='2016-01-01', periods=4, freq='W')
    df = DataFrame({'date': weekly,
                    'group': [1, 1, 2, 2],
                    'val': [5, 6, 7, 8]})
    df = df.set_index('date')

    def daily_ffill(group):
        return group.resample('1D').ffill()

    expected = df.groupby('group').apply(daily_ffill)
    result = df.groupby('group').resample('1D').ffill()
    assert_frame_equal(result, expected)
def test_getitem():
    """Selecting column B before, during or after resampling is equivalent."""
    grouped = test_frame.groupby('A')

    expected = grouped.B.apply(lambda col: col.resample('2s').mean())

    # Each entry is evaluated lazily so that a failure in an earlier
    # variant is reported before the later ones are computed.
    variants = (
        lambda: grouped.resample('2s').B.mean(),   # select on the resampler
        lambda: grouped.B.resample('2s').mean(),   # select on the groupby
        lambda: grouped.resample('2s').mean().B,   # select on the result
    )
    for compute in variants:
        result = compute()
        assert_series_equal(result, expected)
def test_getitem_multiple():
    """Repeated column selection on one resampler gives the same result."""
    # GH 13174
    # multiple calls after selection causing an issue with aliasing
    rows = [{'id': 1, 'buyer': 'A'}, {'id': 2, 'buyer': 'B'}]
    dates = pd.date_range('2016-01-01', periods=2)
    df = DataFrame(rows, index=dates)
    resampler = df.groupby('id').resample('1D')

    expected_index = pd.MultiIndex.from_tuples(
        [(1, Timestamp('2016-01-01')),
         (2, Timestamp('2016-01-02'))],
        names=['id', None])
    expected = Series([1, 1], index=expected_index, name='buyer')

    # select-and-count twice on the very same resampler object
    for _ in range(2):
        result = resampler['buyer'].count()
        assert_series_equal(result, expected)
def test_groupby_resample_on_api_with_getitem():
    """``resample(on='date')`` matches resampling a date-indexed frame."""
    # GH 17813
    df = pd.DataFrame({'id': list('aabbb'),
                       'date': pd.date_range('1-1-2016', periods=5),
                       'data': 1})

    # reference: move 'date' into the index first
    indexed_resampler = df.set_index('date').groupby('id').resample('2D')
    exp = indexed_resampler['data'].sum()

    # under test: resample on the 'date' column directly
    column_resampler = df.groupby('id').resample('2D', on='date')
    result = column_resampler['data'].sum()

    assert_series_equal(result, exp)
def test_nearest():
    """Upsampling with ``nearest`` fills each new slot from the closest point.

    A three-point, one-minute-spaced series is resampled to 20-second
    steps and compared against a hand-written expected series.
    """
    # GH 17496
    # Resample nearest
    # 'min' and '20s' are used instead of the aliases 'T' and '20S', which
    # newer pandas deprecates; lowercase 's' also matches the rest of this
    # module ('s', '2s', '5s').
    index = pd.date_range('1/1/2000', periods=3, freq='min')
    result = Series(range(3), index=index).resample('20s').nearest()

    expected_index = pd.DatetimeIndex(
        ['2000-01-01 00:00:00', '2000-01-01 00:00:20',
         '2000-01-01 00:00:40', '2000-01-01 00:01:00',
         '2000-01-01 00:01:20', '2000-01-01 00:01:40',
         '2000-01-01 00:02:00'],
        dtype='datetime64[ns]',
        freq='20s')
    expected = Series([0, 0, 1, 1, 1, 2, 2], index=expected_index)

    assert_series_equal(result, expected)
def test_methods():
    """Each resampler method on a groupby matches resampling inside ``apply``."""
    grouped = test_frame.groupby('A')
    resampler = grouped.resample('2s')

    def check(name, compare, **kwargs):
        # Call ``name`` on the groupby-resampler, then build the reference
        # by resampling every group separately, and compare the two.
        result = getattr(resampler, name)(**kwargs)
        expected = grouped.apply(
            lambda x: getattr(x.resample('2s'), name)(**kwargs))
        compare(result, expected)

    for name in ('first', 'last', 'median', 'sem', 'sum', 'mean',
                 'min', 'max'):
        check(name, assert_frame_equal)

    # size returns a Series, count returns a frame
    check('size', assert_series_equal)
    check('count', assert_frame_equal)

    # series only
    result = resampler.B.nunique()
    expected = grouped.B.apply(lambda x: x.resample('2s').nunique())
    assert_series_equal(result, expected)

    for name in ('nearest', 'backfill', 'ffill', 'asfreq', 'ohlc'):
        check(name, assert_frame_equal)

    for name in ('std', 'var'):
        check(name, assert_frame_equal, ddof=1)
def test_apply():
    """``apply`` on the groupby-resampler and on the groupby agree with sum."""
    grouped = test_frame.groupby('A')
    resampler = grouped.resample('2s')

    # reduction
    expected = grouped.resample('2s').sum()

    def resample_then_sum(x):
        return x.resample('2s').sum()

    result = resampler.apply(resample_then_sum)
    assert_frame_equal(result, expected)

    def resample_then_apply_sum(x):
        # same reduction, but routed through Resampler.apply
        return x.resample('2s').apply(lambda y: y.sum())

    result = grouped.apply(resample_then_apply_sum)
    assert_frame_equal(result, expected)
def test_apply_with_mutated_index():
    """``resample.apply`` matches ``groupby(Grouper).apply`` when the applied
    function returns a Series whose index differs from its input's."""
    # GH 15169
    index = pd.date_range('1-1-2015', '12-31-15', freq='D')
    values = np.random.rand(len(index))
    df = DataFrame(data={'col1': values}, index=index)

    def fixed_series(x):
        # ignores the group and returns a Series with an unrelated index
        return Series([1, 2], index=['a', 'b'])

    expected = df.groupby(pd.Grouper(freq='M')).apply(fixed_series)
    result = df.resample('M').apply(fixed_series)
    assert_frame_equal(result, expected)

    # A case for series
    column = df['col1']
    expected = column.groupby(pd.Grouper(freq='M')).apply(fixed_series)
    result = column.resample('M').apply(fixed_series)
    assert_series_equal(result, expected)
def test_resample_groupby_with_label():
    """Weekly groupby-resample honours ``label='left'`` in the result index."""
    # GH 13235
    index = date_range('2000-01-01', freq='2D', periods=5)
    frame_data = {'col0': [0, 0, 1, 1, 2], 'col1': [1, 1, 1, 1, 1]}
    df = DataFrame(index=index, data=frame_data)

    result = df.groupby('col0').resample('1W', label='left').sum()

    # expected MultiIndex: (group key, left-labelled week)
    group_level = np.array([0, 0, 1, 2])
    week_level = pd.to_datetime(
        np.array(['1999-12-26', '2000-01-02',
                  '2000-01-02', '2000-01-02']))
    mindex = pd.MultiIndex.from_arrays([group_level, week_level],
                                       names=['col0', None])
    expected_data = {'col0': [0, 0, 2, 2], 'col1': [1, 1, 2, 1]}
    expected = DataFrame(data=expected_data, index=mindex)

    assert_frame_equal(result, expected)
def test_consistency_with_window():
    """Groupby-resample and groupby-rolling both return a two-level index
    whose first level holds the group keys of column 'A'."""
    # consistent return values with window
    df = test_frame

    # Built with pd.Index(..., dtype='int64') rather than pd.Int64Index:
    # the latter is deprecated/removed in newer pandas, while this form
    # yields an int64 index on both old and new versions.
    expected = pd.Index([1, 2, 3], dtype='int64', name='A')

    result = df.groupby('A').resample('2s').mean()
    assert result.index.nlevels == 2
    tm.assert_index_equal(result.index.levels[0], expected)

    result = df.groupby('A').rolling(20).mean()
    assert result.index.nlevels == 2
    tm.assert_index_equal(result.index.levels[0], expected)
def test_median_duplicate_columns():
    """Resample median works on a frame whose column labels are duplicated."""
    # GH 14233
    dup_cols = DataFrame(
        np.random.randn(20, 3),
        columns=list('aaa'),
        index=pd.date_range('2012-01-01', periods=20, freq='s'))

    # same data under unique labels gives the reference result
    unique_cols = dup_cols.copy()
    unique_cols.columns = ['a', 'b', 'c']
    expected = unique_cols.resample('5s').median()

    result = dup_cols.resample('5s').median()

    # align labels so that only the values are compared
    expected.columns = result.columns
    assert_frame_equal(result, expected)
|