123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228 |
- from datetime import datetime, timedelta
- import numpy as np
- import pytest
- from pandas.compat import range, zip
- import pandas as pd
- from pandas import DataFrame, Series
- from pandas.core.groupby.groupby import DataError
- from pandas.core.indexes.datetimes import date_range
- from pandas.core.indexes.period import PeriodIndex, period_range
- from pandas.core.indexes.timedeltas import TimedeltaIndex, timedelta_range
- from pandas.core.resample import TimeGrouper
- import pandas.util.testing as tm
- from pandas.util.testing import (
- assert_almost_equal, assert_frame_equal, assert_index_equal,
- assert_series_equal)
- # a fixture value can be overridden by the test parameter value. Note that the
- # value of the fixture can be overridden this way even if the test doesn't use
- # it directly (doesn't mention it in the function prototype).
- # see https://docs.pytest.org/en/latest/fixture.html#override-a-fixture-with-direct-test-parametrization # noqa
- # in this module we override the fixture values defined in conftest.py
- # tuples of '_index_factory,_series_name,_index_start,_index_end'
- DATE_RANGE = (date_range, 'dti', datetime(2005, 1, 1), datetime(2005, 1, 10))
- PERIOD_RANGE = (
- period_range, 'pi', datetime(2005, 1, 1), datetime(2005, 1, 10))
- TIMEDELTA_RANGE = (timedelta_range, 'tdi', '1 day', '10 day')
- ALL_TIMESERIES_INDEXES = [DATE_RANGE, PERIOD_RANGE, TIMEDELTA_RANGE]
- def pytest_generate_tests(metafunc):
- # called once per each test function
- if metafunc.function.__name__.endswith('_all_ts'):
- metafunc.parametrize(
- '_index_factory,_series_name,_index_start,_index_end',
- ALL_TIMESERIES_INDEXES)
- @pytest.fixture
- def create_index(_index_factory):
- def _create_index(*args, **kwargs):
- """ return the _index_factory created using the args, kwargs """
- return _index_factory(*args, **kwargs)
- return _create_index
- @pytest.mark.parametrize('freq', ['2D', '1H'])
- @pytest.mark.parametrize(
- '_index_factory,_series_name,_index_start,_index_end',
- [DATE_RANGE, TIMEDELTA_RANGE]
- )
- def test_asfreq(series_and_frame, freq, create_index):
- obj = series_and_frame
- result = obj.resample(freq).asfreq()
- new_index = create_index(obj.index[0], obj.index[-1], freq=freq)
- expected = obj.reindex(new_index)
- assert_almost_equal(result, expected)
- @pytest.mark.parametrize(
- '_index_factory,_series_name,_index_start,_index_end',
- [DATE_RANGE, TIMEDELTA_RANGE]
- )
- def test_asfreq_fill_value(series, create_index):
- # test for fill value during resampling, issue 3715
- s = series
- result = s.resample('1H').asfreq()
- new_index = create_index(s.index[0], s.index[-1], freq='1H')
- expected = s.reindex(new_index)
- assert_series_equal(result, expected)
- frame = s.to_frame('value')
- frame.iloc[1] = None
- result = frame.resample('1H').asfreq(fill_value=4.0)
- new_index = create_index(frame.index[0],
- frame.index[-1], freq='1H')
- expected = frame.reindex(new_index, fill_value=4.0)
- assert_frame_equal(result, expected)
- def test_resample_interpolate_all_ts(frame):
- # # 12925
- df = frame
- assert_frame_equal(
- df.resample('1T').asfreq().interpolate(),
- df.resample('1T').interpolate())
- def test_raises_on_non_datetimelike_index():
- # this is a non datetimelike index
- xp = DataFrame()
- pytest.raises(TypeError, lambda: xp.resample('A').mean())
- @pytest.mark.parametrize('freq', ['M', 'D', 'H'])
- def test_resample_empty_series_all_ts(freq, empty_series, resample_method):
- # GH12771 & GH12868
- if resample_method == 'ohlc':
- pytest.skip('need to test for ohlc from GH13083')
- s = empty_series
- result = getattr(s.resample(freq), resample_method)()
- expected = s.copy()
- if isinstance(s.index, PeriodIndex):
- expected.index = s.index.asfreq(freq=freq)
- else:
- expected.index = s.index._shallow_copy(freq=freq)
- assert_index_equal(result.index, expected.index)
- assert result.index.freq == expected.index.freq
- assert_series_equal(result, expected, check_dtype=False)
- @pytest.mark.parametrize('freq', ['M', 'D', 'H'])
- def test_resample_empty_dataframe_all_ts(empty_frame, freq, resample_method):
- # GH13212
- df = empty_frame
- # count retains dimensions too
- result = getattr(df.resample(freq), resample_method)()
- if resample_method != 'size':
- expected = df.copy()
- else:
- # GH14962
- expected = Series([])
- if isinstance(df.index, PeriodIndex):
- expected.index = df.index.asfreq(freq=freq)
- else:
- expected.index = df.index._shallow_copy(freq=freq)
- assert_index_equal(result.index, expected.index)
- assert result.index.freq == expected.index.freq
- assert_almost_equal(result, expected, check_dtype=False)
- # test size for GH13212 (currently stays as df)
- @pytest.mark.parametrize("index", tm.all_timeseries_index_generator(0))
- @pytest.mark.parametrize(
- "dtype",
- [np.float, np.int, np.object, 'datetime64[ns]'])
- def test_resample_empty_dtypes(index, dtype, resample_method):
- # Empty series were sometimes causing a segfault (for the functions
- # with Cython bounds-checking disabled) or an IndexError. We just run
- # them to ensure they no longer do. (GH #10228)
- empty_series = Series([], index, dtype)
- try:
- getattr(empty_series.resample('d'), resample_method)()
- except DataError:
- # Ignore these since some combinations are invalid
- # (ex: doing mean with dtype of np.object)
- pass
- def test_resample_loffset_arg_type_all_ts(frame, create_index):
- # GH 13218, 15002
- df = frame
- expected_means = [df.values[i:i + 2].mean()
- for i in range(0, len(df.values), 2)]
- expected_index = create_index(df.index[0],
- periods=len(df.index) / 2,
- freq='2D')
- # loffset coerces PeriodIndex to DateTimeIndex
- if isinstance(expected_index, PeriodIndex):
- expected_index = expected_index.to_timestamp()
- expected_index += timedelta(hours=2)
- expected = DataFrame({'value': expected_means}, index=expected_index)
- for arg in ['mean', {'value': 'mean'}, ['mean']]:
- result_agg = df.resample('2D', loffset='2H').agg(arg)
- with tm.assert_produces_warning(FutureWarning,
- check_stacklevel=False):
- result_how = df.resample('2D', how=arg, loffset='2H')
- if isinstance(arg, list):
- expected.columns = pd.MultiIndex.from_tuples([('value',
- 'mean')])
- # GH 13022, 7687 - TODO: fix resample w/ TimedeltaIndex
- if isinstance(expected.index, TimedeltaIndex):
- with pytest.raises(AssertionError):
- assert_frame_equal(result_agg, expected)
- assert_frame_equal(result_how, expected)
- else:
- assert_frame_equal(result_agg, expected)
- assert_frame_equal(result_how, expected)
- def test_apply_to_empty_series_all_ts(empty_series):
- # GH 14313
- s = empty_series
- for freq in ['M', 'D', 'H']:
- result = s.resample(freq).apply(lambda x: 1)
- expected = s.resample(freq).apply(np.sum)
- assert_series_equal(result, expected, check_dtype=False)
- def test_resampler_is_iterable_all_ts(series):
- # GH 15314
- freq = 'H'
- tg = TimeGrouper(freq, convention='start')
- grouped = series.groupby(tg)
- resampled = series.resample(freq)
- for (rk, rv), (gk, gv) in zip(resampled, grouped):
- assert rk == gk
- assert_series_equal(rv, gv)
- def test_resample_quantile_all_ts(series):
- # GH 15023
- s = series
- q = 0.75
- freq = 'H'
- result = s.resample(freq).quantile(q)
- expected = s.resample(freq).agg(lambda x: x.quantile(q)).rename(s.name)
- tm.assert_series_equal(result, expected)
|