test_base.py 7.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228
  1. from datetime import datetime, timedelta
  2. import numpy as np
  3. import pytest
  4. from pandas.compat import range, zip
  5. import pandas as pd
  6. from pandas import DataFrame, Series
  7. from pandas.core.groupby.groupby import DataError
  8. from pandas.core.indexes.datetimes import date_range
  9. from pandas.core.indexes.period import PeriodIndex, period_range
  10. from pandas.core.indexes.timedeltas import TimedeltaIndex, timedelta_range
  11. from pandas.core.resample import TimeGrouper
  12. import pandas.util.testing as tm
  13. from pandas.util.testing import (
  14. assert_almost_equal, assert_frame_equal, assert_index_equal,
  15. assert_series_equal)
  16. # a fixture value can be overridden by the test parameter value. Note that the
  17. # value of the fixture can be overridden this way even if the test doesn't use
  18. # it directly (doesn't mention it in the function prototype).
  19. # see https://docs.pytest.org/en/latest/fixture.html#override-a-fixture-with-direct-test-parametrization # noqa
  20. # in this module we override the fixture values defined in conftest.py
  21. # tuples of '_index_factory,_series_name,_index_start,_index_end'
  DATE_RANGE = (
      date_range,
      'dti',
      datetime(2005, 1, 1),
      datetime(2005, 1, 10),
  )
  PERIOD_RANGE = (
      period_range,
      'pi',
      datetime(2005, 1, 1),
      datetime(2005, 1, 10),
  )
  TIMEDELTA_RANGE = (
      timedelta_range,
      'tdi',
      '1 day',
      '10 day',
  )
  # every supported time-series index flavour, in one list so it can be handed
  # to a single parametrize call
  ALL_TIMESERIES_INDEXES = [
      DATE_RANGE,
      PERIOD_RANGE,
      TIMEDELTA_RANGE,
  ]
  def pytest_generate_tests(metafunc):
      """Parametrize every ``*_all_ts`` test over all time-series indexes.

      pytest calls this hook once per test function. Tests whose function
      name ends with ``'_all_ts'`` are parametrized with each entry of
      ``ALL_TIMESERIES_INDEXES``; any other test is left untouched.
      """
      test_name = metafunc.function.__name__
      if not test_name.endswith('_all_ts'):
          return

      argnames = '_index_factory,_series_name,_index_start,_index_end'
      metafunc.parametrize(argnames, ALL_TIMESERIES_INDEXES)
  @pytest.fixture
  def create_index(_index_factory):
      """Fixture returning a callable that forwards to ``_index_factory``."""

      def _make_index(*args, **kwargs):
          """ build an index by calling _index_factory with args, kwargs """
          index = _index_factory(*args, **kwargs)
          return index

      return _make_index
  @pytest.mark.parametrize('freq', ['2D', '1H'])
  @pytest.mark.parametrize(
      '_index_factory,_series_name,_index_start,_index_end',
      [DATE_RANGE, TIMEDELTA_RANGE]
  )
  def test_asfreq(series_and_frame, freq, create_index):
      """``resample(freq).asfreq()`` matches reindexing onto a ``freq`` index."""
      first_label = series_and_frame.index[0]
      last_label = series_and_frame.index[-1]

      result = series_and_frame.resample(freq).asfreq()

      # the expected object is the input reindexed onto an index spanning the
      # same first/last labels at the requested frequency
      target_index = create_index(first_label, last_label, freq=freq)
      expected = series_and_frame.reindex(target_index)
      assert_almost_equal(result, expected)
  @pytest.mark.parametrize(
      '_index_factory,_series_name,_index_start,_index_end',
      [DATE_RANGE, TIMEDELTA_RANGE]
  )
  def test_asfreq_fill_value(series, create_index):
      """``asfreq`` with and without ``fill_value`` matches ``reindex``."""
      # test for fill value during resampling, issue 3715

      # Series, no fill value
      hourly_index = create_index(series.index[0], series.index[-1], freq='1H')
      assert_series_equal(series.resample('1H').asfreq(),
                          series.reindex(hourly_index))

      # DataFrame with the second row blanked out, explicit fill value
      frame = series.to_frame('value')
      frame.iloc[1] = None
      filled = frame.resample('1H').asfreq(fill_value=4.0)
      hourly_index = create_index(frame.index[0], frame.index[-1], freq='1H')
      assert_frame_equal(filled, frame.reindex(hourly_index, fill_value=4.0))
  def test_resample_interpolate_all_ts(frame):
      """``Resampler.interpolate`` equals ``asfreq()`` followed by interpolate."""
      # # 12925
      via_asfreq = frame.resample('1T').asfreq().interpolate()
      direct = frame.resample('1T').interpolate()
      assert_frame_equal(via_asfreq, direct)
  def test_raises_on_non_datetimelike_index():
      """Resampling a frame without a datetime-like index raises TypeError."""
      # this is a non datetimelike index
      xp = DataFrame()
      with pytest.raises(TypeError):
          xp.resample('A').mean()
  @pytest.mark.parametrize('freq', ['M', 'D', 'H'])
  def test_resample_empty_series_all_ts(freq, empty_series, resample_method):
      """Resampling an empty Series yields the same Series at frequency ``freq``.

      ``ohlc`` is skipped here; every other ``resample_method`` is applied and
      the result's index, index frequency and values are compared against a
      copy of the input whose index has been converted to ``freq``.
      """
      # GH12771 & GH12868

      if resample_method == 'ohlc':
          pytest.skip('need to test for ohlc from GH13083')

      resampler = empty_series.resample(freq)
      result = getattr(resampler, resample_method)()

      expected = empty_series.copy()
      source_index = empty_series.index
      if isinstance(source_index, PeriodIndex):
          expected.index = source_index.asfreq(freq=freq)
      else:
          expected.index = source_index._shallow_copy(freq=freq)

      assert_index_equal(result.index, expected.index)
      assert result.index.freq == expected.index.freq
      assert_series_equal(result, expected, check_dtype=False)
  @pytest.mark.parametrize('freq', ['M', 'D', 'H'])
  def test_resample_empty_dataframe_all_ts(empty_frame, freq, resample_method):
      """Resampling an empty DataFrame keeps its shape at frequency ``freq``.

      For every ``resample_method`` except ``'size'`` the expected result is a
      copy of the input frame; for ``'size'`` it is an empty Series. In both
      cases the expected index is the input index converted to ``freq``.
      """
      # GH13212
      # count retains dimensions too
      resampler = empty_frame.resample(freq)
      result = getattr(resampler, resample_method)()

      # GH14962: 'size' gives back a Series rather than a frame
      expected = Series([]) if resample_method == 'size' else empty_frame.copy()

      source_index = empty_frame.index
      if isinstance(source_index, PeriodIndex):
          expected.index = source_index.asfreq(freq=freq)
      else:
          expected.index = source_index._shallow_copy(freq=freq)

      assert_index_equal(result.index, expected.index)
      assert result.index.freq == expected.index.freq
      assert_almost_equal(result, expected, check_dtype=False)

      # test size for GH13212 (currently stays as df)
  @pytest.mark.parametrize("index", tm.all_timeseries_index_generator(0))
  @pytest.mark.parametrize(
      "dtype",
      # Builtin types instead of the ``np.float`` / ``np.int`` / ``np.object``
      # aliases: those were plain aliases of the builtins, deprecated in
      # NumPy 1.20 and removed in 1.24.
      [float, int, object, 'datetime64[ns]'])
  def test_resample_empty_dtypes(index, dtype, resample_method):
      """Smoke-test every resample method on an empty Series of each dtype.

      Parameters
      ----------
      index
          One of the indexes yielded by
          ``tm.all_timeseries_index_generator(0)``.
      dtype
          dtype used to build the empty Series.
      resample_method
          Name of the Resampler method to call.

      The test makes no assertion on the result; it only checks that the call
      completes or fails with ``DataError``.
      """
      # Empty series were sometimes causing a segfault (for the functions
      # with Cython bounds-checking disabled) or an IndexError. We just run
      # them to ensure they no longer do. (GH #10228)
      empty_series = Series([], index=index, dtype=dtype)
      try:
          getattr(empty_series.resample('d'), resample_method)()
      except DataError:
          # Ignore these since some combinations are invalid
          # (ex: doing mean with dtype of object)
          pass
  def test_resample_loffset_arg_type_all_ts(frame, create_index):
      """Check ``loffset`` with str, dict and list aggregation arguments.

      The frame is resampled to ``'2D'`` with ``loffset='2H'`` twice: once via
      ``.agg(arg)`` and once via the ``how=arg`` keyword, which is expected to
      emit a ``FutureWarning``. Both results are compared with the means of
      consecutive pairs of rows, indexed on a ``'2D'`` index shifted by two
      hours.

      Parameters
      ----------
      frame
          Input DataFrame fixture.
      create_index
          Fixture callable used to build the expected index.
      """
      # GH 13218, 15002
      df = frame
      expected_means = [df.values[i:i + 2].mean()
                        for i in range(0, len(df.values), 2)]
      # Floor division so that ``periods`` is an int on Python 3 too; true
      # division would hand a float to the index factory.
      expected_index = create_index(df.index[0],
                                    periods=len(df.index) // 2,
                                    freq='2D')

      # loffset coerces PeriodIndex to DateTimeIndex
      if isinstance(expected_index, PeriodIndex):
          expected_index = expected_index.to_timestamp()

      expected_index += timedelta(hours=2)
      expected = DataFrame({'value': expected_means}, index=expected_index)

      for arg in ['mean', {'value': 'mean'}, ['mean']]:

          result_agg = df.resample('2D', loffset='2H').agg(arg)

          with tm.assert_produces_warning(FutureWarning,
                                          check_stacklevel=False):
              result_how = df.resample('2D', how=arg, loffset='2H')

          # A list argument produces two-level columns. ``expected`` is
          # mutated in place, which is only safe because the list case is
          # the last one in the loop.
          if isinstance(arg, list):
              expected.columns = pd.MultiIndex.from_tuples([('value',
                                                             'mean')])

          # GH 13022, 7687 - TODO: fix resample w/ TimedeltaIndex
          if isinstance(expected.index, TimedeltaIndex):
              with pytest.raises(AssertionError):
                  assert_frame_equal(result_agg, expected)
                  # NOTE(review): not executed whenever the assertion above
                  # raises, so ``result_how`` is effectively unchecked here.
                  assert_frame_equal(result_how, expected)
          else:
              assert_frame_equal(result_agg, expected)
              assert_frame_equal(result_how, expected)
  def test_apply_to_empty_series_all_ts(empty_series):
      """``apply`` on an empty Series gives the same result for any function."""
      # GH 14313
      for freq in ('M', 'D', 'H'):
          from_constant = empty_series.resample(freq).apply(lambda x: 1)
          from_sum = empty_series.resample(freq).apply(np.sum)
          assert_series_equal(from_constant, from_sum, check_dtype=False)
  def test_resampler_is_iterable_all_ts(series):
      """Iterating a Resampler yields the same groups as a TimeGrouper groupby."""
      # GH 15314
      freq = 'H'
      grouper = TimeGrouper(freq, convention='start')
      by_groupby = series.groupby(grouper)
      by_resample = series.resample(freq)

      for resampled_item, grouped_item in zip(by_resample, by_groupby):
          resampled_key, resampled_values = resampled_item
          grouped_key, grouped_values = grouped_item
          assert resampled_key == grouped_key
          assert_series_equal(resampled_values, grouped_values)
  def test_resample_quantile_all_ts(series):
      """``Resampler.quantile(q)`` matches aggregating with ``Series.quantile``."""
      # GH 15023
      quantile = 0.75
      rule = 'H'

      def _group_quantile(group):
          return group.quantile(quantile)

      result = series.resample(rule).quantile(quantile)
      aggregated = series.resample(rule).agg(_group_quantile)
      expected = aggregated.rename(series.name)
      tm.assert_series_equal(result, expected)
  179. tm.assert_series_equal(result, expected)