test_offsets_properties.py 3.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108
  1. # -*- coding: utf-8 -*-
  2. """
  3. Behavioral based tests for offsets and date_range.
  4. This file is adapted from https://github.com/pandas-dev/pandas/pull/18761 -
  5. which was more ambitious but less idiomatic in its use of Hypothesis.
  6. You may wish to consult the previous version for inspiration on further
  7. tests, or when trying to pin down the bugs exposed by the tests below.
  8. """
  9. import warnings
  10. from hypothesis import assume, given, strategies as st
  11. from hypothesis.extra.dateutil import timezones as dateutil_timezones
  12. from hypothesis.extra.pytz import timezones as pytz_timezones
  13. import pytest
  14. import pandas as pd
  15. from pandas.tseries.offsets import (
  16. BMonthBegin, BMonthEnd, BQuarterBegin, BQuarterEnd, BYearBegin, BYearEnd,
  17. MonthBegin, MonthEnd, QuarterBegin, QuarterEnd, YearBegin, YearEnd)
  18. # ----------------------------------------------------------------
  19. # Helpers for generating random data
  20. with warnings.catch_warnings():
  21. warnings.simplefilter('ignore')
  22. min_dt = pd.Timestamp(1900, 1, 1).to_pydatetime(),
  23. max_dt = pd.Timestamp(1900, 1, 1).to_pydatetime(),
  24. gen_date_range = st.builds(
  25. pd.date_range,
  26. start=st.datetimes(
  27. # TODO: Choose the min/max values more systematically
  28. min_value=pd.Timestamp(1900, 1, 1).to_pydatetime(),
  29. max_value=pd.Timestamp(2100, 1, 1).to_pydatetime()
  30. ),
  31. periods=st.integers(min_value=2, max_value=100),
  32. freq=st.sampled_from('Y Q M D H T s ms us ns'.split()),
  33. tz=st.one_of(st.none(), dateutil_timezones(), pytz_timezones()),
  34. )
  35. gen_random_datetime = st.datetimes(
  36. min_value=min_dt,
  37. max_value=max_dt,
  38. timezones=st.one_of(st.none(), dateutil_timezones(), pytz_timezones())
  39. )
  40. # The strategy for each type is registered in conftest.py, as they don't carry
  41. # enough runtime information (e.g. type hints) to infer how to build them.
  42. gen_yqm_offset = st.one_of(*map(st.from_type, [
  43. MonthBegin, MonthEnd, BMonthBegin, BMonthEnd,
  44. QuarterBegin, QuarterEnd, BQuarterBegin, BQuarterEnd,
  45. YearBegin, YearEnd, BYearBegin, BYearEnd
  46. ]))
  47. # ----------------------------------------------------------------
  48. # Offset-specific behaviour tests
  49. # Based on CI runs: Always passes on OSX, fails on Linux, sometimes on Windows
  50. @pytest.mark.xfail(strict=False, reason='inconsistent between OSs, Pythons')
  51. @given(gen_random_datetime, gen_yqm_offset)
  52. def test_on_offset_implementations(dt, offset):
  53. assume(not offset.normalize)
  54. # check that the class-specific implementations of onOffset match
  55. # the general case definition:
  56. # (dt + offset) - offset == dt
  57. compare = (dt + offset) - offset
  58. assert offset.onOffset(dt) == (compare == dt)
  59. @pytest.mark.xfail
  60. @given(gen_yqm_offset, gen_date_range)
  61. def test_apply_index_implementations(offset, rng):
  62. # offset.apply_index(dti)[i] should match dti[i] + offset
  63. assume(offset.n != 0) # TODO: test for that case separately
  64. # rng = pd.date_range(start='1/1/2000', periods=100000, freq='T')
  65. ser = pd.Series(rng)
  66. res = rng + offset
  67. res_v2 = offset.apply_index(rng)
  68. assert (res == res_v2).all()
  69. assert res[0] == rng[0] + offset
  70. assert res[-1] == rng[-1] + offset
  71. res2 = ser + offset
  72. # apply_index is only for indexes, not series, so no res2_v2
  73. assert res2.iloc[0] == ser.iloc[0] + offset
  74. assert res2.iloc[-1] == ser.iloc[-1] + offset
  75. # TODO: Check randomly assorted entries, not just first/last
  76. @pytest.mark.xfail
  77. @given(gen_yqm_offset)
  78. def test_shift_across_dst(offset):
  79. # GH#18319 check that 1) timezone is correctly normalized and
  80. # 2) that hour is not incorrectly changed by this normalization
  81. # Note that dti includes a transition across DST boundary
  82. dti = pd.date_range(start='2017-10-30 12:00:00', end='2017-11-06',
  83. freq='D', tz='US/Eastern')
  84. assert (dti.hour == 12).all() # we haven't screwed up yet
  85. res = dti + offset
  86. assert (res.hour == 12).all()