test_interval_range.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316
  1. from __future__ import division
  2. from datetime import timedelta
  3. import numpy as np
  4. import pytest
  5. from pandas.core.dtypes.common import is_integer
  6. from pandas import (
  7. DateOffset, Interval, IntervalIndex, Timedelta, Timestamp, date_range,
  8. interval_range, timedelta_range)
  9. import pandas.util.testing as tm
  10. from pandas.tseries.offsets import Day
  11. @pytest.fixture(scope='class', params=[None, 'foo'])
  12. def name(request):
  13. return request.param
  14. class TestIntervalRange(object):
  15. @pytest.mark.parametrize('freq, periods', [
  16. (1, 100), (2.5, 40), (5, 20), (25, 4)])
  17. def test_constructor_numeric(self, closed, name, freq, periods):
  18. start, end = 0, 100
  19. breaks = np.arange(101, step=freq)
  20. expected = IntervalIndex.from_breaks(breaks, name=name, closed=closed)
  21. # defined from start/end/freq
  22. result = interval_range(
  23. start=start, end=end, freq=freq, name=name, closed=closed)
  24. tm.assert_index_equal(result, expected)
  25. # defined from start/periods/freq
  26. result = interval_range(
  27. start=start, periods=periods, freq=freq, name=name, closed=closed)
  28. tm.assert_index_equal(result, expected)
  29. # defined from end/periods/freq
  30. result = interval_range(
  31. end=end, periods=periods, freq=freq, name=name, closed=closed)
  32. tm.assert_index_equal(result, expected)
  33. # GH 20976: linspace behavior defined from start/end/periods
  34. result = interval_range(
  35. start=start, end=end, periods=periods, name=name, closed=closed)
  36. tm.assert_index_equal(result, expected)
  37. @pytest.mark.parametrize('tz', [None, 'US/Eastern'])
  38. @pytest.mark.parametrize('freq, periods', [
  39. ('D', 364), ('2D', 182), ('22D18H', 16), ('M', 11)])
  40. def test_constructor_timestamp(self, closed, name, freq, periods, tz):
  41. start, end = Timestamp('20180101', tz=tz), Timestamp('20181231', tz=tz)
  42. breaks = date_range(start=start, end=end, freq=freq)
  43. expected = IntervalIndex.from_breaks(breaks, name=name, closed=closed)
  44. # defined from start/end/freq
  45. result = interval_range(
  46. start=start, end=end, freq=freq, name=name, closed=closed)
  47. tm.assert_index_equal(result, expected)
  48. # defined from start/periods/freq
  49. result = interval_range(
  50. start=start, periods=periods, freq=freq, name=name, closed=closed)
  51. tm.assert_index_equal(result, expected)
  52. # defined from end/periods/freq
  53. result = interval_range(
  54. end=end, periods=periods, freq=freq, name=name, closed=closed)
  55. tm.assert_index_equal(result, expected)
  56. # GH 20976: linspace behavior defined from start/end/periods
  57. if not breaks.freq.isAnchored() and tz is None:
  58. # matches expected only for non-anchored offsets and tz naive
  59. # (anchored/DST transitions cause unequal spacing in expected)
  60. result = interval_range(start=start, end=end, periods=periods,
  61. name=name, closed=closed)
  62. tm.assert_index_equal(result, expected)
  63. @pytest.mark.parametrize('freq, periods', [
  64. ('D', 100), ('2D12H', 40), ('5D', 20), ('25D', 4)])
  65. def test_constructor_timedelta(self, closed, name, freq, periods):
  66. start, end = Timedelta('0 days'), Timedelta('100 days')
  67. breaks = timedelta_range(start=start, end=end, freq=freq)
  68. expected = IntervalIndex.from_breaks(breaks, name=name, closed=closed)
  69. # defined from start/end/freq
  70. result = interval_range(
  71. start=start, end=end, freq=freq, name=name, closed=closed)
  72. tm.assert_index_equal(result, expected)
  73. # defined from start/periods/freq
  74. result = interval_range(
  75. start=start, periods=periods, freq=freq, name=name, closed=closed)
  76. tm.assert_index_equal(result, expected)
  77. # defined from end/periods/freq
  78. result = interval_range(
  79. end=end, periods=periods, freq=freq, name=name, closed=closed)
  80. tm.assert_index_equal(result, expected)
  81. # GH 20976: linspace behavior defined from start/end/periods
  82. result = interval_range(
  83. start=start, end=end, periods=periods, name=name, closed=closed)
  84. tm.assert_index_equal(result, expected)
  85. @pytest.mark.parametrize('start, end, freq, expected_endpoint', [
  86. (0, 10, 3, 9),
  87. (0, 10, 1.5, 9),
  88. (0.5, 10, 3, 9.5),
  89. (Timedelta('0D'), Timedelta('10D'), '2D4H', Timedelta('8D16H')),
  90. (Timestamp('2018-01-01'),
  91. Timestamp('2018-02-09'),
  92. 'MS',
  93. Timestamp('2018-02-01')),
  94. (Timestamp('2018-01-01', tz='US/Eastern'),
  95. Timestamp('2018-01-20', tz='US/Eastern'),
  96. '5D12H',
  97. Timestamp('2018-01-17 12:00:00', tz='US/Eastern'))])
  98. def test_early_truncation(self, start, end, freq, expected_endpoint):
  99. # index truncates early if freq causes end to be skipped
  100. result = interval_range(start=start, end=end, freq=freq)
  101. result_endpoint = result.right[-1]
  102. assert result_endpoint == expected_endpoint
  103. @pytest.mark.parametrize('start, end, freq', [
  104. (0.5, None, None),
  105. (None, 4.5, None),
  106. (0.5, None, 1.5),
  107. (None, 6.5, 1.5)])
  108. def test_no_invalid_float_truncation(self, start, end, freq):
  109. # GH 21161
  110. if freq is None:
  111. breaks = [0.5, 1.5, 2.5, 3.5, 4.5]
  112. else:
  113. breaks = [0.5, 2.0, 3.5, 5.0, 6.5]
  114. expected = IntervalIndex.from_breaks(breaks)
  115. result = interval_range(start=start, end=end, periods=4, freq=freq)
  116. tm.assert_index_equal(result, expected)
  117. @pytest.mark.parametrize('start, mid, end', [
  118. (Timestamp('2018-03-10', tz='US/Eastern'),
  119. Timestamp('2018-03-10 23:30:00', tz='US/Eastern'),
  120. Timestamp('2018-03-12', tz='US/Eastern')),
  121. (Timestamp('2018-11-03', tz='US/Eastern'),
  122. Timestamp('2018-11-04 00:30:00', tz='US/Eastern'),
  123. Timestamp('2018-11-05', tz='US/Eastern'))])
  124. def test_linspace_dst_transition(self, start, mid, end):
  125. # GH 20976: linspace behavior defined from start/end/periods
  126. # accounts for the hour gained/lost during DST transition
  127. result = interval_range(start=start, end=end, periods=2)
  128. expected = IntervalIndex.from_breaks([start, mid, end])
  129. tm.assert_index_equal(result, expected)
  130. @pytest.mark.parametrize('freq', [2, 2.0])
  131. @pytest.mark.parametrize('end', [10, 10.0])
  132. @pytest.mark.parametrize('start', [0, 0.0])
  133. def test_float_subtype(self, start, end, freq):
  134. # Has float subtype if any of start/end/freq are float, even if all
  135. # resulting endpoints can safely be upcast to integers
  136. # defined from start/end/freq
  137. index = interval_range(start=start, end=end, freq=freq)
  138. result = index.dtype.subtype
  139. expected = 'int64' if is_integer(start + end + freq) else 'float64'
  140. assert result == expected
  141. # defined from start/periods/freq
  142. index = interval_range(start=start, periods=5, freq=freq)
  143. result = index.dtype.subtype
  144. expected = 'int64' if is_integer(start + freq) else 'float64'
  145. assert result == expected
  146. # defined from end/periods/freq
  147. index = interval_range(end=end, periods=5, freq=freq)
  148. result = index.dtype.subtype
  149. expected = 'int64' if is_integer(end + freq) else 'float64'
  150. assert result == expected
  151. # GH 20976: linspace behavior defined from start/end/periods
  152. index = interval_range(start=start, end=end, periods=5)
  153. result = index.dtype.subtype
  154. expected = 'int64' if is_integer(start + end) else 'float64'
  155. assert result == expected
  156. def test_constructor_coverage(self):
  157. # float value for periods
  158. expected = interval_range(start=0, periods=10)
  159. result = interval_range(start=0, periods=10.5)
  160. tm.assert_index_equal(result, expected)
  161. # equivalent timestamp-like start/end
  162. start, end = Timestamp('2017-01-01'), Timestamp('2017-01-15')
  163. expected = interval_range(start=start, end=end)
  164. result = interval_range(start=start.to_pydatetime(),
  165. end=end.to_pydatetime())
  166. tm.assert_index_equal(result, expected)
  167. result = interval_range(start=start.asm8, end=end.asm8)
  168. tm.assert_index_equal(result, expected)
  169. # equivalent freq with timestamp
  170. equiv_freq = ['D', Day(), Timedelta(days=1), timedelta(days=1),
  171. DateOffset(days=1)]
  172. for freq in equiv_freq:
  173. result = interval_range(start=start, end=end, freq=freq)
  174. tm.assert_index_equal(result, expected)
  175. # equivalent timedelta-like start/end
  176. start, end = Timedelta(days=1), Timedelta(days=10)
  177. expected = interval_range(start=start, end=end)
  178. result = interval_range(start=start.to_pytimedelta(),
  179. end=end.to_pytimedelta())
  180. tm.assert_index_equal(result, expected)
  181. result = interval_range(start=start.asm8, end=end.asm8)
  182. tm.assert_index_equal(result, expected)
  183. # equivalent freq with timedelta
  184. equiv_freq = ['D', Day(), Timedelta(days=1), timedelta(days=1)]
  185. for freq in equiv_freq:
  186. result = interval_range(start=start, end=end, freq=freq)
  187. tm.assert_index_equal(result, expected)
  188. def test_errors(self):
  189. # not enough params
  190. msg = ('Of the four parameters: start, end, periods, and freq, '
  191. 'exactly three must be specified')
  192. with pytest.raises(ValueError, match=msg):
  193. interval_range(start=0)
  194. with pytest.raises(ValueError, match=msg):
  195. interval_range(end=5)
  196. with pytest.raises(ValueError, match=msg):
  197. interval_range(periods=2)
  198. with pytest.raises(ValueError, match=msg):
  199. interval_range()
  200. # too many params
  201. with pytest.raises(ValueError, match=msg):
  202. interval_range(start=0, end=5, periods=6, freq=1.5)
  203. # mixed units
  204. msg = 'start, end, freq need to be type compatible'
  205. with pytest.raises(TypeError, match=msg):
  206. interval_range(start=0, end=Timestamp('20130101'), freq=2)
  207. with pytest.raises(TypeError, match=msg):
  208. interval_range(start=0, end=Timedelta('1 day'), freq=2)
  209. with pytest.raises(TypeError, match=msg):
  210. interval_range(start=0, end=10, freq='D')
  211. with pytest.raises(TypeError, match=msg):
  212. interval_range(start=Timestamp('20130101'), end=10, freq='D')
  213. with pytest.raises(TypeError, match=msg):
  214. interval_range(start=Timestamp('20130101'),
  215. end=Timedelta('1 day'), freq='D')
  216. with pytest.raises(TypeError, match=msg):
  217. interval_range(start=Timestamp('20130101'),
  218. end=Timestamp('20130110'), freq=2)
  219. with pytest.raises(TypeError, match=msg):
  220. interval_range(start=Timedelta('1 day'), end=10, freq='D')
  221. with pytest.raises(TypeError, match=msg):
  222. interval_range(start=Timedelta('1 day'),
  223. end=Timestamp('20130110'), freq='D')
  224. with pytest.raises(TypeError, match=msg):
  225. interval_range(start=Timedelta('1 day'),
  226. end=Timedelta('10 days'), freq=2)
  227. # invalid periods
  228. msg = 'periods must be a number, got foo'
  229. with pytest.raises(TypeError, match=msg):
  230. interval_range(start=0, periods='foo')
  231. # invalid start
  232. msg = 'start must be numeric or datetime-like, got foo'
  233. with pytest.raises(ValueError, match=msg):
  234. interval_range(start='foo', periods=10)
  235. # invalid end
  236. msg = r'end must be numeric or datetime-like, got \(0, 1\]'
  237. with pytest.raises(ValueError, match=msg):
  238. interval_range(end=Interval(0, 1), periods=10)
  239. # invalid freq for datetime-like
  240. msg = 'freq must be numeric or convertible to DateOffset, got foo'
  241. with pytest.raises(ValueError, match=msg):
  242. interval_range(start=0, end=10, freq='foo')
  243. with pytest.raises(ValueError, match=msg):
  244. interval_range(start=Timestamp('20130101'), periods=10, freq='foo')
  245. with pytest.raises(ValueError, match=msg):
  246. interval_range(end=Timedelta('1 day'), periods=10, freq='foo')
  247. # mixed tz
  248. start = Timestamp('2017-01-01', tz='US/Eastern')
  249. end = Timestamp('2017-01-07', tz='US/Pacific')
  250. msg = 'Start and end cannot both be tz-aware with different timezones'
  251. with pytest.raises(TypeError, match=msg):
  252. interval_range(start=start, end=end)