123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316 |
- from __future__ import division
- from datetime import timedelta
- import numpy as np
- import pytest
- from pandas.core.dtypes.common import is_integer
- from pandas import (
- DateOffset, Interval, IntervalIndex, Timedelta, Timestamp, date_range,
- interval_range, timedelta_range)
- import pandas.util.testing as tm
- from pandas.tseries.offsets import Day
@pytest.fixture(scope='class', params=[None, 'foo'])
def name(request):
    """Class-scoped fixture yielding each index name to test with.

    Parametrized over ``None`` and ``'foo'``, so every test requesting
    ``name`` runs once per value.
    """
    index_name = request.param
    return index_name
class TestIntervalRange(object):
    """Tests for ``interval_range`` construction and argument validation.

    NOTE(review): the ``closed`` fixture requested by several tests is not
    defined in this module -- presumably it comes from a shared conftest.
    """

    @pytest.mark.parametrize('freq, periods', [
        (1, 100), (2.5, 40), (5, 20), (25, 4)])
    def test_constructor_numeric(self, closed, name, freq, periods):
        """Each three-argument numeric spec must build the same index."""
        start, end = 0, 100
        expected = IntervalIndex.from_breaks(
            np.arange(101, step=freq), name=name, closed=closed)

        specs = [
            # defined from start/end/freq
            dict(start=start, end=end, freq=freq),
            # defined from start/periods/freq
            dict(start=start, periods=periods, freq=freq),
            # defined from end/periods/freq
            dict(end=end, periods=periods, freq=freq),
            # GH 20976: linspace behavior defined from start/end/periods
            dict(start=start, end=end, periods=periods),
        ]
        for spec in specs:
            result = interval_range(name=name, closed=closed, **spec)
            tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize('tz', [None, 'US/Eastern'])
    @pytest.mark.parametrize('freq, periods', [
        ('D', 364), ('2D', 182), ('22D18H', 16), ('M', 11)])
    def test_constructor_timestamp(self, closed, name, freq, periods, tz):
        """Each three-argument Timestamp spec must build the same index."""
        start = Timestamp('20180101', tz=tz)
        end = Timestamp('20181231', tz=tz)
        breaks = date_range(start=start, end=end, freq=freq)
        expected = IntervalIndex.from_breaks(breaks, name=name, closed=closed)

        freq_specs = [
            # defined from start/end/freq
            dict(start=start, end=end, freq=freq),
            # defined from start/periods/freq
            dict(start=start, periods=periods, freq=freq),
            # defined from end/periods/freq
            dict(end=end, periods=periods, freq=freq),
        ]
        for spec in freq_specs:
            result = interval_range(name=name, closed=closed, **spec)
            tm.assert_index_equal(result, expected)

        # GH 20976: linspace behavior defined from start/end/periods
        # matches expected only for non-anchored offsets and tz naive
        # (anchored/DST transitions cause unequal spacing in expected)
        if breaks.freq.isAnchored() or tz is not None:
            return
        result = interval_range(
            start=start, end=end, periods=periods, name=name, closed=closed)
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize('freq, periods', [
        ('D', 100), ('2D12H', 40), ('5D', 20), ('25D', 4)])
    def test_constructor_timedelta(self, closed, name, freq, periods):
        """Each three-argument Timedelta spec must build the same index."""
        start = Timedelta('0 days')
        end = Timedelta('100 days')
        expected = IntervalIndex.from_breaks(
            timedelta_range(start=start, end=end, freq=freq),
            name=name, closed=closed)

        specs = [
            # defined from start/end/freq
            dict(start=start, end=end, freq=freq),
            # defined from start/periods/freq
            dict(start=start, periods=periods, freq=freq),
            # defined from end/periods/freq
            dict(end=end, periods=periods, freq=freq),
            # GH 20976: linspace behavior defined from start/end/periods
            dict(start=start, end=end, periods=periods),
        ]
        for spec in specs:
            result = interval_range(name=name, closed=closed, **spec)
            tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize('start, end, freq, expected_endpoint', [
        (0, 10, 3, 9),
        (0, 10, 1.5, 9),
        (0.5, 10, 3, 9.5),
        (Timedelta('0D'), Timedelta('10D'), '2D4H', Timedelta('8D16H')),
        (Timestamp('2018-01-01'),
         Timestamp('2018-02-09'),
         'MS',
         Timestamp('2018-02-01')),
        (Timestamp('2018-01-01', tz='US/Eastern'),
         Timestamp('2018-01-20', tz='US/Eastern'),
         '5D12H',
         Timestamp('2018-01-17 12:00:00', tz='US/Eastern'))])
    def test_early_truncation(self, start, end, freq, expected_endpoint):
        """The last right edge stops short when freq would step over end."""
        index = interval_range(start=start, end=end, freq=freq)
        last_right_edge = index.right[-1]
        assert last_right_edge == expected_endpoint

    @pytest.mark.parametrize('start, end, freq', [
        (0.5, None, None),
        (None, 4.5, None),
        (0.5, None, 1.5),
        (None, 6.5, 1.5)])
    def test_no_invalid_float_truncation(self, start, end, freq):
        """Float endpoints are kept as-is with four periods (GH 21161)."""
        # The parametrized cases imply a step of 1.0 without freq and a
        # step of 1.5 with it; pick the matching breaks.
        breaks = ([0.5, 1.5, 2.5, 3.5, 4.5] if freq is None
                  else [0.5, 2.0, 3.5, 5.0, 6.5])
        expected = IntervalIndex.from_breaks(breaks)

        result = interval_range(start=start, end=end, periods=4, freq=freq)
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize('start, mid, end', [
        (Timestamp('2018-03-10', tz='US/Eastern'),
         Timestamp('2018-03-10 23:30:00', tz='US/Eastern'),
         Timestamp('2018-03-12', tz='US/Eastern')),
        (Timestamp('2018-11-03', tz='US/Eastern'),
         Timestamp('2018-11-04 00:30:00', tz='US/Eastern'),
         Timestamp('2018-11-05', tz='US/Eastern'))])
    def test_linspace_dst_transition(self, start, mid, end):
        """Two periods across a DST change split at the given midpoint."""
        # GH 20976: linspace behavior defined from start/end/periods
        # accounts for the hour gained/lost during DST transition
        result = interval_range(start=start, end=end, periods=2)
        tm.assert_index_equal(
            result, IntervalIndex.from_breaks([start, mid, end]))

    @pytest.mark.parametrize('freq', [2, 2.0])
    @pytest.mark.parametrize('end', [10, 10.0])
    @pytest.mark.parametrize('start', [0, 0.0])
    def test_float_subtype(self, start, end, freq):
        """The subtype is float64 as soon as any supplied value is float."""
        # Has float subtype if any of start/end/freq are float, even if all
        # resulting endpoints can safely be upcast to integers.  Summing the
        # supplied values gives a float exactly when one of them is a float.
        cases = [
            # defined from start/end/freq
            (dict(start=start, end=end, freq=freq), start + end + freq),
            # defined from start/periods/freq
            (dict(start=start, periods=5, freq=freq), start + freq),
            # defined from end/periods/freq
            (dict(end=end, periods=5, freq=freq), end + freq),
            # GH 20976: linspace behavior defined from start/end/periods
            (dict(start=start, end=end, periods=5), start + end),
        ]
        for kwargs, total in cases:
            subtype = interval_range(**kwargs).dtype.subtype
            expected = 'int64' if is_integer(total) else 'float64'
            assert subtype == expected

    def test_constructor_coverage(self):
        """Equivalent spellings of periods/start/end/freq give equal output."""
        # float value for periods
        expected = interval_range(start=0, periods=10)
        tm.assert_index_equal(
            interval_range(start=0, periods=10.5), expected)

        # equivalent timestamp-like start/end
        start = Timestamp('2017-01-01')
        end = Timestamp('2017-01-15')
        expected = interval_range(start=start, end=end)

        from_pydatetime = interval_range(
            start=start.to_pydatetime(), end=end.to_pydatetime())
        tm.assert_index_equal(from_pydatetime, expected)

        from_datetime64 = interval_range(start=start.asm8, end=end.asm8)
        tm.assert_index_equal(from_datetime64, expected)

        # equivalent freq with timestamp
        daily_aliases = ['D', Day(), Timedelta(days=1), timedelta(days=1),
                         DateOffset(days=1)]
        for alias in daily_aliases:
            tm.assert_index_equal(
                interval_range(start=start, end=end, freq=alias), expected)

        # equivalent timedelta-like start/end
        start = Timedelta(days=1)
        end = Timedelta(days=10)
        expected = interval_range(start=start, end=end)

        from_pytimedelta = interval_range(
            start=start.to_pytimedelta(), end=end.to_pytimedelta())
        tm.assert_index_equal(from_pytimedelta, expected)

        from_timedelta64 = interval_range(start=start.asm8, end=end.asm8)
        tm.assert_index_equal(from_timedelta64, expected)

        # equivalent freq with timedelta
        daily_aliases = ['D', Day(), Timedelta(days=1), timedelta(days=1)]
        for alias in daily_aliases:
            tm.assert_index_equal(
                interval_range(start=start, end=end, freq=alias), expected)

    def test_errors(self):
        """Invalid argument combinations raise the documented errors."""
        # not enough params (first four) / too many params (last one)
        msg = ('Of the four parameters: start, end, periods, and freq, '
               'exactly three must be specified')
        wrong_count = [
            dict(start=0),
            dict(end=5),
            dict(periods=2),
            dict(),
            dict(start=0, end=5, periods=6, freq=1.5),
        ]
        for kwargs in wrong_count:
            with pytest.raises(ValueError, match=msg):
                interval_range(**kwargs)

        # mixed units
        msg = 'start, end, freq need to be type compatible'
        mixed_units = [
            dict(start=0, end=Timestamp('20130101'), freq=2),
            dict(start=0, end=Timedelta('1 day'), freq=2),
            dict(start=0, end=10, freq='D'),
            dict(start=Timestamp('20130101'), end=10, freq='D'),
            dict(start=Timestamp('20130101'), end=Timedelta('1 day'),
                 freq='D'),
            dict(start=Timestamp('20130101'), end=Timestamp('20130110'),
                 freq=2),
            dict(start=Timedelta('1 day'), end=10, freq='D'),
            dict(start=Timedelta('1 day'), end=Timestamp('20130110'),
                 freq='D'),
            dict(start=Timedelta('1 day'), end=Timedelta('10 days'),
                 freq=2),
        ]
        for kwargs in mixed_units:
            with pytest.raises(TypeError, match=msg):
                interval_range(**kwargs)

        # invalid periods
        msg = 'periods must be a number, got foo'
        with pytest.raises(TypeError, match=msg):
            interval_range(start=0, periods='foo')

        # invalid start
        msg = 'start must be numeric or datetime-like, got foo'
        with pytest.raises(ValueError, match=msg):
            interval_range(start='foo', periods=10)

        # invalid end
        msg = r'end must be numeric or datetime-like, got \(0, 1\]'
        with pytest.raises(ValueError, match=msg):
            interval_range(end=Interval(0, 1), periods=10)

        # invalid freq for datetime-like
        msg = 'freq must be numeric or convertible to DateOffset, got foo'
        bad_freq = [
            dict(start=0, end=10, freq='foo'),
            dict(start=Timestamp('20130101'), periods=10, freq='foo'),
            dict(end=Timedelta('1 day'), periods=10, freq='foo'),
        ]
        for kwargs in bad_freq:
            with pytest.raises(ValueError, match=msg):
                interval_range(**kwargs)

        # mixed tz
        eastern_start = Timestamp('2017-01-01', tz='US/Eastern')
        pacific_end = Timestamp('2017-01-07', tz='US/Pacific')
        msg = 'Start and end cannot both be tz-aware with different timezones'
        with pytest.raises(TypeError, match=msg):
            interval_range(start=eastern_start, end=pacific_end)
|