123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498 |
- from datetime import datetime
- import warnings
- import numpy as np
- import pytest
- from pandas.core.dtypes.generic import ABCDateOffset
- import pandas as pd
- from pandas import (
- DatetimeIndex, Index, PeriodIndex, Series, Timestamp, bdate_range,
- date_range)
- from pandas.tests.test_base import Ops
- import pandas.util.testing as tm
- from pandas.tseries.offsets import BDay, BMonthEnd, CDay, Day, Hour
- START, END = datetime(2009, 1, 1), datetime(2010, 1, 1)
- class TestDatetimeIndexOps(Ops):
- def setup_method(self, method):
- super(TestDatetimeIndexOps, self).setup_method(method)
- mask = lambda x: (isinstance(x, DatetimeIndex) or
- isinstance(x, PeriodIndex))
- self.is_valid_objs = [o for o in self.objs if mask(o)]
- self.not_valid_objs = [o for o in self.objs if not mask(o)]
- def test_ops_properties(self):
- f = lambda x: isinstance(x, DatetimeIndex)
- self.check_ops_properties(DatetimeIndex._field_ops, f)
- self.check_ops_properties(DatetimeIndex._object_ops, f)
- self.check_ops_properties(DatetimeIndex._bool_ops, f)
- def test_ops_properties_basic(self):
- # sanity check that the behavior didn't change
- # GH#7206
- for op in ['year', 'day', 'second', 'weekday']:
- pytest.raises(TypeError, lambda x: getattr(self.dt_series, op))
- # attribute access should still work!
- s = Series(dict(year=2000, month=1, day=10))
- assert s.year == 2000
- assert s.month == 1
- assert s.day == 10
- pytest.raises(AttributeError, lambda: s.weekday)
- def test_repeat_range(self, tz_naive_fixture):
- tz = tz_naive_fixture
- rng = date_range('1/1/2000', '1/1/2001')
- result = rng.repeat(5)
- assert result.freq is None
- assert len(result) == 5 * len(rng)
- index = pd.date_range('2001-01-01', periods=2, freq='D', tz=tz)
- exp = pd.DatetimeIndex(['2001-01-01', '2001-01-01',
- '2001-01-02', '2001-01-02'], tz=tz)
- for res in [index.repeat(2), np.repeat(index, 2)]:
- tm.assert_index_equal(res, exp)
- assert res.freq is None
- index = pd.date_range('2001-01-01', periods=2, freq='2D', tz=tz)
- exp = pd.DatetimeIndex(['2001-01-01', '2001-01-01',
- '2001-01-03', '2001-01-03'], tz=tz)
- for res in [index.repeat(2), np.repeat(index, 2)]:
- tm.assert_index_equal(res, exp)
- assert res.freq is None
- index = pd.DatetimeIndex(['2001-01-01', 'NaT', '2003-01-01'],
- tz=tz)
- exp = pd.DatetimeIndex(['2001-01-01', '2001-01-01', '2001-01-01',
- 'NaT', 'NaT', 'NaT',
- '2003-01-01', '2003-01-01', '2003-01-01'],
- tz=tz)
- for res in [index.repeat(3), np.repeat(index, 3)]:
- tm.assert_index_equal(res, exp)
- assert res.freq is None
- def test_repeat(self, tz_naive_fixture):
- tz = tz_naive_fixture
- reps = 2
- msg = "the 'axis' parameter is not supported"
- rng = pd.date_range(start='2016-01-01', periods=2,
- freq='30Min', tz=tz)
- expected_rng = DatetimeIndex([
- Timestamp('2016-01-01 00:00:00', tz=tz, freq='30T'),
- Timestamp('2016-01-01 00:00:00', tz=tz, freq='30T'),
- Timestamp('2016-01-01 00:30:00', tz=tz, freq='30T'),
- Timestamp('2016-01-01 00:30:00', tz=tz, freq='30T'),
- ])
- res = rng.repeat(reps)
- tm.assert_index_equal(res, expected_rng)
- assert res.freq is None
- tm.assert_index_equal(np.repeat(rng, reps), expected_rng)
- with pytest.raises(ValueError, match=msg):
- np.repeat(rng, reps, axis=1)
- def test_resolution(self, tz_naive_fixture):
- tz = tz_naive_fixture
- for freq, expected in zip(['A', 'Q', 'M', 'D', 'H', 'T',
- 'S', 'L', 'U'],
- ['day', 'day', 'day', 'day', 'hour',
- 'minute', 'second', 'millisecond',
- 'microsecond']):
- idx = pd.date_range(start='2013-04-01', periods=30, freq=freq,
- tz=tz)
- assert idx.resolution == expected
- def test_value_counts_unique(self, tz_naive_fixture):
- tz = tz_naive_fixture
- # GH 7735
- idx = pd.date_range('2011-01-01 09:00', freq='H', periods=10)
- # create repeated values, 'n'th element is repeated by n+1 times
- idx = DatetimeIndex(np.repeat(idx.values, range(1, len(idx) + 1)),
- tz=tz)
- exp_idx = pd.date_range('2011-01-01 18:00', freq='-1H', periods=10,
- tz=tz)
- expected = Series(range(10, 0, -1), index=exp_idx, dtype='int64')
- for obj in [idx, Series(idx)]:
- tm.assert_series_equal(obj.value_counts(), expected)
- expected = pd.date_range('2011-01-01 09:00', freq='H', periods=10,
- tz=tz)
- tm.assert_index_equal(idx.unique(), expected)
- idx = DatetimeIndex(['2013-01-01 09:00', '2013-01-01 09:00',
- '2013-01-01 09:00', '2013-01-01 08:00',
- '2013-01-01 08:00', pd.NaT], tz=tz)
- exp_idx = DatetimeIndex(['2013-01-01 09:00', '2013-01-01 08:00'],
- tz=tz)
- expected = Series([3, 2], index=exp_idx)
- for obj in [idx, Series(idx)]:
- tm.assert_series_equal(obj.value_counts(), expected)
- exp_idx = DatetimeIndex(['2013-01-01 09:00', '2013-01-01 08:00',
- pd.NaT], tz=tz)
- expected = Series([3, 2, 1], index=exp_idx)
- for obj in [idx, Series(idx)]:
- tm.assert_series_equal(obj.value_counts(dropna=False),
- expected)
- tm.assert_index_equal(idx.unique(), exp_idx)
- def test_nonunique_contains(self):
- # GH 9512
- for idx in map(DatetimeIndex,
- ([0, 1, 0], [0, 0, -1], [0, -1, -1],
- ['2015', '2015', '2016'], ['2015', '2015', '2014'])):
- assert idx[0] in idx
- @pytest.mark.parametrize('idx',
- [
- DatetimeIndex(
- ['2011-01-01',
- '2011-01-02',
- '2011-01-03'],
- freq='D', name='idx'),
- DatetimeIndex(
- ['2011-01-01 09:00',
- '2011-01-01 10:00',
- '2011-01-01 11:00'],
- freq='H', name='tzidx', tz='Asia/Tokyo')
- ])
- def test_order_with_freq(self, idx):
- ordered = idx.sort_values()
- tm.assert_index_equal(ordered, idx)
- assert ordered.freq == idx.freq
- ordered = idx.sort_values(ascending=False)
- expected = idx[::-1]
- tm.assert_index_equal(ordered, expected)
- assert ordered.freq == expected.freq
- assert ordered.freq.n == -1
- ordered, indexer = idx.sort_values(return_indexer=True)
- tm.assert_index_equal(ordered, idx)
- tm.assert_numpy_array_equal(indexer, np.array([0, 1, 2]),
- check_dtype=False)
- assert ordered.freq == idx.freq
- ordered, indexer = idx.sort_values(return_indexer=True,
- ascending=False)
- expected = idx[::-1]
- tm.assert_index_equal(ordered, expected)
- tm.assert_numpy_array_equal(indexer,
- np.array([2, 1, 0]),
- check_dtype=False)
- assert ordered.freq == expected.freq
- assert ordered.freq.n == -1
- @pytest.mark.parametrize('index_dates,expected_dates', [
- (['2011-01-01', '2011-01-03', '2011-01-05',
- '2011-01-02', '2011-01-01'],
- ['2011-01-01', '2011-01-01', '2011-01-02',
- '2011-01-03', '2011-01-05']),
- (['2011-01-01', '2011-01-03', '2011-01-05',
- '2011-01-02', '2011-01-01'],
- ['2011-01-01', '2011-01-01', '2011-01-02',
- '2011-01-03', '2011-01-05']),
- ([pd.NaT, '2011-01-03', '2011-01-05',
- '2011-01-02', pd.NaT],
- [pd.NaT, pd.NaT, '2011-01-02', '2011-01-03',
- '2011-01-05'])
- ])
- def test_order_without_freq(self, index_dates, expected_dates,
- tz_naive_fixture):
- tz = tz_naive_fixture
- # without freq
- index = DatetimeIndex(index_dates, tz=tz, name='idx')
- expected = DatetimeIndex(expected_dates, tz=tz, name='idx')
- ordered = index.sort_values()
- tm.assert_index_equal(ordered, expected)
- assert ordered.freq is None
- ordered = index.sort_values(ascending=False)
- tm.assert_index_equal(ordered, expected[::-1])
- assert ordered.freq is None
- ordered, indexer = index.sort_values(return_indexer=True)
- tm.assert_index_equal(ordered, expected)
- exp = np.array([0, 4, 3, 1, 2])
- tm.assert_numpy_array_equal(indexer, exp, check_dtype=False)
- assert ordered.freq is None
- ordered, indexer = index.sort_values(return_indexer=True,
- ascending=False)
- tm.assert_index_equal(ordered, expected[::-1])
- exp = np.array([2, 1, 3, 4, 0])
- tm.assert_numpy_array_equal(indexer, exp, check_dtype=False)
- assert ordered.freq is None
- def test_drop_duplicates_metadata(self):
- # GH 10115
- idx = pd.date_range('2011-01-01', '2011-01-31', freq='D', name='idx')
- result = idx.drop_duplicates()
- tm.assert_index_equal(idx, result)
- assert idx.freq == result.freq
- idx_dup = idx.append(idx)
- assert idx_dup.freq is None # freq is reset
- result = idx_dup.drop_duplicates()
- tm.assert_index_equal(idx, result)
- assert result.freq is None
- def test_drop_duplicates(self):
- # to check Index/Series compat
- base = pd.date_range('2011-01-01', '2011-01-31', freq='D', name='idx')
- idx = base.append(base[:5])
- res = idx.drop_duplicates()
- tm.assert_index_equal(res, base)
- res = Series(idx).drop_duplicates()
- tm.assert_series_equal(res, Series(base))
- res = idx.drop_duplicates(keep='last')
- exp = base[5:].append(base[:5])
- tm.assert_index_equal(res, exp)
- res = Series(idx).drop_duplicates(keep='last')
- tm.assert_series_equal(res, Series(exp, index=np.arange(5, 36)))
- res = idx.drop_duplicates(keep=False)
- tm.assert_index_equal(res, base[5:])
- res = Series(idx).drop_duplicates(keep=False)
- tm.assert_series_equal(res, Series(base[5:], index=np.arange(5, 31)))
- @pytest.mark.parametrize('freq', [
- 'A', '2A', '-2A', 'Q', '-1Q', 'M', '-1M', 'D', '3D',
- '-3D', 'W', '-1W', 'H', '2H', '-2H', 'T', '2T', 'S',
- '-3S'])
- def test_infer_freq(self, freq):
- # GH 11018
- idx = pd.date_range('2011-01-01 09:00:00', freq=freq, periods=10)
- result = pd.DatetimeIndex(idx.asi8, freq='infer')
- tm.assert_index_equal(idx, result)
- assert result.freq == freq
- def test_nat(self, tz_naive_fixture):
- tz = tz_naive_fixture
- assert pd.DatetimeIndex._na_value is pd.NaT
- assert pd.DatetimeIndex([])._na_value is pd.NaT
- idx = pd.DatetimeIndex(['2011-01-01', '2011-01-02'], tz=tz)
- assert idx._can_hold_na
- tm.assert_numpy_array_equal(idx._isnan, np.array([False, False]))
- assert idx.hasnans is False
- tm.assert_numpy_array_equal(idx._nan_idxs,
- np.array([], dtype=np.intp))
- idx = pd.DatetimeIndex(['2011-01-01', 'NaT'], tz=tz)
- assert idx._can_hold_na
- tm.assert_numpy_array_equal(idx._isnan, np.array([False, True]))
- assert idx.hasnans is True
- tm.assert_numpy_array_equal(idx._nan_idxs,
- np.array([1], dtype=np.intp))
- def test_equals(self):
- # GH 13107
- idx = pd.DatetimeIndex(['2011-01-01', '2011-01-02', 'NaT'])
- assert idx.equals(idx)
- assert idx.equals(idx.copy())
- assert idx.equals(idx.astype(object))
- assert idx.astype(object).equals(idx)
- assert idx.astype(object).equals(idx.astype(object))
- assert not idx.equals(list(idx))
- assert not idx.equals(pd.Series(idx))
- idx2 = pd.DatetimeIndex(['2011-01-01', '2011-01-02', 'NaT'],
- tz='US/Pacific')
- assert not idx.equals(idx2)
- assert not idx.equals(idx2.copy())
- assert not idx.equals(idx2.astype(object))
- assert not idx.astype(object).equals(idx2)
- assert not idx.equals(list(idx2))
- assert not idx.equals(pd.Series(idx2))
- # same internal, different tz
- idx3 = pd.DatetimeIndex._simple_new(idx.asi8, tz='US/Pacific')
- tm.assert_numpy_array_equal(idx.asi8, idx3.asi8)
- assert not idx.equals(idx3)
- assert not idx.equals(idx3.copy())
- assert not idx.equals(idx3.astype(object))
- assert not idx.astype(object).equals(idx3)
- assert not idx.equals(list(idx3))
- assert not idx.equals(pd.Series(idx3))
- @pytest.mark.parametrize('values', [
- ['20180101', '20180103', '20180105'], []])
- @pytest.mark.parametrize('freq', [
- '2D', Day(2), '2B', BDay(2), '48H', Hour(48)])
- @pytest.mark.parametrize('tz', [None, 'US/Eastern'])
- def test_freq_setter(self, values, freq, tz):
- # GH 20678
- idx = DatetimeIndex(values, tz=tz)
- # can set to an offset, converting from string if necessary
- idx.freq = freq
- assert idx.freq == freq
- assert isinstance(idx.freq, ABCDateOffset)
- # can reset to None
- idx.freq = None
- assert idx.freq is None
- def test_freq_setter_errors(self):
- # GH 20678
- idx = DatetimeIndex(['20180101', '20180103', '20180105'])
- # setting with an incompatible freq
- msg = ('Inferred frequency 2D from passed values does not conform to '
- 'passed frequency 5D')
- with pytest.raises(ValueError, match=msg):
- idx.freq = '5D'
- # setting with non-freq string
- with pytest.raises(ValueError, match='Invalid frequency'):
- idx.freq = 'foo'
- def test_offset_deprecated(self):
- # GH 20716
- idx = pd.DatetimeIndex(['20180101', '20180102'])
- # getter deprecated
- with tm.assert_produces_warning(FutureWarning):
- idx.offset
- # setter deprecated
- with tm.assert_produces_warning(FutureWarning):
- idx.offset = BDay()
- class TestBusinessDatetimeIndex(object):
- def setup_method(self, method):
- self.rng = bdate_range(START, END)
- def test_comparison(self):
- d = self.rng[10]
- comp = self.rng > d
- assert comp[11]
- assert not comp[9]
- def test_pickle_unpickle(self):
- unpickled = tm.round_trip_pickle(self.rng)
- assert unpickled.freq is not None
- def test_copy(self):
- cp = self.rng.copy()
- repr(cp)
- tm.assert_index_equal(cp, self.rng)
- def test_shift(self):
- shifted = self.rng.shift(5)
- assert shifted[0] == self.rng[5]
- assert shifted.freq == self.rng.freq
- shifted = self.rng.shift(-5)
- assert shifted[5] == self.rng[0]
- assert shifted.freq == self.rng.freq
- shifted = self.rng.shift(0)
- assert shifted[0] == self.rng[0]
- assert shifted.freq == self.rng.freq
- rng = date_range(START, END, freq=BMonthEnd())
- shifted = rng.shift(1, freq=BDay())
- assert shifted[0] == rng[0] + BDay()
- def test_equals(self):
- assert not self.rng.equals(list(self.rng))
- def test_identical(self):
- t1 = self.rng.copy()
- t2 = self.rng.copy()
- assert t1.identical(t2)
- # name
- t1 = t1.rename('foo')
- assert t1.equals(t2)
- assert not t1.identical(t2)
- t2 = t2.rename('foo')
- assert t1.identical(t2)
- # freq
- t2v = Index(t2.values)
- assert t1.equals(t2v)
- assert not t1.identical(t2v)
- class TestCustomDatetimeIndex(object):
- def setup_method(self, method):
- self.rng = bdate_range(START, END, freq='C')
- def test_comparison(self):
- d = self.rng[10]
- comp = self.rng > d
- assert comp[11]
- assert not comp[9]
- def test_copy(self):
- cp = self.rng.copy()
- repr(cp)
- tm.assert_index_equal(cp, self.rng)
- def test_shift(self):
- shifted = self.rng.shift(5)
- assert shifted[0] == self.rng[5]
- assert shifted.freq == self.rng.freq
- shifted = self.rng.shift(-5)
- assert shifted[5] == self.rng[0]
- assert shifted.freq == self.rng.freq
- shifted = self.rng.shift(0)
- assert shifted[0] == self.rng[0]
- assert shifted.freq == self.rng.freq
- with warnings.catch_warnings(record=True):
- warnings.simplefilter("ignore", pd.errors.PerformanceWarning)
- rng = date_range(START, END, freq=BMonthEnd())
- shifted = rng.shift(1, freq=CDay())
- assert shifted[0] == rng[0] + CDay()
- def test_shift_periods(self):
- # GH#22458 : argument 'n' was deprecated in favor of 'periods'
- idx = pd.date_range(start=START, end=END, periods=3)
- tm.assert_index_equal(idx.shift(periods=0), idx)
- tm.assert_index_equal(idx.shift(0), idx)
- with tm.assert_produces_warning(FutureWarning,
- check_stacklevel=True):
- tm.assert_index_equal(idx.shift(n=0), idx)
- def test_pickle_unpickle(self):
- unpickled = tm.round_trip_pickle(self.rng)
- assert unpickled.freq is not None
- def test_equals(self):
- assert not self.rng.equals(list(self.rng))
|