123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329 |
- import numpy as np
- import pytest
- import pandas as pd
- from pandas import DatetimeIndex, Index, NaT, PeriodIndex, Series
- from pandas.core.arrays import PeriodArray
- from pandas.tests.test_base import Ops
- import pandas.util.testing as tm
class TestPeriodIndexOps(Ops):
    """Exercise the shared index operations against ``PeriodIndex``.

    Builds on the ``Ops`` base fixtures and adds PeriodIndex-specific checks
    for resolution, value counts/uniqueness, duplicate removal, sorting,
    NaT handling, equality and the deprecated ``freq`` setter.
    """

    def setup_method(self, method):
        """Run the base setup, then partition ``self.objs`` by index type.

        Objects that are a ``DatetimeIndex`` or ``PeriodIndex`` are stored in
        ``self.is_valid_objs``; all remaining objects are stored in
        ``self.not_valid_objs``.
        """
        super(TestPeriodIndexOps, self).setup_method(method)

        def is_datetimelike_index(obj):
            return isinstance(obj, (DatetimeIndex, PeriodIndex))

        self.is_valid_objs = [o for o in self.objs
                              if is_datetimelike_index(o)]
        self.not_valid_objs = [o for o in self.objs
                               if not is_datetimelike_index(o)]

    def test_ops_properties(self):
        """Run the inherited property checks for each PeriodArray op list.

        A PeriodIndex-only predicate is handed to ``check_ops_properties``
        together with the field, object and bool op lists in turn.
        """
        def is_period_index(obj):
            return isinstance(obj, PeriodIndex)

        for ops in (PeriodArray._field_ops,
                    PeriodArray._object_ops,
                    PeriodArray._bool_ops):
            self.check_ops_properties(ops, is_period_index)

    def test_resolution(self):
        """``resolution`` matches the expected name for each freq alias."""
        # (freq alias, expected resolution); anything coarser than daily
        # is expected to report 'day'.
        cases = [('A', 'day'),
                 ('Q', 'day'),
                 ('M', 'day'),
                 ('D', 'day'),
                 ('H', 'hour'),
                 ('T', 'minute'),
                 ('S', 'second'),
                 ('L', 'millisecond'),
                 ('U', 'microsecond')]
        for freq, expected in cases:
            idx = pd.period_range(start='2013-04-01', periods=30, freq=freq)
            assert idx.resolution == expected

    def test_value_counts_unique(self):
        """``value_counts`` and ``unique`` on repeated and NaT-bearing data."""
        # GH 7735
        idx = pd.period_range('2011-01-01 09:00', freq='H', periods=10)
        # create repeated values, 'n'th element is repeated by n+1 times
        idx = PeriodIndex(np.repeat(idx._values, range(1, len(idx) + 1)),
                          freq='H')

        # Most frequent value first: 18:00 occurs 10 times, 09:00 once.
        exp_idx = PeriodIndex(['2011-01-01 18:00', '2011-01-01 17:00',
                               '2011-01-01 16:00', '2011-01-01 15:00',
                               '2011-01-01 14:00', '2011-01-01 13:00',
                               '2011-01-01 12:00', '2011-01-01 11:00',
                               '2011-01-01 10:00',
                               '2011-01-01 09:00'], freq='H')
        expected = Series(range(10, 0, -1), index=exp_idx, dtype='int64')

        for obj in [idx, Series(idx)]:
            tm.assert_series_equal(obj.value_counts(), expected)

        expected = pd.period_range('2011-01-01 09:00', freq='H',
                                   periods=10)
        tm.assert_index_equal(idx.unique(), expected)

        idx = PeriodIndex(['2013-01-01 09:00', '2013-01-01 09:00',
                           '2013-01-01 09:00', '2013-01-01 08:00',
                           '2013-01-01 08:00', NaT], freq='H')

        # Default value_counts leaves NaT out of the result.
        exp_idx = PeriodIndex(['2013-01-01 09:00', '2013-01-01 08:00'],
                              freq='H')
        expected = Series([3, 2], index=exp_idx)

        for obj in [idx, Series(idx)]:
            tm.assert_series_equal(obj.value_counts(), expected)

        # With dropna=False the NaT entry is counted as well.
        exp_idx = PeriodIndex(['2013-01-01 09:00', '2013-01-01 08:00',
                               NaT], freq='H')
        expected = Series([3, 2, 1], index=exp_idx)

        for obj in [idx, Series(idx)]:
            tm.assert_series_equal(obj.value_counts(dropna=False), expected)

        tm.assert_index_equal(idx.unique(), exp_idx)

    def test_drop_duplicates_metadata(self):
        """``drop_duplicates`` keeps the freq, with or without duplicates."""
        # GH 10115
        idx = pd.period_range('2011-01-01', '2011-01-31', freq='D', name='idx')
        result = idx.drop_duplicates()
        tm.assert_index_equal(idx, result)
        assert idx.freq == result.freq

        idx_dup = idx.append(idx)  # freq will not be reset
        result = idx_dup.drop_duplicates()
        tm.assert_index_equal(idx, result)
        assert idx.freq == result.freq

    def test_drop_duplicates(self):
        """``drop_duplicates`` agrees between Index and Series for each keep."""
        # to check Index/Series compat
        base = pd.period_range('2011-01-01', '2011-01-31', freq='D',
                               name='idx')
        # The first five entries are appended again, giving 36 values.
        idx = base.append(base[:5])

        res = idx.drop_duplicates()
        tm.assert_index_equal(res, base)
        res = Series(idx).drop_duplicates()
        tm.assert_series_equal(res, Series(base))

        res = idx.drop_duplicates(keep='last')
        exp = base[5:].append(base[:5])
        tm.assert_index_equal(res, exp)
        res = Series(idx).drop_duplicates(keep='last')
        tm.assert_series_equal(res, Series(exp, index=np.arange(5, 36)))

        res = idx.drop_duplicates(keep=False)
        tm.assert_index_equal(res, base[5:])
        res = Series(idx).drop_duplicates(keep=False)
        tm.assert_series_equal(res, Series(base[5:], index=np.arange(5, 31)))

    def test_order_compat(self):
        """``sort_values`` on PeriodIndex mirrors a plain integer ``Index``."""
        def _check_freq(index, expected_index):
            # freq is only compared for PeriodIndex inputs.
            if isinstance(index, PeriodIndex):
                assert index.freq == expected_index.freq

        # Already-sorted input.
        pidx = PeriodIndex(['2011', '2012', '2013'], name='pidx', freq='A')
        # for compatibility check
        iidx = Index([2011, 2012, 2013], name='idx')
        for idx in [pidx, iidx]:
            ordered = idx.sort_values()
            tm.assert_index_equal(ordered, idx)
            _check_freq(ordered, idx)

            ordered = idx.sort_values(ascending=False)
            tm.assert_index_equal(ordered, idx[::-1])
            _check_freq(ordered, idx[::-1])

            ordered, indexer = idx.sort_values(return_indexer=True)
            tm.assert_index_equal(ordered, idx)
            tm.assert_numpy_array_equal(indexer, np.array([0, 1, 2]),
                                        check_dtype=False)
            _check_freq(ordered, idx)

            ordered, indexer = idx.sort_values(return_indexer=True,
                                               ascending=False)
            tm.assert_index_equal(ordered, idx[::-1])
            tm.assert_numpy_array_equal(indexer, np.array([2, 1, 0]),
                                        check_dtype=False)
            _check_freq(ordered, idx[::-1])

        # Unsorted input containing a duplicate.
        pidx = PeriodIndex(['2011', '2013', '2015', '2012',
                            '2011'], name='pidx', freq='A')
        pexpected = PeriodIndex(
            ['2011', '2011', '2012', '2013', '2015'], name='pidx', freq='A')
        # for compatibility check
        iidx = Index([2011, 2013, 2015, 2012, 2011], name='idx')
        iexpected = Index([2011, 2011, 2012, 2013, 2015], name='idx')
        for idx, expected in [(pidx, pexpected), (iidx, iexpected)]:
            ordered = idx.sort_values()
            tm.assert_index_equal(ordered, expected)
            _check_freq(ordered, idx)

            ordered = idx.sort_values(ascending=False)
            tm.assert_index_equal(ordered, expected[::-1])
            _check_freq(ordered, idx)

            ordered, indexer = idx.sort_values(return_indexer=True)
            tm.assert_index_equal(ordered, expected)

            exp = np.array([0, 4, 3, 1, 2])
            tm.assert_numpy_array_equal(indexer, exp, check_dtype=False)
            _check_freq(ordered, idx)

            ordered, indexer = idx.sort_values(return_indexer=True,
                                               ascending=False)
            tm.assert_index_equal(ordered, expected[::-1])

            exp = np.array([2, 1, 3, 4, 0])
            tm.assert_numpy_array_equal(indexer, exp, check_dtype=False)
            _check_freq(ordered, idx)

        # Input containing NaT: expected first ascending, last descending.
        pidx = PeriodIndex(['2011', '2013', 'NaT', '2011'], name='pidx',
                           freq='D')

        result = pidx.sort_values()
        expected = PeriodIndex(['NaT', '2011', '2011', '2013'],
                               name='pidx', freq='D')
        tm.assert_index_equal(result, expected)
        assert result.freq == 'D'

        result = pidx.sort_values(ascending=False)
        expected = PeriodIndex(
            ['2013', '2011', '2011', 'NaT'], name='pidx', freq='D')
        tm.assert_index_equal(result, expected)
        assert result.freq == 'D'

    def test_order(self):
        """``sort_values`` keeps the freq and returns the expected indexer."""
        # Already-sorted input at several daily multiples.
        for freq in ['D', '2D', '4D']:
            idx = PeriodIndex(['2011-01-01', '2011-01-02', '2011-01-03'],
                              freq=freq, name='idx')

            ordered = idx.sort_values()
            tm.assert_index_equal(ordered, idx)
            assert ordered.freq == idx.freq

            ordered = idx.sort_values(ascending=False)
            expected = idx[::-1]
            tm.assert_index_equal(ordered, expected)
            assert ordered.freq == expected.freq
            assert ordered.freq == freq

            ordered, indexer = idx.sort_values(return_indexer=True)
            tm.assert_index_equal(ordered, idx)
            tm.assert_numpy_array_equal(indexer, np.array([0, 1, 2]),
                                        check_dtype=False)
            assert ordered.freq == idx.freq
            assert ordered.freq == freq

            ordered, indexer = idx.sort_values(return_indexer=True,
                                               ascending=False)
            expected = idx[::-1]
            tm.assert_index_equal(ordered, expected)
            tm.assert_numpy_array_equal(indexer, np.array([2, 1, 0]),
                                        check_dtype=False)
            assert ordered.freq == expected.freq
            assert ordered.freq == freq

        # Unsorted inputs; idx2 holds the same values as idx1 under a
        # different name, idx3 replaces both end values with NaT.
        idx1 = PeriodIndex(['2011-01-01', '2011-01-03', '2011-01-05',
                            '2011-01-02', '2011-01-01'], freq='D', name='idx1')
        exp1 = PeriodIndex(['2011-01-01', '2011-01-01', '2011-01-02',
                            '2011-01-03', '2011-01-05'], freq='D', name='idx1')

        idx2 = PeriodIndex(['2011-01-01', '2011-01-03', '2011-01-05',
                            '2011-01-02', '2011-01-01'],
                           freq='D', name='idx2')
        exp2 = PeriodIndex(['2011-01-01', '2011-01-01', '2011-01-02',
                            '2011-01-03', '2011-01-05'],
                           freq='D', name='idx2')

        idx3 = PeriodIndex([NaT, '2011-01-03', '2011-01-05',
                            '2011-01-02', NaT], freq='D', name='idx3')
        exp3 = PeriodIndex([NaT, NaT, '2011-01-02', '2011-01-03',
                            '2011-01-05'], freq='D', name='idx3')

        for idx, expected in [(idx1, exp1), (idx2, exp2), (idx3, exp3)]:
            ordered = idx.sort_values()
            tm.assert_index_equal(ordered, expected)
            assert ordered.freq == 'D'

            ordered = idx.sort_values(ascending=False)
            tm.assert_index_equal(ordered, expected[::-1])
            assert ordered.freq == 'D'

            ordered, indexer = idx.sort_values(return_indexer=True)
            tm.assert_index_equal(ordered, expected)

            exp = np.array([0, 4, 3, 1, 2])
            tm.assert_numpy_array_equal(indexer, exp, check_dtype=False)
            assert ordered.freq == 'D'

            ordered, indexer = idx.sort_values(return_indexer=True,
                                               ascending=False)
            tm.assert_index_equal(ordered, expected[::-1])

            exp = np.array([2, 1, 3, 4, 0])
            tm.assert_numpy_array_equal(indexer, exp, check_dtype=False)
            assert ordered.freq == 'D'

    def test_shift(self):
        """Intentionally empty placeholder; see the note below."""
        # This is tested in test_arithmetic
        pass

    def test_nat(self):
        """NaT is the NA value and is reflected in the NaN helper attrs."""
        assert pd.PeriodIndex._na_value is NaT
        assert pd.PeriodIndex([], freq='M')._na_value is NaT

        # No missing values.
        idx = pd.PeriodIndex(['2011-01-01', '2011-01-02'], freq='D')
        assert idx._can_hold_na

        tm.assert_numpy_array_equal(idx._isnan, np.array([False, False]))
        assert idx.hasnans is False
        tm.assert_numpy_array_equal(idx._nan_idxs,
                                    np.array([], dtype=np.intp))

        # One missing value, at position 1.
        idx = pd.PeriodIndex(['2011-01-01', 'NaT'], freq='D')
        assert idx._can_hold_na

        tm.assert_numpy_array_equal(idx._isnan, np.array([False, True]))
        assert idx.hasnans is True
        tm.assert_numpy_array_equal(idx._nan_idxs,
                                    np.array([1], dtype=np.intp))

    @pytest.mark.parametrize('freq', ['D', 'M'])
    def test_equals(self, freq):
        """``equals`` is True only for an Index with equal values and freq.

        Parameters
        ----------
        freq : str
            Frequency alias used to build the index under test.
        """
        # GH#13107
        idx = pd.PeriodIndex(['2011-01-01', '2011-01-02', 'NaT'],
                             freq=freq)
        assert idx.equals(idx)
        assert idx.equals(idx.copy())
        assert idx.equals(idx.astype(object))
        assert idx.astype(object).equals(idx)
        assert idx.astype(object).equals(idx.astype(object))
        # Non-Index containers never compare equal.
        assert not idx.equals(list(idx))
        assert not idx.equals(pd.Series(idx))

        # Same input strings, different freq.
        idx2 = pd.PeriodIndex(['2011-01-01', '2011-01-02', 'NaT'],
                              freq='H')
        assert not idx.equals(idx2)
        assert not idx.equals(idx2.copy())
        assert not idx.equals(idx2.astype(object))
        assert not idx.astype(object).equals(idx2)
        assert not idx.equals(list(idx2))
        assert not idx.equals(pd.Series(idx2))

        # same internal ordinals (asi8), different freq
        idx3 = pd.PeriodIndex._simple_new(
            idx._values._simple_new(idx._values.asi8, freq="H")
        )
        tm.assert_numpy_array_equal(idx.asi8, idx3.asi8)
        assert not idx.equals(idx3)
        assert not idx.equals(idx3.copy())
        assert not idx.equals(idx3.astype(object))
        assert not idx.astype(object).equals(idx3)
        assert not idx.equals(list(idx3))
        assert not idx.equals(pd.Series(idx3))

    def test_freq_setter_deprecated(self):
        """Reading ``freq`` is silent; assigning it emits ``FutureWarning``."""
        # GH 20678
        idx = pd.period_range('2018Q1', periods=4, freq='Q')

        # no warning for getter
        with tm.assert_produces_warning(None):
            idx.freq

        # warning for setter
        with tm.assert_produces_warning(FutureWarning):
            idx.freq = pd.offsets.Day()
|