# coding=utf-8 # pylint: disable-msg=E1101,W0612 from datetime import datetime, time, timedelta import numpy as np import pytest from pandas._libs.tslib import iNaT from pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime from pandas.compat import StringIO, lrange, product from pandas.errors import NullFrequencyError import pandas.util._test_decorators as td import pandas as pd from pandas import ( DataFrame, Index, NaT, Series, Timestamp, concat, date_range, offsets, timedelta_range, to_datetime) from pandas.core.indexes.datetimes import DatetimeIndex from pandas.core.indexes.timedeltas import TimedeltaIndex from pandas.tests.series.common import TestData import pandas.util.testing as tm from pandas.util.testing import ( assert_almost_equal, assert_frame_equal, assert_series_equal) from pandas.tseries.offsets import BDay, BMonthEnd def _simple_ts(start, end, freq='D'): rng = date_range(start, end, freq=freq) return Series(np.random.randn(len(rng)), index=rng) def assert_range_equal(left, right): assert (left.equals(right)) assert (left.freq == right.freq) assert (left.tz == right.tz) class TestTimeSeries(TestData): def test_shift(self): shifted = self.ts.shift(1) unshifted = shifted.shift(-1) tm.assert_index_equal(shifted.index, self.ts.index) tm.assert_index_equal(unshifted.index, self.ts.index) tm.assert_numpy_array_equal(unshifted.dropna().values, self.ts.values[:-1]) offset = BDay() shifted = self.ts.shift(1, freq=offset) unshifted = shifted.shift(-1, freq=offset) assert_series_equal(unshifted, self.ts) unshifted = self.ts.shift(0, freq=offset) assert_series_equal(unshifted, self.ts) shifted = self.ts.shift(1, freq='B') unshifted = shifted.shift(-1, freq='B') assert_series_equal(unshifted, self.ts) # corner case unshifted = self.ts.shift(0) assert_series_equal(unshifted, self.ts) # Shifting with PeriodIndex ps = tm.makePeriodSeries() shifted = ps.shift(1) unshifted = shifted.shift(-1) tm.assert_index_equal(shifted.index, ps.index) tm.assert_index_equal(unshifted.index, ps.index) tm.assert_numpy_array_equal(unshifted.dropna().values, ps.values[:-1]) shifted2 = ps.shift(1, 'B') shifted3 = ps.shift(1, BDay()) assert_series_equal(shifted2, shifted3) assert_series_equal(ps, shifted2.shift(-1, 'B')) msg = "Given freq D does not match PeriodIndex freq B" with pytest.raises(ValueError, match=msg): ps.shift(freq='D') # legacy support shifted4 = ps.shift(1, freq='B') assert_series_equal(shifted2, shifted4) shifted5 = ps.shift(1, freq=BDay()) assert_series_equal(shifted5, shifted4) # 32-bit taking # GH 8129 index = date_range('2000-01-01', periods=5) for dtype in ['int32', 'int64']: s1 = Series(np.arange(5, dtype=dtype), index=index) p = s1.iloc[1] result = s1.shift(periods=p) expected = Series([np.nan, 0, 1, 2, 3], index=index) assert_series_equal(result, expected) # xref 8260 # with tz s = Series(date_range('2000-01-01 09:00:00', periods=5, tz='US/Eastern'), name='foo') result = s - s.shift() exp = Series(TimedeltaIndex(['NaT'] + ['1 days'] * 4), name='foo') assert_series_equal(result, exp) # incompat tz s2 = Series(date_range('2000-01-01 09:00:00', periods=5, tz='CET'), name='foo') msg = ("DatetimeArray subtraction must have the same timezones or no" " timezones") with pytest.raises(TypeError, match=msg): s - s2 def test_shift2(self): ts = Series(np.random.randn(5), index=date_range('1/1/2000', periods=5, freq='H')) result = ts.shift(1, freq='5T') exp_index = ts.index.shift(1, freq='5T') tm.assert_index_equal(result.index, exp_index) # GH #1063, multiple of same base result = ts.shift(1, freq='4H') exp_index = ts.index + offsets.Hour(4) tm.assert_index_equal(result.index, exp_index) idx = DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-04']) msg = "Cannot shift with no freq" with pytest.raises(NullFrequencyError, match=msg): idx.shift(1) def test_shift_fill_value(self): # GH #24128 ts = Series([1.0, 2.0, 3.0, 4.0, 5.0], index=date_range('1/1/2000', periods=5, freq='H')) exp = Series([0.0, 1.0, 2.0, 3.0, 4.0], index=date_range('1/1/2000', periods=5, freq='H')) # check that fill value works result = ts.shift(1, fill_value=0.0) tm.assert_series_equal(result, exp) exp = Series([0.0, 0.0, 1.0, 2.0, 3.0], index=date_range('1/1/2000', periods=5, freq='H')) result = ts.shift(2, fill_value=0.0) tm.assert_series_equal(result, exp) ts = pd.Series([1, 2, 3]) res = ts.shift(2, fill_value=0) assert res.dtype == ts.dtype def test_categorical_shift_fill_value(self): ts = pd.Series(['a', 'b', 'c', 'd'], dtype="category") res = ts.shift(1, fill_value='a') expected = pd.Series(pd.Categorical(['a', 'a', 'b', 'c'], categories=['a', 'b', 'c', 'd'], ordered=False)) tm.assert_equal(res, expected) # check for incorrect fill_value msg = "'fill_value=f' is not present in this Categorical's categories" with pytest.raises(ValueError, match=msg): ts.shift(1, fill_value='f') def test_shift_dst(self): # GH 13926 dates = date_range('2016-11-06', freq='H', periods=10, tz='US/Eastern') s = Series(dates) res = s.shift(0) tm.assert_series_equal(res, s) assert res.dtype == 'datetime64[ns, US/Eastern]' res = s.shift(1) exp_vals = [NaT] + dates.astype(object).values.tolist()[:9] exp = Series(exp_vals) tm.assert_series_equal(res, exp) assert res.dtype == 'datetime64[ns, US/Eastern]' res = s.shift(-2) exp_vals = dates.astype(object).values.tolist()[2:] + [NaT, NaT] exp = Series(exp_vals) tm.assert_series_equal(res, exp) assert res.dtype == 'datetime64[ns, US/Eastern]' for ex in [10, -10, 20, -20]: res = s.shift(ex) exp = Series([NaT] * 10, dtype='datetime64[ns, US/Eastern]') tm.assert_series_equal(res, exp) assert res.dtype == 'datetime64[ns, US/Eastern]' def test_tshift(self): # PeriodIndex ps = tm.makePeriodSeries() shifted = ps.tshift(1) unshifted = shifted.tshift(-1) assert_series_equal(unshifted, ps) shifted2 = ps.tshift(freq='B') assert_series_equal(shifted, shifted2) shifted3 = ps.tshift(freq=BDay()) assert_series_equal(shifted, shifted3) msg = "Given freq M does not match PeriodIndex freq B" with pytest.raises(ValueError, match=msg): ps.tshift(freq='M') # DatetimeIndex shifted = self.ts.tshift(1) unshifted = shifted.tshift(-1) assert_series_equal(self.ts, unshifted) shifted2 = self.ts.tshift(freq=self.ts.index.freq) assert_series_equal(shifted, shifted2) inferred_ts = Series(self.ts.values, Index(np.asarray(self.ts.index)), name='ts') shifted = inferred_ts.tshift(1) unshifted = shifted.tshift(-1) assert_series_equal(shifted, self.ts.tshift(1)) assert_series_equal(unshifted, inferred_ts) no_freq = self.ts[[0, 5, 7]] msg = "Freq was not given and was not set in the index" with pytest.raises(ValueError, match=msg): no_freq.tshift() def test_truncate(self): offset = BDay() ts = self.ts[::3] start, end = self.ts.index[3], self.ts.index[6] start_missing, end_missing = self.ts.index[2], self.ts.index[7] # neither specified truncated = ts.truncate() assert_series_equal(truncated, ts) # both specified expected = ts[1:3] truncated = ts.truncate(start, end) assert_series_equal(truncated, expected) truncated = ts.truncate(start_missing, end_missing) assert_series_equal(truncated, expected) # start specified expected = ts[1:] truncated = ts.truncate(before=start) assert_series_equal(truncated, expected) truncated = ts.truncate(before=start_missing) assert_series_equal(truncated, expected) # end specified expected = ts[:3] truncated = ts.truncate(after=end) assert_series_equal(truncated, expected) truncated = ts.truncate(after=end_missing) assert_series_equal(truncated, expected) # corner case, empty series returned truncated = ts.truncate(after=self.ts.index[0] - offset) assert (len(truncated) == 0) truncated = ts.truncate(before=self.ts.index[-1] + offset) assert (len(truncated) == 0) msg = "Truncate: 1999-12-31 00:00:00 must be after 2000-02-14 00:00:00" with pytest.raises(ValueError, match=msg): ts.truncate(before=self.ts.index[-1] + offset, after=self.ts.index[0] - offset) def test_truncate_nonsortedindex(self): # GH 17935 s = pd.Series(['a', 'b', 'c', 'd', 'e'], index=[5, 3, 2, 9, 0]) msg = 'truncate requires a sorted index' with pytest.raises(ValueError, match=msg): s.truncate(before=3, after=9) rng = pd.date_range('2011-01-01', '2012-01-01', freq='W') ts = pd.Series(np.random.randn(len(rng)), index=rng) msg = 'truncate requires a sorted index' with pytest.raises(ValueError, match=msg): ts.sort_values(ascending=False).truncate(before='2011-11', after='2011-12') def test_asfreq(self): ts = Series([0., 1., 2.], index=[datetime(2009, 10, 30), datetime( 2009, 11, 30), datetime(2009, 12, 31)]) daily_ts = ts.asfreq('B') monthly_ts = daily_ts.asfreq('BM') tm.assert_series_equal(monthly_ts, ts) daily_ts = ts.asfreq('B', method='pad') monthly_ts = daily_ts.asfreq('BM') tm.assert_series_equal(monthly_ts, ts) daily_ts = ts.asfreq(BDay()) monthly_ts = daily_ts.asfreq(BMonthEnd()) tm.assert_series_equal(monthly_ts, ts) result = ts[:0].asfreq('M') assert len(result) == 0 assert result is not ts daily_ts = ts.asfreq('D', fill_value=-1) result = daily_ts.value_counts().sort_index() expected = Series([60, 1, 1, 1], index=[-1.0, 2.0, 1.0, 0.0]).sort_index() tm.assert_series_equal(result, expected) def test_asfreq_datetimeindex_empty_series(self): # GH 14320 expected = Series(index=pd.DatetimeIndex( ["2016-09-29 11:00"])).asfreq('H') result = Series(index=pd.DatetimeIndex(["2016-09-29 11:00"]), data=[3]).asfreq('H') tm.assert_index_equal(expected.index, result.index) def test_diff(self): # Just run the function self.ts.diff() # int dtype a = 10000000000000000 b = a + 1 s = Series([a, b]) rs = s.diff() assert rs[1] == 1 # neg n rs = self.ts.diff(-1) xp = self.ts - self.ts.shift(-1) assert_series_equal(rs, xp) # 0 rs = self.ts.diff(0) xp = self.ts - self.ts assert_series_equal(rs, xp) # datetime diff (GH3100) s = Series(date_range('20130102', periods=5)) rs = s - s.shift(1) xp = s.diff() assert_series_equal(rs, xp) # timedelta diff nrs = rs - rs.shift(1) nxp = xp.diff() assert_series_equal(nrs, nxp) # with tz s = Series( date_range('2000-01-01 09:00:00', periods=5, tz='US/Eastern'), name='foo') result = s.diff() assert_series_equal(result, Series( TimedeltaIndex(['NaT'] + ['1 days'] * 4), name='foo')) def test_pct_change(self): rs = self.ts.pct_change(fill_method=None) assert_series_equal(rs, self.ts / self.ts.shift(1) - 1) rs = self.ts.pct_change(2) filled = self.ts.fillna(method='pad') assert_series_equal(rs, filled / filled.shift(2) - 1) rs = self.ts.pct_change(fill_method='bfill', limit=1) filled = self.ts.fillna(method='bfill', limit=1) assert_series_equal(rs, filled / filled.shift(1) - 1) rs = self.ts.pct_change(freq='5D') filled = self.ts.fillna(method='pad') assert_series_equal(rs, (filled / filled.shift(freq='5D') - 1) .reindex_like(filled)) def test_pct_change_shift_over_nas(self): s = Series([1., 1.5, np.nan, 2.5, 3.]) chg = s.pct_change() expected = Series([np.nan, 0.5, 0., 2.5 / 1.5 - 1, .2]) assert_series_equal(chg, expected) @pytest.mark.parametrize("freq, periods, fill_method, limit", [('5B', 5, None, None), ('3B', 3, None, None), ('3B', 3, 'bfill', None), ('7B', 7, 'pad', 1), ('7B', 7, 'bfill', 3), ('14B', 14, None, None)]) def test_pct_change_periods_freq(self, freq, periods, fill_method, limit): # GH 7292 rs_freq = self.ts.pct_change(freq=freq, fill_method=fill_method, limit=limit) rs_periods = self.ts.pct_change(periods, fill_method=fill_method, limit=limit) assert_series_equal(rs_freq, rs_periods) empty_ts = Series(index=self.ts.index) rs_freq = empty_ts.pct_change(freq=freq, fill_method=fill_method, limit=limit) rs_periods = empty_ts.pct_change(periods, fill_method=fill_method, limit=limit) assert_series_equal(rs_freq, rs_periods) def test_autocorr(self): # Just run the function corr1 = self.ts.autocorr() # Now run it with the lag parameter corr2 = self.ts.autocorr(lag=1) # corr() with lag needs Series of at least length 2 if len(self.ts) <= 2: assert np.isnan(corr1) assert np.isnan(corr2) else: assert corr1 == corr2 # Choose a random lag between 1 and length of Series - 2 # and compare the result with the Series corr() function n = 1 + np.random.randint(max(1, len(self.ts) - 2)) corr1 = self.ts.corr(self.ts.shift(n)) corr2 = self.ts.autocorr(lag=n) # corr() with lag needs Series of at least length 2 if len(self.ts) <= 2: assert np.isnan(corr1) assert np.isnan(corr2) else: assert corr1 == corr2 def test_first_last_valid(self): ts = self.ts.copy() ts[:5] = np.NaN index = ts.first_valid_index() assert index == ts.index[5] ts[-5:] = np.NaN index = ts.last_valid_index() assert index == ts.index[-6] ts[:] = np.nan assert ts.last_valid_index() is None assert ts.first_valid_index() is None ser = Series([], index=[]) assert ser.last_valid_index() is None assert ser.first_valid_index() is None # GH12800 empty = Series() assert empty.last_valid_index() is None assert empty.first_valid_index() is None # GH20499: its preserves freq with holes ts.index = date_range("20110101", periods=len(ts), freq="B") ts.iloc[1] = 1 ts.iloc[-2] = 1 assert ts.first_valid_index() == ts.index[1] assert ts.last_valid_index() == ts.index[-2] assert ts.first_valid_index().freq == ts.index.freq assert ts.last_valid_index().freq == ts.index.freq def test_mpl_compat_hack(self): result = self.ts[:, np.newaxis] expected = self.ts.values[:, np.newaxis] assert_almost_equal(result, expected) def test_timeseries_coercion(self): idx = tm.makeDateIndex(10000) ser = Series(np.random.randn(len(idx)), idx.astype(object)) assert ser.index.is_all_dates assert isinstance(ser.index, DatetimeIndex) def test_contiguous_boolean_preserve_freq(self): rng = date_range('1/1/2000', '3/1/2000', freq='B') mask = np.zeros(len(rng), dtype=bool) mask[10:20] = True masked = rng[mask] expected = rng[10:20] assert expected.freq is not None assert_range_equal(masked, expected) mask[22] = True masked = rng[mask] assert masked.freq is None def test_to_datetime_unit(self): epoch = 1370745748 s = Series([epoch + t for t in range(20)]) result = to_datetime(s, unit='s') expected = Series([Timestamp('2013-06-09 02:42:28') + timedelta( seconds=t) for t in range(20)]) assert_series_equal(result, expected) s = Series([epoch + t for t in range(20)]).astype(float) result = to_datetime(s, unit='s') expected = Series([Timestamp('2013-06-09 02:42:28') + timedelta( seconds=t) for t in range(20)]) assert_series_equal(result, expected) s = Series([epoch + t for t in range(20)] + [iNaT]) result = to_datetime(s, unit='s') expected = Series([Timestamp('2013-06-09 02:42:28') + timedelta( seconds=t) for t in range(20)] + [NaT]) assert_series_equal(result, expected) s = Series([epoch + t for t in range(20)] + [iNaT]).astype(float) result = to_datetime(s, unit='s') expected = Series([Timestamp('2013-06-09 02:42:28') + timedelta( seconds=t) for t in range(20)] + [NaT]) assert_series_equal(result, expected) # GH13834 s = Series([epoch + t for t in np.arange(0, 2, .25)] + [iNaT]).astype(float) result = to_datetime(s, unit='s') expected = Series([Timestamp('2013-06-09 02:42:28') + timedelta( seconds=t) for t in np.arange(0, 2, .25)] + [NaT]) assert_series_equal(result, expected) s = concat([Series([epoch + t for t in range(20)] ).astype(float), Series([np.nan])], ignore_index=True) result = to_datetime(s, unit='s') expected = Series([Timestamp('2013-06-09 02:42:28') + timedelta( seconds=t) for t in range(20)] + [NaT]) assert_series_equal(result, expected) result = to_datetime([1, 2, 'NaT', pd.NaT, np.nan], unit='D') expected = DatetimeIndex([Timestamp('1970-01-02'), Timestamp('1970-01-03')] + ['NaT'] * 3) tm.assert_index_equal(result, expected) msg = "non convertible value foo with the unit 'D'" with pytest.raises(ValueError, match=msg): to_datetime([1, 2, 'foo'], unit='D') msg = "cannot convert input 111111111 with the unit 'D'" with pytest.raises(OutOfBoundsDatetime, match=msg): to_datetime([1, 2, 111111111], unit='D') # coerce we can process expected = DatetimeIndex([Timestamp('1970-01-02'), Timestamp('1970-01-03')] + ['NaT'] * 1) result = to_datetime([1, 2, 'foo'], unit='D', errors='coerce') tm.assert_index_equal(result, expected) result = to_datetime([1, 2, 111111111], unit='D', errors='coerce') tm.assert_index_equal(result, expected) def test_series_ctor_datetime64(self): rng = date_range('1/1/2000 00:00:00', '1/1/2000 1:59:50', freq='10s') dates = np.asarray(rng) series = Series(dates) assert np.issubdtype(series.dtype, np.dtype('M8[ns]')) def test_series_repr_nat(self): series = Series([0, 1000, 2000, iNaT], dtype='M8[ns]') result = repr(series) expected = ('0 1970-01-01 00:00:00.000000\n' '1 1970-01-01 00:00:00.000001\n' '2 1970-01-01 00:00:00.000002\n' '3 NaT\n' 'dtype: datetime64[ns]') assert result == expected def test_asfreq_keep_index_name(self): # GH #9854 index_name = 'bar' index = pd.date_range('20130101', periods=20, name=index_name) df = pd.DataFrame([x for x in range(20)], columns=['foo'], index=index) assert index_name == df.index.name assert index_name == df.asfreq('10D').index.name def test_promote_datetime_date(self): rng = date_range('1/1/2000', periods=20) ts = Series(np.random.randn(20), index=rng) ts_slice = ts[5:] ts2 = ts_slice.copy() ts2.index = [x.date() for x in ts2.index] result = ts + ts2 result2 = ts2 + ts expected = ts + ts[5:] assert_series_equal(result, expected) assert_series_equal(result2, expected) # test asfreq result = ts2.asfreq('4H', method='ffill') expected = ts[5:].asfreq('4H', method='ffill') assert_series_equal(result, expected) result = rng.get_indexer(ts2.index) expected = rng.get_indexer(ts_slice.index) tm.assert_numpy_array_equal(result, expected) def test_asfreq_normalize(self): rng = date_range('1/1/2000 09:30', periods=20) norm = date_range('1/1/2000', periods=20) vals = np.random.randn(20) ts = Series(vals, index=rng) result = ts.asfreq('D', normalize=True) norm = date_range('1/1/2000', periods=20) expected = Series(vals, index=norm) assert_series_equal(result, expected) vals = np.random.randn(20, 3) ts = DataFrame(vals, index=rng) result = ts.asfreq('D', normalize=True) expected = DataFrame(vals, index=norm) assert_frame_equal(result, expected) def test_first_subset(self): ts = _simple_ts('1/1/2000', '1/1/2010', freq='12h') result = ts.first('10d') assert len(result) == 20 ts = _simple_ts('1/1/2000', '1/1/2010') result = ts.first('10d') assert len(result) == 10 result = ts.first('3M') expected = ts[:'3/31/2000'] assert_series_equal(result, expected) result = ts.first('21D') expected = ts[:21] assert_series_equal(result, expected) result = ts[:0].first('3M') assert_series_equal(result, ts[:0]) def test_first_raises(self): # GH20725 ser = pd.Series('a b c'.split()) msg = "'first' only supports a DatetimeIndex index" with pytest.raises(TypeError, match=msg): ser.first('1D') def test_last_subset(self): ts = _simple_ts('1/1/2000', '1/1/2010', freq='12h') result = ts.last('10d') assert len(result) == 20 ts = _simple_ts('1/1/2000', '1/1/2010') result = ts.last('10d') assert len(result) == 10 result = ts.last('21D') expected = ts['12/12/2009':] assert_series_equal(result, expected) result = ts.last('21D') expected = ts[-21:] assert_series_equal(result, expected) result = ts[:0].last('3M') assert_series_equal(result, ts[:0]) def test_last_raises(self): # GH20725 ser = pd.Series('a b c'.split()) msg = "'last' only supports a DatetimeIndex index" with pytest.raises(TypeError, match=msg): ser.last('1D') def test_format_pre_1900_dates(self): rng = date_range('1/1/1850', '1/1/1950', freq='A-DEC') rng.format() ts = Series(1, index=rng) repr(ts) def test_at_time(self): rng = date_range('1/1/2000', '1/5/2000', freq='5min') ts = Series(np.random.randn(len(rng)), index=rng) rs = ts.at_time(rng[1]) assert (rs.index.hour == rng[1].hour).all() assert (rs.index.minute == rng[1].minute).all() assert (rs.index.second == rng[1].second).all() result = ts.at_time('9:30') expected = ts.at_time(time(9, 30)) assert_series_equal(result, expected) df = DataFrame(np.random.randn(len(rng), 3), index=rng) result = ts[time(9, 30)] result_df = df.loc[time(9, 30)] expected = ts[(rng.hour == 9) & (rng.minute == 30)] exp_df = df[(rng.hour == 9) & (rng.minute == 30)] # expected.index = date_range('1/1/2000', '1/4/2000') assert_series_equal(result, expected) tm.assert_frame_equal(result_df, exp_df) chunk = df.loc['1/4/2000':] result = chunk.loc[time(9, 30)] expected = result_df[-1:] tm.assert_frame_equal(result, expected) # midnight, everything rng = date_range('1/1/2000', '1/31/2000') ts = Series(np.random.randn(len(rng)), index=rng) result = ts.at_time(time(0, 0)) assert_series_equal(result, ts) # time doesn't exist rng = date_range('1/1/2012', freq='23Min', periods=384) ts = Series(np.random.randn(len(rng)), rng) rs = ts.at_time('16:00') assert len(rs) == 0 def test_at_time_raises(self): # GH20725 ser = pd.Series('a b c'.split()) msg = "Index must be DatetimeIndex" with pytest.raises(TypeError, match=msg): ser.at_time('00:00') def test_between(self): series = Series(date_range('1/1/2000', periods=10)) left, right = series[[2, 7]] result = series.between(left, right) expected = (series >= left) & (series <= right) assert_series_equal(result, expected) def test_between_time(self): rng = date_range('1/1/2000', '1/5/2000', freq='5min') ts = Series(np.random.randn(len(rng)), index=rng) stime = time(0, 0) etime = time(1, 0) close_open = product([True, False], [True, False]) for inc_start, inc_end in close_open: filtered = ts.between_time(stime, etime, inc_start, inc_end) exp_len = 13 * 4 + 1 if not inc_start: exp_len -= 5 if not inc_end: exp_len -= 4 assert len(filtered) == exp_len for rs in filtered.index: t = rs.time() if inc_start: assert t >= stime else: assert t > stime if inc_end: assert t <= etime else: assert t < etime result = ts.between_time('00:00', '01:00') expected = ts.between_time(stime, etime) assert_series_equal(result, expected) # across midnight rng = date_range('1/1/2000', '1/5/2000', freq='5min') ts = Series(np.random.randn(len(rng)), index=rng) stime = time(22, 0) etime = time(9, 0) close_open = product([True, False], [True, False]) for inc_start, inc_end in close_open: filtered = ts.between_time(stime, etime, inc_start, inc_end) exp_len = (12 * 11 + 1) * 4 + 1 if not inc_start: exp_len -= 4 if not inc_end: exp_len -= 4 assert len(filtered) == exp_len for rs in filtered.index: t = rs.time() if inc_start: assert (t >= stime) or (t <= etime) else: assert (t > stime) or (t <= etime) if inc_end: assert (t <= etime) or (t >= stime) else: assert (t < etime) or (t >= stime) def test_between_time_raises(self): # GH20725 ser = pd.Series('a b c'.split()) msg = "Index must be DatetimeIndex" with pytest.raises(TypeError, match=msg): ser.between_time(start_time='00:00', end_time='12:00') def test_between_time_types(self): # GH11818 rng = date_range('1/1/2000', '1/5/2000', freq='5min') msg = (r"Cannot convert arg \[datetime\.datetime\(2010, 1, 2, 1, 0\)\]" " to a time") with pytest.raises(ValueError, match=msg): rng.indexer_between_time(datetime(2010, 1, 2, 1), datetime(2010, 1, 2, 5)) frame = DataFrame({'A': 0}, index=rng) with pytest.raises(ValueError, match=msg): frame.between_time(datetime(2010, 1, 2, 1), datetime(2010, 1, 2, 5)) series = Series(0, index=rng) with pytest.raises(ValueError, match=msg): series.between_time(datetime(2010, 1, 2, 1), datetime(2010, 1, 2, 5)) @td.skip_if_has_locale def test_between_time_formats(self): # GH11818 rng = date_range('1/1/2000', '1/5/2000', freq='5min') ts = DataFrame(np.random.randn(len(rng), 2), index=rng) strings = [("2:00", "2:30"), ("0200", "0230"), ("2:00am", "2:30am"), ("0200am", "0230am"), ("2:00:00", "2:30:00"), ("020000", "023000"), ("2:00:00am", "2:30:00am"), ("020000am", "023000am")] expected_length = 28 for time_string in strings: assert len(ts.between_time(*time_string)) == expected_length def test_between_time_axis(self): # issue 8839 rng = date_range('1/1/2000', periods=100, freq='10min') ts = Series(np.random.randn(len(rng)), index=rng) stime, etime = ('08:00:00', '09:00:00') expected_length = 7 assert len(ts.between_time(stime, etime)) == expected_length assert len(ts.between_time(stime, etime, axis=0)) == expected_length msg = r"No axis named 1 for object type <(class|type) 'type'>" with pytest.raises(ValueError, match=msg): ts.between_time(stime, etime, axis=1) def test_to_period(self): from pandas.core.indexes.period import period_range ts = _simple_ts('1/1/2000', '1/1/2001') pts = ts.to_period() exp = ts.copy() exp.index = period_range('1/1/2000', '1/1/2001') assert_series_equal(pts, exp) pts = ts.to_period('M') exp.index = exp.index.asfreq('M') tm.assert_index_equal(pts.index, exp.index.asfreq('M')) assert_series_equal(pts, exp) # GH 7606 without freq idx = DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03', '2011-01-04']) exp_idx = pd.PeriodIndex(['2011-01-01', '2011-01-02', '2011-01-03', '2011-01-04'], freq='D') s = Series(np.random.randn(4), index=idx) expected = s.copy() expected.index = exp_idx assert_series_equal(s.to_period(), expected) df = DataFrame(np.random.randn(4, 4), index=idx, columns=idx) expected = df.copy() expected.index = exp_idx assert_frame_equal(df.to_period(), expected) expected = df.copy() expected.columns = exp_idx assert_frame_equal(df.to_period(axis=1), expected) def test_groupby_count_dateparseerror(self): dr = date_range(start='1/1/2012', freq='5min', periods=10) # BAD Example, datetimes first s = Series(np.arange(10), index=[dr, lrange(10)]) grouped = s.groupby(lambda x: x[1] % 2 == 0) result = grouped.count() s = Series(np.arange(10), index=[lrange(10), dr]) grouped = s.groupby(lambda x: x[0] % 2 == 0) expected = grouped.count() assert_series_equal(result, expected) def test_to_csv_numpy_16_bug(self): frame = DataFrame({'a': date_range('1/1/2000', periods=10)}) buf = StringIO() frame.to_csv(buf) result = buf.getvalue() assert '2000-01-01' in result def test_series_map_box_timedelta(self): # GH 11349 s = Series(timedelta_range('1 day 1 s', periods=5, freq='h')) def f(x): return x.total_seconds() s.map(f) s.apply(f) DataFrame(s).applymap(f) def test_asfreq_resample_set_correct_freq(self): # GH5613 # we test if .asfreq() and .resample() set the correct value for .freq df = pd.DataFrame({'date': ["2012-01-01", "2012-01-02", "2012-01-03"], 'col': [1, 2, 3]}) df = df.set_index(pd.to_datetime(df.date)) # testing the settings before calling .asfreq() and .resample() assert df.index.freq is None assert df.index.inferred_freq == 'D' # does .asfreq() set .freq correctly? assert df.asfreq('D').index.freq == 'D' # does .resample() set .freq correctly? assert df.resample('D').asfreq().index.freq == 'D' def test_pickle(self): # GH4606 p = tm.round_trip_pickle(NaT) assert p is NaT idx = pd.to_datetime(['2013-01-01', NaT, '2014-01-06']) idx_p = tm.round_trip_pickle(idx) assert idx_p[0] == idx[0] assert idx_p[1] is NaT assert idx_p[2] == idx[2] # GH11002 # don't infer freq idx = date_range('1750-1-1', '2050-1-1', freq='7D') idx_p = tm.round_trip_pickle(idx) tm.assert_index_equal(idx, idx_p) @pytest.mark.parametrize('tz', [None, 'Asia/Tokyo', 'US/Eastern']) def test_setops_preserve_freq(self, tz): rng = date_range('1/1/2000', '1/1/2002', name='idx', tz=tz) result = rng[:50].union(rng[50:100]) assert result.name == rng.name assert result.freq == rng.freq assert result.tz == rng.tz result = rng[:50].union(rng[30:100]) assert result.name == rng.name assert result.freq == rng.freq assert result.tz == rng.tz result = rng[:50].union(rng[60:100]) assert result.name == rng.name assert result.freq is None assert result.tz == rng.tz result = rng[:50].intersection(rng[25:75]) assert result.name == rng.name assert result.freqstr == 'D' assert result.tz == rng.tz nofreq = DatetimeIndex(list(rng[25:75]), name='other') result = rng[:50].union(nofreq) assert result.name is None assert result.freq == rng.freq assert result.tz == rng.tz result = rng[:50].intersection(nofreq) assert result.name is None assert result.freq == rng.freq assert result.tz == rng.tz def test_from_M8_structured(self): dates = [(datetime(2012, 9, 9, 0, 0), datetime(2012, 9, 8, 15, 10))] arr = np.array(dates, dtype=[('Date', 'M8[us]'), ('Forecasting', 'M8[us]')]) df = DataFrame(arr) assert df['Date'][0] == dates[0][0] assert df['Forecasting'][0] == dates[0][1] s = Series(arr['Date']) assert isinstance(s[0], Timestamp) assert s[0] == dates[0][0] with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): s = Series.from_array(arr['Date'], Index([0])) assert s[0] == dates[0][0] def test_get_level_values_box(self): from pandas import MultiIndex dates = date_range('1/1/2000', periods=4) levels = [dates, [0, 1]] codes = [[0, 0, 1, 1, 2, 2, 3, 3], [0, 1, 0, 1, 0, 1, 0, 1]] index = MultiIndex(levels=levels, codes=codes) assert isinstance(index.get_level_values(0)[0], Timestamp) def test_view_tz(self): # GH#24024 ser = pd.Series(pd.date_range('2000', periods=4, tz='US/Central')) result = ser.view("i8") expected = pd.Series([946706400000000000, 946792800000000000, 946879200000000000, 946965600000000000]) tm.assert_series_equal(result, expected) def test_asarray_tz_naive(self): # This shouldn't produce a warning. ser = pd.Series(pd.date_range('2000', periods=2)) expected = np.array(['2000-01-01', '2000-01-02'], dtype='M8[ns]') with tm.assert_produces_warning(None): result = np.asarray(ser) tm.assert_numpy_array_equal(result, expected) # optionally, object with tm.assert_produces_warning(None): result = np.asarray(ser, dtype=object) expected = np.array([pd.Timestamp('2000-01-01'), pd.Timestamp('2000-01-02')]) tm.assert_numpy_array_equal(result, expected) def test_asarray_tz_aware(self): tz = 'US/Central' ser = pd.Series(pd.date_range('2000', periods=2, tz=tz)) expected = np.array(['2000-01-01T06', '2000-01-02T06'], dtype='M8[ns]') # We warn by default and return an ndarray[M8[ns]] with tm.assert_produces_warning(FutureWarning): result = np.asarray(ser) tm.assert_numpy_array_equal(result, expected) # Old behavior with no warning with tm.assert_produces_warning(None): result = np.asarray(ser, dtype="M8[ns]") tm.assert_numpy_array_equal(result, expected) # Future behavior with no warning expected = np.array([pd.Timestamp("2000-01-01", tz=tz), pd.Timestamp("2000-01-02", tz=tz)]) with tm.assert_produces_warning(None): result = np.asarray(ser, dtype=object) tm.assert_numpy_array_equal(result, expected)