123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899 |
- # -*- coding: utf-8 -*-
- from __future__ import print_function
- from datetime import datetime, time
- import numpy as np
- import pytest
- from pandas.compat import product
- import pandas as pd
- from pandas import (
- DataFrame, DatetimeIndex, Index, MultiIndex, Series, Timestamp, date_range,
- period_range, to_datetime)
- from pandas.tests.frame.common import TestData
- import pandas.util.testing as tm
- from pandas.util.testing import (
- assert_frame_equal, assert_index_equal, assert_series_equal)
- import pandas.tseries.offsets as offsets
- @pytest.fixture(params=product([True, False], [True, False]))
- def close_open_fixture(request):
- return request.param
- class TestDataFrameTimeSeriesMethods(TestData):
- def test_diff(self):
- the_diff = self.tsframe.diff(1)
- assert_series_equal(the_diff['A'],
- self.tsframe['A'] - self.tsframe['A'].shift(1))
- # int dtype
- a = 10000000000000000
- b = a + 1
- s = Series([a, b])
- rs = DataFrame({'s': s}).diff()
- assert rs.s[1] == 1
- # mixed numeric
- tf = self.tsframe.astype('float32')
- the_diff = tf.diff(1)
- assert_series_equal(the_diff['A'],
- tf['A'] - tf['A'].shift(1))
- # issue 10907
- df = pd.DataFrame({'y': pd.Series([2]), 'z': pd.Series([3])})
- df.insert(0, 'x', 1)
- result = df.diff(axis=1)
- expected = pd.DataFrame({'x': np.nan, 'y': pd.Series(
- 1), 'z': pd.Series(1)}).astype('float64')
- assert_frame_equal(result, expected)
- @pytest.mark.parametrize('tz', [None, 'UTC'])
- def test_diff_datetime_axis0(self, tz):
- # GH 18578
- df = DataFrame({0: date_range('2010', freq='D', periods=2, tz=tz),
- 1: date_range('2010', freq='D', periods=2, tz=tz)})
- result = df.diff(axis=0)
- expected = DataFrame({0: pd.TimedeltaIndex(['NaT', '1 days']),
- 1: pd.TimedeltaIndex(['NaT', '1 days'])})
- assert_frame_equal(result, expected)
- @pytest.mark.parametrize('tz', [None, 'UTC'])
- def test_diff_datetime_axis1(self, tz):
- # GH 18578
- df = DataFrame({0: date_range('2010', freq='D', periods=2, tz=tz),
- 1: date_range('2010', freq='D', periods=2, tz=tz)})
- if tz is None:
- result = df.diff(axis=1)
- expected = DataFrame({0: pd.TimedeltaIndex(['NaT', 'NaT']),
- 1: pd.TimedeltaIndex(['0 days',
- '0 days'])})
- assert_frame_equal(result, expected)
- else:
- with pytest.raises(NotImplementedError):
- result = df.diff(axis=1)
- def test_diff_timedelta(self):
- # GH 4533
- df = DataFrame(dict(time=[Timestamp('20130101 9:01'),
- Timestamp('20130101 9:02')],
- value=[1.0, 2.0]))
- res = df.diff()
- exp = DataFrame([[pd.NaT, np.nan],
- [pd.Timedelta('00:01:00'), 1]],
- columns=['time', 'value'])
- assert_frame_equal(res, exp)
- def test_diff_mixed_dtype(self):
- df = DataFrame(np.random.randn(5, 3))
- df['A'] = np.array([1, 2, 3, 4, 5], dtype=object)
- result = df.diff()
- assert result[0].dtype == np.float64
- def test_diff_neg_n(self):
- rs = self.tsframe.diff(-1)
- xp = self.tsframe - self.tsframe.shift(-1)
- assert_frame_equal(rs, xp)
- def test_diff_float_n(self):
- rs = self.tsframe.diff(1.)
- xp = self.tsframe.diff(1)
- assert_frame_equal(rs, xp)
- def test_diff_axis(self):
- # GH 9727
- df = DataFrame([[1., 2.], [3., 4.]])
- assert_frame_equal(df.diff(axis=1), DataFrame(
- [[np.nan, 1.], [np.nan, 1.]]))
- assert_frame_equal(df.diff(axis=0), DataFrame(
- [[np.nan, np.nan], [2., 2.]]))
- def test_pct_change(self):
- rs = self.tsframe.pct_change(fill_method=None)
- assert_frame_equal(rs, self.tsframe / self.tsframe.shift(1) - 1)
- rs = self.tsframe.pct_change(2)
- filled = self.tsframe.fillna(method='pad')
- assert_frame_equal(rs, filled / filled.shift(2) - 1)
- rs = self.tsframe.pct_change(fill_method='bfill', limit=1)
- filled = self.tsframe.fillna(method='bfill', limit=1)
- assert_frame_equal(rs, filled / filled.shift(1) - 1)
- rs = self.tsframe.pct_change(freq='5D')
- filled = self.tsframe.fillna(method='pad')
- assert_frame_equal(rs,
- (filled / filled.shift(freq='5D') - 1)
- .reindex_like(filled))
- def test_pct_change_shift_over_nas(self):
- s = Series([1., 1.5, np.nan, 2.5, 3.])
- df = DataFrame({'a': s, 'b': s})
- chg = df.pct_change()
- expected = Series([np.nan, 0.5, 0., 2.5 / 1.5 - 1, .2])
- edf = DataFrame({'a': expected, 'b': expected})
- assert_frame_equal(chg, edf)
- @pytest.mark.parametrize("freq, periods, fill_method, limit",
- [('5B', 5, None, None),
- ('3B', 3, None, None),
- ('3B', 3, 'bfill', None),
- ('7B', 7, 'pad', 1),
- ('7B', 7, 'bfill', 3),
- ('14B', 14, None, None)])
- def test_pct_change_periods_freq(self, freq, periods, fill_method, limit):
- # GH 7292
- rs_freq = self.tsframe.pct_change(freq=freq,
- fill_method=fill_method,
- limit=limit)
- rs_periods = self.tsframe.pct_change(periods,
- fill_method=fill_method,
- limit=limit)
- assert_frame_equal(rs_freq, rs_periods)
- empty_ts = DataFrame(index=self.tsframe.index,
- columns=self.tsframe.columns)
- rs_freq = empty_ts.pct_change(freq=freq,
- fill_method=fill_method,
- limit=limit)
- rs_periods = empty_ts.pct_change(periods,
- fill_method=fill_method,
- limit=limit)
- assert_frame_equal(rs_freq, rs_periods)
- def test_frame_ctor_datetime64_column(self):
- rng = date_range('1/1/2000 00:00:00', '1/1/2000 1:59:50', freq='10s')
- dates = np.asarray(rng)
- df = DataFrame({'A': np.random.randn(len(rng)), 'B': dates})
- assert np.issubdtype(df['B'].dtype, np.dtype('M8[ns]'))
- def test_frame_append_datetime64_column(self):
- rng = date_range('1/1/2000 00:00:00', '1/1/2000 1:59:50', freq='10s')
- df = DataFrame(index=np.arange(len(rng)))
- df['A'] = rng
- assert np.issubdtype(df['A'].dtype, np.dtype('M8[ns]'))
- def test_frame_datetime64_pre1900_repr(self):
- df = DataFrame({'year': date_range('1/1/1700', periods=50,
- freq='A-DEC')})
- # it works!
- repr(df)
- def test_frame_append_datetime64_col_other_units(self):
- n = 100
- units = ['h', 'm', 's', 'ms', 'D', 'M', 'Y']
- ns_dtype = np.dtype('M8[ns]')
- for unit in units:
- dtype = np.dtype('M8[%s]' % unit)
- vals = np.arange(n, dtype=np.int64).view(dtype)
- df = DataFrame({'ints': np.arange(n)}, index=np.arange(n))
- df[unit] = vals
- ex_vals = to_datetime(vals.astype('O')).values
- assert df[unit].dtype == ns_dtype
- assert (df[unit].values == ex_vals).all()
- # Test insertion into existing datetime64 column
- df = DataFrame({'ints': np.arange(n)}, index=np.arange(n))
- df['dates'] = np.arange(n, dtype=np.int64).view(ns_dtype)
- for unit in units:
- dtype = np.dtype('M8[%s]' % unit)
- vals = np.arange(n, dtype=np.int64).view(dtype)
- tmp = df.copy()
- tmp['dates'] = vals
- ex_vals = to_datetime(vals.astype('O')).values
- assert (tmp['dates'].values == ex_vals).all()
- def test_shift(self):
- # naive shift
- shiftedFrame = self.tsframe.shift(5)
- tm.assert_index_equal(shiftedFrame.index, self.tsframe.index)
- shiftedSeries = self.tsframe['A'].shift(5)
- assert_series_equal(shiftedFrame['A'], shiftedSeries)
- shiftedFrame = self.tsframe.shift(-5)
- tm.assert_index_equal(shiftedFrame.index, self.tsframe.index)
- shiftedSeries = self.tsframe['A'].shift(-5)
- assert_series_equal(shiftedFrame['A'], shiftedSeries)
- # shift by 0
- unshifted = self.tsframe.shift(0)
- assert_frame_equal(unshifted, self.tsframe)
- # shift by DateOffset
- shiftedFrame = self.tsframe.shift(5, freq=offsets.BDay())
- assert len(shiftedFrame) == len(self.tsframe)
- shiftedFrame2 = self.tsframe.shift(5, freq='B')
- assert_frame_equal(shiftedFrame, shiftedFrame2)
- d = self.tsframe.index[0]
- shifted_d = d + offsets.BDay(5)
- assert_series_equal(self.tsframe.xs(d),
- shiftedFrame.xs(shifted_d), check_names=False)
- # shift int frame
- int_shifted = self.intframe.shift(1) # noqa
- # Shifting with PeriodIndex
- ps = tm.makePeriodFrame()
- shifted = ps.shift(1)
- unshifted = shifted.shift(-1)
- tm.assert_index_equal(shifted.index, ps.index)
- tm.assert_index_equal(unshifted.index, ps.index)
- tm.assert_numpy_array_equal(unshifted.iloc[:, 0].dropna().values,
- ps.iloc[:-1, 0].values)
- shifted2 = ps.shift(1, 'B')
- shifted3 = ps.shift(1, offsets.BDay())
- assert_frame_equal(shifted2, shifted3)
- assert_frame_equal(ps, shifted2.shift(-1, 'B'))
- msg = 'does not match PeriodIndex freq'
- with pytest.raises(ValueError, match=msg):
- ps.shift(freq='D')
- # shift other axis
- # GH 6371
- df = DataFrame(np.random.rand(10, 5))
- expected = pd.concat([DataFrame(np.nan, index=df.index,
- columns=[0]),
- df.iloc[:, 0:-1]],
- ignore_index=True, axis=1)
- result = df.shift(1, axis=1)
- assert_frame_equal(result, expected)
- # shift named axis
- df = DataFrame(np.random.rand(10, 5))
- expected = pd.concat([DataFrame(np.nan, index=df.index,
- columns=[0]),
- df.iloc[:, 0:-1]],
- ignore_index=True, axis=1)
- result = df.shift(1, axis='columns')
- assert_frame_equal(result, expected)
- def test_shift_bool(self):
- df = DataFrame({'high': [True, False],
- 'low': [False, False]})
- rs = df.shift(1)
- xp = DataFrame(np.array([[np.nan, np.nan],
- [True, False]], dtype=object),
- columns=['high', 'low'])
- assert_frame_equal(rs, xp)
- def test_shift_categorical(self):
- # GH 9416
- s1 = pd.Series(['a', 'b', 'c'], dtype='category')
- s2 = pd.Series(['A', 'B', 'C'], dtype='category')
- df = DataFrame({'one': s1, 'two': s2})
- rs = df.shift(1)
- xp = DataFrame({'one': s1.shift(1), 'two': s2.shift(1)})
- assert_frame_equal(rs, xp)
- def test_shift_fill_value(self):
- # GH #24128
- df = DataFrame([1, 2, 3, 4, 5],
- index=date_range('1/1/2000', periods=5, freq='H'))
- exp = DataFrame([0, 1, 2, 3, 4],
- index=date_range('1/1/2000', periods=5, freq='H'))
- result = df.shift(1, fill_value=0)
- assert_frame_equal(result, exp)
- exp = DataFrame([0, 0, 1, 2, 3],
- index=date_range('1/1/2000', periods=5, freq='H'))
- result = df.shift(2, fill_value=0)
- assert_frame_equal(result, exp)
- def test_shift_empty(self):
- # Regression test for #8019
- df = DataFrame({'foo': []})
- rs = df.shift(-1)
- assert_frame_equal(df, rs)
- def test_shift_duplicate_columns(self):
- # GH 9092; verify that position-based shifting works
- # in the presence of duplicate columns
- column_lists = [list(range(5)), [1] * 5, [1, 1, 2, 2, 1]]
- data = np.random.randn(20, 5)
- shifted = []
- for columns in column_lists:
- df = pd.DataFrame(data.copy(), columns=columns)
- for s in range(5):
- df.iloc[:, s] = df.iloc[:, s].shift(s + 1)
- df.columns = range(5)
- shifted.append(df)
- # sanity check the base case
- nulls = shifted[0].isna().sum()
- assert_series_equal(nulls, Series(range(1, 6), dtype='int64'))
- # check all answers are the same
- assert_frame_equal(shifted[0], shifted[1])
- assert_frame_equal(shifted[0], shifted[2])
- def test_tshift(self):
- # PeriodIndex
- ps = tm.makePeriodFrame()
- shifted = ps.tshift(1)
- unshifted = shifted.tshift(-1)
- assert_frame_equal(unshifted, ps)
- shifted2 = ps.tshift(freq='B')
- assert_frame_equal(shifted, shifted2)
- shifted3 = ps.tshift(freq=offsets.BDay())
- assert_frame_equal(shifted, shifted3)
- with pytest.raises(ValueError, match='does not match'):
- ps.tshift(freq='M')
- # DatetimeIndex
- shifted = self.tsframe.tshift(1)
- unshifted = shifted.tshift(-1)
- assert_frame_equal(self.tsframe, unshifted)
- shifted2 = self.tsframe.tshift(freq=self.tsframe.index.freq)
- assert_frame_equal(shifted, shifted2)
- inferred_ts = DataFrame(self.tsframe.values,
- Index(np.asarray(self.tsframe.index)),
- columns=self.tsframe.columns)
- shifted = inferred_ts.tshift(1)
- unshifted = shifted.tshift(-1)
- assert_frame_equal(shifted, self.tsframe.tshift(1))
- assert_frame_equal(unshifted, inferred_ts)
- no_freq = self.tsframe.iloc[[0, 5, 7], :]
- pytest.raises(ValueError, no_freq.tshift)
- def test_truncate(self):
- ts = self.tsframe[::3]
- start, end = self.tsframe.index[3], self.tsframe.index[6]
- start_missing = self.tsframe.index[2]
- end_missing = self.tsframe.index[7]
- # neither specified
- truncated = ts.truncate()
- assert_frame_equal(truncated, ts)
- # both specified
- expected = ts[1:3]
- truncated = ts.truncate(start, end)
- assert_frame_equal(truncated, expected)
- truncated = ts.truncate(start_missing, end_missing)
- assert_frame_equal(truncated, expected)
- # start specified
- expected = ts[1:]
- truncated = ts.truncate(before=start)
- assert_frame_equal(truncated, expected)
- truncated = ts.truncate(before=start_missing)
- assert_frame_equal(truncated, expected)
- # end specified
- expected = ts[:3]
- truncated = ts.truncate(after=end)
- assert_frame_equal(truncated, expected)
- truncated = ts.truncate(after=end_missing)
- assert_frame_equal(truncated, expected)
- pytest.raises(ValueError, ts.truncate,
- before=ts.index[-1] - ts.index.freq,
- after=ts.index[0] + ts.index.freq)
- def test_truncate_copy(self):
- index = self.tsframe.index
- truncated = self.tsframe.truncate(index[5], index[10])
- truncated.values[:] = 5.
- assert not (self.tsframe.values[5:11] == 5).any()
- def test_truncate_nonsortedindex(self):
- # GH 17935
- df = pd.DataFrame({'A': ['a', 'b', 'c', 'd', 'e']},
- index=[5, 3, 2, 9, 0])
- msg = 'truncate requires a sorted index'
- with pytest.raises(ValueError, match=msg):
- df.truncate(before=3, after=9)
- rng = pd.date_range('2011-01-01', '2012-01-01', freq='W')
- ts = pd.DataFrame({'A': np.random.randn(len(rng)),
- 'B': np.random.randn(len(rng))},
- index=rng)
- msg = 'truncate requires a sorted index'
- with pytest.raises(ValueError, match=msg):
- ts.sort_values('A', ascending=False).truncate(before='2011-11',
- after='2011-12')
- df = pd.DataFrame({3: np.random.randn(5),
- 20: np.random.randn(5),
- 2: np.random.randn(5),
- 0: np.random.randn(5)},
- columns=[3, 20, 2, 0])
- msg = 'truncate requires a sorted index'
- with pytest.raises(ValueError, match=msg):
- df.truncate(before=2, after=20, axis=1)
- def test_asfreq(self):
- offset_monthly = self.tsframe.asfreq(offsets.BMonthEnd())
- rule_monthly = self.tsframe.asfreq('BM')
- tm.assert_almost_equal(offset_monthly['A'], rule_monthly['A'])
- filled = rule_monthly.asfreq('B', method='pad') # noqa
- # TODO: actually check that this worked.
- # don't forget!
- filled_dep = rule_monthly.asfreq('B', method='pad') # noqa
- # test does not blow up on length-0 DataFrame
- zero_length = self.tsframe.reindex([])
- result = zero_length.asfreq('BM')
- assert result is not zero_length
- def test_asfreq_datetimeindex(self):
- df = DataFrame({'A': [1, 2, 3]},
- index=[datetime(2011, 11, 1), datetime(2011, 11, 2),
- datetime(2011, 11, 3)])
- df = df.asfreq('B')
- assert isinstance(df.index, DatetimeIndex)
- ts = df['A'].asfreq('B')
- assert isinstance(ts.index, DatetimeIndex)
- def test_asfreq_fillvalue(self):
- # test for fill value during upsampling, related to issue 3715
- # setup
- rng = pd.date_range('1/1/2016', periods=10, freq='2S')
- ts = pd.Series(np.arange(len(rng)), index=rng)
- df = pd.DataFrame({'one': ts})
- # insert pre-existing missing value
- df.loc['2016-01-01 00:00:08', 'one'] = None
- actual_df = df.asfreq(freq='1S', fill_value=9.0)
- expected_df = df.asfreq(freq='1S').fillna(9.0)
- expected_df.loc['2016-01-01 00:00:08', 'one'] = None
- assert_frame_equal(expected_df, actual_df)
- expected_series = ts.asfreq(freq='1S').fillna(9.0)
- actual_series = ts.asfreq(freq='1S', fill_value=9.0)
- assert_series_equal(expected_series, actual_series)
- @pytest.mark.parametrize("data,idx,expected_first,expected_last", [
- ({'A': [1, 2, 3]}, [1, 1, 2], 1, 2),
- ({'A': [1, 2, 3]}, [1, 2, 2], 1, 2),
- ({'A': [1, 2, 3, 4]}, ['d', 'd', 'd', 'd'], 'd', 'd'),
- ({'A': [1, np.nan, 3]}, [1, 1, 2], 1, 2),
- ({'A': [np.nan, np.nan, 3]}, [1, 1, 2], 2, 2),
- ({'A': [1, np.nan, 3]}, [1, 2, 2], 1, 2)])
- def test_first_last_valid(self, data, idx,
- expected_first, expected_last):
- N = len(self.frame.index)
- mat = np.random.randn(N)
- mat[:5] = np.nan
- mat[-5:] = np.nan
- frame = DataFrame({'foo': mat}, index=self.frame.index)
- index = frame.first_valid_index()
- assert index == frame.index[5]
- index = frame.last_valid_index()
- assert index == frame.index[-6]
- # GH12800
- empty = DataFrame()
- assert empty.last_valid_index() is None
- assert empty.first_valid_index() is None
- # GH17400: no valid entries
- frame[:] = np.nan
- assert frame.last_valid_index() is None
- assert frame.first_valid_index() is None
- # GH20499: its preserves freq with holes
- frame.index = date_range("20110101", periods=N, freq="B")
- frame.iloc[1] = 1
- frame.iloc[-2] = 1
- assert frame.first_valid_index() == frame.index[1]
- assert frame.last_valid_index() == frame.index[-2]
- assert frame.first_valid_index().freq == frame.index.freq
- assert frame.last_valid_index().freq == frame.index.freq
- # GH 21441
- df = DataFrame(data, index=idx)
- assert expected_first == df.first_valid_index()
- assert expected_last == df.last_valid_index()
- def test_first_subset(self):
- ts = tm.makeTimeDataFrame(freq='12h')
- result = ts.first('10d')
- assert len(result) == 20
- ts = tm.makeTimeDataFrame(freq='D')
- result = ts.first('10d')
- assert len(result) == 10
- result = ts.first('3M')
- expected = ts[:'3/31/2000']
- assert_frame_equal(result, expected)
- result = ts.first('21D')
- expected = ts[:21]
- assert_frame_equal(result, expected)
- result = ts[:0].first('3M')
- assert_frame_equal(result, ts[:0])
- def test_first_raises(self):
- # GH20725
- df = pd.DataFrame([[1, 2, 3], [4, 5, 6]])
- with pytest.raises(TypeError): # index is not a DatetimeIndex
- df.first('1D')
- def test_last_subset(self):
- ts = tm.makeTimeDataFrame(freq='12h')
- result = ts.last('10d')
- assert len(result) == 20
- ts = tm.makeTimeDataFrame(nper=30, freq='D')
- result = ts.last('10d')
- assert len(result) == 10
- result = ts.last('21D')
- expected = ts['2000-01-10':]
- assert_frame_equal(result, expected)
- result = ts.last('21D')
- expected = ts[-21:]
- assert_frame_equal(result, expected)
- result = ts[:0].last('3M')
- assert_frame_equal(result, ts[:0])
- def test_last_raises(self):
- # GH20725
- df = pd.DataFrame([[1, 2, 3], [4, 5, 6]])
- with pytest.raises(TypeError): # index is not a DatetimeIndex
- df.last('1D')
- def test_at_time(self):
- rng = date_range('1/1/2000', '1/5/2000', freq='5min')
- ts = DataFrame(np.random.randn(len(rng), 2), index=rng)
- rs = ts.at_time(rng[1])
- assert (rs.index.hour == rng[1].hour).all()
- assert (rs.index.minute == rng[1].minute).all()
- assert (rs.index.second == rng[1].second).all()
- result = ts.at_time('9:30')
- expected = ts.at_time(time(9, 30))
- assert_frame_equal(result, expected)
- result = ts.loc[time(9, 30)]
- expected = ts.loc[(rng.hour == 9) & (rng.minute == 30)]
- assert_frame_equal(result, expected)
- # midnight, everything
- rng = date_range('1/1/2000', '1/31/2000')
- ts = DataFrame(np.random.randn(len(rng), 3), index=rng)
- result = ts.at_time(time(0, 0))
- assert_frame_equal(result, ts)
- # time doesn't exist
- rng = date_range('1/1/2012', freq='23Min', periods=384)
- ts = DataFrame(np.random.randn(len(rng), 2), rng)
- rs = ts.at_time('16:00')
- assert len(rs) == 0
- def test_at_time_raises(self):
- # GH20725
- df = pd.DataFrame([[1, 2, 3], [4, 5, 6]])
- with pytest.raises(TypeError): # index is not a DatetimeIndex
- df.at_time('00:00')
- @pytest.mark.parametrize('axis', ['index', 'columns', 0, 1])
- def test_at_time_axis(self, axis):
- # issue 8839
- rng = date_range('1/1/2000', '1/5/2000', freq='5min')
- ts = DataFrame(np.random.randn(len(rng), len(rng)))
- ts.index, ts.columns = rng, rng
- indices = rng[(rng.hour == 9) & (rng.minute == 30) & (rng.second == 0)]
- if axis in ['index', 0]:
- expected = ts.loc[indices, :]
- elif axis in ['columns', 1]:
- expected = ts.loc[:, indices]
- result = ts.at_time('9:30', axis=axis)
- assert_frame_equal(result, expected)
- def test_between_time(self, close_open_fixture):
- rng = date_range('1/1/2000', '1/5/2000', freq='5min')
- ts = DataFrame(np.random.randn(len(rng), 2), index=rng)
- stime = time(0, 0)
- etime = time(1, 0)
- inc_start, inc_end = close_open_fixture
- filtered = ts.between_time(stime, etime, inc_start, inc_end)
- exp_len = 13 * 4 + 1
- if not inc_start:
- exp_len -= 5
- if not inc_end:
- exp_len -= 4
- assert len(filtered) == exp_len
- for rs in filtered.index:
- t = rs.time()
- if inc_start:
- assert t >= stime
- else:
- assert t > stime
- if inc_end:
- assert t <= etime
- else:
- assert t < etime
- result = ts.between_time('00:00', '01:00')
- expected = ts.between_time(stime, etime)
- assert_frame_equal(result, expected)
- # across midnight
- rng = date_range('1/1/2000', '1/5/2000', freq='5min')
- ts = DataFrame(np.random.randn(len(rng), 2), index=rng)
- stime = time(22, 0)
- etime = time(9, 0)
- filtered = ts.between_time(stime, etime, inc_start, inc_end)
- exp_len = (12 * 11 + 1) * 4 + 1
- if not inc_start:
- exp_len -= 4
- if not inc_end:
- exp_len -= 4
- assert len(filtered) == exp_len
- for rs in filtered.index:
- t = rs.time()
- if inc_start:
- assert (t >= stime) or (t <= etime)
- else:
- assert (t > stime) or (t <= etime)
- if inc_end:
- assert (t <= etime) or (t >= stime)
- else:
- assert (t < etime) or (t >= stime)
- def test_between_time_raises(self):
- # GH20725
- df = pd.DataFrame([[1, 2, 3], [4, 5, 6]])
- with pytest.raises(TypeError): # index is not a DatetimeIndex
- df.between_time(start_time='00:00', end_time='12:00')
- def test_between_time_axis(self, axis):
- # issue 8839
- rng = date_range('1/1/2000', periods=100, freq='10min')
- ts = DataFrame(np.random.randn(len(rng), len(rng)))
- stime, etime = ('08:00:00', '09:00:00')
- exp_len = 7
- if axis in ['index', 0]:
- ts.index = rng
- assert len(ts.between_time(stime, etime)) == exp_len
- assert len(ts.between_time(stime, etime, axis=0)) == exp_len
- if axis in ['columns', 1]:
- ts.columns = rng
- selected = ts.between_time(stime, etime, axis=1).columns
- assert len(selected) == exp_len
- def test_between_time_axis_raises(self, axis):
- # issue 8839
- rng = date_range('1/1/2000', periods=100, freq='10min')
- mask = np.arange(0, len(rng))
- rand_data = np.random.randn(len(rng), len(rng))
- ts = DataFrame(rand_data, index=rng, columns=rng)
- stime, etime = ('08:00:00', '09:00:00')
- if axis in ['columns', 1]:
- ts.index = mask
- pytest.raises(TypeError, ts.between_time, stime, etime)
- pytest.raises(TypeError, ts.between_time, stime, etime, axis=0)
- if axis in ['index', 0]:
- ts.columns = mask
- pytest.raises(TypeError, ts.between_time, stime, etime, axis=1)
- def test_operation_on_NaT(self):
- # Both NaT and Timestamp are in DataFrame.
- df = pd.DataFrame({'foo': [pd.NaT, pd.NaT,
- pd.Timestamp('2012-05-01')]})
- res = df.min()
- exp = pd.Series([pd.Timestamp('2012-05-01')], index=["foo"])
- tm.assert_series_equal(res, exp)
- res = df.max()
- exp = pd.Series([pd.Timestamp('2012-05-01')], index=["foo"])
- tm.assert_series_equal(res, exp)
- # GH12941, only NaTs are in DataFrame.
- df = pd.DataFrame({'foo': [pd.NaT, pd.NaT]})
- res = df.min()
- exp = pd.Series([pd.NaT], index=["foo"])
- tm.assert_series_equal(res, exp)
- res = df.max()
- exp = pd.Series([pd.NaT], index=["foo"])
- tm.assert_series_equal(res, exp)
- def test_datetime_assignment_with_NaT_and_diff_time_units(self):
- # GH 7492
- data_ns = np.array([1, 'nat'], dtype='datetime64[ns]')
- result = pd.Series(data_ns).to_frame()
- result['new'] = data_ns
- expected = pd.DataFrame({0: [1, None],
- 'new': [1, None]}, dtype='datetime64[ns]')
- tm.assert_frame_equal(result, expected)
- # OutOfBoundsDatetime error shouldn't occur
- data_s = np.array([1, 'nat'], dtype='datetime64[s]')
- result['new'] = data_s
- expected = pd.DataFrame({0: [1, None],
- 'new': [1e9, None]}, dtype='datetime64[ns]')
- tm.assert_frame_equal(result, expected)
- def test_frame_to_period(self):
- K = 5
- dr = date_range('1/1/2000', '1/1/2001')
- pr = period_range('1/1/2000', '1/1/2001')
- df = DataFrame(np.random.randn(len(dr), K), index=dr)
- df['mix'] = 'a'
- pts = df.to_period()
- exp = df.copy()
- exp.index = pr
- assert_frame_equal(pts, exp)
- pts = df.to_period('M')
- tm.assert_index_equal(pts.index, exp.index.asfreq('M'))
- df = df.T
- pts = df.to_period(axis=1)
- exp = df.copy()
- exp.columns = pr
- assert_frame_equal(pts, exp)
- pts = df.to_period('M', axis=1)
- tm.assert_index_equal(pts.columns, exp.columns.asfreq('M'))
- pytest.raises(ValueError, df.to_period, axis=2)
- @pytest.mark.parametrize("fn", ['tz_localize', 'tz_convert'])
- def test_tz_convert_and_localize(self, fn):
- l0 = date_range('20140701', periods=5, freq='D')
- l1 = date_range('20140701', periods=5, freq='D')
- int_idx = Index(range(5))
- if fn == 'tz_convert':
- l0 = l0.tz_localize('UTC')
- l1 = l1.tz_localize('UTC')
- for idx in [l0, l1]:
- l0_expected = getattr(idx, fn)('US/Pacific')
- l1_expected = getattr(idx, fn)('US/Pacific')
- df1 = DataFrame(np.ones(5), index=l0)
- df1 = getattr(df1, fn)('US/Pacific')
- assert_index_equal(df1.index, l0_expected)
- # MultiIndex
- # GH7846
- df2 = DataFrame(np.ones(5), MultiIndex.from_arrays([l0, l1]))
- df3 = getattr(df2, fn)('US/Pacific', level=0)
- assert not df3.index.levels[0].equals(l0)
- assert_index_equal(df3.index.levels[0], l0_expected)
- assert_index_equal(df3.index.levels[1], l1)
- assert not df3.index.levels[1].equals(l1_expected)
- df3 = getattr(df2, fn)('US/Pacific', level=1)
- assert_index_equal(df3.index.levels[0], l0)
- assert not df3.index.levels[0].equals(l0_expected)
- assert_index_equal(df3.index.levels[1], l1_expected)
- assert not df3.index.levels[1].equals(l1)
- df4 = DataFrame(np.ones(5),
- MultiIndex.from_arrays([int_idx, l0]))
- # TODO: untested
- df5 = getattr(df4, fn)('US/Pacific', level=1) # noqa
- assert_index_equal(df3.index.levels[0], l0)
- assert not df3.index.levels[0].equals(l0_expected)
- assert_index_equal(df3.index.levels[1], l1_expected)
- assert not df3.index.levels[1].equals(l1)
- # Bad Inputs
- # Not DatetimeIndex / PeriodIndex
- with pytest.raises(TypeError, match='DatetimeIndex'):
- df = DataFrame(index=int_idx)
- df = getattr(df, fn)('US/Pacific')
- # Not DatetimeIndex / PeriodIndex
- with pytest.raises(TypeError, match='DatetimeIndex'):
- df = DataFrame(np.ones(5),
- MultiIndex.from_arrays([int_idx, l0]))
- df = getattr(df, fn)('US/Pacific', level=0)
- # Invalid level
- with pytest.raises(ValueError, match='not valid'):
- df = DataFrame(index=l0)
- df = getattr(df, fn)('US/Pacific', level=1)
|