""" test partial slicing on Series/Frame """ from datetime import datetime import operator as op import numpy as np import pytest import pandas as pd from pandas import ( DataFrame, DatetimeIndex, Index, Series, Timedelta, Timestamp, date_range) from pandas.core.indexing import IndexingError from pandas.util import testing as tm class TestSlicing(object): def test_dti_slicing(self): dti = date_range(start='1/1/2005', end='12/1/2005', freq='M') dti2 = dti[[1, 3, 5]] v1 = dti2[0] v2 = dti2[1] v3 = dti2[2] assert v1 == Timestamp('2/28/2005') assert v2 == Timestamp('4/30/2005') assert v3 == Timestamp('6/30/2005') # don't carry freq through irregular slicing assert dti2.freq is None def test_slice_keeps_name(self): # GH4226 st = pd.Timestamp('2013-07-01 00:00:00', tz='America/Los_Angeles') et = pd.Timestamp('2013-07-02 00:00:00', tz='America/Los_Angeles') dr = pd.date_range(st, et, freq='H', name='timebucket') assert dr[1:].name == dr.name def test_slice_with_negative_step(self): ts = Series(np.arange(20), date_range('2014-01-01', periods=20, freq='MS')) SLC = pd.IndexSlice def assert_slices_equivalent(l_slc, i_slc): tm.assert_series_equal(ts[l_slc], ts.iloc[i_slc]) tm.assert_series_equal(ts.loc[l_slc], ts.iloc[i_slc]) tm.assert_series_equal(ts.loc[l_slc], ts.iloc[i_slc]) assert_slices_equivalent(SLC[Timestamp('2014-10-01')::-1], SLC[9::-1]) assert_slices_equivalent(SLC['2014-10-01'::-1], SLC[9::-1]) assert_slices_equivalent(SLC[:Timestamp('2014-10-01'):-1], SLC[:8:-1]) assert_slices_equivalent(SLC[:'2014-10-01':-1], SLC[:8:-1]) assert_slices_equivalent(SLC['2015-02-01':'2014-10-01':-1], SLC[13:8:-1]) assert_slices_equivalent(SLC[Timestamp('2015-02-01'):Timestamp( '2014-10-01'):-1], SLC[13:8:-1]) assert_slices_equivalent(SLC['2015-02-01':Timestamp('2014-10-01'):-1], SLC[13:8:-1]) assert_slices_equivalent(SLC[Timestamp('2015-02-01'):'2014-10-01':-1], SLC[13:8:-1]) assert_slices_equivalent(SLC['2014-10-01':'2015-02-01':-1], SLC[:0]) def test_slice_with_zero_step_raises(self): ts = Series(np.arange(20), date_range('2014-01-01', periods=20, freq='MS')) with pytest.raises(ValueError, match='slice step cannot be zero'): ts[::0] with pytest.raises(ValueError, match='slice step cannot be zero'): ts.loc[::0] with pytest.raises(ValueError, match='slice step cannot be zero'): ts.loc[::0] def test_slice_bounds_empty(self): # GH#14354 empty_idx = date_range(freq='1H', periods=0, end='2015') right = empty_idx._maybe_cast_slice_bound('2015-01-02', 'right', 'loc') exp = Timestamp('2015-01-02 23:59:59.999999999') assert right == exp left = empty_idx._maybe_cast_slice_bound('2015-01-02', 'left', 'loc') exp = Timestamp('2015-01-02 00:00:00') assert left == exp def test_slice_duplicate_monotonic(self): # https://github.com/pandas-dev/pandas/issues/16515 idx = pd.DatetimeIndex(['2017', '2017']) result = idx._maybe_cast_slice_bound('2017-01-01', 'left', 'loc') expected = Timestamp('2017-01-01') assert result == expected def test_monotone_DTI_indexing_bug(self): # GH 19362 # Testing accessing the first element in a montononic descending # partial string indexing. df = pd.DataFrame(list(range(5))) date_list = ['2018-01-02', '2017-02-10', '2016-03-10', '2015-03-15', '2014-03-16'] date_index = pd.to_datetime(date_list) df['date'] = date_index expected = pd.DataFrame({0: list(range(5)), 'date': date_index}) tm.assert_frame_equal(df, expected) df = pd.DataFrame({'A': [1, 2, 3]}, index=pd.date_range('20170101', periods=3)[::-1]) expected = pd.DataFrame({'A': 1}, index=pd.date_range('20170103', periods=1)) tm.assert_frame_equal(df.loc['2017-01-03'], expected) def test_slice_year(self): dti = date_range(freq='B', start=datetime(2005, 1, 1), periods=500) s = Series(np.arange(len(dti)), index=dti) result = s['2005'] expected = s[s.index.year == 2005] tm.assert_series_equal(result, expected) df = DataFrame(np.random.rand(len(dti), 5), index=dti) result = df.loc['2005'] expected = df[df.index.year == 2005] tm.assert_frame_equal(result, expected) rng = date_range('1/1/2000', '1/1/2010') result = rng.get_loc('2009') expected = slice(3288, 3653) assert result == expected def test_slice_quarter(self): dti = date_range(freq='D', start=datetime(2000, 6, 1), periods=500) s = Series(np.arange(len(dti)), index=dti) assert len(s['2001Q1']) == 90 df = DataFrame(np.random.rand(len(dti), 5), index=dti) assert len(df.loc['1Q01']) == 90 def test_slice_month(self): dti = date_range(freq='D', start=datetime(2005, 1, 1), periods=500) s = Series(np.arange(len(dti)), index=dti) assert len(s['2005-11']) == 30 df = DataFrame(np.random.rand(len(dti), 5), index=dti) assert len(df.loc['2005-11']) == 30 tm.assert_series_equal(s['2005-11'], s['11-2005']) def test_partial_slice(self): rng = date_range(freq='D', start=datetime(2005, 1, 1), periods=500) s = Series(np.arange(len(rng)), index=rng) result = s['2005-05':'2006-02'] expected = s['20050501':'20060228'] tm.assert_series_equal(result, expected) result = s['2005-05':] expected = s['20050501':] tm.assert_series_equal(result, expected) result = s[:'2006-02'] expected = s[:'20060228'] tm.assert_series_equal(result, expected) result = s['2005-1-1'] assert result == s.iloc[0] pytest.raises(Exception, s.__getitem__, '2004-12-31') def test_partial_slice_daily(self): rng = date_range(freq='H', start=datetime(2005, 1, 31), periods=500) s = Series(np.arange(len(rng)), index=rng) result = s['2005-1-31'] tm.assert_series_equal(result, s.iloc[:24]) pytest.raises(Exception, s.__getitem__, '2004-12-31 00') def test_partial_slice_hourly(self): rng = date_range(freq='T', start=datetime(2005, 1, 1, 20, 0, 0), periods=500) s = Series(np.arange(len(rng)), index=rng) result = s['2005-1-1'] tm.assert_series_equal(result, s.iloc[:60 * 4]) result = s['2005-1-1 20'] tm.assert_series_equal(result, s.iloc[:60]) assert s['2005-1-1 20:00'] == s.iloc[0] pytest.raises(Exception, s.__getitem__, '2004-12-31 00:15') def test_partial_slice_minutely(self): rng = date_range(freq='S', start=datetime(2005, 1, 1, 23, 59, 0), periods=500) s = Series(np.arange(len(rng)), index=rng) result = s['2005-1-1 23:59'] tm.assert_series_equal(result, s.iloc[:60]) result = s['2005-1-1'] tm.assert_series_equal(result, s.iloc[:60]) assert s[Timestamp('2005-1-1 23:59:00')] == s.iloc[0] pytest.raises(Exception, s.__getitem__, '2004-12-31 00:00:00') def test_partial_slice_second_precision(self): rng = date_range(start=datetime(2005, 1, 1, 0, 0, 59, microsecond=999990), periods=20, freq='US') s = Series(np.arange(20), rng) tm.assert_series_equal(s['2005-1-1 00:00'], s.iloc[:10]) tm.assert_series_equal(s['2005-1-1 00:00:59'], s.iloc[:10]) tm.assert_series_equal(s['2005-1-1 00:01'], s.iloc[10:]) tm.assert_series_equal(s['2005-1-1 00:01:00'], s.iloc[10:]) assert s[Timestamp('2005-1-1 00:00:59.999990')] == s.iloc[0] with pytest.raises(KeyError, match='2005-1-1 00:00:00'): s['2005-1-1 00:00:00'] def test_partial_slicing_dataframe(self): # GH14856 # Test various combinations of string slicing resolution vs. # index resolution # - If string resolution is less precise than index resolution, # string is considered a slice # - If string resolution is equal to or more precise than index # resolution, string is considered an exact match formats = ['%Y', '%Y-%m', '%Y-%m-%d', '%Y-%m-%d %H', '%Y-%m-%d %H:%M', '%Y-%m-%d %H:%M:%S'] resolutions = ['year', 'month', 'day', 'hour', 'minute', 'second'] for rnum, resolution in enumerate(resolutions[2:], 2): # we check only 'day', 'hour', 'minute' and 'second' unit = Timedelta("1 " + resolution) middate = datetime(2012, 1, 1, 0, 0, 0) index = DatetimeIndex([middate - unit, middate, middate + unit]) values = [1, 2, 3] df = DataFrame({'a': values}, index, dtype=np.int64) assert df.index.resolution == resolution # Timestamp with the same resolution as index # Should be exact match for Series (return scalar) # and raise KeyError for Frame for timestamp, expected in zip(index, values): ts_string = timestamp.strftime(formats[rnum]) # make ts_string as precise as index result = df['a'][ts_string] assert isinstance(result, np.int64) assert result == expected pytest.raises(KeyError, df.__getitem__, ts_string) # Timestamp with resolution less precise than index for fmt in formats[:rnum]: for element, theslice in [[0, slice(None, 1)], [1, slice(1, None)]]: ts_string = index[element].strftime(fmt) # Series should return slice result = df['a'][ts_string] expected = df['a'][theslice] tm.assert_series_equal(result, expected) # Frame should return slice as well result = df[ts_string] expected = df[theslice] tm.assert_frame_equal(result, expected) # Timestamp with resolution more precise than index # Compatible with existing key # Should return scalar for Series # and raise KeyError for Frame for fmt in formats[rnum + 1:]: ts_string = index[1].strftime(fmt) result = df['a'][ts_string] assert isinstance(result, np.int64) assert result == 2 pytest.raises(KeyError, df.__getitem__, ts_string) # Not compatible with existing key # Should raise KeyError for fmt, res in list(zip(formats, resolutions))[rnum + 1:]: ts = index[1] + Timedelta("1 " + res) ts_string = ts.strftime(fmt) pytest.raises(KeyError, df['a'].__getitem__, ts_string) pytest.raises(KeyError, df.__getitem__, ts_string) def test_partial_slicing_with_multiindex(self): # GH 4758 # partial string indexing with a multi-index buggy df = DataFrame({'ACCOUNT': ["ACCT1", "ACCT1", "ACCT1", "ACCT2"], 'TICKER': ["ABC", "MNP", "XYZ", "XYZ"], 'val': [1, 2, 3, 4]}, index=date_range("2013-06-19 09:30:00", periods=4, freq='5T')) df_multi = df.set_index(['ACCOUNT', 'TICKER'], append=True) expected = DataFrame([ [1] ], index=Index(['ABC'], name='TICKER'), columns=['val']) result = df_multi.loc[('2013-06-19 09:30:00', 'ACCT1')] tm.assert_frame_equal(result, expected) expected = df_multi.loc[ (pd.Timestamp('2013-06-19 09:30:00', tz=None), 'ACCT1', 'ABC')] result = df_multi.loc[('2013-06-19 09:30:00', 'ACCT1', 'ABC')] tm.assert_series_equal(result, expected) # this is an IndexingError as we don't do partial string selection on # multi-levels. def f(): df_multi.loc[('2013-06-19', 'ACCT1', 'ABC')] pytest.raises(IndexingError, f) # GH 4294 # partial slice on a series mi s = pd.DataFrame(np.random.rand(1000, 1000), index=pd.date_range( '2000-1-1', periods=1000)).stack() s2 = s[:-1].copy() expected = s2['2000-1-4'] result = s2[pd.Timestamp('2000-1-4')] tm.assert_series_equal(result, expected) result = s[pd.Timestamp('2000-1-4')] expected = s['2000-1-4'] tm.assert_series_equal(result, expected) df2 = pd.DataFrame(s) expected = df2.xs('2000-1-4') result = df2.loc[pd.Timestamp('2000-1-4')] tm.assert_frame_equal(result, expected) def test_partial_slice_doesnt_require_monotonicity(self): # For historical reasons. s = pd.Series(np.arange(10), pd.date_range('2014-01-01', periods=10)) nonmonotonic = s[[3, 5, 4]] expected = nonmonotonic.iloc[:0] timestamp = pd.Timestamp('2014-01-10') tm.assert_series_equal(nonmonotonic['2014-01-10':], expected) with pytest.raises(KeyError, match=r"Timestamp\('2014-01-10 00:00:00'\)"): nonmonotonic[timestamp:] tm.assert_series_equal(nonmonotonic.loc['2014-01-10':], expected) with pytest.raises(KeyError, match=r"Timestamp\('2014-01-10 00:00:00'\)"): nonmonotonic.loc[timestamp:] def test_loc_datetime_length_one(self): # GH16071 df = pd.DataFrame(columns=['1'], index=pd.date_range('2016-10-01T00:00:00', '2016-10-01T23:59:59')) result = df.loc[datetime(2016, 10, 1):] tm.assert_frame_equal(result, df) result = df.loc['2016-10-01T00:00:00':] tm.assert_frame_equal(result, df) @pytest.mark.parametrize('datetimelike', [ Timestamp('20130101'), datetime(2013, 1, 1), np.datetime64('2013-01-01T00:00', 'ns')]) @pytest.mark.parametrize('op,expected', [ (op.lt, [True, False, False, False]), (op.le, [True, True, False, False]), (op.eq, [False, True, False, False]), (op.gt, [False, False, False, True])]) def test_selection_by_datetimelike(self, datetimelike, op, expected): # GH issue #17965, test for ability to compare datetime64[ns] columns # to datetimelike df = DataFrame({'A': [pd.Timestamp('20120101'), pd.Timestamp('20130101'), np.nan, pd.Timestamp('20130103')]}) result = op(df.A, datetimelike) expected = Series(expected, name='A') tm.assert_series_equal(result, expected)