123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126 |
- # coding=utf-8
- import numpy as np
- import pytest
- from pandas import DataFrame, Series, Timestamp, date_range, to_datetime
- import pandas.util.testing as tm
- from .common import TestData
- class TestFrameAsof(TestData):
- def setup_method(self, method):
- self.N = N = 50
- self.rng = date_range('1/1/1990', periods=N, freq='53s')
- self.df = DataFrame({'A': np.arange(N), 'B': np.arange(N)},
- index=self.rng)
- def test_basic(self):
- df = self.df.copy()
- df.loc[15:30, 'A'] = np.nan
- dates = date_range('1/1/1990', periods=self.N * 3,
- freq='25s')
- result = df.asof(dates)
- assert result.notna().all(1).all()
- lb = df.index[14]
- ub = df.index[30]
- dates = list(dates)
- result = df.asof(dates)
- assert result.notna().all(1).all()
- mask = (result.index >= lb) & (result.index < ub)
- rs = result[mask]
- assert (rs == 14).all(1).all()
- def test_subset(self):
- N = 10
- rng = date_range('1/1/1990', periods=N, freq='53s')
- df = DataFrame({'A': np.arange(N), 'B': np.arange(N)},
- index=rng)
- df.loc[4:8, 'A'] = np.nan
- dates = date_range('1/1/1990', periods=N * 3,
- freq='25s')
- # with a subset of A should be the same
- result = df.asof(dates, subset='A')
- expected = df.asof(dates)
- tm.assert_frame_equal(result, expected)
- # same with A/B
- result = df.asof(dates, subset=['A', 'B'])
- expected = df.asof(dates)
- tm.assert_frame_equal(result, expected)
- # B gives self.df.asof
- result = df.asof(dates, subset='B')
- expected = df.resample('25s', closed='right').ffill().reindex(dates)
- expected.iloc[20:] = 9
- tm.assert_frame_equal(result, expected)
- def test_missing(self):
- # GH 15118
- # no match found - `where` value before earliest date in index
- N = 10
- rng = date_range('1/1/1990', periods=N, freq='53s')
- df = DataFrame({'A': np.arange(N), 'B': np.arange(N)},
- index=rng)
- result = df.asof('1989-12-31')
- expected = Series(index=['A', 'B'], name=Timestamp('1989-12-31'))
- tm.assert_series_equal(result, expected)
- result = df.asof(to_datetime(['1989-12-31']))
- expected = DataFrame(index=to_datetime(['1989-12-31']),
- columns=['A', 'B'], dtype='float64')
- tm.assert_frame_equal(result, expected)
- def test_all_nans(self):
- # GH 15713
- # DataFrame is all nans
- result = DataFrame([np.nan]).asof([0])
- expected = DataFrame([np.nan])
- tm.assert_frame_equal(result, expected)
- # testing non-default indexes, multiple inputs
- dates = date_range('1/1/1990', periods=self.N * 3, freq='25s')
- result = DataFrame(np.nan, index=self.rng, columns=['A']).asof(dates)
- expected = DataFrame(np.nan, index=dates, columns=['A'])
- tm.assert_frame_equal(result, expected)
- # testing multiple columns
- dates = date_range('1/1/1990', periods=self.N * 3, freq='25s')
- result = DataFrame(np.nan, index=self.rng,
- columns=['A', 'B', 'C']).asof(dates)
- expected = DataFrame(np.nan, index=dates, columns=['A', 'B', 'C'])
- tm.assert_frame_equal(result, expected)
- # testing scalar input
- result = DataFrame(np.nan, index=[1, 2], columns=['A', 'B']).asof([3])
- expected = DataFrame(np.nan, index=[3], columns=['A', 'B'])
- tm.assert_frame_equal(result, expected)
- result = DataFrame(np.nan, index=[1, 2], columns=['A', 'B']).asof(3)
- expected = Series(np.nan, index=['A', 'B'], name=3)
- tm.assert_series_equal(result, expected)
- @pytest.mark.parametrize(
- "stamp,expected",
- [(Timestamp('2018-01-01 23:22:43.325+00:00'),
- Series(2.0, name=Timestamp('2018-01-01 23:22:43.325+00:00'))),
- (Timestamp('2018-01-01 22:33:20.682+01:00'),
- Series(1.0, name=Timestamp('2018-01-01 22:33:20.682+01:00'))),
- ]
- )
- def test_time_zone_aware_index(self, stamp, expected):
- # GH21194
- # Testing awareness of DataFrame index considering different
- # UTC and timezone
- df = DataFrame(data=[1, 2],
- index=[Timestamp('2018-01-01 21:00:05.001+00:00'),
- Timestamp('2018-01-01 22:35:10.550+00:00')])
- result = df.asof(stamp)
- tm.assert_series_equal(result, expected)
|