test_asof.py 4.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126
  1. # coding=utf-8
  2. import numpy as np
  3. import pytest
  4. from pandas import DataFrame, Series, Timestamp, date_range, to_datetime
  5. import pandas.util.testing as tm
  6. from .common import TestData
  7. class TestFrameAsof(TestData):
  def setup_method(self, method):
      """Create the fixture shared by the tests in this class.

      Sets ``self.N`` to 50, ``self.rng`` to a 50-entry DatetimeIndex that
      starts at 1990-01-01 and advances in 53-second steps, and ``self.df``
      to a frame on that index whose columns 'A' and 'B' both hold
      ``0 .. N-1``. The ``method`` argument is accepted and not used.
      """
      n_rows = 50
      self.N = n_rows
      self.rng = date_range('1/1/1990', periods=n_rows, freq='53s')
      # Each column gets its own freshly built arange.
      columns = {label: np.arange(n_rows) for label in ('A', 'B')}
      self.df = DataFrame(columns, index=self.rng)
  13. def test_basic(self):
  14. df = self.df.copy()
  15. df.loc[15:30, 'A'] = np.nan
  16. dates = date_range('1/1/1990', periods=self.N * 3,
  17. freq='25s')
  18. result = df.asof(dates)
  19. assert result.notna().all(1).all()
  20. lb = df.index[14]
  21. ub = df.index[30]
  22. dates = list(dates)
  23. result = df.asof(dates)
  24. assert result.notna().all(1).all()
  25. mask = (result.index >= lb) & (result.index < ub)
  26. rs = result[mask]
  27. assert (rs == 14).all(1).all()
  28. def test_subset(self):
  29. N = 10
  30. rng = date_range('1/1/1990', periods=N, freq='53s')
  31. df = DataFrame({'A': np.arange(N), 'B': np.arange(N)},
  32. index=rng)
  33. df.loc[4:8, 'A'] = np.nan
  34. dates = date_range('1/1/1990', periods=N * 3,
  35. freq='25s')
  36. # with a subset of A should be the same
  37. result = df.asof(dates, subset='A')
  38. expected = df.asof(dates)
  39. tm.assert_frame_equal(result, expected)
  40. # same with A/B
  41. result = df.asof(dates, subset=['A', 'B'])
  42. expected = df.asof(dates)
  43. tm.assert_frame_equal(result, expected)
  44. # B gives self.df.asof
  45. result = df.asof(dates, subset='B')
  46. expected = df.resample('25s', closed='right').ffill().reindex(dates)
  47. expected.iloc[20:] = 9
  48. tm.assert_frame_equal(result, expected)
  49. def test_missing(self):
  50. # GH 15118
  51. # no match found - `where` value before earliest date in index
  52. N = 10
  53. rng = date_range('1/1/1990', periods=N, freq='53s')
  54. df = DataFrame({'A': np.arange(N), 'B': np.arange(N)},
  55. index=rng)
  56. result = df.asof('1989-12-31')
  57. expected = Series(index=['A', 'B'], name=Timestamp('1989-12-31'))
  58. tm.assert_series_equal(result, expected)
  59. result = df.asof(to_datetime(['1989-12-31']))
  60. expected = DataFrame(index=to_datetime(['1989-12-31']),
  61. columns=['A', 'B'], dtype='float64')
  62. tm.assert_frame_equal(result, expected)
  63. def test_all_nans(self):
  64. # GH 15713
  65. # DataFrame is all nans
  66. result = DataFrame([np.nan]).asof([0])
  67. expected = DataFrame([np.nan])
  68. tm.assert_frame_equal(result, expected)
  69. # testing non-default indexes, multiple inputs
  70. dates = date_range('1/1/1990', periods=self.N * 3, freq='25s')
  71. result = DataFrame(np.nan, index=self.rng, columns=['A']).asof(dates)
  72. expected = DataFrame(np.nan, index=dates, columns=['A'])
  73. tm.assert_frame_equal(result, expected)
  74. # testing multiple columns
  75. dates = date_range('1/1/1990', periods=self.N * 3, freq='25s')
  76. result = DataFrame(np.nan, index=self.rng,
  77. columns=['A', 'B', 'C']).asof(dates)
  78. expected = DataFrame(np.nan, index=dates, columns=['A', 'B', 'C'])
  79. tm.assert_frame_equal(result, expected)
  80. # testing scalar input
  81. result = DataFrame(np.nan, index=[1, 2], columns=['A', 'B']).asof([3])
  82. expected = DataFrame(np.nan, index=[3], columns=['A', 'B'])
  83. tm.assert_frame_equal(result, expected)
  84. result = DataFrame(np.nan, index=[1, 2], columns=['A', 'B']).asof(3)
  85. expected = Series(np.nan, index=['A', 'B'], name=3)
  86. tm.assert_series_equal(result, expected)
  @pytest.mark.parametrize(
      "stamp,expected",
      [
          (
              Timestamp('2018-01-01 23:22:43.325+00:00'),
              Series(2.0, name=Timestamp('2018-01-01 23:22:43.325+00:00')),
          ),
          (
              Timestamp('2018-01-01 22:33:20.682+01:00'),
              Series(1.0, name=Timestamp('2018-01-01 22:33:20.682+01:00')),
          ),
      ],
  )
  def test_time_zone_aware_index(self, stamp, expected):
      """asof must compare tz-aware lookups against a tz-aware index.

      The frame has two rows stamped with a +00:00 offset. The first case
      looks up a +00:00 time later than both rows and expects the second
      row's value. The second case looks up a +01:00 time that, once the
      offset is taken into account, lies between the two rows, and expects
      the first row's value.
      """
      # GH21194
      utc_rows = [
          Timestamp('2018-01-01 21:00:05.001+00:00'),
          Timestamp('2018-01-01 22:35:10.550+00:00'),
      ]
      frame = DataFrame(data=[1, 2], index=utc_rows)

      tm.assert_series_equal(frame.asof(stamp), expected)