test_tools.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345
  1. from datetime import datetime, timedelta
  2. import numpy as np
  3. import pytest
  4. from pandas._libs.tslibs.ccalendar import MONTHS
  5. from pandas.compat import lrange
  6. import pandas as pd
  7. from pandas import (
  8. DatetimeIndex, Period, PeriodIndex, Series, Timedelta, Timestamp,
  9. date_range, period_range, to_datetime)
  10. import pandas.core.indexes.period as period
  11. import pandas.util.testing as tm
  12. class TestPeriodRepresentation(object):
  13. """
  14. Wish to match NumPy units
  15. """
  16. def _check_freq(self, freq, base_date):
  17. rng = period_range(start=base_date, periods=10, freq=freq)
  18. exp = np.arange(10, dtype=np.int64)
  19. tm.assert_numpy_array_equal(rng.asi8, exp)
  20. def test_annual(self):
  21. self._check_freq('A', 1970)
  22. def test_monthly(self):
  23. self._check_freq('M', '1970-01')
  24. @pytest.mark.parametrize('freq', ['W-THU', 'D', 'B', 'H', 'T',
  25. 'S', 'L', 'U', 'N'])
  26. def test_freq(self, freq):
  27. self._check_freq(freq, '1970-01-01')
  28. def test_negone_ordinals(self):
  29. freqs = ['A', 'M', 'Q', 'D', 'H', 'T', 'S']
  30. period = Period(ordinal=-1, freq='D')
  31. for freq in freqs:
  32. repr(period.asfreq(freq))
  33. for freq in freqs:
  34. period = Period(ordinal=-1, freq=freq)
  35. repr(period)
  36. assert period.year == 1969
  37. period = Period(ordinal=-1, freq='B')
  38. repr(period)
  39. period = Period(ordinal=-1, freq='W')
  40. repr(period)
  41. class TestPeriodIndex(object):
  42. def test_to_timestamp(self):
  43. index = period_range(freq='A', start='1/1/2001', end='12/1/2009')
  44. series = Series(1, index=index, name='foo')
  45. exp_index = date_range('1/1/2001', end='12/31/2009', freq='A-DEC')
  46. result = series.to_timestamp(how='end')
  47. exp_index = exp_index + Timedelta(1, 'D') - Timedelta(1, 'ns')
  48. tm.assert_index_equal(result.index, exp_index)
  49. assert result.name == 'foo'
  50. exp_index = date_range('1/1/2001', end='1/1/2009', freq='AS-JAN')
  51. result = series.to_timestamp(how='start')
  52. tm.assert_index_equal(result.index, exp_index)
  53. def _get_with_delta(delta, freq='A-DEC'):
  54. return date_range(to_datetime('1/1/2001') + delta,
  55. to_datetime('12/31/2009') + delta, freq=freq)
  56. delta = timedelta(hours=23)
  57. result = series.to_timestamp('H', 'end')
  58. exp_index = _get_with_delta(delta)
  59. exp_index = exp_index + Timedelta(1, 'h') - Timedelta(1, 'ns')
  60. tm.assert_index_equal(result.index, exp_index)
  61. delta = timedelta(hours=23, minutes=59)
  62. result = series.to_timestamp('T', 'end')
  63. exp_index = _get_with_delta(delta)
  64. exp_index = exp_index + Timedelta(1, 'm') - Timedelta(1, 'ns')
  65. tm.assert_index_equal(result.index, exp_index)
  66. result = series.to_timestamp('S', 'end')
  67. delta = timedelta(hours=23, minutes=59, seconds=59)
  68. exp_index = _get_with_delta(delta)
  69. exp_index = exp_index + Timedelta(1, 's') - Timedelta(1, 'ns')
  70. tm.assert_index_equal(result.index, exp_index)
  71. index = period_range(freq='H', start='1/1/2001', end='1/2/2001')
  72. series = Series(1, index=index, name='foo')
  73. exp_index = date_range('1/1/2001 00:59:59', end='1/2/2001 00:59:59',
  74. freq='H')
  75. result = series.to_timestamp(how='end')
  76. exp_index = exp_index + Timedelta(1, 's') - Timedelta(1, 'ns')
  77. tm.assert_index_equal(result.index, exp_index)
  78. assert result.name == 'foo'
  79. def test_to_timestamp_freq(self):
  80. idx = pd.period_range('2017', periods=12, freq="A-DEC")
  81. result = idx.to_timestamp()
  82. expected = pd.date_range("2017", periods=12, freq="AS-JAN")
  83. tm.assert_index_equal(result, expected)
  84. def test_to_timestamp_repr_is_code(self):
  85. zs = [Timestamp('99-04-17 00:00:00', tz='UTC'),
  86. Timestamp('2001-04-17 00:00:00', tz='UTC'),
  87. Timestamp('2001-04-17 00:00:00', tz='America/Los_Angeles'),
  88. Timestamp('2001-04-17 00:00:00', tz=None)]
  89. for z in zs:
  90. assert eval(repr(z)) == z
  91. def test_to_timestamp_to_period_astype(self):
  92. idx = DatetimeIndex([pd.NaT, '2011-01-01', '2011-02-01'], name='idx')
  93. res = idx.astype('period[M]')
  94. exp = PeriodIndex(['NaT', '2011-01', '2011-02'], freq='M', name='idx')
  95. tm.assert_index_equal(res, exp)
  96. res = idx.astype('period[3M]')
  97. exp = PeriodIndex(['NaT', '2011-01', '2011-02'], freq='3M', name='idx')
  98. tm.assert_index_equal(res, exp)
  99. def test_dti_to_period(self):
  100. dti = pd.date_range(start='1/1/2005', end='12/1/2005', freq='M')
  101. pi1 = dti.to_period()
  102. pi2 = dti.to_period(freq='D')
  103. pi3 = dti.to_period(freq='3D')
  104. assert pi1[0] == Period('Jan 2005', freq='M')
  105. assert pi2[0] == Period('1/31/2005', freq='D')
  106. assert pi3[0] == Period('1/31/2005', freq='3D')
  107. assert pi1[-1] == Period('Nov 2005', freq='M')
  108. assert pi2[-1] == Period('11/30/2005', freq='D')
  109. assert pi3[-1], Period('11/30/2005', freq='3D')
  110. tm.assert_index_equal(pi1, period_range('1/1/2005', '11/1/2005',
  111. freq='M'))
  112. tm.assert_index_equal(pi2, period_range('1/1/2005', '11/1/2005',
  113. freq='M').asfreq('D'))
  114. tm.assert_index_equal(pi3, period_range('1/1/2005', '11/1/2005',
  115. freq='M').asfreq('3D'))
  116. @pytest.mark.parametrize('month', MONTHS)
  117. def test_to_period_quarterly(self, month):
  118. # make sure we can make the round trip
  119. freq = 'Q-%s' % month
  120. rng = period_range('1989Q3', '1991Q3', freq=freq)
  121. stamps = rng.to_timestamp()
  122. result = stamps.to_period(freq)
  123. tm.assert_index_equal(rng, result)
  124. @pytest.mark.parametrize('off', ['BQ', 'QS', 'BQS'])
  125. def test_to_period_quarterlyish(self, off):
  126. rng = date_range('01-Jan-2012', periods=8, freq=off)
  127. prng = rng.to_period()
  128. assert prng.freq == 'Q-DEC'
  129. @pytest.mark.parametrize('off', ['BA', 'AS', 'BAS'])
  130. def test_to_period_annualish(self, off):
  131. rng = date_range('01-Jan-2012', periods=8, freq=off)
  132. prng = rng.to_period()
  133. assert prng.freq == 'A-DEC'
  134. def test_to_period_monthish(self):
  135. offsets = ['MS', 'BM']
  136. for off in offsets:
  137. rng = date_range('01-Jan-2012', periods=8, freq=off)
  138. prng = rng.to_period()
  139. assert prng.freq == 'M'
  140. rng = date_range('01-Jan-2012', periods=8, freq='M')
  141. prng = rng.to_period()
  142. assert prng.freq == 'M'
  143. msg = pd._libs.tslibs.frequencies.INVALID_FREQ_ERR_MSG
  144. with pytest.raises(ValueError, match=msg):
  145. date_range('01-Jan-2012', periods=8, freq='EOM')
  146. def test_period_dt64_round_trip(self):
  147. dti = date_range('1/1/2000', '1/7/2002', freq='B')
  148. pi = dti.to_period()
  149. tm.assert_index_equal(pi.to_timestamp(), dti)
  150. dti = date_range('1/1/2000', '1/7/2002', freq='B')
  151. pi = dti.to_period(freq='H')
  152. tm.assert_index_equal(pi.to_timestamp(), dti)
  153. def test_combine_first(self):
  154. # GH#3367
  155. didx = pd.date_range(start='1950-01-31', end='1950-07-31', freq='M')
  156. pidx = pd.period_range(start=pd.Period('1950-1'),
  157. end=pd.Period('1950-7'), freq='M')
  158. # check to be consistent with DatetimeIndex
  159. for idx in [didx, pidx]:
  160. a = pd.Series([1, np.nan, np.nan, 4, 5, np.nan, 7], index=idx)
  161. b = pd.Series([9, 9, 9, 9, 9, 9, 9], index=idx)
  162. result = a.combine_first(b)
  163. expected = pd.Series([1, 9, 9, 4, 5, 9, 7], index=idx,
  164. dtype=np.float64)
  165. tm.assert_series_equal(result, expected)
  166. @pytest.mark.parametrize('freq', ['D', '2D'])
  167. def test_searchsorted(self, freq):
  168. pidx = pd.PeriodIndex(['2014-01-01', '2014-01-02', '2014-01-03',
  169. '2014-01-04', '2014-01-05'], freq=freq)
  170. p1 = pd.Period('2014-01-01', freq=freq)
  171. assert pidx.searchsorted(p1) == 0
  172. p2 = pd.Period('2014-01-04', freq=freq)
  173. assert pidx.searchsorted(p2) == 3
  174. msg = "Input has different freq=H from PeriodIndex"
  175. with pytest.raises(period.IncompatibleFrequency, match=msg):
  176. pidx.searchsorted(pd.Period('2014-01-01', freq='H'))
  177. msg = "Input has different freq=5D from PeriodIndex"
  178. with pytest.raises(period.IncompatibleFrequency, match=msg):
  179. pidx.searchsorted(pd.Period('2014-01-01', freq='5D'))
  180. class TestPeriodIndexConversion(object):
  181. def test_tolist(self):
  182. index = period_range(freq='A', start='1/1/2001', end='12/1/2009')
  183. rs = index.tolist()
  184. for x in rs:
  185. assert isinstance(x, Period)
  186. recon = PeriodIndex(rs)
  187. tm.assert_index_equal(index, recon)
  188. def test_to_timestamp_pi_nat(self):
  189. # GH#7228
  190. index = PeriodIndex(['NaT', '2011-01', '2011-02'], freq='M',
  191. name='idx')
  192. result = index.to_timestamp('D')
  193. expected = DatetimeIndex([pd.NaT, datetime(2011, 1, 1),
  194. datetime(2011, 2, 1)], name='idx')
  195. tm.assert_index_equal(result, expected)
  196. assert result.name == 'idx'
  197. result2 = result.to_period(freq='M')
  198. tm.assert_index_equal(result2, index)
  199. assert result2.name == 'idx'
  200. result3 = result.to_period(freq='3M')
  201. exp = PeriodIndex(['NaT', '2011-01', '2011-02'],
  202. freq='3M', name='idx')
  203. tm.assert_index_equal(result3, exp)
  204. assert result3.freqstr == '3M'
  205. msg = ('Frequency must be positive, because it'
  206. ' represents span: -2A')
  207. with pytest.raises(ValueError, match=msg):
  208. result.to_period(freq='-2A')
  209. def test_to_timestamp_preserve_name(self):
  210. index = period_range(freq='A', start='1/1/2001', end='12/1/2009',
  211. name='foo')
  212. assert index.name == 'foo'
  213. conv = index.to_timestamp('D')
  214. assert conv.name == 'foo'
  215. def test_to_timestamp_quarterly_bug(self):
  216. years = np.arange(1960, 2000).repeat(4)
  217. quarters = np.tile(lrange(1, 5), 40)
  218. pindex = PeriodIndex(year=years, quarter=quarters)
  219. stamps = pindex.to_timestamp('D', 'end')
  220. expected = DatetimeIndex([x.to_timestamp('D', 'end') for x in pindex])
  221. tm.assert_index_equal(stamps, expected)
  222. def test_to_timestamp_pi_mult(self):
  223. idx = PeriodIndex(['2011-01', 'NaT', '2011-02'],
  224. freq='2M', name='idx')
  225. result = idx.to_timestamp()
  226. expected = DatetimeIndex(['2011-01-01', 'NaT', '2011-02-01'],
  227. name='idx')
  228. tm.assert_index_equal(result, expected)
  229. result = idx.to_timestamp(how='E')
  230. expected = DatetimeIndex(['2011-02-28', 'NaT', '2011-03-31'],
  231. name='idx')
  232. expected = expected + Timedelta(1, 'D') - Timedelta(1, 'ns')
  233. tm.assert_index_equal(result, expected)
  234. def test_to_timestamp_pi_combined(self):
  235. idx = period_range(start='2011', periods=2, freq='1D1H', name='idx')
  236. result = idx.to_timestamp()
  237. expected = DatetimeIndex(['2011-01-01 00:00', '2011-01-02 01:00'],
  238. name='idx')
  239. tm.assert_index_equal(result, expected)
  240. result = idx.to_timestamp(how='E')
  241. expected = DatetimeIndex(['2011-01-02 00:59:59',
  242. '2011-01-03 01:59:59'],
  243. name='idx')
  244. expected = expected + Timedelta(1, 's') - Timedelta(1, 'ns')
  245. tm.assert_index_equal(result, expected)
  246. result = idx.to_timestamp(how='E', freq='H')
  247. expected = DatetimeIndex(['2011-01-02 00:00', '2011-01-03 01:00'],
  248. name='idx')
  249. expected = expected + Timedelta(1, 'h') - Timedelta(1, 'ns')
  250. tm.assert_index_equal(result, expected)
  251. def test_period_astype_to_timestamp(self):
  252. pi = pd.PeriodIndex(['2011-01', '2011-02', '2011-03'], freq='M')
  253. exp = pd.DatetimeIndex(['2011-01-01', '2011-02-01', '2011-03-01'])
  254. tm.assert_index_equal(pi.astype('datetime64[ns]'), exp)
  255. exp = pd.DatetimeIndex(['2011-01-31', '2011-02-28', '2011-03-31'])
  256. exp = exp + Timedelta(1, 'D') - Timedelta(1, 'ns')
  257. tm.assert_index_equal(pi.astype('datetime64[ns]', how='end'), exp)
  258. exp = pd.DatetimeIndex(['2011-01-01', '2011-02-01', '2011-03-01'],
  259. tz='US/Eastern')
  260. res = pi.astype('datetime64[ns, US/Eastern]')
  261. tm.assert_index_equal(pi.astype('datetime64[ns, US/Eastern]'), exp)
  262. exp = pd.DatetimeIndex(['2011-01-31', '2011-02-28', '2011-03-31'],
  263. tz='US/Eastern')
  264. exp = exp + Timedelta(1, 'D') - Timedelta(1, 'ns')
  265. res = pi.astype('datetime64[ns, US/Eastern]', how='end')
  266. tm.assert_index_equal(res, exp)
  267. def test_to_timestamp_1703(self):
  268. index = period_range('1/1/2012', periods=4, freq='D')
  269. result = index.to_timestamp()
  270. assert result[0] == Timestamp('1/1/2012')