# test_construction.py
  1. import numpy as np
  2. import pytest
  3. from pandas.compat import PY3, lmap, lrange, text_type
  4. from pandas.core.dtypes.dtypes import PeriodDtype
  5. import pandas as pd
  6. from pandas import (
  7. Index, Period, PeriodIndex, Series, date_range, offsets, period_range)
  8. import pandas.core.indexes.period as period
  9. import pandas.util.testing as tm
  10. class TestPeriodIndex(object):
  11. def setup_method(self, method):
  12. pass
  13. def test_construction_base_constructor(self):
  14. # GH 13664
  15. arr = [pd.Period('2011-01', freq='M'), pd.NaT,
  16. pd.Period('2011-03', freq='M')]
  17. tm.assert_index_equal(pd.Index(arr), pd.PeriodIndex(arr))
  18. tm.assert_index_equal(pd.Index(np.array(arr)),
  19. pd.PeriodIndex(np.array(arr)))
  20. arr = [np.nan, pd.NaT, pd.Period('2011-03', freq='M')]
  21. tm.assert_index_equal(pd.Index(arr), pd.PeriodIndex(arr))
  22. tm.assert_index_equal(pd.Index(np.array(arr)),
  23. pd.PeriodIndex(np.array(arr)))
  24. arr = [pd.Period('2011-01', freq='M'), pd.NaT,
  25. pd.Period('2011-03', freq='D')]
  26. tm.assert_index_equal(pd.Index(arr), pd.Index(arr, dtype=object))
  27. tm.assert_index_equal(pd.Index(np.array(arr)),
  28. pd.Index(np.array(arr), dtype=object))
  29. def test_constructor_use_start_freq(self):
  30. # GH #1118
  31. p = Period('4/2/2012', freq='B')
  32. with tm.assert_produces_warning(FutureWarning):
  33. index = PeriodIndex(start=p, periods=10)
  34. expected = period_range(start='4/2/2012', periods=10, freq='B')
  35. tm.assert_index_equal(index, expected)
  36. index = period_range(start=p, periods=10)
  37. tm.assert_index_equal(index, expected)
  38. def test_constructor_field_arrays(self):
  39. # GH #1264
  40. years = np.arange(1990, 2010).repeat(4)[2:-2]
  41. quarters = np.tile(np.arange(1, 5), 20)[2:-2]
  42. index = PeriodIndex(year=years, quarter=quarters, freq='Q-DEC')
  43. expected = period_range('1990Q3', '2009Q2', freq='Q-DEC')
  44. tm.assert_index_equal(index, expected)
  45. index2 = PeriodIndex(year=years, quarter=quarters, freq='2Q-DEC')
  46. tm.assert_numpy_array_equal(index.asi8, index2.asi8)
  47. index = PeriodIndex(year=years, quarter=quarters)
  48. tm.assert_index_equal(index, expected)
  49. years = [2007, 2007, 2007]
  50. months = [1, 2]
  51. pytest.raises(ValueError, PeriodIndex, year=years, month=months,
  52. freq='M')
  53. pytest.raises(ValueError, PeriodIndex, year=years, month=months,
  54. freq='2M')
  55. pytest.raises(ValueError, PeriodIndex, year=years, month=months,
  56. freq='M', start=Period('2007-01', freq='M'))
  57. years = [2007, 2007, 2007]
  58. months = [1, 2, 3]
  59. idx = PeriodIndex(year=years, month=months, freq='M')
  60. exp = period_range('2007-01', periods=3, freq='M')
  61. tm.assert_index_equal(idx, exp)
  62. def test_constructor_U(self):
  63. # U was used as undefined period
  64. pytest.raises(ValueError, period_range, '2007-1-1', periods=500,
  65. freq='X')
  66. def test_constructor_nano(self):
  67. idx = period_range(start=Period(ordinal=1, freq='N'),
  68. end=Period(ordinal=4, freq='N'), freq='N')
  69. exp = PeriodIndex([Period(ordinal=1, freq='N'),
  70. Period(ordinal=2, freq='N'),
  71. Period(ordinal=3, freq='N'),
  72. Period(ordinal=4, freq='N')], freq='N')
  73. tm.assert_index_equal(idx, exp)
  74. def test_constructor_arrays_negative_year(self):
  75. years = np.arange(1960, 2000, dtype=np.int64).repeat(4)
  76. quarters = np.tile(np.array([1, 2, 3, 4], dtype=np.int64), 40)
  77. pindex = PeriodIndex(year=years, quarter=quarters)
  78. tm.assert_index_equal(pindex.year, pd.Index(years))
  79. tm.assert_index_equal(pindex.quarter, pd.Index(quarters))
  80. def test_constructor_invalid_quarters(self):
  81. pytest.raises(ValueError, PeriodIndex, year=lrange(2000, 2004),
  82. quarter=lrange(4), freq='Q-DEC')
  83. def test_constructor_corner(self):
  84. pytest.raises(ValueError, PeriodIndex, periods=10, freq='A')
  85. start = Period('2007', freq='A-JUN')
  86. end = Period('2010', freq='A-DEC')
  87. pytest.raises(ValueError, PeriodIndex, start=start, end=end)
  88. pytest.raises(ValueError, PeriodIndex, start=start)
  89. pytest.raises(ValueError, PeriodIndex, end=end)
  90. result = period_range('2007-01', periods=10.5, freq='M')
  91. exp = period_range('2007-01', periods=10, freq='M')
  92. tm.assert_index_equal(result, exp)
  93. def test_constructor_fromarraylike(self):
  94. idx = period_range('2007-01', periods=20, freq='M')
  95. # values is an array of Period, thus can retrieve freq
  96. tm.assert_index_equal(PeriodIndex(idx.values), idx)
  97. tm.assert_index_equal(PeriodIndex(list(idx.values)), idx)
  98. pytest.raises(ValueError, PeriodIndex, idx._ndarray_values)
  99. pytest.raises(ValueError, PeriodIndex, list(idx._ndarray_values))
  100. pytest.raises(TypeError, PeriodIndex,
  101. data=Period('2007', freq='A'))
  102. result = PeriodIndex(iter(idx))
  103. tm.assert_index_equal(result, idx)
  104. result = PeriodIndex(idx)
  105. tm.assert_index_equal(result, idx)
  106. result = PeriodIndex(idx, freq='M')
  107. tm.assert_index_equal(result, idx)
  108. result = PeriodIndex(idx, freq=offsets.MonthEnd())
  109. tm.assert_index_equal(result, idx)
  110. assert result.freq == 'M'
  111. result = PeriodIndex(idx, freq='2M')
  112. tm.assert_index_equal(result, idx.asfreq('2M'))
  113. assert result.freq == '2M'
  114. result = PeriodIndex(idx, freq=offsets.MonthEnd(2))
  115. tm.assert_index_equal(result, idx.asfreq('2M'))
  116. assert result.freq == '2M'
  117. result = PeriodIndex(idx, freq='D')
  118. exp = idx.asfreq('D', 'e')
  119. tm.assert_index_equal(result, exp)
  120. def test_constructor_datetime64arr(self):
  121. vals = np.arange(100000, 100000 + 10000, 100, dtype=np.int64)
  122. vals = vals.view(np.dtype('M8[us]'))
  123. pytest.raises(ValueError, PeriodIndex, vals, freq='D')
  124. @pytest.mark.parametrize('box', [None, 'series', 'index'])
  125. def test_constructor_datetime64arr_ok(self, box):
  126. # https://github.com/pandas-dev/pandas/issues/23438
  127. data = pd.date_range('2017', periods=4, freq="M")
  128. if box is None:
  129. data = data._values
  130. elif box == 'series':
  131. data = pd.Series(data)
  132. result = PeriodIndex(data, freq='D')
  133. expected = PeriodIndex([
  134. '2017-01-31', '2017-02-28', '2017-03-31', '2017-04-30'
  135. ], freq="D")
  136. tm.assert_index_equal(result, expected)
  137. def test_constructor_dtype(self):
  138. # passing a dtype with a tz should localize
  139. idx = PeriodIndex(['2013-01', '2013-03'], dtype='period[M]')
  140. exp = PeriodIndex(['2013-01', '2013-03'], freq='M')
  141. tm.assert_index_equal(idx, exp)
  142. assert idx.dtype == 'period[M]'
  143. idx = PeriodIndex(['2013-01-05', '2013-03-05'], dtype='period[3D]')
  144. exp = PeriodIndex(['2013-01-05', '2013-03-05'], freq='3D')
  145. tm.assert_index_equal(idx, exp)
  146. assert idx.dtype == 'period[3D]'
  147. # if we already have a freq and its not the same, then asfreq
  148. # (not changed)
  149. idx = PeriodIndex(['2013-01-01', '2013-01-02'], freq='D')
  150. res = PeriodIndex(idx, dtype='period[M]')
  151. exp = PeriodIndex(['2013-01', '2013-01'], freq='M')
  152. tm.assert_index_equal(res, exp)
  153. assert res.dtype == 'period[M]'
  154. res = PeriodIndex(idx, freq='M')
  155. tm.assert_index_equal(res, exp)
  156. assert res.dtype == 'period[M]'
  157. msg = 'specified freq and dtype are different'
  158. with pytest.raises(period.IncompatibleFrequency, match=msg):
  159. PeriodIndex(['2011-01'], freq='M', dtype='period[D]')
  160. def test_constructor_empty(self):
  161. idx = pd.PeriodIndex([], freq='M')
  162. assert isinstance(idx, PeriodIndex)
  163. assert len(idx) == 0
  164. assert idx.freq == 'M'
  165. with pytest.raises(ValueError, match='freq not specified'):
  166. pd.PeriodIndex([])
  167. def test_constructor_pi_nat(self):
  168. idx = PeriodIndex([Period('2011-01', freq='M'), pd.NaT,
  169. Period('2011-01', freq='M')])
  170. exp = PeriodIndex(['2011-01', 'NaT', '2011-01'], freq='M')
  171. tm.assert_index_equal(idx, exp)
  172. idx = PeriodIndex(np.array([Period('2011-01', freq='M'), pd.NaT,
  173. Period('2011-01', freq='M')]))
  174. tm.assert_index_equal(idx, exp)
  175. idx = PeriodIndex([pd.NaT, pd.NaT, Period('2011-01', freq='M'),
  176. Period('2011-01', freq='M')])
  177. exp = PeriodIndex(['NaT', 'NaT', '2011-01', '2011-01'], freq='M')
  178. tm.assert_index_equal(idx, exp)
  179. idx = PeriodIndex(np.array([pd.NaT, pd.NaT,
  180. Period('2011-01', freq='M'),
  181. Period('2011-01', freq='M')]))
  182. tm.assert_index_equal(idx, exp)
  183. idx = PeriodIndex([pd.NaT, pd.NaT, '2011-01', '2011-01'], freq='M')
  184. tm.assert_index_equal(idx, exp)
  185. with pytest.raises(ValueError, match='freq not specified'):
  186. PeriodIndex([pd.NaT, pd.NaT])
  187. with pytest.raises(ValueError, match='freq not specified'):
  188. PeriodIndex(np.array([pd.NaT, pd.NaT]))
  189. with pytest.raises(ValueError, match='freq not specified'):
  190. PeriodIndex(['NaT', 'NaT'])
  191. with pytest.raises(ValueError, match='freq not specified'):
  192. PeriodIndex(np.array(['NaT', 'NaT']))
  193. def test_constructor_incompat_freq(self):
  194. msg = "Input has different freq=D from PeriodIndex\\(freq=M\\)"
  195. with pytest.raises(period.IncompatibleFrequency, match=msg):
  196. PeriodIndex([Period('2011-01', freq='M'), pd.NaT,
  197. Period('2011-01', freq='D')])
  198. with pytest.raises(period.IncompatibleFrequency, match=msg):
  199. PeriodIndex(np.array([Period('2011-01', freq='M'), pd.NaT,
  200. Period('2011-01', freq='D')]))
  201. # first element is pd.NaT
  202. with pytest.raises(period.IncompatibleFrequency, match=msg):
  203. PeriodIndex([pd.NaT, Period('2011-01', freq='M'),
  204. Period('2011-01', freq='D')])
  205. with pytest.raises(period.IncompatibleFrequency, match=msg):
  206. PeriodIndex(np.array([pd.NaT, Period('2011-01', freq='M'),
  207. Period('2011-01', freq='D')]))
  208. def test_constructor_mixed(self):
  209. idx = PeriodIndex(['2011-01', pd.NaT, Period('2011-01', freq='M')])
  210. exp = PeriodIndex(['2011-01', 'NaT', '2011-01'], freq='M')
  211. tm.assert_index_equal(idx, exp)
  212. idx = PeriodIndex(['NaT', pd.NaT, Period('2011-01', freq='M')])
  213. exp = PeriodIndex(['NaT', 'NaT', '2011-01'], freq='M')
  214. tm.assert_index_equal(idx, exp)
  215. idx = PeriodIndex([Period('2011-01-01', freq='D'), pd.NaT,
  216. '2012-01-01'])
  217. exp = PeriodIndex(['2011-01-01', 'NaT', '2012-01-01'], freq='D')
  218. tm.assert_index_equal(idx, exp)
  219. def test_constructor_simple_new(self):
  220. idx = period_range('2007-01', name='p', periods=2, freq='M')
  221. result = idx._simple_new(idx, name='p', freq=idx.freq)
  222. tm.assert_index_equal(result, idx)
  223. result = idx._simple_new(idx.astype('i8'), name='p', freq=idx.freq)
  224. tm.assert_index_equal(result, idx)
  225. def test_constructor_simple_new_empty(self):
  226. # GH13079
  227. idx = PeriodIndex([], freq='M', name='p')
  228. result = idx._simple_new(idx, name='p', freq='M')
  229. tm.assert_index_equal(result, idx)
  230. @pytest.mark.parametrize('floats', [[1.1, 2.1], np.array([1.1, 2.1])])
  231. def test_constructor_floats(self, floats):
  232. with pytest.raises(TypeError):
  233. pd.PeriodIndex._simple_new(floats, freq='M')
  234. with pytest.raises(TypeError):
  235. pd.PeriodIndex(floats, freq='M')
  236. def test_constructor_nat(self):
  237. pytest.raises(ValueError, period_range, start='NaT',
  238. end='2011-01-01', freq='M')
  239. pytest.raises(ValueError, period_range, start='2011-01-01',
  240. end='NaT', freq='M')
  241. def test_constructor_year_and_quarter(self):
  242. year = pd.Series([2001, 2002, 2003])
  243. quarter = year - 2000
  244. idx = PeriodIndex(year=year, quarter=quarter)
  245. strs = ['%dQ%d' % t for t in zip(quarter, year)]
  246. lops = list(map(Period, strs))
  247. p = PeriodIndex(lops)
  248. tm.assert_index_equal(p, idx)
  249. @pytest.mark.parametrize('func, warning', [
  250. (PeriodIndex, FutureWarning),
  251. (period_range, None)
  252. ])
  253. def test_constructor_freq_mult(self, func, warning):
  254. # GH #7811
  255. with tm.assert_produces_warning(warning):
  256. # must be the same, but for sure...
  257. pidx = func(start='2014-01', freq='2M', periods=4)
  258. expected = PeriodIndex(['2014-01', '2014-03',
  259. '2014-05', '2014-07'], freq='2M')
  260. tm.assert_index_equal(pidx, expected)
  261. with tm.assert_produces_warning(warning):
  262. pidx = func(start='2014-01-02', end='2014-01-15', freq='3D')
  263. expected = PeriodIndex(['2014-01-02', '2014-01-05',
  264. '2014-01-08', '2014-01-11',
  265. '2014-01-14'], freq='3D')
  266. tm.assert_index_equal(pidx, expected)
  267. with tm.assert_produces_warning(warning):
  268. pidx = func(end='2014-01-01 17:00', freq='4H', periods=3)
  269. expected = PeriodIndex(['2014-01-01 09:00', '2014-01-01 13:00',
  270. '2014-01-01 17:00'], freq='4H')
  271. tm.assert_index_equal(pidx, expected)
  272. msg = ('Frequency must be positive, because it'
  273. ' represents span: -1M')
  274. with pytest.raises(ValueError, match=msg):
  275. PeriodIndex(['2011-01'], freq='-1M')
  276. msg = ('Frequency must be positive, because it' ' represents span: 0M')
  277. with pytest.raises(ValueError, match=msg):
  278. PeriodIndex(['2011-01'], freq='0M')
  279. msg = ('Frequency must be positive, because it' ' represents span: 0M')
  280. with pytest.raises(ValueError, match=msg):
  281. period_range('2011-01', periods=3, freq='0M')
  282. @pytest.mark.parametrize('freq', ['A', 'M', 'D', 'T', 'S'])
  283. @pytest.mark.parametrize('mult', [1, 2, 3, 4, 5])
  284. def test_constructor_freq_mult_dti_compat(self, mult, freq):
  285. freqstr = str(mult) + freq
  286. pidx = period_range(start='2014-04-01', freq=freqstr, periods=10)
  287. expected = date_range(start='2014-04-01', freq=freqstr,
  288. periods=10).to_period(freqstr)
  289. tm.assert_index_equal(pidx, expected)
  290. def test_constructor_freq_combined(self):
  291. for freq in ['1D1H', '1H1D']:
  292. pidx = PeriodIndex(['2016-01-01', '2016-01-02'], freq=freq)
  293. expected = PeriodIndex(['2016-01-01 00:00', '2016-01-02 00:00'],
  294. freq='25H')
  295. for freq in ['1D1H', '1H1D']:
  296. pidx = period_range(start='2016-01-01', periods=2, freq=freq)
  297. expected = PeriodIndex(['2016-01-01 00:00', '2016-01-02 01:00'],
  298. freq='25H')
  299. tm.assert_index_equal(pidx, expected)
  300. def test_constructor_range_based_deprecated(self):
  301. with tm.assert_produces_warning(FutureWarning):
  302. pi = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009')
  303. assert len(pi) == 9
  304. def test_constructor_range_based_deprecated_different_freq(self):
  305. with tm.assert_produces_warning(FutureWarning) as m:
  306. PeriodIndex(start='2000', periods=2)
  307. warning, = m
  308. assert 'freq="A-DEC"' in str(warning.message)
  309. def test_constructor(self):
  310. pi = period_range(freq='A', start='1/1/2001', end='12/1/2009')
  311. assert len(pi) == 9
  312. pi = period_range(freq='Q', start='1/1/2001', end='12/1/2009')
  313. assert len(pi) == 4 * 9
  314. pi = period_range(freq='M', start='1/1/2001', end='12/1/2009')
  315. assert len(pi) == 12 * 9
  316. pi = period_range(freq='D', start='1/1/2001', end='12/31/2009')
  317. assert len(pi) == 365 * 9 + 2
  318. pi = period_range(freq='B', start='1/1/2001', end='12/31/2009')
  319. assert len(pi) == 261 * 9
  320. pi = period_range(freq='H', start='1/1/2001', end='12/31/2001 23:00')
  321. assert len(pi) == 365 * 24
  322. pi = period_range(freq='Min', start='1/1/2001', end='1/1/2001 23:59')
  323. assert len(pi) == 24 * 60
  324. pi = period_range(freq='S', start='1/1/2001', end='1/1/2001 23:59:59')
  325. assert len(pi) == 24 * 60 * 60
  326. start = Period('02-Apr-2005', 'B')
  327. i1 = period_range(start=start, periods=20)
  328. assert len(i1) == 20
  329. assert i1.freq == start.freq
  330. assert i1[0] == start
  331. end_intv = Period('2006-12-31', 'W')
  332. i1 = period_range(end=end_intv, periods=10)
  333. assert len(i1) == 10
  334. assert i1.freq == end_intv.freq
  335. assert i1[-1] == end_intv
  336. end_intv = Period('2006-12-31', '1w')
  337. i2 = period_range(end=end_intv, periods=10)
  338. assert len(i1) == len(i2)
  339. assert (i1 == i2).all()
  340. assert i1.freq == i2.freq
  341. end_intv = Period('2006-12-31', ('w', 1))
  342. i2 = period_range(end=end_intv, periods=10)
  343. assert len(i1) == len(i2)
  344. assert (i1 == i2).all()
  345. assert i1.freq == i2.freq
  346. end_intv = Period('2005-05-01', 'B')
  347. i1 = period_range(start=start, end=end_intv)
  348. # infer freq from first element
  349. i2 = PeriodIndex([end_intv, Period('2005-05-05', 'B')])
  350. assert len(i2) == 2
  351. assert i2[0] == end_intv
  352. i2 = PeriodIndex(np.array([end_intv, Period('2005-05-05', 'B')]))
  353. assert len(i2) == 2
  354. assert i2[0] == end_intv
  355. # Mixed freq should fail
  356. vals = [end_intv, Period('2006-12-31', 'w')]
  357. pytest.raises(ValueError, PeriodIndex, vals)
  358. vals = np.array(vals)
  359. pytest.raises(ValueError, PeriodIndex, vals)
  360. def test_constructor_error(self):
  361. start = Period('02-Apr-2005', 'B')
  362. end_intv = Period('2006-12-31', ('w', 1))
  363. msg = 'start and end must have same freq'
  364. with pytest.raises(ValueError, match=msg):
  365. PeriodIndex(start=start, end=end_intv)
  366. msg = ('Of the three parameters: start, end, and periods, '
  367. 'exactly two must be specified')
  368. with pytest.raises(ValueError, match=msg):
  369. PeriodIndex(start=start)
  370. @pytest.mark.parametrize('freq', ['M', 'Q', 'A', 'D', 'B',
  371. 'T', 'S', 'L', 'U', 'N', 'H'])
  372. def test_recreate_from_data(self, freq):
  373. org = period_range(start='2001/04/01', freq=freq, periods=1)
  374. idx = PeriodIndex(org.values, freq=freq)
  375. tm.assert_index_equal(idx, org)
  376. def test_map_with_string_constructor(self):
  377. raw = [2005, 2007, 2009]
  378. index = PeriodIndex(raw, freq='A')
  379. types = str,
  380. if PY3:
  381. # unicode
  382. types += text_type,
  383. for t in types:
  384. expected = Index(lmap(t, raw))
  385. res = index.map(t)
  386. # should return an Index
  387. assert isinstance(res, Index)
  388. # preserve element types
  389. assert all(isinstance(resi, t) for resi in res)
  390. # lastly, values should compare equal
  391. tm.assert_index_equal(res, expected)
  392. class TestSeriesPeriod(object):
  393. def setup_method(self, method):
  394. self.series = Series(period_range('2000-01-01', periods=10, freq='D'))
  395. def test_constructor_cant_cast_period(self):
  396. with pytest.raises(TypeError):
  397. Series(period_range('2000-01-01', periods=10, freq='D'),
  398. dtype=float)
  399. def test_constructor_cast_object(self):
  400. s = Series(period_range('1/1/2000', periods=10),
  401. dtype=PeriodDtype("D"))
  402. exp = Series(period_range('1/1/2000', periods=10))
  403. tm.assert_series_equal(s, exp)