# test_ops.py
import numpy as np
import pytest

import pandas as pd
from pandas import DatetimeIndex, Index, NaT, PeriodIndex, Series
from pandas.core.arrays import PeriodArray
from pandas.tests.test_base import Ops
import pandas.util.testing as tm


  8. class TestPeriodIndexOps(Ops):
  9. def setup_method(self, method):
  10. super(TestPeriodIndexOps, self).setup_method(method)
  11. mask = lambda x: (isinstance(x, DatetimeIndex) or
  12. isinstance(x, PeriodIndex))
  13. self.is_valid_objs = [o for o in self.objs if mask(o)]
  14. self.not_valid_objs = [o for o in self.objs if not mask(o)]
  15. def test_ops_properties(self):
  16. f = lambda x: isinstance(x, PeriodIndex)
  17. self.check_ops_properties(PeriodArray._field_ops, f)
  18. self.check_ops_properties(PeriodArray._object_ops, f)
  19. self.check_ops_properties(PeriodArray._bool_ops, f)
  20. def test_resolution(self):
  21. for freq, expected in zip(['A', 'Q', 'M', 'D', 'H',
  22. 'T', 'S', 'L', 'U'],
  23. ['day', 'day', 'day', 'day',
  24. 'hour', 'minute', 'second',
  25. 'millisecond', 'microsecond']):
  26. idx = pd.period_range(start='2013-04-01', periods=30, freq=freq)
  27. assert idx.resolution == expected
  28. def test_value_counts_unique(self):
  29. # GH 7735
  30. idx = pd.period_range('2011-01-01 09:00', freq='H', periods=10)
  31. # create repeated values, 'n'th element is repeated by n+1 times
  32. idx = PeriodIndex(np.repeat(idx._values, range(1, len(idx) + 1)),
  33. freq='H')
  34. exp_idx = PeriodIndex(['2011-01-01 18:00', '2011-01-01 17:00',
  35. '2011-01-01 16:00', '2011-01-01 15:00',
  36. '2011-01-01 14:00', '2011-01-01 13:00',
  37. '2011-01-01 12:00', '2011-01-01 11:00',
  38. '2011-01-01 10:00',
  39. '2011-01-01 09:00'], freq='H')
  40. expected = Series(range(10, 0, -1), index=exp_idx, dtype='int64')
  41. for obj in [idx, Series(idx)]:
  42. tm.assert_series_equal(obj.value_counts(), expected)
  43. expected = pd.period_range('2011-01-01 09:00', freq='H',
  44. periods=10)
  45. tm.assert_index_equal(idx.unique(), expected)
  46. idx = PeriodIndex(['2013-01-01 09:00', '2013-01-01 09:00',
  47. '2013-01-01 09:00', '2013-01-01 08:00',
  48. '2013-01-01 08:00', NaT], freq='H')
  49. exp_idx = PeriodIndex(['2013-01-01 09:00', '2013-01-01 08:00'],
  50. freq='H')
  51. expected = Series([3, 2], index=exp_idx)
  52. for obj in [idx, Series(idx)]:
  53. tm.assert_series_equal(obj.value_counts(), expected)
  54. exp_idx = PeriodIndex(['2013-01-01 09:00', '2013-01-01 08:00',
  55. NaT], freq='H')
  56. expected = Series([3, 2, 1], index=exp_idx)
  57. for obj in [idx, Series(idx)]:
  58. tm.assert_series_equal(obj.value_counts(dropna=False), expected)
  59. tm.assert_index_equal(idx.unique(), exp_idx)
  60. def test_drop_duplicates_metadata(self):
  61. # GH 10115
  62. idx = pd.period_range('2011-01-01', '2011-01-31', freq='D', name='idx')
  63. result = idx.drop_duplicates()
  64. tm.assert_index_equal(idx, result)
  65. assert idx.freq == result.freq
  66. idx_dup = idx.append(idx) # freq will not be reset
  67. result = idx_dup.drop_duplicates()
  68. tm.assert_index_equal(idx, result)
  69. assert idx.freq == result.freq
  70. def test_drop_duplicates(self):
  71. # to check Index/Series compat
  72. base = pd.period_range('2011-01-01', '2011-01-31', freq='D',
  73. name='idx')
  74. idx = base.append(base[:5])
  75. res = idx.drop_duplicates()
  76. tm.assert_index_equal(res, base)
  77. res = Series(idx).drop_duplicates()
  78. tm.assert_series_equal(res, Series(base))
  79. res = idx.drop_duplicates(keep='last')
  80. exp = base[5:].append(base[:5])
  81. tm.assert_index_equal(res, exp)
  82. res = Series(idx).drop_duplicates(keep='last')
  83. tm.assert_series_equal(res, Series(exp, index=np.arange(5, 36)))
  84. res = idx.drop_duplicates(keep=False)
  85. tm.assert_index_equal(res, base[5:])
  86. res = Series(idx).drop_duplicates(keep=False)
  87. tm.assert_series_equal(res, Series(base[5:], index=np.arange(5, 31)))
  88. def test_order_compat(self):
  89. def _check_freq(index, expected_index):
  90. if isinstance(index, PeriodIndex):
  91. assert index.freq == expected_index.freq
  92. pidx = PeriodIndex(['2011', '2012', '2013'], name='pidx', freq='A')
  93. # for compatibility check
  94. iidx = Index([2011, 2012, 2013], name='idx')
  95. for idx in [pidx, iidx]:
  96. ordered = idx.sort_values()
  97. tm.assert_index_equal(ordered, idx)
  98. _check_freq(ordered, idx)
  99. ordered = idx.sort_values(ascending=False)
  100. tm.assert_index_equal(ordered, idx[::-1])
  101. _check_freq(ordered, idx[::-1])
  102. ordered, indexer = idx.sort_values(return_indexer=True)
  103. tm.assert_index_equal(ordered, idx)
  104. tm.assert_numpy_array_equal(indexer, np.array([0, 1, 2]),
  105. check_dtype=False)
  106. _check_freq(ordered, idx)
  107. ordered, indexer = idx.sort_values(return_indexer=True,
  108. ascending=False)
  109. tm.assert_index_equal(ordered, idx[::-1])
  110. tm.assert_numpy_array_equal(indexer, np.array([2, 1, 0]),
  111. check_dtype=False)
  112. _check_freq(ordered, idx[::-1])
  113. pidx = PeriodIndex(['2011', '2013', '2015', '2012',
  114. '2011'], name='pidx', freq='A')
  115. pexpected = PeriodIndex(
  116. ['2011', '2011', '2012', '2013', '2015'], name='pidx', freq='A')
  117. # for compatibility check
  118. iidx = Index([2011, 2013, 2015, 2012, 2011], name='idx')
  119. iexpected = Index([2011, 2011, 2012, 2013, 2015], name='idx')
  120. for idx, expected in [(pidx, pexpected), (iidx, iexpected)]:
  121. ordered = idx.sort_values()
  122. tm.assert_index_equal(ordered, expected)
  123. _check_freq(ordered, idx)
  124. ordered = idx.sort_values(ascending=False)
  125. tm.assert_index_equal(ordered, expected[::-1])
  126. _check_freq(ordered, idx)
  127. ordered, indexer = idx.sort_values(return_indexer=True)
  128. tm.assert_index_equal(ordered, expected)
  129. exp = np.array([0, 4, 3, 1, 2])
  130. tm.assert_numpy_array_equal(indexer, exp, check_dtype=False)
  131. _check_freq(ordered, idx)
  132. ordered, indexer = idx.sort_values(return_indexer=True,
  133. ascending=False)
  134. tm.assert_index_equal(ordered, expected[::-1])
  135. exp = np.array([2, 1, 3, 4, 0])
  136. tm.assert_numpy_array_equal(indexer, exp, check_dtype=False)
  137. _check_freq(ordered, idx)
  138. pidx = PeriodIndex(['2011', '2013', 'NaT', '2011'], name='pidx',
  139. freq='D')
  140. result = pidx.sort_values()
  141. expected = PeriodIndex(['NaT', '2011', '2011', '2013'],
  142. name='pidx', freq='D')
  143. tm.assert_index_equal(result, expected)
  144. assert result.freq == 'D'
  145. result = pidx.sort_values(ascending=False)
  146. expected = PeriodIndex(
  147. ['2013', '2011', '2011', 'NaT'], name='pidx', freq='D')
  148. tm.assert_index_equal(result, expected)
  149. assert result.freq == 'D'
  150. def test_order(self):
  151. for freq in ['D', '2D', '4D']:
  152. idx = PeriodIndex(['2011-01-01', '2011-01-02', '2011-01-03'],
  153. freq=freq, name='idx')
  154. ordered = idx.sort_values()
  155. tm.assert_index_equal(ordered, idx)
  156. assert ordered.freq == idx.freq
  157. ordered = idx.sort_values(ascending=False)
  158. expected = idx[::-1]
  159. tm.assert_index_equal(ordered, expected)
  160. assert ordered.freq == expected.freq
  161. assert ordered.freq == freq
  162. ordered, indexer = idx.sort_values(return_indexer=True)
  163. tm.assert_index_equal(ordered, idx)
  164. tm.assert_numpy_array_equal(indexer, np.array([0, 1, 2]),
  165. check_dtype=False)
  166. assert ordered.freq == idx.freq
  167. assert ordered.freq == freq
  168. ordered, indexer = idx.sort_values(return_indexer=True,
  169. ascending=False)
  170. expected = idx[::-1]
  171. tm.assert_index_equal(ordered, expected)
  172. tm.assert_numpy_array_equal(indexer, np.array([2, 1, 0]),
  173. check_dtype=False)
  174. assert ordered.freq == expected.freq
  175. assert ordered.freq == freq
  176. idx1 = PeriodIndex(['2011-01-01', '2011-01-03', '2011-01-05',
  177. '2011-01-02', '2011-01-01'], freq='D', name='idx1')
  178. exp1 = PeriodIndex(['2011-01-01', '2011-01-01', '2011-01-02',
  179. '2011-01-03', '2011-01-05'], freq='D', name='idx1')
  180. idx2 = PeriodIndex(['2011-01-01', '2011-01-03', '2011-01-05',
  181. '2011-01-02', '2011-01-01'],
  182. freq='D', name='idx2')
  183. exp2 = PeriodIndex(['2011-01-01', '2011-01-01', '2011-01-02',
  184. '2011-01-03', '2011-01-05'],
  185. freq='D', name='idx2')
  186. idx3 = PeriodIndex([NaT, '2011-01-03', '2011-01-05',
  187. '2011-01-02', NaT], freq='D', name='idx3')
  188. exp3 = PeriodIndex([NaT, NaT, '2011-01-02', '2011-01-03',
  189. '2011-01-05'], freq='D', name='idx3')
  190. for idx, expected in [(idx1, exp1), (idx2, exp2), (idx3, exp3)]:
  191. ordered = idx.sort_values()
  192. tm.assert_index_equal(ordered, expected)
  193. assert ordered.freq == 'D'
  194. ordered = idx.sort_values(ascending=False)
  195. tm.assert_index_equal(ordered, expected[::-1])
  196. assert ordered.freq == 'D'
  197. ordered, indexer = idx.sort_values(return_indexer=True)
  198. tm.assert_index_equal(ordered, expected)
  199. exp = np.array([0, 4, 3, 1, 2])
  200. tm.assert_numpy_array_equal(indexer, exp, check_dtype=False)
  201. assert ordered.freq == 'D'
  202. ordered, indexer = idx.sort_values(return_indexer=True,
  203. ascending=False)
  204. tm.assert_index_equal(ordered, expected[::-1])
  205. exp = np.array([2, 1, 3, 4, 0])
  206. tm.assert_numpy_array_equal(indexer, exp, check_dtype=False)
  207. assert ordered.freq == 'D'
  208. def test_shift(self):
  209. # This is tested in test_arithmetic
  210. pass
  211. def test_nat(self):
  212. assert pd.PeriodIndex._na_value is NaT
  213. assert pd.PeriodIndex([], freq='M')._na_value is NaT
  214. idx = pd.PeriodIndex(['2011-01-01', '2011-01-02'], freq='D')
  215. assert idx._can_hold_na
  216. tm.assert_numpy_array_equal(idx._isnan, np.array([False, False]))
  217. assert idx.hasnans is False
  218. tm.assert_numpy_array_equal(idx._nan_idxs,
  219. np.array([], dtype=np.intp))
  220. idx = pd.PeriodIndex(['2011-01-01', 'NaT'], freq='D')
  221. assert idx._can_hold_na
  222. tm.assert_numpy_array_equal(idx._isnan, np.array([False, True]))
  223. assert idx.hasnans is True
  224. tm.assert_numpy_array_equal(idx._nan_idxs,
  225. np.array([1], dtype=np.intp))
  226. @pytest.mark.parametrize('freq', ['D', 'M'])
  227. def test_equals(self, freq):
  228. # GH#13107
  229. idx = pd.PeriodIndex(['2011-01-01', '2011-01-02', 'NaT'],
  230. freq=freq)
  231. assert idx.equals(idx)
  232. assert idx.equals(idx.copy())
  233. assert idx.equals(idx.astype(object))
  234. assert idx.astype(object).equals(idx)
  235. assert idx.astype(object).equals(idx.astype(object))
  236. assert not idx.equals(list(idx))
  237. assert not idx.equals(pd.Series(idx))
  238. idx2 = pd.PeriodIndex(['2011-01-01', '2011-01-02', 'NaT'],
  239. freq='H')
  240. assert not idx.equals(idx2)
  241. assert not idx.equals(idx2.copy())
  242. assert not idx.equals(idx2.astype(object))
  243. assert not idx.astype(object).equals(idx2)
  244. assert not idx.equals(list(idx2))
  245. assert not idx.equals(pd.Series(idx2))
  246. # same internal, different tz
  247. idx3 = pd.PeriodIndex._simple_new(
  248. idx._values._simple_new(idx._values.asi8, freq="H")
  249. )
  250. tm.assert_numpy_array_equal(idx.asi8, idx3.asi8)
  251. assert not idx.equals(idx3)
  252. assert not idx.equals(idx3.copy())
  253. assert not idx.equals(idx3.astype(object))
  254. assert not idx.astype(object).equals(idx3)
  255. assert not idx.equals(list(idx3))
  256. assert not idx.equals(pd.Series(idx3))
  257. def test_freq_setter_deprecated(self):
  258. # GH 20678
  259. idx = pd.period_range('2018Q1', periods=4, freq='Q')
  260. # no warning for getter
  261. with tm.assert_produces_warning(None):
  262. idx.freq
  263. # warning for setter
  264. with tm.assert_produces_warning(FutureWarning):
  265. idx.freq = pd.offsets.Day()