# test_period.py (9.6 KB)
  1. import numpy as np
  2. import pytest
  3. from pandas._libs.tslibs import iNaT
  4. from pandas._libs.tslibs.period import IncompatibleFrequency
  5. from pandas.core.dtypes.dtypes import PeriodDtype, registry
  6. import pandas as pd
  7. from pandas.core.arrays import PeriodArray, period_array
  8. import pandas.util.testing as tm
# ----------------------------------------------------------------------------
# Dtype
  11. def test_registered():
  12. assert PeriodDtype in registry.dtypes
  13. result = registry.find("Period[D]")
  14. expected = PeriodDtype("D")
  15. assert result == expected
# ----------------------------------------------------------------------------
# period_array
  18. @pytest.mark.parametrize("data, freq, expected", [
  19. ([pd.Period("2017", "D")], None, [17167]),
  20. ([pd.Period("2017", "D")], "D", [17167]),
  21. ([2017], "D", [17167]),
  22. (["2017"], "D", [17167]),
  23. ([pd.Period("2017", "D")], pd.tseries.offsets.Day(), [17167]),
  24. ([pd.Period("2017", "D"), None], None, [17167, iNaT]),
  25. (pd.Series(pd.date_range("2017", periods=3)), None,
  26. [17167, 17168, 17169]),
  27. (pd.date_range("2017", periods=3), None, [17167, 17168, 17169]),
  28. ])
  29. def test_period_array_ok(data, freq, expected):
  30. result = period_array(data, freq=freq).asi8
  31. expected = np.asarray(expected, dtype=np.int64)
  32. tm.assert_numpy_array_equal(result, expected)
  33. def test_period_array_readonly_object():
  34. # https://github.com/pandas-dev/pandas/issues/25403
  35. pa = period_array([pd.Period('2019-01-01')])
  36. arr = np.asarray(pa, dtype='object')
  37. arr.setflags(write=False)
  38. result = period_array(arr)
  39. tm.assert_period_array_equal(result, pa)
  40. result = pd.Series(arr)
  41. tm.assert_series_equal(result, pd.Series(pa))
  42. result = pd.DataFrame({"A": arr})
  43. tm.assert_frame_equal(result, pd.DataFrame({"A": pa}))
  44. def test_from_datetime64_freq_changes():
  45. # https://github.com/pandas-dev/pandas/issues/23438
  46. arr = pd.date_range("2017", periods=3, freq="D")
  47. result = PeriodArray._from_datetime64(arr, freq="M")
  48. expected = period_array(['2017-01-01', '2017-01-01', '2017-01-01'],
  49. freq="M")
  50. tm.assert_period_array_equal(result, expected)
  51. @pytest.mark.parametrize("data, freq, msg", [
  52. ([pd.Period('2017', 'D'),
  53. pd.Period('2017', 'A')],
  54. None,
  55. "Input has different freq"),
  56. ([pd.Period('2017', 'D')],
  57. "A",
  58. "Input has different freq"),
  59. ])
  60. def test_period_array_raises(data, freq, msg):
  61. with pytest.raises(IncompatibleFrequency, match=msg):
  62. period_array(data, freq)
  63. def test_period_array_non_period_series_raies():
  64. ser = pd.Series([1, 2, 3])
  65. with pytest.raises(TypeError, match='dtype'):
  66. PeriodArray(ser, freq='D')
  67. def test_period_array_freq_mismatch():
  68. arr = period_array(['2000', '2001'], freq='D')
  69. with pytest.raises(IncompatibleFrequency, match='freq'):
  70. PeriodArray(arr, freq='M')
  71. with pytest.raises(IncompatibleFrequency, match='freq'):
  72. PeriodArray(arr, freq=pd.tseries.offsets.MonthEnd())
  73. def test_asi8():
  74. result = period_array(['2000', '2001', None], freq='D').asi8
  75. expected = np.array([10957, 11323, iNaT])
  76. tm.assert_numpy_array_equal(result, expected)
  77. def test_take_raises():
  78. arr = period_array(['2000', '2001'], freq='D')
  79. with pytest.raises(IncompatibleFrequency, match='freq'):
  80. arr.take([0, -1], allow_fill=True,
  81. fill_value=pd.Period('2000', freq='W'))
  82. with pytest.raises(ValueError, match='foo'):
  83. arr.take([0, -1], allow_fill=True, fill_value='foo')
  84. @pytest.mark.parametrize('dtype', [
  85. int, np.int32, np.int64, 'uint32', 'uint64',
  86. ])
  87. def test_astype(dtype):
  88. # We choose to ignore the sign and size of integers for
  89. # Period/Datetime/Timedelta astype
  90. arr = period_array(['2000', '2001', None], freq='D')
  91. result = arr.astype(dtype)
  92. if np.dtype(dtype).kind == 'u':
  93. expected_dtype = np.dtype('uint64')
  94. else:
  95. expected_dtype = np.dtype('int64')
  96. expected = arr.astype(expected_dtype)
  97. assert result.dtype == expected_dtype
  98. tm.assert_numpy_array_equal(result, expected)
  99. def test_astype_copies():
  100. arr = period_array(['2000', '2001', None], freq='D')
  101. result = arr.astype(np.int64, copy=False)
  102. # Add the `.base`, since we now use `.asi8` which returns a view.
  103. # We could maybe override it in PeriodArray to return ._data directly.
  104. assert result.base is arr._data
  105. result = arr.astype(np.int64, copy=True)
  106. assert result is not arr._data
  107. tm.assert_numpy_array_equal(result, arr._data.view('i8'))
  108. def test_astype_categorical():
  109. arr = period_array(['2000', '2001', '2001', None], freq='D')
  110. result = arr.astype('category')
  111. categories = pd.PeriodIndex(['2000', '2001'], freq='D')
  112. expected = pd.Categorical.from_codes([0, 1, 1, -1], categories=categories)
  113. tm.assert_categorical_equal(result, expected)
  114. def test_astype_period():
  115. arr = period_array(['2000', '2001', None], freq='D')
  116. result = arr.astype(PeriodDtype("M"))
  117. expected = period_array(['2000', '2001', None], freq='M')
  118. tm.assert_period_array_equal(result, expected)
  119. @pytest.mark.parametrize('other', [
  120. 'datetime64[ns]', 'timedelta64[ns]',
  121. ])
  122. def test_astype_datetime(other):
  123. arr = period_array(['2000', '2001', None], freq='D')
  124. # slice off the [ns] so that the regex matches.
  125. with pytest.raises(TypeError, match=other[:-4]):
  126. arr.astype(other)
  127. def test_fillna_raises():
  128. arr = period_array(['2000', '2001', '2002'], freq='D')
  129. with pytest.raises(ValueError, match='Length'):
  130. arr.fillna(arr[:2])
  131. def test_fillna_copies():
  132. arr = period_array(['2000', '2001', '2002'], freq='D')
  133. result = arr.fillna(pd.Period("2000", "D"))
  134. assert result is not arr
# ----------------------------------------------------------------------------
# setitem
  137. @pytest.mark.parametrize('key, value, expected', [
  138. ([0], pd.Period("2000", "D"), [10957, 1, 2]),
  139. ([0], None, [iNaT, 1, 2]),
  140. ([0], np.nan, [iNaT, 1, 2]),
  141. ([0, 1, 2], pd.Period("2000", "D"), [10957] * 3),
  142. ([0, 1, 2], [pd.Period("2000", "D"),
  143. pd.Period("2001", "D"),
  144. pd.Period("2002", "D")],
  145. [10957, 11323, 11688]),
  146. ])
  147. def test_setitem(key, value, expected):
  148. arr = PeriodArray(np.arange(3), freq="D")
  149. expected = PeriodArray(expected, freq="D")
  150. arr[key] = value
  151. tm.assert_period_array_equal(arr, expected)
  152. def test_setitem_raises_incompatible_freq():
  153. arr = PeriodArray(np.arange(3), freq="D")
  154. with pytest.raises(IncompatibleFrequency, match="freq"):
  155. arr[0] = pd.Period("2000", freq="A")
  156. other = period_array(['2000', '2001'], freq='A')
  157. with pytest.raises(IncompatibleFrequency, match="freq"):
  158. arr[[0, 1]] = other
  159. def test_setitem_raises_length():
  160. arr = PeriodArray(np.arange(3), freq="D")
  161. with pytest.raises(ValueError, match="length"):
  162. arr[[0, 1]] = [pd.Period("2000", freq="D")]
  163. def test_setitem_raises_type():
  164. arr = PeriodArray(np.arange(3), freq="D")
  165. with pytest.raises(TypeError, match="int"):
  166. arr[0] = 1
# ----------------------------------------------------------------------------
# Ops
  169. def test_sub_period():
  170. arr = period_array(['2000', '2001'], freq='D')
  171. other = pd.Period("2000", freq="M")
  172. with pytest.raises(IncompatibleFrequency, match="freq"):
  173. arr - other
# ----------------------------------------------------------------------------
# Methods
  176. @pytest.mark.parametrize('other', [
  177. pd.Period('2000', freq='H'),
  178. period_array(['2000', '2001', '2000'], freq='H')
  179. ])
  180. def test_where_different_freq_raises(other):
  181. ser = pd.Series(period_array(['2000', '2001', '2002'], freq='D'))
  182. cond = np.array([True, False, True])
  183. with pytest.raises(IncompatibleFrequency, match="freq"):
  184. ser.where(cond, other)
# ----------------------------------------------------------------------------
# Printing
  187. def test_repr_small():
  188. arr = period_array(['2000', '2001'], freq='D')
  189. result = str(arr)
  190. expected = (
  191. "<PeriodArray>\n"
  192. "['2000-01-01', '2001-01-01']\n"
  193. "Length: 2, dtype: period[D]"
  194. )
  195. assert result == expected
  196. def test_repr_large():
  197. arr = period_array(['2000', '2001'] * 500, freq='D')
  198. result = str(arr)
  199. expected = (
  200. "<PeriodArray>\n"
  201. "['2000-01-01', '2001-01-01', '2000-01-01', '2001-01-01', "
  202. "'2000-01-01',\n"
  203. " '2001-01-01', '2000-01-01', '2001-01-01', '2000-01-01', "
  204. "'2001-01-01',\n"
  205. " ...\n"
  206. " '2000-01-01', '2001-01-01', '2000-01-01', '2001-01-01', "
  207. "'2000-01-01',\n"
  208. " '2001-01-01', '2000-01-01', '2001-01-01', '2000-01-01', "
  209. "'2001-01-01']\n"
  210. "Length: 1000, dtype: period[D]"
  211. )
  212. assert result == expected
# ----------------------------------------------------------------------------
# Reductions
  215. class TestReductions(object):
  216. def test_min_max(self):
  217. arr = period_array([
  218. '2000-01-03',
  219. '2000-01-03',
  220. 'NaT',
  221. '2000-01-02',
  222. '2000-01-05',
  223. '2000-01-04',
  224. ], freq='D')
  225. result = arr.min()
  226. expected = pd.Period('2000-01-02', freq='D')
  227. assert result == expected
  228. result = arr.max()
  229. expected = pd.Period('2000-01-05', freq='D')
  230. assert result == expected
  231. result = arr.min(skipna=False)
  232. assert result is pd.NaT
  233. result = arr.max(skipna=False)
  234. assert result is pd.NaT
  235. @pytest.mark.parametrize('skipna', [True, False])
  236. def test_min_max_empty(self, skipna):
  237. arr = period_array([], freq='D')
  238. result = arr.min(skipna=skipna)
  239. assert result is pd.NaT
  240. result = arr.max(skipna=skipna)
  241. assert result is pd.NaT