test_datetimes.py 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292
  1. # -*- coding: utf-8 -*-
  2. """
  3. Tests for DatetimeArray
  4. """
  5. import operator
  6. import numpy as np
  7. import pytest
  8. from pandas.core.dtypes.dtypes import DatetimeTZDtype
  9. import pandas as pd
  10. from pandas.core.arrays import DatetimeArray
  11. from pandas.core.arrays.datetimes import sequence_to_dt64ns
  12. import pandas.util.testing as tm
  13. class TestDatetimeArrayConstructor(object):
  14. def test_freq_validation(self):
  15. # GH#24623 check that invalid instances cannot be created with the
  16. # public constructor
  17. arr = np.arange(5, dtype=np.int64) * 3600 * 10**9
  18. msg = ("Inferred frequency H from passed values does not "
  19. "conform to passed frequency W-SUN")
  20. with pytest.raises(ValueError, match=msg):
  21. DatetimeArray(arr, freq="W")
  22. @pytest.mark.parametrize('meth', [DatetimeArray._from_sequence,
  23. sequence_to_dt64ns,
  24. pd.to_datetime,
  25. pd.DatetimeIndex])
  26. def test_mixing_naive_tzaware_raises(self, meth):
  27. # GH#24569
  28. arr = np.array([pd.Timestamp('2000'), pd.Timestamp('2000', tz='CET')])
  29. msg = ('Cannot mix tz-aware with tz-naive values|'
  30. 'Tz-aware datetime.datetime cannot be converted '
  31. 'to datetime64 unless utc=True')
  32. for obj in [arr, arr[::-1]]:
  33. # check that we raise regardless of whether naive is found
  34. # before aware or vice-versa
  35. with pytest.raises(ValueError, match=msg):
  36. meth(obj)
  37. def test_from_pandas_array(self):
  38. arr = pd.array(np.arange(5, dtype=np.int64)) * 3600 * 10**9
  39. result = DatetimeArray._from_sequence(arr, freq='infer')
  40. expected = pd.date_range('1970-01-01', periods=5, freq='H')._data
  41. tm.assert_datetime_array_equal(result, expected)
  42. def test_mismatched_timezone_raises(self):
  43. arr = DatetimeArray(np.array(['2000-01-01T06:00:00'], dtype='M8[ns]'),
  44. dtype=DatetimeTZDtype(tz='US/Central'))
  45. dtype = DatetimeTZDtype(tz='US/Eastern')
  46. with pytest.raises(TypeError, match='Timezone of the array'):
  47. DatetimeArray(arr, dtype=dtype)
  48. def test_non_array_raises(self):
  49. with pytest.raises(ValueError, match='list'):
  50. DatetimeArray([1, 2, 3])
  51. def test_other_type_raises(self):
  52. with pytest.raises(ValueError,
  53. match="The dtype of 'values' is incorrect.*bool"):
  54. DatetimeArray(np.array([1, 2, 3], dtype='bool'))
  55. def test_incorrect_dtype_raises(self):
  56. with pytest.raises(ValueError, match="Unexpected value for 'dtype'."):
  57. DatetimeArray(np.array([1, 2, 3], dtype='i8'), dtype='category')
  58. def test_freq_infer_raises(self):
  59. with pytest.raises(ValueError, match='Frequency inference'):
  60. DatetimeArray(np.array([1, 2, 3], dtype='i8'), freq="infer")
  61. def test_copy(self):
  62. data = np.array([1, 2, 3], dtype='M8[ns]')
  63. arr = DatetimeArray(data, copy=False)
  64. assert arr._data is data
  65. arr = DatetimeArray(data, copy=True)
  66. assert arr._data is not data
  67. class TestDatetimeArrayComparisons(object):
  68. # TODO: merge this into tests/arithmetic/test_datetime64 once it is
  69. # sufficiently robust
  70. def test_cmp_dt64_arraylike_tznaive(self, all_compare_operators):
  71. # arbitrary tz-naive DatetimeIndex
  72. opname = all_compare_operators.strip('_')
  73. op = getattr(operator, opname)
  74. dti = pd.date_range('2016-01-1', freq='MS', periods=9, tz=None)
  75. arr = DatetimeArray(dti)
  76. assert arr.freq == dti.freq
  77. assert arr.tz == dti.tz
  78. right = dti
  79. expected = np.ones(len(arr), dtype=bool)
  80. if opname in ['ne', 'gt', 'lt']:
  81. # for these the comparisons should be all-False
  82. expected = ~expected
  83. result = op(arr, arr)
  84. tm.assert_numpy_array_equal(result, expected)
  85. for other in [right, np.array(right)]:
  86. # TODO: add list and tuple, and object-dtype once those
  87. # are fixed in the constructor
  88. result = op(arr, other)
  89. tm.assert_numpy_array_equal(result, expected)
  90. result = op(other, arr)
  91. tm.assert_numpy_array_equal(result, expected)
  92. class TestDatetimeArray(object):
  93. def test_astype_to_same(self):
  94. arr = DatetimeArray._from_sequence(['2000'], tz='US/Central')
  95. result = arr.astype(DatetimeTZDtype(tz="US/Central"), copy=False)
  96. assert result is arr
  97. @pytest.mark.parametrize("dtype", [
  98. int, np.int32, np.int64, 'uint32', 'uint64',
  99. ])
  100. def test_astype_int(self, dtype):
  101. arr = DatetimeArray._from_sequence([pd.Timestamp('2000'),
  102. pd.Timestamp('2001')])
  103. result = arr.astype(dtype)
  104. if np.dtype(dtype).kind == 'u':
  105. expected_dtype = np.dtype('uint64')
  106. else:
  107. expected_dtype = np.dtype('int64')
  108. expected = arr.astype(expected_dtype)
  109. assert result.dtype == expected_dtype
  110. tm.assert_numpy_array_equal(result, expected)
  111. def test_tz_setter_raises(self):
  112. arr = DatetimeArray._from_sequence(['2000'], tz='US/Central')
  113. with pytest.raises(AttributeError, match='tz_localize'):
  114. arr.tz = 'UTC'
  115. def test_setitem_different_tz_raises(self):
  116. data = np.array([1, 2, 3], dtype='M8[ns]')
  117. arr = DatetimeArray(data, copy=False,
  118. dtype=DatetimeTZDtype(tz="US/Central"))
  119. with pytest.raises(ValueError, match="None"):
  120. arr[0] = pd.Timestamp('2000')
  121. with pytest.raises(ValueError, match="US/Central"):
  122. arr[0] = pd.Timestamp('2000', tz="US/Eastern")
  123. def test_setitem_clears_freq(self):
  124. a = DatetimeArray(pd.date_range('2000', periods=2, freq='D',
  125. tz='US/Central'))
  126. a[0] = pd.Timestamp("2000", tz="US/Central")
  127. assert a.freq is None
  128. def test_repeat_preserves_tz(self):
  129. dti = pd.date_range('2000', periods=2, freq='D', tz='US/Central')
  130. arr = DatetimeArray(dti)
  131. repeated = arr.repeat([1, 1])
  132. # preserves tz and values, but not freq
  133. expected = DatetimeArray(arr.asi8, freq=None, dtype=arr.dtype)
  134. tm.assert_equal(repeated, expected)
  135. def test_value_counts_preserves_tz(self):
  136. dti = pd.date_range('2000', periods=2, freq='D', tz='US/Central')
  137. arr = DatetimeArray(dti).repeat([4, 3])
  138. result = arr.value_counts()
  139. # Note: not tm.assert_index_equal, since `freq`s do not match
  140. assert result.index.equals(dti)
  141. arr[-2] = pd.NaT
  142. result = arr.value_counts()
  143. expected = pd.Series([1, 4, 2],
  144. index=[pd.NaT, dti[0], dti[1]])
  145. tm.assert_series_equal(result, expected)
  146. @pytest.mark.parametrize('method', ['pad', 'backfill'])
  147. def test_fillna_preserves_tz(self, method):
  148. dti = pd.date_range('2000-01-01', periods=5, freq='D', tz='US/Central')
  149. arr = DatetimeArray(dti, copy=True)
  150. arr[2] = pd.NaT
  151. fill_val = dti[1] if method == 'pad' else dti[3]
  152. expected = DatetimeArray._from_sequence(
  153. [dti[0], dti[1], fill_val, dti[3], dti[4]],
  154. freq=None, tz='US/Central'
  155. )
  156. result = arr.fillna(method=method)
  157. tm.assert_extension_array_equal(result, expected)
  158. # assert that arr and dti were not modified in-place
  159. assert arr[2] is pd.NaT
  160. assert dti[2] == pd.Timestamp('2000-01-03', tz='US/Central')
  161. def test_array_interface_tz(self):
  162. tz = "US/Central"
  163. data = DatetimeArray(pd.date_range('2017', periods=2, tz=tz))
  164. result = np.asarray(data)
  165. expected = np.array([pd.Timestamp('2017-01-01T00:00:00', tz=tz),
  166. pd.Timestamp('2017-01-02T00:00:00', tz=tz)],
  167. dtype=object)
  168. tm.assert_numpy_array_equal(result, expected)
  169. result = np.asarray(data, dtype=object)
  170. tm.assert_numpy_array_equal(result, expected)
  171. result = np.asarray(data, dtype='M8[ns]')
  172. expected = np.array(['2017-01-01T06:00:00',
  173. '2017-01-02T06:00:00'], dtype="M8[ns]")
  174. tm.assert_numpy_array_equal(result, expected)
  175. def test_array_interface(self):
  176. data = DatetimeArray(pd.date_range('2017', periods=2))
  177. expected = np.array(['2017-01-01T00:00:00', '2017-01-02T00:00:00'],
  178. dtype='datetime64[ns]')
  179. result = np.asarray(data)
  180. tm.assert_numpy_array_equal(result, expected)
  181. result = np.asarray(data, dtype=object)
  182. expected = np.array([pd.Timestamp('2017-01-01T00:00:00'),
  183. pd.Timestamp('2017-01-02T00:00:00')],
  184. dtype=object)
  185. tm.assert_numpy_array_equal(result, expected)
  186. class TestSequenceToDT64NS(object):
  187. def test_tz_dtype_mismatch_raises(self):
  188. arr = DatetimeArray._from_sequence(['2000'], tz='US/Central')
  189. with pytest.raises(TypeError, match='data is already tz-aware'):
  190. sequence_to_dt64ns(arr, dtype=DatetimeTZDtype(tz="UTC"))
  191. def test_tz_dtype_matches(self):
  192. arr = DatetimeArray._from_sequence(['2000'], tz='US/Central')
  193. result, _, _ = sequence_to_dt64ns(
  194. arr, dtype=DatetimeTZDtype(tz="US/Central"))
  195. tm.assert_numpy_array_equal(arr._data, result)
  196. class TestReductions(object):
  197. @pytest.mark.parametrize("tz", [None, "US/Central"])
  198. def test_min_max(self, tz):
  199. arr = DatetimeArray._from_sequence([
  200. '2000-01-03',
  201. '2000-01-03',
  202. 'NaT',
  203. '2000-01-02',
  204. '2000-01-05',
  205. '2000-01-04',
  206. ], tz=tz)
  207. result = arr.min()
  208. expected = pd.Timestamp('2000-01-02', tz=tz)
  209. assert result == expected
  210. result = arr.max()
  211. expected = pd.Timestamp('2000-01-05', tz=tz)
  212. assert result == expected
  213. result = arr.min(skipna=False)
  214. assert result is pd.NaT
  215. result = arr.max(skipna=False)
  216. assert result is pd.NaT
  217. @pytest.mark.parametrize("tz", [None, "US/Central"])
  218. @pytest.mark.parametrize('skipna', [True, False])
  219. def test_min_max_empty(self, skipna, tz):
  220. arr = DatetimeArray._from_sequence([], tz=tz)
  221. result = arr.min(skipna=skipna)
  222. assert result is pd.NaT
  223. result = arr.max(skipna=skipna)
  224. assert result is pd.NaT