# test_datetime.py

from datetime import date

import dateutil
import numpy as np
import pytest

from pandas.compat import lrange

import pandas as pd
from pandas import (
    DataFrame, DatetimeIndex, Index, Timestamp, date_range, offsets)
import pandas.util.testing as tm
from pandas.util.testing import assert_almost_equal

randn = np.random.randn


class TestDatetimeIndex(object):

    def test_roundtrip_pickle_with_tz(self):
        # GH 8367
        # round-trip of timezone
        index = date_range('20130101', periods=3, tz='US/Eastern', name='foo')
        unpickled = tm.round_trip_pickle(index)
        tm.assert_index_equal(index, unpickled)

    def test_reindex_preserves_tz_if_target_is_empty_list_or_array(self):
        # GH7774
        index = date_range('20130101', periods=3, tz='US/Eastern')
        assert str(index.reindex([])[0].tz) == 'US/Eastern'
        assert str(index.reindex(np.array([]))[0].tz) == 'US/Eastern'

    def test_time_loc(self):  # GH8667
        from datetime import time
        from pandas._libs.index import _SIZE_CUTOFF

        ns = _SIZE_CUTOFF + np.array([-100, 100], dtype=np.int64)
        key = time(15, 11, 30)
        start = key.hour * 3600 + key.minute * 60 + key.second
        step = 24 * 3600
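
        # With a 1-second frequency, the requested time of day first occurs
        # at position `start` (seconds past midnight) and then every `step`
        # entries after that, which is what np.arange(start, n, step) below
        # encodes.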
        for n in ns:
            idx = pd.date_range('2014-11-26', periods=n, freq='S')
            ts = pd.Series(np.random.randn(n), index=idx)
            i = np.arange(start, n, step)

            tm.assert_numpy_array_equal(ts.index.get_loc(key), i,
                                        check_dtype=False)
            tm.assert_series_equal(ts[key], ts.iloc[i])

            left, right = ts.copy(), ts.copy()
            left[key] *= -10
            right.iloc[i] *= -10
            tm.assert_series_equal(left, right)

    def test_time_overflow_for_32bit_machines(self):
        # GH8943. On some machines NumPy defaults to np.int32 (for example,
        # 32-bit Linux machines). In the function _generate_regular_range
        # found in tseries/index.py, `periods` gets multiplied by `strides`
        # (which has value 1e9) and since the max value for np.int32 is ~2e9,
        # and since those machines won't promote np.int32 to np.int64, we get
        # overflow.
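        # Concretely, 1000 * 10**9 = 1e12, which is far beyond the np.int32
        # maximum of 2**31 - 1 (about 2.1e9), so without promotion to int64
        # the product wraps around.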
        periods = np.int_(1000)

        idx1 = pd.date_range(start='2000', periods=periods, freq='S')
        assert len(idx1) == periods

        idx2 = pd.date_range(end='2000', periods=periods, freq='S')
        assert len(idx2) == periods

    def test_nat(self):
        assert DatetimeIndex([np.nan])[0] is pd.NaT

    def test_week_of_month_frequency(self):
        # GH 5348: "ValueError: Could not evaluate WOM-1SUN" shouldn't raise
        d1 = date(2002, 9, 1)
        d2 = date(2013, 10, 27)
        d3 = date(2012, 9, 30)
        idx1 = DatetimeIndex([d1, d2])
        idx2 = DatetimeIndex([d3])
        result_append = idx1.append(idx2)
        expected = DatetimeIndex([d1, d2, d3])
        tm.assert_index_equal(result_append, expected)
        result_union = idx1.union(idx2)
        expected = DatetimeIndex([d1, d3, d2])
        tm.assert_index_equal(result_union, expected)

        # GH 5115
        result = date_range("2013-1-1", periods=4, freq='WOM-1SAT')
        dates = ['2013-01-05', '2013-02-02', '2013-03-02', '2013-04-06']
        expected = DatetimeIndex(dates, freq='WOM-1SAT')
        tm.assert_index_equal(result, expected)

    def test_hash_error(self):
        index = date_range('20010101', periods=10)
        with pytest.raises(TypeError, match=("unhashable type: %r" %
                                             type(index).__name__)):
            hash(index)

    def test_stringified_slice_with_tz(self):
        # GH#2658
        import datetime
        start = datetime.datetime.now()
        idx = date_range(start=start, freq="1d", periods=10)
        df = DataFrame(lrange(10), index=idx)
        df["2013-01-14 23:44:34.437768-05:00":]  # no exception here

    def test_append_join_nondatetimeindex(self):
        rng = date_range('1/1/2000', periods=10)
        idx = Index(['a', 'b', 'c', 'd'])

        result = rng.append(idx)
        assert isinstance(result[0], Timestamp)

        # it works
        rng.join(idx, how='outer')

    def test_map(self):
        rng = date_range('1/1/2000', periods=10)

        f = lambda x: x.strftime('%Y%m%d')
        result = rng.map(f)
        exp = Index([f(x) for x in rng], dtype='<U8')
        tm.assert_index_equal(result, exp)

    def test_map_fallthrough(self, capsys):
        # GH#22067, check we don't get warnings about silently ignored errors
        dti = date_range('2017-01-01', '2018-01-01', freq='B')

        dti.map(lambda x: pd.Period(year=x.year, month=x.month, freq='M'))

        captured = capsys.readouterr()
        assert captured.err == ''

    def test_iteration_preserves_tz(self):
        # see gh-8890
        index = date_range("2012-01-01", periods=3, freq='H', tz='US/Eastern')

        for i, ts in enumerate(index):
            result = ts
            expected = index[i]
            assert result == expected

        index = date_range("2012-01-01", periods=3, freq='H',
                           tz=dateutil.tz.tzoffset(None, -28800))

        for i, ts in enumerate(index):
            result = ts
            expected = index[i]
            assert result._repr_base == expected._repr_base
            assert result == expected

        # 9100
        index = pd.DatetimeIndex(['2014-12-01 03:32:39.987000-08:00',
                                  '2014-12-01 04:12:34.987000-08:00'])
        for i, ts in enumerate(index):
            result = ts
            expected = index[i]
            assert result._repr_base == expected._repr_base
            assert result == expected

    @pytest.mark.parametrize('periods', [0, 9999, 10000, 10001])
    def test_iteration_over_chunksize(self, periods):
        # GH21012
        index = date_range('2000-01-01 00:00:00', periods=periods, freq='min')
        num = 0
        for stamp in index:
            assert index[num] == stamp
            num += 1
        assert num == len(index)

    def test_misc_coverage(self):
        rng = date_range('1/1/2000', periods=5)
        result = rng.groupby(rng.day)
        assert isinstance(list(result.values())[0][0], Timestamp)

        idx = DatetimeIndex(['2000-01-03', '2000-01-01', '2000-01-02'])
        assert not idx.equals(list(idx))

        non_datetime = Index(list('abc'))
        assert not idx.equals(list(non_datetime))

    def test_string_index_series_name_converted(self):
        # #1644
        df = DataFrame(np.random.randn(10, 4),
                       index=date_range('1/1/2000', periods=10))

        result = df.loc['1/3/2000']
        assert result.name == df.index[2]

        result = df.T['1/3/2000']
        assert result.name == df.index[2]

    def test_get_duplicates(self):
        idx = DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-02',
                             '2000-01-03', '2000-01-03', '2000-01-04'])

        with tm.assert_produces_warning(FutureWarning):
            # Deprecated - see GH20239
            result = idx.get_duplicates()

        ex = DatetimeIndex(['2000-01-02', '2000-01-03'])
        tm.assert_index_equal(result, ex)

    def test_argmin_argmax(self):
        idx = DatetimeIndex(['2000-01-04', '2000-01-01', '2000-01-02'])
        assert idx.argmin() == 1
        assert idx.argmax() == 0

    def test_sort_values(self):
        idx = DatetimeIndex(['2000-01-04', '2000-01-01', '2000-01-02'])

        ordered = idx.sort_values()
        assert ordered.is_monotonic

        ordered = idx.sort_values(ascending=False)
        assert ordered[::-1].is_monotonic

        ordered, dexer = idx.sort_values(return_indexer=True)
        assert ordered.is_monotonic
        tm.assert_numpy_array_equal(dexer, np.array([1, 2, 0], dtype=np.intp))

        ordered, dexer = idx.sort_values(return_indexer=True, ascending=False)
        assert ordered[::-1].is_monotonic
        tm.assert_numpy_array_equal(dexer, np.array([0, 2, 1], dtype=np.intp))

    def test_map_bug_1677(self):
        index = DatetimeIndex(['2012-04-25 09:30:00.393000'])
        f = index.asof

        result = index.map(f)
        expected = Index([f(index[0])])
        tm.assert_index_equal(result, expected)

    def test_groupby_function_tuple_1677(self):
        df = DataFrame(np.random.rand(100),
                       index=date_range("1/1/2000", periods=100))
        monthly_group = df.groupby(lambda x: (x.year, x.month))

        result = monthly_group.mean()
        assert isinstance(result.index[0], tuple)

    def test_append_numpy_bug_1681(self):
        # another datetime64 bug
        dr = date_range('2011/1/1', '2012/1/1', freq='W-FRI')
        a = DataFrame()
        c = DataFrame({'A': 'foo', 'B': dr}, index=dr)

        result = a.append(c)
        assert (result['B'] == dr).all()

    def test_isin(self):
        index = tm.makeDateIndex(4)
        result = index.isin(index)
        assert result.all()

        result = index.isin(list(index))
        assert result.all()

        assert_almost_equal(index.isin([index[2], 5]),
                            np.array([False, False, True, False]))

    def test_does_not_convert_mixed_integer(self):
        df = tm.makeCustomDataframe(10, 10,
                                    data_gen_f=lambda *args, **kwargs: randn(),
                                    r_idx_type='i', c_idx_type='dt')
        cols = df.columns.join(df.index, how='outer')
        joined = cols.join(df.columns)
        assert cols.dtype == np.dtype('O')
        assert cols.dtype == joined.dtype
        tm.assert_numpy_array_equal(cols.values, joined.values)

    def test_join_self(self, join_type):
        index = date_range('1/1/2000', periods=10)
        joined = index.join(index, how=join_type)
        assert index is joined

    def assert_index_parameters(self, index):
        assert index.freq == '40960N'
        assert index.inferred_freq == '40960N'

    def test_ns_index(self):
        nsamples = 400
        ns = int(1e9 / 24414)
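        # int(1e9 / 24414) == 40960, i.e. one sample every 40960 nanoseconds
        # (roughly a 24.414 kHz sample rate), which is why
        # assert_index_parameters above expects a '40960N' frequency.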
        dtstart = np.datetime64('2012-09-20T00:00:00')

        dt = dtstart + np.arange(nsamples) * np.timedelta64(ns, 'ns')
        freq = ns * offsets.Nano()
        index = pd.DatetimeIndex(dt, freq=freq, name='time')
        self.assert_index_parameters(index)

        new_index = pd.date_range(start=index[0], end=index[-1],
                                  freq=index.freq)
        self.assert_index_parameters(new_index)

    def test_join_with_period_index(self, join_type):
        df = tm.makeCustomDataframe(
            10, 10, data_gen_f=lambda *args: np.random.randint(2),
            c_idx_type='p', r_idx_type='dt')
        s = df.iloc[:5, 0]

        msg = 'can only call with other PeriodIndex-ed objects'
        with pytest.raises(ValueError, match=msg):
            df.columns.join(s.index, how=join_type)

    def test_factorize(self):
        idx1 = DatetimeIndex(['2014-01', '2014-01', '2014-02', '2014-02',
                              '2014-03', '2014-03'])

        exp_arr = np.array([0, 0, 1, 1, 2, 2], dtype=np.intp)
        exp_idx = DatetimeIndex(['2014-01', '2014-02', '2014-03'])

        arr, idx = idx1.factorize()
        tm.assert_numpy_array_equal(arr, exp_arr)
        tm.assert_index_equal(idx, exp_idx)

        arr, idx = idx1.factorize(sort=True)
        tm.assert_numpy_array_equal(arr, exp_arr)
        tm.assert_index_equal(idx, exp_idx)

        # tz must be preserved
        idx1 = idx1.tz_localize('Asia/Tokyo')
        exp_idx = exp_idx.tz_localize('Asia/Tokyo')

        arr, idx = idx1.factorize()
        tm.assert_numpy_array_equal(arr, exp_arr)
        tm.assert_index_equal(idx, exp_idx)

        idx2 = pd.DatetimeIndex(['2014-03', '2014-03', '2014-02', '2014-01',
                                 '2014-03', '2014-01'])

        exp_arr = np.array([2, 2, 1, 0, 2, 0], dtype=np.intp)
        exp_idx = DatetimeIndex(['2014-01', '2014-02', '2014-03'])
        arr, idx = idx2.factorize(sort=True)
        tm.assert_numpy_array_equal(arr, exp_arr)
        tm.assert_index_equal(idx, exp_idx)

        exp_arr = np.array([0, 0, 1, 2, 0, 2], dtype=np.intp)
        exp_idx = DatetimeIndex(['2014-03', '2014-02', '2014-01'])
        arr, idx = idx2.factorize()
        tm.assert_numpy_array_equal(arr, exp_arr)
        tm.assert_index_equal(idx, exp_idx)

        # freq must be preserved
        idx3 = date_range('2000-01', periods=4, freq='M', tz='Asia/Tokyo')
        exp_arr = np.array([0, 1, 2, 3], dtype=np.intp)
        arr, idx = idx3.factorize()
        tm.assert_numpy_array_equal(arr, exp_arr)
        tm.assert_index_equal(idx, idx3)

    def test_factorize_tz(self, tz_naive_fixture):
        tz = tz_naive_fixture
        # GH#13750
        base = pd.date_range('2016-11-05', freq='H', periods=100, tz=tz)
        idx = base.repeat(5)

        exp_arr = np.arange(100, dtype=np.intp).repeat(5)

        for obj in [idx, pd.Series(idx)]:
            arr, res = obj.factorize()
            tm.assert_numpy_array_equal(arr, exp_arr)
            tm.assert_index_equal(res, base)

    def test_factorize_dst(self):
        # GH 13750
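        # 2016-11-06 is the US DST fall-back date (the 01:00 wall-clock hour
        # occurs twice in US/Eastern), while 2016-06-13 lies entirely within
        # DST; in both cases each distinct timestamp should get its own code.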
        idx = pd.date_range('2016-11-06', freq='H', periods=12,
                            tz='US/Eastern')

        for obj in [idx, pd.Series(idx)]:
            arr, res = obj.factorize()
            tm.assert_numpy_array_equal(arr, np.arange(12, dtype=np.intp))
            tm.assert_index_equal(res, idx)

        idx = pd.date_range('2016-06-13', freq='H', periods=12,
                            tz='US/Eastern')

        for obj in [idx, pd.Series(idx)]:
            arr, res = obj.factorize()
            tm.assert_numpy_array_equal(arr, np.arange(12, dtype=np.intp))
            tm.assert_index_equal(res, idx)

    @pytest.mark.parametrize('arr, expected', [
        (pd.DatetimeIndex(['2017', '2017']), pd.DatetimeIndex(['2017'])),
        (pd.DatetimeIndex(['2017', '2017'], tz='US/Eastern'),
         pd.DatetimeIndex(['2017'], tz='US/Eastern')),
    ])
    def test_unique(self, arr, expected):
        result = arr.unique()
        tm.assert_index_equal(result, expected)

        # GH 21737
        # Ensure the underlying data is consistent
        assert result[0] == expected[0]

    def test_asarray_tz_naive(self):
        # This shouldn't produce a warning.
        idx = pd.date_range('2000', periods=2)
        # M8[ns] by default
        with tm.assert_produces_warning(None):
            result = np.asarray(idx)

        expected = np.array(['2000-01-01', '2000-01-02'], dtype='M8[ns]')
        tm.assert_numpy_array_equal(result, expected)

        # optionally, object
        with tm.assert_produces_warning(None):
            result = np.asarray(idx, dtype=object)

        expected = np.array([pd.Timestamp('2000-01-01'),
                             pd.Timestamp('2000-01-02')])
        tm.assert_numpy_array_equal(result, expected)

    def test_asarray_tz_aware(self):
        tz = 'US/Central'
        idx = pd.date_range('2000', periods=2, tz=tz)
        expected = np.array(['2000-01-01T06', '2000-01-02T06'], dtype='M8[ns]')
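        # US/Central is UTC-6 in January, so local midnight corresponds to
        # 06:00 UTC, which is what the tz-naive M8[ns] values above hold.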

        # We warn by default and return an ndarray[M8[ns]]
        with tm.assert_produces_warning(FutureWarning):
            result = np.asarray(idx)

        tm.assert_numpy_array_equal(result, expected)

        # Old behavior with no warning
        with tm.assert_produces_warning(None):
            result = np.asarray(idx, dtype="M8[ns]")

        tm.assert_numpy_array_equal(result, expected)

        # Future behavior with no warning
        expected = np.array([pd.Timestamp("2000-01-01", tz=tz),
                             pd.Timestamp("2000-01-02", tz=tz)])

        with tm.assert_produces_warning(None):
            result = np.asarray(idx, dtype=object)

        tm.assert_numpy_array_equal(result, expected)