test_ops.py 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498
  1. from datetime import datetime
  2. import warnings
  3. import numpy as np
  4. import pytest
  5. from pandas.core.dtypes.generic import ABCDateOffset
  6. import pandas as pd
  7. from pandas import (
  8. DatetimeIndex, Index, PeriodIndex, Series, Timestamp, bdate_range,
  9. date_range)
  10. from pandas.tests.test_base import Ops
  11. import pandas.util.testing as tm
  12. from pandas.tseries.offsets import BDay, BMonthEnd, CDay, Day, Hour
  13. START, END = datetime(2009, 1, 1), datetime(2010, 1, 1)
  14. class TestDatetimeIndexOps(Ops):
  15. def setup_method(self, method):
  16. super(TestDatetimeIndexOps, self).setup_method(method)
  17. mask = lambda x: (isinstance(x, DatetimeIndex) or
  18. isinstance(x, PeriodIndex))
  19. self.is_valid_objs = [o for o in self.objs if mask(o)]
  20. self.not_valid_objs = [o for o in self.objs if not mask(o)]
  21. def test_ops_properties(self):
  22. f = lambda x: isinstance(x, DatetimeIndex)
  23. self.check_ops_properties(DatetimeIndex._field_ops, f)
  24. self.check_ops_properties(DatetimeIndex._object_ops, f)
  25. self.check_ops_properties(DatetimeIndex._bool_ops, f)
  26. def test_ops_properties_basic(self):
  27. # sanity check that the behavior didn't change
  28. # GH#7206
  29. for op in ['year', 'day', 'second', 'weekday']:
  30. pytest.raises(TypeError, lambda x: getattr(self.dt_series, op))
  31. # attribute access should still work!
  32. s = Series(dict(year=2000, month=1, day=10))
  33. assert s.year == 2000
  34. assert s.month == 1
  35. assert s.day == 10
  36. pytest.raises(AttributeError, lambda: s.weekday)
  37. def test_repeat_range(self, tz_naive_fixture):
  38. tz = tz_naive_fixture
  39. rng = date_range('1/1/2000', '1/1/2001')
  40. result = rng.repeat(5)
  41. assert result.freq is None
  42. assert len(result) == 5 * len(rng)
  43. index = pd.date_range('2001-01-01', periods=2, freq='D', tz=tz)
  44. exp = pd.DatetimeIndex(['2001-01-01', '2001-01-01',
  45. '2001-01-02', '2001-01-02'], tz=tz)
  46. for res in [index.repeat(2), np.repeat(index, 2)]:
  47. tm.assert_index_equal(res, exp)
  48. assert res.freq is None
  49. index = pd.date_range('2001-01-01', periods=2, freq='2D', tz=tz)
  50. exp = pd.DatetimeIndex(['2001-01-01', '2001-01-01',
  51. '2001-01-03', '2001-01-03'], tz=tz)
  52. for res in [index.repeat(2), np.repeat(index, 2)]:
  53. tm.assert_index_equal(res, exp)
  54. assert res.freq is None
  55. index = pd.DatetimeIndex(['2001-01-01', 'NaT', '2003-01-01'],
  56. tz=tz)
  57. exp = pd.DatetimeIndex(['2001-01-01', '2001-01-01', '2001-01-01',
  58. 'NaT', 'NaT', 'NaT',
  59. '2003-01-01', '2003-01-01', '2003-01-01'],
  60. tz=tz)
  61. for res in [index.repeat(3), np.repeat(index, 3)]:
  62. tm.assert_index_equal(res, exp)
  63. assert res.freq is None
  64. def test_repeat(self, tz_naive_fixture):
  65. tz = tz_naive_fixture
  66. reps = 2
  67. msg = "the 'axis' parameter is not supported"
  68. rng = pd.date_range(start='2016-01-01', periods=2,
  69. freq='30Min', tz=tz)
  70. expected_rng = DatetimeIndex([
  71. Timestamp('2016-01-01 00:00:00', tz=tz, freq='30T'),
  72. Timestamp('2016-01-01 00:00:00', tz=tz, freq='30T'),
  73. Timestamp('2016-01-01 00:30:00', tz=tz, freq='30T'),
  74. Timestamp('2016-01-01 00:30:00', tz=tz, freq='30T'),
  75. ])
  76. res = rng.repeat(reps)
  77. tm.assert_index_equal(res, expected_rng)
  78. assert res.freq is None
  79. tm.assert_index_equal(np.repeat(rng, reps), expected_rng)
  80. with pytest.raises(ValueError, match=msg):
  81. np.repeat(rng, reps, axis=1)
  82. def test_resolution(self, tz_naive_fixture):
  83. tz = tz_naive_fixture
  84. for freq, expected in zip(['A', 'Q', 'M', 'D', 'H', 'T',
  85. 'S', 'L', 'U'],
  86. ['day', 'day', 'day', 'day', 'hour',
  87. 'minute', 'second', 'millisecond',
  88. 'microsecond']):
  89. idx = pd.date_range(start='2013-04-01', periods=30, freq=freq,
  90. tz=tz)
  91. assert idx.resolution == expected
  92. def test_value_counts_unique(self, tz_naive_fixture):
  93. tz = tz_naive_fixture
  94. # GH 7735
  95. idx = pd.date_range('2011-01-01 09:00', freq='H', periods=10)
  96. # create repeated values, 'n'th element is repeated by n+1 times
  97. idx = DatetimeIndex(np.repeat(idx.values, range(1, len(idx) + 1)),
  98. tz=tz)
  99. exp_idx = pd.date_range('2011-01-01 18:00', freq='-1H', periods=10,
  100. tz=tz)
  101. expected = Series(range(10, 0, -1), index=exp_idx, dtype='int64')
  102. for obj in [idx, Series(idx)]:
  103. tm.assert_series_equal(obj.value_counts(), expected)
  104. expected = pd.date_range('2011-01-01 09:00', freq='H', periods=10,
  105. tz=tz)
  106. tm.assert_index_equal(idx.unique(), expected)
  107. idx = DatetimeIndex(['2013-01-01 09:00', '2013-01-01 09:00',
  108. '2013-01-01 09:00', '2013-01-01 08:00',
  109. '2013-01-01 08:00', pd.NaT], tz=tz)
  110. exp_idx = DatetimeIndex(['2013-01-01 09:00', '2013-01-01 08:00'],
  111. tz=tz)
  112. expected = Series([3, 2], index=exp_idx)
  113. for obj in [idx, Series(idx)]:
  114. tm.assert_series_equal(obj.value_counts(), expected)
  115. exp_idx = DatetimeIndex(['2013-01-01 09:00', '2013-01-01 08:00',
  116. pd.NaT], tz=tz)
  117. expected = Series([3, 2, 1], index=exp_idx)
  118. for obj in [idx, Series(idx)]:
  119. tm.assert_series_equal(obj.value_counts(dropna=False),
  120. expected)
  121. tm.assert_index_equal(idx.unique(), exp_idx)
  122. def test_nonunique_contains(self):
  123. # GH 9512
  124. for idx in map(DatetimeIndex,
  125. ([0, 1, 0], [0, 0, -1], [0, -1, -1],
  126. ['2015', '2015', '2016'], ['2015', '2015', '2014'])):
  127. assert idx[0] in idx
  128. @pytest.mark.parametrize('idx',
  129. [
  130. DatetimeIndex(
  131. ['2011-01-01',
  132. '2011-01-02',
  133. '2011-01-03'],
  134. freq='D', name='idx'),
  135. DatetimeIndex(
  136. ['2011-01-01 09:00',
  137. '2011-01-01 10:00',
  138. '2011-01-01 11:00'],
  139. freq='H', name='tzidx', tz='Asia/Tokyo')
  140. ])
  141. def test_order_with_freq(self, idx):
  142. ordered = idx.sort_values()
  143. tm.assert_index_equal(ordered, idx)
  144. assert ordered.freq == idx.freq
  145. ordered = idx.sort_values(ascending=False)
  146. expected = idx[::-1]
  147. tm.assert_index_equal(ordered, expected)
  148. assert ordered.freq == expected.freq
  149. assert ordered.freq.n == -1
  150. ordered, indexer = idx.sort_values(return_indexer=True)
  151. tm.assert_index_equal(ordered, idx)
  152. tm.assert_numpy_array_equal(indexer, np.array([0, 1, 2]),
  153. check_dtype=False)
  154. assert ordered.freq == idx.freq
  155. ordered, indexer = idx.sort_values(return_indexer=True,
  156. ascending=False)
  157. expected = idx[::-1]
  158. tm.assert_index_equal(ordered, expected)
  159. tm.assert_numpy_array_equal(indexer,
  160. np.array([2, 1, 0]),
  161. check_dtype=False)
  162. assert ordered.freq == expected.freq
  163. assert ordered.freq.n == -1
  164. @pytest.mark.parametrize('index_dates,expected_dates', [
  165. (['2011-01-01', '2011-01-03', '2011-01-05',
  166. '2011-01-02', '2011-01-01'],
  167. ['2011-01-01', '2011-01-01', '2011-01-02',
  168. '2011-01-03', '2011-01-05']),
  169. (['2011-01-01', '2011-01-03', '2011-01-05',
  170. '2011-01-02', '2011-01-01'],
  171. ['2011-01-01', '2011-01-01', '2011-01-02',
  172. '2011-01-03', '2011-01-05']),
  173. ([pd.NaT, '2011-01-03', '2011-01-05',
  174. '2011-01-02', pd.NaT],
  175. [pd.NaT, pd.NaT, '2011-01-02', '2011-01-03',
  176. '2011-01-05'])
  177. ])
  178. def test_order_without_freq(self, index_dates, expected_dates,
  179. tz_naive_fixture):
  180. tz = tz_naive_fixture
  181. # without freq
  182. index = DatetimeIndex(index_dates, tz=tz, name='idx')
  183. expected = DatetimeIndex(expected_dates, tz=tz, name='idx')
  184. ordered = index.sort_values()
  185. tm.assert_index_equal(ordered, expected)
  186. assert ordered.freq is None
  187. ordered = index.sort_values(ascending=False)
  188. tm.assert_index_equal(ordered, expected[::-1])
  189. assert ordered.freq is None
  190. ordered, indexer = index.sort_values(return_indexer=True)
  191. tm.assert_index_equal(ordered, expected)
  192. exp = np.array([0, 4, 3, 1, 2])
  193. tm.assert_numpy_array_equal(indexer, exp, check_dtype=False)
  194. assert ordered.freq is None
  195. ordered, indexer = index.sort_values(return_indexer=True,
  196. ascending=False)
  197. tm.assert_index_equal(ordered, expected[::-1])
  198. exp = np.array([2, 1, 3, 4, 0])
  199. tm.assert_numpy_array_equal(indexer, exp, check_dtype=False)
  200. assert ordered.freq is None
  201. def test_drop_duplicates_metadata(self):
  202. # GH 10115
  203. idx = pd.date_range('2011-01-01', '2011-01-31', freq='D', name='idx')
  204. result = idx.drop_duplicates()
  205. tm.assert_index_equal(idx, result)
  206. assert idx.freq == result.freq
  207. idx_dup = idx.append(idx)
  208. assert idx_dup.freq is None # freq is reset
  209. result = idx_dup.drop_duplicates()
  210. tm.assert_index_equal(idx, result)
  211. assert result.freq is None
  212. def test_drop_duplicates(self):
  213. # to check Index/Series compat
  214. base = pd.date_range('2011-01-01', '2011-01-31', freq='D', name='idx')
  215. idx = base.append(base[:5])
  216. res = idx.drop_duplicates()
  217. tm.assert_index_equal(res, base)
  218. res = Series(idx).drop_duplicates()
  219. tm.assert_series_equal(res, Series(base))
  220. res = idx.drop_duplicates(keep='last')
  221. exp = base[5:].append(base[:5])
  222. tm.assert_index_equal(res, exp)
  223. res = Series(idx).drop_duplicates(keep='last')
  224. tm.assert_series_equal(res, Series(exp, index=np.arange(5, 36)))
  225. res = idx.drop_duplicates(keep=False)
  226. tm.assert_index_equal(res, base[5:])
  227. res = Series(idx).drop_duplicates(keep=False)
  228. tm.assert_series_equal(res, Series(base[5:], index=np.arange(5, 31)))
  229. @pytest.mark.parametrize('freq', [
  230. 'A', '2A', '-2A', 'Q', '-1Q', 'M', '-1M', 'D', '3D',
  231. '-3D', 'W', '-1W', 'H', '2H', '-2H', 'T', '2T', 'S',
  232. '-3S'])
  233. def test_infer_freq(self, freq):
  234. # GH 11018
  235. idx = pd.date_range('2011-01-01 09:00:00', freq=freq, periods=10)
  236. result = pd.DatetimeIndex(idx.asi8, freq='infer')
  237. tm.assert_index_equal(idx, result)
  238. assert result.freq == freq
  239. def test_nat(self, tz_naive_fixture):
  240. tz = tz_naive_fixture
  241. assert pd.DatetimeIndex._na_value is pd.NaT
  242. assert pd.DatetimeIndex([])._na_value is pd.NaT
  243. idx = pd.DatetimeIndex(['2011-01-01', '2011-01-02'], tz=tz)
  244. assert idx._can_hold_na
  245. tm.assert_numpy_array_equal(idx._isnan, np.array([False, False]))
  246. assert idx.hasnans is False
  247. tm.assert_numpy_array_equal(idx._nan_idxs,
  248. np.array([], dtype=np.intp))
  249. idx = pd.DatetimeIndex(['2011-01-01', 'NaT'], tz=tz)
  250. assert idx._can_hold_na
  251. tm.assert_numpy_array_equal(idx._isnan, np.array([False, True]))
  252. assert idx.hasnans is True
  253. tm.assert_numpy_array_equal(idx._nan_idxs,
  254. np.array([1], dtype=np.intp))
  255. def test_equals(self):
  256. # GH 13107
  257. idx = pd.DatetimeIndex(['2011-01-01', '2011-01-02', 'NaT'])
  258. assert idx.equals(idx)
  259. assert idx.equals(idx.copy())
  260. assert idx.equals(idx.astype(object))
  261. assert idx.astype(object).equals(idx)
  262. assert idx.astype(object).equals(idx.astype(object))
  263. assert not idx.equals(list(idx))
  264. assert not idx.equals(pd.Series(idx))
  265. idx2 = pd.DatetimeIndex(['2011-01-01', '2011-01-02', 'NaT'],
  266. tz='US/Pacific')
  267. assert not idx.equals(idx2)
  268. assert not idx.equals(idx2.copy())
  269. assert not idx.equals(idx2.astype(object))
  270. assert not idx.astype(object).equals(idx2)
  271. assert not idx.equals(list(idx2))
  272. assert not idx.equals(pd.Series(idx2))
  273. # same internal, different tz
  274. idx3 = pd.DatetimeIndex._simple_new(idx.asi8, tz='US/Pacific')
  275. tm.assert_numpy_array_equal(idx.asi8, idx3.asi8)
  276. assert not idx.equals(idx3)
  277. assert not idx.equals(idx3.copy())
  278. assert not idx.equals(idx3.astype(object))
  279. assert not idx.astype(object).equals(idx3)
  280. assert not idx.equals(list(idx3))
  281. assert not idx.equals(pd.Series(idx3))
  282. @pytest.mark.parametrize('values', [
  283. ['20180101', '20180103', '20180105'], []])
  284. @pytest.mark.parametrize('freq', [
  285. '2D', Day(2), '2B', BDay(2), '48H', Hour(48)])
  286. @pytest.mark.parametrize('tz', [None, 'US/Eastern'])
  287. def test_freq_setter(self, values, freq, tz):
  288. # GH 20678
  289. idx = DatetimeIndex(values, tz=tz)
  290. # can set to an offset, converting from string if necessary
  291. idx.freq = freq
  292. assert idx.freq == freq
  293. assert isinstance(idx.freq, ABCDateOffset)
  294. # can reset to None
  295. idx.freq = None
  296. assert idx.freq is None
  297. def test_freq_setter_errors(self):
  298. # GH 20678
  299. idx = DatetimeIndex(['20180101', '20180103', '20180105'])
  300. # setting with an incompatible freq
  301. msg = ('Inferred frequency 2D from passed values does not conform to '
  302. 'passed frequency 5D')
  303. with pytest.raises(ValueError, match=msg):
  304. idx.freq = '5D'
  305. # setting with non-freq string
  306. with pytest.raises(ValueError, match='Invalid frequency'):
  307. idx.freq = 'foo'
  308. def test_offset_deprecated(self):
  309. # GH 20716
  310. idx = pd.DatetimeIndex(['20180101', '20180102'])
  311. # getter deprecated
  312. with tm.assert_produces_warning(FutureWarning):
  313. idx.offset
  314. # setter deprecated
  315. with tm.assert_produces_warning(FutureWarning):
  316. idx.offset = BDay()
  317. class TestBusinessDatetimeIndex(object):
  318. def setup_method(self, method):
  319. self.rng = bdate_range(START, END)
  320. def test_comparison(self):
  321. d = self.rng[10]
  322. comp = self.rng > d
  323. assert comp[11]
  324. assert not comp[9]
  325. def test_pickle_unpickle(self):
  326. unpickled = tm.round_trip_pickle(self.rng)
  327. assert unpickled.freq is not None
  328. def test_copy(self):
  329. cp = self.rng.copy()
  330. repr(cp)
  331. tm.assert_index_equal(cp, self.rng)
  332. def test_shift(self):
  333. shifted = self.rng.shift(5)
  334. assert shifted[0] == self.rng[5]
  335. assert shifted.freq == self.rng.freq
  336. shifted = self.rng.shift(-5)
  337. assert shifted[5] == self.rng[0]
  338. assert shifted.freq == self.rng.freq
  339. shifted = self.rng.shift(0)
  340. assert shifted[0] == self.rng[0]
  341. assert shifted.freq == self.rng.freq
  342. rng = date_range(START, END, freq=BMonthEnd())
  343. shifted = rng.shift(1, freq=BDay())
  344. assert shifted[0] == rng[0] + BDay()
  345. def test_equals(self):
  346. assert not self.rng.equals(list(self.rng))
  347. def test_identical(self):
  348. t1 = self.rng.copy()
  349. t2 = self.rng.copy()
  350. assert t1.identical(t2)
  351. # name
  352. t1 = t1.rename('foo')
  353. assert t1.equals(t2)
  354. assert not t1.identical(t2)
  355. t2 = t2.rename('foo')
  356. assert t1.identical(t2)
  357. # freq
  358. t2v = Index(t2.values)
  359. assert t1.equals(t2v)
  360. assert not t1.identical(t2v)
  361. class TestCustomDatetimeIndex(object):
  362. def setup_method(self, method):
  363. self.rng = bdate_range(START, END, freq='C')
  364. def test_comparison(self):
  365. d = self.rng[10]
  366. comp = self.rng > d
  367. assert comp[11]
  368. assert not comp[9]
  369. def test_copy(self):
  370. cp = self.rng.copy()
  371. repr(cp)
  372. tm.assert_index_equal(cp, self.rng)
  373. def test_shift(self):
  374. shifted = self.rng.shift(5)
  375. assert shifted[0] == self.rng[5]
  376. assert shifted.freq == self.rng.freq
  377. shifted = self.rng.shift(-5)
  378. assert shifted[5] == self.rng[0]
  379. assert shifted.freq == self.rng.freq
  380. shifted = self.rng.shift(0)
  381. assert shifted[0] == self.rng[0]
  382. assert shifted.freq == self.rng.freq
  383. with warnings.catch_warnings(record=True):
  384. warnings.simplefilter("ignore", pd.errors.PerformanceWarning)
  385. rng = date_range(START, END, freq=BMonthEnd())
  386. shifted = rng.shift(1, freq=CDay())
  387. assert shifted[0] == rng[0] + CDay()
  388. def test_shift_periods(self):
  389. # GH#22458 : argument 'n' was deprecated in favor of 'periods'
  390. idx = pd.date_range(start=START, end=END, periods=3)
  391. tm.assert_index_equal(idx.shift(periods=0), idx)
  392. tm.assert_index_equal(idx.shift(0), idx)
  393. with tm.assert_produces_warning(FutureWarning,
  394. check_stacklevel=True):
  395. tm.assert_index_equal(idx.shift(n=0), idx)
  396. def test_pickle_unpickle(self):
  397. unpickled = tm.round_trip_pickle(self.rng)
  398. assert unpickled.freq is not None
  399. def test_equals(self):
  400. assert not self.rng.equals(list(self.rng))