test_ops.py 18 KB


  1. from datetime import datetime
  2. import warnings
  3. import numpy as np
  4. import pytest
  5. from pandas.core.dtypes.generic import ABCDateOffset
  6. import pandas as pd
  7. from pandas import (
  8. DatetimeIndex, Index, PeriodIndex, Series, Timestamp, bdate_range,
  9. date_range)
  10. from pandas.tests.test_base import Ops
  11. import pandas.util.testing as tm
  12. from pandas.tseries.offsets import BDay, BMonthEnd, CDay, Day, Hour
  # Date bounds used to build the business-day and custom-business-day ranges.
  START = datetime(2009, 1, 1)
  END = datetime(2010, 1, 1)
  class TestDatetimeIndexOps(Ops):
      """Operation tests for DatetimeIndex: repeat, sorting, dedup, freq, NaT."""

      def setup_method(self, method):
          """Partition ``self.objs`` into datetime-like and other objects.

          ``self.objs`` is populated by the parent ``setup_method``; objects
          that are a ``DatetimeIndex`` or ``PeriodIndex`` go to
          ``is_valid_objs``, everything else to ``not_valid_objs``.
          """
          super(TestDatetimeIndexOps, self).setup_method(method)

          def is_datetimelike(obj):
              return isinstance(obj, (DatetimeIndex, PeriodIndex))

          self.is_valid_objs = [o for o in self.objs if is_datetimelike(o)]
          self.not_valid_objs = [o for o in self.objs
                                 if not is_datetimelike(o)]

      def test_ops_properties(self):
          """Run the inherited property check over each DatetimeIndex op list."""
          def is_datetime_index(obj):
              return isinstance(obj, DatetimeIndex)

          self.check_ops_properties(DatetimeIndex._field_ops,
                                    is_datetime_index)
          self.check_ops_properties(DatetimeIndex._object_ops,
                                    is_datetime_index)
          self.check_ops_properties(DatetimeIndex._bool_ops,
                                    is_datetime_index)

      def test_ops_properties_basic(self):
          """Datetime field names must not resolve directly on a Series."""
          # sanity check that the behavior didn't change
          # GH#7206
          for op in ['year', 'day', 'second', 'weekday']:
              # Use the context-manager form: pytest.raises invokes a
              # callable with no arguments, so a lambda that declares a
              # parameter raises TypeError by itself and never evaluates
              # the getattr under test.
              # NOTE(review): AttributeError is the exception expected from
              # Series attribute lookup; self.dt_series comes from the Ops
              # base class, which is not visible here -- confirm.
              with pytest.raises(AttributeError):
                  getattr(self.dt_series, op)

          # attribute access should still work!
          s = Series(dict(year=2000, month=1, day=10))
          assert s.year == 2000
          assert s.month == 1
          assert s.day == 10
          with pytest.raises(AttributeError):
              s.weekday

      def test_repeat_range(self, tz_naive_fixture):
          """``repeat``/``np.repeat`` duplicate each element and drop freq."""
          tz = tz_naive_fixture
          rng = date_range('1/1/2000', '1/1/2001')

          result = rng.repeat(5)
          assert result.freq is None
          assert len(result) == 5 * len(rng)

          index = pd.date_range('2001-01-01', periods=2, freq='D', tz=tz)
          exp = pd.DatetimeIndex(['2001-01-01', '2001-01-01',
                                  '2001-01-02', '2001-01-02'], tz=tz)
          for res in [index.repeat(2), np.repeat(index, 2)]:
              tm.assert_index_equal(res, exp)
              assert res.freq is None

          index = pd.date_range('2001-01-01', periods=2, freq='2D', tz=tz)
          exp = pd.DatetimeIndex(['2001-01-01', '2001-01-01',
                                  '2001-01-03', '2001-01-03'], tz=tz)
          for res in [index.repeat(2), np.repeat(index, 2)]:
              tm.assert_index_equal(res, exp)
              assert res.freq is None

          # NaT entries are repeated like any other element
          index = pd.DatetimeIndex(['2001-01-01', 'NaT', '2003-01-01'],
                                   tz=tz)
          exp = pd.DatetimeIndex(['2001-01-01', '2001-01-01', '2001-01-01',
                                  'NaT', 'NaT', 'NaT',
                                  '2003-01-01', '2003-01-01', '2003-01-01'],
                                 tz=tz)
          for res in [index.repeat(3), np.repeat(index, 3)]:
              tm.assert_index_equal(res, exp)
              assert res.freq is None

      def test_repeat(self, tz_naive_fixture):
          """``repeat`` on a 30-minute range; ``axis`` is rejected by numpy."""
          tz = tz_naive_fixture
          reps = 2
          msg = "the 'axis' parameter is not supported"

          rng = pd.date_range(start='2016-01-01', periods=2,
                              freq='30Min', tz=tz)

          expected_rng = DatetimeIndex([
              Timestamp('2016-01-01 00:00:00', tz=tz, freq='30T'),
              Timestamp('2016-01-01 00:00:00', tz=tz, freq='30T'),
              Timestamp('2016-01-01 00:30:00', tz=tz, freq='30T'),
              Timestamp('2016-01-01 00:30:00', tz=tz, freq='30T'),
          ])

          res = rng.repeat(reps)
          tm.assert_index_equal(res, expected_rng)
          assert res.freq is None

          tm.assert_index_equal(np.repeat(rng, reps), expected_rng)
          with pytest.raises(ValueError, match=msg):
              np.repeat(rng, reps, axis=1)

      def test_resolution(self, tz_naive_fixture):
          """``resolution`` matches the expected unit name per freq alias."""
          tz = tz_naive_fixture
          for freq, expected in zip(['A', 'Q', 'M', 'D', 'H', 'T',
                                     'S', 'L', 'U'],
                                    ['day', 'day', 'day', 'day', 'hour',
                                     'minute', 'second', 'millisecond',
                                     'microsecond']):
              idx = pd.date_range(start='2013-04-01', periods=30, freq=freq,
                                  tz=tz)
              assert idx.resolution == expected

      def test_value_counts_unique(self, tz_naive_fixture):
          """``value_counts`` and ``unique`` agree for Index and Series."""
          tz = tz_naive_fixture
          # GH 7735
          idx = pd.date_range('2011-01-01 09:00', freq='H', periods=10)
          # create repeated values, 'n'th element is repeated by n+1 times
          idx = DatetimeIndex(np.repeat(idx.values, range(1, len(idx) + 1)),
                              tz=tz)

          exp_idx = pd.date_range('2011-01-01 18:00', freq='-1H', periods=10,
                                  tz=tz)
          expected = Series(range(10, 0, -1), index=exp_idx, dtype='int64')

          for obj in [idx, Series(idx)]:
              tm.assert_series_equal(obj.value_counts(), expected)

          expected = pd.date_range('2011-01-01 09:00', freq='H', periods=10,
                                   tz=tz)
          tm.assert_index_equal(idx.unique(), expected)

          idx = DatetimeIndex(['2013-01-01 09:00', '2013-01-01 09:00',
                               '2013-01-01 09:00', '2013-01-01 08:00',
                               '2013-01-01 08:00', pd.NaT], tz=tz)

          # NaT is excluded by default ...
          exp_idx = DatetimeIndex(['2013-01-01 09:00', '2013-01-01 08:00'],
                                  tz=tz)
          expected = Series([3, 2], index=exp_idx)

          for obj in [idx, Series(idx)]:
              tm.assert_series_equal(obj.value_counts(), expected)

          # ... and counted when dropna=False
          exp_idx = DatetimeIndex(['2013-01-01 09:00', '2013-01-01 08:00',
                                   pd.NaT], tz=tz)
          expected = Series([3, 2, 1], index=exp_idx)

          for obj in [idx, Series(idx)]:
              tm.assert_series_equal(obj.value_counts(dropna=False),
                                     expected)

          tm.assert_index_equal(idx.unique(), exp_idx)

      def test_nonunique_contains(self):
          """``in`` works on indexes that contain duplicate values."""
          # GH 9512
          for idx in map(DatetimeIndex,
                         ([0, 1, 0], [0, 0, -1], [0, -1, -1],
                          ['2015', '2015', '2016'],
                          ['2015', '2015', '2014'])):
              assert idx[0] in idx

      @pytest.mark.parametrize('idx',
                               [
                                   DatetimeIndex(
                                       ['2011-01-01',
                                        '2011-01-02',
                                        '2011-01-03'],
                                       freq='D', name='idx'),
                                   DatetimeIndex(
                                       ['2011-01-01 09:00',
                                        '2011-01-01 10:00',
                                        '2011-01-01 11:00'],
                                       freq='H', name='tzidx',
                                       tz='Asia/Tokyo')
                               ])
      def test_order_with_freq(self, idx):
          """Sorting a regular index keeps its freq (negated when descending)."""
          ordered = idx.sort_values()
          tm.assert_index_equal(ordered, idx)
          assert ordered.freq == idx.freq

          ordered = idx.sort_values(ascending=False)
          expected = idx[::-1]
          tm.assert_index_equal(ordered, expected)
          assert ordered.freq == expected.freq
          assert ordered.freq.n == -1

          ordered, indexer = idx.sort_values(return_indexer=True)
          tm.assert_index_equal(ordered, idx)
          tm.assert_numpy_array_equal(indexer, np.array([0, 1, 2]),
                                      check_dtype=False)
          assert ordered.freq == idx.freq

          ordered, indexer = idx.sort_values(return_indexer=True,
                                             ascending=False)
          expected = idx[::-1]
          tm.assert_index_equal(ordered, expected)
          tm.assert_numpy_array_equal(indexer,
                                      np.array([2, 1, 0]),
                                      check_dtype=False)
          assert ordered.freq == expected.freq
          assert ordered.freq.n == -1

      # Each case is listed once; timezone coverage comes from
      # tz_naive_fixture rather than from repeating the same dates.
      @pytest.mark.parametrize('index_dates,expected_dates', [
          (['2011-01-01', '2011-01-03', '2011-01-05',
            '2011-01-02', '2011-01-01'],
           ['2011-01-01', '2011-01-01', '2011-01-02',
            '2011-01-03', '2011-01-05']),
          ([pd.NaT, '2011-01-03', '2011-01-05',
            '2011-01-02', pd.NaT],
           [pd.NaT, pd.NaT, '2011-01-02', '2011-01-03',
            '2011-01-05'])
      ])
      def test_order_without_freq(self, index_dates, expected_dates,
                                  tz_naive_fixture):
          """Sorting an irregular index yields no freq in either direction."""
          tz = tz_naive_fixture

          # without freq
          index = DatetimeIndex(index_dates, tz=tz, name='idx')
          expected = DatetimeIndex(expected_dates, tz=tz, name='idx')

          ordered = index.sort_values()
          tm.assert_index_equal(ordered, expected)
          assert ordered.freq is None

          ordered = index.sort_values(ascending=False)
          tm.assert_index_equal(ordered, expected[::-1])
          assert ordered.freq is None

          ordered, indexer = index.sort_values(return_indexer=True)
          tm.assert_index_equal(ordered, expected)

          exp = np.array([0, 4, 3, 1, 2])
          tm.assert_numpy_array_equal(indexer, exp, check_dtype=False)
          assert ordered.freq is None

          ordered, indexer = index.sort_values(return_indexer=True,
                                               ascending=False)
          tm.assert_index_equal(ordered, expected[::-1])

          exp = np.array([2, 1, 3, 4, 0])
          tm.assert_numpy_array_equal(indexer, exp, check_dtype=False)
          assert ordered.freq is None

      def test_drop_duplicates_metadata(self):
          """``drop_duplicates`` keeps name; freq survives only if unchanged."""
          # GH 10115
          idx = pd.date_range('2011-01-01', '2011-01-31', freq='D',
                              name='idx')
          result = idx.drop_duplicates()
          tm.assert_index_equal(idx, result)
          assert idx.freq == result.freq

          idx_dup = idx.append(idx)
          assert idx_dup.freq is None  # freq is reset
          result = idx_dup.drop_duplicates()
          tm.assert_index_equal(idx, result)
          assert result.freq is None

      def test_drop_duplicates(self):
          """``drop_duplicates`` with each ``keep`` option, Index vs Series."""
          # to check Index/Series compat
          base = pd.date_range('2011-01-01', '2011-01-31', freq='D',
                               name='idx')
          idx = base.append(base[:5])

          res = idx.drop_duplicates()
          tm.assert_index_equal(res, base)
          res = Series(idx).drop_duplicates()
          tm.assert_series_equal(res, Series(base))

          res = idx.drop_duplicates(keep='last')
          exp = base[5:].append(base[:5])
          tm.assert_index_equal(res, exp)
          res = Series(idx).drop_duplicates(keep='last')
          tm.assert_series_equal(res, Series(exp, index=np.arange(5, 36)))

          res = idx.drop_duplicates(keep=False)
          tm.assert_index_equal(res, base[5:])
          res = Series(idx).drop_duplicates(keep=False)
          tm.assert_series_equal(res,
                                 Series(base[5:], index=np.arange(5, 31)))

      @pytest.mark.parametrize('freq', [
          'A', '2A', '-2A', 'Q', '-1Q', 'M', '-1M', 'D', '3D',
          '-3D', 'W', '-1W', 'H', '2H', '-2H', 'T', '2T', 'S',
          '-3S'])
      def test_infer_freq(self, freq):
          """``freq='infer'`` recovers the freq from raw i8 values."""
          # GH 11018
          idx = pd.date_range('2011-01-01 09:00:00', freq=freq, periods=10)
          result = pd.DatetimeIndex(idx.asi8, freq='infer')
          tm.assert_index_equal(idx, result)
          assert result.freq == freq

      def test_nat(self, tz_naive_fixture):
          """NaT bookkeeping: ``_na_value``, ``_isnan``, ``hasnans``."""
          tz = tz_naive_fixture
          assert pd.DatetimeIndex._na_value is pd.NaT
          assert pd.DatetimeIndex([])._na_value is pd.NaT

          idx = pd.DatetimeIndex(['2011-01-01', '2011-01-02'], tz=tz)
          assert idx._can_hold_na

          tm.assert_numpy_array_equal(idx._isnan, np.array([False, False]))
          assert idx.hasnans is False
          tm.assert_numpy_array_equal(idx._nan_idxs,
                                      np.array([], dtype=np.intp))

          idx = pd.DatetimeIndex(['2011-01-01', 'NaT'], tz=tz)
          assert idx._can_hold_na

          tm.assert_numpy_array_equal(idx._isnan, np.array([False, True]))
          assert idx.hasnans is True
          tm.assert_numpy_array_equal(idx._nan_idxs,
                                      np.array([1], dtype=np.intp))

      def test_equals(self):
          """``equals`` across copies, object dtype, containers and tz."""
          # GH 13107
          idx = pd.DatetimeIndex(['2011-01-01', '2011-01-02', 'NaT'])
          assert idx.equals(idx)
          assert idx.equals(idx.copy())
          assert idx.equals(idx.astype(object))
          assert idx.astype(object).equals(idx)
          assert idx.astype(object).equals(idx.astype(object))
          assert not idx.equals(list(idx))
          assert not idx.equals(pd.Series(idx))

          idx2 = pd.DatetimeIndex(['2011-01-01', '2011-01-02', 'NaT'],
                                  tz='US/Pacific')
          assert not idx.equals(idx2)
          assert not idx.equals(idx2.copy())
          assert not idx.equals(idx2.astype(object))
          assert not idx.astype(object).equals(idx2)
          assert not idx.equals(list(idx2))
          assert not idx.equals(pd.Series(idx2))

          # same internal, different tz
          idx3 = pd.DatetimeIndex._simple_new(idx.asi8, tz='US/Pacific')
          tm.assert_numpy_array_equal(idx.asi8, idx3.asi8)
          assert not idx.equals(idx3)
          assert not idx.equals(idx3.copy())
          assert not idx.equals(idx3.astype(object))
          assert not idx.astype(object).equals(idx3)
          assert not idx.equals(list(idx3))
          assert not idx.equals(pd.Series(idx3))

      @pytest.mark.parametrize('values', [
          ['20180101', '20180103', '20180105'], []])
      @pytest.mark.parametrize('freq', [
          '2D', Day(2), '2B', BDay(2), '48H', Hour(48)])
      @pytest.mark.parametrize('tz', [None, 'US/Eastern'])
      def test_freq_setter(self, values, freq, tz):
          """``freq`` can be assigned a string or offset, and reset to None."""
          # GH 20678
          idx = DatetimeIndex(values, tz=tz)

          # can set to an offset, converting from string if necessary
          idx.freq = freq
          assert idx.freq == freq
          assert isinstance(idx.freq, ABCDateOffset)

          # can reset to None
          idx.freq = None
          assert idx.freq is None

      def test_freq_setter_errors(self):
          """Assigning a non-conforming or invalid ``freq`` raises ValueError."""
          # GH 20678
          idx = DatetimeIndex(['20180101', '20180103', '20180105'])

          # setting with an incompatible freq
          msg = ('Inferred frequency 2D from passed values does not conform '
                 'to passed frequency 5D')
          with pytest.raises(ValueError, match=msg):
              idx.freq = '5D'

          # setting with non-freq string
          with pytest.raises(ValueError, match='Invalid frequency'):
              idx.freq = 'foo'

      def test_offset_deprecated(self):
          """Reading or writing ``offset`` emits a FutureWarning."""
          # GH 20716
          idx = pd.DatetimeIndex(['20180101', '20180102'])

          # getter deprecated
          with tm.assert_produces_warning(FutureWarning):
              idx.offset

          # setter deprecated
          with tm.assert_produces_warning(FutureWarning):
              idx.offset = BDay()
  class TestBusinessDatetimeIndex(object):
      """Checks on a business-day range created with ``bdate_range``."""

      def setup_method(self, method):
          # Build the range under test between the module-level bounds.
          self.rng = bdate_range(START, END)

      def test_comparison(self):
          """Elementwise ``>`` against one element of the range."""
          pivot = self.rng[10]
          is_later = self.rng > pivot
          assert is_later[11]
          assert not is_later[9]

      def test_pickle_unpickle(self):
          """A pickle round trip leaves ``freq`` set."""
          restored = tm.round_trip_pickle(self.rng)
          assert restored.freq is not None

      def test_copy(self):
          """A copy has a working repr and equals the source index."""
          duplicate = self.rng.copy()
          repr(duplicate)
          tm.assert_index_equal(duplicate, self.rng)

      def test_shift(self):
          """Shifting moves elements by position and keeps ``freq``."""
          # (periods, position in shifted index, matching source position)
          cases = [(5, 0, 5), (-5, 5, 0), (0, 0, 0)]
          for periods, shifted_pos, source_pos in cases:
              moved = self.rng.shift(periods)
              assert moved[shifted_pos] == self.rng[source_pos]
              assert moved.freq == self.rng.freq

          # shifting by an explicit freq that differs from the index's own
          month_ends = date_range(START, END, freq=BMonthEnd())
          moved = month_ends.shift(1, freq=BDay())
          assert moved[0] == month_ends[0] + BDay()

      def test_equals(self):
          """An index is not ``equals`` to a plain list of its elements."""
          as_list = list(self.rng)
          assert not self.rng.equals(as_list)

      def test_identical(self):
          """``identical`` is stricter than ``equals`` (name, index type)."""
          left = self.rng.copy()
          right = self.rng.copy()
          assert left.identical(right)

          # name
          left = left.rename('foo')
          assert left.equals(right)
          assert not left.identical(right)
          right = right.rename('foo')
          assert left.identical(right)

          # freq
          from_values = Index(right.values)
          assert left.equals(from_values)
          assert not left.identical(from_values)
  class TestCustomDatetimeIndex(object):
      """Checks on a range created with ``bdate_range(..., freq='C')``."""

      def setup_method(self, method):
          # Build the range under test between the module-level bounds.
          self.rng = bdate_range(START, END, freq='C')

      def test_comparison(self):
          """Elementwise ``>`` against one element of the range."""
          pivot = self.rng[10]
          is_later = self.rng > pivot
          assert is_later[11]
          assert not is_later[9]

      def test_copy(self):
          """A copy has a working repr and equals the source index."""
          duplicate = self.rng.copy()
          repr(duplicate)
          tm.assert_index_equal(duplicate, self.rng)

      def test_shift(self):
          """Shifting moves elements by position and keeps ``freq``."""
          # (periods, position in shifted index, matching source position)
          cases = [(5, 0, 5), (-5, 5, 0), (0, 0, 0)]
          for periods, shifted_pos, source_pos in cases:
              moved = self.rng.shift(periods)
              assert moved[shifted_pos] == self.rng[source_pos]
              assert moved.freq == self.rng.freq

          # PerformanceWarning is silenced for the explicit-freq shift below
          with warnings.catch_warnings(record=True):
              warnings.simplefilter("ignore", pd.errors.PerformanceWarning)
              month_ends = date_range(START, END, freq=BMonthEnd())
              moved = month_ends.shift(1, freq=CDay())
              assert moved[0] == month_ends[0] + CDay()

      def test_shift_periods(self):
          """``shift`` takes ``periods``; the ``n`` keyword warns."""
          # GH#22458 : argument 'n' was deprecated in favor of 'periods'
          three_points = pd.date_range(start=START, end=END, periods=3)
          tm.assert_index_equal(three_points.shift(periods=0), three_points)
          tm.assert_index_equal(three_points.shift(0), three_points)
          with tm.assert_produces_warning(FutureWarning,
                                          check_stacklevel=True):
              tm.assert_index_equal(three_points.shift(n=0), three_points)

      def test_pickle_unpickle(self):
          """A pickle round trip leaves ``freq`` set."""
          restored = tm.round_trip_pickle(self.rng)
          assert restored.freq is not None

      def test_equals(self):
          """An index is not ``equals`` to a plain list of its elements."""
          as_list = list(self.rng)
          assert not self.rng.equals(as_list)