# test_ops.py (11 KB)
  1. import numpy as np
  2. import pytest
  3. from pandas.core.dtypes.generic import ABCDateOffset
  4. import pandas as pd
  5. from pandas import Series, TimedeltaIndex, timedelta_range
  6. from pandas.tests.test_base import Ops
  7. import pandas.util.testing as tm
  8. from pandas.tseries.offsets import Day, Hour
  9. class TestTimedeltaIndexOps(Ops):
  10. def setup_method(self, method):
  11. super(TestTimedeltaIndexOps, self).setup_method(method)
  12. mask = lambda x: isinstance(x, TimedeltaIndex)
  13. self.is_valid_objs = [o for o in self.objs if mask(o)]
  14. self.not_valid_objs = []
  15. def test_ops_properties(self):
  16. f = lambda x: isinstance(x, TimedeltaIndex)
  17. self.check_ops_properties(TimedeltaIndex._field_ops, f)
  18. self.check_ops_properties(TimedeltaIndex._object_ops, f)
  19. def test_value_counts_unique(self):
  20. # GH 7735
  21. idx = timedelta_range('1 days 09:00:00', freq='H', periods=10)
  22. # create repeated values, 'n'th element is repeated by n+1 times
  23. idx = TimedeltaIndex(np.repeat(idx.values, range(1, len(idx) + 1)))
  24. exp_idx = timedelta_range('1 days 18:00:00', freq='-1H', periods=10)
  25. expected = Series(range(10, 0, -1), index=exp_idx, dtype='int64')
  26. for obj in [idx, Series(idx)]:
  27. tm.assert_series_equal(obj.value_counts(), expected)
  28. expected = timedelta_range('1 days 09:00:00', freq='H', periods=10)
  29. tm.assert_index_equal(idx.unique(), expected)
  30. idx = TimedeltaIndex(['1 days 09:00:00', '1 days 09:00:00',
  31. '1 days 09:00:00', '1 days 08:00:00',
  32. '1 days 08:00:00', pd.NaT])
  33. exp_idx = TimedeltaIndex(['1 days 09:00:00', '1 days 08:00:00'])
  34. expected = Series([3, 2], index=exp_idx)
  35. for obj in [idx, Series(idx)]:
  36. tm.assert_series_equal(obj.value_counts(), expected)
  37. exp_idx = TimedeltaIndex(['1 days 09:00:00', '1 days 08:00:00',
  38. pd.NaT])
  39. expected = Series([3, 2, 1], index=exp_idx)
  40. for obj in [idx, Series(idx)]:
  41. tm.assert_series_equal(obj.value_counts(dropna=False), expected)
  42. tm.assert_index_equal(idx.unique(), exp_idx)
  43. def test_nonunique_contains(self):
  44. # GH 9512
  45. for idx in map(TimedeltaIndex, ([0, 1, 0], [0, 0, -1], [0, -1, -1],
  46. ['00:01:00', '00:01:00', '00:02:00'],
  47. ['00:01:00', '00:01:00', '00:00:01'])):
  48. assert idx[0] in idx
  49. def test_unknown_attribute(self):
  50. # see gh-9680
  51. tdi = pd.timedelta_range(start=0, periods=10, freq='1s')
  52. ts = pd.Series(np.random.normal(size=10), index=tdi)
  53. assert 'foo' not in ts.__dict__.keys()
  54. pytest.raises(AttributeError, lambda: ts.foo)
  55. def test_order(self):
  56. # GH 10295
  57. idx1 = TimedeltaIndex(['1 day', '2 day', '3 day'], freq='D',
  58. name='idx')
  59. idx2 = TimedeltaIndex(
  60. ['1 hour', '2 hour', '3 hour'], freq='H', name='idx')
  61. for idx in [idx1, idx2]:
  62. ordered = idx.sort_values()
  63. tm.assert_index_equal(ordered, idx)
  64. assert ordered.freq == idx.freq
  65. ordered = idx.sort_values(ascending=False)
  66. expected = idx[::-1]
  67. tm.assert_index_equal(ordered, expected)
  68. assert ordered.freq == expected.freq
  69. assert ordered.freq.n == -1
  70. ordered, indexer = idx.sort_values(return_indexer=True)
  71. tm.assert_index_equal(ordered, idx)
  72. tm.assert_numpy_array_equal(indexer, np.array([0, 1, 2]),
  73. check_dtype=False)
  74. assert ordered.freq == idx.freq
  75. ordered, indexer = idx.sort_values(return_indexer=True,
  76. ascending=False)
  77. tm.assert_index_equal(ordered, idx[::-1])
  78. assert ordered.freq == expected.freq
  79. assert ordered.freq.n == -1
  80. idx1 = TimedeltaIndex(['1 hour', '3 hour', '5 hour',
  81. '2 hour ', '1 hour'], name='idx1')
  82. exp1 = TimedeltaIndex(['1 hour', '1 hour', '2 hour',
  83. '3 hour', '5 hour'], name='idx1')
  84. idx2 = TimedeltaIndex(['1 day', '3 day', '5 day',
  85. '2 day', '1 day'], name='idx2')
  86. # TODO(wesm): unused?
  87. # exp2 = TimedeltaIndex(['1 day', '1 day', '2 day',
  88. # '3 day', '5 day'], name='idx2')
  89. # idx3 = TimedeltaIndex([pd.NaT, '3 minute', '5 minute',
  90. # '2 minute', pd.NaT], name='idx3')
  91. # exp3 = TimedeltaIndex([pd.NaT, pd.NaT, '2 minute', '3 minute',
  92. # '5 minute'], name='idx3')
  93. for idx, expected in [(idx1, exp1), (idx1, exp1), (idx1, exp1)]:
  94. ordered = idx.sort_values()
  95. tm.assert_index_equal(ordered, expected)
  96. assert ordered.freq is None
  97. ordered = idx.sort_values(ascending=False)
  98. tm.assert_index_equal(ordered, expected[::-1])
  99. assert ordered.freq is None
  100. ordered, indexer = idx.sort_values(return_indexer=True)
  101. tm.assert_index_equal(ordered, expected)
  102. exp = np.array([0, 4, 3, 1, 2])
  103. tm.assert_numpy_array_equal(indexer, exp, check_dtype=False)
  104. assert ordered.freq is None
  105. ordered, indexer = idx.sort_values(return_indexer=True,
  106. ascending=False)
  107. tm.assert_index_equal(ordered, expected[::-1])
  108. exp = np.array([2, 1, 3, 4, 0])
  109. tm.assert_numpy_array_equal(indexer, exp, check_dtype=False)
  110. assert ordered.freq is None
  111. def test_drop_duplicates_metadata(self):
  112. # GH 10115
  113. idx = pd.timedelta_range('1 day', '31 day', freq='D', name='idx')
  114. result = idx.drop_duplicates()
  115. tm.assert_index_equal(idx, result)
  116. assert idx.freq == result.freq
  117. idx_dup = idx.append(idx)
  118. assert idx_dup.freq is None # freq is reset
  119. result = idx_dup.drop_duplicates()
  120. tm.assert_index_equal(idx, result)
  121. assert result.freq is None
  122. def test_drop_duplicates(self):
  123. # to check Index/Series compat
  124. base = pd.timedelta_range('1 day', '31 day', freq='D', name='idx')
  125. idx = base.append(base[:5])
  126. res = idx.drop_duplicates()
  127. tm.assert_index_equal(res, base)
  128. res = Series(idx).drop_duplicates()
  129. tm.assert_series_equal(res, Series(base))
  130. res = idx.drop_duplicates(keep='last')
  131. exp = base[5:].append(base[:5])
  132. tm.assert_index_equal(res, exp)
  133. res = Series(idx).drop_duplicates(keep='last')
  134. tm.assert_series_equal(res, Series(exp, index=np.arange(5, 36)))
  135. res = idx.drop_duplicates(keep=False)
  136. tm.assert_index_equal(res, base[5:])
  137. res = Series(idx).drop_duplicates(keep=False)
  138. tm.assert_series_equal(res, Series(base[5:], index=np.arange(5, 31)))
  139. @pytest.mark.parametrize('freq', ['D', '3D', '-3D',
  140. 'H', '2H', '-2H',
  141. 'T', '2T', 'S', '-3S'])
  142. def test_infer_freq(self, freq):
  143. # GH#11018
  144. idx = pd.timedelta_range('1', freq=freq, periods=10)
  145. result = pd.TimedeltaIndex(idx.asi8, freq='infer')
  146. tm.assert_index_equal(idx, result)
  147. assert result.freq == freq
  148. def test_shift(self):
  149. pass # handled in test_arithmetic.py
  150. def test_repeat(self):
  151. index = pd.timedelta_range('1 days', periods=2, freq='D')
  152. exp = pd.TimedeltaIndex(['1 days', '1 days', '2 days', '2 days'])
  153. for res in [index.repeat(2), np.repeat(index, 2)]:
  154. tm.assert_index_equal(res, exp)
  155. assert res.freq is None
  156. index = TimedeltaIndex(['1 days', 'NaT', '3 days'])
  157. exp = TimedeltaIndex(['1 days', '1 days', '1 days',
  158. 'NaT', 'NaT', 'NaT',
  159. '3 days', '3 days', '3 days'])
  160. for res in [index.repeat(3), np.repeat(index, 3)]:
  161. tm.assert_index_equal(res, exp)
  162. assert res.freq is None
  163. def test_nat(self):
  164. assert pd.TimedeltaIndex._na_value is pd.NaT
  165. assert pd.TimedeltaIndex([])._na_value is pd.NaT
  166. idx = pd.TimedeltaIndex(['1 days', '2 days'])
  167. assert idx._can_hold_na
  168. tm.assert_numpy_array_equal(idx._isnan, np.array([False, False]))
  169. assert idx.hasnans is False
  170. tm.assert_numpy_array_equal(idx._nan_idxs,
  171. np.array([], dtype=np.intp))
  172. idx = pd.TimedeltaIndex(['1 days', 'NaT'])
  173. assert idx._can_hold_na
  174. tm.assert_numpy_array_equal(idx._isnan, np.array([False, True]))
  175. assert idx.hasnans is True
  176. tm.assert_numpy_array_equal(idx._nan_idxs,
  177. np.array([1], dtype=np.intp))
  178. def test_equals(self):
  179. # GH 13107
  180. idx = pd.TimedeltaIndex(['1 days', '2 days', 'NaT'])
  181. assert idx.equals(idx)
  182. assert idx.equals(idx.copy())
  183. assert idx.equals(idx.astype(object))
  184. assert idx.astype(object).equals(idx)
  185. assert idx.astype(object).equals(idx.astype(object))
  186. assert not idx.equals(list(idx))
  187. assert not idx.equals(pd.Series(idx))
  188. idx2 = pd.TimedeltaIndex(['2 days', '1 days', 'NaT'])
  189. assert not idx.equals(idx2)
  190. assert not idx.equals(idx2.copy())
  191. assert not idx.equals(idx2.astype(object))
  192. assert not idx.astype(object).equals(idx2)
  193. assert not idx.astype(object).equals(idx2.astype(object))
  194. assert not idx.equals(list(idx2))
  195. assert not idx.equals(pd.Series(idx2))
  196. @pytest.mark.parametrize('values', [['0 days', '2 days', '4 days'], []])
  197. @pytest.mark.parametrize('freq', ['2D', Day(2), '48H', Hour(48)])
  198. def test_freq_setter(self, values, freq):
  199. # GH 20678
  200. idx = TimedeltaIndex(values)
  201. # can set to an offset, converting from string if necessary
  202. idx.freq = freq
  203. assert idx.freq == freq
  204. assert isinstance(idx.freq, ABCDateOffset)
  205. # can reset to None
  206. idx.freq = None
  207. assert idx.freq is None
  208. def test_freq_setter_errors(self):
  209. # GH 20678
  210. idx = TimedeltaIndex(['0 days', '2 days', '4 days'])
  211. # setting with an incompatible freq
  212. msg = ('Inferred frequency 2D from passed values does not conform to '
  213. 'passed frequency 5D')
  214. with pytest.raises(ValueError, match=msg):
  215. idx.freq = '5D'
  216. # setting with a non-fixed frequency
  217. msg = r'<2 \* BusinessDays> is a non-fixed frequency'
  218. with pytest.raises(ValueError, match=msg):
  219. idx.freq = '2B'
  220. # setting with non-freq string
  221. with pytest.raises(ValueError, match='Invalid frequency'):
  222. idx.freq = 'foo'