test_indexing.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338
  1. from datetime import datetime, timedelta
  2. import numpy as np
  3. import pytest
  4. import pandas as pd
  5. from pandas import Index, Timedelta, TimedeltaIndex, compat, timedelta_range
  6. import pandas.util.testing as tm
  class TestGetItem(object):
      """Tests for ``TimedeltaIndex.__getitem__`` and ``get_loc`` key checks."""

      def test_ellipsis(self):
          """Indexing with ``...`` gives an equal index that is a new object."""
          # GH#21282
          idx = timedelta_range('1 day', '31 day', freq='D', name='idx')

          result = idx[...]
          assert result.equals(idx)
          assert result is not idx

      def test_getitem(self):
          """Check scalar lookup and slicing with positive/negative steps.

          Every slice result is compared to an explicitly built index and
          its ``freq`` is compared to the expected one.
          """
          idx = timedelta_range('1 day', '31 day', freq='D', name='idx')

          # scalar position -> Timedelta
          result = idx[0]
          assert result == Timedelta('1 day')

          # contiguous slice
          result = idx[0:5]
          expected = timedelta_range('1 day', '5 day', freq='D',
                                     name='idx')
          tm.assert_index_equal(result, expected)
          assert result.freq == expected.freq

          # slice with step 2
          result = idx[0:10:2]
          expected = timedelta_range('1 day', '9 day', freq='2D',
                                     name='idx')
          tm.assert_index_equal(result, expected)
          assert result.freq == expected.freq

          # negative bounds with step 3
          result = idx[-20:-5:3]
          expected = timedelta_range('12 day', '24 day', freq='3D',
                                     name='idx')
          tm.assert_index_equal(result, expected)
          assert result.freq == expected.freq

          # reversed slice
          result = idx[4::-1]
          expected = TimedeltaIndex(['5 day', '4 day', '3 day',
                                     '2 day', '1 day'],
                                    freq='-1D', name='idx')
          tm.assert_index_equal(result, expected)
          assert result.freq == expected.freq

      @pytest.mark.parametrize('key', [pd.Timestamp('1970-01-01'),
                                       pd.Timestamp('1970-01-02'),
                                       datetime(1970, 1, 1)])
      def test_timestamp_invalid_key(self, key):
          """``get_loc`` must raise TypeError for Timestamp/datetime keys."""
          # GH#20464
          tdi = pd.timedelta_range(0, periods=10)
          with pytest.raises(TypeError):
              tdi.get_loc(key)
  class TestWhere(object):
      """Placeholder for symmetry with DatetimeIndex and PeriodIndex tests.

      Intentionally contains no test methods.
      """
  51. class TestTake(object):
  52. def test_take(self):
  53. # GH 10295
  54. idx1 = timedelta_range('1 day', '31 day', freq='D', name='idx')
  55. for idx in [idx1]:
  56. result = idx.take([0])
  57. assert result == Timedelta('1 day')
  58. result = idx.take([-1])
  59. assert result == Timedelta('31 day')
  60. result = idx.take([0, 1, 2])
  61. expected = timedelta_range('1 day', '3 day', freq='D',
  62. name='idx')
  63. tm.assert_index_equal(result, expected)
  64. assert result.freq == expected.freq
  65. result = idx.take([0, 2, 4])
  66. expected = timedelta_range('1 day', '5 day', freq='2D',
  67. name='idx')
  68. tm.assert_index_equal(result, expected)
  69. assert result.freq == expected.freq
  70. result = idx.take([7, 4, 1])
  71. expected = timedelta_range('8 day', '2 day', freq='-3D',
  72. name='idx')
  73. tm.assert_index_equal(result, expected)
  74. assert result.freq == expected.freq
  75. result = idx.take([3, 2, 5])
  76. expected = TimedeltaIndex(['4 day', '3 day', '6 day'], name='idx')
  77. tm.assert_index_equal(result, expected)
  78. assert result.freq is None
  79. result = idx.take([-3, 2, 5])
  80. expected = TimedeltaIndex(['29 day', '3 day', '6 day'], name='idx')
  81. tm.assert_index_equal(result, expected)
  82. assert result.freq is None
  83. def test_take_invalid_kwargs(self):
  84. idx = timedelta_range('1 day', '31 day', freq='D', name='idx')
  85. indices = [1, 6, 5, 9, 10, 13, 15, 3]
  86. msg = r"take\(\) got an unexpected keyword argument 'foo'"
  87. with pytest.raises(TypeError, match=msg):
  88. idx.take(indices, foo=2)
  89. msg = "the 'out' parameter is not supported"
  90. with pytest.raises(ValueError, match=msg):
  91. idx.take(indices, out=indices)
  92. msg = "the 'mode' parameter is not supported"
  93. with pytest.raises(ValueError, match=msg):
  94. idx.take(indices, mode='clip')
  95. # TODO: This method came from test_timedelta; de-dup with version above
  96. def test_take2(self):
  97. tds = ['1day 02:00:00', '1 day 04:00:00', '1 day 10:00:00']
  98. idx = timedelta_range(start='1d', end='2d', freq='H', name='idx')
  99. expected = TimedeltaIndex(tds, freq=None, name='idx')
  100. taken1 = idx.take([2, 4, 10])
  101. taken2 = idx[[2, 4, 10]]
  102. for taken in [taken1, taken2]:
  103. tm.assert_index_equal(taken, expected)
  104. assert isinstance(taken, TimedeltaIndex)
  105. assert taken.freq is None
  106. assert taken.name == expected.name
  107. def test_take_fill_value(self):
  108. # GH 12631
  109. idx = TimedeltaIndex(['1 days', '2 days', '3 days'],
  110. name='xxx')
  111. result = idx.take(np.array([1, 0, -1]))
  112. expected = TimedeltaIndex(['2 days', '1 days', '3 days'],
  113. name='xxx')
  114. tm.assert_index_equal(result, expected)
  115. # fill_value
  116. result = idx.take(np.array([1, 0, -1]), fill_value=True)
  117. expected = TimedeltaIndex(['2 days', '1 days', 'NaT'],
  118. name='xxx')
  119. tm.assert_index_equal(result, expected)
  120. # allow_fill=False
  121. result = idx.take(np.array([1, 0, -1]), allow_fill=False,
  122. fill_value=True)
  123. expected = TimedeltaIndex(['2 days', '1 days', '3 days'],
  124. name='xxx')
  125. tm.assert_index_equal(result, expected)
  126. msg = ('When allow_fill=True and fill_value is not None, '
  127. 'all indices must be >= -1')
  128. with pytest.raises(ValueError, match=msg):
  129. idx.take(np.array([1, 0, -2]), fill_value=True)
  130. with pytest.raises(ValueError, match=msg):
  131. idx.take(np.array([1, 0, -5]), fill_value=True)
  132. with pytest.raises(IndexError):
  133. idx.take(np.array([1, -5]))
  134. class TestTimedeltaIndex(object):
  135. def test_insert(self):
  136. idx = TimedeltaIndex(['4day', '1day', '2day'], name='idx')
  137. result = idx.insert(2, timedelta(days=5))
  138. exp = TimedeltaIndex(['4day', '1day', '5day', '2day'], name='idx')
  139. tm.assert_index_equal(result, exp)
  140. # insertion of non-datetime should coerce to object index
  141. result = idx.insert(1, 'inserted')
  142. expected = Index([Timedelta('4day'), 'inserted', Timedelta('1day'),
  143. Timedelta('2day')], name='idx')
  144. assert not isinstance(result, TimedeltaIndex)
  145. tm.assert_index_equal(result, expected)
  146. assert result.name == expected.name
  147. idx = timedelta_range('1day 00:00:01', periods=3, freq='s', name='idx')
  148. # preserve freq
  149. expected_0 = TimedeltaIndex(['1day', '1day 00:00:01', '1day 00:00:02',
  150. '1day 00:00:03'],
  151. name='idx', freq='s')
  152. expected_3 = TimedeltaIndex(['1day 00:00:01', '1day 00:00:02',
  153. '1day 00:00:03', '1day 00:00:04'],
  154. name='idx', freq='s')
  155. # reset freq to None
  156. expected_1_nofreq = TimedeltaIndex(['1day 00:00:01', '1day 00:00:01',
  157. '1day 00:00:02', '1day 00:00:03'],
  158. name='idx', freq=None)
  159. expected_3_nofreq = TimedeltaIndex(['1day 00:00:01', '1day 00:00:02',
  160. '1day 00:00:03', '1day 00:00:05'],
  161. name='idx', freq=None)
  162. cases = [(0, Timedelta('1day'), expected_0),
  163. (-3, Timedelta('1day'), expected_0),
  164. (3, Timedelta('1day 00:00:04'), expected_3),
  165. (1, Timedelta('1day 00:00:01'), expected_1_nofreq),
  166. (3, Timedelta('1day 00:00:05'), expected_3_nofreq)]
  167. for n, d, expected in cases:
  168. result = idx.insert(n, d)
  169. tm.assert_index_equal(result, expected)
  170. assert result.name == expected.name
  171. assert result.freq == expected.freq
  172. # GH 18295 (test missing)
  173. expected = TimedeltaIndex(['1day', pd.NaT, '2day', '3day'])
  174. for na in (np.nan, pd.NaT, None):
  175. result = timedelta_range('1day', '3day').insert(1, na)
  176. tm.assert_index_equal(result, expected)
  def test_delete(self):
      """Check ``delete`` by position: result values, name and freq.

      Removing the first or last position is expected to keep the daily
      freq, removing an interior position is expected to reset it to
      ``None``, and an out-of-bounds position must raise.
      """
      idx = timedelta_range(start='1 Days', periods=5, freq='D',
                            name='idx')

      # preserve freq
      expected_0 = timedelta_range(start='2 Days', periods=4, freq='D',
                                   name='idx')
      expected_4 = timedelta_range(start='1 Days', periods=4, freq='D',
                                   name='idx')

      # reset freq to None
      expected_1 = TimedeltaIndex(
          ['1 day', '3 day', '4 day', '5 day'], freq=None, name='idx')

      cases = {0: expected_0,
               -5: expected_0,
               -1: expected_4,
               4: expected_4,
               1: expected_1}
      # dict.items() works the same on Python 2 and 3 for this loop, so no
      # compat shim is needed.
      for n, expected in cases.items():
          result = idx.delete(n)
          tm.assert_index_equal(result, expected)
          assert result.name == expected.name
          assert result.freq == expected.freq

      with pytest.raises((IndexError, ValueError)):
          # either depending on numpy version
          idx.delete(5)
  200. def test_delete_slice(self):
  201. idx = timedelta_range(start='1 days', periods=10, freq='D', name='idx')
  202. # prserve freq
  203. expected_0_2 = timedelta_range(start='4 days', periods=7, freq='D',
  204. name='idx')
  205. expected_7_9 = timedelta_range(start='1 days', periods=7, freq='D',
  206. name='idx')
  207. # reset freq to None
  208. expected_3_5 = TimedeltaIndex(['1 d', '2 d', '3 d',
  209. '7 d', '8 d', '9 d', '10d'],
  210. freq=None, name='idx')
  211. cases = {(0, 1, 2): expected_0_2,
  212. (7, 8, 9): expected_7_9,
  213. (3, 4, 5): expected_3_5}
  214. for n, expected in compat.iteritems(cases):
  215. result = idx.delete(n)
  216. tm.assert_index_equal(result, expected)
  217. assert result.name == expected.name
  218. assert result.freq == expected.freq
  219. result = idx.delete(slice(n[0], n[-1] + 1))
  220. tm.assert_index_equal(result, expected)
  221. assert result.name == expected.name
  222. assert result.freq == expected.freq
  def test_get_loc(self):
      """Check ``get_loc`` across key types, methods and tolerances."""
      idx = pd.to_timedelta(['0 days', '1 days', '2 days'])
      middle = idx[1]

      # the same element given as Timedelta, datetime.timedelta and str
      key_forms = (middle, middle.to_pytimedelta(), str(middle))
      for method in [None, 'pad', 'backfill', 'nearest']:
          for key in key_forms:
              assert idx.get_loc(key, method) == 1

      # zero tolerance expressed with three different types
      zero_tolerances = (Timedelta(0), np.timedelta64(0, 's'), timedelta(0))
      for tolerance in zero_tolerances:
          assert idx.get_loc(middle, 'pad', tolerance=tolerance) == 1

      with pytest.raises(ValueError,
                         match='unit abbreviation w/o a number'):
          idx.get_loc(middle, method='nearest', tolerance='foo')

      # a list of tolerances for a scalar key is rejected
      zero64 = Timedelta(0).to_timedelta64()
      with pytest.raises(ValueError, match='tolerance size must match'):
          idx.get_loc(middle, method='nearest',
                      tolerance=[zero64, Timedelta(0).to_timedelta64()])

      # a key between two elements resolves according to the method
      for method, loc in [('pad', 1), ('backfill', 2), ('nearest', 1)]:
          assert idx.get_loc('1 day 1 hour', method) == loc

      # GH 16909
      assert idx.get_loc(middle.to_timedelta64()) == 1

      # GH 16896
      assert idx.get_loc('0 days') == 0
  249. def test_get_loc_nat(self):
  250. tidx = TimedeltaIndex(['1 days 01:00:00', 'NaT', '2 days 01:00:00'])
  251. assert tidx.get_loc(pd.NaT) == 1
  252. assert tidx.get_loc(None) == 1
  253. assert tidx.get_loc(float('nan')) == 1
  254. assert tidx.get_loc(np.nan) == 1
  255. def test_get_indexer(self):
  256. idx = pd.to_timedelta(['0 days', '1 days', '2 days'])
  257. tm.assert_numpy_array_equal(idx.get_indexer(idx),
  258. np.array([0, 1, 2], dtype=np.intp))
  259. target = pd.to_timedelta(['-1 hour', '12 hours', '1 day 1 hour'])
  260. tm.assert_numpy_array_equal(idx.get_indexer(target, 'pad'),
  261. np.array([-1, 0, 1], dtype=np.intp))
  262. tm.assert_numpy_array_equal(idx.get_indexer(target, 'backfill'),
  263. np.array([0, 1, 2], dtype=np.intp))
  264. tm.assert_numpy_array_equal(idx.get_indexer(target, 'nearest'),
  265. np.array([0, 1, 1], dtype=np.intp))
  266. res = idx.get_indexer(target, 'nearest',
  267. tolerance=Timedelta('1 hour'))
  268. tm.assert_numpy_array_equal(res, np.array([0, -1, 1], dtype=np.intp))