test_interval.py 7.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267
  1. import numpy as np
  2. import pytest
  3. import pandas as pd
  4. from pandas import DataFrame, Interval, IntervalIndex, Series
  5. import pandas.util.testing as tm
  6. class TestIntervalIndex(object):
  7. def setup_method(self, method):
  8. self.s = Series(np.arange(5), IntervalIndex.from_breaks(np.arange(6)))
  9. # To be removed, replaced by test_interval_new.py (see #16316, #16386)
  10. def test_loc_with_scalar(self):
  11. s = self.s
  12. expected = s.iloc[:3]
  13. tm.assert_series_equal(expected, s.loc[:3])
  14. tm.assert_series_equal(expected, s.loc[:2.5])
  15. tm.assert_series_equal(expected, s.loc[0.1:2.5])
  16. tm.assert_series_equal(expected, s.loc[-1:3])
  17. expected = s.iloc[1:4]
  18. tm.assert_series_equal(expected, s.loc[[1.5, 2.5, 3.5]])
  19. tm.assert_series_equal(expected, s.loc[[2, 3, 4]])
  20. tm.assert_series_equal(expected, s.loc[[1.5, 3, 4]])
  21. expected = s.iloc[2:5]
  22. tm.assert_series_equal(expected, s.loc[s >= 2])
  23. # TODO: check this behavior is consistent with test_interval_new.py
  24. def test_getitem_with_scalar(self):
  25. s = self.s
  26. expected = s.iloc[:3]
  27. tm.assert_series_equal(expected, s[:3])
  28. tm.assert_series_equal(expected, s[:2.5])
  29. tm.assert_series_equal(expected, s[0.1:2.5])
  30. tm.assert_series_equal(expected, s[-1:3])
  31. expected = s.iloc[1:4]
  32. tm.assert_series_equal(expected, s[[1.5, 2.5, 3.5]])
  33. tm.assert_series_equal(expected, s[[2, 3, 4]])
  34. tm.assert_series_equal(expected, s[[1.5, 3, 4]])
  35. expected = s.iloc[2:5]
  36. tm.assert_series_equal(expected, s[s >= 2])
  37. # TODO: check this behavior is consistent with test_interval_new.py
  38. @pytest.mark.parametrize('direction', ['increasing', 'decreasing'])
  39. def test_nonoverlapping_monotonic(self, direction, closed):
  40. tpls = [(0, 1), (2, 3), (4, 5)]
  41. if direction == 'decreasing':
  42. tpls = tpls[::-1]
  43. idx = IntervalIndex.from_tuples(tpls, closed=closed)
  44. s = Series(list('abc'), idx)
  45. for key, expected in zip(idx.left, s):
  46. if idx.closed_left:
  47. assert s[key] == expected
  48. assert s.loc[key] == expected
  49. else:
  50. with pytest.raises(KeyError):
  51. s[key]
  52. with pytest.raises(KeyError):
  53. s.loc[key]
  54. for key, expected in zip(idx.right, s):
  55. if idx.closed_right:
  56. assert s[key] == expected
  57. assert s.loc[key] == expected
  58. else:
  59. with pytest.raises(KeyError):
  60. s[key]
  61. with pytest.raises(KeyError):
  62. s.loc[key]
  63. for key, expected in zip(idx.mid, s):
  64. assert s[key] == expected
  65. assert s.loc[key] == expected
  66. # To be removed, replaced by test_interval_new.py (see #16316, #16386)
  67. def test_with_interval(self):
  68. s = self.s
  69. expected = 0
  70. result = s.loc[Interval(0, 1)]
  71. assert result == expected
  72. result = s[Interval(0, 1)]
  73. assert result == expected
  74. expected = s.iloc[3:5]
  75. result = s.loc[Interval(3, 6)]
  76. tm.assert_series_equal(expected, result)
  77. expected = s.iloc[3:5]
  78. result = s.loc[[Interval(3, 6)]]
  79. tm.assert_series_equal(expected, result)
  80. expected = s.iloc[3:5]
  81. result = s.loc[[Interval(3, 5)]]
  82. tm.assert_series_equal(expected, result)
  83. # missing
  84. with pytest.raises(KeyError):
  85. s.loc[Interval(-2, 0)]
  86. with pytest.raises(KeyError):
  87. s[Interval(-2, 0)]
  88. with pytest.raises(KeyError):
  89. s.loc[Interval(5, 6)]
  90. with pytest.raises(KeyError):
  91. s[Interval(5, 6)]
  92. # To be removed, replaced by test_interval_new.py (see #16316, #16386)
  93. def test_with_slices(self):
  94. s = self.s
  95. # slice of interval
  96. with pytest.raises(NotImplementedError):
  97. s.loc[Interval(3, 6):]
  98. with pytest.raises(NotImplementedError):
  99. s[Interval(3, 6):]
  100. expected = s.iloc[3:5]
  101. result = s[[Interval(3, 6)]]
  102. tm.assert_series_equal(expected, result)
  103. # slice of scalar with step != 1
  104. with pytest.raises(ValueError):
  105. s[0:4:2]
  106. # To be removed, replaced by test_interval_new.py (see #16316, #16386)
  107. def test_with_overlaps(self):
  108. s = self.s
  109. expected = s.iloc[[3, 4, 3, 4]]
  110. result = s.loc[[Interval(3, 6), Interval(3, 6)]]
  111. tm.assert_series_equal(expected, result)
  112. idx = IntervalIndex.from_tuples([(1, 5), (3, 7)])
  113. s = Series(range(len(idx)), index=idx)
  114. result = s[4]
  115. expected = s
  116. tm.assert_series_equal(expected, result)
  117. result = s[[4]]
  118. expected = s
  119. tm.assert_series_equal(expected, result)
  120. result = s.loc[[4]]
  121. expected = s
  122. tm.assert_series_equal(expected, result)
  123. result = s[Interval(3, 5)]
  124. expected = s
  125. tm.assert_series_equal(expected, result)
  126. result = s.loc[Interval(3, 5)]
  127. expected = s
  128. tm.assert_series_equal(expected, result)
  129. # doesn't intersect unique set of intervals
  130. with pytest.raises(KeyError):
  131. s[[Interval(3, 5)]]
  132. with pytest.raises(KeyError):
  133. s.loc[[Interval(3, 5)]]
  134. # To be removed, replaced by test_interval_new.py (see #16316, #16386)
  135. def test_non_unique(self):
  136. idx = IntervalIndex.from_tuples([(1, 3), (3, 7)])
  137. s = Series(range(len(idx)), index=idx)
  138. result = s.loc[Interval(1, 3)]
  139. assert result == 0
  140. result = s.loc[[Interval(1, 3)]]
  141. expected = s.iloc[0:1]
  142. tm.assert_series_equal(expected, result)
  143. # To be removed, replaced by test_interval_new.py (see #16316, #16386)
  144. def test_non_unique_moar(self):
  145. idx = IntervalIndex.from_tuples([(1, 3), (1, 3), (3, 7)])
  146. s = Series(range(len(idx)), index=idx)
  147. result = s.loc[Interval(1, 3)]
  148. expected = s.iloc[[0, 1]]
  149. tm.assert_series_equal(expected, result)
  150. # non-unique index and slices not allowed
  151. with pytest.raises(ValueError):
  152. s.loc[Interval(1, 3):]
  153. with pytest.raises(ValueError):
  154. s[Interval(1, 3):]
  155. # non-unique
  156. with pytest.raises(ValueError):
  157. s[[Interval(1, 3)]]
  158. # TODO: check this behavior is consistent with test_interval_new.py
  159. def test_non_matching(self):
  160. s = self.s
  161. # this is a departure from our current
  162. # indexin scheme, but simpler
  163. with pytest.raises(KeyError):
  164. s.loc[[-1, 3, 4, 5]]
  165. with pytest.raises(KeyError):
  166. s.loc[[-1, 3]]
  167. def test_large_series(self):
  168. s = Series(np.arange(1000000),
  169. index=IntervalIndex.from_breaks(np.arange(1000001)))
  170. result1 = s.loc[:80000]
  171. result2 = s.loc[0:80000]
  172. result3 = s.loc[0:80000:1]
  173. tm.assert_series_equal(result1, result2)
  174. tm.assert_series_equal(result1, result3)
  175. def test_loc_getitem_frame(self):
  176. df = DataFrame({'A': range(10)})
  177. s = pd.cut(df.A, 5)
  178. df['B'] = s
  179. df = df.set_index('B')
  180. result = df.loc[4]
  181. expected = df.iloc[4:6]
  182. tm.assert_frame_equal(result, expected)
  183. with pytest.raises(KeyError):
  184. df.loc[10]
  185. # single list-like
  186. result = df.loc[[4]]
  187. expected = df.iloc[4:6]
  188. tm.assert_frame_equal(result, expected)
  189. # non-unique
  190. result = df.loc[[4, 5]]
  191. expected = df.take([4, 5, 4, 5])
  192. tm.assert_frame_equal(result, expected)
  193. with pytest.raises(KeyError):
  194. df.loc[[10]]
  195. # partial missing
  196. with pytest.raises(KeyError):
  197. df.loc[[10, 4]]