getitem.py 7.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248
  1. import numpy as np
  2. import pytest
  3. import pandas as pd
  4. from .base import BaseExtensionTests
  5. class BaseGetitemTests(BaseExtensionTests):
  6. """Tests for ExtensionArray.__getitem__."""
  def test_iloc_series(self, data):
      """Check positional selection on a Series built from ``data``.

      Both a slice and an explicit list of positions are compared against
      a Series constructed from ``data[:4]``.
      """
      series = pd.Series(data)
      first_four = pd.Series(data[:4])

      # slice -> series
      self.assert_series_equal(series.iloc[:4], first_four)

      # sequence -> series
      self.assert_series_equal(series.iloc[[0, 1, 2, 3]], first_four)
  def test_iloc_frame(self, data):
      """Check ``DataFrame.iloc`` on a frame holding ``data`` in column "A".

      Four combinations are exercised: rows selected by slice or by a list
      of positions, with the column selected by a list (frame result) or by
      a scalar position (Series result).
      """
      df = pd.DataFrame({"A": data,
                         "B": np.arange(len(data), dtype="int64")})
      expected = pd.DataFrame({"A": data[:4]})

      # slice -> frame
      result = df.iloc[:4, [0]]
      self.assert_frame_equal(result, expected)

      # sequence -> frame
      result = df.iloc[[0, 1, 2, 3], [0]]
      self.assert_frame_equal(result, expected)

      expected = pd.Series(data[:4], name="A")

      # slice -> series
      result = df.iloc[:4, 0]
      self.assert_series_equal(result, expected)

      # sequence -> series
      # Use an explicit list of row positions here; repeating the slice
      # would leave the list-indexer path for a Series result untested.
      result = df.iloc[[0, 1, 2, 3], 0]
      self.assert_series_equal(result, expected)
  def test_loc_series(self, data):
      """Check label-based selection on a Series built from ``data``.

      The label slice ``:3`` and the label list ``[0, 1, 2, 3]`` are both
      compared against a Series constructed from ``data[:4]``.
      """
      series = pd.Series(data)
      first_four = pd.Series(data[:4])

      # slice -> series
      self.assert_series_equal(series.loc[:3], first_four)

      # sequence -> series
      self.assert_series_equal(series.loc[[0, 1, 2, 3]], first_four)
  def test_loc_frame(self, data):
      """Check ``DataFrame.loc`` on a frame holding ``data`` in column "A".

      Four combinations are exercised: rows selected by label slice or by a
      list of labels, with the column selected by a list (frame result) or
      by a scalar label (Series result).
      """
      df = pd.DataFrame({"A": data,
                         "B": np.arange(len(data), dtype="int64")})
      expected = pd.DataFrame({"A": data[:4]})

      # slice -> frame
      result = df.loc[:3, ["A"]]
      self.assert_frame_equal(result, expected)

      # sequence -> frame
      result = df.loc[[0, 1, 2, 3], ["A"]]
      self.assert_frame_equal(result, expected)

      expected = pd.Series(data[:4], name="A")

      # slice -> series
      result = df.loc[:3, "A"]
      self.assert_series_equal(result, expected)

      # sequence -> series
      # Use an explicit list of row labels here; repeating the slice
      # would leave the list-indexer path for a Series result untested.
      result = df.loc[[0, 1, 2, 3], "A"]
      self.assert_series_equal(result, expected)
  55. def test_getitem_scalar(self, data):
  56. result = data[0]
  57. assert isinstance(result, data.dtype.type)
  58. result = pd.Series(data)[0]
  59. assert isinstance(result, data.dtype.type)
  60. def test_getitem_scalar_na(self, data_missing, na_cmp, na_value):
  61. result = data_missing[0]
  62. assert na_cmp(result, na_value)
  63. def test_getitem_mask(self, data):
  64. # Empty mask, raw array
  65. mask = np.zeros(len(data), dtype=bool)
  66. result = data[mask]
  67. assert len(result) == 0
  68. assert isinstance(result, type(data))
  69. # Empty mask, in series
  70. mask = np.zeros(len(data), dtype=bool)
  71. result = pd.Series(data)[mask]
  72. assert len(result) == 0
  73. assert result.dtype == data.dtype
  74. # non-empty mask, raw array
  75. mask[0] = True
  76. result = data[mask]
  77. assert len(result) == 1
  78. assert isinstance(result, type(data))
  79. # non-empty mask, in series
  80. result = pd.Series(data)[mask]
  81. assert len(result) == 1
  82. assert result.dtype == data.dtype
  83. def test_getitem_slice(self, data):
  84. # getitem[slice] should return an array
  85. result = data[slice(0)] # empty
  86. assert isinstance(result, type(data))
  87. result = data[slice(1)] # scalar
  88. assert isinstance(result, type(data))
  def test_get(self, data):
      """Exercise ``Series.get`` on Series built from ``data``.

      Three Series are used: one with even integer labels, one with string
      labels over the first six elements, and a strided view of a default
      Series. Scalar, list and slice keys are checked, and keys that are
      not found are asserted to give ``None``.
      """
      # GH 20882
      even_labels = [2 * i for i in range(len(data))]
      ser = pd.Series(data, index=even_labels)
      assert ser.get(4) == ser.iloc[2]

      result = ser.get([4, 6])
      expected = ser.iloc[[2, 3]]
      self.assert_series_equal(result, expected)

      result = ser.get(slice(2))
      expected = ser.iloc[[0, 1]]
      self.assert_series_equal(result, expected)

      # Labels absent from the integer index give None.
      assert ser.get(-1) is None
      assert ser.get(ser.index.max() + 1) is None

      ser = pd.Series(data[:6], index=list('abcdef'))
      assert ser.get('c') == ser.iloc[2]

      result = ser.get(slice('b', 'd'))
      expected = ser.iloc[[1, 2, 3]]
      self.assert_series_equal(result, expected)

      result = ser.get('Z')
      assert result is None

      # Integer keys on the string-labelled Series are asserted to match
      # the corresponding positional lookups.
      assert ser.get(4) == ser.iloc[4]
      assert ser.get(-1) == ser.iloc[-1]
      assert ser.get(len(ser)) is None

      # GH 21257
      ser = pd.Series(data)
      every_other = ser[::2]
      assert every_other.get(1) is None
  115. def test_take_sequence(self, data):
  116. result = pd.Series(data)[[0, 1, 3]]
  117. assert result.iloc[0] == data[0]
  118. assert result.iloc[1] == data[1]
  119. assert result.iloc[2] == data[3]
  def test_take(self, data, na_value, na_cmp):
      """Check ``take`` with and without ``allow_fill``.

      Without filling, ``-1`` is asserted to pick the last element. With
      ``allow_fill=True``, ``-1`` is asserted to produce a value that
      ``na_cmp`` treats as ``na_value``. An index past the end must raise
      ``IndexError`` with a message matching "out of bounds".
      """
      wrapped = data.take([0, -1])
      assert wrapped.dtype == data.dtype
      assert wrapped[0] == data[0]
      assert wrapped[1] == data[-1]

      filled = data.take([0, -1], allow_fill=True, fill_value=na_value)
      assert filled[0] == data[0]
      assert na_cmp(filled[1], na_value)

      past_the_end = len(data) + 1
      with pytest.raises(IndexError, match="out of bounds"):
          data.take([past_the_end])
  def test_take_empty(self, data, na_value, na_cmp):
      """Check ``take`` on a zero-length slice of ``data``.

      Taking ``-1`` with ``allow_fill=True`` must give a missing value;
      taking ``-1`` without filling, or taking real positions, must raise
      ``IndexError``.
      """
      no_elements = data[:0]

      filled = no_elements.take([-1], allow_fill=True)
      assert na_cmp(filled[0], na_value)

      with pytest.raises(IndexError):
          no_elements.take([-1])

      with pytest.raises(IndexError, match="cannot do a non-empty take"):
          no_elements.take([0, 1])
  def test_take_negative(self, data):
      """Check that negative indices in ``take`` match their positive equivalents.

      ``-n`` is compared with position ``0`` and ``-1`` with position
      ``n - 1``, where ``n`` is ``len(data)``.
      """
      # https://github.com/pandas-dev/pandas/issues/20640
      size = len(data)
      last = size - 1
      with_negatives = data.take([0, -size, last, -1])
      with_positives = data.take([0, 0, last, last])
      self.assert_extension_array_equal(with_negatives, with_positives)
  def test_take_non_na_fill_value(self, data_missing):
      """Check ``take`` with ``allow_fill=True`` and a non-missing fill value.

      The ``-1`` position must be filled with the supplied value, so the
      result is compared with taking the valid element twice.
      """
      missing = data_missing[0]
      valid = data_missing[1]  # valid
      arr = data_missing._from_sequence([missing, valid, missing])

      filled = arr.take([-1, 1], fill_value=valid, allow_fill=True)
      twice_valid = arr.take([1, 1])
      self.assert_extension_array_equal(filled, twice_valid)
  def test_take_pandas_style_negative_raises(self, data, na_value):
      """Check that ``-2`` raises ``ValueError`` when ``allow_fill=True``."""
      indexer = [0, -2]
      with pytest.raises(ValueError):
          data.take(indexer, fill_value=na_value, allow_fill=True)
  @pytest.mark.parametrize('allow_fill', [True, False])
  def test_take_out_of_bounds_raises(self, data, allow_fill):
      """Check that ``take`` raises ``IndexError`` for an index past the end.

      Runs once with ``allow_fill=True`` and once with ``allow_fill=False``
      against the first three elements of ``data``.
      """
      first_three = data[:3]
      indexer = np.asarray([0, 3])  # 3 is one past the last valid position
      with pytest.raises(IndexError):
          first_three.take(indexer, allow_fill=allow_fill)
  def test_take_series(self, data):
      """Check ``Series.take`` with the positions ``[0, -1]``.

      The expected Series holds the first and last elements of ``data`` and
      is indexed by their original labels, ``0`` and ``len(data) - 1``.
      """
      ser = pd.Series(data)
      last = len(data) - 1

      taken = ser.take([0, -1])

      values = data._from_sequence([data[0], data[last]], dtype=ser.dtype)
      expected = pd.Series(values, index=[0, last])
      self.assert_series_equal(taken, expected)
  def test_reindex(self, data, na_value):
      """Check ``Series.reindex`` on a Series built from ``data``.

      Three cases are covered: all labels present, a mix of present and
      absent labels, and all labels absent. Absent labels are expected to
      hold ``na_value``.
      """
      ser = pd.Series(data)
      size = len(data)

      # every label exists
      present = [0, 1, 3]
      expected = pd.Series(data.take(present), index=present)
      self.assert_series_equal(ser.reindex(present), expected)

      # only the middle label exists
      mixed = [-1, 0, size]
      mixed_values = data._from_sequence([na_value, data[0], na_value],
                                         dtype=ser.dtype)
      expected = pd.Series(mixed_values, index=mixed)
      self.assert_series_equal(ser.reindex(mixed), expected)

      # no label exists
      absent = [size, size + 1]
      absent_values = data._from_sequence([na_value, na_value],
                                          dtype=ser.dtype)
      expected = pd.Series(absent_values, index=absent)
      self.assert_series_equal(ser.reindex(absent), expected)
  def test_reindex_non_na_fill_value(self, data_missing):
      """Check ``Series.reindex`` with a non-missing ``fill_value``.

      The new label ``2`` must receive the fill value, while the existing
      missing element at label ``0`` is expected to stay missing.
      """
      missing = data_missing[0]
      valid = data_missing[1]
      ser = pd.Series(data_missing._from_sequence([missing, valid]))

      reindexed = ser.reindex([0, 1, 2], fill_value=valid)

      expected_values = data_missing._from_sequence([missing, valid, valid])
      self.assert_series_equal(reindexed, pd.Series(expected_values))