# test_iloc.py (24 KB)
  1. """ test positional based indexing with iloc """
  2. from warnings import catch_warnings, filterwarnings, simplefilter
  3. import numpy as np
  4. import pytest
  5. from pandas.compat import lmap, lrange
  6. import pandas as pd
  7. from pandas import DataFrame, Series, concat, date_range, isna
  8. from pandas.api.types import is_scalar
  9. from pandas.tests.indexing.common import Base
  10. from pandas.util import testing as tm
  11. class TestiLoc(Base):
  def test_iloc_exceeds_bounds(self):
      """Out-of-bounds lists and scalars raise; out-of-bounds slices clip.

      GH6296: ``iloc`` should allow slice indexers that exceed the bounds.
      GH10779: a single positive/negative position exceeding the bounds
      should raise an ``IndexError``.
      """
      df = DataFrame(np.random.random_sample((20, 5)), columns=list('ABCDE'))
      s = df['A']

      # lists of positions should raise IndexError!
      msg = 'positional indexers are out-of-bounds'
      with pytest.raises(IndexError, match=msg):
          df.iloc[:, [0, 1, 2, 3, 4, 5]]
      with pytest.raises(IndexError, match=msg):
          df.iloc[[1, 30]]
      with pytest.raises(IndexError, match=msg):
          df.iloc[[1, -30]]
      with pytest.raises(IndexError, match=msg):
          df.iloc[[100]]
      with pytest.raises(IndexError, match=msg):
          s.iloc[[100]]
      with pytest.raises(IndexError, match=msg):
          s.iloc[[-100]]

      # still raise on a single indexer
      msg = 'single positional indexer is out-of-bounds'
      with pytest.raises(IndexError, match=msg):
          df.iloc[30]
      with pytest.raises(IndexError, match=msg):
          df.iloc[-30]

      # GH10779: same for a Series
      with pytest.raises(IndexError, match=msg):
          s.iloc[30]
      with pytest.raises(IndexError, match=msg):
          s.iloc[-30]

      # slices are ok
      result = df.iloc[:, 4:10]  # 0 < start < len < stop
      expected = df.iloc[:, 4:]
      tm.assert_frame_equal(result, expected)

      result = df.iloc[:, -4:-10]  # stop < 0 < start < len
      expected = df.iloc[:, :0]
      tm.assert_frame_equal(result, expected)

      result = df.iloc[:, 10:4:-1]  # 0 < stop < len < start (down)
      expected = df.iloc[:, :4:-1]
      tm.assert_frame_equal(result, expected)

      result = df.iloc[:, 4:-10:-1]  # stop < 0 < start < len (down)
      expected = df.iloc[:, 4::-1]
      tm.assert_frame_equal(result, expected)

      result = df.iloc[:, -10:4]  # start < 0 < stop < len
      expected = df.iloc[:, :4]
      tm.assert_frame_equal(result, expected)

      result = df.iloc[:, 10:4]  # 0 < stop < len < start
      expected = df.iloc[:, :0]
      tm.assert_frame_equal(result, expected)

      result = df.iloc[:, -10:-11:-1]  # stop < start < 0 < len (down)
      expected = df.iloc[:, :0]
      tm.assert_frame_equal(result, expected)

      result = df.iloc[:, 10:11]  # 0 < len < start < stop
      expected = df.iloc[:, :0]
      tm.assert_frame_equal(result, expected)

      # slice bounds exceeding is ok
      result = s.iloc[18:30]
      expected = s.iloc[18:]
      tm.assert_series_equal(result, expected)

      result = s.iloc[30:]
      expected = s.iloc[:0]
      tm.assert_series_equal(result, expected)

      result = s.iloc[30::-1]
      expected = s.iloc[::-1]
      tm.assert_series_equal(result, expected)

      # doc example
      def check(result, expected):
          # repr and dtypes are touched as well as the values, so a result
          # that breaks either of them is caught here too
          str(result)
          result.dtypes
          tm.assert_frame_equal(result, expected)

      dfl = DataFrame(np.random.randn(5, 2), columns=list('AB'))
      check(dfl.iloc[:, 2:3], DataFrame(index=dfl.index))
      check(dfl.iloc[:, 1:3], dfl.iloc[:, [1]])
      check(dfl.iloc[4:6], dfl.iloc[[4]])

      with pytest.raises(IndexError,
                         match='positional indexers are out-of-bounds'):
          dfl.iloc[[4, 5, 6]]
      with pytest.raises(IndexError, match=msg):
          dfl.iloc[:, 4]
  def test_iloc_getitem_int(self):
      """Look up a single non-negative integer position with ``iloc``."""
      position = 2
      # 'ints' / 'uints' objects, with per-key reference values for 'ix'
      self.check_result('integer', 'iloc', position, 'ix',
                        {0: 4, 1: 6, 2: 8}, typs=['ints', 'uints'])
      # the remaining object types are run with fails=IndexError
      self.check_result('integer', 'iloc', position, 'indexer', position,
                        typs=['labels', 'mixed', 'ts', 'floats', 'empty'],
                        fails=IndexError)
  def test_iloc_getitem_neg_int(self):
      """Look up a single negative integer position with ``iloc``."""
      position = -1
      # 'ints' / 'uints' objects, with per-key reference values for 'ix'
      self.check_result('neg int', 'iloc', position, 'ix',
                        {0: 6, 1: 9, 2: 12}, typs=['ints', 'uints'])
      # the remaining object types are run with fails=IndexError
      self.check_result('neg int', 'iloc', position, 'indexer', position,
                        typs=['labels', 'mixed', 'ts', 'floats', 'empty'],
                        fails=IndexError)
  @pytest.mark.parametrize('dims', [1, 2])
  def test_iloc_getitem_invalid_scalar(self, dims):
      """A string key is rejected by ``iloc`` on Series and DataFrame."""
      # GH 21982
      obj = (Series(np.arange(10)) if dims == 1
             else DataFrame(np.arange(100).reshape(10, 10)))

      with pytest.raises(TypeError, match='Cannot index by location index'):
          obj.iloc['a']
  106. def test_iloc_array_not_mutating_negative_indices(self):
  107. # GH 21867
  108. array_with_neg_numbers = np.array([1, 2, -1])
  109. array_copy = array_with_neg_numbers.copy()
  110. df = pd.DataFrame({
  111. 'A': [100, 101, 102],
  112. 'B': [103, 104, 105],
  113. 'C': [106, 107, 108]},
  114. index=[1, 2, 3])
  115. df.iloc[array_with_neg_numbers]
  116. tm.assert_numpy_array_equal(array_with_neg_numbers, array_copy)
  117. df.iloc[:, array_with_neg_numbers]
  118. tm.assert_numpy_array_equal(array_with_neg_numbers, array_copy)
  def test_iloc_getitem_list_int(self):
      """Integer list-likes as ``iloc`` keys: plain lists, then ndarrays."""
      # list of ints first; then array of ints (GH5006), to make sure that
      # a single indexer is returning the correct type
      for label, make_key in (('list int', list), ('array int', np.array)):
          self.check_result(label, 'iloc', make_key([0, 1, 2]), 'ix',
                            {0: [0, 2, 4], 1: [0, 3, 6], 2: [0, 4, 8]},
                            typs=['ints', 'uints'])
          self.check_result(label, 'iloc', make_key([2]), 'ix',
                            {0: [4], 1: [6], 2: [8]},
                            typs=['ints', 'uints'])
          self.check_result(label, 'iloc', make_key([0, 1, 2]), 'indexer',
                            [0, 1, 2],
                            typs=['labels', 'mixed', 'ts', 'floats', 'empty'],
                            fails=IndexError)
  141. def test_iloc_getitem_neg_int_can_reach_first_index(self):
  142. # GH10547 and GH10779
  143. # negative integers should be able to reach index 0
  144. df = DataFrame({'A': [2, 3, 5], 'B': [7, 11, 13]})
  145. s = df['A']
  146. expected = df.iloc[0]
  147. result = df.iloc[-3]
  148. tm.assert_series_equal(result, expected)
  149. expected = df.iloc[[0]]
  150. result = df.iloc[[-3]]
  151. tm.assert_frame_equal(result, expected)
  152. expected = s.iloc[0]
  153. result = s.iloc[-3]
  154. assert result == expected
  155. expected = s.iloc[[0]]
  156. result = s.iloc[[-3]]
  157. tm.assert_series_equal(result, expected)
  158. # check the length 1 Series case highlighted in GH10547
  159. expected = Series(['a'], index=['A'])
  160. result = expected.iloc[[-1]]
  161. tm.assert_series_equal(result, expected)
  def test_iloc_getitem_dups(self):
      """Repeated positions in the key, and frames with repeated columns."""
      # no dups in panel (bug?)
      self.check_result('list int (dups)', 'iloc', [0, 1, 1, 3], 'ix',
                        {0: [0, 2, 2, 6], 1: [0, 3, 3, 9]},
                        objs=['series', 'frame'], typs=['ints', 'uints'])

      # GH 6766
      left = DataFrame([{'A': None, 'B': 1}, {'A': 2, 'B': 2}])
      right = DataFrame([{'A': 3, 'B': 3}, {'A': 4, 'B': 4}])
      frame = concat([left, right], axis=1)

      # cross-sectional indexing
      assert isna(frame.iloc[0, 0])

      first_row = frame.iloc[0, :]
      expected = Series([np.nan, 1, 3, 3], index=['A', 'B', 'A', 'B'],
                        name=0)
      tm.assert_series_equal(first_row, expected)
  def test_iloc_getitem_array(self):
      """An Index object is accepted as an array-like ``iloc`` key."""
      # array like
      key = Series(index=list(range(1, 4))).index
      self.check_result('array like', 'iloc', key, 'ix',
                        {0: [2, 4, 6], 1: [3, 6, 9], 2: [4, 8, 12]},
                        typs=['ints', 'uints'])
  def test_iloc_getitem_bool(self):
      """A plain list of booleans used as an ``iloc`` key."""
      # boolean indexers; the same list is both the key and the reference
      mask = [True, False, True, False]
      self.check_result('bool', 'iloc', mask, 'ix', mask,
                        typs=['ints', 'uints'])
      self.check_result('bool', 'iloc', mask, 'ix', mask,
                        typs=['labels', 'mixed', 'ts', 'floats', 'empty'],
                        fails=IndexError)
  def test_iloc_getitem_slice(self):
      """A ``slice`` object used as an ``iloc`` key."""
      # slices
      self.check_result('slice', 'iloc', slice(1, 3), 'ix',
                        {0: [2, 4], 1: [3, 6], 2: [4, 8]},
                        typs=['ints', 'uints'])
      # the remaining object types are run with fails=IndexError
      self.check_result('slice', 'iloc', slice(1, 3), 'indexer',
                        slice(1, 3),
                        typs=['labels', 'mixed', 'ts', 'floats', 'empty'],
                        fails=IndexError)
  200. def test_iloc_getitem_slice_dups(self):
  201. df1 = DataFrame(np.random.randn(10, 4), columns=['A', 'A', 'B', 'B'])
  202. df2 = DataFrame(np.random.randint(0, 10, size=20).reshape(10, 2),
  203. columns=['A', 'C'])
  204. # axis=1
  205. df = concat([df1, df2], axis=1)
  206. tm.assert_frame_equal(df.iloc[:, :4], df1)
  207. tm.assert_frame_equal(df.iloc[:, 4:], df2)
  208. df = concat([df2, df1], axis=1)
  209. tm.assert_frame_equal(df.iloc[:, :2], df2)
  210. tm.assert_frame_equal(df.iloc[:, 2:], df1)
  211. exp = concat([df2, df1.iloc[:, [0]]], axis=1)
  212. tm.assert_frame_equal(df.iloc[:, 0:3], exp)
  213. # axis=0
  214. df = concat([df, df], axis=0)
  215. tm.assert_frame_equal(df.iloc[0:10, :2], df2)
  216. tm.assert_frame_equal(df.iloc[0:10, 2:], df1)
  217. tm.assert_frame_equal(df.iloc[10:, :2], df2)
  218. tm.assert_frame_equal(df.iloc[10:, 2:], df1)
  219. def test_iloc_setitem(self):
  220. df = self.frame_ints
  221. df.iloc[1, 1] = 1
  222. result = df.iloc[1, 1]
  223. assert result == 1
  224. df.iloc[:, 2:3] = 0
  225. expected = df.iloc[:, 2:3]
  226. result = df.iloc[:, 2:3]
  227. tm.assert_frame_equal(result, expected)
  228. # GH5771
  229. s = Series(0, index=[4, 5, 6])
  230. s.iloc[1:2] += 1
  231. expected = Series([0, 1, 0], index=[4, 5, 6])
  232. tm.assert_series_equal(s, expected)
  233. def test_iloc_setitem_list(self):
  234. # setitem with an iloc list
  235. df = DataFrame(np.arange(9).reshape((3, 3)), index=["A", "B", "C"],
  236. columns=["A", "B", "C"])
  237. df.iloc[[0, 1], [1, 2]]
  238. df.iloc[[0, 1], [1, 2]] += 100
  239. expected = DataFrame(
  240. np.array([0, 101, 102, 3, 104, 105, 6, 7, 8]).reshape((3, 3)),
  241. index=["A", "B", "C"], columns=["A", "B", "C"])
  242. tm.assert_frame_equal(df, expected)
  243. def test_iloc_setitem_pandas_object(self):
  244. # GH 17193
  245. s_orig = Series([0, 1, 2, 3])
  246. expected = Series([0, -1, -2, 3])
  247. s = s_orig.copy()
  248. s.iloc[Series([1, 2])] = [-1, -2]
  249. tm.assert_series_equal(s, expected)
  250. s = s_orig.copy()
  251. s.iloc[pd.Index([1, 2])] = [-1, -2]
  252. tm.assert_series_equal(s, expected)
  def test_iloc_setitem_dups(self):
      """``iloc`` assignment on a frame whose column labels repeat."""
      # GH 6766
      # iloc with a mask aligning from another iloc
      left = DataFrame([{'A': None, 'B': 1}, {'A': 2, 'B': 2}])
      right = DataFrame([{'A': 3, 'B': 3}, {'A': 4, 'B': 4}])
      df = concat([left, right], axis=1)

      expected = df.fillna(3)
      expected['A'] = expected['A'].astype('float64')

      # fill the missing entries of column 0 from column 2
      missing = np.isnan(df.iloc[:, 0])
      rows = missing[missing].index
      df.iloc[rows, 0] = df.iloc[rows, 2]
      tm.assert_frame_equal(df, expected)

      # del a dup column across blocks
      expected = DataFrame({0: [1, 2], 1: [3, 4]})
      expected.columns = ['B', 'B']
      del df['A']
      tm.assert_frame_equal(df, expected)

      # assign back to self
      df.iloc[[0, 1], [0, 1]] = df.iloc[[0, 1], [0, 1]]
      tm.assert_frame_equal(df, expected)

      # reversed x 2
      for _ in range(2):
          df.iloc[[1, 0], [0, 1]] = df.iloc[[1, 0], [0, 1]].reset_index(
              drop=True)
      tm.assert_frame_equal(df, expected)
  279. def test_iloc_getitem_frame(self):
  280. df = DataFrame(np.random.randn(10, 4), index=lrange(0, 20, 2),
  281. columns=lrange(0, 8, 2))
  282. result = df.iloc[2]
  283. with catch_warnings(record=True):
  284. filterwarnings("ignore", "\\n.ix", DeprecationWarning)
  285. exp = df.ix[4]
  286. tm.assert_series_equal(result, exp)
  287. result = df.iloc[2, 2]
  288. with catch_warnings(record=True):
  289. filterwarnings("ignore", "\\n.ix", DeprecationWarning)
  290. exp = df.ix[4, 4]
  291. assert result == exp
  292. # slice
  293. result = df.iloc[4:8]
  294. with catch_warnings(record=True):
  295. filterwarnings("ignore", "\\n.ix", DeprecationWarning)
  296. expected = df.ix[8:14]
  297. tm.assert_frame_equal(result, expected)
  298. result = df.iloc[:, 2:3]
  299. with catch_warnings(record=True):
  300. filterwarnings("ignore", "\\n.ix", DeprecationWarning)
  301. expected = df.ix[:, 4:5]
  302. tm.assert_frame_equal(result, expected)
  303. # list of integers
  304. result = df.iloc[[0, 1, 3]]
  305. with catch_warnings(record=True):
  306. filterwarnings("ignore", "\\n.ix", DeprecationWarning)
  307. expected = df.ix[[0, 2, 6]]
  308. tm.assert_frame_equal(result, expected)
  309. result = df.iloc[[0, 1, 3], [0, 1]]
  310. with catch_warnings(record=True):
  311. filterwarnings("ignore", "\\n.ix", DeprecationWarning)
  312. expected = df.ix[[0, 2, 6], [0, 2]]
  313. tm.assert_frame_equal(result, expected)
  314. # neg indices
  315. result = df.iloc[[-1, 1, 3], [-1, 1]]
  316. with catch_warnings(record=True):
  317. filterwarnings("ignore", "\\n.ix", DeprecationWarning)
  318. expected = df.ix[[18, 2, 6], [6, 2]]
  319. tm.assert_frame_equal(result, expected)
  320. # dups indices
  321. result = df.iloc[[-1, -1, 1, 3], [-1, 1]]
  322. with catch_warnings(record=True):
  323. filterwarnings("ignore", "\\n.ix", DeprecationWarning)
  324. expected = df.ix[[18, 18, 2, 6], [6, 2]]
  325. tm.assert_frame_equal(result, expected)
  326. # with index-like
  327. s = Series(index=lrange(1, 5))
  328. result = df.iloc[s.index]
  329. with catch_warnings(record=True):
  330. filterwarnings("ignore", "\\n.ix", DeprecationWarning)
  331. expected = df.ix[[2, 4, 6, 8]]
  332. tm.assert_frame_equal(result, expected)
  def test_iloc_getitem_labelled_frame(self):
      """``iloc`` on a string-labelled frame agrees with ``loc``."""
      # try with labelled frame
      df = DataFrame(np.random.randn(10, 4),
                     index=list('abcdefghij'), columns=list('ABCD'))

      assert df.iloc[1, 1] == df.loc['b', 'B']

      tm.assert_frame_equal(df.iloc[:, 2:3], df.loc[:, ['C']])

      # negative indexing
      assert df.iloc[-1, -1] == df.loc['j', 'D']

      # out-of-bounds exception
      with pytest.raises(IndexError):
          df.iloc[10, 5]

      # trying to use a label
      with pytest.raises(ValueError):
          df.iloc['j', 'D']
  def test_iloc_getitem_doc_issue(self):
      """Slicing both axes at once returns the matching sub-array."""

      def check(result, expected):
          # touch repr and dtypes as well as comparing the values
          str(result)
          result.dtypes
          tm.assert_frame_equal(result, expected)

      # multi axis slicing issue with single block
      # surfaced in GH 6059
      arr = np.random.randn(6, 4)
      index = date_range('20130101', periods=6)
      columns = list('ABCD')
      df = DataFrame(arr, index=index, columns=columns)

      # defines ref_locs
      df.describe()

      check(df.iloc[3:5, 0:2],
            DataFrame(arr[3:5, 0:2], index=index[3:5],
                      columns=columns[0:2]))

      # for dups
      df.columns = list('aaaa')
      check(df.iloc[3:5, 0:2],
            DataFrame(arr[3:5, 0:2], index=index[3:5],
                      columns=list('aa')))

      # related
      arr = np.random.randn(6, 4)
      index = list(range(0, 12, 2))
      columns = list(range(0, 8, 2))
      df = DataFrame(arr, index=index, columns=columns)

      df._data.blocks[0].mgr_locs
      check(df.iloc[1:5, 2:4],
            DataFrame(arr[1:5, 2:4], index=index[1:5],
                      columns=columns[2:4]))
  386. def test_iloc_setitem_series(self):
  387. df = DataFrame(np.random.randn(10, 4), index=list('abcdefghij'),
  388. columns=list('ABCD'))
  389. df.iloc[1, 1] = 1
  390. result = df.iloc[1, 1]
  391. assert result == 1
  392. df.iloc[:, 2:3] = 0
  393. expected = df.iloc[:, 2:3]
  394. result = df.iloc[:, 2:3]
  395. tm.assert_frame_equal(result, expected)
  396. s = Series(np.random.randn(10), index=lrange(0, 20, 2))
  397. s.iloc[1] = 1
  398. result = s.iloc[1]
  399. assert result == 1
  400. s.iloc[:4] = 0
  401. expected = s.iloc[:4]
  402. result = s.iloc[:4]
  403. tm.assert_series_equal(result, expected)
  404. s = Series([-1] * 6)
  405. s.iloc[0::2] = [0, 2, 4]
  406. s.iloc[1::2] = [1, 3, 5]
  407. result = s
  408. expected = Series([0, 1, 2, 3, 4, 5])
  409. tm.assert_series_equal(result, expected)
  410. def test_iloc_setitem_list_of_lists(self):
  411. # GH 7551
  412. # list-of-list is set incorrectly in mixed vs. single dtyped frames
  413. df = DataFrame(dict(A=np.arange(5, dtype='int64'),
  414. B=np.arange(5, 10, dtype='int64')))
  415. df.iloc[2:4] = [[10, 11], [12, 13]]
  416. expected = DataFrame(dict(A=[0, 1, 10, 12, 4], B=[5, 6, 11, 13, 9]))
  417. tm.assert_frame_equal(df, expected)
  418. df = DataFrame(
  419. dict(A=list('abcde'), B=np.arange(5, 10, dtype='int64')))
  420. df.iloc[2:4] = [['x', 11], ['y', 13]]
  421. expected = DataFrame(dict(A=['a', 'b', 'x', 'y', 'e'],
  422. B=[5, 6, 11, 13, 9]))
  423. tm.assert_frame_equal(df, expected)
  @pytest.mark.parametrize(
      'indexer', [[0], slice(None, 1, None), np.array([0])])
  @pytest.mark.parametrize(
      'value', [['Z'], np.array(['Z'])])
  def test_iloc_setitem_with_scalar_index(self, indexer, value):
      """A one-element value set via a scalar row key lands as a scalar."""
      # GH #19474
      # assigning like "df.iloc[0, [0]] = ['Z']" should be evaluated
      # elementwisely, not using "setter('A', ['Z'])".
      frame = pd.DataFrame([[1, 2], [3, 4]], columns=['A', 'B'])
      frame.iloc[0, indexer] = value

      cell = frame.iloc[0, 0]
      assert is_scalar(cell)
      assert cell == 'Z'
  def test_iloc_mask(self):
      """Boolean masks with ``iloc``: Series masks raise, ndarrays work.

      GH 3631. The second half tabulates what plain ``[]``, ``.loc`` and
      ``.iloc`` give for a mask that is an ndarray, a Series labelled by the
      reversed frame index, or a Series labelled by the reversed ``locs``
      column. Each outcome is either a binary-string sum of the selected
      ``nums`` or the text of the exception raised.
      """
      df = DataFrame(list(range(5)), list('ABCDE'), columns=['a'])
      mask = (df.a % 2 == 0)
      # the key is passed as a 1-tuple, as the direct __getitem__ call did
      with pytest.raises(ValueError):
          df.iloc[(mask,)]

      mask.index = list(range(len(mask)))
      with pytest.raises(NotImplementedError):
          df.iloc[(mask,)]

      # ndarray ok
      result = df.iloc[np.array([True] * len(mask), dtype=bool)]
      tm.assert_frame_equal(result, df)

      # the possibilities
      locs = np.arange(4)
      nums = 2 ** locs
      reps = [bin(num) for num in nums]
      df = DataFrame({'locs': locs, 'nums': nums}, reps)

      expected = {
          (None, ''): '0b1100',
          (None, '.loc'): '0b1100',
          (None, '.iloc'): '0b1100',
          ('index', ''): '0b11',
          ('index', '.loc'): '0b11',
          ('index', '.iloc'): ('iLocation based boolean indexing '
                               'cannot use an indexable as a mask'),
          ('locs', ''): 'Unalignable boolean Series provided as indexer '
                        '(index of the boolean Series and of the indexed '
                        'object do not match',
          ('locs', '.loc'): 'Unalignable boolean Series provided as indexer '
                            '(index of the boolean Series and of the '
                            'indexed object do not match',
          ('locs', '.iloc'): ('iLocation based boolean indexing on an '
                              'integer type is not available'),
      }

      # UserWarnings from reindex of a boolean mask
      with catch_warnings(record=True):
          simplefilter("ignore", UserWarning)
          for idx in [None, 'index', 'locs']:
              mask = (df.nums > 2).values
              if idx:
                  mask = Series(mask, list(reversed(getattr(df, idx))))
              for method in ['', '.loc', '.iloc']:
                  accessor = getattr(df, method[1:]) if method else df
                  try:
                      ans = bin(accessor[mask]['nums'].sum())
                  except Exception as e:
                      # deliberately broad: the failure text itself is the
                      # outcome compared against the table
                      ans = str(e)

                  key = (idx, method)
                  r = expected.get(key)
                  if r != ans:
                      # expected value first, actual outcome last, to match
                      # the wording of the message
                      raise AssertionError(
                          "[%s] does not match [%s], received [%s]"
                          % (key, r, ans))
  def test_iloc_non_unique_indexing(self):
      """Positional lookups on frames whose row labels repeat."""
      # GH 4017, non-unique indexing (on the axis)
      base = DataFrame({'A': [0.1] * 3000, 'B': [1] * 3000})
      idx = np.array(list(range(30))) * 99
      expected = base.iloc[idx]

      tripled = concat([base, 2 * base, 3 * base])
      result = tripled.iloc[idx]
      tm.assert_frame_equal(result, expected)

      df2 = DataFrame({'A': [0.1] * 1000, 'B': [1] * 1000})
      df2 = concat([df2, 2 * df2, 3 * df2])

      max_label = df2.index.to_series().max()
      expected = df2.iloc[idx[idx <= max_label]]

      # each selected row, followed by its double and its triple
      rows = []
      for _, row in expected.iterrows():
          rows.extend([row, row * 2, row * 3])

      expected = DataFrame(rows)
      expected = concat([expected, DataFrame(index=idx[idx > max_label])],
                        sort=True)
      with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
          result = df2.loc[idx]
      tm.assert_frame_equal(result, expected, check_index_type=False)
  def test_iloc_empty_list_indexer_is_ok(self):
      """An empty list key gives the same result as an empty slice."""
      df = tm.makeCustomDataframe(5, 2)
      strict = dict(check_index_type=True, check_column_type=True)

      # no columns selected
      tm.assert_frame_equal(df.iloc[:, []], df.iloc[:, :0], **strict)
      # no rows selected, with an explicit column slice
      tm.assert_frame_equal(df.iloc[[], :], df.iloc[:0, :], **strict)
      # no rows selected, row key only
      tm.assert_frame_equal(df.iloc[[]], df.iloc[:0, :], **strict)
  def test_identity_slice_returns_new_object(self):
      """``obj.iloc[:]`` is a distinct object that still sees later writes."""
      # GH13873
      frame = DataFrame({'a': [1, 2, 3]})
      frame_view = frame.iloc[:]
      assert frame_view is not frame

      # should be a shallow copy
      frame['a'] = [4, 4, 4]
      assert (frame_view['a'] == 4).all()

      series = Series([1, 2, 3, 4, 5, 6])
      series_view = series.iloc[:]
      assert series_view is not series

      # should also be a shallow copy
      series[:3] = [7, 8, 9]
      assert all(series_view[:3] == [7, 8, 9])
  541. assert all(sliced_series[:3] == [7, 8, 9])