test_loc.py 29 KB


  1. """ test label based indexing with loc """
  2. from warnings import catch_warnings, filterwarnings
  3. import numpy as np
  4. import pytest
  5. from pandas.compat import PY2, StringIO, lrange
  6. import pandas as pd
  7. from pandas import DataFrame, Series, Timestamp, date_range
  8. from pandas.api.types import is_scalar
  9. from pandas.tests.indexing.common import Base
  10. from pandas.util import testing as tm
  11. class TestLoc(Base):
  def test_loc_getitem_dups(self):
      """Chained ``.loc`` on a duplicated index equals one combined lookup."""
      # GH 5678
      # repeated getitems on a dup index returning a ndarray
      values = np.random.random_sample((20, 5))
      labels = ['ABCDE'[pos % 5] for pos in range(20)]
      df = DataFrame(values, index=labels)

      combined = df.loc['A', 0]
      chained = df.loc[:, 0].loc['A']
      tm.assert_series_equal(chained, combined)
  def test_loc_getitem_dups2(self):
      """Row selection via ``iloc`` and ``loc`` with duplicated columns."""
      # GH4726
      # dup indexing with iloc/loc
      dup_columns = ['a', 'a', 'a', 'a', 'a']
      df = DataFrame([[1, 2, 'foo', 'bar', Timestamp('20130101')]],
                     columns=dup_columns, index=[1])
      expected = Series([1, 2, 'foo', 'bar', Timestamp('20130101')],
                        index=['a', 'a', 'a', 'a', 'a'], name=1)

      by_position = df.iloc[0]
      tm.assert_series_equal(by_position, expected)

      by_label = df.loc[1]
      tm.assert_series_equal(by_label, expected)
  def test_loc_setitem_dups(self):
      """In-place ``*=`` through ``.loc`` on an index with repeated labels."""
      # GH 6541
      bar = np.arange(5, dtype='float64') * 1.34 + 2
      bar2 = np.arange(5, dtype='float64') * -.34 + 2
      df_orig = DataFrame({'me': list('rttti'),
                           'foo': list('aaade'),
                           'bar': bar,
                           'bar2': bar2}).set_index('me')

      # label 'r' occurs once; two columns are compared as a Series
      key = ('r', ['bar', 'bar2'])
      df = df_orig.copy()
      df.loc[key] *= 2.0
      tm.assert_series_equal(df.loc[key], 2.0 * df_orig.loc[key])

      # label 'r' with a single column is compared with plain ``==``
      key = ('r', 'bar')
      df = df_orig.copy()
      df.loc[key] *= 2.0
      assert df.loc[key] == 2.0 * df_orig.loc[key]

      # label 't' occurs three times; compared as a DataFrame
      key = ('t', ['bar', 'bar2'])
      df = df_orig.copy()
      df.loc[key] *= 2.0
      tm.assert_frame_equal(df.loc[key], 2.0 * df_orig.loc[key])
  def test_loc_setitem_slice(self):
      """Boolean-mask assignment via ``.loc`` on unsigned-integer columns."""
      # GH10503

      # assigning the same type should not change the type
      df1 = DataFrame({'a': [0, 1, 1],
                       'b': Series([100, 200, 300], dtype='uint32')})
      mask = df1['a'] == 1
      bumped = df1.loc[mask, 'b'] + 1
      df1.loc[mask, 'b'] = bumped
      expected = DataFrame({'a': [0, 1, 1],
                            'b': Series([100, 201, 301], dtype='uint32')})
      tm.assert_frame_equal(df1, expected)

      # assigning a new type should get the inferred type
      df2 = DataFrame({'a': [0, 1, 1], 'b': [100, 200, 300]},
                      dtype='uint64')
      mask = df1['a'] == 1
      replacement = df2.loc[mask, 'b']
      df1.loc[mask, 'b'] = replacement
      expected = DataFrame({'a': [0, 1, 1], 'b': [100, 200, 300]},
                           dtype='uint64')
      # NOTE(review): the assignment above writes into df1, yet the frame
      # compared here is df2, which is never modified - confirm intent.
      tm.assert_frame_equal(df2, expected)
  def test_loc_getitem_int(self):
      """Run ``check_result`` for integer labels on axes 0, 1 and 2."""
      # int label
      for label, axis in ((2, 0), (3, 1), (4, 2)):
          self.check_result('int label', 'loc', label, 'ix', label,
                            typs=['ints', 'uints'], axes=axis)

      self.check_result('int label', 'loc', 2, 'ix', 2,
                        typs=['label'], fails=KeyError)
  def test_loc_getitem_label(self):
      """Run ``check_result`` for single-label ``loc`` lookups."""
      # label
      axis0_cases = [
          ('c', 'c', 'labels'),
          ('null', 'null', 'mixed'),
          (8, 8, 'mixed'),
          (Timestamp('20130102'), 1, 'ts'),
      ]
      for loc_key, ix_key, typ in axis0_cases:
          self.check_result('label', 'loc', loc_key, 'ix', ix_key,
                            typs=[typ], axes=0)

      self.check_result('label', 'loc', 'c', 'ix', 'c', typs=['empty'],
                        fails=KeyError)
  def test_loc_getitem_label_out_of_range(self):
      """Run ``check_result`` for the labels 'f' and 20 with expected errors."""
      def check(label, **options):
          # same label is used for both the ``loc`` and the ``ix`` key
          self.check_result('label range', 'loc', label, 'ix', label,
                            **options)

      # out of range label
      check('f', typs=['ints', 'uints', 'labels', 'mixed', 'ts'],
            fails=KeyError)
      check('f', typs=['floats'], fails=KeyError)
      check(20, typs=['ints', 'uints', 'mixed'], fails=KeyError)
      check(20, typs=['labels'], fails=TypeError)
      check(20, typs=['ts'], axes=0, fails=TypeError)
      check(20, typs=['floats'], axes=0, fails=KeyError)
  def test_loc_getitem_label_list(self):
      """Run ``check_result`` for lists of labels on each axis."""
      # list of labels
      cases = [
          ([0, 2, 4], ['ints', 'uints'], 0),
          ([3, 6, 9], ['ints', 'uints'], 1),
          ([4, 8, 12], ['ints', 'uints'], 2),
          (['a', 'b', 'd'], ['labels'], 0),
          (['A', 'B', 'C'], ['labels'], 1),
          (['Z', 'Y', 'W'], ['labels'], 2),
          ([2, 8, 'null'], ['mixed'], 0),
          ([Timestamp('20130102'), Timestamp('20130103')], ['ts'], 0),
      ]
      for labels, typs, axis in cases:
          # a separate list object is passed for the ``ix`` key
          self.check_result('list lbl', 'loc', labels, 'ix', list(labels),
                            typs=typs, axes=axis)
  @pytest.mark.skipif(PY2, reason=("Catching warnings unreliable with "
                                   "Python 2 (GH #20770)"))
  def test_loc_getitem_label_list_with_missing(self):
      """Run ``check_result`` for label lists, expecting FutureWarning."""
      self.check_result('list lbl', 'loc', [0, 1, 2], 'indexer', [0, 1, 2],
                        typs=['empty'], fails=KeyError)

      per_axis = (([0, 2, 10], 0), ([3, 6, 7], 1), ([4, 8, 10], 2))
      for labels, axis in per_axis:
          with tm.assert_produces_warning(FutureWarning,
                                          check_stacklevel=False):
              self.check_result('list lbl', 'loc', labels, 'ix',
                                list(labels),
                                typs=['ints', 'uints', 'floats'],
                                axes=axis, fails=KeyError)

      # GH 17758 - MultiIndex and missing keys
      with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
          self.check_result('list lbl', 'loc', [(1, 3), (1, 4), (2, 5)],
                            'ix', [(1, 3), (1, 4), (2, 5)],
                            typs=['multi'],
                            axes=0)
  def test_getitem_label_list_with_missing(self):
      """``Series[...]`` with a list holding an absent label warns."""
      labelled = Series(range(3), index=['a', 'b', 'c'])

      # consistency
      with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
          labelled[['a', 'd']]

      default_index = Series(range(3))
      with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
          default_index[[0, 3]]
  def test_loc_getitem_label_list_fails(self):
      """Run ``check_result`` expecting KeyError on axes 1 and 2."""
      # fails
      for axis in (1, 2):
          self.check_result('list lbl', 'loc', [20, 30, 40], 'ix',
                            [20, 30, 40],
                            typs=['ints', 'uints'], axes=axis,
                            fails=KeyError)
  def test_loc_getitem_label_array_like(self):
      """Run ``check_result`` with an Index object as the ``loc`` key."""
      # array like
      for labels, axis in (([0, 2, 4], 0), ([3, 6, 9], 1), ([4, 8, 12], 2)):
          array_like = Series(index=labels).index
          self.check_result('array like', 'loc', array_like,
                            'ix', list(labels),
                            typs=['ints', 'uints'], axes=axis)
  def test_loc_getitem_bool(self):
      """Run ``check_result`` with a four-element boolean list as the key."""
      # boolean indexers
      mask = [True, False, True, False]
      populated = ['ints', 'uints', 'labels', 'mixed', 'ts', 'floats']

      self.check_result('bool', 'loc', mask, 'ix', mask, typs=populated)
      self.check_result('bool', 'loc', mask, 'ix', mask, typs=['empty'],
                        fails=KeyError)
  def test_loc_getitem_int_slice(self):
      """Run ``check_result`` with integer slices on axes 0, 1 and 2."""
      # ok
      for start, stop, axis in ((2, 4, 0), (3, 6, 1), (4, 8, 2)):
          self.check_result('int slice2', 'loc', slice(start, stop),
                            'ix', [start, stop],
                            typs=['ints', 'uints'], axes=axis)
  def test_loc_to_fail(self):
      """Errors (and one FutureWarning) from ``.loc`` with absent labels."""
      # GH3449
      df = DataFrame(np.random.random((3, 3)),
                     index=['a', 'b', 'c'],
                     columns=['e', 'f', 'g'])

      # raise a KeyError?
      with pytest.raises(KeyError):
          df.loc[[1, 2], [1, 2]]

      # GH 7496
      # loc should not fallback
      s = Series()
      s.loc[1] = 1
      s.loc['a'] = 2

      with pytest.raises(KeyError):
          s.loc[-1]
      with pytest.raises(KeyError):
          s.loc[[-1, -2]]
      with pytest.raises(KeyError):
          s.loc[['4']]

      s.loc[-1] = 3
      with tm.assert_produces_warning(FutureWarning,
                                      check_stacklevel=False):
          result = s.loc[[-1, -2]]
      expected = Series([3, np.nan], index=[-1, -2])
      tm.assert_series_equal(result, expected)

      s['a'] = 2
      with pytest.raises(KeyError):
          s.loc[[-2]]

      del s['a']
      with pytest.raises(KeyError):
          s.loc[[-2]] = 0

      # inconsistency between .loc[values] and .loc[values,:]
      # GH 7999
      df = DataFrame([['a'], ['b']], index=[1, 2], columns=['value'])
      with pytest.raises(KeyError):
          df.loc[[3], :]
      with pytest.raises(KeyError):
          df.loc[[3]]
  def test_loc_getitem_list_with_fail(self):
      """List lookups via ``.loc`` when some or all labels are absent."""
      # 15747
      # should KeyError if *any* missing labels
      s = Series([1, 2, 3])

      # a present label works
      s.loc[[2]]

      with pytest.raises(KeyError):
          s.loc[[3]]

      # a non-match and a match
      with tm.assert_produces_warning(FutureWarning):
          via_loc = s.loc[[2, 3]]
      via_reindex = s.reindex([2, 3])
      tm.assert_series_equal(via_reindex, via_loc)
  def test_loc_getitem_label_slice(self):
      """Run ``check_result`` for label, timestamp-string and mixed slices."""
      # label slices (with ints)
      self.check_result('lab slice', 'loc', slice(1, 3),
                        'ix', slice(1, 3),
                        typs=['labels', 'mixed', 'empty', 'ts', 'floats'],
                        fails=TypeError)

      # real label slices
      for start, stop, axis in (('a', 'c', 0), ('A', 'C', 1), ('W', 'Z', 2)):
          self.check_result('lab slice', 'loc', slice(start, stop),
                            'ix', slice(start, stop),
                            typs=['labels'], axes=axis)

      self.check_result('ts slice', 'loc', slice('20130102', '20130104'),
                        'ix', slice('20130102', '20130104'),
                        typs=['ts'], axes=0)
      for axis in (1, 2):
          self.check_result('ts slice', 'loc',
                            slice('20130102', '20130104'),
                            'ix', slice('20130102', '20130104'),
                            typs=['ts'], axes=axis, fails=TypeError)

      # GH 14316
      self.check_result('ts slice rev', 'loc', slice('20130104', '20130102'),
                        'indexer', [0, 1, 2], typs=['ts_rev'], axes=0)

      for axis, error in ((0, TypeError), (1, KeyError), (2, KeyError)):
          self.check_result('mixed slice', 'loc', slice(2, 8),
                            'ix', slice(2, 8),
                            typs=['mixed'], axes=axis, fails=error)

      self.check_result('mixed slice', 'loc', slice(2, 4, 2),
                        'ix', slice(2, 4, 2),
                        typs=['mixed'], axes=0, fails=TypeError)
  def test_loc_index(self):
      """A boolean Index used as a ``.loc`` key selects like a bool array."""
      # gh-17131
      # a boolean index should index like a boolean numpy array
      row_labels = ["alpha_0", "alpha_1", "alpha_2", "beta_0", "beta_1"]
      df = DataFrame(np.random.random(size=(5, 10)), index=row_labels)

      mask = df.index.map(lambda label: "alpha" in label)
      expected = df.loc[np.array(mask)]

      tm.assert_frame_equal(df.loc[mask], expected)
      tm.assert_frame_equal(df.loc[mask.values], expected)
  def test_loc_general(self):
      """Column label slice followed by ``iloc``, plus a mixed-type row."""
      names = ['A', 'B', 'C', 'D']
      df = DataFrame(np.random.rand(4, 4), columns=names,
                     index=['A', 'B', 'C', 'D'])

      # want this to work
      subset = df.loc[:, "A":"B"].iloc[0:2, :]
      assert (subset.columns == ['A', 'B']).all()
      assert (subset.index == ['A', 'B']).all()

      # mixed type
      mixed = DataFrame({'a': [Timestamp('20130101')], 'b': [1]})
      result = mixed.iloc[0]
      expected = Series([Timestamp('20130101'), 1], index=['a', 'b'], name=0)
      tm.assert_series_equal(result, expected)
      assert result.dtype == object
  def test_loc_setitem_consistency(self):
      """Overwrite a datetime column through ``df.loc[:, col]``."""
      def int_values():
          return Series(range(5), dtype=np.int64)

      def dated_frame():
          # a fresh five-row frame is built for every assignment below
          return DataFrame({'date': date_range('2000-01-01', '2000-01-5'),
                            'val': int_values()})

      # GH 6149
      # coerce similarly for setitem and loc when rows have a null-slice
      expected = DataFrame({'date': Series(0, index=range(5),
                                           dtype=np.int64),
                            'val': int_values()})
      zero_values = [0,
                     np.array(0, dtype=np.int64),
                     np.array([0, 0, 0, 0, 0], dtype=np.int64)]
      for value in zero_values:
          df = dated_frame()
          df.loc[:, 'date'] = value
          tm.assert_frame_equal(df, expected)

      for scalar in ('foo', 1.0):
          expected = DataFrame({'date': Series(scalar, index=range(5)),
                                'val': int_values()})
          df = dated_frame()
          df.loc[:, 'date'] = scalar
          tm.assert_frame_equal(df, expected)

      # GH 15494
      # setting on frame with single row
      df = DataFrame({'date': Series([Timestamp('20180101')])})
      df.loc[:, 'date'] = 'string'
      expected = DataFrame({'date': Series(['string'])})
      tm.assert_frame_equal(df, expected)
  def test_loc_setitem_consistency_empty(self):
      """Scalar assignment to a column of a frame that has no rows."""
      # empty (essentially noops)
      expected = DataFrame(columns=['x', 'y'])
      expected['x'] = expected['x'].astype(np.int64)

      via_loc = DataFrame(columns=['x', 'y'])
      via_loc.loc[:, 'x'] = 1
      tm.assert_frame_equal(via_loc, expected)

      via_setitem = DataFrame(columns=['x', 'y'])
      via_setitem['x'] = 1
      tm.assert_frame_equal(via_setitem, expected)
  def test_loc_setitem_consistency_slice_column_len(self):
      """Full-column ``.loc`` assignment on a frame with MultiIndex columns."""
      # .loc[:,column] setting with slice == len of the column
      # GH10408
      rows = [
          "Level_0,,,Respondent,Respondent,Respondent,OtherCat,OtherCat",
          "Level_1,,,Something,StartDate,EndDate,Yes/No,SomethingElse",
          "Region,Site,RespondentID,,,,,",
          "Region_1,Site_1,3987227376,A,5/25/2015 10:59,5/25/2015 11:22,Yes,",
          "Region_1,Site_1,3980680971,A,5/21/2015 9:40,5/21/2015 9:52,Yes,Yes",
          "Region_1,Site_2,3977723249,A,5/20/2015 8:27,5/20/2015 8:41,Yes,",
          "Region_1,Site_2,3977723089,A,5/20/2015 8:33,5/20/2015 9:09,Yes,No",
      ]
      # newline-joined, without a trailing newline
      data = "\n".join(rows)

      df = pd.read_csv(StringIO(data), header=[0, 1], index_col=[0, 1, 2])

      start = ('Respondent', 'StartDate')
      end = ('Respondent', 'EndDate')
      duration = ('Respondent', 'Duration')

      df.loc[:, start] = pd.to_datetime(df.loc[:, start])
      df.loc[:, end] = pd.to_datetime(df.loc[:, end])
      df.loc[:, duration] = df.loc[:, end] - df.loc[:, start]
      df.loc[:, duration] = df.loc[:, duration].astype('timedelta64[s]')

      expected = Series([1380, 720, 840, 2160.], index=df.index,
                        name=duration)
      tm.assert_series_equal(df[duration], expected)
  def test_loc_setitem_frame(self):
      """Scalar, slice and list assignment through ``.loc`` on DataFrames."""
      df = self.frame_labels

      # positional read before the write; the value itself is not used
      result = df.iloc[0, 0]

      df.loc['a', 'A'] = 1
      result = df.loc['a', 'A']
      assert result == 1
      result = df.iloc[0, 0]
      assert result == 1

      df.loc[:, 'B':'D'] = 0
      by_label = df.loc[:, 'B':'D']
      by_position = df.iloc[:, 1:]
      tm.assert_frame_equal(by_position, by_label)

      # GH 6254
      # setting issue
      df = DataFrame(index=[3, 5, 4], columns=['A'])
      df.loc[[4, 3, 5], 'A'] = np.array([1, 2, 3], dtype='int64')
      column = Series([1, 2, 3], index=[4, 3, 5])
      expected = DataFrame({'A': column}).reindex(index=[3, 5, 4])
      tm.assert_frame_equal(df, expected)

      # GH 6252
      # setting with an empty frame
      keys1 = ['@' + str(num) for num in range(5)]
      val1 = np.arange(5, dtype='int64')

      keys2 = ['@' + str(num) for num in range(4)]
      val2 = np.arange(4, dtype='int64')

      index = list(set(keys1).union(keys2))

      df = DataFrame(index=index)
      df['A'] = np.nan
      df.loc[keys1, 'A'] = val1

      df['B'] = np.nan
      df.loc[keys2, 'B'] = val2

      expected = DataFrame({'A': Series(val1, index=keys1),
                            'B': Series(val2, index=keys2)})
      expected = expected.reindex(index=index)
      tm.assert_frame_equal(df, expected)

      # GH 8669
      # invalid coercion of nan -> int
      df = DataFrame({'A': [1, 2, 3], 'B': np.nan})
      df.loc[df.B > df.A, 'B'] = df.A
      expected = DataFrame({'A': [1, 2, 3], 'B': np.nan})
      tm.assert_frame_equal(df, expected)

      # GH 6546
      # setting with mixed labels
      df = DataFrame({1: [1, 2], 2: [3, 4], 'a': ['a', 'b']})

      result = df.loc[0, [1, 2]]
      expected = Series([1, 3], index=[1, 2], dtype=object, name=0)
      tm.assert_series_equal(result, expected)

      expected = DataFrame({1: [5, 2], 2: [6, 4], 'a': ['a', 'b']})
      df.loc[0, [1, 2]] = [5, 6]
      tm.assert_frame_equal(df, expected)
  def test_loc_setitem_frame_multiples(self):
      """Assign a re-indexed slice of a frame back into the same frame."""
      # multiple setting
      df = DataFrame({'A': ['foo', 'bar', 'baz'],
                      'B': Series(range(3), dtype=np.int64)})
      rhs = df.loc[1:2]
      rhs.index = df.index[0:2]
      df.loc[0:1] = rhs
      expected = DataFrame({'A': ['bar', 'baz', 'baz'],
                            'B': Series([1, 2, 2], dtype=np.int64)})
      tm.assert_frame_equal(df, expected)

      # multiple setting with frame on rhs (with M8)
      df = DataFrame({'date': date_range('2000-01-01', '2000-01-5'),
                      'val': Series(range(5), dtype=np.int64)})
      expected_dates = [Timestamp('20000101'),
                        Timestamp('20000102'),
                        Timestamp('20000101'),
                        Timestamp('20000102'),
                        Timestamp('20000103')]
      expected = DataFrame({'date': expected_dates,
                            'val': Series([0, 1, 0, 1, 2],
                                          dtype=np.int64)})
      rhs = df.loc[0:2]
      rhs.index = df.index[2:5]
      df.loc[2:4] = rhs
      tm.assert_frame_equal(df, expected)
  @pytest.mark.parametrize(
      'indexer', [['A'], slice(None, 'A', None), np.array(['A'])])
  @pytest.mark.parametrize(
      'value', [['Z'], np.array(['Z'])])
  def test_loc_setitem_with_scalar_index(self, indexer, value):
      """One-element assignment into row 0 leaves a scalar 'Z' in the cell."""
      # GH #19474
      # assigning like "df.loc[0, ['A']] = ['Z']" should be evaluated
      # elementwisely, not using "setter('A', ['Z'])".
      frame = pd.DataFrame([[1, 2], [3, 4]], columns=['A', 'B'])
      frame.loc[0, indexer] = value

      cell = frame.loc[0, 'A']
      assert is_scalar(cell)
      assert cell == 'Z'
  def test_loc_coerceion(self):
      """Row subsets taken with ``iloc`` keep the parent frame's dtypes."""
      import datetime

      # 12411
      df = DataFrame({'date': [Timestamp('20130101').tz_localize('UTC'),
                               pd.NaT]})
      expected = df.dtypes
      for rows in ([0], [1]):
          tm.assert_series_equal(df.iloc[rows].dtypes, expected)

      # 12045
      df = DataFrame({'date': [datetime.datetime(2012, 1, 1),
                               datetime.datetime(1012, 1, 2)]})
      expected = df.dtypes
      for rows in ([0], [1]):
          tm.assert_series_equal(df.iloc[rows].dtypes, expected)

      # 11594
      df = DataFrame({'text': ['some words'] + [None] * 9})
      expected = df.dtypes
      for window in (slice(0, 2), slice(3, None)):
          tm.assert_series_equal(df.iloc[window].dtypes, expected)
  def test_loc_non_unique(self):
      """Label slices on a duplicated index, unsorted and then sorted."""
      # GH3659
      # non-unique indexer with loc slice
      # https://groups.google.com/forum/?fromgroups#!topic/pydata/zTm2No0crYs

      # these are going to raise because the we are non monotonic
      df = DataFrame({'A': [1, 2, 3, 4, 5, 6],
                      'B': [3, 4, 5, 6, 7, 8]}, index=[0, 1, 0, 1, 2, 3])
      for label_slice in (slice(1, None), slice(0, None), slice(1, 2)):
          with pytest.raises(KeyError):
              # the key is a one-element tuple holding the slice
              df.loc[(label_slice,)]

      # monotonic are ok
      df = DataFrame({'A': [1, 2, 3, 4, 5, 6],
                      'B': [3, 4, 5, 6, 7, 8]},
                     index=[0, 1, 0, 1, 2, 3]).sort_index(axis=0)

      from_one = df.loc[1:]
      expected = DataFrame({'A': [2, 4, 5, 6], 'B': [4, 6, 7, 8]},
                           index=[1, 1, 2, 3])
      tm.assert_frame_equal(from_one, expected)

      from_zero = df.loc[0:]
      tm.assert_frame_equal(from_zero, df)

      one_to_two = df.loc[1:2]
      expected = DataFrame({'A': [2, 4, 5], 'B': [4, 6, 7]},
                           index=[1, 1, 2])
      tm.assert_frame_equal(one_to_two, expected)
  def test_loc_non_unique_memory_error(self):
      """Array selection via ``.loc`` on a non-unique index, two sizes."""
      # GH 4280
      # non_unique index with a large selection triggers a memory error
      columns = list('ABCDEFG')

      def gen_test(n_unique, n_dups):
          # random rows labelled 0..n_unique-1, then n_dups rows of ones
          # that are all labelled 0
          unique_part = DataFrame(np.random.randn(n_unique, len(columns)),
                                  index=lrange(n_unique), columns=columns)
          dup_part = DataFrame(np.ones((n_dups, len(columns))),
                               index=[0] * n_dups, columns=columns)
          return pd.concat([unique_part, dup_part])

      def gen_expected(df, mask):
          len_mask = len(mask)
          ones_block = DataFrame(np.ones((len_mask, len(columns))),
                                 index=[0] * len_mask,
                                 columns=columns)
          return pd.concat([df.take([0]), ones_block, df.take(mask[1:])])

      for n_unique, n_dups in ((900, 100), (900000, 100000)):
          df = gen_test(n_unique, n_dups)
          assert df.index.is_unique is False

          mask = np.arange(n_dups)
          result = df.loc[mask]
          expected = gen_expected(df, mask)
          tm.assert_frame_equal(result, expected)
  def test_loc_name(self):
      """The index name is kept by list selection via iloc, ix and loc."""
      # GH 3880
      df = DataFrame([[1, 1], [1, 1]])
      df.index.name = 'index_name'

      via_iloc = df.iloc[[0, 1]].index.name
      assert via_iloc == 'index_name'

      with catch_warnings(record=True):
          filterwarnings("ignore", "\\n.ix", DeprecationWarning)
          via_ix = df.ix[[0, 1]].index.name
      assert via_ix == 'index_name'

      via_loc = df.loc[[0, 1]].index.name
      assert via_loc == 'index_name'
  def test_loc_empty_list_indexer_is_ok(self):
      """An empty list key in ``.loc`` matches the empty ``iloc`` slice."""
      df = tm.makeCustomDataframe(5, 2)
      strict = dict(check_index_type=True, check_column_type=True)

      # vertical empty
      tm.assert_frame_equal(df.loc[:, []], df.iloc[:, :0], **strict)
      # horizontal empty
      tm.assert_frame_equal(df.loc[[], :], df.iloc[:0, :], **strict)
      # horizontal empty
      tm.assert_frame_equal(df.loc[[]], df.iloc[:0, :], **strict)
  def test_identity_slice_returns_new_object(self):
      """``obj.loc[:]`` is a distinct object that still sees later writes."""
      # GH13873
      original_df = DataFrame({'a': [1, 2, 3]})
      df_view = original_df.loc[:]
      assert df_view is not original_df
      assert original_df[:] is not original_df

      # should be a shallow copy
      original_df['a'] = [4, 4, 4]
      assert (df_view['a'] == 4).all()

      # These should not return copies
      assert original_df is original_df.loc[:, :]
      df = DataFrame(np.random.randn(10, 4))
      assert df[0] is df.loc[:, 0]

      # Same tests for Series
      original_series = Series([1, 2, 3, 4, 5, 6])
      series_view = original_series.loc[:]
      assert series_view is not original_series
      assert original_series[:] is not original_series

      original_series[:3] = [7, 8, 9]
      assert all(series_view[:3] == [7, 8, 9])
  def test_loc_uint64(self):
      """``.loc`` accepts labels at the top of the uint64 range."""
      # GH20722
      # Test whether loc accept uint64 max value as index.
      top = np.iinfo('uint64').max
      s = pd.Series([1, 2], index=[top - 1, top])

      scalar = s.loc[top - 1]
      assert scalar == s.iloc[0]

      single = s.loc[[top - 1]]
      tm.assert_series_equal(single, s.iloc[[0]])

      both = s.loc[[top - 1, top]]
      tm.assert_series_equal(both, s)
  def test_loc_setitem_empty_append(self):
      """Assigning through ``.loc`` into a frame that starts with no rows.

      The first case sets a whole column from a three-element list; the
      second sets a single cell at row label 0 of a float-typed frame.
      """
      # GH6173, various appends to an empty dataframe

      data = [1, 2, 3]
      expected = DataFrame({'x': data, 'y': [None] * len(data)})

      # appends to fit length of data
      df = DataFrame(columns=['x', 'y'])
      df.loc[:, 'x'] = data
      tm.assert_frame_equal(df, expected)

      # only appends one value
      expected = DataFrame({'x': [1.0], 'y': [np.nan]})
      # ``np.float`` was only an alias of the builtin ``float`` and has been
      # removed from NumPy; the builtin requests the same dtype.
      df = DataFrame(columns=['x', 'y'],
                     dtype=float)
      df.loc[0, 'x'] = expected.loc[0, 'x']
      tm.assert_frame_equal(df, expected)
  def test_loc_setitem_empty_append_raises(self):
      """List and slice row keys on a frame with no rows raise on setitem."""
      # GH6173, various appends to an empty dataframe

      data = [1, 2]
      df = DataFrame(columns=['x', 'y'])

      missing_labels = (r"None of \[Int64Index\(\[0, 1\], dtype='int64'\)\] "
                        r"are in the \[index\]")
      with pytest.raises(KeyError, match=missing_labels):
          df.loc[[0, 1], 'x'] = data

      size_mismatch = ("cannot copy sequence with size 2 to array axis "
                       "with dimension 0")
      with pytest.raises(ValueError, match=size_mismatch):
          df.loc[0:2, 'x'] = data