test_indexing.py 36 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015
  1. # -*- coding: utf-8 -*-
  2. # pylint: disable-msg=W0612,E1101
  3. """ test fancy indexing & misc """
  4. from datetime import datetime
  5. from warnings import catch_warnings, simplefilter
  6. import weakref
  7. import numpy as np
  8. import pytest
  9. from pandas.compat import PY2, lrange, range
  10. from pandas.core.dtypes.common import is_float_dtype, is_integer_dtype
  11. import pandas as pd
  12. from pandas import DataFrame, Index, NaT, Series
  13. from pandas.core.indexing import (
  14. _maybe_numeric_slice, _non_reducing_slice, validate_indices)
  15. from pandas.tests.indexing.common import Base, _mklbl
  16. import pandas.util.testing as tm
  17. # ------------------------------------------------------------------------
  18. # Indexing test cases
  19. class TestFancy(Base):
  20. """ pure get/set item & fancy indexing """
  21. def test_setitem_ndarray_1d(self):
  22. # GH5508
  23. # len of indexer vs length of the 1d ndarray
  24. df = DataFrame(index=Index(lrange(1, 11)))
  25. df['foo'] = np.zeros(10, dtype=np.float64)
  26. df['bar'] = np.zeros(10, dtype=np.complex)
  27. # invalid
  28. with pytest.raises(ValueError):
  29. df.loc[df.index[2:5], 'bar'] = np.array([2.33j, 1.23 + 0.1j,
  30. 2.2, 1.0])
  31. # valid
  32. df.loc[df.index[2:6], 'bar'] = np.array([2.33j, 1.23 + 0.1j,
  33. 2.2, 1.0])
  34. result = df.loc[df.index[2:6], 'bar']
  35. expected = Series([2.33j, 1.23 + 0.1j, 2.2, 1.0], index=[3, 4, 5, 6],
  36. name='bar')
  37. tm.assert_series_equal(result, expected)
  38. # dtype getting changed?
  39. df = DataFrame(index=Index(lrange(1, 11)))
  40. df['foo'] = np.zeros(10, dtype=np.float64)
  41. df['bar'] = np.zeros(10, dtype=np.complex)
  42. with pytest.raises(ValueError):
  43. df[2:5] = np.arange(1, 4) * 1j
  44. def test_inf_upcast(self):
  45. # GH 16957
  46. # We should be able to use np.inf as a key
  47. # np.inf should cause an index to convert to float
  48. # Test with np.inf in rows
  49. df = DataFrame(columns=[0])
  50. df.loc[1] = 1
  51. df.loc[2] = 2
  52. df.loc[np.inf] = 3
  53. # make sure we can look up the value
  54. assert df.loc[np.inf, 0] == 3
  55. result = df.index
  56. expected = pd.Float64Index([1, 2, np.inf])
  57. tm.assert_index_equal(result, expected)
  58. # Test with np.inf in columns
  59. df = DataFrame()
  60. df.loc[0, 0] = 1
  61. df.loc[1, 1] = 2
  62. df.loc[0, np.inf] = 3
  63. result = df.columns
  64. expected = pd.Float64Index([0, 1, np.inf])
  65. tm.assert_index_equal(result, expected)
  66. def test_setitem_dtype_upcast(self):
  67. # GH3216
  68. df = DataFrame([{"a": 1}, {"a": 3, "b": 2}])
  69. df['c'] = np.nan
  70. assert df['c'].dtype == np.float64
  71. df.loc[0, 'c'] = 'foo'
  72. expected = DataFrame([{"a": 1, "c": 'foo'},
  73. {"a": 3, "b": 2, "c": np.nan}])
  74. tm.assert_frame_equal(df, expected)
  75. # GH10280
  76. df = DataFrame(np.arange(6, dtype='int64').reshape(2, 3),
  77. index=list('ab'),
  78. columns=['foo', 'bar', 'baz'])
  79. for val in [3.14, 'wxyz']:
  80. left = df.copy()
  81. left.loc['a', 'bar'] = val
  82. right = DataFrame([[0, val, 2], [3, 4, 5]], index=list('ab'),
  83. columns=['foo', 'bar', 'baz'])
  84. tm.assert_frame_equal(left, right)
  85. assert is_integer_dtype(left['foo'])
  86. assert is_integer_dtype(left['baz'])
  87. left = DataFrame(np.arange(6, dtype='int64').reshape(2, 3) / 10.0,
  88. index=list('ab'),
  89. columns=['foo', 'bar', 'baz'])
  90. left.loc['a', 'bar'] = 'wxyz'
  91. right = DataFrame([[0, 'wxyz', .2], [.3, .4, .5]], index=list('ab'),
  92. columns=['foo', 'bar', 'baz'])
  93. tm.assert_frame_equal(left, right)
  94. assert is_float_dtype(left['foo'])
  95. assert is_float_dtype(left['baz'])
  96. def test_dups_fancy_indexing(self):
  97. # GH 3455
  98. from pandas.util.testing import makeCustomDataframe as mkdf
  99. df = mkdf(10, 3)
  100. df.columns = ['a', 'a', 'b']
  101. result = df[['b', 'a']].columns
  102. expected = Index(['b', 'a', 'a'])
  103. tm.assert_index_equal(result, expected)
  104. # across dtypes
  105. df = DataFrame([[1, 2, 1., 2., 3., 'foo', 'bar']],
  106. columns=list('aaaaaaa'))
  107. df.head()
  108. str(df)
  109. result = DataFrame([[1, 2, 1., 2., 3., 'foo', 'bar']])
  110. result.columns = list('aaaaaaa')
  111. # TODO(wesm): unused?
  112. df_v = df.iloc[:, 4] # noqa
  113. res_v = result.iloc[:, 4] # noqa
  114. tm.assert_frame_equal(df, result)
  115. # GH 3561, dups not in selected order
  116. df = DataFrame(
  117. {'test': [5, 7, 9, 11],
  118. 'test1': [4., 5, 6, 7],
  119. 'other': list('abcd')}, index=['A', 'A', 'B', 'C'])
  120. rows = ['C', 'B']
  121. expected = DataFrame(
  122. {'test': [11, 9],
  123. 'test1': [7., 6],
  124. 'other': ['d', 'c']}, index=rows)
  125. result = df.loc[rows]
  126. tm.assert_frame_equal(result, expected)
  127. result = df.loc[Index(rows)]
  128. tm.assert_frame_equal(result, expected)
  129. rows = ['C', 'B', 'E']
  130. expected = DataFrame(
  131. {'test': [11, 9, np.nan],
  132. 'test1': [7., 6, np.nan],
  133. 'other': ['d', 'c', np.nan]}, index=rows)
  134. with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
  135. result = df.loc[rows]
  136. tm.assert_frame_equal(result, expected)
  137. # see GH5553, make sure we use the right indexer
  138. rows = ['F', 'G', 'H', 'C', 'B', 'E']
  139. expected = DataFrame({'test': [np.nan, np.nan, np.nan, 11, 9, np.nan],
  140. 'test1': [np.nan, np.nan, np.nan, 7., 6, np.nan],
  141. 'other': [np.nan, np.nan, np.nan,
  142. 'd', 'c', np.nan]},
  143. index=rows)
  144. with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
  145. result = df.loc[rows]
  146. tm.assert_frame_equal(result, expected)
  147. # List containing only missing label
  148. dfnu = DataFrame(np.random.randn(5, 3), index=list('AABCD'))
  149. with pytest.raises(KeyError):
  150. dfnu.loc[['E']]
  151. # ToDo: check_index_type can be True after GH 11497
  152. # GH 4619; duplicate indexer with missing label
  153. df = DataFrame({"A": [0, 1, 2]})
  154. with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
  155. result = df.loc[[0, 8, 0]]
  156. expected = DataFrame({"A": [0, np.nan, 0]}, index=[0, 8, 0])
  157. tm.assert_frame_equal(result, expected, check_index_type=False)
  158. df = DataFrame({"A": list('abc')})
  159. with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
  160. result = df.loc[[0, 8, 0]]
  161. expected = DataFrame({"A": ['a', np.nan, 'a']}, index=[0, 8, 0])
  162. tm.assert_frame_equal(result, expected, check_index_type=False)
  163. # non unique with non unique selector
  164. df = DataFrame({'test': [5, 7, 9, 11]}, index=['A', 'A', 'B', 'C'])
  165. expected = DataFrame(
  166. {'test': [5, 7, 5, 7, np.nan]}, index=['A', 'A', 'A', 'A', 'E'])
  167. with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
  168. result = df.loc[['A', 'A', 'E']]
  169. tm.assert_frame_equal(result, expected)
  170. @pytest.mark.skipif(PY2,
  171. reason="GH-20770. Py2 unreliable warnings catching.")
  172. def test_dups_fancy_indexing2(self):
  173. # GH 5835
  174. # dups on index and missing values
  175. df = DataFrame(
  176. np.random.randn(5, 5), columns=['A', 'B', 'B', 'B', 'A'])
  177. expected = pd.concat(
  178. [df.loc[:, ['A', 'B']], DataFrame(np.nan, columns=['C'],
  179. index=df.index)], axis=1)
  180. with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
  181. result = df.loc[:, ['A', 'B', 'C']]
  182. tm.assert_frame_equal(result, expected)
  183. # GH 6504, multi-axis indexing
  184. df = DataFrame(np.random.randn(9, 2),
  185. index=[1, 1, 1, 2, 2, 2, 3, 3, 3], columns=['a', 'b'])
  186. expected = df.iloc[0:6]
  187. result = df.loc[[1, 2]]
  188. tm.assert_frame_equal(result, expected)
  189. expected = df
  190. result = df.loc[:, ['a', 'b']]
  191. tm.assert_frame_equal(result, expected)
  192. expected = df.iloc[0:6, :]
  193. result = df.loc[[1, 2], ['a', 'b']]
  194. tm.assert_frame_equal(result, expected)
  195. def test_indexing_mixed_frame_bug(self):
  196. # GH3492
  197. df = DataFrame({'a': {1: 'aaa', 2: 'bbb', 3: 'ccc'},
  198. 'b': {1: 111, 2: 222, 3: 333}})
  199. # this works, new column is created correctly
  200. df['test'] = df['a'].apply(lambda x: '_' if x == 'aaa' else x)
  201. # this does not work, ie column test is not changed
  202. idx = df['test'] == '_'
  203. temp = df.loc[idx, 'a'].apply(lambda x: '-----' if x == 'aaa' else x)
  204. df.loc[idx, 'test'] = temp
  205. assert df.iloc[0, 2] == '-----'
  206. # if I look at df, then element [0,2] equals '_'. If instead I type
  207. # df.ix[idx,'test'], I get '-----', finally by typing df.iloc[0,2] I
  208. # get '_'.
  209. def test_multitype_list_index_access(self):
  210. # GH 10610
  211. df = DataFrame(np.random.random((10, 5)),
  212. columns=["a"] + [20, 21, 22, 23])
  213. with pytest.raises(KeyError):
  214. df[[22, 26, -8]]
  215. assert df[21].shape[0] == df.shape[0]
  216. def test_set_index_nan(self):
  217. # GH 3586
  218. df = DataFrame({'PRuid': {17: 'nonQC',
  219. 18: 'nonQC',
  220. 19: 'nonQC',
  221. 20: '10',
  222. 21: '11',
  223. 22: '12',
  224. 23: '13',
  225. 24: '24',
  226. 25: '35',
  227. 26: '46',
  228. 27: '47',
  229. 28: '48',
  230. 29: '59',
  231. 30: '10'},
  232. 'QC': {17: 0.0,
  233. 18: 0.0,
  234. 19: 0.0,
  235. 20: np.nan,
  236. 21: np.nan,
  237. 22: np.nan,
  238. 23: np.nan,
  239. 24: 1.0,
  240. 25: np.nan,
  241. 26: np.nan,
  242. 27: np.nan,
  243. 28: np.nan,
  244. 29: np.nan,
  245. 30: np.nan},
  246. 'data': {17: 7.9544899999999998,
  247. 18: 8.0142609999999994,
  248. 19: 7.8591520000000008,
  249. 20: 0.86140349999999999,
  250. 21: 0.87853110000000001,
  251. 22: 0.8427041999999999,
  252. 23: 0.78587700000000005,
  253. 24: 0.73062459999999996,
  254. 25: 0.81668560000000001,
  255. 26: 0.81927080000000008,
  256. 27: 0.80705009999999999,
  257. 28: 0.81440240000000008,
  258. 29: 0.80140849999999997,
  259. 30: 0.81307740000000006},
  260. 'year': {17: 2006,
  261. 18: 2007,
  262. 19: 2008,
  263. 20: 1985,
  264. 21: 1985,
  265. 22: 1985,
  266. 23: 1985,
  267. 24: 1985,
  268. 25: 1985,
  269. 26: 1985,
  270. 27: 1985,
  271. 28: 1985,
  272. 29: 1985,
  273. 30: 1986}}).reset_index()
  274. result = df.set_index(['year', 'PRuid', 'QC']).reset_index().reindex(
  275. columns=df.columns)
  276. tm.assert_frame_equal(result, df)
  277. def test_multi_assign(self):
  278. # GH 3626, an assignment of a sub-df to a df
  279. df = DataFrame({'FC': ['a', 'b', 'a', 'b', 'a', 'b'],
  280. 'PF': [0, 0, 0, 0, 1, 1],
  281. 'col1': lrange(6),
  282. 'col2': lrange(6, 12)})
  283. df.iloc[1, 0] = np.nan
  284. df2 = df.copy()
  285. mask = ~df2.FC.isna()
  286. cols = ['col1', 'col2']
  287. dft = df2 * 2
  288. dft.iloc[3, 3] = np.nan
  289. expected = DataFrame({'FC': ['a', np.nan, 'a', 'b', 'a', 'b'],
  290. 'PF': [0, 0, 0, 0, 1, 1],
  291. 'col1': Series([0, 1, 4, 6, 8, 10]),
  292. 'col2': [12, 7, 16, np.nan, 20, 22]})
  293. # frame on rhs
  294. df2.loc[mask, cols] = dft.loc[mask, cols]
  295. tm.assert_frame_equal(df2, expected)
  296. df2.loc[mask, cols] = dft.loc[mask, cols]
  297. tm.assert_frame_equal(df2, expected)
  298. # with an ndarray on rhs
  299. # coerces to float64 because values has float64 dtype
  300. # GH 14001
  301. expected = DataFrame({'FC': ['a', np.nan, 'a', 'b', 'a', 'b'],
  302. 'PF': [0, 0, 0, 0, 1, 1],
  303. 'col1': [0., 1., 4., 6., 8., 10.],
  304. 'col2': [12, 7, 16, np.nan, 20, 22]})
  305. df2 = df.copy()
  306. df2.loc[mask, cols] = dft.loc[mask, cols].values
  307. tm.assert_frame_equal(df2, expected)
  308. df2.loc[mask, cols] = dft.loc[mask, cols].values
  309. tm.assert_frame_equal(df2, expected)
  310. # broadcasting on the rhs is required
  311. df = DataFrame(dict(A=[1, 2, 0, 0, 0], B=[0, 0, 0, 10, 11], C=[
  312. 0, 0, 0, 10, 11], D=[3, 4, 5, 6, 7]))
  313. expected = df.copy()
  314. mask = expected['A'] == 0
  315. for col in ['A', 'B']:
  316. expected.loc[mask, col] = df['D']
  317. df.loc[df['A'] == 0, ['A', 'B']] = df['D']
  318. tm.assert_frame_equal(df, expected)
  319. def test_setitem_list(self):
  320. # GH 6043
  321. # ix with a list
  322. df = DataFrame(index=[0, 1], columns=[0])
  323. with catch_warnings(record=True):
  324. simplefilter("ignore")
  325. df.ix[1, 0] = [1, 2, 3]
  326. df.ix[1, 0] = [1, 2]
  327. result = DataFrame(index=[0, 1], columns=[0])
  328. with catch_warnings(record=True):
  329. simplefilter("ignore")
  330. result.ix[1, 0] = [1, 2]
  331. tm.assert_frame_equal(result, df)
  332. # ix with an object
  333. class TO(object):
  334. def __init__(self, value):
  335. self.value = value
  336. def __str__(self):
  337. return "[{0}]".format(self.value)
  338. __repr__ = __str__
  339. def __eq__(self, other):
  340. return self.value == other.value
  341. def view(self):
  342. return self
  343. df = DataFrame(index=[0, 1], columns=[0])
  344. with catch_warnings(record=True):
  345. simplefilter("ignore")
  346. df.ix[1, 0] = TO(1)
  347. df.ix[1, 0] = TO(2)
  348. result = DataFrame(index=[0, 1], columns=[0])
  349. with catch_warnings(record=True):
  350. simplefilter("ignore")
  351. result.ix[1, 0] = TO(2)
  352. tm.assert_frame_equal(result, df)
  353. # remains object dtype even after setting it back
  354. df = DataFrame(index=[0, 1], columns=[0])
  355. with catch_warnings(record=True):
  356. simplefilter("ignore")
  357. df.ix[1, 0] = TO(1)
  358. df.ix[1, 0] = np.nan
  359. result = DataFrame(index=[0, 1], columns=[0])
  360. tm.assert_frame_equal(result, df)
  361. def test_string_slice(self):
  362. # GH 14424
  363. # string indexing against datetimelike with object
  364. # dtype should properly raises KeyError
  365. df = DataFrame([1], Index([pd.Timestamp('2011-01-01')], dtype=object))
  366. assert df.index.is_all_dates
  367. with pytest.raises(KeyError):
  368. df['2011']
  369. with pytest.raises(KeyError):
  370. df.loc['2011', 0]
  371. df = DataFrame()
  372. assert not df.index.is_all_dates
  373. with pytest.raises(KeyError):
  374. df['2011']
  375. with pytest.raises(KeyError):
  376. df.loc['2011', 0]
  377. def test_astype_assignment(self):
  378. # GH4312 (iloc)
  379. df_orig = DataFrame([['1', '2', '3', '.4', 5, 6., 'foo']],
  380. columns=list('ABCDEFG'))
  381. df = df_orig.copy()
  382. df.iloc[:, 0:2] = df.iloc[:, 0:2].astype(np.int64)
  383. expected = DataFrame([[1, 2, '3', '.4', 5, 6., 'foo']],
  384. columns=list('ABCDEFG'))
  385. tm.assert_frame_equal(df, expected)
  386. df = df_orig.copy()
  387. df.iloc[:, 0:2] = df.iloc[:, 0:2]._convert(datetime=True, numeric=True)
  388. expected = DataFrame([[1, 2, '3', '.4', 5, 6., 'foo']],
  389. columns=list('ABCDEFG'))
  390. tm.assert_frame_equal(df, expected)
  391. # GH5702 (loc)
  392. df = df_orig.copy()
  393. df.loc[:, 'A'] = df.loc[:, 'A'].astype(np.int64)
  394. expected = DataFrame([[1, '2', '3', '.4', 5, 6., 'foo']],
  395. columns=list('ABCDEFG'))
  396. tm.assert_frame_equal(df, expected)
  397. df = df_orig.copy()
  398. df.loc[:, ['B', 'C']] = df.loc[:, ['B', 'C']].astype(np.int64)
  399. expected = DataFrame([['1', 2, 3, '.4', 5, 6., 'foo']],
  400. columns=list('ABCDEFG'))
  401. tm.assert_frame_equal(df, expected)
  402. # full replacements / no nans
  403. df = DataFrame({'A': [1., 2., 3., 4.]})
  404. df.iloc[:, 0] = df['A'].astype(np.int64)
  405. expected = DataFrame({'A': [1, 2, 3, 4]})
  406. tm.assert_frame_equal(df, expected)
  407. df = DataFrame({'A': [1., 2., 3., 4.]})
  408. df.loc[:, 'A'] = df['A'].astype(np.int64)
  409. expected = DataFrame({'A': [1, 2, 3, 4]})
  410. tm.assert_frame_equal(df, expected)
  411. @pytest.mark.parametrize("index,val", [
  412. (Index([0, 1, 2]), 2),
  413. (Index([0, 1, '2']), '2'),
  414. (Index([0, 1, 2, np.inf, 4]), 4),
  415. (Index([0, 1, 2, np.nan, 4]), 4),
  416. (Index([0, 1, 2, np.inf]), np.inf),
  417. (Index([0, 1, 2, np.nan]), np.nan),
  418. ])
  419. def test_index_contains(self, index, val):
  420. assert val in index
  421. @pytest.mark.parametrize("index,val", [
  422. (Index([0, 1, 2]), '2'),
  423. (Index([0, 1, '2']), 2),
  424. (Index([0, 1, 2, np.inf]), 4),
  425. (Index([0, 1, 2, np.nan]), 4),
  426. (Index([0, 1, 2, np.inf]), np.nan),
  427. (Index([0, 1, 2, np.nan]), np.inf),
  428. # Checking if np.inf in Int64Index should not cause an OverflowError
  429. # Related to GH 16957
  430. (pd.Int64Index([0, 1, 2]), np.inf),
  431. (pd.Int64Index([0, 1, 2]), np.nan),
  432. (pd.UInt64Index([0, 1, 2]), np.inf),
  433. (pd.UInt64Index([0, 1, 2]), np.nan),
  434. ])
  435. def test_index_not_contains(self, index, val):
  436. assert val not in index
  437. @pytest.mark.parametrize("index,val", [
  438. (Index([0, 1, '2']), 0),
  439. (Index([0, 1, '2']), '2'),
  440. ])
  441. def test_mixed_index_contains(self, index, val):
  442. # GH 19860
  443. assert val in index
  444. @pytest.mark.parametrize("index,val", [
  445. (Index([0, 1, '2']), '1'),
  446. (Index([0, 1, '2']), 2),
  447. ])
  448. def test_mixed_index_not_contains(self, index, val):
  449. # GH 19860
  450. assert val not in index
  451. def test_contains_with_float_index(self):
  452. # GH#22085
  453. integer_index = pd.Int64Index([0, 1, 2, 3])
  454. uinteger_index = pd.UInt64Index([0, 1, 2, 3])
  455. float_index = pd.Float64Index([0.1, 1.1, 2.2, 3.3])
  456. for index in (integer_index, uinteger_index):
  457. assert 1.1 not in index
  458. assert 1.0 in index
  459. assert 1 in index
  460. assert 1.1 in float_index
  461. assert 1.0 not in float_index
  462. assert 1 not in float_index
  463. def test_index_type_coercion(self):
  464. with catch_warnings(record=True):
  465. simplefilter("ignore")
  466. # GH 11836
  467. # if we have an index type and set it with something that looks
  468. # to numpy like the same, but is actually, not
  469. # (e.g. setting with a float or string '0')
  470. # then we need to coerce to object
  471. # integer indexes
  472. for s in [Series(range(5)),
  473. Series(range(5), index=range(1, 6))]:
  474. assert s.index.is_integer()
  475. for indexer in [lambda x: x.ix,
  476. lambda x: x.loc,
  477. lambda x: x]:
  478. s2 = s.copy()
  479. indexer(s2)[0.1] = 0
  480. assert s2.index.is_floating()
  481. assert indexer(s2)[0.1] == 0
  482. s2 = s.copy()
  483. indexer(s2)[0.0] = 0
  484. exp = s.index
  485. if 0 not in s:
  486. exp = Index(s.index.tolist() + [0])
  487. tm.assert_index_equal(s2.index, exp)
  488. s2 = s.copy()
  489. indexer(s2)['0'] = 0
  490. assert s2.index.is_object()
  491. for s in [Series(range(5), index=np.arange(5.))]:
  492. assert s.index.is_floating()
  493. for idxr in [lambda x: x.ix,
  494. lambda x: x.loc,
  495. lambda x: x]:
  496. s2 = s.copy()
  497. idxr(s2)[0.1] = 0
  498. assert s2.index.is_floating()
  499. assert idxr(s2)[0.1] == 0
  500. s2 = s.copy()
  501. idxr(s2)[0.0] = 0
  502. tm.assert_index_equal(s2.index, s.index)
  503. s2 = s.copy()
  504. idxr(s2)['0'] = 0
  505. assert s2.index.is_object()
  506. class TestMisc(Base):
  507. def test_float_index_to_mixed(self):
  508. df = DataFrame({0.0: np.random.rand(10), 1.0: np.random.rand(10)})
  509. df['a'] = 10
  510. tm.assert_frame_equal(DataFrame({0.0: df[0.0],
  511. 1.0: df[1.0],
  512. 'a': [10] * 10}),
  513. df)
  514. def test_float_index_non_scalar_assignment(self):
  515. df = DataFrame({'a': [1, 2, 3], 'b': [3, 4, 5]}, index=[1., 2., 3.])
  516. df.loc[df.index[:2]] = 1
  517. expected = DataFrame({'a': [1, 1, 3], 'b': [1, 1, 5]}, index=df.index)
  518. tm.assert_frame_equal(expected, df)
  519. df = DataFrame({'a': [1, 2, 3], 'b': [3, 4, 5]}, index=[1., 2., 3.])
  520. df2 = df.copy()
  521. df.loc[df.index] = df.loc[df.index]
  522. tm.assert_frame_equal(df, df2)
  523. def test_float_index_at_iat(self):
  524. s = Series([1, 2, 3], index=[0.1, 0.2, 0.3])
  525. for el, item in s.iteritems():
  526. assert s.at[el] == item
  527. for i in range(len(s)):
  528. assert s.iat[i] == i + 1
  529. def test_mixed_index_assignment(self):
  530. # GH 19860
  531. s = Series([1, 2, 3, 4, 5], index=['a', 'b', 'c', 1, 2])
  532. s.at['a'] = 11
  533. assert s.iat[0] == 11
  534. s.at[1] = 22
  535. assert s.iat[3] == 22
  536. def test_mixed_index_no_fallback(self):
  537. # GH 19860
  538. s = Series([1, 2, 3, 4, 5], index=['a', 'b', 'c', 1, 2])
  539. with pytest.raises(KeyError):
  540. s.at[0]
  541. with pytest.raises(KeyError):
  542. s.at[4]
  543. def test_rhs_alignment(self):
  544. # GH8258, tests that both rows & columns are aligned to what is
  545. # assigned to. covers both uniform data-type & multi-type cases
  546. def run_tests(df, rhs, right):
  547. # label, index, slice
  548. lbl_one, idx_one, slice_one = list('bcd'), [1, 2, 3], slice(1, 4)
  549. lbl_two, idx_two, slice_two = ['joe', 'jolie'], [1, 2], slice(1, 3)
  550. left = df.copy()
  551. left.loc[lbl_one, lbl_two] = rhs
  552. tm.assert_frame_equal(left, right)
  553. left = df.copy()
  554. left.iloc[idx_one, idx_two] = rhs
  555. tm.assert_frame_equal(left, right)
  556. left = df.copy()
  557. with catch_warnings(record=True):
  558. # XXX: finer-filter here.
  559. simplefilter("ignore")
  560. left.ix[slice_one, slice_two] = rhs
  561. tm.assert_frame_equal(left, right)
  562. left = df.copy()
  563. with catch_warnings(record=True):
  564. simplefilter("ignore")
  565. left.ix[idx_one, idx_two] = rhs
  566. tm.assert_frame_equal(left, right)
  567. left = df.copy()
  568. with catch_warnings(record=True):
  569. simplefilter("ignore")
  570. left.ix[lbl_one, lbl_two] = rhs
  571. tm.assert_frame_equal(left, right)
  572. xs = np.arange(20).reshape(5, 4)
  573. cols = ['jim', 'joe', 'jolie', 'joline']
  574. df = DataFrame(xs, columns=cols, index=list('abcde'))
  575. # right hand side; permute the indices and multiplpy by -2
  576. rhs = -2 * df.iloc[3:0:-1, 2:0:-1]
  577. # expected `right` result; just multiply by -2
  578. right = df.copy()
  579. right.iloc[1:4, 1:3] *= -2
  580. # run tests with uniform dtypes
  581. run_tests(df, rhs, right)
  582. # make frames multi-type & re-run tests
  583. for frame in [df, rhs, right]:
  584. frame['joe'] = frame['joe'].astype('float64')
  585. frame['jolie'] = frame['jolie'].map('@{0}'.format)
  586. run_tests(df, rhs, right)
  587. def test_str_label_slicing_with_negative_step(self):
  588. SLC = pd.IndexSlice
  589. def assert_slices_equivalent(l_slc, i_slc):
  590. tm.assert_series_equal(s.loc[l_slc], s.iloc[i_slc])
  591. if not idx.is_integer:
  592. # For integer indices, ix and plain getitem are position-based.
  593. tm.assert_series_equal(s[l_slc], s.iloc[i_slc])
  594. tm.assert_series_equal(s.loc[l_slc], s.iloc[i_slc])
  595. for idx in [_mklbl('A', 20), np.arange(20) + 100,
  596. np.linspace(100, 150, 20)]:
  597. idx = Index(idx)
  598. s = Series(np.arange(20), index=idx)
  599. assert_slices_equivalent(SLC[idx[9]::-1], SLC[9::-1])
  600. assert_slices_equivalent(SLC[:idx[9]:-1], SLC[:8:-1])
  601. assert_slices_equivalent(SLC[idx[13]:idx[9]:-1], SLC[13:8:-1])
  602. assert_slices_equivalent(SLC[idx[9]:idx[13]:-1], SLC[:0])
  603. def test_slice_with_zero_step_raises(self):
  604. s = Series(np.arange(20), index=_mklbl('A', 20))
  605. with pytest.raises(ValueError, match='slice step cannot be zero'):
  606. s[::0]
  607. with pytest.raises(ValueError, match='slice step cannot be zero'):
  608. s.loc[::0]
  609. with catch_warnings(record=True):
  610. simplefilter("ignore")
  611. with pytest.raises(ValueError, match='slice step cannot be zero'):
  612. s.ix[::0]
  613. def test_indexing_assignment_dict_already_exists(self):
  614. df = DataFrame({'x': [1, 2, 6],
  615. 'y': [2, 2, 8],
  616. 'z': [-5, 0, 5]}).set_index('z')
  617. expected = df.copy()
  618. rhs = dict(x=9, y=99)
  619. df.loc[5] = rhs
  620. expected.loc[5] = [9, 99]
  621. tm.assert_frame_equal(df, expected)
  622. def test_indexing_dtypes_on_empty(self):
  623. # Check that .iloc and .ix return correct dtypes GH9983
  624. df = DataFrame({'a': [1, 2, 3], 'b': ['b', 'b2', 'b3']})
  625. with catch_warnings(record=True):
  626. simplefilter("ignore")
  627. df2 = df.ix[[], :]
  628. assert df2.loc[:, 'a'].dtype == np.int64
  629. tm.assert_series_equal(df2.loc[:, 'a'], df2.iloc[:, 0])
  630. with catch_warnings(record=True):
  631. simplefilter("ignore")
  632. tm.assert_series_equal(df2.loc[:, 'a'], df2.ix[:, 0])
  633. def test_range_in_series_indexing(self):
  634. # range can cause an indexing error
  635. # GH 11652
  636. for x in [5, 999999, 1000000]:
  637. s = Series(index=range(x))
  638. s.loc[range(1)] = 42
  639. tm.assert_series_equal(s.loc[range(1)], Series(42.0, index=[0]))
  640. s.loc[range(2)] = 43
  641. tm.assert_series_equal(s.loc[range(2)], Series(43.0, index=[0, 1]))
  642. def test_non_reducing_slice(self):
  643. df = DataFrame([[0, 1], [2, 3]])
  644. slices = [
  645. # pd.IndexSlice[:, :],
  646. pd.IndexSlice[:, 1],
  647. pd.IndexSlice[1, :],
  648. pd.IndexSlice[[1], [1]],
  649. pd.IndexSlice[1, [1]],
  650. pd.IndexSlice[[1], 1],
  651. pd.IndexSlice[1],
  652. pd.IndexSlice[1, 1],
  653. slice(None, None, None),
  654. [0, 1],
  655. np.array([0, 1]),
  656. Series([0, 1])
  657. ]
  658. for slice_ in slices:
  659. tslice_ = _non_reducing_slice(slice_)
  660. assert isinstance(df.loc[tslice_], DataFrame)
  661. def test_list_slice(self):
  662. # like dataframe getitem
  663. slices = [['A'], Series(['A']), np.array(['A'])]
  664. df = DataFrame({'A': [1, 2], 'B': [3, 4]}, index=['A', 'B'])
  665. expected = pd.IndexSlice[:, ['A']]
  666. for subset in slices:
  667. result = _non_reducing_slice(subset)
  668. tm.assert_frame_equal(df.loc[result], df.loc[expected])
  669. def test_maybe_numeric_slice(self):
  670. df = DataFrame({'A': [1, 2], 'B': ['c', 'd'], 'C': [True, False]})
  671. result = _maybe_numeric_slice(df, slice_=None)
  672. expected = pd.IndexSlice[:, ['A']]
  673. assert result == expected
  674. result = _maybe_numeric_slice(df, None, include_bool=True)
  675. expected = pd.IndexSlice[:, ['A', 'C']]
  676. result = _maybe_numeric_slice(df, [1])
  677. expected = [1]
  678. assert result == expected
  679. def test_partial_boolean_frame_indexing(self):
  680. # GH 17170
  681. df = DataFrame(np.arange(9.).reshape(3, 3),
  682. index=list('abc'), columns=list('ABC'))
  683. index_df = DataFrame(1, index=list('ab'), columns=list('AB'))
  684. result = df[index_df.notnull()]
  685. expected = DataFrame(np.array([[0., 1., np.nan],
  686. [3., 4., np.nan],
  687. [np.nan] * 3]),
  688. index=list('abc'),
  689. columns=list('ABC'))
  690. tm.assert_frame_equal(result, expected)
  691. def test_no_reference_cycle(self):
  692. df = DataFrame({'a': [0, 1], 'b': [2, 3]})
  693. for name in ('loc', 'iloc', 'at', 'iat'):
  694. getattr(df, name)
  695. with catch_warnings(record=True):
  696. simplefilter("ignore")
  697. getattr(df, 'ix')
  698. wr = weakref.ref(df)
  699. del df
  700. assert wr() is None
  701. class TestSeriesNoneCoercion(object):
  702. EXPECTED_RESULTS = [
  703. # For numeric series, we should coerce to NaN.
  704. ([1, 2, 3], [np.nan, 2, 3]),
  705. ([1.0, 2.0, 3.0], [np.nan, 2.0, 3.0]),
  706. # For datetime series, we should coerce to NaT.
  707. ([datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)],
  708. [NaT, datetime(2000, 1, 2), datetime(2000, 1, 3)]),
  709. # For objects, we should preserve the None value.
  710. (["foo", "bar", "baz"], [None, "bar", "baz"]),
  711. ]
  712. def test_coercion_with_setitem(self):
  713. for start_data, expected_result in self.EXPECTED_RESULTS:
  714. start_series = Series(start_data)
  715. start_series[0] = None
  716. expected_series = Series(expected_result)
  717. tm.assert_series_equal(start_series, expected_series)
  718. def test_coercion_with_loc_setitem(self):
  719. for start_data, expected_result in self.EXPECTED_RESULTS:
  720. start_series = Series(start_data)
  721. start_series.loc[0] = None
  722. expected_series = Series(expected_result)
  723. tm.assert_series_equal(start_series, expected_series)
  724. def test_coercion_with_setitem_and_series(self):
  725. for start_data, expected_result in self.EXPECTED_RESULTS:
  726. start_series = Series(start_data)
  727. start_series[start_series == start_series[0]] = None
  728. expected_series = Series(expected_result)
  729. tm.assert_series_equal(start_series, expected_series)
  730. def test_coercion_with_loc_and_series(self):
  731. for start_data, expected_result in self.EXPECTED_RESULTS:
  732. start_series = Series(start_data)
  733. start_series.loc[start_series == start_series[0]] = None
  734. expected_series = Series(expected_result)
  735. tm.assert_series_equal(start_series, expected_series)
  736. class TestDataframeNoneCoercion(object):
  737. EXPECTED_SINGLE_ROW_RESULTS = [
  738. # For numeric series, we should coerce to NaN.
  739. ([1, 2, 3], [np.nan, 2, 3]),
  740. ([1.0, 2.0, 3.0], [np.nan, 2.0, 3.0]),
  741. # For datetime series, we should coerce to NaT.
  742. ([datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)],
  743. [NaT, datetime(2000, 1, 2), datetime(2000, 1, 3)]),
  744. # For objects, we should preserve the None value.
  745. (["foo", "bar", "baz"], [None, "bar", "baz"]),
  746. ]
  747. def test_coercion_with_loc(self):
  748. for start_data, expected_result, in self.EXPECTED_SINGLE_ROW_RESULTS:
  749. start_dataframe = DataFrame({'foo': start_data})
  750. start_dataframe.loc[0, ['foo']] = None
  751. expected_dataframe = DataFrame({'foo': expected_result})
  752. tm.assert_frame_equal(start_dataframe, expected_dataframe)
  753. def test_coercion_with_setitem_and_dataframe(self):
  754. for start_data, expected_result, in self.EXPECTED_SINGLE_ROW_RESULTS:
  755. start_dataframe = DataFrame({'foo': start_data})
  756. start_dataframe[start_dataframe['foo'] == start_dataframe['foo'][
  757. 0]] = None
  758. expected_dataframe = DataFrame({'foo': expected_result})
  759. tm.assert_frame_equal(start_dataframe, expected_dataframe)
  760. def test_none_coercion_loc_and_dataframe(self):
  761. for start_data, expected_result, in self.EXPECTED_SINGLE_ROW_RESULTS:
  762. start_dataframe = DataFrame({'foo': start_data})
  763. start_dataframe.loc[start_dataframe['foo'] == start_dataframe[
  764. 'foo'][0]] = None
  765. expected_dataframe = DataFrame({'foo': expected_result})
  766. tm.assert_frame_equal(start_dataframe, expected_dataframe)
  767. def test_none_coercion_mixed_dtypes(self):
  768. start_dataframe = DataFrame({
  769. 'a': [1, 2, 3],
  770. 'b': [1.0, 2.0, 3.0],
  771. 'c': [datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1,
  772. 3)],
  773. 'd': ['a', 'b', 'c']
  774. })
  775. start_dataframe.iloc[0] = None
  776. exp = DataFrame({'a': [np.nan, 2, 3],
  777. 'b': [np.nan, 2.0, 3.0],
  778. 'c': [NaT, datetime(2000, 1, 2),
  779. datetime(2000, 1, 3)],
  780. 'd': [None, 'b', 'c']})
  781. tm.assert_frame_equal(start_dataframe, exp)
  782. def test_validate_indices_ok():
  783. indices = np.asarray([0, 1])
  784. validate_indices(indices, 2)
  785. validate_indices(indices[:0], 0)
  786. validate_indices(np.array([-1, -1]), 0)
  787. def test_validate_indices_low():
  788. indices = np.asarray([0, -2])
  789. with pytest.raises(ValueError, match="'indices' contains"):
  790. validate_indices(indices, 2)
  791. def test_validate_indices_high():
  792. indices = np.asarray([0, 1, 2])
  793. with pytest.raises(IndexError, match="indices are out"):
  794. validate_indices(indices, 2)
  795. def test_validate_indices_empty():
  796. with pytest.raises(IndexError, match="indices are out"):
  797. validate_indices(np.array([0, 1]), 0)
  798. def test_extension_array_cross_section():
  799. # A cross-section of a homogeneous EA should be an EA
  800. df = pd.DataFrame({
  801. "A": pd.core.arrays.integer_array([1, 2]),
  802. "B": pd.core.arrays.integer_array([3, 4])
  803. }, index=['a', 'b'])
  804. expected = pd.Series(pd.core.arrays.integer_array([1, 3]),
  805. index=['A', 'B'], name='a')
  806. result = df.loc['a']
  807. tm.assert_series_equal(result, expected)
  808. result = df.iloc[0]
  809. tm.assert_series_equal(result, expected)
  810. def test_extension_array_cross_section_converts():
  811. df = pd.DataFrame({
  812. "A": pd.core.arrays.integer_array([1, 2]),
  813. "B": np.array([1, 2]),
  814. }, index=['a', 'b'])
  815. result = df.loc['a']
  816. expected = pd.Series([1, 1], dtype=object, index=['A', 'B'], name='a')
  817. tm.assert_series_equal(result, expected)
  818. result = df.iloc[0]
  819. tm.assert_series_equal(result, expected)