# coding=utf-8
# pylint: disable-msg=E1101,W0612
""" test get/set & misc """
from datetime import timedelta

import numpy as np
import pytest

from pandas.compat import lrange, range

from pandas.core.dtypes.common import is_scalar

import pandas as pd
from pandas import (
    Categorical, DataFrame, MultiIndex, Series, Timedelta, Timestamp)
import pandas.util.testing as tm
from pandas.util.testing import assert_series_equal

from pandas.tseries.offsets import BDay
  15. def test_basic_indexing():
  16. s = Series(np.random.randn(5), index=['a', 'b', 'a', 'a', 'b'])
  17. msg = "index out of bounds"
  18. with pytest.raises(IndexError, match=msg):
  19. s[5]
  20. msg = "index 5 is out of bounds for axis 0 with size 5"
  21. with pytest.raises(IndexError, match=msg):
  22. s[5] = 0
  23. with pytest.raises(KeyError, match=r"^'c'$"):
  24. s['c']
  25. s = s.sort_index()
  26. msg = r"index out of bounds|^5$"
  27. with pytest.raises(IndexError, match=msg):
  28. s[5]
  29. msg = r"index 5 is out of bounds for axis (0|1) with size 5|^5$"
  30. with pytest.raises(IndexError, match=msg):
  31. s[5] = 0
  32. def test_basic_getitem_with_labels(test_data):
  33. indices = test_data.ts.index[[5, 10, 15]]
  34. result = test_data.ts[indices]
  35. expected = test_data.ts.reindex(indices)
  36. assert_series_equal(result, expected)
  37. result = test_data.ts[indices[0]:indices[2]]
  38. expected = test_data.ts.loc[indices[0]:indices[2]]
  39. assert_series_equal(result, expected)
  40. # integer indexes, be careful
  41. s = Series(np.random.randn(10), index=lrange(0, 20, 2))
  42. inds = [0, 2, 5, 7, 8]
  43. arr_inds = np.array([0, 2, 5, 7, 8])
  44. with tm.assert_produces_warning(FutureWarning,
  45. check_stacklevel=False):
  46. result = s[inds]
  47. expected = s.reindex(inds)
  48. assert_series_equal(result, expected)
  49. with tm.assert_produces_warning(FutureWarning,
  50. check_stacklevel=False):
  51. result = s[arr_inds]
  52. expected = s.reindex(arr_inds)
  53. assert_series_equal(result, expected)
  54. # GH12089
  55. # with tz for values
  56. s = Series(pd.date_range("2011-01-01", periods=3, tz="US/Eastern"),
  57. index=['a', 'b', 'c'])
  58. expected = Timestamp('2011-01-01', tz='US/Eastern')
  59. result = s.loc['a']
  60. assert result == expected
  61. result = s.iloc[0]
  62. assert result == expected
  63. result = s['a']
  64. assert result == expected
  65. def test_getitem_setitem_ellipsis():
  66. s = Series(np.random.randn(10))
  67. np.fix(s)
  68. result = s[...]
  69. assert_series_equal(result, s)
  70. s[...] = 5
  71. assert (result == 5).all()
  72. def test_getitem_get(test_data):
  73. test_series = test_data.series
  74. test_obj_series = test_data.objSeries
  75. idx1 = test_series.index[5]
  76. idx2 = test_obj_series.index[5]
  77. assert test_series[idx1] == test_series.get(idx1)
  78. assert test_obj_series[idx2] == test_obj_series.get(idx2)
  79. assert test_series[idx1] == test_series[5]
  80. assert test_obj_series[idx2] == test_obj_series[5]
  81. assert test_series.get(-1) == test_series.get(test_series.index[-1])
  82. assert test_series[5] == test_series.get(test_series.index[5])
  83. # missing
  84. d = test_data.ts.index[0] - BDay()
  85. with pytest.raises(KeyError, match=r"Timestamp\('1999-12-31 00:00:00'\)"):
  86. test_data.ts[d]
  87. # None
  88. # GH 5652
  89. for s in [Series(), Series(index=list('abc'))]:
  90. result = s.get(None)
  91. assert result is None
  92. def test_getitem_fancy(test_data):
  93. slice1 = test_data.series[[1, 2, 3]]
  94. slice2 = test_data.objSeries[[1, 2, 3]]
  95. assert test_data.series.index[2] == slice1.index[1]
  96. assert test_data.objSeries.index[2] == slice2.index[1]
  97. assert test_data.series[2] == slice1[1]
  98. assert test_data.objSeries[2] == slice2[1]
  99. def test_getitem_generator(test_data):
  100. gen = (x > 0 for x in test_data.series)
  101. result = test_data.series[gen]
  102. result2 = test_data.series[iter(test_data.series > 0)]
  103. expected = test_data.series[test_data.series > 0]
  104. assert_series_equal(result, expected)
  105. assert_series_equal(result2, expected)
  106. def test_type_promotion():
  107. # GH12599
  108. s = pd.Series()
  109. s["a"] = pd.Timestamp("2016-01-01")
  110. s["b"] = 3.0
  111. s["c"] = "foo"
  112. expected = Series([pd.Timestamp("2016-01-01"), 3.0, "foo"],
  113. index=["a", "b", "c"])
  114. assert_series_equal(s, expected)
  115. @pytest.mark.parametrize(
  116. 'result_1, duplicate_item, expected_1',
  117. [
  118. [
  119. pd.Series({1: 12, 2: [1, 2, 2, 3]}), pd.Series({1: 313}),
  120. pd.Series({1: 12, }, dtype=object),
  121. ],
  122. [
  123. pd.Series({1: [1, 2, 3], 2: [1, 2, 2, 3]}),
  124. pd.Series({1: [1, 2, 3]}), pd.Series({1: [1, 2, 3], }),
  125. ],
  126. ])
  127. def test_getitem_with_duplicates_indices(
  128. result_1, duplicate_item, expected_1):
  129. # GH 17610
  130. result = result_1.append(duplicate_item)
  131. expected = expected_1.append(duplicate_item)
  132. assert_series_equal(result[1], expected)
  133. assert result[2] == result_1[2]
  134. def test_getitem_out_of_bounds(test_data):
  135. # don't segfault, GH #495
  136. msg = "index out of bounds"
  137. with pytest.raises(IndexError, match=msg):
  138. test_data.ts[len(test_data.ts)]
  139. # GH #917
  140. s = Series([])
  141. with pytest.raises(IndexError, match=msg):
  142. s[-1]
  143. def test_getitem_setitem_integers():
  144. # caused bug without test
  145. s = Series([1, 2, 3], ['a', 'b', 'c'])
  146. assert s.iloc[0] == s['a']
  147. s.iloc[0] = 5
  148. tm.assert_almost_equal(s['a'], 5)
  149. def test_getitem_box_float64(test_data):
  150. value = test_data.ts[5]
  151. assert isinstance(value, np.float64)
  152. @pytest.mark.parametrize(
  153. 'arr',
  154. [
  155. np.random.randn(10),
  156. tm.makeDateIndex(10, name='a').tz_localize(
  157. tz='US/Eastern'),
  158. ])
  159. def test_get(arr):
  160. # GH 21260
  161. s = Series(arr, index=[2 * i for i in range(len(arr))])
  162. assert s.get(4) == s.iloc[2]
  163. result = s.get([4, 6])
  164. expected = s.iloc[[2, 3]]
  165. tm.assert_series_equal(result, expected)
  166. result = s.get(slice(2))
  167. expected = s.iloc[[0, 1]]
  168. tm.assert_series_equal(result, expected)
  169. assert s.get(-1) is None
  170. assert s.get(s.index.max() + 1) is None
  171. s = Series(arr[:6], index=list('abcdef'))
  172. assert s.get('c') == s.iloc[2]
  173. result = s.get(slice('b', 'd'))
  174. expected = s.iloc[[1, 2, 3]]
  175. tm.assert_series_equal(result, expected)
  176. result = s.get('Z')
  177. assert result is None
  178. assert s.get(4) == s.iloc[4]
  179. assert s.get(-1) == s.iloc[-1]
  180. assert s.get(len(s)) is None
  181. # GH 21257
  182. s = pd.Series(arr)
  183. s2 = s[::2]
  184. assert s2.get(1) is None
  185. def test_series_box_timestamp():
  186. rng = pd.date_range('20090415', '20090519', freq='B')
  187. ser = Series(rng)
  188. assert isinstance(ser[5], pd.Timestamp)
  189. rng = pd.date_range('20090415', '20090519', freq='B')
  190. ser = Series(rng, index=rng)
  191. assert isinstance(ser[5], pd.Timestamp)
  192. assert isinstance(ser.iat[5], pd.Timestamp)
  193. def test_getitem_ambiguous_keyerror():
  194. s = Series(lrange(10), index=lrange(0, 20, 2))
  195. with pytest.raises(KeyError, match=r"^1L?$"):
  196. s[1]
  197. with pytest.raises(KeyError, match=r"^1L?$"):
  198. s.loc[1]
  199. def test_getitem_unordered_dup():
  200. obj = Series(lrange(5), index=['c', 'a', 'a', 'b', 'b'])
  201. assert is_scalar(obj['c'])
  202. assert obj['c'] == 0
  203. def test_getitem_dups_with_missing():
  204. # breaks reindex, so need to use .loc internally
  205. # GH 4246
  206. s = Series([1, 2, 3, 4], ['foo', 'bar', 'foo', 'bah'])
  207. with tm.assert_produces_warning(FutureWarning,
  208. check_stacklevel=False):
  209. expected = s.loc[['foo', 'bar', 'bah', 'bam']]
  210. with tm.assert_produces_warning(FutureWarning,
  211. check_stacklevel=False):
  212. result = s[['foo', 'bar', 'bah', 'bam']]
  213. assert_series_equal(result, expected)
  214. def test_getitem_dups():
  215. s = Series(range(5), index=['A', 'A', 'B', 'C', 'C'], dtype=np.int64)
  216. expected = Series([3, 4], index=['C', 'C'], dtype=np.int64)
  217. result = s['C']
  218. assert_series_equal(result, expected)
  219. def test_setitem_ambiguous_keyerror():
  220. s = Series(lrange(10), index=lrange(0, 20, 2))
  221. # equivalent of an append
  222. s2 = s.copy()
  223. s2[1] = 5
  224. expected = s.append(Series([5], index=[1]))
  225. assert_series_equal(s2, expected)
  226. s2 = s.copy()
  227. s2.loc[1] = 5
  228. expected = s.append(Series([5], index=[1]))
  229. assert_series_equal(s2, expected)
  230. def test_getitem_dataframe():
  231. rng = list(range(10))
  232. s = pd.Series(10, index=rng)
  233. df = pd.DataFrame(rng, index=rng)
  234. msg = ("Indexing a Series with DataFrame is not supported,"
  235. " use the appropriate DataFrame column")
  236. with pytest.raises(TypeError, match=msg):
  237. s[df > 5]
  238. def test_setitem(test_data):
  239. test_data.ts[test_data.ts.index[5]] = np.NaN
  240. test_data.ts[[1, 2, 17]] = np.NaN
  241. test_data.ts[6] = np.NaN
  242. assert np.isnan(test_data.ts[6])
  243. assert np.isnan(test_data.ts[2])
  244. test_data.ts[np.isnan(test_data.ts)] = 5
  245. assert not np.isnan(test_data.ts[2])
  246. # caught this bug when writing tests
  247. series = Series(tm.makeIntIndex(20).astype(float),
  248. index=tm.makeIntIndex(20))
  249. series[::2] = 0
  250. assert (series[::2] == 0).all()
  251. # set item that's not contained
  252. s = test_data.series.copy()
  253. s['foobar'] = 1
  254. app = Series([1], index=['foobar'], name='series')
  255. expected = test_data.series.append(app)
  256. assert_series_equal(s, expected)
  257. # Test for issue #10193
  258. key = pd.Timestamp('2012-01-01')
  259. series = pd.Series()
  260. series[key] = 47
  261. expected = pd.Series(47, [key])
  262. assert_series_equal(series, expected)
  263. series = pd.Series([], pd.DatetimeIndex([], freq='D'))
  264. series[key] = 47
  265. expected = pd.Series(47, pd.DatetimeIndex([key], freq='D'))
  266. assert_series_equal(series, expected)
  267. def test_setitem_dtypes():
  268. # change dtypes
  269. # GH 4463
  270. expected = Series([np.nan, 2, 3])
  271. s = Series([1, 2, 3])
  272. s.iloc[0] = np.nan
  273. assert_series_equal(s, expected)
  274. s = Series([1, 2, 3])
  275. s.loc[0] = np.nan
  276. assert_series_equal(s, expected)
  277. s = Series([1, 2, 3])
  278. s[0] = np.nan
  279. assert_series_equal(s, expected)
  280. s = Series([False])
  281. s.loc[0] = np.nan
  282. assert_series_equal(s, Series([np.nan]))
  283. s = Series([False, True])
  284. s.loc[0] = np.nan
  285. assert_series_equal(s, Series([np.nan, 1.0]))
  286. def test_set_value(test_data):
  287. idx = test_data.ts.index[10]
  288. with tm.assert_produces_warning(FutureWarning,
  289. check_stacklevel=False):
  290. res = test_data.ts.set_value(idx, 0)
  291. assert res is test_data.ts
  292. assert test_data.ts[idx] == 0
  293. # equiv
  294. s = test_data.series.copy()
  295. with tm.assert_produces_warning(FutureWarning,
  296. check_stacklevel=False):
  297. res = s.set_value('foobar', 0)
  298. assert res is s
  299. assert res.index[-1] == 'foobar'
  300. assert res['foobar'] == 0
  301. s = test_data.series.copy()
  302. s.loc['foobar'] = 0
  303. assert s.index[-1] == 'foobar'
  304. assert s['foobar'] == 0
  305. def test_setslice(test_data):
  306. sl = test_data.ts[5:20]
  307. assert len(sl) == len(sl.index)
  308. assert sl.index.is_unique is True
  309. # FutureWarning from NumPy about [slice(None, 5).
  310. @pytest.mark.filterwarnings("ignore:Using a non-tuple:FutureWarning")
  311. def test_basic_getitem_setitem_corner(test_data):
  312. # invalid tuples, e.g. td.ts[:, None] vs. td.ts[:, 2]
  313. msg = "Can only tuple-index with a MultiIndex"
  314. with pytest.raises(ValueError, match=msg):
  315. test_data.ts[:, 2]
  316. with pytest.raises(ValueError, match=msg):
  317. test_data.ts[:, 2] = 2
  318. # weird lists. [slice(0, 5)] will work but not two slices
  319. result = test_data.ts[[slice(None, 5)]]
  320. expected = test_data.ts[:5]
  321. assert_series_equal(result, expected)
  322. # OK
  323. msg = r"unhashable type(: 'slice')?"
  324. with pytest.raises(TypeError, match=msg):
  325. test_data.ts[[5, slice(None, None)]]
  326. with pytest.raises(TypeError, match=msg):
  327. test_data.ts[[5, slice(None, None)]] = 2
  328. @pytest.mark.parametrize('tz', ['US/Eastern', 'UTC', 'Asia/Tokyo'])
  329. def test_setitem_with_tz(tz):
  330. orig = pd.Series(pd.date_range('2016-01-01', freq='H', periods=3,
  331. tz=tz))
  332. assert orig.dtype == 'datetime64[ns, {0}]'.format(tz)
  333. # scalar
  334. s = orig.copy()
  335. s[1] = pd.Timestamp('2011-01-01', tz=tz)
  336. exp = pd.Series([pd.Timestamp('2016-01-01 00:00', tz=tz),
  337. pd.Timestamp('2011-01-01 00:00', tz=tz),
  338. pd.Timestamp('2016-01-01 02:00', tz=tz)])
  339. tm.assert_series_equal(s, exp)
  340. s = orig.copy()
  341. s.loc[1] = pd.Timestamp('2011-01-01', tz=tz)
  342. tm.assert_series_equal(s, exp)
  343. s = orig.copy()
  344. s.iloc[1] = pd.Timestamp('2011-01-01', tz=tz)
  345. tm.assert_series_equal(s, exp)
  346. # vector
  347. vals = pd.Series([pd.Timestamp('2011-01-01', tz=tz),
  348. pd.Timestamp('2012-01-01', tz=tz)], index=[1, 2])
  349. assert vals.dtype == 'datetime64[ns, {0}]'.format(tz)
  350. s[[1, 2]] = vals
  351. exp = pd.Series([pd.Timestamp('2016-01-01 00:00', tz=tz),
  352. pd.Timestamp('2011-01-01 00:00', tz=tz),
  353. pd.Timestamp('2012-01-01 00:00', tz=tz)])
  354. tm.assert_series_equal(s, exp)
  355. s = orig.copy()
  356. s.loc[[1, 2]] = vals
  357. tm.assert_series_equal(s, exp)
  358. s = orig.copy()
  359. s.iloc[[1, 2]] = vals
  360. tm.assert_series_equal(s, exp)
  361. def test_setitem_with_tz_dst():
  362. # GH XXX
  363. tz = 'US/Eastern'
  364. orig = pd.Series(pd.date_range('2016-11-06', freq='H', periods=3,
  365. tz=tz))
  366. assert orig.dtype == 'datetime64[ns, {0}]'.format(tz)
  367. # scalar
  368. s = orig.copy()
  369. s[1] = pd.Timestamp('2011-01-01', tz=tz)
  370. exp = pd.Series([pd.Timestamp('2016-11-06 00:00-04:00', tz=tz),
  371. pd.Timestamp('2011-01-01 00:00-05:00', tz=tz),
  372. pd.Timestamp('2016-11-06 01:00-05:00', tz=tz)])
  373. tm.assert_series_equal(s, exp)
  374. s = orig.copy()
  375. s.loc[1] = pd.Timestamp('2011-01-01', tz=tz)
  376. tm.assert_series_equal(s, exp)
  377. s = orig.copy()
  378. s.iloc[1] = pd.Timestamp('2011-01-01', tz=tz)
  379. tm.assert_series_equal(s, exp)
  380. # vector
  381. vals = pd.Series([pd.Timestamp('2011-01-01', tz=tz),
  382. pd.Timestamp('2012-01-01', tz=tz)], index=[1, 2])
  383. assert vals.dtype == 'datetime64[ns, {0}]'.format(tz)
  384. s[[1, 2]] = vals
  385. exp = pd.Series([pd.Timestamp('2016-11-06 00:00', tz=tz),
  386. pd.Timestamp('2011-01-01 00:00', tz=tz),
  387. pd.Timestamp('2012-01-01 00:00', tz=tz)])
  388. tm.assert_series_equal(s, exp)
  389. s = orig.copy()
  390. s.loc[[1, 2]] = vals
  391. tm.assert_series_equal(s, exp)
  392. s = orig.copy()
  393. s.iloc[[1, 2]] = vals
  394. tm.assert_series_equal(s, exp)
  395. def test_categorial_assigning_ops():
  396. orig = Series(Categorical(["b", "b"], categories=["a", "b"]))
  397. s = orig.copy()
  398. s[:] = "a"
  399. exp = Series(Categorical(["a", "a"], categories=["a", "b"]))
  400. tm.assert_series_equal(s, exp)
  401. s = orig.copy()
  402. s[1] = "a"
  403. exp = Series(Categorical(["b", "a"], categories=["a", "b"]))
  404. tm.assert_series_equal(s, exp)
  405. s = orig.copy()
  406. s[s.index > 0] = "a"
  407. exp = Series(Categorical(["b", "a"], categories=["a", "b"]))
  408. tm.assert_series_equal(s, exp)
  409. s = orig.copy()
  410. s[[False, True]] = "a"
  411. exp = Series(Categorical(["b", "a"], categories=["a", "b"]))
  412. tm.assert_series_equal(s, exp)
  413. s = orig.copy()
  414. s.index = ["x", "y"]
  415. s["y"] = "a"
  416. exp = Series(Categorical(["b", "a"], categories=["a", "b"]),
  417. index=["x", "y"])
  418. tm.assert_series_equal(s, exp)
  419. # ensure that one can set something to np.nan
  420. s = Series(Categorical([1, 2, 3]))
  421. exp = Series(Categorical([1, np.nan, 3], categories=[1, 2, 3]))
  422. s[1] = np.nan
  423. tm.assert_series_equal(s, exp)
  424. def test_slice(test_data):
  425. numSlice = test_data.series[10:20]
  426. numSliceEnd = test_data.series[-10:]
  427. objSlice = test_data.objSeries[10:20]
  428. assert test_data.series.index[9] not in numSlice.index
  429. assert test_data.objSeries.index[9] not in objSlice.index
  430. assert len(numSlice) == len(numSlice.index)
  431. assert test_data.series[numSlice.index[0]] == numSlice[numSlice.index[0]]
  432. assert numSlice.index[1] == test_data.series.index[11]
  433. assert tm.equalContents(numSliceEnd, np.array(test_data.series)[-10:])
  434. # Test return view.
  435. sl = test_data.series[10:20]
  436. sl[:] = 0
  437. assert (test_data.series[10:20] == 0).all()
  438. def test_slice_can_reorder_not_uniquely_indexed():
  439. s = Series(1, index=['a', 'a', 'b', 'b', 'c'])
  440. s[::-1] # it works!
  441. def test_ix_setitem(test_data):
  442. inds = test_data.series.index[[3, 4, 7]]
  443. result = test_data.series.copy()
  444. result.loc[inds] = 5
  445. expected = test_data.series.copy()
  446. expected[[3, 4, 7]] = 5
  447. assert_series_equal(result, expected)
  448. result.iloc[5:10] = 10
  449. expected[5:10] = 10
  450. assert_series_equal(result, expected)
  451. # set slice with indices
  452. d1, d2 = test_data.series.index[[5, 15]]
  453. result.loc[d1:d2] = 6
  454. expected[5:16] = 6 # because it's inclusive
  455. assert_series_equal(result, expected)
  456. # set index value
  457. test_data.series.loc[d1] = 4
  458. test_data.series.loc[d2] = 6
  459. assert test_data.series[d1] == 4
  460. assert test_data.series[d2] == 6
  461. def test_setitem_na():
  462. # these induce dtype changes
  463. expected = Series([np.nan, 3, np.nan, 5, np.nan, 7, np.nan, 9, np.nan])
  464. s = Series([2, 3, 4, 5, 6, 7, 8, 9, 10])
  465. s[::2] = np.nan
  466. assert_series_equal(s, expected)
  467. # gets coerced to float, right?
  468. expected = Series([np.nan, 1, np.nan, 0])
  469. s = Series([True, True, False, False])
  470. s[::2] = np.nan
  471. assert_series_equal(s, expected)
  472. expected = Series([np.nan, np.nan, np.nan, np.nan, np.nan, 5, 6, 7, 8,
  473. 9])
  474. s = Series(np.arange(10))
  475. s[:5] = np.nan
  476. assert_series_equal(s, expected)
  477. def test_timedelta_assignment():
  478. # GH 8209
  479. s = Series([])
  480. s.loc['B'] = timedelta(1)
  481. tm.assert_series_equal(s, Series(Timedelta('1 days'), index=['B']))
  482. s = s.reindex(s.index.insert(0, 'A'))
  483. tm.assert_series_equal(s, Series(
  484. [np.nan, Timedelta('1 days')], index=['A', 'B']))
  485. result = s.fillna(timedelta(1))
  486. expected = Series(Timedelta('1 days'), index=['A', 'B'])
  487. tm.assert_series_equal(result, expected)
  488. s.loc['A'] = timedelta(1)
  489. tm.assert_series_equal(s, expected)
  490. # GH 14155
  491. s = Series(10 * [np.timedelta64(10, 'm')])
  492. s.loc[[1, 2, 3]] = np.timedelta64(20, 'm')
  493. expected = pd.Series(10 * [np.timedelta64(10, 'm')])
  494. expected.loc[[1, 2, 3]] = pd.Timedelta(np.timedelta64(20, 'm'))
  495. tm.assert_series_equal(s, expected)
  496. def test_underlying_data_conversion():
  497. # GH 4080
  498. df = DataFrame({c: [1, 2, 3] for c in ['a', 'b', 'c']})
  499. df.set_index(['a', 'b', 'c'], inplace=True)
  500. s = Series([1], index=[(2, 2, 2)])
  501. df['val'] = 0
  502. df
  503. df['val'].update(s)
  504. expected = DataFrame(
  505. dict(a=[1, 2, 3], b=[1, 2, 3], c=[1, 2, 3], val=[0, 1, 0]))
  506. expected.set_index(['a', 'b', 'c'], inplace=True)
  507. tm.assert_frame_equal(df, expected)
  508. # GH 3970
  509. # these are chained assignments as well
  510. pd.set_option('chained_assignment', None)
  511. df = DataFrame({"aa": range(5), "bb": [2.2] * 5})
  512. df["cc"] = 0.0
  513. ck = [True] * len(df)
  514. df["bb"].iloc[0] = .13
  515. # TODO: unused
  516. df_tmp = df.iloc[ck] # noqa
  517. df["bb"].iloc[0] = .15
  518. assert df['bb'].iloc[0] == 0.15
  519. pd.set_option('chained_assignment', 'raise')
  520. # GH 3217
  521. df = DataFrame(dict(a=[1, 3], b=[np.nan, 2]))
  522. df['c'] = np.nan
  523. df['c'].update(pd.Series(['foo'], index=[0]))
  524. expected = DataFrame(dict(a=[1, 3], b=[np.nan, 2], c=['foo', np.nan]))
  525. tm.assert_frame_equal(df, expected)
  526. def test_preserve_refs(test_data):
  527. seq = test_data.ts[[5, 10, 15]]
  528. seq[1] = np.NaN
  529. assert not np.isnan(test_data.ts[10])
  530. def test_cast_on_putmask():
  531. # GH 2746
  532. # need to upcast
  533. s = Series([1, 2], index=[1, 2], dtype='int64')
  534. s[[True, False]] = Series([0], index=[1], dtype='int64')
  535. expected = Series([0, 2], index=[1, 2], dtype='int64')
  536. assert_series_equal(s, expected)
  537. def test_type_promote_putmask():
  538. # GH8387: test that changing types does not break alignment
  539. ts = Series(np.random.randn(100), index=np.arange(100, 0, -1)).round(5)
  540. left, mask = ts.copy(), ts > 0
  541. right = ts[mask].copy().map(str)
  542. left[mask] = right
  543. assert_series_equal(left, ts.map(lambda t: str(t) if t > 0 else t))
  544. s = Series([0, 1, 2, 0])
  545. mask = s > 0
  546. s2 = s[mask].map(str)
  547. s[mask] = s2
  548. assert_series_equal(s, Series([0, '1', '2', 0]))
  549. s = Series([0, 'foo', 'bar', 0])
  550. mask = Series([False, True, True, False])
  551. s2 = s[mask]
  552. s[mask] = s2
  553. assert_series_equal(s, Series([0, 'foo', 'bar', 0]))
  554. def test_multilevel_preserve_name():
  555. index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two',
  556. 'three']],
  557. codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
  558. [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
  559. names=['first', 'second'])
  560. s = Series(np.random.randn(len(index)), index=index, name='sth')
  561. result = s['foo']
  562. result2 = s.loc['foo']
  563. assert result.name == s.name
  564. assert result2.name == s.name
  565. def test_setitem_scalar_into_readonly_backing_data():
  566. # GH14359: test that you cannot mutate a read only buffer
  567. array = np.zeros(5)
  568. array.flags.writeable = False # make the array immutable
  569. series = Series(array)
  570. for n in range(len(series)):
  571. msg = "assignment destination is read-only"
  572. with pytest.raises(ValueError, match=msg):
  573. series[n] = 1
  574. assert array[n] == 0
  575. def test_setitem_slice_into_readonly_backing_data():
  576. # GH14359: test that you cannot mutate a read only buffer
  577. array = np.zeros(5)
  578. array.flags.writeable = False # make the array immutable
  579. series = Series(array)
  580. msg = "assignment destination is read-only"
  581. with pytest.raises(ValueError, match=msg):
  582. series[1:3] = 1
  583. assert not array.any()
"""
miscellaneous methods
"""
  587. def test_select(test_data):
  588. # deprecated: gh-12410
  589. with tm.assert_produces_warning(FutureWarning,
  590. check_stacklevel=False):
  591. n = len(test_data.ts)
  592. result = test_data.ts.select(lambda x: x >= test_data.ts.index[n // 2])
  593. expected = test_data.ts.reindex(test_data.ts.index[n // 2:])
  594. assert_series_equal(result, expected)
  595. result = test_data.ts.select(lambda x: x.weekday() == 2)
  596. expected = test_data.ts[test_data.ts.index.weekday == 2]
  597. assert_series_equal(result, expected)
  598. def test_pop():
  599. # GH 6600
  600. df = DataFrame({'A': 0, 'B': np.arange(5, dtype='int64'), 'C': 0, })
  601. k = df.iloc[4]
  602. result = k.pop('B')
  603. assert result == 4
  604. expected = Series([0, 0], index=['A', 'C'], name=4)
  605. assert_series_equal(k, expected)
  606. def test_take():
  607. s = Series([-1, 5, 6, 2, 4])
  608. actual = s.take([1, 3, 4])
  609. expected = Series([5, 2, 4], index=[1, 3, 4])
  610. tm.assert_series_equal(actual, expected)
  611. actual = s.take([-1, 3, 4])
  612. expected = Series([4, 2, 4], index=[4, 3, 4])
  613. tm.assert_series_equal(actual, expected)
  614. msg = "index {} is out of bounds for size 5"
  615. with pytest.raises(IndexError, match=msg.format(10)):
  616. s.take([1, 10])
  617. with pytest.raises(IndexError, match=msg.format(5)):
  618. s.take([2, 5])
  619. with tm.assert_produces_warning(FutureWarning):
  620. s.take([-1, 3, 4], convert=False)
  621. def test_take_categorical():
  622. # https://github.com/pandas-dev/pandas/issues/20664
  623. s = Series(pd.Categorical(['a', 'b', 'c']))
  624. result = s.take([-2, -2, 0])
  625. expected = Series(pd.Categorical(['b', 'b', 'a'],
  626. categories=['a', 'b', 'c']),
  627. index=[1, 1, 0])
  628. assert_series_equal(result, expected)
  629. def test_head_tail(test_data):
  630. assert_series_equal(test_data.series.head(), test_data.series[:5])
  631. assert_series_equal(test_data.series.head(0), test_data.series[0:0])
  632. assert_series_equal(test_data.series.tail(), test_data.series[-5:])
  633. assert_series_equal(test_data.series.tail(0), test_data.series[0:0])