test_setitem.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439
  1. from warnings import catch_warnings, simplefilter
  2. import numpy as np
  3. from numpy.random import randn
  4. import pytest
  5. import pandas as pd
  6. from pandas import (
  7. DataFrame, MultiIndex, Series, Timestamp, date_range, isna, notna)
  8. import pandas.core.common as com
  9. from pandas.util import testing as tm
  10. @pytest.mark.filterwarnings("ignore:\\n.ix:DeprecationWarning")
  11. class TestMultiIndexSetItem(object):
  12. def test_setitem_multiindex(self):
  13. with catch_warnings(record=True):
  14. for index_fn in ('ix', 'loc'):
  15. def assert_equal(a, b):
  16. assert a == b
  17. def check(target, indexers, value, compare_fn, expected=None):
  18. fn = getattr(target, index_fn)
  19. fn.__setitem__(indexers, value)
  20. result = fn.__getitem__(indexers)
  21. if expected is None:
  22. expected = value
  23. compare_fn(result, expected)
  24. # GH7190
  25. index = MultiIndex.from_product([np.arange(0, 100),
  26. np.arange(0, 80)],
  27. names=['time', 'firm'])
  28. t, n = 0, 2
  29. df = DataFrame(np.nan, columns=['A', 'w', 'l', 'a', 'x',
  30. 'X', 'd', 'profit'],
  31. index=index)
  32. check(target=df, indexers=((t, n), 'X'), value=0,
  33. compare_fn=assert_equal)
  34. df = DataFrame(-999, columns=['A', 'w', 'l', 'a', 'x',
  35. 'X', 'd', 'profit'],
  36. index=index)
  37. check(target=df, indexers=((t, n), 'X'), value=1,
  38. compare_fn=assert_equal)
  39. df = DataFrame(columns=['A', 'w', 'l', 'a', 'x',
  40. 'X', 'd', 'profit'],
  41. index=index)
  42. check(target=df, indexers=((t, n), 'X'), value=2,
  43. compare_fn=assert_equal)
  44. # gh-7218: assigning with 0-dim arrays
  45. df = DataFrame(-999, columns=['A', 'w', 'l', 'a', 'x',
  46. 'X', 'd', 'profit'],
  47. index=index)
  48. check(target=df,
  49. indexers=((t, n), 'X'),
  50. value=np.array(3),
  51. compare_fn=assert_equal,
  52. expected=3, )
  53. # GH5206
  54. df = DataFrame(np.arange(25).reshape(5, 5),
  55. columns='A,B,C,D,E'.split(','), dtype=float)
  56. df['F'] = 99
  57. row_selection = df['A'] % 2 == 0
  58. col_selection = ['B', 'C']
  59. with catch_warnings(record=True):
  60. df.ix[row_selection, col_selection] = df['F']
  61. output = DataFrame(99., index=[0, 2, 4], columns=['B', 'C'])
  62. with catch_warnings(record=True):
  63. tm.assert_frame_equal(df.ix[row_selection, col_selection],
  64. output)
  65. check(target=df,
  66. indexers=(row_selection, col_selection),
  67. value=df['F'],
  68. compare_fn=tm.assert_frame_equal,
  69. expected=output, )
  70. # GH11372
  71. idx = MultiIndex.from_product([
  72. ['A', 'B', 'C'],
  73. date_range('2015-01-01', '2015-04-01', freq='MS')])
  74. cols = MultiIndex.from_product([
  75. ['foo', 'bar'],
  76. date_range('2016-01-01', '2016-02-01', freq='MS')])
  77. df = DataFrame(np.random.random((12, 4)),
  78. index=idx, columns=cols)
  79. subidx = MultiIndex.from_tuples(
  80. [('A', Timestamp('2015-01-01')),
  81. ('A', Timestamp('2015-02-01'))])
  82. subcols = MultiIndex.from_tuples(
  83. [('foo', Timestamp('2016-01-01')),
  84. ('foo', Timestamp('2016-02-01'))])
  85. vals = DataFrame(np.random.random((2, 2)),
  86. index=subidx, columns=subcols)
  87. check(target=df,
  88. indexers=(subidx, subcols),
  89. value=vals,
  90. compare_fn=tm.assert_frame_equal, )
  91. # set all columns
  92. vals = DataFrame(
  93. np.random.random((2, 4)), index=subidx, columns=cols)
  94. check(target=df,
  95. indexers=(subidx, slice(None, None, None)),
  96. value=vals,
  97. compare_fn=tm.assert_frame_equal, )
  98. # identity
  99. copy = df.copy()
  100. check(target=df, indexers=(df.index, df.columns), value=df,
  101. compare_fn=tm.assert_frame_equal, expected=copy)
  102. def test_multiindex_setitem(self):
  103. # GH 3738
  104. # setting with a multi-index right hand side
  105. arrays = [np.array(['bar', 'bar', 'baz', 'qux', 'qux', 'bar']),
  106. np.array(['one', 'two', 'one', 'one', 'two', 'one']),
  107. np.arange(0, 6, 1)]
  108. df_orig = DataFrame(np.random.randn(6, 3), index=arrays,
  109. columns=['A', 'B', 'C']).sort_index()
  110. expected = df_orig.loc[['bar']] * 2
  111. df = df_orig.copy()
  112. df.loc[['bar']] *= 2
  113. tm.assert_frame_equal(df.loc[['bar']], expected)
  114. # raise because these have differing levels
  115. with pytest.raises(TypeError):
  116. df.loc['bar'] *= 2
  117. # from SO
  118. # http://stackoverflow.com/questions/24572040/pandas-access-the-level-of-multiindex-for-inplace-operation
  119. df_orig = DataFrame.from_dict({'price': {
  120. ('DE', 'Coal', 'Stock'): 2,
  121. ('DE', 'Gas', 'Stock'): 4,
  122. ('DE', 'Elec', 'Demand'): 1,
  123. ('FR', 'Gas', 'Stock'): 5,
  124. ('FR', 'Solar', 'SupIm'): 0,
  125. ('FR', 'Wind', 'SupIm'): 0
  126. }})
  127. df_orig.index = MultiIndex.from_tuples(df_orig.index,
  128. names=['Sit', 'Com', 'Type'])
  129. expected = df_orig.copy()
  130. expected.iloc[[0, 2, 3]] *= 2
  131. idx = pd.IndexSlice
  132. df = df_orig.copy()
  133. df.loc[idx[:, :, 'Stock'], :] *= 2
  134. tm.assert_frame_equal(df, expected)
  135. df = df_orig.copy()
  136. df.loc[idx[:, :, 'Stock'], 'price'] *= 2
  137. tm.assert_frame_equal(df, expected)
  138. def test_multiindex_assignment(self):
  139. # GH3777 part 2
  140. # mixed dtype
  141. df = DataFrame(np.random.randint(5, 10, size=9).reshape(3, 3),
  142. columns=list('abc'),
  143. index=[[4, 4, 8], [8, 10, 12]])
  144. df['d'] = np.nan
  145. arr = np.array([0., 1.])
  146. with catch_warnings(record=True):
  147. df.ix[4, 'd'] = arr
  148. tm.assert_series_equal(df.ix[4, 'd'],
  149. Series(arr, index=[8, 10], name='d'))
  150. # single dtype
  151. df = DataFrame(np.random.randint(5, 10, size=9).reshape(3, 3),
  152. columns=list('abc'),
  153. index=[[4, 4, 8], [8, 10, 12]])
  154. with catch_warnings(record=True):
  155. df.ix[4, 'c'] = arr
  156. exp = Series(arr, index=[8, 10], name='c', dtype='float64')
  157. tm.assert_series_equal(df.ix[4, 'c'], exp)
  158. # scalar ok
  159. with catch_warnings(record=True):
  160. df.ix[4, 'c'] = 10
  161. exp = Series(10, index=[8, 10], name='c', dtype='float64')
  162. tm.assert_series_equal(df.ix[4, 'c'], exp)
  163. # invalid assignments
  164. with pytest.raises(ValueError):
  165. with catch_warnings(record=True):
  166. df.ix[4, 'c'] = [0, 1, 2, 3]
  167. with pytest.raises(ValueError):
  168. with catch_warnings(record=True):
  169. df.ix[4, 'c'] = [0]
  170. # groupby example
  171. NUM_ROWS = 100
  172. NUM_COLS = 10
  173. col_names = ['A' + num for num in
  174. map(str, np.arange(NUM_COLS).tolist())]
  175. index_cols = col_names[:5]
  176. df = DataFrame(np.random.randint(5, size=(NUM_ROWS, NUM_COLS)),
  177. dtype=np.int64, columns=col_names)
  178. df = df.set_index(index_cols).sort_index()
  179. grp = df.groupby(level=index_cols[:4])
  180. df['new_col'] = np.nan
  181. f_index = np.arange(5)
  182. def f(name, df2):
  183. return Series(np.arange(df2.shape[0]),
  184. name=df2.index.values[0]).reindex(f_index)
  185. # TODO(wesm): unused?
  186. # new_df = pd.concat([f(name, df2) for name, df2 in grp], axis=1).T
  187. # we are actually operating on a copy here
  188. # but in this case, that's ok
  189. for name, df2 in grp:
  190. new_vals = np.arange(df2.shape[0])
  191. with catch_warnings(record=True):
  192. df.ix[name, 'new_col'] = new_vals
  193. def test_series_setitem(
  194. self, multiindex_year_month_day_dataframe_random_data):
  195. ymd = multiindex_year_month_day_dataframe_random_data
  196. s = ymd['A']
  197. s[2000, 3] = np.nan
  198. assert isna(s.values[42:65]).all()
  199. assert notna(s.values[:42]).all()
  200. assert notna(s.values[65:]).all()
  201. s[2000, 3, 10] = np.nan
  202. assert isna(s[49])
  203. def test_frame_getitem_setitem_boolean(
  204. self, multiindex_dataframe_random_data):
  205. frame = multiindex_dataframe_random_data
  206. df = frame.T.copy()
  207. values = df.values
  208. result = df[df > 0]
  209. expected = df.where(df > 0)
  210. tm.assert_frame_equal(result, expected)
  211. df[df > 0] = 5
  212. values[values > 0] = 5
  213. tm.assert_almost_equal(df.values, values)
  214. df[df == 5] = 0
  215. values[values == 5] = 0
  216. tm.assert_almost_equal(df.values, values)
  217. # a df that needs alignment first
  218. df[df[:-1] < 0] = 2
  219. np.putmask(values[:-1], values[:-1] < 0, 2)
  220. tm.assert_almost_equal(df.values, values)
  221. with pytest.raises(TypeError, match='boolean values only'):
  222. df[df * 0] = 2
  223. def test_frame_getitem_setitem_multislice(self):
  224. levels = [['t1', 't2'], ['a', 'b', 'c']]
  225. codes = [[0, 0, 0, 1, 1], [0, 1, 2, 0, 1]]
  226. midx = MultiIndex(codes=codes, levels=levels, names=[None, 'id'])
  227. df = DataFrame({'value': [1, 2, 3, 7, 8]}, index=midx)
  228. result = df.loc[:, 'value']
  229. tm.assert_series_equal(df['value'], result)
  230. with catch_warnings(record=True):
  231. simplefilter("ignore", DeprecationWarning)
  232. result = df.ix[:, 'value']
  233. tm.assert_series_equal(df['value'], result)
  234. result = df.loc[df.index[1:3], 'value']
  235. tm.assert_series_equal(df['value'][1:3], result)
  236. result = df.loc[:, :]
  237. tm.assert_frame_equal(df, result)
  238. result = df
  239. df.loc[:, 'value'] = 10
  240. result['value'] = 10
  241. tm.assert_frame_equal(df, result)
  242. df.loc[:, :] = 10
  243. tm.assert_frame_equal(df, result)
  244. def test_frame_setitem_multi_column(self):
  245. df = DataFrame(randn(10, 4), columns=[['a', 'a', 'b', 'b'],
  246. [0, 1, 0, 1]])
  247. cp = df.copy()
  248. cp['a'] = cp['b']
  249. tm.assert_frame_equal(cp['a'], cp['b'])
  250. # set with ndarray
  251. cp = df.copy()
  252. cp['a'] = cp['b'].values
  253. tm.assert_frame_equal(cp['a'], cp['b'])
  254. # ---------------------------------------
  255. # #1803
  256. columns = MultiIndex.from_tuples([('A', '1'), ('A', '2'), ('B', '1')])
  257. df = DataFrame(index=[1, 3, 5], columns=columns)
  258. # Works, but adds a column instead of updating the two existing ones
  259. df['A'] = 0.0 # Doesn't work
  260. assert (df['A'].values == 0).all()
  261. # it broadcasts
  262. df['B', '1'] = [1, 2, 3]
  263. df['A'] = df['B', '1']
  264. sliced_a1 = df['A', '1']
  265. sliced_a2 = df['A', '2']
  266. sliced_b1 = df['B', '1']
  267. tm.assert_series_equal(sliced_a1, sliced_b1, check_names=False)
  268. tm.assert_series_equal(sliced_a2, sliced_b1, check_names=False)
  269. assert sliced_a1.name == ('A', '1')
  270. assert sliced_a2.name == ('A', '2')
  271. assert sliced_b1.name == ('B', '1')
  272. def test_getitem_setitem_tuple_plus_columns(
  273. self, multiindex_year_month_day_dataframe_random_data):
  274. # GH #1013
  275. ymd = multiindex_year_month_day_dataframe_random_data
  276. df = ymd[:5]
  277. result = df.loc[(2000, 1, 6), ['A', 'B', 'C']]
  278. expected = df.loc[2000, 1, 6][['A', 'B', 'C']]
  279. tm.assert_series_equal(result, expected)
  280. def test_getitem_setitem_slice_integers(self):
  281. index = MultiIndex(levels=[[0, 1, 2], [0, 2]],
  282. codes=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]])
  283. frame = DataFrame(np.random.randn(len(index), 4), index=index,
  284. columns=['a', 'b', 'c', 'd'])
  285. res = frame.loc[1:2]
  286. exp = frame.reindex(frame.index[2:])
  287. tm.assert_frame_equal(res, exp)
  288. frame.loc[1:2] = 7
  289. assert (frame.loc[1:2] == 7).values.all()
  290. series = Series(np.random.randn(len(index)), index=index)
  291. res = series.loc[1:2]
  292. exp = series.reindex(series.index[2:])
  293. tm.assert_series_equal(res, exp)
  294. series.loc[1:2] = 7
  295. assert (series.loc[1:2] == 7).values.all()
  296. def test_setitem_change_dtype(self, multiindex_dataframe_random_data):
  297. frame = multiindex_dataframe_random_data
  298. dft = frame.T
  299. s = dft['foo', 'two']
  300. dft['foo', 'two'] = s > s.median()
  301. tm.assert_series_equal(dft['foo', 'two'], s > s.median())
  302. # assert isinstance(dft._data.blocks[1].items, MultiIndex)
  303. reindexed = dft.reindex(columns=[('foo', 'two')])
  304. tm.assert_series_equal(reindexed['foo', 'two'], s > s.median())
  305. def test_set_column_scalar_with_ix(self, multiindex_dataframe_random_data):
  306. frame = multiindex_dataframe_random_data
  307. subset = frame.index[[1, 4, 5]]
  308. frame.loc[subset] = 99
  309. assert (frame.loc[subset].values == 99).all()
  310. col = frame['B']
  311. col[subset] = 97
  312. assert (frame.loc[subset, 'B'] == 97).all()
  313. def test_nonunique_assignment_1750(self):
  314. df = DataFrame([[1, 1, "x", "X"], [1, 1, "y", "Y"], [1, 2, "z", "Z"]],
  315. columns=list("ABCD"))
  316. df = df.set_index(['A', 'B'])
  317. ix = MultiIndex.from_tuples([(1, 1)])
  318. df.loc[ix, "C"] = '_'
  319. assert (df.xs((1, 1))['C'] == '_').all()
  320. def test_astype_assignment_with_dups(self):
  321. # GH 4686
  322. # assignment with dups that has a dtype change
  323. cols = MultiIndex.from_tuples([('A', '1'), ('B', '1'), ('A', '2')])
  324. df = DataFrame(np.arange(3).reshape((1, 3)),
  325. columns=cols, dtype=object)
  326. index = df.index.copy()
  327. df['A'] = df['A'].astype(np.float64)
  328. tm.assert_index_equal(df.index, index)
  329. def test_frame_setitem_view_direct(multiindex_dataframe_random_data):
  330. # this works because we are modifying the underlying array
  331. # really a no-no
  332. df = multiindex_dataframe_random_data.T
  333. df['foo'].values[:] = 0
  334. assert (df['foo'].values == 0).all()
  335. def test_frame_setitem_copy_raises(multiindex_dataframe_random_data):
  336. # will raise/warn as its chained assignment
  337. df = multiindex_dataframe_random_data.T
  338. msg = "A value is trying to be set on a copy of a slice from a DataFrame"
  339. with pytest.raises(com.SettingWithCopyError, match=msg):
  340. df['foo']['one'] = 2
  341. def test_frame_setitem_copy_no_write(multiindex_dataframe_random_data):
  342. frame = multiindex_dataframe_random_data.T
  343. expected = frame
  344. df = frame.copy()
  345. msg = "A value is trying to be set on a copy of a slice from a DataFrame"
  346. with pytest.raises(com.SettingWithCopyError, match=msg):
  347. df['foo']['one'] = 2
  348. result = df
  349. tm.assert_frame_equal(result, expected)