# test_ix.py (11 KB)
  1. """ test indexing with ix """
  2. from warnings import catch_warnings
  3. import numpy as np
  4. import pytest
  5. from pandas.compat import lrange
  6. from pandas.core.dtypes.common import is_scalar
  7. import pandas as pd
  8. from pandas import DataFrame, Series, option_context
  9. from pandas.util import testing as tm
  10. def test_ix_deprecation():
  11. # GH 15114
  12. df = DataFrame({'A': [1, 2, 3]})
  13. with tm.assert_produces_warning(DeprecationWarning,
  14. check_stacklevel=False):
  15. df.ix[1, 'A']
  16. @pytest.mark.filterwarnings("ignore:\\n.ix:DeprecationWarning")
  17. class TestIX(object):
  18. def test_ix_loc_setitem_consistency(self):
  19. # GH 5771
  20. # loc with slice and series
  21. s = Series(0, index=[4, 5, 6])
  22. s.loc[4:5] += 1
  23. expected = Series([1, 1, 0], index=[4, 5, 6])
  24. tm.assert_series_equal(s, expected)
  25. # GH 5928
  26. # chained indexing assignment
  27. df = DataFrame({'a': [0, 1, 2]})
  28. expected = df.copy()
  29. with catch_warnings(record=True):
  30. expected.ix[[0, 1, 2], 'a'] = -expected.ix[[0, 1, 2], 'a']
  31. with catch_warnings(record=True):
  32. df['a'].ix[[0, 1, 2]] = -df['a'].ix[[0, 1, 2]]
  33. tm.assert_frame_equal(df, expected)
  34. df = DataFrame({'a': [0, 1, 2], 'b': [0, 1, 2]})
  35. with catch_warnings(record=True):
  36. df['a'].ix[[0, 1, 2]] = -df['a'].ix[[0, 1, 2]].astype(
  37. 'float64') + 0.5
  38. expected = DataFrame({'a': [0.5, -0.5, -1.5], 'b': [0, 1, 2]})
  39. tm.assert_frame_equal(df, expected)
  40. # GH 8607
  41. # ix setitem consistency
  42. df = DataFrame({'delta': [1174, 904, 161],
  43. 'elapsed': [7673, 9277, 1470],
  44. 'timestamp': [1413840976, 1413842580, 1413760580]})
  45. expected = DataFrame({'delta': [1174, 904, 161],
  46. 'elapsed': [7673, 9277, 1470],
  47. 'timestamp': pd.to_datetime(
  48. [1413840976, 1413842580, 1413760580],
  49. unit='s')
  50. })
  51. df2 = df.copy()
  52. df2['timestamp'] = pd.to_datetime(df['timestamp'], unit='s')
  53. tm.assert_frame_equal(df2, expected)
  54. df2 = df.copy()
  55. df2.loc[:, 'timestamp'] = pd.to_datetime(df['timestamp'], unit='s')
  56. tm.assert_frame_equal(df2, expected)
  57. df2 = df.copy()
  58. with catch_warnings(record=True):
  59. df2.ix[:, 2] = pd.to_datetime(df['timestamp'], unit='s')
  60. tm.assert_frame_equal(df2, expected)
  61. def test_ix_loc_consistency(self):
  62. # GH 8613
  63. # some edge cases where ix/loc should return the same
  64. # this is not an exhaustive case
  65. def compare(result, expected):
  66. if is_scalar(expected):
  67. assert result == expected
  68. else:
  69. assert expected.equals(result)
  70. # failure cases for .loc, but these work for .ix
  71. df = DataFrame(np.random.randn(5, 4), columns=list('ABCD'))
  72. for key in [slice(1, 3), tuple([slice(0, 2), slice(0, 2)]),
  73. tuple([slice(0, 2), df.columns[0:2]])]:
  74. for index in [tm.makeStringIndex, tm.makeUnicodeIndex,
  75. tm.makeDateIndex, tm.makePeriodIndex,
  76. tm.makeTimedeltaIndex]:
  77. df.index = index(len(df.index))
  78. with catch_warnings(record=True):
  79. df.ix[key]
  80. pytest.raises(TypeError, lambda: df.loc[key])
  81. df = DataFrame(np.random.randn(5, 4), columns=list('ABCD'),
  82. index=pd.date_range('2012-01-01', periods=5))
  83. for key in ['2012-01-03',
  84. '2012-01-31',
  85. slice('2012-01-03', '2012-01-03'),
  86. slice('2012-01-03', '2012-01-04'),
  87. slice('2012-01-03', '2012-01-06', 2),
  88. slice('2012-01-03', '2012-01-31'),
  89. tuple([[True, True, True, False, True]]), ]:
  90. # getitem
  91. # if the expected raises, then compare the exceptions
  92. try:
  93. with catch_warnings(record=True):
  94. expected = df.ix[key]
  95. except KeyError:
  96. pytest.raises(KeyError, lambda: df.loc[key])
  97. continue
  98. result = df.loc[key]
  99. compare(result, expected)
  100. # setitem
  101. df1 = df.copy()
  102. df2 = df.copy()
  103. with catch_warnings(record=True):
  104. df1.ix[key] = 10
  105. df2.loc[key] = 10
  106. compare(df2, df1)
  107. # edge cases
  108. s = Series([1, 2, 3, 4], index=list('abde'))
  109. result1 = s['a':'c']
  110. with catch_warnings(record=True):
  111. result2 = s.ix['a':'c']
  112. result3 = s.loc['a':'c']
  113. tm.assert_series_equal(result1, result2)
  114. tm.assert_series_equal(result1, result3)
  115. # now work rather than raising KeyError
  116. s = Series(range(5), [-2, -1, 1, 2, 3])
  117. with catch_warnings(record=True):
  118. result1 = s.ix[-10:3]
  119. result2 = s.loc[-10:3]
  120. tm.assert_series_equal(result1, result2)
  121. with catch_warnings(record=True):
  122. result1 = s.ix[0:3]
  123. result2 = s.loc[0:3]
  124. tm.assert_series_equal(result1, result2)
  125. def test_ix_weird_slicing(self):
  126. # http://stackoverflow.com/q/17056560/1240268
  127. df = DataFrame({'one': [1, 2, 3, np.nan, np.nan],
  128. 'two': [1, 2, 3, 4, 5]})
  129. df.loc[df['one'] > 1, 'two'] = -df['two']
  130. expected = DataFrame({'one': {0: 1.0,
  131. 1: 2.0,
  132. 2: 3.0,
  133. 3: np.nan,
  134. 4: np.nan},
  135. 'two': {0: 1,
  136. 1: -2,
  137. 2: -3,
  138. 3: 4,
  139. 4: 5}})
  140. tm.assert_frame_equal(df, expected)
  141. def test_ix_assign_column_mixed(self):
  142. # GH #1142
  143. df = DataFrame(tm.getSeriesData())
  144. df['foo'] = 'bar'
  145. orig = df.loc[:, 'B'].copy()
  146. df.loc[:, 'B'] = df.loc[:, 'B'] + 1
  147. tm.assert_series_equal(df.B, orig + 1)
  148. # GH 3668, mixed frame with series value
  149. df = DataFrame({'x': lrange(10), 'y': lrange(10, 20), 'z': 'bar'})
  150. expected = df.copy()
  151. for i in range(5):
  152. indexer = i * 2
  153. v = 1000 + i * 200
  154. expected.loc[indexer, 'y'] = v
  155. assert expected.loc[indexer, 'y'] == v
  156. df.loc[df.x % 2 == 0, 'y'] = df.loc[df.x % 2 == 0, 'y'] * 100
  157. tm.assert_frame_equal(df, expected)
  158. # GH 4508, making sure consistency of assignments
  159. df = DataFrame({'a': [1, 2, 3], 'b': [0, 1, 2]})
  160. df.loc[[0, 2, ], 'b'] = [100, -100]
  161. expected = DataFrame({'a': [1, 2, 3], 'b': [100, 1, -100]})
  162. tm.assert_frame_equal(df, expected)
  163. df = DataFrame({'a': lrange(4)})
  164. df['b'] = np.nan
  165. df.loc[[1, 3], 'b'] = [100, -100]
  166. expected = DataFrame({'a': [0, 1, 2, 3],
  167. 'b': [np.nan, 100, np.nan, -100]})
  168. tm.assert_frame_equal(df, expected)
  169. # ok, but chained assignments are dangerous
  170. # if we turn off chained assignment it will work
  171. with option_context('chained_assignment', None):
  172. df = DataFrame({'a': lrange(4)})
  173. df['b'] = np.nan
  174. df['b'].loc[[1, 3]] = [100, -100]
  175. tm.assert_frame_equal(df, expected)
  176. def test_ix_get_set_consistency(self):
  177. # GH 4544
  178. # ix/loc get/set not consistent when
  179. # a mixed int/string index
  180. df = DataFrame(np.arange(16).reshape((4, 4)),
  181. columns=['a', 'b', 8, 'c'],
  182. index=['e', 7, 'f', 'g'])
  183. with catch_warnings(record=True):
  184. assert df.ix['e', 8] == 2
  185. assert df.loc['e', 8] == 2
  186. with catch_warnings(record=True):
  187. df.ix['e', 8] = 42
  188. assert df.ix['e', 8] == 42
  189. assert df.loc['e', 8] == 42
  190. df.loc['e', 8] = 45
  191. with catch_warnings(record=True):
  192. assert df.ix['e', 8] == 45
  193. assert df.loc['e', 8] == 45
  194. def test_ix_slicing_strings(self):
  195. # see gh-3836
  196. data = {'Classification':
  197. ['SA EQUITY CFD', 'bbb', 'SA EQUITY', 'SA SSF', 'aaa'],
  198. 'Random': [1, 2, 3, 4, 5],
  199. 'X': ['correct', 'wrong', 'correct', 'correct', 'wrong']}
  200. df = DataFrame(data)
  201. x = df[~df.Classification.isin(['SA EQUITY CFD', 'SA EQUITY', 'SA SSF'
  202. ])]
  203. with catch_warnings(record=True):
  204. df.ix[x.index, 'X'] = df['Classification']
  205. expected = DataFrame({'Classification': {0: 'SA EQUITY CFD',
  206. 1: 'bbb',
  207. 2: 'SA EQUITY',
  208. 3: 'SA SSF',
  209. 4: 'aaa'},
  210. 'Random': {0: 1,
  211. 1: 2,
  212. 2: 3,
  213. 3: 4,
  214. 4: 5},
  215. 'X': {0: 'correct',
  216. 1: 'bbb',
  217. 2: 'correct',
  218. 3: 'correct',
  219. 4: 'aaa'}}) # bug was 4: 'bbb'
  220. tm.assert_frame_equal(df, expected)
  221. def test_ix_setitem_out_of_bounds_axis_0(self):
  222. df = DataFrame(
  223. np.random.randn(2, 5), index=["row%s" % i for i in range(2)],
  224. columns=["col%s" % i for i in range(5)])
  225. with catch_warnings(record=True):
  226. pytest.raises(ValueError, df.ix.__setitem__, (2, 0), 100)
  227. def test_ix_setitem_out_of_bounds_axis_1(self):
  228. df = DataFrame(
  229. np.random.randn(5, 2), index=["row%s" % i for i in range(5)],
  230. columns=["col%s" % i for i in range(2)])
  231. with catch_warnings(record=True):
  232. pytest.raises(ValueError, df.ix.__setitem__, (0, 2), 100)
  233. def test_ix_empty_list_indexer_is_ok(self):
  234. with catch_warnings(record=True):
  235. from pandas.util.testing import makeCustomDataframe as mkdf
  236. df = mkdf(5, 2)
  237. # vertical empty
  238. tm.assert_frame_equal(df.ix[:, []], df.iloc[:, :0],
  239. check_index_type=True,
  240. check_column_type=True)
  241. # horizontal empty
  242. tm.assert_frame_equal(df.ix[[], :], df.iloc[:0, :],
  243. check_index_type=True,
  244. check_column_type=True)
  245. # horizontal empty
  246. tm.assert_frame_equal(df.ix[[]], df.iloc[:0, :],
  247. check_index_type=True,
  248. check_column_type=True)
  249. def test_ix_duplicate_returns_series(self):
  250. df = DataFrame(np.random.randn(3, 3), index=[0.1, 0.2, 0.2],
  251. columns=list('abc'))
  252. with catch_warnings(record=True):
  253. r = df.ix[0.2, 'a']
  254. e = df.loc[0.2, 'a']
  255. tm.assert_series_equal(r, e)