test_replace.py 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296
  1. # coding=utf-8
  2. # pylint: disable-msg=E1101,W0612
  3. import numpy as np
  4. import pytest
  5. import pandas as pd
  6. import pandas.util.testing as tm
  7. from .common import TestData
  8. class TestSeriesReplace(TestData):
  def test_replace(self):
      """Exercise ``Series.replace`` across float, object and index data.

      Covers list-to-scalar, dict and list-to-list replacement, the
      ``inplace=True`` form, NaN/inf targets, and the errors raised for
      malformed replacement arguments.
      """
      N = 100
      ser = pd.Series(np.random.randn(N))
      ser[0:4] = np.nan
      ser[6:10] = 0

      # Build the expectation from the untouched data *before* mutating
      # ``ser``. Deriving it afterwards would compare the series with
      # itself whenever the NaNs were replaced by any non-NaN value.
      exp = ser.fillna(-1)

      # replace list with a single value
      ser.replace([np.nan], -1, inplace=True)
      tm.assert_series_equal(ser, exp)

      # Scalar replacement must agree with boolean-mask assignment.
      rs = ser.replace(0., np.nan)
      ser[ser == 0.] = np.nan
      tm.assert_series_equal(rs, ser)

      ser = pd.Series(np.fabs(np.random.randn(N)), tm.makeDateIndex(N),
                      dtype=object)
      ser[:5] = np.nan
      ser[6:10] = 'foo'
      ser[20:30] = 'bar'

      # replace list with a single value
      rs = ser.replace([np.nan, 'foo', 'bar'], -1)

      assert (rs[:5] == -1).all()
      assert (rs[6:10] == -1).all()
      assert (rs[20:30] == -1).all()
      # the source series must be left untouched by a non-inplace replace
      assert (pd.isna(ser[:5])).all()

      # replace with different values
      rs = ser.replace({np.nan: -1, 'foo': -2, 'bar': -3})

      assert (rs[:5] == -1).all()
      assert (rs[6:10] == -2).all()
      assert (rs[20:30] == -3).all()
      assert (pd.isna(ser[:5])).all()

      # replace with different values with 2 lists
      rs2 = ser.replace([np.nan, 'foo', 'bar'], [-1, -2, -3])
      tm.assert_series_equal(rs, rs2)

      # replace inplace
      ser.replace([np.nan, 'foo', 'bar'], -1, inplace=True)

      assert (ser[:5] == -1).all()
      assert (ser[6:10] == -1).all()
      assert (ser[20:30] == -1).all()

      # Replacing NaN with 0 must match fillna(0), including alongside inf.
      ser = pd.Series([np.nan, 0, np.inf])
      tm.assert_series_equal(ser.replace(np.nan, 0), ser.fillna(0))

      ser = pd.Series([np.nan, 0, 'foo', 'bar', np.inf, None, pd.NaT])
      tm.assert_series_equal(ser.replace(np.nan, 0), ser.fillna(0))
      # Replacing inf touches only position 4, where np.inf sits.
      filled = ser.copy()
      filled[4] = 0
      tm.assert_series_equal(ser.replace(np.inf, 0), filled)

      # NOTE(review): ``self.ts`` is provided by ``TestData`` (not visible
      # here); presumably a time-indexed Series - confirm in ``.common``.
      ser = pd.Series(self.ts.index)
      tm.assert_series_equal(ser.replace(np.nan, 0), ser.fillna(0))

      # malformed
      msg = r"Replacement lists must match in length\. Expecting 3 got 2"
      with pytest.raises(ValueError, match=msg):
          ser.replace([1, 2, 3], [np.nan, 0])

      # make sure that we aren't just masking a TypeError because bools don't
      # implement indexing
      with pytest.raises(TypeError, match='Cannot compare types .+'):
          ser.replace([1, 2], [np.nan, 0])

      # A full permutation given as two equal-length lists.
      ser = pd.Series([0, 1, 2, 3, 4])
      result = ser.replace([0, 1, 2, 3, 4], [4, 3, 2, 1, 0])
      tm.assert_series_equal(result, pd.Series([4, 3, 2, 1, 0]))
  def test_replace_gh5319(self):
      """Regression checks tagged GH 5319, GH 5797 and GH 11792."""
      # GH 5319 (API change from 0.12?): with no replacement value given,
      # the result is compared against a forward fill, whether the target
      # is passed as a one-element list or as a bare scalar.
      for target in ([np.nan], np.nan):
          gappy = pd.Series([0, np.nan, 2, 3, 4])
          forward_filled = gappy.ffill()
          tm.assert_series_equal(gappy.replace(target), forward_filled)

      # GH 5797: swap one Timestamp for another, dict form then pair form.
      old_stamp = pd.Timestamp('20130103')
      new_stamp = pd.Timestamp('20120101')
      dates = pd.Series(pd.date_range('20130101', periods=5))
      want = dates.copy()
      want.loc[2] = new_stamp
      tm.assert_series_equal(dates.replace({old_stamp: new_stamp}), want)
      tm.assert_series_equal(dates.replace(old_stamp, new_stamp), want)

      # GH 11792: Test with replacing NaT in a list with tz data
      utc_stamp = pd.Timestamp('2015/01/01', tz='UTC')
      tz_values = pd.Series([pd.NaT, utc_stamp])
      got = tz_values.replace([np.nan, pd.NaT], pd.Timestamp.min)
      want = pd.Series([pd.Timestamp.min, utc_stamp], dtype=object)
      tm.assert_series_equal(want, got)
  def test_replace_with_single_list(self):
      """A lone list of targets is checked as a copy, in place, and on error."""
      original = pd.Series([0, 1, 2, 3, 4])

      # Returned-copy form.
      replaced = original.replace([1, 2, 3])
      tm.assert_series_equal(replaced, pd.Series([0, 0, 0, 0, 4]))

      # In-place form on a private copy.
      working = original.copy()
      working.replace([1, 2, 3], inplace=True)
      tm.assert_series_equal(working, pd.Series([0, 0, 0, 0, 4]))

      # make sure things don't get corrupted when fillna call fails
      working = original.copy()
      expected_error = (r"Invalid fill method\. Expecting pad \(ffill\) or "
                        r"backfill \(bfill\)\. Got crash_cymbal")
      with pytest.raises(ValueError, match=expected_error):
          working.replace([1, 2, 3], inplace=True, method='crash_cymbal')
      tm.assert_series_equal(working, original)
  def test_replace_with_empty_list(self):
      """Check ``replace`` when the target or the replacement is a list.

      Replacing ``[]`` with NaN is expected to leave the values unchanged;
      mapping NaN to a list is expected to raise ``ValueError``.
      """
      # GH 21977
      s = pd.Series([[1], [2, 3], [], np.nan, [4]])
      # Snapshot instead of aliasing ``s``: with ``expected = s`` an
      # accidental in-place mutation by ``replace`` would change the
      # expectation too and could never be detected.
      expected = s.copy()
      result = s.replace([], np.nan)
      tm.assert_series_equal(result, expected)

      # GH 19266
      with pytest.raises(ValueError, match="cannot assign mismatch"):
          s.replace({np.nan: []})
      with pytest.raises(ValueError, match="cannot assign mismatch"):
          s.replace({np.nan: ['dummy', 'alt']})
  def test_replace_mixed_types(self):
      """Replace int64 values with floats, strings, timestamps and bools."""
      base = pd.Series(np.arange(5), dtype='int64')

      def _assert_replaced(to_rep, val, expected):
          # The same replacement is checked both as a returned copy and
          # as an in-place edit of a private copy of ``base``.
          in_place = base.copy()
          returned = base.replace(to_rep, val)
          in_place.replace(to_rep, val, inplace=True)
          tm.assert_series_equal(expected, returned)
          tm.assert_series_equal(expected, in_place)

      stamp = pd.Timestamp('20130101')
      cases = [
          # MUST upcast to float
          ([3], [3.0], pd.Series([0., 1., 2., 3., 4.])),
          # MUST upcast to float
          ([3], [3.5], pd.Series([0, 1, 2, 3.5, 4])),
          # casts to object
          ([3, 4], [3.5, 'a'], pd.Series([0, 1, 2, 3.5, 'a'])),
          # again casts to object
          ([3, 4], [3.5, stamp], pd.Series([0, 1, 2, 3.5, stamp])),
          # casts to object
          ([3, 4], [3.5, True],
           pd.Series([0, 1, 2, 3.5, True], dtype='object')),
      ]
      for to_rep, val, expected in cases:
          _assert_replaced(to_rep, val, expected)

      # test an object with dates + floats + integers + strings
      days = pd.date_range('1/1/2001', '1/10/2001', freq='D')
      days = days.to_series().reset_index(drop=True)
      first_three = [days[0], days[1], days[2]]
      got = days.astype(object).replace(first_three, [1.0, 2, 'a'])
      want = pd.Series([1.0, 2, 'a'] + days[3:].tolist(), dtype=object)
      tm.assert_series_equal(got, want)
  def test_replace_bool_with_string_no_op(self):
      """Replacing a string that is absent from a bool Series changes nothing."""
      flags = pd.Series([True, False, True])
      replaced = flags.replace('fun', 'in-the-sun')
      tm.assert_series_equal(flags, replaced)
  def test_replace_bool_with_string(self):
      """Replacing ``True`` with a string yields a mixed bool/str Series."""
      flags = pd.Series([True, False, True])
      replaced = flags.replace(True, '2u')
      want = pd.Series(['2u', False, '2u'])
      tm.assert_series_equal(want, replaced)
  def test_replace_bool_with_bool(self):
      """Replacing ``True`` with ``False`` leaves an all-``False`` Series."""
      flags = pd.Series([True, False, True])
      replaced = flags.replace(True, False)
      all_false = pd.Series([False] * len(flags))
      tm.assert_series_equal(all_false, replaced)
  def test_replace_with_dict_with_bool_keys(self):
      """A mapping with both str and bool keys must raise ``TypeError``."""
      flags = pd.Series([True, False, True])
      mixed_key_mapping = {'asdf': 'asdb', True: 'yes'}
      with pytest.raises(TypeError, match='Cannot compare types .+'):
          flags.replace(mixed_key_mapping)
  def test_replace2(self):
      """Replace NaN/'foo'/'bar' in an object Series four different ways."""
      size = 100
      # Positional windows holding each kind of placeholder value.
      nan_rows = slice(None, 5)
      foo_rows = slice(6, 10)
      bar_rows = slice(20, 30)

      data = pd.Series(np.fabs(np.random.randn(size)),
                       tm.makeDateIndex(size), dtype=object)
      data[nan_rows] = np.nan
      data[foo_rows] = 'foo'
      data[bar_rows] = 'bar'

      # replace list with a single value
      collapsed = data.replace([np.nan, 'foo', 'bar'], -1)
      for rows in (nan_rows, foo_rows, bar_rows):
          assert (collapsed[rows] == -1).all()
      # the source data keeps its NaNs after a non-inplace replace
      assert (pd.isna(data[nan_rows])).all()

      # replace with different values
      mapped = data.replace({np.nan: -1, 'foo': -2, 'bar': -3})
      for rows, value in ((nan_rows, -1), (foo_rows, -2), (bar_rows, -3)):
          assert (mapped[rows] == value).all()
      assert (pd.isna(data[nan_rows])).all()

      # replace with different values with 2 lists
      paired = data.replace([np.nan, 'foo', 'bar'], [-1, -2, -3])
      tm.assert_series_equal(mapped, paired)

      # replace inplace
      data.replace([np.nan, 'foo', 'bar'], -1, inplace=True)
      for rows in (nan_rows, foo_rows, bar_rows):
          assert (data[rows] == -1).all()
  def test_replace_with_empty_dictlike(self):
      """An empty dict or empty Series as the mapping is a no-op."""
      # GH 15289
      letters = pd.Series(list('abcd'))
      for empty_mapping in ({}, pd.Series([])):
          tm.assert_series_equal(letters, letters.replace(empty_mapping))
  def test_replace_string_with_number(self):
      """The string '2' must not match the integer 2 in an int Series."""
      # GH 15743
      numbers = pd.Series([1, 2, 3])
      replaced = numbers.replace('2', np.nan)
      unchanged = pd.Series([1, 2, 3])
      tm.assert_series_equal(unchanged, replaced)
  def test_replace_replacer_equals_replacement(self):
      """Swapping 'a' and 'b' via one mapping must not chain replacements."""
      # GH 20656
      # make sure all replacers are matching against original values
      pair = pd.Series(['a', 'b'])
      swapped = pair.replace({'a': 'b', 'b': 'a'})
      want = pd.Series(['b', 'a'])
      tm.assert_series_equal(want, swapped)
  def test_replace_unicode_with_number(self):
      """The unicode literal u'2' must not match the integer 2."""
      # GH 15743
      numbers = pd.Series([1, 2, 3])
      replaced = numbers.replace(u'2', np.nan)
      unchanged = pd.Series([1, 2, 3])
      tm.assert_series_equal(unchanged, replaced)
  def test_replace_mixed_types_with_string(self):
      """Replace an int and a str target with NaN in a mixed Series."""
      # Testing mixed
      mixed = pd.Series([1, 2, 3, '4', 4, 5])
      replaced = mixed.replace([2, '4'], np.nan)
      # Only the integer 2 and the string '4' are hit; the integer 4 stays.
      want = pd.Series([1, np.nan, 3, np.nan, 4, 5])
      tm.assert_series_equal(want, replaced)
  def test_replace_with_no_overflowerror(self):
      """Numeric strings too large for int64 are replaced in both directions."""
      # GH 25616
      # casts to object without Exception from OverflowError
      huge = '100000000000000000000'
      huge_plus_one = '100000000000000000001'

      # int -> oversized numeric string
      ints = pd.Series([0, 1, 2, 3, 4])
      got = ints.replace([3], [huge])
      want = pd.Series([0, 1, 2, huge, 4])
      tm.assert_series_equal(got, want)

      # oversized numeric string -> int
      mixed = pd.Series([0, huge, huge_plus_one])
      got = mixed.replace([huge], [1])
      want = pd.Series([0, 1, huge_plus_one])
      tm.assert_series_equal(got, want)