# test_apply.py (2.9 KB)
  1. import numpy as np
  2. import pytest
  3. from pandas import DataFrame, Series, SparseDataFrame, bdate_range
  4. from pandas.core import nanops
  5. from pandas.core.sparse.api import SparseDtype
  6. from pandas.util import testing as tm
  7. @pytest.fixture
  8. def dates():
  9. return bdate_range('1/1/2011', periods=10)
  10. @pytest.fixture
  11. def empty():
  12. return SparseDataFrame()
  13. @pytest.fixture
  14. def frame(dates):
  15. data = {'A': [np.nan, np.nan, np.nan, 0, 1, 2, 3, 4, 5, 6],
  16. 'B': [0, 1, 2, np.nan, np.nan, np.nan, 3, 4, 5, 6],
  17. 'C': np.arange(10, dtype=np.float64),
  18. 'D': [0, 1, 2, 3, 4, 5, np.nan, np.nan, np.nan, np.nan]}
  19. return SparseDataFrame(data, index=dates)
  20. @pytest.fixture
  21. def fill_frame(frame):
  22. values = frame.values.copy()
  23. values[np.isnan(values)] = 2
  24. return SparseDataFrame(values, columns=['A', 'B', 'C', 'D'],
  25. default_fill_value=2,
  26. index=frame.index)
  27. def test_apply(frame):
  28. applied = frame.apply(np.sqrt)
  29. assert isinstance(applied, SparseDataFrame)
  30. tm.assert_almost_equal(applied.values, np.sqrt(frame.values))
  31. # agg / broadcast
  32. with tm.assert_produces_warning(FutureWarning):
  33. broadcasted = frame.apply(np.sum, broadcast=True)
  34. assert isinstance(broadcasted, SparseDataFrame)
  35. with tm.assert_produces_warning(FutureWarning):
  36. exp = frame.to_dense().apply(np.sum, broadcast=True)
  37. tm.assert_frame_equal(broadcasted.to_dense(), exp)
  38. applied = frame.apply(np.sum)
  39. tm.assert_series_equal(applied,
  40. frame.to_dense().apply(nanops.nansum).to_sparse())
  41. def test_apply_fill(fill_frame):
  42. applied = fill_frame.apply(np.sqrt)
  43. assert applied['A'].fill_value == np.sqrt(2)
  44. def test_apply_empty(empty):
  45. assert empty.apply(np.sqrt) is empty
  46. def test_apply_nonuq():
  47. orig = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]],
  48. index=['a', 'a', 'c'])
  49. sparse = orig.to_sparse()
  50. res = sparse.apply(lambda s: s[0], axis=1)
  51. exp = orig.apply(lambda s: s[0], axis=1)
  52. # dtype must be kept
  53. assert res.dtype == SparseDtype(np.int64)
  54. # ToDo: apply must return subclassed dtype
  55. assert isinstance(res, Series)
  56. tm.assert_series_equal(res.to_dense(), exp)
  57. # df.T breaks
  58. sparse = orig.T.to_sparse()
  59. res = sparse.apply(lambda s: s[0], axis=0) # noqa
  60. exp = orig.T.apply(lambda s: s[0], axis=0)
  61. # TODO: no non-unique columns supported in sparse yet
  62. # tm.assert_series_equal(res.to_dense(), exp)
  63. def test_applymap(frame):
  64. # just test that it works
  65. result = frame.applymap(lambda x: x * 2)
  66. assert isinstance(result, SparseDataFrame)
  67. def test_apply_keep_sparse_dtype():
  68. # GH 23744
  69. sdf = SparseDataFrame(np.array([[0, 1, 0], [0, 0, 0], [0, 0, 1]]),
  70. columns=['b', 'a', 'c'], default_fill_value=1)
  71. df = DataFrame(sdf)
  72. expected = sdf.apply(np.exp)
  73. result = df.apply(np.exp)
  74. tm.assert_frame_equal(expected, result)