# test_groupby.py
# -*- coding: utf-8 -*-
import numpy as np
import pytest

import pandas as pd
import pandas.util.testing as tm
  6. class TestSparseGroupBy(object):
  7. def setup_method(self, method):
  8. self.dense = pd.DataFrame({'A': ['foo', 'bar', 'foo', 'bar',
  9. 'foo', 'bar', 'foo', 'foo'],
  10. 'B': ['one', 'one', 'two', 'three',
  11. 'two', 'two', 'one', 'three'],
  12. 'C': np.random.randn(8),
  13. 'D': np.random.randn(8),
  14. 'E': [np.nan, np.nan, 1, 2,
  15. np.nan, 1, np.nan, np.nan]})
  16. self.sparse = self.dense.to_sparse()
  17. def test_first_last_nth(self):
  18. # tests for first / last / nth
  19. sparse_grouped = self.sparse.groupby('A')
  20. dense_grouped = self.dense.groupby('A')
  21. sparse_grouped_first = sparse_grouped.first()
  22. sparse_grouped_last = sparse_grouped.last()
  23. sparse_grouped_nth = sparse_grouped.nth(1)
  24. dense_grouped_first = dense_grouped.first().to_sparse()
  25. dense_grouped_last = dense_grouped.last().to_sparse()
  26. dense_grouped_nth = dense_grouped.nth(1).to_sparse()
  27. # TODO: shouldn't these all be spares or not?
  28. tm.assert_frame_equal(sparse_grouped_first,
  29. dense_grouped_first)
  30. tm.assert_frame_equal(sparse_grouped_last,
  31. dense_grouped_last)
  32. tm.assert_frame_equal(sparse_grouped_nth,
  33. dense_grouped_nth)
  34. def test_aggfuncs(self):
  35. sparse_grouped = self.sparse.groupby('A')
  36. dense_grouped = self.dense.groupby('A')
  37. result = sparse_grouped.mean().to_sparse()
  38. expected = dense_grouped.mean().to_sparse()
  39. tm.assert_frame_equal(result, expected)
  40. # ToDo: sparse sum includes str column
  41. # tm.assert_frame_equal(sparse_grouped.sum(),
  42. # dense_grouped.sum())
  43. result = sparse_grouped.count().to_sparse()
  44. expected = dense_grouped.count().to_sparse()
  45. tm.assert_frame_equal(result, expected)
  46. @pytest.mark.parametrize("fill_value", [0, np.nan])
  47. def test_groupby_includes_fill_value(fill_value):
  48. # https://github.com/pandas-dev/pandas/issues/5078
  49. df = pd.DataFrame({'a': [fill_value, 1, fill_value, fill_value],
  50. 'b': [fill_value, 1, fill_value, fill_value]})
  51. sdf = df.to_sparse(fill_value=fill_value)
  52. result = sdf.groupby('a').sum()
  53. expected = df.groupby('a').sum().to_sparse(fill_value=fill_value)
  54. tm.assert_frame_equal(result, expected, check_index_type=False)