12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970 |
- # -*- coding: utf-8 -*-
- import numpy as np
- import pytest
- import pandas as pd
- import pandas.util.testing as tm
- class TestSparseGroupBy(object):
- def setup_method(self, method):
- self.dense = pd.DataFrame({'A': ['foo', 'bar', 'foo', 'bar',
- 'foo', 'bar', 'foo', 'foo'],
- 'B': ['one', 'one', 'two', 'three',
- 'two', 'two', 'one', 'three'],
- 'C': np.random.randn(8),
- 'D': np.random.randn(8),
- 'E': [np.nan, np.nan, 1, 2,
- np.nan, 1, np.nan, np.nan]})
- self.sparse = self.dense.to_sparse()
- def test_first_last_nth(self):
- # tests for first / last / nth
- sparse_grouped = self.sparse.groupby('A')
- dense_grouped = self.dense.groupby('A')
- sparse_grouped_first = sparse_grouped.first()
- sparse_grouped_last = sparse_grouped.last()
- sparse_grouped_nth = sparse_grouped.nth(1)
- dense_grouped_first = dense_grouped.first().to_sparse()
- dense_grouped_last = dense_grouped.last().to_sparse()
- dense_grouped_nth = dense_grouped.nth(1).to_sparse()
- # TODO: shouldn't these all be spares or not?
- tm.assert_frame_equal(sparse_grouped_first,
- dense_grouped_first)
- tm.assert_frame_equal(sparse_grouped_last,
- dense_grouped_last)
- tm.assert_frame_equal(sparse_grouped_nth,
- dense_grouped_nth)
- def test_aggfuncs(self):
- sparse_grouped = self.sparse.groupby('A')
- dense_grouped = self.dense.groupby('A')
- result = sparse_grouped.mean().to_sparse()
- expected = dense_grouped.mean().to_sparse()
- tm.assert_frame_equal(result, expected)
- # ToDo: sparse sum includes str column
- # tm.assert_frame_equal(sparse_grouped.sum(),
- # dense_grouped.sum())
- result = sparse_grouped.count().to_sparse()
- expected = dense_grouped.count().to_sparse()
- tm.assert_frame_equal(result, expected)
- @pytest.mark.parametrize("fill_value", [0, np.nan])
- def test_groupby_includes_fill_value(fill_value):
- # https://github.com/pandas-dev/pandas/issues/5078
- df = pd.DataFrame({'a': [fill_value, 1, fill_value, fill_value],
- 'b': [fill_value, 1, fill_value, fill_value]})
- sdf = df.to_sparse(fill_value=fill_value)
- result = sdf.groupby('a').sum()
- expected = df.groupby('a').sum().to_sparse(fill_value=fill_value)
- tm.assert_frame_equal(result, expected, check_index_type=False)
|