groupby.py 3.0 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283
  1. import pytest
  2. import pandas as pd
  3. import pandas.util.testing as tm
  4. from .base import BaseExtensionTests
  5. class BaseGroupbyTests(BaseExtensionTests):
  6. """Groupby-specific tests."""
  7. def test_grouping_grouper(self, data_for_grouping):
  8. df = pd.DataFrame({
  9. "A": ["B", "B", None, None, "A", "A", "B", "C"],
  10. "B": data_for_grouping
  11. })
  12. gr1 = df.groupby("A").grouper.groupings[0]
  13. gr2 = df.groupby("B").grouper.groupings[0]
  14. tm.assert_numpy_array_equal(gr1.grouper, df.A.values)
  15. tm.assert_extension_array_equal(gr2.grouper, data_for_grouping)
  16. @pytest.mark.parametrize('as_index', [True, False])
  17. def test_groupby_extension_agg(self, as_index, data_for_grouping):
  18. df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1, 4],
  19. "B": data_for_grouping})
  20. result = df.groupby("B", as_index=as_index).A.mean()
  21. _, index = pd.factorize(data_for_grouping, sort=True)
  22. index = pd.Index(index, name="B")
  23. expected = pd.Series([3, 1, 4], index=index, name="A")
  24. if as_index:
  25. self.assert_series_equal(result, expected)
  26. else:
  27. expected = expected.reset_index()
  28. self.assert_frame_equal(result, expected)
  29. def test_groupby_extension_no_sort(self, data_for_grouping):
  30. df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1, 4],
  31. "B": data_for_grouping})
  32. result = df.groupby("B", sort=False).A.mean()
  33. _, index = pd.factorize(data_for_grouping, sort=False)
  34. index = pd.Index(index, name="B")
  35. expected = pd.Series([1, 3, 4], index=index, name="A")
  36. self.assert_series_equal(result, expected)
  37. def test_groupby_extension_transform(self, data_for_grouping):
  38. valid = data_for_grouping[~data_for_grouping.isna()]
  39. df = pd.DataFrame({"A": [1, 1, 3, 3, 1, 4],
  40. "B": valid})
  41. result = df.groupby("B").A.transform(len)
  42. expected = pd.Series([3, 3, 2, 2, 3, 1], name="A")
  43. self.assert_series_equal(result, expected)
  44. @pytest.mark.parametrize('op', [
  45. lambda x: 1,
  46. lambda x: [1] * len(x),
  47. lambda x: pd.Series([1] * len(x)),
  48. lambda x: x,
  49. ], ids=['scalar', 'list', 'series', 'object'])
  50. def test_groupby_extension_apply(self, data_for_grouping, op):
  51. df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1, 4],
  52. "B": data_for_grouping})
  53. df.groupby("B").apply(op)
  54. df.groupby("B").A.apply(op)
  55. df.groupby("A").apply(op)
  56. df.groupby("A").B.apply(op)
  57. def test_in_numeric_groupby(self, data_for_grouping):
  58. df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1, 4],
  59. "B": data_for_grouping,
  60. "C": [1, 1, 1, 1, 1, 1, 1, 1]})
  61. result = df.groupby("A").sum().columns
  62. if data_for_grouping.dtype._is_numeric:
  63. expected = pd.Index(['B', 'C'])
  64. else:
  65. expected = pd.Index(['C'])
  66. tm.assert_index_equal(result, expected)