test_bin_groupby.py 5.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157
  1. # -*- coding: utf-8 -*-
  2. import numpy as np
  3. from numpy import nan
  4. import pytest
  5. from pandas._libs import groupby, lib, reduction
  6. from pandas.core.dtypes.common import ensure_int64
  7. from pandas import Index, isna
  8. from pandas.core.groupby.ops import generate_bins_generic
  9. import pandas.util.testing as tm
  10. from pandas.util.testing import assert_almost_equal
  11. def test_series_grouper():
  12. from pandas import Series
  13. obj = Series(np.random.randn(10))
  14. dummy = obj[:0]
  15. labels = np.array([-1, -1, -1, 0, 0, 0, 1, 1, 1, 1], dtype=np.int64)
  16. grouper = reduction.SeriesGrouper(obj, np.mean, labels, 2, dummy)
  17. result, counts = grouper.get_result()
  18. expected = np.array([obj[3:6].mean(), obj[6:].mean()])
  19. assert_almost_equal(result, expected)
  20. exp_counts = np.array([3, 4], dtype=np.int64)
  21. assert_almost_equal(counts, exp_counts)
  22. def test_series_bin_grouper():
  23. from pandas import Series
  24. obj = Series(np.random.randn(10))
  25. dummy = obj[:0]
  26. bins = np.array([3, 6])
  27. grouper = reduction.SeriesBinGrouper(obj, np.mean, bins, dummy)
  28. result, counts = grouper.get_result()
  29. expected = np.array([obj[:3].mean(), obj[3:6].mean(), obj[6:].mean()])
  30. assert_almost_equal(result, expected)
  31. exp_counts = np.array([3, 3, 4], dtype=np.int64)
  32. assert_almost_equal(counts, exp_counts)
  33. class TestBinGroupers(object):
  34. def setup_method(self, method):
  35. self.obj = np.random.randn(10, 1)
  36. self.labels = np.array([0, 0, 0, 1, 1, 1, 2, 2, 2, 2], dtype=np.int64)
  37. self.bins = np.array([3, 6], dtype=np.int64)
  38. def test_generate_bins(self):
  39. values = np.array([1, 2, 3, 4, 5, 6], dtype=np.int64)
  40. binner = np.array([0, 3, 6, 9], dtype=np.int64)
  41. for func in [lib.generate_bins_dt64, generate_bins_generic]:
  42. bins = func(values, binner, closed='left')
  43. assert ((bins == np.array([2, 5, 6])).all())
  44. bins = func(values, binner, closed='right')
  45. assert ((bins == np.array([3, 6, 6])).all())
  46. for func in [lib.generate_bins_dt64, generate_bins_generic]:
  47. values = np.array([1, 2, 3, 4, 5, 6], dtype=np.int64)
  48. binner = np.array([0, 3, 6], dtype=np.int64)
  49. bins = func(values, binner, closed='right')
  50. assert ((bins == np.array([3, 6])).all())
  51. msg = "Invalid length for values or for binner"
  52. with pytest.raises(ValueError, match=msg):
  53. generate_bins_generic(values, [], 'right')
  54. with pytest.raises(ValueError, match=msg):
  55. generate_bins_generic(values[:0], binner, 'right')
  56. msg = "Values falls before first bin"
  57. with pytest.raises(ValueError, match=msg):
  58. generate_bins_generic(values, [4], 'right')
  59. msg = "Values falls after last bin"
  60. with pytest.raises(ValueError, match=msg):
  61. generate_bins_generic(values, [-3, -1], 'right')
  62. def test_group_ohlc():
  63. def _check(dtype):
  64. obj = np.array(np.random.randn(20), dtype=dtype)
  65. bins = np.array([6, 12, 20])
  66. out = np.zeros((3, 4), dtype)
  67. counts = np.zeros(len(out), dtype=np.int64)
  68. labels = ensure_int64(np.repeat(np.arange(3),
  69. np.diff(np.r_[0, bins])))
  70. func = getattr(groupby, 'group_ohlc_%s' % dtype)
  71. func(out, counts, obj[:, None], labels)
  72. def _ohlc(group):
  73. if isna(group).all():
  74. return np.repeat(nan, 4)
  75. return [group[0], group.max(), group.min(), group[-1]]
  76. expected = np.array([_ohlc(obj[:6]), _ohlc(obj[6:12]),
  77. _ohlc(obj[12:])])
  78. assert_almost_equal(out, expected)
  79. tm.assert_numpy_array_equal(counts,
  80. np.array([6, 6, 8], dtype=np.int64))
  81. obj[:6] = nan
  82. func(out, counts, obj[:, None], labels)
  83. expected[0] = nan
  84. assert_almost_equal(out, expected)
  85. _check('float32')
  86. _check('float64')
  87. class TestMoments(object):
  88. pass
  89. class TestReducer(object):
  90. def test_int_index(self):
  91. from pandas.core.series import Series
  92. arr = np.random.randn(100, 4)
  93. result = reduction.reduce(arr, np.sum, labels=Index(np.arange(4)))
  94. expected = arr.sum(0)
  95. assert_almost_equal(result, expected)
  96. result = reduction.reduce(arr, np.sum, axis=1,
  97. labels=Index(np.arange(100)))
  98. expected = arr.sum(1)
  99. assert_almost_equal(result, expected)
  100. dummy = Series(0., index=np.arange(100))
  101. result = reduction.reduce(arr, np.sum, dummy=dummy,
  102. labels=Index(np.arange(4)))
  103. expected = arr.sum(0)
  104. assert_almost_equal(result, expected)
  105. dummy = Series(0., index=np.arange(4))
  106. result = reduction.reduce(arr, np.sum, axis=1, dummy=dummy,
  107. labels=Index(np.arange(100)))
  108. expected = arr.sum(1)
  109. assert_almost_equal(result, expected)
  110. result = reduction.reduce(arr, np.sum, axis=1, dummy=dummy,
  111. labels=Index(np.arange(100)))
  112. assert_almost_equal(result, expected)