base.py 4.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158
  1. """
  2. Provide basic components for groupby. These definitions
  3. hold the whitelist of methods that are exposed on the
  4. SeriesGroupBy and the DataFrameGroupBy objects.
  5. """
  6. import types
  7. from pandas.util._decorators import make_signature
  8. from pandas.core.dtypes.common import is_list_like, is_scalar
  9. class GroupByMixin(object):
  10. """
  11. Provide the groupby facilities to the mixed object.
  12. """
  13. @staticmethod
  14. def _dispatch(name, *args, **kwargs):
  15. """
  16. Dispatch to apply.
  17. """
  18. def outer(self, *args, **kwargs):
  19. def f(x):
  20. x = self._shallow_copy(x, groupby=self._groupby)
  21. return getattr(x, name)(*args, **kwargs)
  22. return self._groupby.apply(f)
  23. outer.__name__ = name
  24. return outer
  25. def _gotitem(self, key, ndim, subset=None):
  26. """
  27. Sub-classes to define. Return a sliced object.
  28. Parameters
  29. ----------
  30. key : string / list of selections
  31. ndim : 1,2
  32. requested ndim of result
  33. subset : object, default None
  34. subset to act on
  35. """
  36. # create a new object to prevent aliasing
  37. if subset is None:
  38. subset = self.obj
  39. # we need to make a shallow copy of ourselves
  40. # with the same groupby
  41. kwargs = {attr: getattr(self, attr) for attr in self._attributes}
  42. # Try to select from a DataFrame, falling back to a Series
  43. try:
  44. groupby = self._groupby[key]
  45. except IndexError:
  46. groupby = self._groupby
  47. self = self.__class__(subset,
  48. groupby=groupby,
  49. parent=self,
  50. **kwargs)
  51. self._reset_cache()
  52. if subset.ndim == 2:
  53. if is_scalar(key) and key in subset or is_list_like(key):
  54. self._selection = key
  55. return self
  56. # special case to prevent duplicate plots when catching exceptions when
  57. # forwarding methods from NDFrames
  58. plotting_methods = frozenset(['plot', 'hist'])
  59. common_apply_whitelist = frozenset([
  60. 'quantile', 'fillna', 'mad', 'take',
  61. 'idxmax', 'idxmin', 'tshift',
  62. 'skew', 'corr', 'cov', 'diff'
  63. ]) | plotting_methods
  64. series_apply_whitelist = ((common_apply_whitelist |
  65. {'nlargest', 'nsmallest',
  66. 'is_monotonic_increasing',
  67. 'is_monotonic_decreasing'})
  68. ) | frozenset(['dtype', 'unique'])
  69. dataframe_apply_whitelist = ((common_apply_whitelist |
  70. frozenset(['dtypes', 'corrwith'])))
  71. cython_transforms = frozenset(['cumprod', 'cumsum', 'shift',
  72. 'cummin', 'cummax'])
  73. cython_cast_blacklist = frozenset(['rank', 'count', 'size'])
  74. def whitelist_method_generator(base, klass, whitelist):
  75. """
  76. Yields all GroupBy member defs for DataFrame/Series names in whitelist.
  77. Parameters
  78. ----------
  79. base : class
  80. base class
  81. klass : class
  82. class where members are defined.
  83. Should be Series or DataFrame
  84. whitelist : list
  85. list of names of klass methods to be constructed
  86. Returns
  87. -------
  88. The generator yields a sequence of strings, each suitable for exec'ing,
  89. that define implementations of the named methods for DataFrameGroupBy
  90. or SeriesGroupBy.
  91. Since we don't want to override methods explicitly defined in the
  92. base class, any such name is skipped.
  93. """
  94. method_wrapper_template = \
  95. """def %(name)s(%(sig)s) :
  96. \"""
  97. %(doc)s
  98. \"""
  99. f = %(self)s.__getattr__('%(name)s')
  100. return f(%(args)s)"""
  101. property_wrapper_template = \
  102. """@property
  103. def %(name)s(self) :
  104. \"""
  105. %(doc)s
  106. \"""
  107. return self.__getattr__('%(name)s')"""
  108. for name in whitelist:
  109. # don't override anything that was explicitly defined
  110. # in the base class
  111. if hasattr(base, name):
  112. continue
  113. # ugly, but we need the name string itself in the method.
  114. f = getattr(klass, name)
  115. doc = f.__doc__
  116. doc = doc if type(doc) == str else ''
  117. if isinstance(f, types.MethodType):
  118. wrapper_template = method_wrapper_template
  119. decl, args = make_signature(f)
  120. # pass args by name to f because otherwise
  121. # GroupBy._make_wrapper won't know whether
  122. # we passed in an axis parameter.
  123. args_by_name = ['{0}={0}'.format(arg) for arg in args[1:]]
  124. params = {'name': name,
  125. 'doc': doc,
  126. 'sig': ','.join(decl),
  127. 'self': args[0],
  128. 'args': ','.join(args_by_name)}
  129. else:
  130. wrapper_template = property_wrapper_template
  131. params = {'name': name, 'doc': doc}
  132. yield wrapper_template % params