123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158 |
- """
- Provide basic components for groupby. These definitions
- hold the whitelist of methods that are exposed on the
- SeriesGroupBy and the DataFrameGroupBy objects.
- """
- import types
- from pandas.util._decorators import make_signature
- from pandas.core.dtypes.common import is_list_like, is_scalar
class GroupByMixin(object):
    """
    Provide the groupby facilities to the mixed object.

    The methods below use ``self._groupby``, ``self._attributes``,
    ``self.obj``, ``self._shallow_copy`` and ``self._reset_cache``; none of
    them is defined here, so the host class must supply them.
    """

    @staticmethod
    def _dispatch(name, *args, **kwargs):
        """
        Dispatch to apply.

        Build a function that, when called as a method, applies the method
        called ``name`` to every group through ``self._groupby.apply``.

        Parameters
        ----------
        name : str
            Attribute looked up on each per-group object; also assigned to
            the ``__name__`` of the returned function.
        *args, **kwargs
            Accepted but not used. The arguments forwarded to ``name`` are
            those given when the returned function is called.

        Returns
        -------
        function
            ``outer(self, *args, **kwargs)``.
        """

        def outer(self, *args, **kwargs):
            def f(x):
                # re-wrap the group so that ``name`` resolves on an object
                # carrying the same groupby
                x = self._shallow_copy(x, groupby=self._groupby)
                return getattr(x, name)(*args, **kwargs)
            return self._groupby.apply(f)

        outer.__name__ = name
        return outer

    def _gotitem(self, key, ndim, subset=None):
        """
        Sub-classes to define. Return a sliced object.

        Parameters
        ----------
        key : string / list of selections
        ndim : 1,2
            requested ndim of result (not consulted by this implementation)
        subset : object, default None
            subset to act on; ``self.obj`` is used when None

        Returns
        -------
        A new instance of ``self.__class__`` built on ``subset`` with
        ``parent=self``. ``self`` itself is left unmodified.
        """
        # create a new object to prevent aliasing
        if subset is None:
            subset = self.obj

        # we need to make a shallow copy of ourselves
        # with the same groupby
        kwargs = {attr: getattr(self, attr) for attr in self._attributes}

        # Try to select from a DataFrame, falling back to a Series
        try:
            groupby = self._groupby[key]
        except IndexError:
            groupby = self._groupby

        # bind the copy to its own name instead of rebinding ``self``
        sliced = self.__class__(subset,
                                groupby=groupby,
                                parent=self,
                                **kwargs)
        sliced._reset_cache()

        # only a 2-dimensional subset records a selection: either a scalar
        # key that is present in the subset, or any list-like key
        if subset.ndim == 2 and (
                (is_scalar(key) and key in subset) or is_list_like(key)):
            sliced._selection = key
        return sliced
# plot/hist are kept in their own set: a special case to prevent duplicate
# plots when catching exceptions when forwarding methods from NDFrames
plotting_methods = frozenset(('plot', 'hist'))

# names shared by the Series and DataFrame whitelists below
common_apply_whitelist = plotting_methods | frozenset((
    'quantile', 'fillna', 'mad', 'take',
    'idxmax', 'idxmin', 'tshift',
    'skew', 'corr', 'cov', 'diff',
))

# common names plus the Series-only ones
series_apply_whitelist = common_apply_whitelist | frozenset((
    'nlargest', 'nsmallest',
    'is_monotonic_increasing',
    'is_monotonic_decreasing',
    'dtype', 'unique',
))

# common names plus the DataFrame-only ones
dataframe_apply_whitelist = common_apply_whitelist | frozenset((
    'dtypes', 'corrwith',
))

cython_transforms = frozenset((
    'cumprod', 'cumsum', 'shift', 'cummin', 'cummax',
))

cython_cast_blacklist = frozenset(('rank', 'count', 'size'))
def whitelist_method_generator(base, klass, whitelist):
    """
    Yields all GroupBy member defs for DataFrame/Series names in whitelist.

    Parameters
    ----------
    base : class
        base class
    klass : class
        class where members are defined.
        Should be Series or DataFrame
    whitelist : list
        list of names of klass methods to be constructed

    Yields
    ------
    str
        Source text, each suitable for exec'ing, that defines an
        implementation of one named member for DataFrameGroupBy
        or SeriesGroupBy.

    Notes
    -----
    Since we don't want to override methods explicitly defined in the
    base class, any such name is skipped.

    The member's docstring is interpolated into the generated source
    verbatim, between triple double quotes.
    """
    # The template bodies are indented by four spaces so that the rendered
    # text is a valid top-level ``def`` when exec'd.
    method_wrapper_template = \
        """def %(name)s(%(sig)s) :
    \"""
    %(doc)s
    \"""
    f = %(self)s.__getattr__('%(name)s')
    return f(%(args)s)"""
    property_wrapper_template = \
        """@property
def %(name)s(self) :
    \"""
    %(doc)s
    \"""
    return self.__getattr__('%(name)s')"""

    for name in whitelist:
        # don't override anything that was explicitly defined
        # in the base class
        if hasattr(base, name):
            continue

        # ugly, but we need the name string itself in the method.
        f = getattr(klass, name)
        doc = f.__doc__
        # a missing or non-string __doc__ is rendered as an empty docstring
        doc = doc if isinstance(doc, str) else ''

        # NOTE(review): on Python 3 a plain function fetched from its class
        # is not a ``types.MethodType``; only bound methods take this branch,
        # everything else gets the property wrapper.
        if isinstance(f, types.MethodType):
            wrapper_template = method_wrapper_template
            decl, args = make_signature(f)
            # pass args by name to f because otherwise
            # GroupBy._make_wrapper won't know whether
            # we passed in an axis parameter.
            args_by_name = ['{0}={0}'.format(arg) for arg in args[1:]]
            params = {'name': name,
                      'doc': doc,
                      'sig': ','.join(decl),
                      'self': args[0],
                      'args': ','.join(args_by_name)}
        else:
            wrapper_template = property_wrapper_template
            params = {'name': name, 'doc': doc}
        yield wrapper_template % params
|