123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402 |
- """
- For compatibility with numpy libraries, pandas functions or
- methods have to accept '*args' and '**kwargs' parameters to
- accommodate numpy arguments that are not actually used or
- respected in the pandas implementation.
- To ensure that users do not abuse these parameters, validation
- is performed in 'validators.py' to make sure that any extra
- parameters passed correspond ONLY to those in the numpy signature.
- Part of that validation includes whether or not the user attempted
- to pass in non-default values for these extraneous parameters. As we
- want to discourage users from relying on these parameters when calling
- the pandas implementation, we want them only to pass in the default values
- for these parameters.
- This module provides a set of commonly used default arguments for functions
- and methods that are spread throughout the codebase. This module will make it
- easier to adjust to future upstream changes in the analogous numpy signatures.
- """
- from numpy import ndarray
- from pandas.compat import OrderedDict
- from pandas.errors import UnsupportedFunctionCall
- from pandas.util._validators import (
- validate_args, validate_args_and_kwargs, validate_kwargs)
- from pandas.core.dtypes.common import is_bool, is_integer
- class CompatValidator(object):
- def __init__(self, defaults, fname=None, method=None,
- max_fname_arg_count=None):
- self.fname = fname
- self.method = method
- self.defaults = defaults
- self.max_fname_arg_count = max_fname_arg_count
- def __call__(self, args, kwargs, fname=None,
- max_fname_arg_count=None, method=None):
- if args or kwargs:
- fname = self.fname if fname is None else fname
- max_fname_arg_count = (self.max_fname_arg_count if
- max_fname_arg_count is None
- else max_fname_arg_count)
- method = self.method if method is None else method
- if method == 'args':
- validate_args(fname, args, max_fname_arg_count, self.defaults)
- elif method == 'kwargs':
- validate_kwargs(fname, kwargs, self.defaults)
- elif method == 'both':
- validate_args_and_kwargs(fname, args, kwargs,
- max_fname_arg_count,
- self.defaults)
- else:
- raise ValueError("invalid validation method "
- "'{method}'".format(method=method))
- ARGMINMAX_DEFAULTS = dict(out=None)
- validate_argmin = CompatValidator(ARGMINMAX_DEFAULTS, fname='argmin',
- method='both', max_fname_arg_count=1)
- validate_argmax = CompatValidator(ARGMINMAX_DEFAULTS, fname='argmax',
- method='both', max_fname_arg_count=1)
- def process_skipna(skipna, args):
- if isinstance(skipna, ndarray) or skipna is None:
- args = (skipna,) + args
- skipna = True
- return skipna, args
- def validate_argmin_with_skipna(skipna, args, kwargs):
- """
- If 'Series.argmin' is called via the 'numpy' library,
- the third parameter in its signature is 'out', which
- takes either an ndarray or 'None', so check if the
- 'skipna' parameter is either an instance of ndarray or
- is None, since 'skipna' itself should be a boolean
- """
- skipna, args = process_skipna(skipna, args)
- validate_argmin(args, kwargs)
- return skipna
- def validate_argmax_with_skipna(skipna, args, kwargs):
- """
- If 'Series.argmax' is called via the 'numpy' library,
- the third parameter in its signature is 'out', which
- takes either an ndarray or 'None', so check if the
- 'skipna' parameter is either an instance of ndarray or
- is None, since 'skipna' itself should be a boolean
- """
- skipna, args = process_skipna(skipna, args)
- validate_argmax(args, kwargs)
- return skipna
- ARGSORT_DEFAULTS = OrderedDict()
- ARGSORT_DEFAULTS['axis'] = -1
- ARGSORT_DEFAULTS['kind'] = 'quicksort'
- ARGSORT_DEFAULTS['order'] = None
- validate_argsort = CompatValidator(ARGSORT_DEFAULTS, fname='argsort',
- max_fname_arg_count=0, method='both')
- # two different signatures of argsort, this second validation
- # for when the `kind` param is supported
- ARGSORT_DEFAULTS_KIND = OrderedDict()
- ARGSORT_DEFAULTS_KIND['axis'] = -1
- ARGSORT_DEFAULTS_KIND['order'] = None
- validate_argsort_kind = CompatValidator(ARGSORT_DEFAULTS_KIND, fname='argsort',
- max_fname_arg_count=0, method='both')
- def validate_argsort_with_ascending(ascending, args, kwargs):
- """
- If 'Categorical.argsort' is called via the 'numpy' library, the
- first parameter in its signature is 'axis', which takes either
- an integer or 'None', so check if the 'ascending' parameter has
- either integer type or is None, since 'ascending' itself should
- be a boolean
- """
- if is_integer(ascending) or ascending is None:
- args = (ascending,) + args
- ascending = True
- validate_argsort_kind(args, kwargs, max_fname_arg_count=3)
- return ascending
- CLIP_DEFAULTS = dict(out=None)
- validate_clip = CompatValidator(CLIP_DEFAULTS, fname='clip',
- method='both', max_fname_arg_count=3)
- def validate_clip_with_axis(axis, args, kwargs):
- """
- If 'NDFrame.clip' is called via the numpy library, the third
- parameter in its signature is 'out', which can takes an ndarray,
- so check if the 'axis' parameter is an instance of ndarray, since
- 'axis' itself should either be an integer or None
- """
- if isinstance(axis, ndarray):
- args = (axis,) + args
- axis = None
- validate_clip(args, kwargs)
- return axis
- COMPRESS_DEFAULTS = OrderedDict()
- COMPRESS_DEFAULTS['axis'] = None
- COMPRESS_DEFAULTS['out'] = None
- validate_compress = CompatValidator(COMPRESS_DEFAULTS, fname='compress',
- method='both', max_fname_arg_count=1)
- CUM_FUNC_DEFAULTS = OrderedDict()
- CUM_FUNC_DEFAULTS['dtype'] = None
- CUM_FUNC_DEFAULTS['out'] = None
- validate_cum_func = CompatValidator(CUM_FUNC_DEFAULTS, method='both',
- max_fname_arg_count=1)
- validate_cumsum = CompatValidator(CUM_FUNC_DEFAULTS, fname='cumsum',
- method='both', max_fname_arg_count=1)
- def validate_cum_func_with_skipna(skipna, args, kwargs, name):
- """
- If this function is called via the 'numpy' library, the third
- parameter in its signature is 'dtype', which takes either a
- 'numpy' dtype or 'None', so check if the 'skipna' parameter is
- a boolean or not
- """
- if not is_bool(skipna):
- args = (skipna,) + args
- skipna = True
- validate_cum_func(args, kwargs, fname=name)
- return skipna
- ALLANY_DEFAULTS = OrderedDict()
- ALLANY_DEFAULTS['dtype'] = None
- ALLANY_DEFAULTS['out'] = None
- ALLANY_DEFAULTS['keepdims'] = False
- validate_all = CompatValidator(ALLANY_DEFAULTS, fname='all',
- method='both', max_fname_arg_count=1)
- validate_any = CompatValidator(ALLANY_DEFAULTS, fname='any',
- method='both', max_fname_arg_count=1)
- LOGICAL_FUNC_DEFAULTS = dict(out=None, keepdims=False)
- validate_logical_func = CompatValidator(LOGICAL_FUNC_DEFAULTS, method='kwargs')
- MINMAX_DEFAULTS = dict(out=None, keepdims=False)
- validate_min = CompatValidator(MINMAX_DEFAULTS, fname='min',
- method='both', max_fname_arg_count=1)
- validate_max = CompatValidator(MINMAX_DEFAULTS, fname='max',
- method='both', max_fname_arg_count=1)
- RESHAPE_DEFAULTS = dict(order='C')
- validate_reshape = CompatValidator(RESHAPE_DEFAULTS, fname='reshape',
- method='both', max_fname_arg_count=1)
- REPEAT_DEFAULTS = dict(axis=None)
- validate_repeat = CompatValidator(REPEAT_DEFAULTS, fname='repeat',
- method='both', max_fname_arg_count=1)
- ROUND_DEFAULTS = dict(out=None)
- validate_round = CompatValidator(ROUND_DEFAULTS, fname='round',
- method='both', max_fname_arg_count=1)
- SORT_DEFAULTS = OrderedDict()
- SORT_DEFAULTS['axis'] = -1
- SORT_DEFAULTS['kind'] = 'quicksort'
- SORT_DEFAULTS['order'] = None
- validate_sort = CompatValidator(SORT_DEFAULTS, fname='sort',
- method='kwargs')
- STAT_FUNC_DEFAULTS = OrderedDict()
- STAT_FUNC_DEFAULTS['dtype'] = None
- STAT_FUNC_DEFAULTS['out'] = None
- PROD_DEFAULTS = SUM_DEFAULTS = STAT_FUNC_DEFAULTS.copy()
- SUM_DEFAULTS['keepdims'] = False
- SUM_DEFAULTS['initial'] = None
- MEDIAN_DEFAULTS = STAT_FUNC_DEFAULTS.copy()
- MEDIAN_DEFAULTS['overwrite_input'] = False
- MEDIAN_DEFAULTS['keepdims'] = False
- STAT_FUNC_DEFAULTS['keepdims'] = False
- validate_stat_func = CompatValidator(STAT_FUNC_DEFAULTS,
- method='kwargs')
- validate_sum = CompatValidator(SUM_DEFAULTS, fname='sum',
- method='both', max_fname_arg_count=1)
- validate_prod = CompatValidator(PROD_DEFAULTS, fname="prod",
- method="both", max_fname_arg_count=1)
- validate_mean = CompatValidator(STAT_FUNC_DEFAULTS, fname='mean',
- method='both', max_fname_arg_count=1)
- validate_median = CompatValidator(MEDIAN_DEFAULTS, fname='median',
- method='both', max_fname_arg_count=1)
- STAT_DDOF_FUNC_DEFAULTS = OrderedDict()
- STAT_DDOF_FUNC_DEFAULTS['dtype'] = None
- STAT_DDOF_FUNC_DEFAULTS['out'] = None
- STAT_DDOF_FUNC_DEFAULTS['keepdims'] = False
- validate_stat_ddof_func = CompatValidator(STAT_DDOF_FUNC_DEFAULTS,
- method='kwargs')
- TAKE_DEFAULTS = OrderedDict()
- TAKE_DEFAULTS['out'] = None
- TAKE_DEFAULTS['mode'] = 'raise'
- validate_take = CompatValidator(TAKE_DEFAULTS, fname='take',
- method='kwargs')
- def validate_take_with_convert(convert, args, kwargs):
- """
- If this function is called via the 'numpy' library, the third
- parameter in its signature is 'axis', which takes either an
- ndarray or 'None', so check if the 'convert' parameter is either
- an instance of ndarray or is None
- """
- if isinstance(convert, ndarray) or convert is None:
- args = (convert,) + args
- convert = True
- validate_take(args, kwargs, max_fname_arg_count=3, method='both')
- return convert
- TRANSPOSE_DEFAULTS = dict(axes=None)
- validate_transpose = CompatValidator(TRANSPOSE_DEFAULTS, fname='transpose',
- method='both', max_fname_arg_count=0)
- def validate_transpose_for_generic(inst, kwargs):
- try:
- validate_transpose(tuple(), kwargs)
- except ValueError as e:
- klass = type(inst).__name__
- msg = str(e)
- # the Panel class actual relies on the 'axes' parameter if called
- # via the 'numpy' library, so let's make sure the error is specific
- # about saying that the parameter is not supported for particular
- # implementations of 'transpose'
- if "the 'axes' parameter is not supported" in msg:
- msg += " for {klass} instances".format(klass=klass)
- raise ValueError(msg)
- def validate_window_func(name, args, kwargs):
- numpy_args = ('axis', 'dtype', 'out')
- msg = ("numpy operations are not "
- "valid with window objects. "
- "Use .{func}() directly instead ".format(func=name))
- if len(args) > 0:
- raise UnsupportedFunctionCall(msg)
- for arg in numpy_args:
- if arg in kwargs:
- raise UnsupportedFunctionCall(msg)
- def validate_rolling_func(name, args, kwargs):
- numpy_args = ('axis', 'dtype', 'out')
- msg = ("numpy operations are not "
- "valid with window objects. "
- "Use .rolling(...).{func}() instead ".format(func=name))
- if len(args) > 0:
- raise UnsupportedFunctionCall(msg)
- for arg in numpy_args:
- if arg in kwargs:
- raise UnsupportedFunctionCall(msg)
- def validate_expanding_func(name, args, kwargs):
- numpy_args = ('axis', 'dtype', 'out')
- msg = ("numpy operations are not "
- "valid with window objects. "
- "Use .expanding(...).{func}() instead ".format(func=name))
- if len(args) > 0:
- raise UnsupportedFunctionCall(msg)
- for arg in numpy_args:
- if arg in kwargs:
- raise UnsupportedFunctionCall(msg)
- def validate_groupby_func(name, args, kwargs, allowed=None):
- """
- 'args' and 'kwargs' should be empty, except for allowed
- kwargs because all of
- their necessary parameters are explicitly listed in
- the function signature
- """
- if allowed is None:
- allowed = []
- kwargs = set(kwargs) - set(allowed)
- if len(args) + len(kwargs) > 0:
- raise UnsupportedFunctionCall((
- "numpy operations are not valid "
- "with groupby. Use .groupby(...)."
- "{func}() instead".format(func=name)))
- RESAMPLER_NUMPY_OPS = ('min', 'max', 'sum', 'prod',
- 'mean', 'std', 'var')
- def validate_resampler_func(method, args, kwargs):
- """
- 'args' and 'kwargs' should be empty because all of
- their necessary parameters are explicitly listed in
- the function signature
- """
- if len(args) + len(kwargs) > 0:
- if method in RESAMPLER_NUMPY_OPS:
- raise UnsupportedFunctionCall((
- "numpy operations are not valid "
- "with resample. Use .resample(...)."
- "{func}() instead".format(func=method)))
- else:
- raise TypeError("too many arguments passed in")
- def validate_minmax_axis(axis):
- """
- Ensure that the axis argument passed to min, max, argmin, or argmax is
- zero or None, as otherwise it will be incorrectly ignored.
- Parameters
- ----------
- axis : int or None
- Raises
- ------
- ValueError
- """
- ndim = 1 # hard-coded for Index
- if axis is None:
- return
- if axis >= ndim or (axis < 0 and ndim + axis < 0):
- raise ValueError("`axis` must be fewer than the number of "
- "dimensions ({ndim})".format(ndim=ndim))
|