function.py 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402
"""
For compatibility with numpy libraries, pandas functions or
methods have to accept '*args' and '**kwargs' parameters to
accommodate numpy arguments that are not actually used or
respected in the pandas implementation.

To ensure that users do not abuse these parameters, validation
is performed in 'validators.py' to make sure that any extra
parameters passed correspond ONLY to those in the numpy signature.
Part of that validation includes whether or not the user attempted
to pass in non-default values for these extraneous parameters. As we
want to discourage users from relying on these parameters when calling
the pandas implementation, we want them only to pass in the default values
for these parameters.

This module provides a set of commonly used default arguments for functions
and methods that are spread throughout the codebase. This module will make it
easier to adjust to future upstream changes in the analogous numpy signatures.
"""
  18. from numpy import ndarray
  19. from pandas.compat import OrderedDict
  20. from pandas.errors import UnsupportedFunctionCall
  21. from pandas.util._validators import (
  22. validate_args, validate_args_and_kwargs, validate_kwargs)
  23. from pandas.core.dtypes.common import is_bool, is_integer
  24. class CompatValidator(object):
  25. def __init__(self, defaults, fname=None, method=None,
  26. max_fname_arg_count=None):
  27. self.fname = fname
  28. self.method = method
  29. self.defaults = defaults
  30. self.max_fname_arg_count = max_fname_arg_count
  31. def __call__(self, args, kwargs, fname=None,
  32. max_fname_arg_count=None, method=None):
  33. if args or kwargs:
  34. fname = self.fname if fname is None else fname
  35. max_fname_arg_count = (self.max_fname_arg_count if
  36. max_fname_arg_count is None
  37. else max_fname_arg_count)
  38. method = self.method if method is None else method
  39. if method == 'args':
  40. validate_args(fname, args, max_fname_arg_count, self.defaults)
  41. elif method == 'kwargs':
  42. validate_kwargs(fname, kwargs, self.defaults)
  43. elif method == 'both':
  44. validate_args_and_kwargs(fname, args, kwargs,
  45. max_fname_arg_count,
  46. self.defaults)
  47. else:
  48. raise ValueError("invalid validation method "
  49. "'{method}'".format(method=method))
  50. ARGMINMAX_DEFAULTS = dict(out=None)
  51. validate_argmin = CompatValidator(ARGMINMAX_DEFAULTS, fname='argmin',
  52. method='both', max_fname_arg_count=1)
  53. validate_argmax = CompatValidator(ARGMINMAX_DEFAULTS, fname='argmax',
  54. method='both', max_fname_arg_count=1)
  55. def process_skipna(skipna, args):
  56. if isinstance(skipna, ndarray) or skipna is None:
  57. args = (skipna,) + args
  58. skipna = True
  59. return skipna, args
  60. def validate_argmin_with_skipna(skipna, args, kwargs):
  61. """
  62. If 'Series.argmin' is called via the 'numpy' library,
  63. the third parameter in its signature is 'out', which
  64. takes either an ndarray or 'None', so check if the
  65. 'skipna' parameter is either an instance of ndarray or
  66. is None, since 'skipna' itself should be a boolean
  67. """
  68. skipna, args = process_skipna(skipna, args)
  69. validate_argmin(args, kwargs)
  70. return skipna
  71. def validate_argmax_with_skipna(skipna, args, kwargs):
  72. """
  73. If 'Series.argmax' is called via the 'numpy' library,
  74. the third parameter in its signature is 'out', which
  75. takes either an ndarray or 'None', so check if the
  76. 'skipna' parameter is either an instance of ndarray or
  77. is None, since 'skipna' itself should be a boolean
  78. """
  79. skipna, args = process_skipna(skipna, args)
  80. validate_argmax(args, kwargs)
  81. return skipna
  82. ARGSORT_DEFAULTS = OrderedDict()
  83. ARGSORT_DEFAULTS['axis'] = -1
  84. ARGSORT_DEFAULTS['kind'] = 'quicksort'
  85. ARGSORT_DEFAULTS['order'] = None
  86. validate_argsort = CompatValidator(ARGSORT_DEFAULTS, fname='argsort',
  87. max_fname_arg_count=0, method='both')
  88. # two different signatures of argsort, this second validation
  89. # for when the `kind` param is supported
  90. ARGSORT_DEFAULTS_KIND = OrderedDict()
  91. ARGSORT_DEFAULTS_KIND['axis'] = -1
  92. ARGSORT_DEFAULTS_KIND['order'] = None
  93. validate_argsort_kind = CompatValidator(ARGSORT_DEFAULTS_KIND, fname='argsort',
  94. max_fname_arg_count=0, method='both')
  95. def validate_argsort_with_ascending(ascending, args, kwargs):
  96. """
  97. If 'Categorical.argsort' is called via the 'numpy' library, the
  98. first parameter in its signature is 'axis', which takes either
  99. an integer or 'None', so check if the 'ascending' parameter has
  100. either integer type or is None, since 'ascending' itself should
  101. be a boolean
  102. """
  103. if is_integer(ascending) or ascending is None:
  104. args = (ascending,) + args
  105. ascending = True
  106. validate_argsort_kind(args, kwargs, max_fname_arg_count=3)
  107. return ascending
  108. CLIP_DEFAULTS = dict(out=None)
  109. validate_clip = CompatValidator(CLIP_DEFAULTS, fname='clip',
  110. method='both', max_fname_arg_count=3)
  111. def validate_clip_with_axis(axis, args, kwargs):
  112. """
  113. If 'NDFrame.clip' is called via the numpy library, the third
  114. parameter in its signature is 'out', which can takes an ndarray,
  115. so check if the 'axis' parameter is an instance of ndarray, since
  116. 'axis' itself should either be an integer or None
  117. """
  118. if isinstance(axis, ndarray):
  119. args = (axis,) + args
  120. axis = None
  121. validate_clip(args, kwargs)
  122. return axis
  123. COMPRESS_DEFAULTS = OrderedDict()
  124. COMPRESS_DEFAULTS['axis'] = None
  125. COMPRESS_DEFAULTS['out'] = None
  126. validate_compress = CompatValidator(COMPRESS_DEFAULTS, fname='compress',
  127. method='both', max_fname_arg_count=1)
  128. CUM_FUNC_DEFAULTS = OrderedDict()
  129. CUM_FUNC_DEFAULTS['dtype'] = None
  130. CUM_FUNC_DEFAULTS['out'] = None
  131. validate_cum_func = CompatValidator(CUM_FUNC_DEFAULTS, method='both',
  132. max_fname_arg_count=1)
  133. validate_cumsum = CompatValidator(CUM_FUNC_DEFAULTS, fname='cumsum',
  134. method='both', max_fname_arg_count=1)
  135. def validate_cum_func_with_skipna(skipna, args, kwargs, name):
  136. """
  137. If this function is called via the 'numpy' library, the third
  138. parameter in its signature is 'dtype', which takes either a
  139. 'numpy' dtype or 'None', so check if the 'skipna' parameter is
  140. a boolean or not
  141. """
  142. if not is_bool(skipna):
  143. args = (skipna,) + args
  144. skipna = True
  145. validate_cum_func(args, kwargs, fname=name)
  146. return skipna
  147. ALLANY_DEFAULTS = OrderedDict()
  148. ALLANY_DEFAULTS['dtype'] = None
  149. ALLANY_DEFAULTS['out'] = None
  150. ALLANY_DEFAULTS['keepdims'] = False
  151. validate_all = CompatValidator(ALLANY_DEFAULTS, fname='all',
  152. method='both', max_fname_arg_count=1)
  153. validate_any = CompatValidator(ALLANY_DEFAULTS, fname='any',
  154. method='both', max_fname_arg_count=1)
  155. LOGICAL_FUNC_DEFAULTS = dict(out=None, keepdims=False)
  156. validate_logical_func = CompatValidator(LOGICAL_FUNC_DEFAULTS, method='kwargs')
  157. MINMAX_DEFAULTS = dict(out=None, keepdims=False)
  158. validate_min = CompatValidator(MINMAX_DEFAULTS, fname='min',
  159. method='both', max_fname_arg_count=1)
  160. validate_max = CompatValidator(MINMAX_DEFAULTS, fname='max',
  161. method='both', max_fname_arg_count=1)
  162. RESHAPE_DEFAULTS = dict(order='C')
  163. validate_reshape = CompatValidator(RESHAPE_DEFAULTS, fname='reshape',
  164. method='both', max_fname_arg_count=1)
  165. REPEAT_DEFAULTS = dict(axis=None)
  166. validate_repeat = CompatValidator(REPEAT_DEFAULTS, fname='repeat',
  167. method='both', max_fname_arg_count=1)
  168. ROUND_DEFAULTS = dict(out=None)
  169. validate_round = CompatValidator(ROUND_DEFAULTS, fname='round',
  170. method='both', max_fname_arg_count=1)
  171. SORT_DEFAULTS = OrderedDict()
  172. SORT_DEFAULTS['axis'] = -1
  173. SORT_DEFAULTS['kind'] = 'quicksort'
  174. SORT_DEFAULTS['order'] = None
  175. validate_sort = CompatValidator(SORT_DEFAULTS, fname='sort',
  176. method='kwargs')
  177. STAT_FUNC_DEFAULTS = OrderedDict()
  178. STAT_FUNC_DEFAULTS['dtype'] = None
  179. STAT_FUNC_DEFAULTS['out'] = None
  180. PROD_DEFAULTS = SUM_DEFAULTS = STAT_FUNC_DEFAULTS.copy()
  181. SUM_DEFAULTS['keepdims'] = False
  182. SUM_DEFAULTS['initial'] = None
  183. MEDIAN_DEFAULTS = STAT_FUNC_DEFAULTS.copy()
  184. MEDIAN_DEFAULTS['overwrite_input'] = False
  185. MEDIAN_DEFAULTS['keepdims'] = False
  186. STAT_FUNC_DEFAULTS['keepdims'] = False
  187. validate_stat_func = CompatValidator(STAT_FUNC_DEFAULTS,
  188. method='kwargs')
  189. validate_sum = CompatValidator(SUM_DEFAULTS, fname='sum',
  190. method='both', max_fname_arg_count=1)
  191. validate_prod = CompatValidator(PROD_DEFAULTS, fname="prod",
  192. method="both", max_fname_arg_count=1)
  193. validate_mean = CompatValidator(STAT_FUNC_DEFAULTS, fname='mean',
  194. method='both', max_fname_arg_count=1)
  195. validate_median = CompatValidator(MEDIAN_DEFAULTS, fname='median',
  196. method='both', max_fname_arg_count=1)
  197. STAT_DDOF_FUNC_DEFAULTS = OrderedDict()
  198. STAT_DDOF_FUNC_DEFAULTS['dtype'] = None
  199. STAT_DDOF_FUNC_DEFAULTS['out'] = None
  200. STAT_DDOF_FUNC_DEFAULTS['keepdims'] = False
  201. validate_stat_ddof_func = CompatValidator(STAT_DDOF_FUNC_DEFAULTS,
  202. method='kwargs')
  203. TAKE_DEFAULTS = OrderedDict()
  204. TAKE_DEFAULTS['out'] = None
  205. TAKE_DEFAULTS['mode'] = 'raise'
  206. validate_take = CompatValidator(TAKE_DEFAULTS, fname='take',
  207. method='kwargs')
  208. def validate_take_with_convert(convert, args, kwargs):
  209. """
  210. If this function is called via the 'numpy' library, the third
  211. parameter in its signature is 'axis', which takes either an
  212. ndarray or 'None', so check if the 'convert' parameter is either
  213. an instance of ndarray or is None
  214. """
  215. if isinstance(convert, ndarray) or convert is None:
  216. args = (convert,) + args
  217. convert = True
  218. validate_take(args, kwargs, max_fname_arg_count=3, method='both')
  219. return convert
  220. TRANSPOSE_DEFAULTS = dict(axes=None)
  221. validate_transpose = CompatValidator(TRANSPOSE_DEFAULTS, fname='transpose',
  222. method='both', max_fname_arg_count=0)
  223. def validate_transpose_for_generic(inst, kwargs):
  224. try:
  225. validate_transpose(tuple(), kwargs)
  226. except ValueError as e:
  227. klass = type(inst).__name__
  228. msg = str(e)
  229. # the Panel class actual relies on the 'axes' parameter if called
  230. # via the 'numpy' library, so let's make sure the error is specific
  231. # about saying that the parameter is not supported for particular
  232. # implementations of 'transpose'
  233. if "the 'axes' parameter is not supported" in msg:
  234. msg += " for {klass} instances".format(klass=klass)
  235. raise ValueError(msg)
  236. def validate_window_func(name, args, kwargs):
  237. numpy_args = ('axis', 'dtype', 'out')
  238. msg = ("numpy operations are not "
  239. "valid with window objects. "
  240. "Use .{func}() directly instead ".format(func=name))
  241. if len(args) > 0:
  242. raise UnsupportedFunctionCall(msg)
  243. for arg in numpy_args:
  244. if arg in kwargs:
  245. raise UnsupportedFunctionCall(msg)
  246. def validate_rolling_func(name, args, kwargs):
  247. numpy_args = ('axis', 'dtype', 'out')
  248. msg = ("numpy operations are not "
  249. "valid with window objects. "
  250. "Use .rolling(...).{func}() instead ".format(func=name))
  251. if len(args) > 0:
  252. raise UnsupportedFunctionCall(msg)
  253. for arg in numpy_args:
  254. if arg in kwargs:
  255. raise UnsupportedFunctionCall(msg)
  256. def validate_expanding_func(name, args, kwargs):
  257. numpy_args = ('axis', 'dtype', 'out')
  258. msg = ("numpy operations are not "
  259. "valid with window objects. "
  260. "Use .expanding(...).{func}() instead ".format(func=name))
  261. if len(args) > 0:
  262. raise UnsupportedFunctionCall(msg)
  263. for arg in numpy_args:
  264. if arg in kwargs:
  265. raise UnsupportedFunctionCall(msg)
  266. def validate_groupby_func(name, args, kwargs, allowed=None):
  267. """
  268. 'args' and 'kwargs' should be empty, except for allowed
  269. kwargs because all of
  270. their necessary parameters are explicitly listed in
  271. the function signature
  272. """
  273. if allowed is None:
  274. allowed = []
  275. kwargs = set(kwargs) - set(allowed)
  276. if len(args) + len(kwargs) > 0:
  277. raise UnsupportedFunctionCall((
  278. "numpy operations are not valid "
  279. "with groupby. Use .groupby(...)."
  280. "{func}() instead".format(func=name)))
  281. RESAMPLER_NUMPY_OPS = ('min', 'max', 'sum', 'prod',
  282. 'mean', 'std', 'var')
  283. def validate_resampler_func(method, args, kwargs):
  284. """
  285. 'args' and 'kwargs' should be empty because all of
  286. their necessary parameters are explicitly listed in
  287. the function signature
  288. """
  289. if len(args) + len(kwargs) > 0:
  290. if method in RESAMPLER_NUMPY_OPS:
  291. raise UnsupportedFunctionCall((
  292. "numpy operations are not valid "
  293. "with resample. Use .resample(...)."
  294. "{func}() instead".format(func=method)))
  295. else:
  296. raise TypeError("too many arguments passed in")
  297. def validate_minmax_axis(axis):
  298. """
  299. Ensure that the axis argument passed to min, max, argmin, or argmax is
  300. zero or None, as otherwise it will be incorrectly ignored.
  301. Parameters
  302. ----------
  303. axis : int or None
  304. Raises
  305. ------
  306. ValueError
  307. """
  308. ndim = 1 # hard-coded for Index
  309. if axis is None:
  310. return
  311. if axis >= ndim or (axis < 0 and ndim + axis < 0):
  312. raise ValueError("`axis` must be fewer than the number of "
  313. "dimensions ({ndim})".format(ndim=ndim))