_validators.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358
  1. """
  2. Module that contains many useful utilities
  3. for validating data or function arguments
  4. """
  5. import warnings
  6. from pandas.core.dtypes.common import is_bool
  7. def _check_arg_length(fname, args, max_fname_arg_count, compat_args):
  8. """
  9. Checks whether 'args' has length of at most 'compat_args'. Raises
  10. a TypeError if that is not the case, similar to in Python when a
  11. function is called with too many arguments.
  12. """
  13. if max_fname_arg_count < 0:
  14. raise ValueError("'max_fname_arg_count' must be non-negative")
  15. if len(args) > len(compat_args):
  16. max_arg_count = len(compat_args) + max_fname_arg_count
  17. actual_arg_count = len(args) + max_fname_arg_count
  18. argument = 'argument' if max_arg_count == 1 else 'arguments'
  19. raise TypeError(
  20. "{fname}() takes at most {max_arg} {argument} "
  21. "({given_arg} given)".format(
  22. fname=fname, max_arg=max_arg_count,
  23. argument=argument, given_arg=actual_arg_count))
  24. def _check_for_default_values(fname, arg_val_dict, compat_args):
  25. """
  26. Check that the keys in `arg_val_dict` are mapped to their
  27. default values as specified in `compat_args`.
  28. Note that this function is to be called only when it has been
  29. checked that arg_val_dict.keys() is a subset of compat_args
  30. """
  31. for key in arg_val_dict:
  32. # try checking equality directly with '=' operator,
  33. # as comparison may have been overridden for the left
  34. # hand object
  35. try:
  36. v1 = arg_val_dict[key]
  37. v2 = compat_args[key]
  38. # check for None-ness otherwise we could end up
  39. # comparing a numpy array vs None
  40. if (v1 is not None and v2 is None) or \
  41. (v1 is None and v2 is not None):
  42. match = False
  43. else:
  44. match = (v1 == v2)
  45. if not is_bool(match):
  46. raise ValueError("'match' is not a boolean")
  47. # could not compare them directly, so try comparison
  48. # using the 'is' operator
  49. except ValueError:
  50. match = (arg_val_dict[key] is compat_args[key])
  51. if not match:
  52. raise ValueError(("the '{arg}' parameter is not "
  53. "supported in the pandas "
  54. "implementation of {fname}()".
  55. format(fname=fname, arg=key)))
  56. def validate_args(fname, args, max_fname_arg_count, compat_args):
  57. """
  58. Checks whether the length of the `*args` argument passed into a function
  59. has at most `len(compat_args)` arguments and whether or not all of these
  60. elements in `args` are set to their default values.
  61. fname: str
  62. The name of the function being passed the `*args` parameter
  63. args: tuple
  64. The `*args` parameter passed into a function
  65. max_fname_arg_count: int
  66. The maximum number of arguments that the function `fname`
  67. can accept, excluding those in `args`. Used for displaying
  68. appropriate error messages. Must be non-negative.
  69. compat_args: OrderedDict
  70. A ordered dictionary of keys and their associated default values.
  71. In order to accommodate buggy behaviour in some versions of `numpy`,
  72. where a signature displayed keyword arguments but then passed those
  73. arguments **positionally** internally when calling downstream
  74. implementations, an ordered dictionary ensures that the original
  75. order of the keyword arguments is enforced. Note that if there is
  76. only one key, a generic dict can be passed in as well.
  77. Raises
  78. ------
  79. TypeError if `args` contains more values than there are `compat_args`
  80. ValueError if `args` contains values that do not correspond to those
  81. of the default values specified in `compat_args`
  82. """
  83. _check_arg_length(fname, args, max_fname_arg_count, compat_args)
  84. # We do this so that we can provide a more informative
  85. # error message about the parameters that we are not
  86. # supporting in the pandas implementation of 'fname'
  87. kwargs = dict(zip(compat_args, args))
  88. _check_for_default_values(fname, kwargs, compat_args)
  89. def _check_for_invalid_keys(fname, kwargs, compat_args):
  90. """
  91. Checks whether 'kwargs' contains any keys that are not
  92. in 'compat_args' and raises a TypeError if there is one.
  93. """
  94. # set(dict) --> set of the dictionary's keys
  95. diff = set(kwargs) - set(compat_args)
  96. if diff:
  97. bad_arg = list(diff)[0]
  98. raise TypeError(("{fname}() got an unexpected "
  99. "keyword argument '{arg}'".
  100. format(fname=fname, arg=bad_arg)))
  101. def validate_kwargs(fname, kwargs, compat_args):
  102. """
  103. Checks whether parameters passed to the **kwargs argument in a
  104. function `fname` are valid parameters as specified in `*compat_args`
  105. and whether or not they are set to their default values.
  106. Parameters
  107. ----------
  108. fname: str
  109. The name of the function being passed the `**kwargs` parameter
  110. kwargs: dict
  111. The `**kwargs` parameter passed into `fname`
  112. compat_args: dict
  113. A dictionary of keys that `kwargs` is allowed to have and their
  114. associated default values
  115. Raises
  116. ------
  117. TypeError if `kwargs` contains keys not in `compat_args`
  118. ValueError if `kwargs` contains keys in `compat_args` that do not
  119. map to the default values specified in `compat_args`
  120. """
  121. kwds = kwargs.copy()
  122. _check_for_invalid_keys(fname, kwargs, compat_args)
  123. _check_for_default_values(fname, kwds, compat_args)
  124. def validate_args_and_kwargs(fname, args, kwargs,
  125. max_fname_arg_count,
  126. compat_args):
  127. """
  128. Checks whether parameters passed to the *args and **kwargs argument in a
  129. function `fname` are valid parameters as specified in `*compat_args`
  130. and whether or not they are set to their default values.
  131. Parameters
  132. ----------
  133. fname: str
  134. The name of the function being passed the `**kwargs` parameter
  135. args: tuple
  136. The `*args` parameter passed into a function
  137. kwargs: dict
  138. The `**kwargs` parameter passed into `fname`
  139. max_fname_arg_count: int
  140. The minimum number of arguments that the function `fname`
  141. requires, excluding those in `args`. Used for displaying
  142. appropriate error messages. Must be non-negative.
  143. compat_args: OrderedDict
  144. A ordered dictionary of keys that `kwargs` is allowed to
  145. have and their associated default values. Note that if there
  146. is only one key, a generic dict can be passed in as well.
  147. Raises
  148. ------
  149. TypeError if `args` contains more values than there are
  150. `compat_args` OR `kwargs` contains keys not in `compat_args`
  151. ValueError if `args` contains values not at the default value (`None`)
  152. `kwargs` contains keys in `compat_args` that do not map to the default
  153. value as specified in `compat_args`
  154. See Also
  155. --------
  156. validate_args : Purely args validation.
  157. validate_kwargs : Purely kwargs validation.
  158. """
  159. # Check that the total number of arguments passed in (i.e.
  160. # args and kwargs) does not exceed the length of compat_args
  161. _check_arg_length(fname, args + tuple(kwargs.values()),
  162. max_fname_arg_count, compat_args)
  163. # Check there is no overlap with the positional and keyword
  164. # arguments, similar to what is done in actual Python functions
  165. args_dict = dict(zip(compat_args, args))
  166. for key in args_dict:
  167. if key in kwargs:
  168. raise TypeError("{fname}() got multiple values for keyword "
  169. "argument '{arg}'".format(fname=fname, arg=key))
  170. kwargs.update(args_dict)
  171. validate_kwargs(fname, kwargs, compat_args)
  172. def validate_bool_kwarg(value, arg_name):
  173. """ Ensures that argument passed in arg_name is of type bool. """
  174. if not (is_bool(value) or value is None):
  175. raise ValueError('For argument "{arg}" expected type bool, received '
  176. 'type {typ}.'.format(arg=arg_name,
  177. typ=type(value).__name__))
  178. return value
  179. def validate_axis_style_args(data, args, kwargs, arg_name, method_name):
  180. """Argument handler for mixed index, columns / axis functions
  181. In an attempt to handle both `.method(index, columns)`, and
  182. `.method(arg, axis=.)`, we have to do some bad things to argument
  183. parsing. This translates all arguments to `{index=., columns=.}` style.
  184. Parameters
  185. ----------
  186. data : DataFrame or Panel
  187. arg : tuple
  188. All positional arguments from the user
  189. kwargs : dict
  190. All keyword arguments from the user
  191. arg_name, method_name : str
  192. Used for better error messages
  193. Returns
  194. -------
  195. kwargs : dict
  196. A dictionary of keyword arguments. Doesn't modify ``kwargs``
  197. inplace, so update them with the return value here.
  198. Examples
  199. --------
  200. >>> df._validate_axis_style_args((str.upper,), {'columns': id},
  201. ... 'mapper', 'rename')
  202. {'columns': <function id>, 'index': <method 'upper' of 'str' objects>}
  203. This emits a warning
  204. >>> df._validate_axis_style_args((str.upper, id), {},
  205. ... 'mapper', 'rename')
  206. {'columns': <function id>, 'index': <method 'upper' of 'str' objects>}
  207. """
  208. # TODO(PY3): Change to keyword-only args and remove all this
  209. out = {}
  210. # Goal: fill 'out' with index/columns-style arguments
  211. # like out = {'index': foo, 'columns': bar}
  212. # Start by validating for consistency
  213. if 'axis' in kwargs and any(x in kwargs for x in data._AXIS_NUMBERS):
  214. msg = "Cannot specify both 'axis' and any of 'index' or 'columns'."
  215. raise TypeError(msg)
  216. # First fill with explicit values provided by the user...
  217. if arg_name in kwargs:
  218. if args:
  219. msg = ("{} got multiple values for argument "
  220. "'{}'".format(method_name, arg_name))
  221. raise TypeError(msg)
  222. axis = data._get_axis_name(kwargs.get('axis', 0))
  223. out[axis] = kwargs[arg_name]
  224. # More user-provided arguments, now from kwargs
  225. for k, v in kwargs.items():
  226. try:
  227. ax = data._get_axis_name(k)
  228. except ValueError:
  229. pass
  230. else:
  231. out[ax] = v
  232. # All user-provided kwargs have been handled now.
  233. # Now we supplement with positional arguments, emitting warnings
  234. # when there's ambiguity and raising when there's conflicts
  235. if len(args) == 0:
  236. pass # It's up to the function to decide if this is valid
  237. elif len(args) == 1:
  238. axis = data._get_axis_name(kwargs.get('axis', 0))
  239. out[axis] = args[0]
  240. elif len(args) == 2:
  241. if 'axis' in kwargs:
  242. # Unambiguously wrong
  243. msg = ("Cannot specify both 'axis' and any of 'index' "
  244. "or 'columns'")
  245. raise TypeError(msg)
  246. msg = ("Interpreting call\n\t'.{method_name}(a, b)' as "
  247. "\n\t'.{method_name}(index=a, columns=b)'.\nUse named "
  248. "arguments to remove any ambiguity. In the future, using "
  249. "positional arguments for 'index' or 'columns' will raise "
  250. " a 'TypeError'.")
  251. warnings.warn(msg.format(method_name=method_name,), FutureWarning,
  252. stacklevel=4)
  253. out[data._AXIS_NAMES[0]] = args[0]
  254. out[data._AXIS_NAMES[1]] = args[1]
  255. else:
  256. msg = "Cannot specify all of '{}', 'index', 'columns'."
  257. raise TypeError(msg.format(arg_name))
  258. return out
  259. def validate_fillna_kwargs(value, method, validate_scalar_dict_value=True):
  260. """Validate the keyword arguments to 'fillna'.
  261. This checks that exactly one of 'value' and 'method' is specified.
  262. If 'method' is specified, this validates that it's a valid method.
  263. Parameters
  264. ----------
  265. value, method : object
  266. The 'value' and 'method' keyword arguments for 'fillna'.
  267. validate_scalar_dict_value : bool, default True
  268. Whether to validate that 'value' is a scalar or dict. Specifically,
  269. validate that it is not a list or tuple.
  270. Returns
  271. -------
  272. value, method : object
  273. """
  274. from pandas.core.missing import clean_fill_method
  275. if value is None and method is None:
  276. raise ValueError("Must specify a fill 'value' or 'method'.")
  277. elif value is None and method is not None:
  278. method = clean_fill_method(method)
  279. elif value is not None and method is None:
  280. if validate_scalar_dict_value and isinstance(value, (list, tuple)):
  281. raise TypeError('"value" parameter must be a scalar or dict, but '
  282. 'you passed a "{0}"'.format(type(value).__name__))
  283. elif value is not None and method is not None:
  284. raise ValueError("Cannot specify both 'value' and 'method'.")
  285. return value, method