common.py 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472
"""
Misc tools for implementing data structures.

Note: pandas.core.common is *not* part of the public API.
"""
  5. import collections
  6. from datetime import datetime, timedelta
  7. from functools import partial
  8. import inspect
  9. import numpy as np
  10. from pandas._libs import lib, tslibs
  11. import pandas.compat as compat
  12. from pandas.compat import PY36, OrderedDict, iteritems
  13. from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike
  14. from pandas.core.dtypes.common import (
  15. is_array_like, is_bool_dtype, is_extension_array_dtype, is_integer)
  16. from pandas.core.dtypes.generic import ABCIndex, ABCIndexClass, ABCSeries
  17. from pandas.core.dtypes.inference import _iterable_not_string
  18. from pandas.core.dtypes.missing import isna, isnull, notnull # noqa
  19. class SettingWithCopyError(ValueError):
  20. pass
  21. class SettingWithCopyWarning(Warning):
  22. pass
  23. def flatten(l):
  24. """Flatten an arbitrarily nested sequence.
  25. Parameters
  26. ----------
  27. l : sequence
  28. The non string sequence to flatten
  29. Notes
  30. -----
  31. This doesn't consider strings sequences.
  32. Returns
  33. -------
  34. flattened : generator
  35. """
  36. for el in l:
  37. if _iterable_not_string(el):
  38. for s in flatten(el):
  39. yield s
  40. else:
  41. yield el
  42. def consensus_name_attr(objs):
  43. name = objs[0].name
  44. for obj in objs[1:]:
  45. try:
  46. if obj.name != name:
  47. name = None
  48. except ValueError:
  49. name = None
  50. return name
  51. def maybe_box(indexer, values, obj, key):
  52. # if we have multiples coming back, box em
  53. if isinstance(values, np.ndarray):
  54. return obj[indexer.get_loc(key)]
  55. # return the value
  56. return values
  57. def maybe_box_datetimelike(value):
  58. # turn a datetime like into a Timestamp/timedelta as needed
  59. if isinstance(value, (np.datetime64, datetime)):
  60. value = tslibs.Timestamp(value)
  61. elif isinstance(value, (np.timedelta64, timedelta)):
  62. value = tslibs.Timedelta(value)
  63. return value
# Alias of ``lib.values_from_object`` exposed under this module's namespace.
values_from_object = lib.values_from_object
  65. def is_bool_indexer(key):
  66. # type: (Any) -> bool
  67. """
  68. Check whether `key` is a valid boolean indexer.
  69. Parameters
  70. ----------
  71. key : Any
  72. Only list-likes may be considered boolean indexers.
  73. All other types are not considered a boolean indexer.
  74. For array-like input, boolean ndarrays or ExtensionArrays
  75. with ``_is_boolean`` set are considered boolean indexers.
  76. Returns
  77. -------
  78. bool
  79. Raises
  80. ------
  81. ValueError
  82. When the array is an object-dtype ndarray or ExtensionArray
  83. and contains missing values.
  84. """
  85. na_msg = 'cannot index with vector containing NA / NaN values'
  86. if (isinstance(key, (ABCSeries, np.ndarray, ABCIndex)) or
  87. (is_array_like(key) and is_extension_array_dtype(key.dtype))):
  88. if key.dtype == np.object_:
  89. key = np.asarray(values_from_object(key))
  90. if not lib.is_bool_array(key):
  91. if isna(key).any():
  92. raise ValueError(na_msg)
  93. return False
  94. return True
  95. elif is_bool_dtype(key.dtype):
  96. # an ndarray with bool-dtype by definition has no missing values.
  97. # So we only need to check for NAs in ExtensionArrays
  98. if is_extension_array_dtype(key.dtype):
  99. if np.any(key.isna()):
  100. raise ValueError(na_msg)
  101. return True
  102. elif isinstance(key, list):
  103. try:
  104. arr = np.asarray(key)
  105. return arr.dtype == np.bool_ and len(arr) == len(key)
  106. except TypeError: # pragma: no cover
  107. return False
  108. return False
  109. def cast_scalar_indexer(val):
  110. """
  111. To avoid numpy DeprecationWarnings, cast float to integer where valid.
  112. Parameters
  113. ----------
  114. val : scalar
  115. Returns
  116. -------
  117. outval : scalar
  118. """
  119. # assumes lib.is_scalar(val)
  120. if lib.is_float(val) and val == int(val):
  121. return int(val)
  122. return val
  123. def _not_none(*args):
  124. """Returns a generator consisting of the arguments that are not None"""
  125. return (arg for arg in args if arg is not None)
  126. def _any_none(*args):
  127. """Returns a boolean indicating if any argument is None"""
  128. for arg in args:
  129. if arg is None:
  130. return True
  131. return False
  132. def _all_none(*args):
  133. """Returns a boolean indicating if all arguments are None"""
  134. for arg in args:
  135. if arg is not None:
  136. return False
  137. return True
  138. def _any_not_none(*args):
  139. """Returns a boolean indicating if any argument is not None"""
  140. for arg in args:
  141. if arg is not None:
  142. return True
  143. return False
  144. def _all_not_none(*args):
  145. """Returns a boolean indicating if all arguments are not None"""
  146. for arg in args:
  147. if arg is None:
  148. return False
  149. return True
  150. def count_not_none(*args):
  151. """Returns the count of arguments that are not None"""
  152. return sum(x is not None for x in args)
  153. def try_sort(iterable):
  154. listed = list(iterable)
  155. try:
  156. return sorted(listed)
  157. except Exception:
  158. return listed
  159. def dict_keys_to_ordered_list(mapping):
  160. # when pandas drops support for Python < 3.6, this function
  161. # can be replaced by a simple list(mapping.keys())
  162. if PY36 or isinstance(mapping, OrderedDict):
  163. keys = list(mapping.keys())
  164. else:
  165. keys = try_sort(mapping)
  166. return keys
  167. def asarray_tuplesafe(values, dtype=None):
  168. if not (isinstance(values, (list, tuple)) or hasattr(values, '__array__')):
  169. values = list(values)
  170. elif isinstance(values, ABCIndexClass):
  171. return values.values
  172. if isinstance(values, list) and dtype in [np.object_, object]:
  173. return construct_1d_object_array_from_listlike(values)
  174. result = np.asarray(values, dtype=dtype)
  175. if issubclass(result.dtype.type, compat.string_types):
  176. result = np.asarray(values, dtype=object)
  177. if result.ndim == 2:
  178. # Avoid building an array of arrays:
  179. # TODO: verify whether any path hits this except #18819 (invalid)
  180. values = [tuple(x) for x in values]
  181. result = construct_1d_object_array_from_listlike(values)
  182. return result
  183. def index_labels_to_array(labels, dtype=None):
  184. """
  185. Transform label or iterable of labels to array, for use in Index.
  186. Parameters
  187. ----------
  188. dtype : dtype
  189. If specified, use as dtype of the resulting array, otherwise infer.
  190. Returns
  191. -------
  192. array
  193. """
  194. if isinstance(labels, (compat.string_types, tuple)):
  195. labels = [labels]
  196. if not isinstance(labels, (list, np.ndarray)):
  197. try:
  198. labels = list(labels)
  199. except TypeError: # non-iterable
  200. labels = [labels]
  201. labels = asarray_tuplesafe(labels, dtype=dtype)
  202. return labels
  203. def maybe_make_list(obj):
  204. if obj is not None and not isinstance(obj, (tuple, list)):
  205. return [obj]
  206. return obj
  207. def is_null_slice(obj):
  208. """ we have a null slice """
  209. return (isinstance(obj, slice) and obj.start is None and
  210. obj.stop is None and obj.step is None)
  211. def is_true_slices(l):
  212. """
  213. Find non-trivial slices in "l": return a list of booleans with same length.
  214. """
  215. return [isinstance(k, slice) and not is_null_slice(k) for k in l]
  216. # TODO: used only once in indexing; belongs elsewhere?
  217. def is_full_slice(obj, l):
  218. """ we have a full length slice """
  219. return (isinstance(obj, slice) and obj.start == 0 and obj.stop == l and
  220. obj.step is None)
  221. def get_callable_name(obj):
  222. # typical case has name
  223. if hasattr(obj, '__name__'):
  224. return getattr(obj, '__name__')
  225. # some objects don't; could recurse
  226. if isinstance(obj, partial):
  227. return get_callable_name(obj.func)
  228. # fall back to class name
  229. if hasattr(obj, '__call__'):
  230. return obj.__class__.__name__
  231. # everything failed (probably because the argument
  232. # wasn't actually callable); we return None
  233. # instead of the empty string in this case to allow
  234. # distinguishing between no name and a name of ''
  235. return None
  236. def apply_if_callable(maybe_callable, obj, **kwargs):
  237. """
  238. Evaluate possibly callable input using obj and kwargs if it is callable,
  239. otherwise return as it is
  240. Parameters
  241. ----------
  242. maybe_callable : possibly a callable
  243. obj : NDFrame
  244. **kwargs
  245. """
  246. if callable(maybe_callable):
  247. return maybe_callable(obj, **kwargs)
  248. return maybe_callable
  249. def dict_compat(d):
  250. """
  251. Helper function to convert datetimelike-keyed dicts to Timestamp-keyed dict
  252. Parameters
  253. ----------
  254. d: dict like object
  255. Returns
  256. -------
  257. dict
  258. """
  259. return {maybe_box_datetimelike(key): value for key, value in iteritems(d)}
  260. def standardize_mapping(into):
  261. """
  262. Helper function to standardize a supplied mapping.
  263. .. versionadded:: 0.21.0
  264. Parameters
  265. ----------
  266. into : instance or subclass of collections.Mapping
  267. Must be a class, an initialized collections.defaultdict,
  268. or an instance of a collections.Mapping subclass.
  269. Returns
  270. -------
  271. mapping : a collections.Mapping subclass or other constructor
  272. a callable object that can accept an iterator to create
  273. the desired Mapping.
  274. See Also
  275. --------
  276. DataFrame.to_dict
  277. Series.to_dict
  278. """
  279. if not inspect.isclass(into):
  280. if isinstance(into, collections.defaultdict):
  281. return partial(
  282. collections.defaultdict, into.default_factory)
  283. into = type(into)
  284. if not issubclass(into, compat.Mapping):
  285. raise TypeError('unsupported type: {into}'.format(into=into))
  286. elif into == collections.defaultdict:
  287. raise TypeError(
  288. 'to_dict() only accepts initialized defaultdicts')
  289. return into
  290. def sentinel_factory():
  291. class Sentinel(object):
  292. pass
  293. return Sentinel()
  294. def random_state(state=None):
  295. """
  296. Helper function for processing random_state arguments.
  297. Parameters
  298. ----------
  299. state : int, np.random.RandomState, None.
  300. If receives an int, passes to np.random.RandomState() as seed.
  301. If receives an np.random.RandomState object, just returns object.
  302. If receives `None`, returns np.random.
  303. If receives anything else, raises an informative ValueError.
  304. Default None.
  305. Returns
  306. -------
  307. np.random.RandomState
  308. """
  309. if is_integer(state):
  310. return np.random.RandomState(state)
  311. elif isinstance(state, np.random.RandomState):
  312. return state
  313. elif state is None:
  314. return np.random
  315. else:
  316. raise ValueError("random_state must be an integer, a numpy "
  317. "RandomState, or None")
  318. def _pipe(obj, func, *args, **kwargs):
  319. """
  320. Apply a function ``func`` to object ``obj`` either by passing obj as the
  321. first argument to the function or, in the case that the func is a tuple,
  322. interpret the first element of the tuple as a function and pass the obj to
  323. that function as a keyword argument whose key is the value of the second
  324. element of the tuple.
  325. Parameters
  326. ----------
  327. func : callable or tuple of (callable, string)
  328. Function to apply to this object or, alternatively, a
  329. ``(callable, data_keyword)`` tuple where ``data_keyword`` is a
  330. string indicating the keyword of `callable`` that expects the
  331. object.
  332. args : iterable, optional
  333. positional arguments passed into ``func``.
  334. kwargs : dict, optional
  335. a dictionary of keyword arguments passed into ``func``.
  336. Returns
  337. -------
  338. object : the return type of ``func``.
  339. """
  340. if isinstance(func, tuple):
  341. func, target = func
  342. if target in kwargs:
  343. msg = '%s is both the pipe target and a keyword argument' % target
  344. raise ValueError(msg)
  345. kwargs[target] = obj
  346. return func(*args, **kwargs)
  347. else:
  348. return func(obj, *args, **kwargs)
  349. def _get_rename_function(mapper):
  350. """
  351. Returns a function that will map names/labels, dependent if mapper
  352. is a dict, Series or just a function.
  353. """
  354. if isinstance(mapper, (compat.Mapping, ABCSeries)):
  355. def f(x):
  356. if x in mapper:
  357. return mapper[x]
  358. else:
  359. return x
  360. else:
  361. f = mapper
  362. return f