api.py 8.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286
  1. import textwrap
  2. import warnings
  3. from pandas._libs import NaT, lib
  4. import pandas.core.common as com
  5. from pandas.core.indexes.base import (
  6. Index, _new_Index, ensure_index, ensure_index_from_sequences)
  7. from pandas.core.indexes.base import InvalidIndexError # noqa:F401
  8. from pandas.core.indexes.category import CategoricalIndex # noqa:F401
  9. from pandas.core.indexes.datetimes import DatetimeIndex
  10. from pandas.core.indexes.interval import IntervalIndex # noqa:F401
  11. from pandas.core.indexes.multi import MultiIndex # noqa:F401
  12. from pandas.core.indexes.numeric import ( # noqa:F401
  13. Float64Index, Int64Index, NumericIndex, UInt64Index)
  14. from pandas.core.indexes.period import PeriodIndex
  15. from pandas.core.indexes.range import RangeIndex # noqa:F401
  16. from pandas.core.indexes.timedeltas import TimedeltaIndex
  # Text of the FutureWarning raised by _union_indexes when it is called with
  # sort=None and the plain Index objects being combined are not all equal.
  # textwrap.dedent strips any common leading whitespace from the message lines.
  _sort_msg = textwrap.dedent("""\
  Sorting because non-concatenation axis is not aligned. A future version
  of pandas will change to not sort by default.
  To accept the future behavior, pass 'sort=False'.
  To retain the current behavior and silence the warning, pass 'sort=True'.
  """)
  # TODO: there are many places that rely on these private methods existing in
  # pandas.core.index
  # Names exported by a star-import of this module: the index classes and
  # helpers imported above, plus the underscore-prefixed helpers defined below
  # (kept in the list because other code relies on them; see the TODO above).
  __all__ = ['Index', 'MultiIndex', 'NumericIndex', 'Float64Index', 'Int64Index',
             'CategoricalIndex', 'IntervalIndex', 'RangeIndex', 'UInt64Index',
             'InvalidIndexError', 'TimedeltaIndex',
             'PeriodIndex', 'DatetimeIndex',
             '_new_Index', 'NaT',
             'ensure_index', 'ensure_index_from_sequences',
             '_get_combined_index',
             '_get_objs_combined_axis', '_union_indexes',
             '_get_consensus_names',
             '_all_indexes_same']
  35. def _get_objs_combined_axis(objs, intersect=False, axis=0, sort=True):
  36. """
  37. Extract combined index: return intersection or union (depending on the
  38. value of "intersect") of indexes on given axis, or None if all objects
  39. lack indexes (e.g. they are numpy arrays).
  40. Parameters
  41. ----------
  42. objs : list of objects
  43. Each object will only be considered if it has a _get_axis
  44. attribute.
  45. intersect : bool, default False
  46. If True, calculate the intersection between indexes. Otherwise,
  47. calculate the union.
  48. axis : {0 or 'index', 1 or 'outer'}, default 0
  49. The axis to extract indexes from.
  50. sort : bool, default True
  51. Whether the result index should come out sorted or not.
  52. Returns
  53. -------
  54. Index
  55. """
  56. obs_idxes = [obj._get_axis(axis) for obj in objs
  57. if hasattr(obj, '_get_axis')]
  58. if obs_idxes:
  59. return _get_combined_index(obs_idxes, intersect=intersect, sort=sort)
  60. def _get_distinct_objs(objs):
  61. """
  62. Return a list with distinct elements of "objs" (different ids).
  63. Preserves order.
  64. """
  65. ids = set()
  66. res = []
  67. for obj in objs:
  68. if not id(obj) in ids:
  69. ids.add(id(obj))
  70. res.append(obj)
  71. return res
  def _get_combined_index(indexes, intersect=False, sort=False):
      """
      Build one index from several by taking their union or intersection.

      Parameters
      ----------
      indexes : list of Index or list objects
          When intersect=True, do not accept list of lists.
      intersect : bool, default False
          If True, compute the intersection of the indexes. Otherwise,
          compute their union.
      sort : bool, default False
          Whether the resulting index should come out sorted.

      Returns
      -------
      Index
      """
      # TODO: handle index names!
      # The same object passed several times only needs to be combined once.
      distinct = _get_distinct_objs(indexes)
      count = len(distinct)

      if count == 0:
          combined = Index([])
      elif count == 1:
          combined = distinct[0]
      elif not intersect:
          combined = _union_indexes(distinct, sort=sort)
      else:
          combined = distinct[0]
          for nxt in distinct[1:]:
              combined = combined.intersection(nxt)

      combined = ensure_index(combined)
      if not sort:
          return combined

      try:
          return combined.sort_values()
      except TypeError:
          # Sorting is best-effort: if sort_values raises TypeError the
          # unsorted index is returned as-is.
          return combined
  107. def _union_indexes(indexes, sort=True):
  108. """
  109. Return the union of indexes.
  110. The behavior of sort and names is not consistent.
  111. Parameters
  112. ----------
  113. indexes : list of Index or list objects
  114. sort : bool, default True
  115. Whether the result index should come out sorted or not.
  116. Returns
  117. -------
  118. Index
  119. """
  120. if len(indexes) == 0:
  121. raise AssertionError('Must have at least 1 Index to union')
  122. if len(indexes) == 1:
  123. result = indexes[0]
  124. if isinstance(result, list):
  125. result = Index(sorted(result))
  126. return result
  127. indexes, kind = _sanitize_and_check(indexes)
  128. def _unique_indices(inds):
  129. """
  130. Convert indexes to lists and concatenate them, removing duplicates.
  131. The final dtype is inferred.
  132. Parameters
  133. ----------
  134. inds : list of Index or list objects
  135. Returns
  136. -------
  137. Index
  138. """
  139. def conv(i):
  140. if isinstance(i, Index):
  141. i = i.tolist()
  142. return i
  143. return Index(
  144. lib.fast_unique_multiple_list([conv(i) for i in inds], sort=sort))
  145. if kind == 'special':
  146. result = indexes[0]
  147. if hasattr(result, 'union_many'):
  148. return result.union_many(indexes[1:])
  149. else:
  150. for other in indexes[1:]:
  151. result = result.union(other)
  152. return result
  153. elif kind == 'array':
  154. index = indexes[0]
  155. for other in indexes[1:]:
  156. if not index.equals(other):
  157. if sort is None:
  158. # TODO: remove once pd.concat sort default changes
  159. warnings.warn(_sort_msg, FutureWarning, stacklevel=8)
  160. sort = True
  161. return _unique_indices(indexes)
  162. name = _get_consensus_names(indexes)[0]
  163. if name != index.name:
  164. index = index._shallow_copy(name=name)
  165. return index
  166. else: # kind='list'
  167. return _unique_indices(indexes)
  168. def _sanitize_and_check(indexes):
  169. """
  170. Verify the type of indexes and convert lists to Index.
  171. Cases:
  172. - [list, list, ...]: Return ([list, list, ...], 'list')
  173. - [list, Index, ...]: Return _sanitize_and_check([Index, Index, ...])
  174. Lists are sorted and converted to Index.
  175. - [Index, Index, ...]: Return ([Index, Index, ...], TYPE)
  176. TYPE = 'special' if at least one special type, 'array' otherwise.
  177. Parameters
  178. ----------
  179. indexes : list of Index or list objects
  180. Returns
  181. -------
  182. sanitized_indexes : list of Index or list objects
  183. type : {'list', 'array', 'special'}
  184. """
  185. kinds = list({type(index) for index in indexes})
  186. if list in kinds:
  187. if len(kinds) > 1:
  188. indexes = [Index(com.try_sort(x))
  189. if not isinstance(x, Index) else
  190. x for x in indexes]
  191. kinds.remove(list)
  192. else:
  193. return indexes, 'list'
  194. if len(kinds) > 1 or Index not in kinds:
  195. return indexes, 'special'
  196. else:
  197. return indexes, 'array'
  198. def _get_consensus_names(indexes):
  199. """
  200. Give a consensus 'names' to indexes.
  201. If there's exactly one non-empty 'names', return this,
  202. otherwise, return empty.
  203. Parameters
  204. ----------
  205. indexes : list of Index objects
  206. Returns
  207. -------
  208. list
  209. A list representing the consensus 'names' found.
  210. """
  211. # find the non-none names, need to tupleify to make
  212. # the set hashable, then reverse on return
  213. consensus_names = {tuple(i.names) for i in indexes
  214. if com._any_not_none(*i.names)}
  215. if len(consensus_names) == 1:
  216. return list(list(consensus_names)[0])
  217. return [None] * indexes[0].nlevels
  218. def _all_indexes_same(indexes):
  219. """
  220. Determine if all indexes contain the same elements.
  221. Parameters
  222. ----------
  223. indexes : list of Index objects
  224. Returns
  225. -------
  226. bool
  227. True if all indexes contain the same elements, False otherwise.
  228. """
  229. first = indexes[0]
  230. for index in indexes[1:]:
  231. if not first.equals(index):
  232. return False
  233. return True