# pylint: disable=E1101,E1103,W0232
from collections import OrderedDict
import datetime
from sys import getsizeof
import warnings

import numpy as np

from pandas._libs import (
    Timestamp, algos as libalgos, index as libindex, lib, tslibs)

import pandas.compat as compat
from pandas.compat import lrange, lzip, map, range, zip
from pandas.compat.numpy import function as nv

from pandas.errors import PerformanceWarning, UnsortedIndexError

from pandas.util._decorators import Appender, cache_readonly, deprecate_kwarg

from pandas.core.dtypes.common import (
    ensure_int64, ensure_platform_int, is_categorical_dtype, is_hashable,
    is_integer, is_iterator, is_list_like, is_object_dtype, is_scalar,
    pandas_dtype)
from pandas.core.dtypes.dtypes import ExtensionDtype, PandasExtensionDtype
from pandas.core.dtypes.generic import ABCDataFrame
from pandas.core.dtypes.missing import array_equivalent, isna

import pandas.core.algorithms as algos
import pandas.core.common as com
from pandas.core.config import get_option
import pandas.core.indexes.base as ibase
from pandas.core.indexes.base import (
    Index, InvalidIndexError, _index_shared_docs, ensure_index)
from pandas.core.indexes.frozen import FrozenList, _ensure_frozen
import pandas.core.missing as missing

from pandas.io.formats.printing import pprint_thing

_index_doc_kwargs = dict(ibase._index_doc_kwargs)
_index_doc_kwargs.update(
    dict(klass='MultiIndex',
         target_klass='MultiIndex or list of tuples'))


class MultiIndexUIntEngine(libindex.BaseMultiIndexCodesEngine,
                           libindex.UInt64Engine):
    """
    This class manages a MultiIndex by mapping label combinations to positive
    integers.
    """
    _base = libindex.UInt64Engine

    def _codes_to_ints(self, codes):
        """
        Transform combination(s) of uint64 into one uint64 (each), in a
        strictly monotonic way (i.e. respecting the lexicographic order of
        integer combinations): see BaseMultiIndexCodesEngine documentation.

        Parameters
        ----------
        codes : 1- or 2-dimensional array of dtype uint64
            Combinations of integers (one per row)

        Returns
        -------
        int_keys : scalar or 1-dimensional array, of dtype uint64
            Integer(s) representing one combination (each)
        """
        # Shift the representation of each level by the pre-calculated number
        # of bits:
        codes <<= self.offsets

        # Now sum and OR are in fact interchangeable. This is a simple
        # composition of the (disjunct) significant bits of each level (i.e.
        # each column in "codes") in a single positive integer:
        if codes.ndim == 1:
            # Single key
            return np.bitwise_or.reduce(codes)

        # Multiple keys
        return np.bitwise_or.reduce(codes, axis=1)
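    # Illustrative example (not part of the original source): with two levels
    # of sizes 7 and 14, ceil(log2(size + 1)) gives 3 and 4 bits, so
    # self.offsets would be [4, 0]. A codes row [5, 3] is then packed as
    # (5 << 4) | (3 << 0) == 83, and comparing the packed uint64 keys
    # preserves the lexicographic order of the original code combinations.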


class MultiIndexPyIntEngine(libindex.BaseMultiIndexCodesEngine,
                            libindex.ObjectEngine):
    """
    This class manages those (extreme) cases in which the number of possible
    label combinations overflows a 64-bit integer, and uses an ObjectEngine
    containing Python integers.
    """
    _base = libindex.ObjectEngine

    def _codes_to_ints(self, codes):
        """
        Transform combination(s) of uint64 into one Python integer (each), in
        a strictly monotonic way (i.e. respecting the lexicographic order of
        integer combinations): see BaseMultiIndexCodesEngine documentation.

        Parameters
        ----------
        codes : 1- or 2-dimensional array of dtype uint64
            Combinations of integers (one per row)

        Returns
        -------
        int_keys : int, or 1-dimensional array of dtype object
            Integer(s) representing one combination (each)
        """
        # Shift the representation of each level by the pre-calculated number
        # of bits. Since this can overflow uint64, first make sure we are
        # working with Python integers:
        codes = codes.astype('object') << self.offsets

        # Now sum and OR are in fact interchangeable. This is a simple
        # composition of the (disjunct) significant bits of each level (i.e.
        # each column in "codes") in a single positive integer (per row):
        if codes.ndim == 1:
            # Single key
            return np.bitwise_or.reduce(codes)

        # Multiple keys
        return np.bitwise_or.reduce(codes, axis=1)
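    # Illustrative example (not part of the original source): with, say, 20
    # levels of ~1000 labels each, every level needs ceil(log2(1001)) == 10
    # bits, so a full key needs about 200 bits. That cannot fit in a uint64,
    # which is exactly when _engine falls back to this engine and the shifted
    # codes become arbitrary-precision Python ints.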


class MultiIndex(Index):
    """
    A multi-level, or hierarchical, index object for pandas objects.

    Parameters
    ----------
    levels : sequence of arrays
        The unique labels for each level.
    codes : sequence of arrays
        Integers for each level designating which label at each location.

        .. versionadded:: 0.24.0
    labels : sequence of arrays
        Integers for each level designating which label at each location.

        .. deprecated:: 0.24.0
            Use ``codes`` instead
    sortorder : optional int
        Level of sortedness (must be lexicographically sorted by that
        level).
    names : optional sequence of objects
        Names for each of the index levels. (name is accepted for compat).
    copy : bool, default False
        Copy the meta-data.
    verify_integrity : bool, default True
        Check that the levels/codes are consistent and valid.

    Attributes
    ----------
    names
    levels
    codes
    nlevels
    levshape

    Methods
    -------
    from_arrays
    from_tuples
    from_product
    from_frame
    set_levels
    set_codes
    to_frame
    to_flat_index
    is_lexsorted
    sortlevel
    droplevel
    swaplevel
    reorder_levels
    remove_unused_levels

    See Also
    --------
    MultiIndex.from_arrays : Convert list of arrays to MultiIndex.
    MultiIndex.from_product : Create a MultiIndex from the cartesian product
        of iterables.
    MultiIndex.from_tuples : Convert list of tuples to a MultiIndex.
    MultiIndex.from_frame : Make a MultiIndex from a DataFrame.
    Index : The base pandas Index type.

    Examples
    --------
    A new ``MultiIndex`` is typically constructed using one of the helper
    methods :meth:`MultiIndex.from_arrays`, :meth:`MultiIndex.from_product`
    and :meth:`MultiIndex.from_tuples`. For example (using ``.from_arrays``):

    >>> arrays = [[1, 1, 2, 2], ['red', 'blue', 'red', 'blue']]
    >>> pd.MultiIndex.from_arrays(arrays, names=('number', 'color'))
    MultiIndex(levels=[[1, 2], ['blue', 'red']],
               codes=[[0, 0, 1, 1], [1, 0, 1, 0]],
               names=['number', 'color'])

    See further examples for how to construct a MultiIndex in the doc strings
    of the mentioned helper methods.

    Notes
    -----
    See the `user guide
    <http://pandas.pydata.org/pandas-docs/stable/advanced.html>`_ for more.
    """
    # initialize to zero-length tuples to make everything work
    _typ = 'multiindex'
    _names = FrozenList()
    _levels = FrozenList()
    _codes = FrozenList()
    _comparables = ['names']
    rename = Index.set_names

    # --------------------------------------------------------------------
    # Constructors

    @deprecate_kwarg(old_arg_name='labels', new_arg_name='codes')
    def __new__(cls, levels=None, codes=None, sortorder=None, names=None,
                dtype=None, copy=False, name=None,
                verify_integrity=True, _set_identity=True):

        # compat with Index
        if name is not None:
            names = name

        if levels is None or codes is None:
            raise TypeError("Must pass both levels and codes")
        if len(levels) != len(codes):
            raise ValueError('Length of levels and codes must be the same.')
        if len(levels) == 0:
            raise ValueError('Must pass non-zero number of levels/codes')

        result = object.__new__(MultiIndex)

        # we've already validated levels and codes, so shortcut here
        result._set_levels(levels, copy=copy, validate=False)
        result._set_codes(codes, copy=copy, validate=False)

        if names is not None:
            # handles name validation
            result._set_names(names)

        if sortorder is not None:
            result.sortorder = int(sortorder)
        else:
            result.sortorder = sortorder

        if verify_integrity:
            result._verify_integrity()
        if _set_identity:
            result._reset_identity()

        return result

    def _verify_integrity(self, codes=None, levels=None):
        """

        Parameters
        ----------
        codes : optional list
            Codes to check for validity. Defaults to current codes.
        levels : optional list
            Levels to check for validity. Defaults to current levels.

        Raises
        ------
        ValueError
            If length of levels and codes don't match, if the codes for any
            level would exceed level bounds, or there are any duplicate levels.
        """
        # NOTE: Currently does not check, among other things, that cached
        # nlevels matches nor that sortorder matches the actual sortorder.
        codes = codes or self.codes
        levels = levels or self.levels

        if len(levels) != len(codes):
            raise ValueError("Length of levels and codes must match. NOTE:"
                             " this index is in an inconsistent state.")
        codes_length = len(self.codes[0])
        for i, (level, level_codes) in enumerate(zip(levels, codes)):
            if len(level_codes) != codes_length:
                raise ValueError("Unequal code lengths: %s" %
                                 ([len(code_) for code_ in codes]))
            if len(level_codes) and level_codes.max() >= len(level):
                raise ValueError("On level %d, code max (%d) >= length of"
                                 " level (%d). NOTE: this index is in an"
                                 " inconsistent state" % (i, level_codes.max(),
                                                          len(level)))
            if not level.is_unique:
                raise ValueError("Level values must be unique: {values} on "
                                 "level {level}".format(
                                     values=[value for value in level],
                                     level=i))

    @classmethod
    def from_arrays(cls, arrays, sortorder=None, names=None):
        """
        Convert arrays to MultiIndex.

        Parameters
        ----------
        arrays : list / sequence of array-likes
            Each array-like gives one level's value for each data point.
            len(arrays) is the number of levels.
        sortorder : int or None
            Level of sortedness (must be lexicographically sorted by that
            level).
        names : list / sequence of str, optional
            Names for the levels in the index.

        Returns
        -------
        index : MultiIndex

        See Also
        --------
        MultiIndex.from_tuples : Convert list of tuples to MultiIndex.
        MultiIndex.from_product : Make a MultiIndex from cartesian product
            of iterables.
        MultiIndex.from_frame : Make a MultiIndex from a DataFrame.

        Examples
        --------
        >>> arrays = [[1, 1, 2, 2], ['red', 'blue', 'red', 'blue']]
        >>> pd.MultiIndex.from_arrays(arrays, names=('number', 'color'))
        MultiIndex(levels=[[1, 2], ['blue', 'red']],
                   codes=[[0, 0, 1, 1], [1, 0, 1, 0]],
                   names=['number', 'color'])
        """
        if not is_list_like(arrays):
            raise TypeError("Input must be a list / sequence of array-likes.")
        elif is_iterator(arrays):
            arrays = list(arrays)

        # Check if lengths of all arrays are equal or not,
        # raise ValueError, if not
        for i in range(1, len(arrays)):
            if len(arrays[i]) != len(arrays[i - 1]):
                raise ValueError('all arrays must be same length')

        from pandas.core.arrays.categorical import _factorize_from_iterables

        codes, levels = _factorize_from_iterables(arrays)
        if names is None:
            names = [getattr(arr, "name", None) for arr in arrays]

        return MultiIndex(levels=levels, codes=codes, sortorder=sortorder,
                          names=names, verify_integrity=False)

    @classmethod
    def from_tuples(cls, tuples, sortorder=None, names=None):
        """
        Convert list of tuples to MultiIndex.

        Parameters
        ----------
        tuples : list / sequence of tuple-likes
            Each tuple is the index of one row/column.
        sortorder : int or None
            Level of sortedness (must be lexicographically sorted by that
            level).
        names : list / sequence of str, optional
            Names for the levels in the index.

        Returns
        -------
        index : MultiIndex

        See Also
        --------
        MultiIndex.from_arrays : Convert list of arrays to MultiIndex.
        MultiIndex.from_product : Make a MultiIndex from cartesian product
            of iterables.
        MultiIndex.from_frame : Make a MultiIndex from a DataFrame.

        Examples
        --------
        >>> tuples = [(1, u'red'), (1, u'blue'),
        ...           (2, u'red'), (2, u'blue')]
        >>> pd.MultiIndex.from_tuples(tuples, names=('number', 'color'))
        MultiIndex(levels=[[1, 2], ['blue', 'red']],
                   codes=[[0, 0, 1, 1], [1, 0, 1, 0]],
                   names=['number', 'color'])
        """
        if not is_list_like(tuples):
            raise TypeError('Input must be a list / sequence of tuple-likes.')
        elif is_iterator(tuples):
            tuples = list(tuples)

        if len(tuples) == 0:
            if names is None:
                msg = 'Cannot infer number of levels from empty list'
                raise TypeError(msg)
            arrays = [[]] * len(names)
        elif isinstance(tuples, (np.ndarray, Index)):
            if isinstance(tuples, Index):
                tuples = tuples._values

            arrays = list(lib.tuples_to_object_array(tuples).T)
        elif isinstance(tuples, list):
            arrays = list(lib.to_object_array_tuples(tuples).T)
        else:
            arrays = lzip(*tuples)

        return MultiIndex.from_arrays(arrays, sortorder=sortorder, names=names)

    @classmethod
    def from_product(cls, iterables, sortorder=None, names=None):
        """
        Make a MultiIndex from the cartesian product of multiple iterables.

        Parameters
        ----------
        iterables : list / sequence of iterables
            Each iterable has unique labels for each level of the index.
        sortorder : int or None
            Level of sortedness (must be lexicographically sorted by that
            level).
        names : list / sequence of str, optional
            Names for the levels in the index.

        Returns
        -------
        index : MultiIndex

        See Also
        --------
        MultiIndex.from_arrays : Convert list of arrays to MultiIndex.
        MultiIndex.from_tuples : Convert list of tuples to MultiIndex.
        MultiIndex.from_frame : Make a MultiIndex from a DataFrame.

        Examples
        --------
        >>> numbers = [0, 1, 2]
        >>> colors = ['green', 'purple']
        >>> pd.MultiIndex.from_product([numbers, colors],
        ...                            names=['number', 'color'])
        MultiIndex(levels=[[0, 1, 2], ['green', 'purple']],
                   codes=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]],
                   names=['number', 'color'])
        """
        from pandas.core.arrays.categorical import _factorize_from_iterables
        from pandas.core.reshape.util import cartesian_product

        if not is_list_like(iterables):
            raise TypeError("Input must be a list / sequence of iterables.")
        elif is_iterator(iterables):
            iterables = list(iterables)

        codes, levels = _factorize_from_iterables(iterables)
        codes = cartesian_product(codes)
        return MultiIndex(levels, codes, sortorder=sortorder, names=names)

    @classmethod
    def from_frame(cls, df, sortorder=None, names=None):
        """
        Make a MultiIndex from a DataFrame.

        .. versionadded:: 0.24.0

        Parameters
        ----------
        df : DataFrame
            DataFrame to be converted to MultiIndex.
        sortorder : int, optional
            Level of sortedness (must be lexicographically sorted by that
            level).
        names : list-like, optional
            If no names are provided, use the column names, or tuple of column
            names if the columns is a MultiIndex. If a sequence, overwrite
            names with the given sequence.

        Returns
        -------
        MultiIndex
            The MultiIndex representation of the given DataFrame.

        See Also
        --------
        MultiIndex.from_arrays : Convert list of arrays to MultiIndex.
        MultiIndex.from_tuples : Convert list of tuples to MultiIndex.
        MultiIndex.from_product : Make a MultiIndex from cartesian product
            of iterables.

        Examples
        --------
        >>> df = pd.DataFrame([['HI', 'Temp'], ['HI', 'Precip'],
        ...                    ['NJ', 'Temp'], ['NJ', 'Precip']],
        ...                   columns=['a', 'b'])
        >>> df
            a       b
        0  HI    Temp
        1  HI  Precip
        2  NJ    Temp
        3  NJ  Precip

        >>> pd.MultiIndex.from_frame(df)
        MultiIndex(levels=[['HI', 'NJ'], ['Precip', 'Temp']],
                   codes=[[0, 0, 1, 1], [1, 0, 1, 0]],
                   names=['a', 'b'])

        Using explicit names, instead of the column names

        >>> pd.MultiIndex.from_frame(df, names=['state', 'observation'])
        MultiIndex(levels=[['HI', 'NJ'], ['Precip', 'Temp']],
                   codes=[[0, 0, 1, 1], [1, 0, 1, 0]],
                   names=['state', 'observation'])
        """
        if not isinstance(df, ABCDataFrame):
            raise TypeError("Input must be a DataFrame")

        column_names, columns = lzip(*df.iteritems())
        names = column_names if names is None else names
        return cls.from_arrays(columns, sortorder=sortorder, names=names)

    # --------------------------------------------------------------------

    @property
    def levels(self):
        return self._levels

    @property
    def _values(self):
        # We override here, since our parent uses _data, which we don't use.
        return self.values

    @property
    def array(self):
        """
        Raises a ValueError for `MultiIndex` because there's no single
        array backing a MultiIndex.

        Raises
        ------
        ValueError
        """
        msg = ("MultiIndex has no single backing array. Use "
               "'MultiIndex.to_numpy()' to get a NumPy array of tuples.")
        raise ValueError(msg)

    @property
    def _is_homogeneous_type(self):
        """Whether the levels of a MultiIndex all have the same dtype.

        This looks at the dtypes of the levels.

        See Also
        --------
        Index._is_homogeneous_type
        DataFrame._is_homogeneous_type

        Examples
        --------
        >>> MultiIndex.from_tuples([
        ...     ('a', 'b'), ('a', 'c')])._is_homogeneous_type
        True
        >>> MultiIndex.from_tuples([
        ...     ('a', 1), ('a', 2)])._is_homogeneous_type
        False
        """
        return len({x.dtype for x in self.levels}) <= 1

    def _set_levels(self, levels, level=None, copy=False, validate=True,
                    verify_integrity=False):
        # This is NOT part of the levels property because it should not be
        # possible to set levels from outside this class. User beware if you
        # change _levels directly
        if validate and len(levels) == 0:
            raise ValueError('Must set non-zero number of levels.')
        if validate and level is None and len(levels) != self.nlevels:
            raise ValueError('Length of levels must match number of levels.')
        if validate and level is not None and len(levels) != len(level):
            raise ValueError('Length of levels must match length of level.')

        if level is None:
            new_levels = FrozenList(
                ensure_index(lev, copy=copy)._shallow_copy()
                for lev in levels)
        else:
            level = [self._get_level_number(l) for l in level]
            new_levels = list(self._levels)
            for l, v in zip(level, levels):
                new_levels[l] = ensure_index(v, copy=copy)._shallow_copy()
            new_levels = FrozenList(new_levels)

        if verify_integrity:
            self._verify_integrity(levels=new_levels)

        names = self.names
        self._levels = new_levels
        if any(names):
            self._set_names(names)

        self._tuples = None
        self._reset_cache()

    def set_levels(self, levels, level=None, inplace=False,
                   verify_integrity=True):
        """
        Set new levels on MultiIndex. Defaults to returning
        new index.

        Parameters
        ----------
        levels : sequence or list of sequence
            new level(s) to apply
        level : int, level name, or sequence of int/level names (default None)
            level(s) to set (None for all levels)
        inplace : bool
            if True, mutates in place
        verify_integrity : bool (default True)
            if True, checks that levels and codes are compatible

        Returns
        -------
        new index (of same type and class...etc)

        Examples
        --------
        >>> idx = pd.MultiIndex.from_tuples([(1, u'one'), (1, u'two'),
                                             (2, u'one'), (2, u'two')],
                                            names=['foo', 'bar'])
        >>> idx.set_levels([['a','b'], [1,2]])
        MultiIndex(levels=[[u'a', u'b'], [1, 2]],
                   codes=[[0, 0, 1, 1], [0, 1, 0, 1]],
                   names=[u'foo', u'bar'])
        >>> idx.set_levels(['a','b'], level=0)
        MultiIndex(levels=[[u'a', u'b'], [u'one', u'two']],
                   codes=[[0, 0, 1, 1], [0, 1, 0, 1]],
                   names=[u'foo', u'bar'])
        >>> idx.set_levels(['a','b'], level='bar')
        MultiIndex(levels=[[1, 2], [u'a', u'b']],
                   codes=[[0, 0, 1, 1], [0, 1, 0, 1]],
                   names=[u'foo', u'bar'])
        >>> idx.set_levels([['a','b'], [1,2]], level=[0,1])
        MultiIndex(levels=[[u'a', u'b'], [1, 2]],
                   codes=[[0, 0, 1, 1], [0, 1, 0, 1]],
                   names=[u'foo', u'bar'])
        """
        if is_list_like(levels) and not isinstance(levels, Index):
            levels = list(levels)

        if level is not None and not is_list_like(level):
            if not is_list_like(levels):
                raise TypeError("Levels must be list-like")
            if is_list_like(levels[0]):
                raise TypeError("Levels must be list-like")
            level = [level]
            levels = [levels]
        elif level is None or is_list_like(level):
            if not is_list_like(levels) or not is_list_like(levels[0]):
                raise TypeError("Levels must be list of lists-like")

        if inplace:
            idx = self
        else:
            idx = self._shallow_copy()
        idx._reset_identity()
        idx._set_levels(levels, level=level, validate=True,
                        verify_integrity=verify_integrity)
        if not inplace:
            return idx

    @property
    def codes(self):
        return self._codes

    @property
    def labels(self):
        warnings.warn((".labels was deprecated in version 0.24.0. "
                       "Use .codes instead."),
                      FutureWarning, stacklevel=2)
        return self.codes

    def _set_codes(self, codes, level=None, copy=False, validate=True,
                   verify_integrity=False):
        if validate and level is None and len(codes) != self.nlevels:
            raise ValueError("Length of codes must match number of levels")
        if validate and level is not None and len(codes) != len(level):
            raise ValueError('Length of codes must match length of levels.')

        if level is None:
            new_codes = FrozenList(
                _ensure_frozen(level_codes, lev, copy=copy)._shallow_copy()
                for lev, level_codes in zip(self.levels, codes))
        else:
            level = [self._get_level_number(l) for l in level]
            new_codes = list(self._codes)
            for lev_idx, level_codes in zip(level, codes):
                lev = self.levels[lev_idx]
                new_codes[lev_idx] = _ensure_frozen(
                    level_codes, lev, copy=copy)._shallow_copy()
            new_codes = FrozenList(new_codes)

        if verify_integrity:
            self._verify_integrity(codes=new_codes)

        self._codes = new_codes

        self._tuples = None
        self._reset_cache()

    def set_labels(self, labels, level=None, inplace=False,
                   verify_integrity=True):
        warnings.warn((".set_labels was deprecated in version 0.24.0. "
                       "Use .set_codes instead."),
                      FutureWarning, stacklevel=2)
        return self.set_codes(codes=labels, level=level, inplace=inplace,
                              verify_integrity=verify_integrity)

    @deprecate_kwarg(old_arg_name='labels', new_arg_name='codes')
    def set_codes(self, codes, level=None, inplace=False,
                  verify_integrity=True):
        """
        Set new codes on MultiIndex. Defaults to returning
        new index.

        .. versionadded:: 0.24.0

           New name for deprecated method `set_labels`.

        Parameters
        ----------
        codes : sequence or list of sequence
            new codes to apply
        level : int, level name, or sequence of int/level names (default None)
            level(s) to set (None for all levels)
        inplace : bool
            if True, mutates in place
        verify_integrity : bool (default True)
            if True, checks that levels and codes are compatible

        Returns
        -------
        new index (of same type and class...etc)

        Examples
        --------
        >>> idx = pd.MultiIndex.from_tuples([(1, u'one'), (1, u'two'),
                                             (2, u'one'), (2, u'two')],
                                            names=['foo', 'bar'])
        >>> idx.set_codes([[1,0,1,0], [0,0,1,1]])
        MultiIndex(levels=[[1, 2], [u'one', u'two']],
                   codes=[[1, 0, 1, 0], [0, 0, 1, 1]],
                   names=[u'foo', u'bar'])
        >>> idx.set_codes([1,0,1,0], level=0)
        MultiIndex(levels=[[1, 2], [u'one', u'two']],
                   codes=[[1, 0, 1, 0], [0, 1, 0, 1]],
                   names=[u'foo', u'bar'])
        >>> idx.set_codes([0,0,1,1], level='bar')
        MultiIndex(levels=[[1, 2], [u'one', u'two']],
                   codes=[[0, 0, 1, 1], [0, 0, 1, 1]],
                   names=[u'foo', u'bar'])
        >>> idx.set_codes([[1,0,1,0], [0,0,1,1]], level=[0,1])
        MultiIndex(levels=[[1, 2], [u'one', u'two']],
                   codes=[[1, 0, 1, 0], [0, 0, 1, 1]],
                   names=[u'foo', u'bar'])
        """
        if level is not None and not is_list_like(level):
            if not is_list_like(codes):
                raise TypeError("Codes must be list-like")
            if is_list_like(codes[0]):
                raise TypeError("Codes must be list-like")
            level = [level]
            codes = [codes]
        elif level is None or is_list_like(level):
            if not is_list_like(codes) or not is_list_like(codes[0]):
                raise TypeError("Codes must be list of lists-like")

        if inplace:
            idx = self
        else:
            idx = self._shallow_copy()
        idx._reset_identity()
        idx._set_codes(codes, level=level, verify_integrity=verify_integrity)
        if not inplace:
            return idx

    @deprecate_kwarg(old_arg_name='labels', new_arg_name='codes')
    def copy(self, names=None, dtype=None, levels=None, codes=None,
             deep=False, _set_identity=False, **kwargs):
        """
        Make a copy of this object. Names, dtype, levels and codes can be
        passed and will be set on new copy.

        Parameters
        ----------
        names : sequence, optional
        dtype : numpy dtype or pandas type, optional
        levels : sequence, optional
        codes : sequence, optional

        Returns
        -------
        copy : MultiIndex

        Notes
        -----
        In most cases, there should be no functional difference from using
        ``deep``, but if ``deep`` is passed it will attempt to deepcopy.
        This could be potentially expensive on large MultiIndex objects.
        """
        name = kwargs.get('name')
        names = self._validate_names(name=name, names=names, deep=deep)

        if deep:
            from copy import deepcopy
            if levels is None:
                levels = deepcopy(self.levels)
            if codes is None:
                codes = deepcopy(self.codes)
        else:
            if levels is None:
                levels = self.levels
            if codes is None:
                codes = self.codes
        return MultiIndex(levels=levels, codes=codes, names=names,
                          sortorder=self.sortorder, verify_integrity=False,
                          _set_identity=_set_identity)

    def __array__(self, dtype=None):
        """ the array interface, return my values """
        return self.values

    def view(self, cls=None):
        """ this is defined as a copy with the same identity """
        result = self.copy()
        result._id = self._id
        return result

    def _shallow_copy_with_infer(self, values, **kwargs):
        # On equal MultiIndexes the difference is empty.
        # Therefore, an empty MultiIndex is returned GH13490
        if len(values) == 0:
            return MultiIndex(levels=[[] for _ in range(self.nlevels)],
                              codes=[[] for _ in range(self.nlevels)],
                              **kwargs)
        return self._shallow_copy(values, **kwargs)

    @Appender(_index_shared_docs['contains'] % _index_doc_kwargs)
    def __contains__(self, key):
        hash(key)
        try:
            self.get_loc(key)
            return True
        except (LookupError, TypeError):
            return False

    contains = __contains__

    @Appender(_index_shared_docs['_shallow_copy'])
    def _shallow_copy(self, values=None, **kwargs):
        if values is not None:
            names = kwargs.pop('names', kwargs.pop('name', self.names))
            # discards freq
            kwargs.pop('freq', None)
            return MultiIndex.from_tuples(values, names=names, **kwargs)
        return self.view()

    @cache_readonly
    def dtype(self):
        return np.dtype('O')

    def _is_memory_usage_qualified(self):
        """ return a boolean if we need a qualified .info display """
        def f(l):
            return 'mixed' in l or 'string' in l or 'unicode' in l
        return any(f(l) for l in self._inferred_type_levels)

    @Appender(Index.memory_usage.__doc__)
    def memory_usage(self, deep=False):
        # we are overwriting our base class to avoid
        # computing .values here which could materialize
        # a tuple representation unnecessarily
        return self._nbytes(deep)

    @cache_readonly
    def nbytes(self):
        """ return the number of bytes in the underlying data """
        return self._nbytes(False)

    def _nbytes(self, deep=False):
        """
        return the number of bytes in the underlying data
        deeply introspect the level data if deep=True

        include the engine hashtable

        *this is an internal routine*
        """
        # for implementations with no useful getsizeof (PyPy)
        objsize = 24

        level_nbytes = sum(i.memory_usage(deep=deep) for i in self.levels)
        label_nbytes = sum(i.nbytes for i in self.codes)
        names_nbytes = sum(getsizeof(i, objsize) for i in self.names)
        result = level_nbytes + label_nbytes + names_nbytes

        # include our engine hashtable
        result += self._engine.sizeof(deep=deep)
        return result

    # --------------------------------------------------------------------
    # Rendering Methods

    def _format_attrs(self):
        """
        Return a list of tuples of the (attr,formatted_value)
        """
        attrs = [
            ('levels', ibase.default_pprint(self._levels,
                                            max_seq_items=False)),
            ('codes', ibase.default_pprint(self._codes,
                                           max_seq_items=False))]
        if com._any_not_none(*self.names):
            attrs.append(('names', ibase.default_pprint(self.names)))
        if self.sortorder is not None:
            attrs.append(('sortorder', ibase.default_pprint(self.sortorder)))
        return attrs

    def _format_space(self):
        return "\n%s" % (' ' * (len(self.__class__.__name__) + 1))

    def _format_data(self, name=None):
        # we are formatting through the attributes
        return None

    def _format_native_types(self, na_rep='nan', **kwargs):
        new_levels = []
        new_codes = []

        # go through the levels and format them
        for level, level_codes in zip(self.levels, self.codes):
            level = level._format_native_types(na_rep=na_rep, **kwargs)
            # add nan values, if there are any
            mask = (level_codes == -1)
            if mask.any():
                nan_index = len(level)
                level = np.append(level, na_rep)
                level_codes = level_codes.values()
                level_codes[mask] = nan_index
            new_levels.append(level)
            new_codes.append(level_codes)

        if len(new_levels) == 1:
            return Index(new_levels[0])._format_native_types()
        else:
            # reconstruct the multi-index
            mi = MultiIndex(levels=new_levels, codes=new_codes,
                            names=self.names, sortorder=self.sortorder,
                            verify_integrity=False)
            return mi.values

    def format(self, space=2, sparsify=None, adjoin=True, names=False,
               na_rep=None, formatter=None):
        if len(self) == 0:
            return []

        stringified_levels = []
        for lev, level_codes in zip(self.levels, self.codes):
            na = na_rep if na_rep is not None else _get_na_rep(lev.dtype.type)

            if len(lev) > 0:

                formatted = lev.take(level_codes).format(formatter=formatter)

                # we have some NA
                mask = level_codes == -1
                if mask.any():
                    formatted = np.array(formatted, dtype=object)
                    formatted[mask] = na
                    formatted = formatted.tolist()

            else:
                # weird all NA case
                formatted = [pprint_thing(na if isna(x) else x,
                                          escape_chars=('\t', '\r', '\n'))
                             for x in algos.take_1d(lev._values, level_codes)]
            stringified_levels.append(formatted)

        result_levels = []
        for lev, name in zip(stringified_levels, self.names):
            level = []

            if names:
                level.append(pprint_thing(name,
                                          escape_chars=('\t', '\r', '\n'))
                             if name is not None else '')

            level.extend(np.array(lev, dtype=object))
            result_levels.append(level)

        if sparsify is None:
            sparsify = get_option("display.multi_sparse")

        if sparsify:
            sentinel = ''
            # GH3547
            # use value of sparsify as sentinel, unless it's an obvious
            # "truthy" value
            if sparsify not in [True, 1]:
                sentinel = sparsify
            # little bit of a kludge job for #1217
            result_levels = _sparsify(result_levels, start=int(names),
                                      sentinel=sentinel)

        if adjoin:
            from pandas.io.formats.format import _get_adjustment
            adj = _get_adjustment()
            return adj.adjoin(space, *result_levels).split('\n')
        else:
            return result_levels

    # --------------------------------------------------------------------

    def __len__(self):
        return len(self.codes[0])

    def _get_names(self):
        return FrozenList(level.name for level in self.levels)

    def _set_names(self, names, level=None, validate=True):
        """
        Set new names on index. Each name has to be a hashable type.

        Parameters
        ----------
        values : str or sequence
            name(s) to set
        level : int, level name, or sequence of int/level names (default None)
            If the index is a MultiIndex (hierarchical), level(s) to set (None
            for all levels). Otherwise level must be None
        validate : boolean, default True
            validate that the names match level lengths

        Raises
        ------
        TypeError if each name is not hashable.

        Notes
        -----
        sets names on levels. WARNING: mutates!

        Note that you generally want to set this *after* changing levels, so
        that it only acts on copies
        """
        # GH 15110
        # Don't allow a single string for names in a MultiIndex
        if names is not None and not is_list_like(names):
            raise ValueError('Names should be list-like for a MultiIndex')
        names = list(names)

        if validate and level is not None and len(names) != len(level):
            raise ValueError('Length of names must match length of level.')
        if validate and level is None and len(names) != self.nlevels:
            raise ValueError('Length of names must match number of levels in '
                             'MultiIndex.')

        if level is None:
            level = range(self.nlevels)
        else:
            level = [self._get_level_number(l) for l in level]

        # set the name
        for l, name in zip(level, names):
            if name is not None:
                # GH 20527
                # All items in 'names' need to be hashable:
                if not is_hashable(name):
                    raise TypeError('{}.name must be a hashable type'
                                    .format(self.__class__.__name__))
            self.levels[l].rename(name, inplace=True)

    names = property(fset=_set_names, fget=_get_names,
                     doc="Names of levels in MultiIndex")

    @Appender(_index_shared_docs['_get_grouper_for_level'])
    def _get_grouper_for_level(self, mapper, level):
        indexer = self.codes[level]
        level_index = self.levels[level]

        if mapper is not None:
            # Handle group mapping function and return
            level_values = self.levels[level].take(indexer)
            grouper = level_values.map(mapper)
            return grouper, None, None

        codes, uniques = algos.factorize(indexer, sort=True)

        if len(uniques) > 0 and uniques[0] == -1:
            # Handle NAs
            mask = indexer != -1
            ok_codes, uniques = algos.factorize(indexer[mask], sort=True)

            codes = np.empty(len(indexer), dtype=indexer.dtype)
            codes[mask] = ok_codes
            codes[~mask] = -1

        if len(uniques) < len(level_index):
            # Remove unobserved levels from level_index
            level_index = level_index.take(uniques)

        grouper = level_index.take(codes)

        return grouper, codes, level_index
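    # Illustrative walk-through (not part of the original source): for a level
    # ['a', 'b'] with codes [1, -1, 0, 1], factorize(..., sort=True) returns
    # uniques [-1, 0, 1], so the NA branch re-factorizes only the non-missing
    # positions and writes -1 back into the masked slots, yielding
    # codes == [1, -1, 0, 1] against the observed level_index ['a', 'b'].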

    @property
    def _constructor(self):
        return MultiIndex.from_tuples

    @cache_readonly
    def inferred_type(self):
        return 'mixed'

    def _get_level_number(self, level):
        count = self.names.count(level)
        if (count > 1) and not is_integer(level):
            raise ValueError('The name %s occurs multiple times, use a '
                             'level number' % level)
        try:
            level = self.names.index(level)
        except ValueError:
            if not is_integer(level):
                raise KeyError('Level %s not found' % str(level))
            elif level < 0:
                level += self.nlevels
                if level < 0:
                    orig_level = level - self.nlevels
                    raise IndexError('Too many levels: Index has only %d '
                                     'levels, %d is not a valid level number' %
                                     (self.nlevels, orig_level))
            # Note: levels are zero-based
            elif level >= self.nlevels:
                raise IndexError('Too many levels: Index has only %d levels, '
                                 'not %d' % (self.nlevels, level + 1))

        return level

    _tuples = None

    @cache_readonly
    def _engine(self):
        # Calculate the number of bits needed to represent labels in each
        # level, as log2 of their sizes (including -1 for NaN):
        sizes = np.ceil(np.log2([len(l) + 1 for l in self.levels]))

        # Sum bit counts, starting from the _right_....
        lev_bits = np.cumsum(sizes[::-1])[::-1]

        # ... in order to obtain offsets such that sorting the combination of
        # shifted codes (one for each level, resulting in a unique integer) is
        # equivalent to sorting lexicographically the codes themselves. Notice
        # that each level needs to be shifted by the number of bits needed to
        # represent the _previous_ ones:
        offsets = np.concatenate([lev_bits[1:], [0]]).astype('uint64')

        # Check the total number of bits needed for our representation:
        if lev_bits[0] > 64:
            # The levels would overflow a 64 bit uint - use Python integers:
            return MultiIndexPyIntEngine(self.levels, self.codes, offsets)
        return MultiIndexUIntEngine(self.levels, self.codes, offsets)
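    # Illustrative example (not part of the original source): for levels of
    # lengths 3 and 11, sizes == [2., 4.] bits, lev_bits == [6., 4.] and
    # offsets == [4, 0]; the total of 6 bits fits in a uint64, so the
    # MultiIndexUIntEngine is selected.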

    @property
    def values(self):
        if self._tuples is not None:
            return self._tuples

        values = []

        for i in range(self.nlevels):
            vals = self._get_level_values(i)
            if is_categorical_dtype(vals):
                vals = vals.get_values()
            if (isinstance(vals.dtype, (PandasExtensionDtype, ExtensionDtype))
                    or hasattr(vals, '_box_values')):
                vals = vals.astype(object)
            vals = np.array(vals, copy=False)
            values.append(vals)

        self._tuples = lib.fast_zip(values)
        return self._tuples

    @property
    def _has_complex_internals(self):
        # to disable groupby tricks
        return True

    @cache_readonly
    def is_monotonic_increasing(self):
        """
        return if the index is monotonic increasing (only equal or
        increasing) values.
        """

        # reversed() because lexsort() wants the most significant key last.
        values = [self._get_level_values(i).values
                  for i in reversed(range(len(self.levels)))]
        try:
            sort_order = np.lexsort(values)
            return Index(sort_order).is_monotonic
        except TypeError:

            # we have mixed types and np.lexsort is not happy
            return Index(self.values).is_monotonic
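    # Illustrative note (not part of the original source): an index already in
    # lexicographic order makes np.lexsort return the identity permutation
    # [0, 1, ..., n - 1], which is monotonic; any out-of-order index (e.g.
    # [('b', 1), ('a', 2)] giving [1, 0]) produces a non-monotonic permutation.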

    @cache_readonly
    def is_monotonic_decreasing(self):
        """
        return if the index is monotonic decreasing (only equal or
        decreasing) values.
        """
        # monotonic decreasing if and only if reverse is monotonic increasing
        return self[::-1].is_monotonic_increasing

    @cache_readonly
    def _have_mixed_levels(self):
        """ return a boolean list indicating if we have mixed levels """
        return ['mixed' in l for l in self._inferred_type_levels]

    @cache_readonly
    def _inferred_type_levels(self):
        """ return a list of the inferred types, one for each level """
        return [i.inferred_type for i in self.levels]

    @cache_readonly
    def _hashed_values(self):
        """ return a uint64 ndarray of my hashed values """
        from pandas.core.util.hashing import hash_tuples
        return hash_tuples(self)

    def _hashed_indexing_key(self, key):
        """
        validate and return the hash for the provided key

        *this is internal for use by the cython routines*

        Parameters
        ----------
        key : string or tuple

        Returns
        -------
        np.uint64

        Notes
        -----
        we need to stringify if we have mixed levels
        """
        from pandas.core.util.hashing import hash_tuples, hash_tuple

        if not isinstance(key, tuple):
            return hash_tuples(key)

        if not len(key) == self.nlevels:
            raise KeyError

        def f(k, stringify):
            if stringify and not isinstance(k, compat.string_types):
                k = str(k)
            return k
        key = tuple(f(k, stringify)
                    for k, stringify in zip(key, self._have_mixed_levels))
        return hash_tuple(key)

    @Appender(Index.duplicated.__doc__)
    def duplicated(self, keep='first'):
        from pandas.core.sorting import get_group_index
        from pandas._libs.hashtable import duplicated_int64

        shape = map(len, self.levels)
        ids = get_group_index(self.codes, shape, sort=False, xnull=False)

        return duplicated_int64(ids, keep)
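    # Sketch of the idea (not part of the original source): get_group_index
    # assigns one int64 id per distinct code combination, so rows with equal
    # tuples share an id and duplicated_int64 can flag repeats without ever
    # materializing the tuples themselves.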

    def fillna(self, value=None, downcast=None):
        """
        fillna is not implemented for MultiIndex
        """
        raise NotImplementedError('isna is not defined for MultiIndex')

    @Appender(_index_shared_docs['dropna'])
    def dropna(self, how='any'):
        nans = [level_codes == -1 for level_codes in self.codes]
        if how == 'any':
            indexer = np.any(nans, axis=0)
        elif how == 'all':
            indexer = np.all(nans, axis=0)
        else:
            raise ValueError("invalid how option: {0}".format(how))

        new_codes = [level_codes[~indexer] for level_codes in self.codes]
        return self.copy(codes=new_codes, deep=True)

    def get_value(self, series, key):
        # somewhat broken encapsulation
        from pandas.core.indexing import maybe_droplevels

        # Label-based
        s = com.values_from_object(series)
        k = com.values_from_object(key)

        def _try_mi(k):
            # TODO: what if a level contains tuples??
            loc = self.get_loc(k)
            new_values = series._values[loc]
            new_index = self[loc]
            new_index = maybe_droplevels(new_index, k)
            return series._constructor(new_values, index=new_index,
                                       name=series.name).__finalize__(self)

        try:
            return self._engine.get_value(s, k)
        except KeyError as e1:
            try:
                return _try_mi(key)
            except KeyError:
                pass

            try:
                return libindex.get_value_at(s, k)
            except IndexError:
                raise
            except TypeError:
                # generator/iterator-like
                if is_iterator(key):
                    raise InvalidIndexError(key)
                else:
                    raise e1
            except Exception:  # pragma: no cover
                raise e1

        except TypeError:

            # a Timestamp will raise a TypeError in a multi-index
            # rather than a KeyError, try it here
            # note that a string that 'looks' like a Timestamp will raise
            # a KeyError! (GH5725)
            if (isinstance(key, (datetime.datetime, np.datetime64)) or
                    (compat.PY3 and isinstance(key, compat.string_types))):
                try:
                    return _try_mi(key)
                except KeyError:
                    raise
                except (IndexError, ValueError, TypeError):
                    pass

                try:
                    return _try_mi(Timestamp(key))
                except (KeyError, TypeError,
                        IndexError, ValueError, tslibs.OutOfBoundsDatetime):
                    pass

            raise InvalidIndexError(key)
  1133. def _get_level_values(self, level, unique=False):
  1134. """
  1135. Return vector of label values for requested level,
  1136. equal to the length of the index
  1137. **this is an internal method**
  1138. Parameters
  1139. ----------
1140. level : int
    Level number (position) of the requested level
  1141. unique : bool, default False
  1142. if True, drop duplicated values
  1143. Returns
  1144. -------
  1145. values : ndarray
  1146. """
  1147. values = self.levels[level]
  1148. level_codes = self.codes[level]
  1149. if unique:
  1150. level_codes = algos.unique(level_codes)
  1151. filled = algos.take_1d(values._values, level_codes,
  1152. fill_value=values._na_value)
  1153. values = values._shallow_copy(filled)
  1154. return values
  1155. def get_level_values(self, level):
  1156. """
  1157. Return vector of label values for requested level,
  1158. equal to the length of the index.
  1159. Parameters
  1160. ----------
  1161. level : int or str
  1162. ``level`` is either the integer position of the level in the
  1163. MultiIndex, or the name of the level.
  1164. Returns
  1165. -------
  1166. values : Index
  1167. ``values`` is a level of this MultiIndex converted to
  1168. a single :class:`Index` (or subclass thereof).
  1169. Examples
1170. --------
  1171. Create a MultiIndex:
  1172. >>> mi = pd.MultiIndex.from_arrays((list('abc'), list('def')))
  1173. >>> mi.names = ['level_1', 'level_2']
  1174. Get level values by supplying level as either integer or name:
  1175. >>> mi.get_level_values(0)
  1176. Index(['a', 'b', 'c'], dtype='object', name='level_1')
  1177. >>> mi.get_level_values('level_2')
  1178. Index(['d', 'e', 'f'], dtype='object', name='level_2')
  1179. """
  1180. level = self._get_level_number(level)
  1181. values = self._get_level_values(level)
  1182. return values
  1183. @Appender(_index_shared_docs['index_unique'] % _index_doc_kwargs)
  1184. def unique(self, level=None):
  1185. if level is None:
  1186. return super(MultiIndex, self).unique()
  1187. else:
  1188. level = self._get_level_number(level)
  1189. return self._get_level_values(level=level, unique=True)
  1190. def _to_safe_for_reshape(self):
  1191. """ convert to object if we are a categorical """
  1192. return self.set_levels([i._to_safe_for_reshape() for i in self.levels])
  1193. def to_frame(self, index=True, name=None):
  1194. """
  1195. Create a DataFrame with the levels of the MultiIndex as columns.
  1196. Column ordering is determined by the DataFrame constructor with data as
  1197. a dict.
  1198. .. versionadded:: 0.24.0
  1199. Parameters
  1200. ----------
  1201. index : boolean, default True
  1202. Set the index of the returned DataFrame as the original MultiIndex.
  1203. name : list / sequence of strings, optional
  1204. The passed names should substitute index level names.
  1205. Returns
  1206. -------
  1207. DataFrame : a DataFrame containing the original MultiIndex data.
  1208. See Also
  1209. --------
  1210. DataFrame
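Examples
--------
A small sketch; ``mi`` below is made-up illustrative data:

>>> mi = pd.MultiIndex.from_arrays([[1, 2], ['a', 'b']], names=['x', 'y'])
>>> mi.to_frame(index=False)
   x  y
0  1  a
1  2  b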
  1211. """
  1212. from pandas import DataFrame
  1213. if name is not None:
  1214. if not is_list_like(name):
  1215. raise TypeError("'name' must be a list / sequence "
  1216. "of column names.")
  1217. if len(name) != len(self.levels):
  1218. raise ValueError("'name' should have same length as "
  1219. "number of levels on index.")
  1220. idx_names = name
  1221. else:
  1222. idx_names = self.names
  1223. # Guarantee resulting column order
  1224. result = DataFrame(
  1225. OrderedDict([
  1226. ((level if lvlname is None else lvlname),
  1227. self._get_level_values(level))
  1228. for lvlname, level in zip(idx_names, range(len(self.levels)))
  1229. ]),
  1230. copy=False
  1231. )
  1232. if index:
  1233. result.index = self
  1234. return result
  1235. def to_hierarchical(self, n_repeat, n_shuffle=1):
  1236. """
  1237. Return a MultiIndex reshaped to conform to the
  1238. shapes given by n_repeat and n_shuffle.
  1239. .. deprecated:: 0.24.0
  1240. Useful to replicate and rearrange a MultiIndex for combination
  1241. with another Index with n_repeat items.
  1242. Parameters
  1243. ----------
  1244. n_repeat : int
  1245. Number of times to repeat the labels on self
  1246. n_shuffle : int
  1247. Controls the reordering of the labels. If the result is going
  1248. to be an inner level in a MultiIndex, n_shuffle will need to be
1249. greater than one. The size of each label must be divisible by
  1250. n_shuffle.
  1251. Returns
  1252. -------
  1253. MultiIndex
  1254. Examples
  1255. --------
  1256. >>> idx = pd.MultiIndex.from_tuples([(1, u'one'), (1, u'two'),
  1257. (2, u'one'), (2, u'two')])
  1258. >>> idx.to_hierarchical(3)
  1259. MultiIndex(levels=[[1, 2], [u'one', u'two']],
  1260. codes=[[0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1],
  1261. [0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1]])
  1262. """
  1263. levels = self.levels
  1264. codes = [np.repeat(level_codes, n_repeat) for
  1265. level_codes in self.codes]
  1266. # Assumes that each level_codes is divisible by n_shuffle
  1267. codes = [x.reshape(n_shuffle, -1).ravel(order='F') for x in codes]
  1268. names = self.names
  1269. warnings.warn("Method .to_hierarchical is deprecated and will "
  1270. "be removed in a future version",
  1271. FutureWarning, stacklevel=2)
  1272. return MultiIndex(levels=levels, codes=codes, names=names)
  1273. def to_flat_index(self):
  1274. """
  1275. Convert a MultiIndex to an Index of Tuples containing the level values.
  1276. .. versionadded:: 0.24.0
  1277. Returns
  1278. -------
  1279. pd.Index
  1280. Index with the MultiIndex data represented in Tuples.
  1281. Notes
  1282. -----
  1283. This method will simply return the caller if called by anything other
  1284. than a MultiIndex.
  1285. Examples
  1286. --------
  1287. >>> index = pd.MultiIndex.from_product(
  1288. ... [['foo', 'bar'], ['baz', 'qux']],
  1289. ... names=['a', 'b'])
  1290. >>> index.to_flat_index()
  1291. Index([('foo', 'baz'), ('foo', 'qux'),
  1292. ('bar', 'baz'), ('bar', 'qux')],
  1293. dtype='object')
  1294. """
  1295. return Index(self.values, tupleize_cols=False)
  1296. @property
  1297. def is_all_dates(self):
  1298. return False
  1299. def is_lexsorted(self):
  1300. """
  1301. Return True if the codes are lexicographically sorted
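Returns
-------
bool

Examples
--------
An illustrative sketch with made-up data:

>>> pd.MultiIndex.from_arrays([['a', 'a', 'b'], ['x', 'y', 'x']]).is_lexsorted()
True
>>> pd.MultiIndex.from_arrays([['b', 'a'], ['x', 'y']]).is_lexsorted()
False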
  1302. """
  1303. return self.lexsort_depth == self.nlevels
  1304. @cache_readonly
  1305. def lexsort_depth(self):
  1306. if self.sortorder is not None:
  1307. if self.sortorder == 0:
  1308. return self.nlevels
  1309. else:
  1310. return 0
  1311. int64_codes = [ensure_int64(level_codes) for level_codes in self.codes]
  1312. for k in range(self.nlevels, 0, -1):
  1313. if libalgos.is_lexsorted(int64_codes[:k]):
  1314. return k
  1315. return 0
  1316. def _sort_levels_monotonic(self):
  1317. """
  1318. .. versionadded:: 0.20.0
  1319. This is an *internal* function.
  1320. Create a new MultiIndex from the current to monotonically sorted
  1321. items IN the levels. This does not actually make the entire MultiIndex
  1322. monotonic, JUST the levels.
  1323. The resulting MultiIndex will have the same outward
  1324. appearance, meaning the same .values and ordering. It will also
  1325. be .equals() to the original.
  1326. Returns
  1327. -------
  1328. MultiIndex
  1329. Examples
  1330. --------
  1331. >>> i = pd.MultiIndex(levels=[['a', 'b'], ['bb', 'aa']],
  1332. codes=[[0, 0, 1, 1], [0, 1, 0, 1]])
  1333. >>> i
  1334. MultiIndex(levels=[['a', 'b'], ['bb', 'aa']],
  1335. codes=[[0, 0, 1, 1], [0, 1, 0, 1]])
1336. >>> i._sort_levels_monotonic()
  1337. MultiIndex(levels=[['a', 'b'], ['aa', 'bb']],
  1338. codes=[[0, 0, 1, 1], [1, 0, 1, 0]])
  1339. """
  1340. if self.is_lexsorted() and self.is_monotonic:
  1341. return self
  1342. new_levels = []
  1343. new_codes = []
  1344. for lev, level_codes in zip(self.levels, self.codes):
  1345. if not lev.is_monotonic:
  1346. try:
  1347. # indexer to reorder the levels
  1348. indexer = lev.argsort()
  1349. except TypeError:
  1350. pass
  1351. else:
  1352. lev = lev.take(indexer)
  1353. # indexer to reorder the level codes
  1354. indexer = ensure_int64(indexer)
  1355. ri = lib.get_reverse_indexer(indexer, len(indexer))
  1356. level_codes = algos.take_1d(ri, level_codes)
  1357. new_levels.append(lev)
  1358. new_codes.append(level_codes)
  1359. return MultiIndex(new_levels, new_codes,
  1360. names=self.names, sortorder=self.sortorder,
  1361. verify_integrity=False)
  1362. def remove_unused_levels(self):
  1363. """
  1364. Create a new MultiIndex from the current that removes
  1365. unused levels, meaning that they are not expressed in the labels.
  1366. The resulting MultiIndex will have the same outward
  1367. appearance, meaning the same .values and ordering. It will also
  1368. be .equals() to the original.
  1369. .. versionadded:: 0.20.0
  1370. Returns
  1371. -------
  1372. MultiIndex
  1373. Examples
  1374. --------
1375. >>> i = pd.MultiIndex.from_product([range(2), list('ab')])
>>> i
1376. MultiIndex(levels=[[0, 1], ['a', 'b']],
  1377. codes=[[0, 0, 1, 1], [0, 1, 0, 1]])
  1378. >>> i[2:]
  1379. MultiIndex(levels=[[0, 1], ['a', 'b']],
  1380. codes=[[1, 1], [0, 1]])
  1381. The 0 from the first level is not represented
  1382. and can be removed
  1383. >>> i[2:].remove_unused_levels()
  1384. MultiIndex(levels=[[1], ['a', 'b']],
  1385. codes=[[0, 0], [0, 1]])
  1386. """
  1387. new_levels = []
  1388. new_codes = []
  1389. changed = False
  1390. for lev, level_codes in zip(self.levels, self.codes):
  1391. # Since few levels are typically unused, bincount() is more
  1392. # efficient than unique() - however it only accepts positive values
  1393. # (and drops order):
  1394. uniques = np.where(np.bincount(level_codes + 1) > 0)[0] - 1
  1395. has_na = int(len(uniques) and (uniques[0] == -1))
  1396. if len(uniques) != len(lev) + has_na:
  1397. # We have unused levels
  1398. changed = True
  1399. # Recalculate uniques, now preserving order.
  1400. # Can easily be cythonized by exploiting the already existing
  1401. # "uniques" and stop parsing "level_codes" when all items
  1402. # are found:
  1403. uniques = algos.unique(level_codes)
  1404. if has_na:
  1405. na_idx = np.where(uniques == -1)[0]
  1406. # Just ensure that -1 is in first position:
  1407. uniques[[0, na_idx[0]]] = uniques[[na_idx[0], 0]]
  1408. # codes get mapped from uniques to 0:len(uniques)
  1409. # -1 (if present) is mapped to last position
  1410. code_mapping = np.zeros(len(lev) + has_na)
  1411. # ... and reassigned value -1:
  1412. code_mapping[uniques] = np.arange(len(uniques)) - has_na
  1413. level_codes = code_mapping[level_codes]
  1414. # new levels are simple
  1415. lev = lev.take(uniques[has_na:])
  1416. new_levels.append(lev)
  1417. new_codes.append(level_codes)
  1418. result = self._shallow_copy()
  1419. if changed:
  1420. result._reset_identity()
  1421. result._set_levels(new_levels, validate=False)
  1422. result._set_codes(new_codes, validate=False)
  1423. return result
  1424. @property
  1425. def nlevels(self):
  1426. """Integer number of levels in this MultiIndex."""
  1427. return len(self.levels)
  1428. @property
  1429. def levshape(self):
  1430. """A tuple with the length of each level."""
  1431. return tuple(len(x) for x in self.levels)
  1432. def __reduce__(self):
  1433. """Necessary for making this object picklable"""
  1434. d = dict(levels=[lev for lev in self.levels],
  1435. codes=[level_codes for level_codes in self.codes],
  1436. sortorder=self.sortorder, names=list(self.names))
  1437. return ibase._new_Index, (self.__class__, d), None
  1438. def __setstate__(self, state):
  1439. """Necessary for making this object picklable"""
  1440. if isinstance(state, dict):
  1441. levels = state.get('levels')
  1442. codes = state.get('codes')
  1443. sortorder = state.get('sortorder')
  1444. names = state.get('names')
  1445. elif isinstance(state, tuple):
  1446. nd_state, own_state = state
  1447. levels, codes, sortorder, names = own_state
  1448. self._set_levels([Index(x) for x in levels], validate=False)
  1449. self._set_codes(codes)
  1450. self._set_names(names)
  1451. self.sortorder = sortorder
  1452. self._verify_integrity()
  1453. self._reset_identity()
  1454. def __getitem__(self, key):
  1455. if is_scalar(key):
  1456. key = com.cast_scalar_indexer(key)
  1457. retval = []
  1458. for lev, level_codes in zip(self.levels, self.codes):
  1459. if level_codes[key] == -1:
  1460. retval.append(np.nan)
  1461. else:
  1462. retval.append(lev[level_codes[key]])
  1463. return tuple(retval)
  1464. else:
  1465. if com.is_bool_indexer(key):
  1466. key = np.asarray(key, dtype=bool)
  1467. sortorder = self.sortorder
  1468. else:
  1469. # cannot be sure whether the result will be sorted
  1470. sortorder = None
  1471. if isinstance(key, Index):
  1472. key = np.asarray(key)
  1473. new_codes = [level_codes[key] for level_codes in self.codes]
  1474. return MultiIndex(levels=self.levels, codes=new_codes,
  1475. names=self.names, sortorder=sortorder,
  1476. verify_integrity=False)
  1477. @Appender(_index_shared_docs['take'] % _index_doc_kwargs)
  1478. def take(self, indices, axis=0, allow_fill=True,
  1479. fill_value=None, **kwargs):
  1480. nv.validate_take(tuple(), kwargs)
  1481. indices = ensure_platform_int(indices)
  1482. taken = self._assert_take_fillable(self.codes, indices,
  1483. allow_fill=allow_fill,
  1484. fill_value=fill_value,
  1485. na_value=-1)
  1486. return MultiIndex(levels=self.levels, codes=taken,
  1487. names=self.names, verify_integrity=False)
  1488. def _assert_take_fillable(self, values, indices, allow_fill=True,
  1489. fill_value=None, na_value=None):
  1490. """ Internal method to handle NA filling of take """
  1491. # only fill if we are passing a non-None fill_value
  1492. if allow_fill and fill_value is not None:
  1493. if (indices < -1).any():
  1494. msg = ('When allow_fill=True and fill_value is not None, '
  1495. 'all indices must be >= -1')
  1496. raise ValueError(msg)
  1497. taken = [lab.take(indices) for lab in self.codes]
  1498. mask = indices == -1
  1499. if mask.any():
  1500. masked = []
  1501. for new_label in taken:
  1502. label_values = new_label.values()
  1503. label_values[mask] = na_value
  1504. masked.append(np.asarray(label_values))
  1505. taken = masked
  1506. else:
  1507. taken = [lab.take(indices) for lab in self.codes]
  1508. return taken
  1509. def append(self, other):
  1510. """
1511. Append a collection of Index objects together
  1512. Parameters
  1513. ----------
  1514. other : Index or list/tuple of indices
  1515. Returns
  1516. -------
  1517. appended : Index
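Examples
--------
A minimal sketch with made-up data (output shown in this version's
``levels``/``codes`` repr):

>>> mi = pd.MultiIndex.from_tuples([(1, 'a'), (2, 'b')])
>>> mi.append(mi)
MultiIndex(levels=[[1, 2], ['a', 'b']],
           codes=[[0, 1, 0, 1], [0, 1, 0, 1]])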
  1518. """
  1519. if not isinstance(other, (list, tuple)):
  1520. other = [other]
  1521. if all((isinstance(o, MultiIndex) and o.nlevels >= self.nlevels)
  1522. for o in other):
  1523. arrays = []
  1524. for i in range(self.nlevels):
  1525. label = self._get_level_values(i)
  1526. appended = [o._get_level_values(i) for o in other]
  1527. arrays.append(label.append(appended))
  1528. return MultiIndex.from_arrays(arrays, names=self.names)
  1529. to_concat = (self.values, ) + tuple(k._values for k in other)
  1530. new_tuples = np.concatenate(to_concat)
  1531. # if all(isinstance(x, MultiIndex) for x in other):
  1532. try:
  1533. return MultiIndex.from_tuples(new_tuples, names=self.names)
  1534. except (TypeError, IndexError):
  1535. return Index(new_tuples)
  1536. def argsort(self, *args, **kwargs):
  1537. return self.values.argsort(*args, **kwargs)
  1538. @Appender(_index_shared_docs['repeat'] % _index_doc_kwargs)
  1539. def repeat(self, repeats, axis=None):
  1540. nv.validate_repeat(tuple(), dict(axis=axis))
  1541. return MultiIndex(levels=self.levels,
  1542. codes=[level_codes.view(np.ndarray).repeat(repeats)
  1543. for level_codes in self.codes],
  1544. names=self.names, sortorder=self.sortorder,
  1545. verify_integrity=False)
  1546. def where(self, cond, other=None):
  1547. raise NotImplementedError(".where is not supported for "
  1548. "MultiIndex operations")
  1549. @deprecate_kwarg(old_arg_name='labels', new_arg_name='codes')
  1550. def drop(self, codes, level=None, errors='raise'):
  1551. """
  1552. Make new MultiIndex with passed list of codes deleted
  1553. Parameters
  1554. ----------
  1555. codes : array-like
  1556. Must be a list of tuples
  1557. level : int or level name, default None
  1558. Returns
  1559. -------
  1560. dropped : MultiIndex
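Examples
--------
A minimal sketch with made-up data; note that dropped labels remain in
the ``levels`` (use ``remove_unused_levels`` to prune them):

>>> mi = pd.MultiIndex.from_tuples([('a', 1), ('b', 2), ('c', 3)])
>>> mi.drop([('a', 1)])
MultiIndex(levels=[['a', 'b', 'c'], [1, 2, 3]],
           codes=[[1, 2], [1, 2]])
>>> mi.drop('a', level=0)
MultiIndex(levels=[['a', 'b', 'c'], [1, 2, 3]],
           codes=[[1, 2], [1, 2]])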
  1561. """
  1562. if level is not None:
  1563. return self._drop_from_level(codes, level)
  1564. try:
  1565. if not isinstance(codes, (np.ndarray, Index)):
  1566. codes = com.index_labels_to_array(codes)
  1567. indexer = self.get_indexer(codes)
  1568. mask = indexer == -1
  1569. if mask.any():
  1570. if errors != 'ignore':
  1571. raise ValueError('codes %s not contained in axis' %
  1572. codes[mask])
  1573. except Exception:
  1574. pass
  1575. inds = []
  1576. for level_codes in codes:
  1577. try:
  1578. loc = self.get_loc(level_codes)
  1579. # get_loc returns either an integer, a slice, or a boolean
  1580. # mask
  1581. if isinstance(loc, int):
  1582. inds.append(loc)
  1583. elif isinstance(loc, slice):
  1584. inds.extend(lrange(loc.start, loc.stop))
  1585. elif com.is_bool_indexer(loc):
  1586. if self.lexsort_depth == 0:
  1587. warnings.warn('dropping on a non-lexsorted multi-index'
  1588. ' without a level parameter may impact '
  1589. 'performance.',
  1590. PerformanceWarning,
  1591. stacklevel=3)
  1592. loc = loc.nonzero()[0]
  1593. inds.extend(loc)
  1594. else:
  1595. msg = 'unsupported indexer of type {}'.format(type(loc))
  1596. raise AssertionError(msg)
  1597. except KeyError:
  1598. if errors != 'ignore':
  1599. raise
  1600. return self.delete(inds)
  1601. def _drop_from_level(self, codes, level):
  1602. codes = com.index_labels_to_array(codes)
  1603. i = self._get_level_number(level)
  1604. index = self.levels[i]
  1605. values = index.get_indexer(codes)
  1606. mask = ~algos.isin(self.codes[i], values)
  1607. return self[mask]
  1608. def swaplevel(self, i=-2, j=-1):
  1609. """
  1610. Swap level i with level j.
  1611. Calling this method does not change the ordering of the values.
  1612. Parameters
  1613. ----------
  1614. i : int, str, default -2
  1615. First level of index to be swapped. Can pass level name as string.
  1616. Type of parameters can be mixed.
  1617. j : int, str, default -1
  1618. Second level of index to be swapped. Can pass level name as string.
  1619. Type of parameters can be mixed.
  1620. Returns
  1621. -------
  1622. MultiIndex
  1623. A new MultiIndex
  1624. .. versionchanged:: 0.18.1
  1625. The indexes ``i`` and ``j`` are now optional, and default to
  1626. the two innermost levels of the index.
  1627. See Also
  1628. --------
  1629. Series.swaplevel : Swap levels i and j in a MultiIndex.
1630. DataFrame.swaplevel : Swap levels i and j in a MultiIndex on a
  1631. particular axis.
  1632. Examples
  1633. --------
  1634. >>> mi = pd.MultiIndex(levels=[['a', 'b'], ['bb', 'aa']],
  1635. ... codes=[[0, 0, 1, 1], [0, 1, 0, 1]])
  1636. >>> mi
  1637. MultiIndex(levels=[['a', 'b'], ['bb', 'aa']],
  1638. codes=[[0, 0, 1, 1], [0, 1, 0, 1]])
  1639. >>> mi.swaplevel(0, 1)
  1640. MultiIndex(levels=[['bb', 'aa'], ['a', 'b']],
  1641. codes=[[0, 1, 0, 1], [0, 0, 1, 1]])
  1642. """
  1643. new_levels = list(self.levels)
  1644. new_codes = list(self.codes)
  1645. new_names = list(self.names)
  1646. i = self._get_level_number(i)
  1647. j = self._get_level_number(j)
  1648. new_levels[i], new_levels[j] = new_levels[j], new_levels[i]
  1649. new_codes[i], new_codes[j] = new_codes[j], new_codes[i]
  1650. new_names[i], new_names[j] = new_names[j], new_names[i]
  1651. return MultiIndex(levels=new_levels, codes=new_codes,
  1652. names=new_names, verify_integrity=False)
  1653. def reorder_levels(self, order):
  1654. """
1655. Rearrange levels using input order. May not drop or duplicate levels.
1656. Parameters
1657. ----------
order : list of int or list of str
    List representing the new level order. Reference level by number
    (position) or by key (label).
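Examples
--------
A minimal sketch; the names ``x`` and ``y`` are made up for illustration:

>>> mi = pd.MultiIndex.from_arrays([[1, 2], ['a', 'b']], names=['x', 'y'])
>>> mi.reorder_levels(['y', 'x'])
MultiIndex(levels=[['a', 'b'], [1, 2]],
           codes=[[0, 1], [0, 1]],
           names=['y', 'x'])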
  1658. """
  1659. order = [self._get_level_number(i) for i in order]
  1660. if len(order) != self.nlevels:
  1661. raise AssertionError('Length of order must be same as '
  1662. 'number of levels (%d), got %d' %
  1663. (self.nlevels, len(order)))
  1664. new_levels = [self.levels[i] for i in order]
  1665. new_codes = [self.codes[i] for i in order]
  1666. new_names = [self.names[i] for i in order]
  1667. return MultiIndex(levels=new_levels, codes=new_codes,
  1668. names=new_names, verify_integrity=False)
  1669. def __getslice__(self, i, j):
  1670. return self.__getitem__(slice(i, j))
  1671. def _get_codes_for_sorting(self):
  1672. """
1673. we are categorizing our codes by using the
1674. available categories (all, not just observed),
1675. excluding any missing ones (-1); this is in preparation
1676. for sorting, where we need to disambiguate that -1 is not
1677. a valid value
  1678. """
  1679. from pandas.core.arrays import Categorical
  1680. def cats(level_codes):
  1681. return np.arange(np.array(level_codes).max() + 1 if
  1682. len(level_codes) else 0,
  1683. dtype=level_codes.dtype)
  1684. return [Categorical.from_codes(level_codes, cats(level_codes),
  1685. ordered=True)
  1686. for level_codes in self.codes]
  1687. def sortlevel(self, level=0, ascending=True, sort_remaining=True):
  1688. """
  1689. Sort MultiIndex at the requested level. The result will respect the
  1690. original ordering of the associated factor at that level.
  1691. Parameters
  1692. ----------
  1693. level : list-like, int or str, default 0
  1694. If a string is given, must be a name of the level
1695. If list-like, must be names or ints of levels.
  1696. ascending : boolean, default True
  1697. False to sort in descending order
  1698. Can also be a list to specify a directed ordering
1699. sort_remaining : bool, default True
    If True, also sort by the remaining levels after sorting by ``level``.
  1700. Returns
  1701. -------
  1702. sorted_index : pd.MultiIndex
  1703. Resulting index
  1704. indexer : np.ndarray
  1705. Indices of output values in original index
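Examples
--------
A minimal sketch with made-up data; both the reordered index and the
indexer into the original positions are returned:

>>> mi = pd.MultiIndex.from_arrays([[0, 0, 2], [2, 1, 0]])
>>> sorted_mi, indexer = mi.sortlevel(1)
>>> sorted_mi
MultiIndex(levels=[[0, 2], [0, 1, 2]],
           codes=[[1, 0, 0], [0, 1, 2]])
>>> indexer
array([2, 1, 0])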
  1706. """
  1707. from pandas.core.sorting import indexer_from_factorized
  1708. if isinstance(level, (compat.string_types, int)):
  1709. level = [level]
  1710. level = [self._get_level_number(lev) for lev in level]
  1711. sortorder = None
  1712. # we have a directed ordering via ascending
  1713. if isinstance(ascending, list):
  1714. if not len(level) == len(ascending):
  1715. raise ValueError("level must have same length as ascending")
  1716. from pandas.core.sorting import lexsort_indexer
  1717. indexer = lexsort_indexer([self.codes[lev] for lev in level],
  1718. orders=ascending)
  1719. # level ordering
  1720. else:
  1721. codes = list(self.codes)
  1722. shape = list(self.levshape)
  1723. # partition codes and shape
  1724. primary = tuple(codes.pop(lev - i) for i, lev in enumerate(level))
  1725. primshp = tuple(shape.pop(lev - i) for i, lev in enumerate(level))
  1726. if sort_remaining:
  1727. primary += primary + tuple(codes)
  1728. primshp += primshp + tuple(shape)
  1729. else:
  1730. sortorder = level[0]
  1731. indexer = indexer_from_factorized(primary, primshp,
  1732. compress=False)
  1733. if not ascending:
  1734. indexer = indexer[::-1]
  1735. indexer = ensure_platform_int(indexer)
  1736. new_codes = [level_codes.take(indexer) for level_codes in self.codes]
  1737. new_index = MultiIndex(codes=new_codes, levels=self.levels,
  1738. names=self.names, sortorder=sortorder,
  1739. verify_integrity=False)
  1740. return new_index, indexer
  1741. def _convert_listlike_indexer(self, keyarr, kind=None):
  1742. """
  1743. Parameters
  1744. ----------
  1745. keyarr : list-like
  1746. Indexer to convert.
  1747. Returns
  1748. -------
  1749. tuple (indexer, keyarr)
  1750. indexer is an ndarray or None if cannot convert
  1751. keyarr are tuple-safe keys
  1752. """
  1753. indexer, keyarr = super(MultiIndex, self)._convert_listlike_indexer(
  1754. keyarr, kind=kind)
  1755. # are we indexing a specific level
  1756. if indexer is None and len(keyarr) and not isinstance(keyarr[0],
  1757. tuple):
  1758. level = 0
  1759. _, indexer = self.reindex(keyarr, level=level)
  1760. # take all
  1761. if indexer is None:
  1762. indexer = np.arange(len(self))
  1763. check = self.levels[0].get_indexer(keyarr)
  1764. mask = check == -1
  1765. if mask.any():
  1766. raise KeyError('%s not in index' % keyarr[mask])
  1767. return indexer, keyarr
  1768. @Appender(_index_shared_docs['get_indexer'] % _index_doc_kwargs)
  1769. def get_indexer(self, target, method=None, limit=None, tolerance=None):
  1770. method = missing.clean_reindex_fill_method(method)
  1771. target = ensure_index(target)
  1772. # empty indexer
  1773. if is_list_like(target) and not len(target):
  1774. return ensure_platform_int(np.array([]))
  1775. if not isinstance(target, MultiIndex):
  1776. try:
  1777. target = MultiIndex.from_tuples(target)
  1778. except (TypeError, ValueError):
  1779. # let's instead try with a straight Index
  1780. if method is None:
  1781. return Index(self.values).get_indexer(target,
  1782. method=method,
  1783. limit=limit,
  1784. tolerance=tolerance)
  1785. if not self.is_unique:
  1786. raise ValueError('Reindexing only valid with uniquely valued '
  1787. 'Index objects')
  1788. if method == 'pad' or method == 'backfill':
  1789. if tolerance is not None:
  1790. raise NotImplementedError("tolerance not implemented yet "
  1791. 'for MultiIndex')
  1792. indexer = self._engine.get_indexer(target, method, limit)
  1793. elif method == 'nearest':
  1794. raise NotImplementedError("method='nearest' not implemented yet "
  1795. 'for MultiIndex; see GitHub issue 9365')
  1796. else:
  1797. indexer = self._engine.get_indexer(target)
  1798. return ensure_platform_int(indexer)
  1799. @Appender(_index_shared_docs['get_indexer_non_unique'] % _index_doc_kwargs)
  1800. def get_indexer_non_unique(self, target):
  1801. return super(MultiIndex, self).get_indexer_non_unique(target)
  1802. def reindex(self, target, method=None, level=None, limit=None,
  1803. tolerance=None):
  1804. """
  1805. Create index with target's values (move/add/delete values as necessary)
  1806. Returns
  1807. -------
  1808. new_index : pd.MultiIndex
  1809. Resulting index
  1810. indexer : np.ndarray or None
  1811. Indices of output values in original index
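Examples
--------
A minimal sketch with made-up data; labels missing from the original
index get an indexer entry of ``-1``:

>>> mi = pd.MultiIndex.from_tuples([('a', 1), ('b', 2)])
>>> new_index, indexer = mi.reindex([('b', 2), ('c', 3)])
>>> indexer
array([ 1, -1])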
  1812. """
  1813. # GH6552: preserve names when reindexing to non-named target
  1814. # (i.e. neither Index nor Series).
  1815. preserve_names = not hasattr(target, 'names')
  1816. if level is not None:
  1817. if method is not None:
  1818. raise TypeError('Fill method not supported if level passed')
  1819. # GH7774: preserve dtype/tz if target is empty and not an Index.
  1820. # target may be an iterator
  1821. target = ibase._ensure_has_len(target)
  1822. if len(target) == 0 and not isinstance(target, Index):
  1823. idx = self.levels[level]
  1824. attrs = idx._get_attributes_dict()
  1825. attrs.pop('freq', None) # don't preserve freq
  1826. target = type(idx)._simple_new(np.empty(0, dtype=idx.dtype),
  1827. **attrs)
  1828. else:
  1829. target = ensure_index(target)
  1830. target, indexer, _ = self._join_level(target, level, how='right',
  1831. return_indexers=True,
  1832. keep_order=False)
  1833. else:
  1834. target = ensure_index(target)
  1835. if self.equals(target):
  1836. indexer = None
  1837. else:
  1838. if self.is_unique:
  1839. indexer = self.get_indexer(target, method=method,
  1840. limit=limit,
  1841. tolerance=tolerance)
  1842. else:
  1843. raise ValueError("cannot handle a non-unique multi-index!")
  1844. if not isinstance(target, MultiIndex):
  1845. if indexer is None:
  1846. target = self
  1847. elif (indexer >= 0).all():
  1848. target = self.take(indexer)
  1849. else:
  1850. # hopefully?
  1851. target = MultiIndex.from_tuples(target)
  1852. if (preserve_names and target.nlevels == self.nlevels and
  1853. target.names != self.names):
  1854. target = target.copy(deep=False)
  1855. target.names = self.names
  1856. return target, indexer
  1857. def get_slice_bound(self, label, side, kind):
  1858. if not isinstance(label, tuple):
  1859. label = label,
  1860. return self._partial_tup_index(label, side=side)
  1861. def slice_locs(self, start=None, end=None, step=None, kind=None):
  1862. """
  1863. For an ordered MultiIndex, compute the slice locations for input
  1864. labels.
  1865. The input labels can be tuples representing partial levels, e.g. for a
  1866. MultiIndex with 3 levels, you can pass a single value (corresponding to
  1867. the first level), or a 1-, 2-, or 3-tuple.
  1868. Parameters
  1869. ----------
  1870. start : label or tuple, default None
  1871. If None, defaults to the beginning
  1872. end : label or tuple
  1873. If None, defaults to the end
  1874. step : int or None
  1875. Slice step
1876. kind : string, optional, default None
  1877. Returns
  1878. -------
  1879. (start, end) : (int, int)
  1880. Notes
  1881. -----
  1882. This method only works if the MultiIndex is properly lexsorted. So,
  1883. if only the first 2 levels of a 3-level MultiIndex are lexsorted,
  1884. you can only pass two levels to ``.slice_locs``.
  1885. Examples
  1886. --------
  1887. >>> mi = pd.MultiIndex.from_arrays([list('abbd'), list('deff')],
  1888. ... names=['A', 'B'])
  1889. Get the slice locations from the beginning of 'b' in the first level
  1890. until the end of the multiindex:
  1891. >>> mi.slice_locs(start='b')
  1892. (1, 4)
  1893. Like above, but stop at the end of 'b' in the first level and 'f' in
  1894. the second level:
  1895. >>> mi.slice_locs(start='b', end=('b', 'f'))
  1896. (1, 3)
  1897. See Also
  1898. --------
  1899. MultiIndex.get_loc : Get location for a label or a tuple of labels.
  1900. MultiIndex.get_locs : Get location for a label/slice/list/mask or a
  1901. sequence of such.
  1902. """
  1903. # This function adds nothing to its parent implementation (the magic
  1904. # happens in get_slice_bound method), but it adds meaningful doc.
  1905. return super(MultiIndex, self).slice_locs(start, end, step, kind=kind)
  1906. def _partial_tup_index(self, tup, side='left'):
  1907. if len(tup) > self.lexsort_depth:
  1908. raise UnsortedIndexError(
  1909. 'Key length (%d) was greater than MultiIndex'
  1910. ' lexsort depth (%d)' %
  1911. (len(tup), self.lexsort_depth))
  1912. n = len(tup)
  1913. start, end = 0, len(self)
  1914. zipped = zip(tup, self.levels, self.codes)
  1915. for k, (lab, lev, labs) in enumerate(zipped):
  1916. section = labs[start:end]
  1917. if lab not in lev:
  1918. if not lev.is_type_compatible(lib.infer_dtype([lab],
  1919. skipna=False)):
  1920. raise TypeError('Level type mismatch: %s' % lab)
  1921. # short circuit
  1922. loc = lev.searchsorted(lab, side=side)
  1923. if side == 'right' and loc >= 0:
  1924. loc -= 1
  1925. return start + section.searchsorted(loc, side=side)
  1926. idx = lev.get_loc(lab)
  1927. if k < n - 1:
  1928. end = start + section.searchsorted(idx, side='right')
  1929. start = start + section.searchsorted(idx, side='left')
  1930. else:
  1931. return start + section.searchsorted(idx, side=side)
  1932. def get_loc(self, key, method=None):
  1933. """
  1934. Get location for a label or a tuple of labels as an integer, slice or
  1935. boolean mask.
  1936. Parameters
  1937. ----------
  1938. key : label or tuple of labels (one for each level)
  1939. method : None
  1940. Returns
  1941. -------
  1942. loc : int, slice object or boolean mask
  1943. If the key is past the lexsort depth, the return may be a
  1944. boolean mask array, otherwise it is always a slice or int.
  1945. Examples
1946. --------
  1947. >>> mi = pd.MultiIndex.from_arrays([list('abb'), list('def')])
  1948. >>> mi.get_loc('b')
  1949. slice(1, 3, None)
  1950. >>> mi.get_loc(('b', 'e'))
  1951. 1
  1952. Notes
1953. -----
  1954. The key cannot be a slice, list of same-level labels, a boolean mask,
  1955. or a sequence of such. If you want to use those, use
  1956. :meth:`MultiIndex.get_locs` instead.
  1957. See Also
  1958. --------
  1959. Index.get_loc : The get_loc method for (single-level) index.
  1960. MultiIndex.slice_locs : Get slice location given start label(s) and
  1961. end label(s).
  1962. MultiIndex.get_locs : Get location for a label/slice/list/mask or a
  1963. sequence of such.
  1964. """
  1965. if method is not None:
  1966. raise NotImplementedError('only the default get_loc method is '
  1967. 'currently supported for MultiIndex')
  1968. def _maybe_to_slice(loc):
  1969. """convert integer indexer to boolean mask or slice if possible"""
  1970. if not isinstance(loc, np.ndarray) or loc.dtype != 'int64':
  1971. return loc
  1972. loc = lib.maybe_indices_to_slice(loc, len(self))
  1973. if isinstance(loc, slice):
  1974. return loc
  1975. mask = np.empty(len(self), dtype='bool')
  1976. mask.fill(False)
  1977. mask[loc] = True
  1978. return mask
  1979. if not isinstance(key, tuple):
  1980. loc = self._get_level_indexer(key, level=0)
  1981. return _maybe_to_slice(loc)
  1982. keylen = len(key)
  1983. if self.nlevels < keylen:
  1984. raise KeyError('Key length ({0}) exceeds index depth ({1})'
  1985. ''.format(keylen, self.nlevels))
  1986. if keylen == self.nlevels and self.is_unique:
  1987. return self._engine.get_loc(key)
  1988. # -- partial selection or non-unique index
  1989. # break the key into 2 parts based on the lexsort_depth of the index;
  1990. # the first part returns a continuous slice of the index; the 2nd part
  1991. # needs linear search within the slice
  1992. i = self.lexsort_depth
  1993. lead_key, follow_key = key[:i], key[i:]
  1994. start, stop = (self.slice_locs(lead_key, lead_key)
  1995. if lead_key else (0, len(self)))
  1996. if start == stop:
  1997. raise KeyError(key)
  1998. if not follow_key:
  1999. return slice(start, stop)
  2000. warnings.warn('indexing past lexsort depth may impact performance.',
  2001. PerformanceWarning, stacklevel=10)
  2002. loc = np.arange(start, stop, dtype='int64')
  2003. for i, k in enumerate(follow_key, len(lead_key)):
  2004. mask = self.codes[i][loc] == self.levels[i].get_loc(k)
  2005. if not mask.all():
  2006. loc = loc[mask]
  2007. if not len(loc):
  2008. raise KeyError(key)
  2009. return (_maybe_to_slice(loc) if len(loc) != stop - start else
  2010. slice(start, stop))
  2011. def get_loc_level(self, key, level=0, drop_level=True):
  2012. """
  2013. Get both the location for the requested label(s) and the
  2014. resulting sliced index.
  2015. Parameters
  2016. ----------
  2017. key : label or sequence of labels
  2018. level : int/level name or list thereof, optional
  2019. drop_level : bool, default True
  2020. if ``False``, the resulting index will not drop any level.
  2021. Returns
  2022. -------
  2023. loc : A 2-tuple where the elements are:
  2024. Element 0: int, slice object or boolean array
  2025. Element 1: The resulting sliced multiindex/index. If the key
  2026. contains all levels, this will be ``None``.
  2027. Examples
  2028. --------
  2029. >>> mi = pd.MultiIndex.from_arrays([list('abb'), list('def')],
  2030. ... names=['A', 'B'])
  2031. >>> mi.get_loc_level('b')
  2032. (slice(1, 3, None), Index(['e', 'f'], dtype='object', name='B'))
  2033. >>> mi.get_loc_level('e', level='B')
  2034. (array([False, True, False], dtype=bool),
  2035. Index(['b'], dtype='object', name='A'))
  2036. >>> mi.get_loc_level(['b', 'e'])
  2037. (1, None)
  2038. See Also
2039. --------
  2040. MultiIndex.get_loc : Get location for a label or a tuple of labels.
  2041. MultiIndex.get_locs : Get location for a label/slice/list/mask or a
  2042. sequence of such.
  2043. """
  2044. def maybe_droplevels(indexer, levels, drop_level):
  2045. if not drop_level:
  2046. return self[indexer]
2047. # kludge around
  2048. orig_index = new_index = self[indexer]
  2049. levels = [self._get_level_number(i) for i in levels]
  2050. for i in sorted(levels, reverse=True):
  2051. try:
  2052. new_index = new_index.droplevel(i)
  2053. except ValueError:
  2054. # no dropping here
  2055. return orig_index
  2056. return new_index
  2057. if isinstance(level, (tuple, list)):
  2058. if len(key) != len(level):
  2059. raise AssertionError('Key for location must have same '
  2060. 'length as number of levels')
  2061. result = None
  2062. for lev, k in zip(level, key):
  2063. loc, new_index = self.get_loc_level(k, level=lev)
  2064. if isinstance(loc, slice):
  2065. mask = np.zeros(len(self), dtype=bool)
  2066. mask[loc] = True
  2067. loc = mask
  2068. result = loc if result is None else result & loc
  2069. return result, maybe_droplevels(result, level, drop_level)
  2070. level = self._get_level_number(level)
  2071. # kludge for #1796
  2072. if isinstance(key, list):
  2073. key = tuple(key)
  2074. if isinstance(key, tuple) and level == 0:
  2075. try:
  2076. if key in self.levels[0]:
  2077. indexer = self._get_level_indexer(key, level=level)
  2078. new_index = maybe_droplevels(indexer, [0], drop_level)
  2079. return indexer, new_index
  2080. except TypeError:
  2081. pass
  2082. if not any(isinstance(k, slice) for k in key):
  2083. # partial selection
  2084. # optionally get indexer to avoid re-calculation
  2085. def partial_selection(key, indexer=None):
  2086. if indexer is None:
  2087. indexer = self.get_loc(key)
  2088. ilevels = [i for i in range(len(key))
  2089. if key[i] != slice(None, None)]
  2090. return indexer, maybe_droplevels(indexer, ilevels,
  2091. drop_level)
  2092. if len(key) == self.nlevels and self.is_unique:
  2093. # Complete key in unique index -> standard get_loc
  2094. return (self._engine.get_loc(key), None)
  2095. else:
  2096. return partial_selection(key)
  2097. else:
  2098. indexer = None
  2099. for i, k in enumerate(key):
  2100. if not isinstance(k, slice):
  2101. k = self._get_level_indexer(k, level=i)
  2102. if isinstance(k, slice):
  2103. # everything
  2104. if k.start == 0 and k.stop == len(self):
  2105. k = slice(None, None)
  2106. else:
  2107. k_index = k
  2108. if isinstance(k, slice):
  2109. if k == slice(None, None):
  2110. continue
  2111. else:
  2112. raise TypeError(key)
  2113. if indexer is None:
  2114. indexer = k_index
  2115. else: # pragma: no cover
  2116. indexer &= k_index
  2117. if indexer is None:
  2118. indexer = slice(None, None)
  2119. ilevels = [i for i in range(len(key))
  2120. if key[i] != slice(None, None)]
  2121. return indexer, maybe_droplevels(indexer, ilevels, drop_level)
  2122. else:
  2123. indexer = self._get_level_indexer(key, level=level)
  2124. return indexer, maybe_droplevels(indexer, [level], drop_level)
  2125. def _get_level_indexer(self, key, level=0, indexer=None):
  2126. # return an indexer, boolean array or a slice showing where the key is
  2127. # in the totality of values
  2128. # if the indexer is provided, then use this
  2129. level_index = self.levels[level]
  2130. level_codes = self.codes[level]
  2131. def convert_indexer(start, stop, step, indexer=indexer,
  2132. codes=level_codes):
  2133. # given the inputs and the codes/indexer, compute an indexer set
  2134. # if we have a provided indexer, then this need not consider
  2135. # the entire labels set
  2136. r = np.arange(start, stop, step)
  2137. if indexer is not None and len(indexer) != len(codes):
  2138. # we have an indexer which maps the locations in the labels
  2139. # that we have already selected (and is not an indexer for the
  2140. # entire set) otherwise this is wasteful so we only need to
  2141. # examine locations that are in this set the only magic here is
  2142. # that the result are the mappings to the set that we have
  2143. # selected
  2144. from pandas import Series
  2145. mapper = Series(indexer)
  2146. indexer = codes.take(ensure_platform_int(indexer))
  2147. result = Series(Index(indexer).isin(r).nonzero()[0])
  2148. m = result.map(mapper)._ndarray_values
  2149. else:
  2150. m = np.zeros(len(codes), dtype=bool)
  2151. m[np.in1d(codes, r,
  2152. assume_unique=Index(codes).is_unique)] = True
  2153. return m
  2154. if isinstance(key, slice):
2155. # handle a slice, returning a slice if we can
  2156. # otherwise a boolean indexer
  2157. try:
  2158. if key.start is not None:
  2159. start = level_index.get_loc(key.start)
  2160. else:
  2161. start = 0
  2162. if key.stop is not None:
  2163. stop = level_index.get_loc(key.stop)
  2164. else:
  2165. stop = len(level_index) - 1
  2166. step = key.step
  2167. except KeyError:
  2168. # we have a partial slice (like looking up a partial date
  2169. # string)
  2170. start = stop = level_index.slice_indexer(key.start, key.stop,
  2171. key.step, kind='loc')
  2172. step = start.step
  2173. if isinstance(start, slice) or isinstance(stop, slice):
  2174. # we have a slice for start and/or stop
  2175. # a partial date slicer on a DatetimeIndex generates a slice
  2176. # note that the stop ALREADY includes the stopped point (if
  2177. # it was a string sliced)
  2178. return convert_indexer(start.start, stop.stop, step)
  2179. elif level > 0 or self.lexsort_depth == 0 or step is not None:
2180. # need the same semantics here as right-searching
2181. # when we are using a slice,
2182. # so include the stop+1 (so we include stop)
  2183. return convert_indexer(start, stop + 1, step)
  2184. else:
  2185. # sorted, so can return slice object -> view
  2186. i = level_codes.searchsorted(start, side='left')
  2187. j = level_codes.searchsorted(stop, side='right')
  2188. return slice(i, j, step)
  2189. else:
  2190. code = level_index.get_loc(key)
  2191. if level > 0 or self.lexsort_depth == 0:
  2192. # Desired level is not sorted
  2193. locs = np.array(level_codes == code, dtype=bool, copy=False)
  2194. if not locs.any():
  2195. # The label is present in self.levels[level] but unused:
  2196. raise KeyError(key)
  2197. return locs
  2198. i = level_codes.searchsorted(code, side='left')
  2199. j = level_codes.searchsorted(code, side='right')
  2200. if i == j:
  2201. # The label is present in self.levels[level] but unused:
  2202. raise KeyError(key)
  2203. return slice(i, j)
  2204. def get_locs(self, seq):
  2205. """
  2206. Get location for a given label/slice/list/mask or a sequence of such as
  2207. an array of integers.
  2208. Parameters
  2209. ----------
  2210. seq : label/slice/list/mask or a sequence of such
  2211. You should use one of the above for each level.
  2212. If a level should not be used, set it to ``slice(None)``.
  2213. Returns
  2214. -------
  2215. locs : array of integers suitable for passing to iloc
  2216. Examples
2217. --------
  2218. >>> mi = pd.MultiIndex.from_arrays([list('abb'), list('def')])
  2219. >>> mi.get_locs('b')
  2220. array([1, 2], dtype=int64)
  2221. >>> mi.get_locs([slice(None), ['e', 'f']])
  2222. array([1, 2], dtype=int64)
  2223. >>> mi.get_locs([[True, False, True], slice('e', 'f')])
  2224. array([2], dtype=int64)
  2225. See Also
  2226. --------
  2227. MultiIndex.get_loc : Get location for a label or a tuple of labels.
  2228. MultiIndex.slice_locs : Get slice location given start label(s) and
  2229. end label(s).
  2230. """
  2231. from .numeric import Int64Index
  2232. # must be lexsorted to at least as many levels
  2233. true_slices = [i for (i, s) in enumerate(com.is_true_slices(seq)) if s]
  2234. if true_slices and true_slices[-1] >= self.lexsort_depth:
  2235. raise UnsortedIndexError('MultiIndex slicing requires the index '
  2236. 'to be lexsorted: slicing on levels {0}, '
  2237. 'lexsort depth {1}'
  2238. .format(true_slices, self.lexsort_depth))
  2239. # indexer
  2240. # this is the list of all values that we want to select
  2241. n = len(self)
  2242. indexer = None
  2243. def _convert_to_indexer(r):
  2244. # return an indexer
  2245. if isinstance(r, slice):
  2246. m = np.zeros(n, dtype=bool)
  2247. m[r] = True
  2248. r = m.nonzero()[0]
  2249. elif com.is_bool_indexer(r):
  2250. if len(r) != n:
  2251. raise ValueError("cannot index with a boolean indexer "
  2252. "that is not the same length as the "
  2253. "index")
  2254. r = r.nonzero()[0]
  2255. return Int64Index(r)
  2256. def _update_indexer(idxr, indexer=indexer):
  2257. if indexer is None:
  2258. indexer = Index(np.arange(n))
  2259. if idxr is None:
  2260. return indexer
  2261. return indexer & idxr
  2262. for i, k in enumerate(seq):
  2263. if com.is_bool_indexer(k):
  2264. # a boolean indexer, must be the same length!
  2265. k = np.asarray(k)
  2266. indexer = _update_indexer(_convert_to_indexer(k),
  2267. indexer=indexer)
  2268. elif is_list_like(k):
  2269. # a collection of labels to include from this level (these
  2270. # are or'd)
  2271. indexers = None
  2272. for x in k:
  2273. try:
  2274. idxrs = _convert_to_indexer(
  2275. self._get_level_indexer(x, level=i,
  2276. indexer=indexer))
  2277. indexers = (idxrs if indexers is None
  2278. else indexers | idxrs)
  2279. except KeyError:
  2280. # ignore not founds
  2281. continue
  2282. if indexers is not None:
  2283. indexer = _update_indexer(indexers, indexer=indexer)
  2284. else:
  2285. # no matches we are done
  2286. return Int64Index([])._ndarray_values
  2287. elif com.is_null_slice(k):
2288. # null slice (slice(None)): keep everything at this level
  2289. indexer = _update_indexer(None, indexer=indexer)
  2290. elif isinstance(k, slice):
  2291. # a slice, include BOTH of the labels
  2292. indexer = _update_indexer(_convert_to_indexer(
  2293. self._get_level_indexer(k, level=i, indexer=indexer)),
  2294. indexer=indexer)
  2295. else:
  2296. # a single label
  2297. indexer = _update_indexer(_convert_to_indexer(
  2298. self.get_loc_level(k, level=i, drop_level=False)[0]),
  2299. indexer=indexer)
  2300. # empty indexer
  2301. if indexer is None:
  2302. return Int64Index([])._ndarray_values
  2303. return indexer._ndarray_values
  2304. def truncate(self, before=None, after=None):
  2305. """
  2306. Slice index between two labels / tuples, return new MultiIndex
  2307. Parameters
  2308. ----------
  2309. before : label or tuple, can be partial. Default None
  2310. None defaults to start
  2311. after : label or tuple, can be partial. Default None
  2312. None defaults to end
  2313. Returns
  2314. -------
  2315. truncated : MultiIndex
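Examples
--------
A minimal sketch with made-up data; only rows between ``before`` and
``after`` (on the first level) are kept:

>>> mi = pd.MultiIndex.from_arrays([['a', 'b', 'c'], ['x', 'y', 'z']])
>>> mi.truncate(before='a', after='b')
MultiIndex(levels=[['a', 'b'], ['x', 'y', 'z']],
           codes=[[0, 1], [0, 1]])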
  2316. """
  2317. if after and before and after < before:
  2318. raise ValueError('after < before')
  2319. i, j = self.levels[0].slice_locs(before, after)
  2320. left, right = self.slice_locs(before, after)
  2321. new_levels = list(self.levels)
  2322. new_levels[0] = new_levels[0][i:j]
  2323. new_codes = [level_codes[left:right] for level_codes in self.codes]
  2324. new_codes[0] = new_codes[0] - i
  2325. return MultiIndex(levels=new_levels, codes=new_codes,
  2326. verify_integrity=False)
  2327. def equals(self, other):
  2328. """
  2329. Determines if two MultiIndex objects have the same labeling information
  2330. (the levels themselves do not necessarily have to be the same)
  2331. See Also
  2332. --------
  2333. equal_levels
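Examples
--------
A minimal sketch with made-up data; the two indexes label the same
values even though their levels differ:

>>> mi = pd.MultiIndex.from_tuples([(1, 'a'), (2, 'b')])
>>> mi2 = pd.MultiIndex(levels=[[1, 2, 3], ['a', 'b', 'c']],
...                     codes=[[0, 1], [0, 1]])
>>> mi.equals(mi2)
True
>>> mi.equal_levels(mi2)
False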
  2334. """
  2335. if self.is_(other):
  2336. return True
  2337. if not isinstance(other, Index):
  2338. return False
  2339. if not isinstance(other, MultiIndex):
  2340. other_vals = com.values_from_object(ensure_index(other))
  2341. return array_equivalent(self._ndarray_values, other_vals)
  2342. if self.nlevels != other.nlevels:
  2343. return False
  2344. if len(self) != len(other):
  2345. return False
  2346. for i in range(self.nlevels):
  2347. self_codes = self.codes[i]
  2348. self_codes = self_codes[self_codes != -1]
  2349. self_values = algos.take_nd(np.asarray(self.levels[i]._values),
  2350. self_codes, allow_fill=False)
  2351. other_codes = other.codes[i]
  2352. other_codes = other_codes[other_codes != -1]
  2353. other_values = algos.take_nd(
  2354. np.asarray(other.levels[i]._values),
  2355. other_codes, allow_fill=False)
  2356. # since we use NaT both datetime64 and timedelta64
  2357. # we can have a situation where a level is typed say
  2358. # timedelta64 in self (IOW it has other values than NaT)
  2359. # but types datetime64 in other (where its all NaT)
  2360. # but these are equivalent
  2361. if len(self_values) == 0 and len(other_values) == 0:
  2362. continue
  2363. if not array_equivalent(self_values, other_values):
  2364. return False
  2365. return True
  2366. def equal_levels(self, other):
  2367. """
  2368. Return True if the levels of both MultiIndex objects are the same
  2369. """
  2370. if self.nlevels != other.nlevels:
  2371. return False
  2372. for i in range(self.nlevels):
  2373. if not self.levels[i].equals(other.levels[i]):
  2374. return False
  2375. return True
  2376. def union(self, other, sort=None):
  2377. """
  2378. Form the union of two MultiIndex objects
  2379. Parameters
  2380. ----------
  2381. other : MultiIndex or array / Index of tuples
  2382. sort : False or None, default None
  2383. Whether to sort the resulting Index.
  2384. * None : Sort the result, except when
  2385. 1. `self` and `other` are equal.
  2386. 2. `self` has length 0.
  2387. 3. Some values in `self` or `other` cannot be compared.
  2388. A RuntimeWarning is issued in this case.
  2389. * False : do not sort the result.
  2390. .. versionadded:: 0.24.0
  2391. .. versionchanged:: 0.24.1
  2392. Changed the default value from ``True`` to ``None``
  2393. (without change in behaviour).
  2394. Returns
  2395. -------
  2396. Index
2397. Examples
--------
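A minimal sketch with made-up data:

>>> index = pd.MultiIndex.from_tuples([(1, 'a'), (2, 'b')])
>>> index2 = pd.MultiIndex.from_tuples([(2, 'b'), (3, 'c')])
>>> index.union(index2)
MultiIndex(levels=[[1, 2, 3], ['a', 'b', 'c']],
           codes=[[0, 1, 2], [0, 1, 2]])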
  2398. """
  2399. self._validate_sort_keyword(sort)
  2400. self._assert_can_do_setop(other)
  2401. other, result_names = self._convert_can_do_setop(other)
  2402. if len(other) == 0 or self.equals(other):
  2403. return self
  2404. # TODO: Index.union returns other when `len(self)` is 0.
  2405. uniq_tuples = lib.fast_unique_multiple([self._ndarray_values,
  2406. other._ndarray_values],
  2407. sort=sort)
  2408. return MultiIndex.from_arrays(lzip(*uniq_tuples), sortorder=0,
  2409. names=result_names)
  2410. def intersection(self, other, sort=False):
  2411. """
  2412. Form the intersection of two MultiIndex objects.
  2413. Parameters
  2414. ----------
  2415. other : MultiIndex or array / Index of tuples
  2416. sort : False or None, default False
  2417. Sort the resulting MultiIndex if possible
  2418. .. versionadded:: 0.24.0
  2419. .. versionchanged:: 0.24.1
  2420. Changed the default from ``True`` to ``False``, to match
  2421. behaviour from before 0.24.0
  2422. Returns
  2423. -------
  2424. Index
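Examples
--------
A minimal sketch with made-up data:

>>> mi = pd.MultiIndex.from_tuples([(1, 'a'), (2, 'b')])
>>> mi2 = pd.MultiIndex.from_tuples([(2, 'b'), (3, 'c')])
>>> mi.intersection(mi2)
MultiIndex(levels=[[2], ['b']],
           codes=[[0], [0]])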
  2425. """
  2426. self._validate_sort_keyword(sort)
  2427. self._assert_can_do_setop(other)
  2428. other, result_names = self._convert_can_do_setop(other)
  2429. if self.equals(other):
  2430. return self
  2431. self_tuples = self._ndarray_values
  2432. other_tuples = other._ndarray_values
  2433. uniq_tuples = set(self_tuples) & set(other_tuples)
  2434. if sort is None:
  2435. uniq_tuples = sorted(uniq_tuples)
  2436. if len(uniq_tuples) == 0:
  2437. return MultiIndex(levels=self.levels,
  2438. codes=[[]] * self.nlevels,
  2439. names=result_names, verify_integrity=False)
  2440. else:
  2441. return MultiIndex.from_arrays(lzip(*uniq_tuples), sortorder=0,
  2442. names=result_names)
  2443. def difference(self, other, sort=None):
  2444. """
  2445. Compute set difference of two MultiIndex objects
  2446. Parameters
  2447. ----------
  2448. other : MultiIndex
  2449. sort : False or None, default None
  2450. Sort the resulting MultiIndex if possible
  2451. .. versionadded:: 0.24.0
  2452. .. versionchanged:: 0.24.1
  2453. Changed the default value from ``True`` to ``None``
  2454. (without change in behaviour).
  2455. Returns
  2456. -------
  2457. diff : MultiIndex
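Examples
--------
A minimal sketch with made-up data:

>>> mi = pd.MultiIndex.from_tuples([(1, 'a'), (2, 'b')])
>>> mi2 = pd.MultiIndex.from_tuples([(2, 'b'), (3, 'c')])
>>> mi.difference(mi2)
MultiIndex(levels=[[1], ['a']],
           codes=[[0], [0]])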
  2458. """
  2459. self._validate_sort_keyword(sort)
  2460. self._assert_can_do_setop(other)
  2461. other, result_names = self._convert_can_do_setop(other)
  2462. if len(other) == 0:
  2463. return self
  2464. if self.equals(other):
  2465. return MultiIndex(levels=self.levels,
  2466. codes=[[]] * self.nlevels,
  2467. names=result_names, verify_integrity=False)
  2468. this = self._get_unique_index()
  2469. indexer = this.get_indexer(other)
  2470. indexer = indexer.take((indexer != -1).nonzero()[0])
  2471. label_diff = np.setdiff1d(np.arange(this.size), indexer,
  2472. assume_unique=True)
  2473. difference = this.values.take(label_diff)
  2474. if sort is None:
  2475. difference = sorted(difference)
  2476. if len(difference) == 0:
  2477. return MultiIndex(levels=[[]] * self.nlevels,
  2478. codes=[[]] * self.nlevels,
  2479. names=result_names, verify_integrity=False)
  2480. else:
  2481. return MultiIndex.from_tuples(difference, sortorder=0,
  2482. names=result_names)
  2483. @Appender(_index_shared_docs['astype'])
  2484. def astype(self, dtype, copy=True):
  2485. dtype = pandas_dtype(dtype)
  2486. if is_categorical_dtype(dtype):
  2487. msg = '> 1 ndim Categorical are not supported at this time'
  2488. raise NotImplementedError(msg)
  2489. elif not is_object_dtype(dtype):
  2490. msg = ('Setting {cls} dtype to anything other than object '
  2491. 'is not supported').format(cls=self.__class__)
  2492. raise TypeError(msg)
  2493. elif copy is True:
  2494. return self._shallow_copy()
  2495. return self
  2496. def _convert_can_do_setop(self, other):
  2497. result_names = self.names
  2498. if not hasattr(other, 'names'):
  2499. if len(other) == 0:
  2500. other = MultiIndex(levels=[[]] * self.nlevels,
  2501. codes=[[]] * self.nlevels,
  2502. verify_integrity=False)
  2503. else:
  2504. msg = 'other must be a MultiIndex or a list of tuples'
  2505. try:
  2506. other = MultiIndex.from_tuples(other)
  2507. except TypeError:
  2508. raise TypeError(msg)
  2509. else:
  2510. result_names = self.names if self.names == other.names else None
  2511. return other, result_names
  2512. def insert(self, loc, item):
  2513. """
  2514. Make new MultiIndex inserting new item at location
  2515. Parameters
  2516. ----------
  2517. loc : int
  2518. item : tuple
  2519. Must be same length as number of levels in the MultiIndex
  2520. Returns
  2521. -------
  2522. new_index : Index
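Examples
--------
A minimal sketch with made-up data; unseen labels are appended to the
end of each level:

>>> mi = pd.MultiIndex.from_tuples([('a', 1), ('b', 2)])
>>> mi.insert(1, ('c', 3))
MultiIndex(levels=[['a', 'b', 'c'], [1, 2, 3]],
           codes=[[0, 2, 1], [0, 2, 1]])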
  2523. """
  2524. # Pad the key with empty strings if lower levels of the key
  2525. # aren't specified:
  2526. if not isinstance(item, tuple):
  2527. item = (item, ) + ('', ) * (self.nlevels - 1)
  2528. elif len(item) != self.nlevels:
  2529. raise ValueError('Item must have length equal to number of '
  2530. 'levels.')
  2531. new_levels = []
  2532. new_codes = []
  2533. for k, level, level_codes in zip(item, self.levels, self.codes):
  2534. if k not in level:
  2535. # have to insert into level
  2536. # must insert at end otherwise you have to recompute all the
  2537. # other codes
  2538. lev_loc = len(level)
  2539. level = level.insert(lev_loc, k)
  2540. else:
  2541. lev_loc = level.get_loc(k)
  2542. new_levels.append(level)
  2543. new_codes.append(np.insert(
  2544. ensure_int64(level_codes), loc, lev_loc))
  2545. return MultiIndex(levels=new_levels, codes=new_codes,
  2546. names=self.names, verify_integrity=False)
  2547. def delete(self, loc):
  2548. """
  2549. Make new index with passed location deleted
  2550. Returns
  2551. -------
  2552. new_index : MultiIndex
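Examples
--------
A minimal sketch with made-up data; only the codes are modified, the
levels are kept as-is:

>>> mi = pd.MultiIndex.from_tuples([('a', 1), ('b', 2), ('c', 3)])
>>> mi.delete(1)
MultiIndex(levels=[['a', 'b', 'c'], [1, 2, 3]],
           codes=[[0, 2], [0, 2]])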
  2553. """
  2554. new_codes = [np.delete(level_codes, loc) for level_codes in self.codes]
  2555. return MultiIndex(levels=self.levels, codes=new_codes,
  2556. names=self.names, verify_integrity=False)
  2557. def _wrap_joined_index(self, joined, other):
  2558. names = self.names if self.names == other.names else None
  2559. return MultiIndex.from_tuples(joined, names=names)
  2560. @Appender(Index.isin.__doc__)
  2561. def isin(self, values, level=None):
  2562. if level is None:
  2563. values = MultiIndex.from_tuples(values,
  2564. names=self.names).values
  2565. return algos.isin(self.values, values)
  2566. else:
  2567. num = self._get_level_number(level)
  2568. levs = self.levels[num]
  2569. level_codes = self.codes[num]
  2570. sought_labels = levs.isin(values).nonzero()[0]
  2571. if levs.size == 0:
  2572. return np.zeros(len(level_codes), dtype=np.bool_)
  2573. else:
  2574. return np.lib.arraysetops.in1d(level_codes, sought_labels)
  2575. MultiIndex._add_numeric_methods_disabled()
  2576. MultiIndex._add_numeric_methods_add_sub_disabled()
  2577. MultiIndex._add_logical_methods_disabled()
  2578. def _sparsify(label_list, start=0, sentinel=''):
  2579. pivoted = lzip(*label_list)
  2580. k = len(label_list)
  2581. result = pivoted[:start + 1]
  2582. prev = pivoted[start]
  2583. for cur in pivoted[start + 1:]:
  2584. sparse_cur = []
  2585. for i, (p, t) in enumerate(zip(prev, cur)):
  2586. if i == k - 1:
  2587. sparse_cur.append(t)
  2588. result.append(sparse_cur)
  2589. break
  2590. if p == t:
  2591. sparse_cur.append(sentinel)
  2592. else:
  2593. sparse_cur.extend(cur[i:])
  2594. result.append(sparse_cur)
  2595. break
  2596. prev = cur
  2597. return lzip(*result)
  2598. def _get_na_rep(dtype):
  2599. return {np.datetime64: 'NaT', np.timedelta64: 'NaT'}.get(dtype, 'NaN')