12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
727782779278027812782278327842785278627872788278927902791279227932794279527962797279827992800280128022803280428052806280728082809281028112812281328142815281628172818281928202821282228232824282528262827282828292830283128322833283428352836283728382839284028412842284328442845284628472848284928502851285228532854285528562857285828592860286128622863286428652866286728682869287028712872287328742875287628772878287928802881288228832884288528862887288828892890289128922893289428952896289728982899290029012902290329042905290629072908290929102911291229132914291529162917291829192920292129222923292429252926292729282929293029312932293329342935293629372938293929402941294229432944294529462947294829492950295129522953295429552956295729582959296029612962296329642965296629672968296929702971297229732974297529762977297829792980298129822983298429852986298729882989299029912992299329942995299629972998299930003001300230033004300530063007300830093010301130123013301430153016301730183019302030213022302330243025302630273028302930303031303230333034303530363037303830393040304130423043304430453046304730483049305030513052305330543055305630573058305930603061306230633064306530663067306830693070307130723073307430753076307730783079308030813082308330843085308630873088308930903091309230933094309530963097309830993100310131023103310431053106310731083109311031113112311331143115311631173118311931203121312231233124312531263127312831293130313131323133313431353136313731383139314031413142314331443145314631473148314931503151315231533154315531563157315831593160316131623163316431653166 |
- # pylint: disable=E1101,E1103,W0232
- from collections import OrderedDict
- import datetime
- from sys import getsizeof
- import warnings
- import numpy as np
- from pandas._libs import (
- Timestamp, algos as libalgos, index as libindex, lib, tslibs)
- import pandas.compat as compat
- from pandas.compat import lrange, lzip, map, range, zip
- from pandas.compat.numpy import function as nv
- from pandas.errors import PerformanceWarning, UnsortedIndexError
- from pandas.util._decorators import Appender, cache_readonly, deprecate_kwarg
- from pandas.core.dtypes.common import (
- ensure_int64, ensure_platform_int, is_categorical_dtype, is_hashable,
- is_integer, is_iterator, is_list_like, is_object_dtype, is_scalar,
- pandas_dtype)
- from pandas.core.dtypes.dtypes import ExtensionDtype, PandasExtensionDtype
- from pandas.core.dtypes.generic import ABCDataFrame
- from pandas.core.dtypes.missing import array_equivalent, isna
- import pandas.core.algorithms as algos
- import pandas.core.common as com
- from pandas.core.config import get_option
- import pandas.core.indexes.base as ibase
- from pandas.core.indexes.base import (
- Index, InvalidIndexError, _index_shared_docs, ensure_index)
- from pandas.core.indexes.frozen import FrozenList, _ensure_frozen
- import pandas.core.missing as missing
- from pandas.io.formats.printing import pprint_thing
# Start from a copy of the base Index doc kwargs and override the entries
# that differ for MultiIndex ('klass' and 'target_klass').
_index_doc_kwargs = dict(ibase._index_doc_kwargs)
_index_doc_kwargs.update(
    dict(klass='MultiIndex',
         target_klass='MultiIndex or list of tuples'))
class MultiIndexUIntEngine(libindex.BaseMultiIndexCodesEngine,
                           libindex.UInt64Engine):
    """
    This class manages a MultiIndex by mapping label combinations to positive
    integers.
    """
    _base = libindex.UInt64Engine

    def _codes_to_ints(self, codes):
        """
        Transform combination(s) of uint64 in one uint64 (each), in a strictly
        monotonic way (i.e. respecting the lexicographic order of integer
        combinations): see BaseMultiIndexCodesEngine documentation.

        Parameters
        ----------
        codes : 1- or 2-dimensional array of dtype uint64
            Combinations of integers (one per row). NOTE: the array is
            shifted in place, so the caller's data is modified.

        Returns
        -------
        int_keys : scalar or 1-dimensional array, of dtype uint64
            Integer(s) representing one combination (each)
        """
        # Shift the representation of each level by the pre-calculated number
        # of bits (in place):
        codes <<= self.offsets

        # Now sum and OR are in fact interchangeable. This is a simple
        # composition of the (disjunct) significant bits of each level (i.e.
        # each column in "codes") in a single positive integer:
        if codes.ndim == 1:
            # Single key
            return np.bitwise_or.reduce(codes)

        # Multiple keys
        return np.bitwise_or.reduce(codes, axis=1)
class MultiIndexPyIntEngine(libindex.BaseMultiIndexCodesEngine,
                            libindex.ObjectEngine):
    """
    This class manages those (extreme) cases in which the number of possible
    label combinations overflows the 64 bits integers, and uses an ObjectEngine
    containing Python integers.
    """
    _base = libindex.ObjectEngine

    def _codes_to_ints(self, codes):
        """
        Transform combination(s) of uint64 in one Python integer (each), in a
        strictly monotonic way (i.e. respecting the lexicographic order of
        integer combinations): see BaseMultiIndexCodesEngine documentation.

        Parameters
        ----------
        codes : 1- or 2-dimensional array of dtype uint64
            Combinations of integers (one per row)

        Returns
        -------
        int_keys : int, or 1-dimensional array of dtype object
            Integer(s) representing one combination (each)
        """
        # Shift the representation of each level by the pre-calculated number
        # of bits. Since this can overflow uint64, first make sure we are
        # working with Python integers (this also leaves the input untouched,
        # because astype returns a new array):
        codes = codes.astype('object') << self.offsets

        # Now sum and OR are in fact interchangeable. This is a simple
        # composition of the (disjunct) significant bits of each level (i.e.
        # each column in "codes") in a single positive integer (per row):
        if codes.ndim == 1:
            # Single key
            return np.bitwise_or.reduce(codes)

        # Multiple keys
        return np.bitwise_or.reduce(codes, axis=1)
class MultiIndex(Index):
    """
    A multi-level, or hierarchical, index object for pandas objects.

    Parameters
    ----------
    levels : sequence of arrays
        The unique labels for each level.
    codes : sequence of arrays
        Integers for each level designating which label at each location.

        .. versionadded:: 0.24.0
    labels : sequence of arrays
        Integers for each level designating which label at each location.

        .. deprecated:: 0.24.0
            Use ``codes`` instead
    sortorder : optional int
        Level of sortedness (must be lexicographically sorted by that
        level).
    names : optional sequence of objects
        Names for each of the index levels. (name is accepted for compat).
    copy : bool, default False
        Copy the meta-data.
    verify_integrity : bool, default True
        Check that the levels/codes are consistent and valid.

    Attributes
    ----------
    names
    levels
    codes
    nlevels
    levshape

    Methods
    -------
    from_arrays
    from_tuples
    from_product
    from_frame
    set_levels
    set_codes
    to_frame
    to_flat_index
    is_lexsorted
    sortlevel
    droplevel
    swaplevel
    reorder_levels
    remove_unused_levels

    See Also
    --------
    MultiIndex.from_arrays  : Convert list of arrays to MultiIndex.
    MultiIndex.from_product : Create a MultiIndex from the cartesian product
                              of iterables.
    MultiIndex.from_tuples  : Convert list of tuples to a MultiIndex.
    MultiIndex.from_frame   : Make a MultiIndex from a DataFrame.
    Index : The base pandas Index type.

    Examples
    --------
    A new ``MultiIndex`` is typically constructed using one of the helper
    methods :meth:`MultiIndex.from_arrays`, :meth:`MultiIndex.from_product`
    and :meth:`MultiIndex.from_tuples`. For example (using ``.from_arrays``):

    >>> arrays = [[1, 1, 2, 2], ['red', 'blue', 'red', 'blue']]
    >>> pd.MultiIndex.from_arrays(arrays, names=('number', 'color'))
    MultiIndex(levels=[[1, 2], ['blue', 'red']],
               codes=[[0, 0, 1, 1], [1, 0, 1, 0]],
               names=['number', 'color'])

    See further examples for how to construct a MultiIndex in the doc strings
    of the mentioned helper methods.

    Notes
    -----
    See the `user guide
    <http://pandas.pydata.org/pandas-docs/stable/advanced.html>`_ for more.
    """

    # initialize to zero-length tuples to make everything work
    _typ = 'multiindex'
    _names = FrozenList()
    _levels = FrozenList()
    _codes = FrozenList()
    _comparables = ['names']
    # ``rename`` is exposed as an alias of ``Index.set_names``
    rename = Index.set_names
- # --------------------------------------------------------------------
- # Constructors
@deprecate_kwarg(old_arg_name='labels', new_arg_name='codes')
def __new__(cls, levels=None, codes=None, sortorder=None, names=None,
            dtype=None, copy=False, name=None,
            verify_integrity=True, _set_identity=True):
    """
    Build a MultiIndex from explicit ``levels`` and ``codes``.

    ``name`` is accepted for compatibility with Index and, when given,
    takes precedence over ``names``. ``dtype`` is accepted but not used
    here.

    Raises
    ------
    TypeError
        If ``levels`` or ``codes`` is None.
    ValueError
        If ``levels`` and ``codes`` differ in length, or are empty.
    """
    # compat with Index
    if name is not None:
        names = name
    if levels is None or codes is None:
        raise TypeError("Must pass both levels and codes")
    if len(levels) != len(codes):
        raise ValueError('Length of levels and codes must be the same.')
    if len(levels) == 0:
        raise ValueError('Must pass non-zero number of levels/codes')

    result = object.__new__(MultiIndex)

    # we've already validated levels and codes, so shortcut here
    result._set_levels(levels, copy=copy, validate=False)
    result._set_codes(codes, copy=copy, validate=False)

    if names is not None:
        # handles name validation
        result._set_names(names)

    if sortorder is not None:
        result.sortorder = int(sortorder)
    else:
        result.sortorder = sortorder

    if verify_integrity:
        result._verify_integrity()
    if _set_identity:
        result._reset_identity()
    return result
def _verify_integrity(self, codes=None, levels=None):
    """
    Check that a set of levels and codes are consistent with each other.

    Parameters
    ----------
    codes : optional list
        Codes to check for validity. Defaults to current codes.
    levels : optional list
        Levels to check for validity. Defaults to current levels.

    Raises
    ------
    ValueError
        If length of levels and codes don't match, if the codes for any
        level would exceed level bounds, or there are any duplicate levels.
    """
    # NOTE: Currently does not check, among other things, that cached
    # nlevels matches nor that sortorder matches actually sortorder.
    codes = codes or self.codes
    levels = levels or self.levels

    if len(levels) != len(codes):
        raise ValueError("Length of levels and codes must match. NOTE:"
                         " this index is in an inconsistent state.")
    # Compare against the codes being validated, not the ones currently
    # stored on the index: otherwise a self-consistent set of new codes
    # whose length differs from the old ones is wrongly rejected.
    codes_length = len(codes[0])
    for i, (level, level_codes) in enumerate(zip(levels, codes)):
        if len(level_codes) != codes_length:
            raise ValueError("Unequal code lengths: %s" %
                             ([len(code_) for code_ in codes]))
        if len(level_codes):
            # max() is only defined for non-empty codes
            code_max = level_codes.max()
            if code_max >= len(level):
                raise ValueError("On level %d, code max (%d) >= length of"
                                 " level (%d). NOTE: this index is in an"
                                 " inconsistent state" % (i, code_max,
                                                          len(level)))
        if not level.is_unique:
            raise ValueError("Level values must be unique: {values} on "
                             "level {level}".format(
                                 values=[value for value in level],
                                 level=i))
@classmethod
def from_arrays(cls, arrays, sortorder=None, names=None):
    """
    Convert arrays to MultiIndex.

    Parameters
    ----------
    arrays : list / sequence of array-likes
        Each array-like gives one level's value for each data point.
        len(arrays) is the number of levels.
    sortorder : int or None
        Level of sortedness (must be lexicographically sorted by that
        level).
    names : list / sequence of str, optional
        Names for the levels in the index. When omitted, the ``name``
        attribute of each array (or None) is used.

    Returns
    -------
    index : MultiIndex

    Raises
    ------
    TypeError
        If ``arrays`` is not list-like.
    ValueError
        If the arrays do not all have the same length.

    See Also
    --------
    MultiIndex.from_tuples : Convert list of tuples to MultiIndex.
    MultiIndex.from_product : Make a MultiIndex from cartesian product
                              of iterables.
    MultiIndex.from_frame : Make a MultiIndex from a DataFrame.

    Examples
    --------
    >>> arrays = [[1, 1, 2, 2], ['red', 'blue', 'red', 'blue']]
    >>> pd.MultiIndex.from_arrays(arrays, names=('number', 'color'))
    MultiIndex(levels=[[1, 2], ['blue', 'red']],
               codes=[[0, 0, 1, 1], [1, 0, 1, 0]],
               names=['number', 'color'])
    """
    if not is_list_like(arrays):
        raise TypeError("Input must be a list / sequence of array-likes.")
    elif is_iterator(arrays):
        # materialize so the input can be indexed and traversed repeatedly
        arrays = list(arrays)

    # Check if lengths of all arrays are equal or not,
    # raise ValueError, if not
    for i in range(1, len(arrays)):
        if len(arrays[i]) != len(arrays[i - 1]):
            raise ValueError('all arrays must be same length')

    from pandas.core.arrays.categorical import _factorize_from_iterables

    codes, levels = _factorize_from_iterables(arrays)
    if names is None:
        names = [getattr(arr, "name", None) for arr in arrays]

    return MultiIndex(levels=levels, codes=codes, sortorder=sortorder,
                      names=names, verify_integrity=False)
@classmethod
def from_tuples(cls, tuples, sortorder=None, names=None):
    """
    Convert list of tuples to MultiIndex.

    Parameters
    ----------
    tuples : list / sequence of tuple-likes
        Each tuple is the index of one row/column.
    sortorder : int or None
        Level of sortedness (must be lexicographically sorted by that
        level).
    names : list / sequence of str, optional
        Names for the levels in the index.

    Returns
    -------
    index : MultiIndex

    Raises
    ------
    TypeError
        If ``tuples`` is not list-like, or if it is empty and ``names``
        is None (the number of levels cannot be inferred).

    See Also
    --------
    MultiIndex.from_arrays : Convert list of arrays to MultiIndex.
    MultiIndex.from_product : Make a MultiIndex from cartesian product
                              of iterables.
    MultiIndex.from_frame : Make a MultiIndex from a DataFrame.

    Examples
    --------
    >>> tuples = [(1, u'red'), (1, u'blue'),
    ...           (2, u'red'), (2, u'blue')]
    >>> pd.MultiIndex.from_tuples(tuples, names=('number', 'color'))
    MultiIndex(levels=[[1, 2], ['blue', 'red']],
               codes=[[0, 0, 1, 1], [1, 0, 1, 0]],
               names=['number', 'color'])
    """
    if not is_list_like(tuples):
        raise TypeError('Input must be a list / sequence of tuple-likes.')
    elif is_iterator(tuples):
        tuples = list(tuples)

    if len(tuples) == 0:
        if names is None:
            msg = 'Cannot infer number of levels from empty list'
            raise TypeError(msg)
        # one empty array per requested level name
        arrays = [[]] * len(names)
    elif isinstance(tuples, (np.ndarray, Index)):
        if isinstance(tuples, Index):
            tuples = tuples._values

        arrays = list(lib.tuples_to_object_array(tuples).T)
    elif isinstance(tuples, list):
        arrays = list(lib.to_object_array_tuples(tuples).T)
    else:
        arrays = lzip(*tuples)

    return MultiIndex.from_arrays(arrays, sortorder=sortorder, names=names)
@classmethod
def from_product(cls, iterables, sortorder=None, names=None):
    """
    Make a MultiIndex from the cartesian product of multiple iterables.

    Parameters
    ----------
    iterables : list / sequence of iterables
        Each iterable has unique labels for each level of the index.
    sortorder : int or None
        Level of sortedness (must be lexicographically sorted by that
        level).
    names : list / sequence of str, optional
        Names for the levels in the index.

    Returns
    -------
    index : MultiIndex

    Raises
    ------
    TypeError
        If ``iterables`` is not list-like.

    See Also
    --------
    MultiIndex.from_arrays : Convert list of arrays to MultiIndex.
    MultiIndex.from_tuples : Convert list of tuples to MultiIndex.
    MultiIndex.from_frame : Make a MultiIndex from a DataFrame.

    Examples
    --------
    >>> numbers = [0, 1, 2]
    >>> colors = ['green', 'purple']
    >>> pd.MultiIndex.from_product([numbers, colors],
    ...                            names=['number', 'color'])
    MultiIndex(levels=[[0, 1, 2], ['green', 'purple']],
               codes=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]],
               names=['number', 'color'])
    """
    from pandas.core.arrays.categorical import _factorize_from_iterables
    from pandas.core.reshape.util import cartesian_product

    if not is_list_like(iterables):
        raise TypeError("Input must be a list / sequence of iterables.")
    elif is_iterator(iterables):
        iterables = list(iterables)

    # factorize each iterable on its own, then expand the per-level codes
    # to every combination
    codes, levels = _factorize_from_iterables(iterables)
    codes = cartesian_product(codes)
    return MultiIndex(levels, codes, sortorder=sortorder, names=names)
@classmethod
def from_frame(cls, df, sortorder=None, names=None):
    """
    Make a MultiIndex from a DataFrame.

    .. versionadded:: 0.24.0

    Parameters
    ----------
    df : DataFrame
        DataFrame to be converted to MultiIndex.
    sortorder : int, optional
        Level of sortedness (must be lexicographically sorted by that
        level).
    names : list-like, optional
        If no names are provided, use the column names, or tuple of column
        names if the columns is a MultiIndex. If a sequence, overwrite
        names with the given sequence.

    Returns
    -------
    MultiIndex
        The MultiIndex representation of the given DataFrame.

    Raises
    ------
    TypeError
        If ``df`` is not a DataFrame.

    See Also
    --------
    MultiIndex.from_arrays : Convert list of arrays to MultiIndex.
    MultiIndex.from_tuples : Convert list of tuples to MultiIndex.
    MultiIndex.from_product : Make a MultiIndex from cartesian product
                              of iterables.

    Examples
    --------
    >>> df = pd.DataFrame([['HI', 'Temp'], ['HI', 'Precip'],
    ...                    ['NJ', 'Temp'], ['NJ', 'Precip']],
    ...                   columns=['a', 'b'])
    >>> df
          a       b
    0    HI    Temp
    1    HI  Precip
    2    NJ    Temp
    3    NJ  Precip

    >>> pd.MultiIndex.from_frame(df)
    MultiIndex(levels=[['HI', 'NJ'], ['Precip', 'Temp']],
               codes=[[0, 0, 1, 1], [1, 0, 1, 0]],
               names=['a', 'b'])

    Using explicit names, instead of the column names

    >>> pd.MultiIndex.from_frame(df, names=['state', 'observation'])
    MultiIndex(levels=[['HI', 'NJ'], ['Precip', 'Temp']],
               codes=[[0, 0, 1, 1], [1, 0, 1, 0]],
               names=['state', 'observation'])
    """
    if not isinstance(df, ABCDataFrame):
        raise TypeError("Input must be a DataFrame")

    # split the (name, column) pairs into two parallel tuples
    column_names, columns = lzip(*df.iteritems())
    names = column_names if names is None else names
    return cls.from_arrays(columns, sortorder=sortorder, names=names)
- # --------------------------------------------------------------------
@property
def levels(self):
    """The levels of this MultiIndex, as stored in ``_levels``."""
    return self._levels
@property
def _values(self):
    """Return ``self.values``."""
    # We override here, since our parent uses _data, which we don't use.
    return self.values
@property
def array(self):
    """
    Raises a ValueError for `MultiIndex` because there's no single
    array backing a MultiIndex.

    Raises
    ------
    ValueError
    """
    msg = ("MultiIndex has no single backing array. Use "
           "'MultiIndex.to_numpy()' to get a NumPy array of tuples.")
    raise ValueError(msg)
@property
def _is_homogeneous_type(self):
    """Whether the levels of a MultiIndex all have the same dtype.

    This looks at the dtypes of the levels. An index without any levels
    counts as homogeneous.

    See Also
    --------
    Index._is_homogeneous_type
    DataFrame._is_homogeneous_type

    Examples
    --------
    >>> MultiIndex.from_tuples([
    ...     ('a', 'b'), ('a', 'c')])._is_homogeneous_type
    True
    >>> MultiIndex.from_tuples([
    ...     ('a', 1), ('a', 2)])._is_homogeneous_type
    False
    """
    # at most one distinct dtype across all levels
    return len({x.dtype for x in self.levels}) <= 1
def _set_levels(self, levels, level=None, copy=False, validate=True,
                verify_integrity=False):
    """
    Replace all levels, or only those selected by ``level``, in place.

    Parameters
    ----------
    levels : sequence
        New level values, one entry per level being set.
    level : sequence of int / level names, optional
        Which levels to replace; None replaces every level.
    copy : bool, default False
        Passed through to ``ensure_index``.
    validate : bool, default True
        Check the length of ``levels`` before doing anything.
    verify_integrity : bool, default False
        Run ``_verify_integrity`` against the new levels before storing.

    Raises
    ------
    ValueError
        If ``validate`` is set and ``levels`` is empty or its length does
        not match the number of levels being set.
    """
    # This is NOT part of the levels property because it should be
    # externally not allowed to set levels. User beware if you change
    # _levels directly
    if validate and len(levels) == 0:
        raise ValueError('Must set non-zero number of levels.')
    if validate and level is None and len(levels) != self.nlevels:
        raise ValueError('Length of levels must match number of levels.')
    if validate and level is not None and len(levels) != len(level):
        raise ValueError('Length of levels must match length of level.')

    if level is None:
        new_levels = FrozenList(
            ensure_index(lev, copy=copy)._shallow_copy()
            for lev in levels)
    else:
        # resolve names/positions to integer level numbers, then patch
        # only those slots of the current levels
        level = [self._get_level_number(lev) for lev in level]
        new_levels = list(self._levels)
        for lev_num, values in zip(level, levels):
            new_levels[lev_num] = ensure_index(
                values, copy=copy)._shallow_copy()
        new_levels = FrozenList(new_levels)

    if verify_integrity:
        self._verify_integrity(levels=new_levels)

    # read the names before swapping the levels, then re-apply them
    names = self.names
    self._levels = new_levels
    if any(names):
        self._set_names(names)

    self._tuples = None
    self._reset_cache()
def set_levels(self, levels, level=None, inplace=False,
               verify_integrity=True):
    """
    Set new levels on MultiIndex. Defaults to returning
    new index.

    Parameters
    ----------
    levels : sequence or list of sequence
        new level(s) to apply
    level : int, level name, or sequence of int/level names (default None)
        level(s) to set (None for all levels)
    inplace : bool
        if True, mutates in place
    verify_integrity : bool (default True)
        if True, checks that levels and codes are compatible

    Returns
    -------
    new index (of same type and class...etc), or None if ``inplace`` is
    True

    Raises
    ------
    TypeError
        If ``levels`` does not have the shape implied by ``level``: a
        single list-like for a scalar ``level``, a list of list-likes
        otherwise.

    Examples
    --------
    >>> idx = pd.MultiIndex.from_tuples([(1, u'one'), (1, u'two'),
    ...                                  (2, u'one'), (2, u'two')],
    ...                                 names=['foo', 'bar'])
    >>> idx.set_levels([['a','b'], [1,2]])
    MultiIndex(levels=[[u'a', u'b'], [1, 2]],
               codes=[[0, 0, 1, 1], [0, 1, 0, 1]],
               names=[u'foo', u'bar'])
    >>> idx.set_levels(['a','b'], level=0)
    MultiIndex(levels=[[u'a', u'b'], [u'one', u'two']],
               codes=[[0, 0, 1, 1], [0, 1, 0, 1]],
               names=[u'foo', u'bar'])
    >>> idx.set_levels(['a','b'], level='bar')
    MultiIndex(levels=[[1, 2], [u'a', u'b']],
               codes=[[0, 0, 1, 1], [0, 1, 0, 1]],
               names=[u'foo', u'bar'])
    >>> idx.set_levels([['a','b'], [1,2]], level=[0,1])
    MultiIndex(levels=[[u'a', u'b'], [1, 2]],
               codes=[[0, 0, 1, 1], [0, 1, 0, 1]],
               names=[u'foo', u'bar'])
    """
    if is_list_like(levels) and not isinstance(levels, Index):
        levels = list(levels)

    if level is not None and not is_list_like(level):
        # scalar ``level``: ``levels`` must be one flat list-like; wrap
        # both so the rest of the code handles the sequence form only
        if not is_list_like(levels):
            raise TypeError("Levels must be list-like")
        if is_list_like(levels[0]):
            raise TypeError("Levels must be list-like")
        level = [level]
        levels = [levels]
    elif level is None or is_list_like(level):
        if not is_list_like(levels) or not is_list_like(levels[0]):
            raise TypeError("Levels must be list of lists-like")

    if inplace:
        idx = self
    else:
        idx = self._shallow_copy()
    idx._reset_identity()
    idx._set_levels(levels, level=level, validate=True,
                    verify_integrity=verify_integrity)
    if not inplace:
        return idx
@property
def codes(self):
    """The codes of this MultiIndex, as stored in ``_codes``."""
    return self._codes
@property
def labels(self):
    """Deprecated alias of ``codes``; emits a FutureWarning on access."""
    warnings.warn((".labels was deprecated in version 0.24.0. "
                   "Use .codes instead."),
                  FutureWarning, stacklevel=2)
    return self.codes
def _set_codes(self, codes, level=None, copy=False, validate=True,
               verify_integrity=False):
    """
    Replace all codes, or only those selected by ``level``, in place.

    Parameters
    ----------
    codes : sequence
        New codes, one entry per level being set.
    level : sequence of int / level names, optional
        Which levels to replace the codes of; None replaces all of them.
    copy : bool, default False
        Passed through to ``_ensure_frozen``.
    validate : bool, default True
        Check the length of ``codes`` before doing anything.
    verify_integrity : bool, default False
        Run ``_verify_integrity`` against the new codes before storing.

    Raises
    ------
    ValueError
        If ``validate`` is set and the length of ``codes`` does not match
        the number of levels being set.
    """
    if validate and level is None and len(codes) != self.nlevels:
        raise ValueError("Length of codes must match number of levels")
    if validate and level is not None and len(codes) != len(level):
        raise ValueError('Length of codes must match length of levels.')

    if level is None:
        new_codes = FrozenList(
            _ensure_frozen(level_codes, lev, copy=copy)._shallow_copy()
            for lev, level_codes in zip(self.levels, codes))
    else:
        # resolve names/positions to integer level numbers, then patch
        # only those slots of the current codes
        level = [self._get_level_number(lev) for lev in level]
        new_codes = list(self._codes)
        for lev_idx, level_codes in zip(level, codes):
            lev = self.levels[lev_idx]
            new_codes[lev_idx] = _ensure_frozen(
                level_codes, lev, copy=copy)._shallow_copy()
        new_codes = FrozenList(new_codes)

    if verify_integrity:
        self._verify_integrity(codes=new_codes)

    self._codes = new_codes
    self._tuples = None
    self._reset_cache()
def set_labels(self, labels, level=None, inplace=False,
               verify_integrity=True):
    """
    Deprecated spelling of ``set_codes``.

    Emits a ``FutureWarning`` and forwards every argument to
    ``self.set_codes`` (``labels`` is passed as ``codes``).
    """
    message = (".set_labels was deprecated in version 0.24.0. "
               "Use .set_codes instead.")
    warnings.warn(message, FutureWarning, stacklevel=2)
    forwarded = dict(codes=labels, level=level, inplace=inplace,
                     verify_integrity=verify_integrity)
    return self.set_codes(**forwarded)
@deprecate_kwarg(old_arg_name='labels', new_arg_name='codes')
def set_codes(self, codes, level=None, inplace=False,
              verify_integrity=True):
    """
    Set new codes on MultiIndex. Defaults to returning a new index.

    .. versionadded:: 0.24.0

       New name for deprecated method `set_labels`.

    Parameters
    ----------
    codes : sequence or list of sequence
        New codes to apply. A flat sequence when ``level`` is a single
        level, otherwise one sequence per level.
    level : int, level name, or sequence of int/level names (default None)
        Level(s) to set (None for all levels).
    inplace : bool
        If True, mutates in place and returns None.
    verify_integrity : bool (default True)
        If True, checks that levels and codes are compatible.

    Returns
    -------
    new index (of same type and class...etc), or None when ``inplace``.

    Raises
    ------
    TypeError
        If ``codes`` does not have the list / list-of-lists shape that
        ``level`` requires.

    Examples
    --------
    >>> idx = pd.MultiIndex.from_tuples([(1, u'one'), (1, u'two'),
    ...                                  (2, u'one'), (2, u'two')],
    ...                                 names=['foo', 'bar'])
    >>> idx.set_codes([[1,0,1,0], [0,0,1,1]])
    MultiIndex(levels=[[1, 2], [u'one', u'two']],
               codes=[[1, 0, 1, 0], [0, 0, 1, 1]],
               names=[u'foo', u'bar'])
    >>> idx.set_codes([1,0,1,0], level=0)
    MultiIndex(levels=[[1, 2], [u'one', u'two']],
               codes=[[1, 0, 1, 0], [0, 1, 0, 1]],
               names=[u'foo', u'bar'])
    >>> idx.set_codes([0,0,1,1], level='bar')
    MultiIndex(levels=[[1, 2], [u'one', u'two']],
               codes=[[0, 0, 1, 1], [0, 0, 1, 1]],
               names=[u'foo', u'bar'])
    >>> idx.set_codes([[1,0,1,0], [0,0,1,1]], level=[0,1])
    MultiIndex(levels=[[1, 2], [u'one', u'two']],
               codes=[[1, 0, 1, 0], [0, 0, 1, 1]],
               names=[u'foo', u'bar'])
    """
    single_level = level is not None and not is_list_like(level)
    if single_level:
        # one level: codes must be a flat list-like, wrapped for _set_codes
        if not is_list_like(codes):
            raise TypeError("Codes must be list-like")
        if is_list_like(codes[0]):
            raise TypeError("Codes must be list-like")
        level, codes = [level], [codes]
    elif not (is_list_like(codes) and is_list_like(codes[0])):
        # all levels or several levels: codes must be a list of list-likes
        raise TypeError("Codes must be list of lists-like")

    target = self if inplace else self._shallow_copy()
    target._reset_identity()
    target._set_codes(codes, level=level, verify_integrity=verify_integrity)
    return None if inplace else target
@deprecate_kwarg(old_arg_name='labels', new_arg_name='codes')
def copy(self, names=None, dtype=None, levels=None, codes=None,
         deep=False, _set_identity=False, **kwargs):
    """
    Make a copy of this object. Names, levels and codes can be passed and
    will be set on the new copy.

    Parameters
    ----------
    names : sequence, optional
    dtype : numpy dtype or pandas type, optional
        Accepted for signature compatibility; not read in this method.
    levels : sequence, optional
    codes : sequence, optional
    deep : bool, default False
        If True, levels/codes that were not passed are deep-copied.

    Returns
    -------
    copy : MultiIndex

    Notes
    -----
    In most cases, there should be no functional difference from using
    ``deep``, but if ``deep`` is passed it will attempt to deepcopy.
    This could be potentially expensive on large MultiIndex objects.
    A ``name`` keyword, if present in ``kwargs``, is forwarded to
    ``_validate_names``.
    """
    names = self._validate_names(name=kwargs.get('name'), names=names,
                                 deep=deep)

    if deep:
        from copy import deepcopy as _duplicate
    else:
        def _duplicate(obj):
            return obj

    if levels is None:
        levels = _duplicate(self.levels)
    if codes is None:
        codes = _duplicate(self.codes)

    return MultiIndex(levels=levels, codes=codes, names=names,
                      sortorder=self.sortorder, verify_integrity=False,
                      _set_identity=_set_identity)
- def __array__(self, dtype=None):
- """ the array interface, return my values """
- return self.values
def view(self, cls=None):
    """
    Return a copy of this index that shares the original's ``_id``.

    ``cls`` is accepted but not used here.
    """
    clone = self.copy()
    clone._id = self._id
    return clone
- def _shallow_copy_with_infer(self, values, **kwargs):
- # On equal MultiIndexes the difference is empty.
- # Therefore, an empty MultiIndex is returned GH13490
- if len(values) == 0:
- return MultiIndex(levels=[[] for _ in range(self.nlevels)],
- codes=[[] for _ in range(self.nlevels)],
- **kwargs)
- return self._shallow_copy(values, **kwargs)
@Appender(_index_shared_docs['contains'] % _index_doc_kwargs)
def __contains__(self, key):
    # Unhashable keys raise TypeError here, before any lookup is tried.
    hash(key)
    try:
        self.get_loc(key)
    except (LookupError, TypeError):
        return False
    else:
        return True

contains = __contains__
@Appender(_index_shared_docs['_shallow_copy'])
def _shallow_copy(self, values=None, **kwargs):
    if values is None:
        return self.view()

    # 'name' is always consumed; an explicit 'names' takes precedence.
    fallback_names = kwargs.pop('name', self.names)
    names = kwargs.pop('names', fallback_names)
    # discards freq
    kwargs.pop('freq', None)
    return MultiIndex.from_tuples(values, names=names, **kwargs)
@cache_readonly
def dtype(self):
    """Return the NumPy object dtype, ``np.dtype('O')``."""
    object_dtype = np.dtype('O')
    return object_dtype
- def _is_memory_usage_qualified(self):
- """ return a boolean if we need a qualified .info display """
- def f(l):
- return 'mixed' in l or 'string' in l or 'unicode' in l
- return any(f(l) for l in self._inferred_type_levels)
@Appender(Index.memory_usage.__doc__)
def memory_usage(self, deep=False):
    # Overrides the base class so that .values is not computed here,
    # which could materialize a tuple representation unnecessarily.
    total = self._nbytes(deep)
    return total
@cache_readonly
def nbytes(self):
    """Return the number of bytes in the underlying data (non-deep)."""
    shallow_total = self._nbytes(False)
    return shallow_total
- def _nbytes(self, deep=False):
- """
- return the number of bytes in the underlying data
- deeply introspect the level data if deep=True
- include the engine hashtable
- *this is in internal routine*
- """
- # for implementations with no useful getsizeof (PyPy)
- objsize = 24
- level_nbytes = sum(i.memory_usage(deep=deep) for i in self.levels)
- label_nbytes = sum(i.nbytes for i in self.codes)
- names_nbytes = sum(getsizeof(i, objsize) for i in self.names)
- result = level_nbytes + label_nbytes + names_nbytes
- # include our engine hashtable
- result += self._engine.sizeof(deep=deep)
- return result
- # --------------------------------------------------------------------
- # Rendering Methods
def _format_attrs(self):
    """
    Return a list of ``(attr, formatted_value)`` tuples for the repr.

    ``names`` is included only when at least one name is not None, and
    ``sortorder`` only when it is set.
    """
    attrs = []
    attrs.append(('levels', ibase.default_pprint(self._levels,
                                                 max_seq_items=False)))
    attrs.append(('codes', ibase.default_pprint(self._codes,
                                                max_seq_items=False)))

    has_any_name = com._any_not_none(*self.names)
    if has_any_name:
        attrs.append(('names', ibase.default_pprint(self.names)))

    if self.sortorder is not None:
        attrs.append(('sortorder', ibase.default_pprint(self.sortorder)))
    return attrs
- def _format_space(self):
- return "\n%s" % (' ' * (len(self.__class__.__name__) + 1))
- def _format_data(self, name=None):
- # we are formatting thru the attributes
- return None
def _format_native_types(self, na_rep='nan', **kwargs):
    """
    Format each level with its own ``_format_native_types`` and return
    the resulting values, with missing codes rendered as ``na_rep``.
    """
    formatted_levels = []
    remapped_codes = []

    for lev, level_codes in zip(self.levels, self.codes):
        rendered = lev._format_native_types(na_rep=na_rep, **kwargs)

        # -1 codes mark missing entries: append na_rep as an extra level
        # value and point those codes at it
        missing = (level_codes == -1)
        if missing.any():
            na_position = len(rendered)
            rendered = np.append(rendered, na_rep)
            level_codes = level_codes.values()
            level_codes[missing] = na_position

        formatted_levels.append(rendered)
        remapped_codes.append(level_codes)

    if len(formatted_levels) == 1:
        return Index(formatted_levels[0])._format_native_types()

    # reconstruct the multi-index from the formatted pieces
    rebuilt = MultiIndex(levels=formatted_levels, codes=remapped_codes,
                         names=self.names, sortorder=self.sortorder,
                         verify_integrity=False)
    return rebuilt.values
def format(self, space=2, sparsify=None, adjoin=True, names=False,
           na_rep=None, formatter=None):
    """
    Render the index as formatted strings.

    Parameters
    ----------
    space : int, default 2
        Passed to the adjustment object's ``adjoin`` when ``adjoin`` is set.
    sparsify : bool or other, optional
        If None, the ``display.multi_sparse`` option is used. A truthy
        value other than ``True``/``1`` is itself used as the sentinel
        handed to ``_sparsify``.
    adjoin : bool, default True
        If True, join the per-level columns and return a list of lines;
        otherwise return the per-level lists.
    names : bool, default False
        If True, each level's name is placed before its values.
    na_rep : optional
        Replacement for missing entries; defaults to ``_get_na_rep`` of
        the level's dtype type.
    formatter : optional
        Forwarded to each level's ``format``.

    Returns
    -------
    list
        Empty list for an empty index.
    """
    if len(self) == 0:
        return []

    # Step 1: stringify each level's values, substituting the NA marker
    stringified_levels = []
    for lev, level_codes in zip(self.levels, self.codes):
        na = na_rep if na_rep is not None else _get_na_rep(lev.dtype.type)

        if len(lev) > 0:

            formatted = lev.take(level_codes).format(formatter=formatter)

            # we have some NA
            mask = level_codes == -1
            if mask.any():
                formatted = np.array(formatted, dtype=object)
                formatted[mask] = na
                formatted = formatted.tolist()

        else:
            # weird all NA case
            formatted = [pprint_thing(na if isna(x) else x,
                                      escape_chars=('\t', '\r', '\n'))
                         for x in algos.take_1d(lev._values, level_codes)]
        stringified_levels.append(formatted)

    # Step 2: optionally prepend each level's name to its column
    result_levels = []
    for lev, name in zip(stringified_levels, self.names):
        level = []

        if names:
            level.append(pprint_thing(name,
                                      escape_chars=('\t', '\r', '\n'))
                         if name is not None else '')

        level.extend(np.array(lev, dtype=object))
        result_levels.append(level)

    if sparsify is None:
        sparsify = get_option("display.multi_sparse")

    # Step 3: sparsify via _sparsify
    if sparsify:
        sentinel = ''
        # GH3547
        # use value of sparsify as sentinel, unless it's an obvious
        # "Truthey" value
        if sparsify not in [True, 1]:
            sentinel = sparsify
        # little bit of a kludge job for #1217
        # start=int(names) is 1 when a name row was prepended, else 0
        result_levels = _sparsify(result_levels, start=int(names),
                                  sentinel=sentinel)

    if adjoin:
        from pandas.io.formats.format import _get_adjustment
        adj = _get_adjustment()
        return adj.adjoin(space, *result_levels).split('\n')
    else:
        return result_levels
- # --------------------------------------------------------------------
- def __len__(self):
- return len(self.codes[0])
def _get_names(self):
    """Return a ``FrozenList`` holding the ``name`` of every level."""
    level_names = [lev.name for lev in self.levels]
    return FrozenList(level_names)
- def _set_names(self, names, level=None, validate=True):
- """
- Set new names on index. Each name has to be a hashable type.
- Parameters
- ----------
- values : str or sequence
- name(s) to set
- level : int, level name, or sequence of int/level names (default None)
- If the index is a MultiIndex (hierarchical), level(s) to set (None
- for all levels). Otherwise level must be None
- validate : boolean, default True
- validate that the names match level lengths
- Raises
- ------
- TypeError if each name is not hashable.
- Notes
- -----
- sets names on levels. WARNING: mutates!
- Note that you generally want to set this *after* changing levels, so
- that it only acts on copies
- """
- # GH 15110
- # Don't allow a single string for names in a MultiIndex
- if names is not None and not is_list_like(names):
- raise ValueError('Names should be list-like for a MultiIndex')
- names = list(names)
- if validate and level is not None and len(names) != len(level):
- raise ValueError('Length of names must match length of level.')
- if validate and level is None and len(names) != self.nlevels:
- raise ValueError('Length of names must match number of levels in '
- 'MultiIndex.')
- if level is None:
- level = range(self.nlevels)
- else:
- level = [self._get_level_number(l) for l in level]
- # set the name
- for l, name in zip(level, names):
- if name is not None:
- # GH 20527
- # All items in 'names' need to be hashable:
- if not is_hashable(name):
- raise TypeError('{}.name must be a hashable type'
- .format(self.__class__.__name__))
- self.levels[l].rename(name, inplace=True)
# Read/write property for the level names: reads go through _get_names
# and assignments through _set_names.
names = property(fset=_set_names, fget=_get_names,
                 doc="Names of levels in MultiIndex")
@Appender(_index_shared_docs['_get_grouper_for_level'])
def _get_grouper_for_level(self, mapper, level):
    level_codes = self.codes[level]
    level_index = self.levels[level]

    if mapper is not None:
        # Handle group mapping function and return
        mapped = level_index.take(level_codes).map(mapper)
        return mapped, None, None

    codes, uniques = algos.factorize(level_codes, sort=True)

    if len(uniques) > 0 and uniques[0] == -1:
        # Handle NAs: factorize only the observed codes and keep -1
        # for the missing positions
        observed = level_codes != -1
        ok_codes, uniques = algos.factorize(level_codes[observed],
                                            sort=True)

        codes = np.empty(len(level_codes), dtype=level_codes.dtype)
        codes[observed] = ok_codes
        codes[~observed] = -1

    if len(uniques) < len(level_index):
        # Remove unobserved levels from level_index
        level_index = level_index.take(uniques)

    grouper = level_index.take(codes)
    return grouper, codes, level_index
@property
def _constructor(self):
    """Return ``MultiIndex.from_tuples`` as the constructor callable."""
    builder = MultiIndex.from_tuples
    return builder
@cache_readonly
def inferred_type(self):
    """Return the constant string ``'mixed'``."""
    kind = 'mixed'
    return kind
- def _get_level_number(self, level):
- count = self.names.count(level)
- if (count > 1) and not is_integer(level):
- raise ValueError('The name %s occurs multiple times, use a '
- 'level number' % level)
- try:
- level = self.names.index(level)
- except ValueError:
- if not is_integer(level):
- raise KeyError('Level %s not found' % str(level))
- elif level < 0:
- level += self.nlevels
- if level < 0:
- orig_level = level - self.nlevels
- raise IndexError('Too many levels: Index has only %d '
- 'levels, %d is not a valid level number' %
- (self.nlevels, orig_level))
- # Note: levels are zero-based
- elif level >= self.nlevels:
- raise IndexError('Too many levels: Index has only %d levels, '
- 'not %d' % (self.nlevels, level + 1))
- return level
# Class-level default for the lazily built tuple cache: ``values`` fills
# it on first access and ``_set_codes`` resets it to None.
_tuples = None
@cache_readonly
def _engine(self):
    """
    Build the lookup engine for this index.

    Returns a ``MultiIndexUIntEngine`` when the combined bit width of
    all levels is at most 64, otherwise a ``MultiIndexPyIntEngine``.
    """
    # Bits needed to represent the codes of each level, as log2 of the
    # level size (plus one slot for the -1 / NaN code):
    level_sizes = [len(lev) + 1 for lev in self.levels]
    bits_per_level = np.ceil(np.log2(level_sizes))

    # Cumulative bit counts, accumulated from the _right_ ...
    cumulative_bits = np.cumsum(bits_per_level[::-1])[::-1]

    # ... so that sorting the combination of shifted codes (one unique
    # integer per row) is equivalent to sorting the codes themselves
    # lexicographically. Each level is shifted by the number of bits
    # needed by the levels to its right:
    offsets = np.concatenate([cumulative_bits[1:], [0]]).astype('uint64')

    # Total number of bits needed for the representation:
    if cumulative_bits[0] > 64:
        # Would overflow a 64 bit uint - use Python integers:
        engine_cls = MultiIndexPyIntEngine
    else:
        engine_cls = MultiIndexUIntEngine
    return engine_cls(self.levels, self.codes, offsets)
@property
def values(self):
    """
    Return the index as zipped per-level values, cached in
    ``self._tuples`` after the first call.
    """
    cached = self._tuples
    if cached is not None:
        return cached

    columns = []
    for position in range(self.nlevels):
        level_values = self._get_level_values(position)
        if is_categorical_dtype(level_values):
            level_values = level_values.get_values()
        # extension-typed or boxed values are converted to object first
        needs_object = (
            isinstance(level_values.dtype,
                       (PandasExtensionDtype, ExtensionDtype))
            or hasattr(level_values, '_box_values'))
        if needs_object:
            level_values = level_values.astype(object)
        columns.append(np.array(level_values, copy=False))

    self._tuples = lib.fast_zip(columns)
    return self._tuples
- @property
- def _has_complex_internals(self):
- # to disable groupby tricks
- return True
@cache_readonly
def is_monotonic_increasing(self):
    """
    return if the index is monotonic increasing (only equal or
    increasing) values.
    """
    # lexsort() wants the most significant key last, so walk the levels
    # from last to first.
    level_positions = reversed(range(len(self.levels)))
    sort_keys = [self._get_level_values(pos).values
                 for pos in level_positions]
    try:
        order = np.lexsort(sort_keys)
        return Index(order).is_monotonic
    except TypeError:
        # mixed types: np.lexsort cannot compare them, fall back to the
        # tuple values
        return Index(self.values).is_monotonic
@cache_readonly
def is_monotonic_decreasing(self):
    """
    return if the index is monotonic decreasing (only equal or
    decreasing) values.
    """
    # decreasing if and only if the reversed index is increasing
    reversed_index = self[::-1]
    return reversed_index.is_monotonic_increasing
@cache_readonly
def _have_mixed_levels(self):
    """Return one boolean per level: is 'mixed' in its inferred type."""
    flags = []
    for inferred in self._inferred_type_levels:
        flags.append('mixed' in inferred)
    return flags
@cache_readonly
def _inferred_type_levels(self):
    """Return a list with the ``inferred_type`` of each level."""
    inferred = []
    for lev in self.levels:
        inferred.append(lev.inferred_type)
    return inferred
@cache_readonly
def _hashed_values(self):
    """Return the result of ``hash_tuples`` applied to this index."""
    from pandas.core.util import hashing
    return hashing.hash_tuples(self)
def _hashed_indexing_key(self, key):
    """
    Validate and return the hash for the provided key.

    *this is internal for use for the cython routines*

    Parameters
    ----------
    key : string or tuple

    Returns
    -------
    Result of ``hash_tuples`` (non-tuple key) or ``hash_tuple``
    (tuple key).

    Raises
    ------
    KeyError
        If a tuple key does not have one entry per level.

    Notes
    -----
    Entries belonging to mixed levels are stringified before hashing.
    """
    from pandas.core.util.hashing import hash_tuples, hash_tuple

    if not isinstance(key, tuple):
        return hash_tuples(key)

    if len(key) != self.nlevels:
        raise KeyError

    normalized = tuple(
        str(part)
        if (mixed and not isinstance(part, compat.string_types))
        else part
        for part, mixed in zip(key, self._have_mixed_levels))
    return hash_tuple(normalized)
@Appender(Index.duplicated.__doc__)
def duplicated(self, keep='first'):
    from pandas.core.sorting import get_group_index
    from pandas._libs.hashtable import duplicated_int64

    level_sizes = map(len, self.levels)
    group_ids = get_group_index(self.codes, level_sizes,
                                sort=False, xnull=False)
    return duplicated_int64(group_ids, keep)
def fillna(self, value=None, downcast=None):
    """
    fillna is not implemented for MultiIndex.

    Raises
    ------
    NotImplementedError
        Always; both arguments are ignored.
    """
    message = 'isna is not defined for MultiIndex'
    raise NotImplementedError(message)
@Appender(_index_shared_docs['dropna'])
def dropna(self, how='any'):
    # one boolean mask per level, True where the code is -1 (missing)
    missing = [level_codes == -1 for level_codes in self.codes]

    if how == 'any':
        reducer = np.any
    elif how == 'all':
        reducer = np.all
    else:
        raise ValueError("invalid how option: {0}".format(how))
    drop_mask = reducer(missing, axis=0)

    kept_codes = [level_codes[~drop_mask] for level_codes in self.codes]
    return self.copy(codes=kept_codes, deep=True)
def get_value(self, series, key):
    """
    Look up ``key`` in ``series`` using this index.

    Tries, in order: the engine lookup, a ``get_loc``-based partial
    lookup (``_try_mi``), and a positional lookup via
    ``libindex.get_value_at``. When the engine raises ``TypeError``,
    datetime-like / string keys are retried directly and then wrapped in
    ``Timestamp``.

    Raises
    ------
    KeyError
        The engine's original ``KeyError`` when the fallbacks fail.
    IndexError
        Propagated from the positional lookup.
    InvalidIndexError
        For iterator keys, or when the ``TypeError`` path finds nothing.
    """
    # somewhat broken encapsulation
    from pandas.core.indexing import maybe_droplevels

    # Label-based
    s = com.values_from_object(series)
    k = com.values_from_object(key)

    def _try_mi(k):
        # Partial lookup: locate k, slice values and index, then drop
        # the levels consumed by k.
        # TODO: what if a level contains tuples??
        loc = self.get_loc(k)
        new_values = series._values[loc]
        new_index = self[loc]
        new_index = maybe_droplevels(new_index, k)
        # NOTE(review): __finalize__ receives the index (self), not the
        # series -- confirm this is intended.
        return series._constructor(new_values, index=new_index,
                                   name=series.name).__finalize__(self)

    try:
        return self._engine.get_value(s, k)
    except KeyError as e1:
        # e1 is kept so the engine's original error can be re-raised if
        # every fallback fails.
        try:
            return _try_mi(key)
        except KeyError:
            pass

        try:
            return libindex.get_value_at(s, k)
        except IndexError:
            raise
        except TypeError:
            # generator/iterator-like
            if is_iterator(key):
                raise InvalidIndexError(key)
            else:
                raise e1
        except Exception:  # pragma: no cover
            raise e1
    except TypeError:

        # a Timestamp will raise a TypeError in a multi-index
        # rather than a KeyError, try it here
        # note that a string that 'looks' like a Timestamp will raise
        # a KeyError! (GH5725)
        if (isinstance(key, (datetime.datetime, np.datetime64)) or
                (compat.PY3 and isinstance(key, compat.string_types))):
            try:
                return _try_mi(key)
            except KeyError:
                raise
            except (IndexError, ValueError, TypeError):
                pass

            try:
                return _try_mi(Timestamp(key))
            except (KeyError, TypeError,
                    IndexError, ValueError, tslibs.OutOfBoundsDatetime):
                pass

        raise InvalidIndexError(key)
def _get_level_values(self, level, unique=False):
    """
    Return the values of the requested level, one per code.

    **this is an internal method**

    Parameters
    ----------
    level : int level
        Position of the level (not resolved through the names).
    unique : bool, default False
        If True, the codes are de-duplicated first.

    Returns
    -------
    values
        The level's ``_shallow_copy`` of the taken values; codes without
        a level value are filled with the level's ``_na_value``.
    """
    lev = self.levels[level]
    level_codes = self.codes[level]
    if unique:
        level_codes = algos.unique(level_codes)
    taken = algos.take_1d(lev._values, level_codes,
                          fill_value=lev._na_value)
    return lev._shallow_copy(taken)
def get_level_values(self, level):
    """
    Return vector of label values for requested level,
    equal to the length of the index.

    Parameters
    ----------
    level : int or str
        ``level`` is either the integer position of the level in the
        MultiIndex, or the name of the level.

    Returns
    -------
    values : Index
        ``values`` is a level of this MultiIndex converted to
        a single :class:`Index` (or subclass thereof).

    Examples
    --------
    Create a MultiIndex:

    >>> mi = pd.MultiIndex.from_arrays((list('abc'), list('def')))
    >>> mi.names = ['level_1', 'level_2']

    Get level values by supplying level as either integer or name:

    >>> mi.get_level_values(0)
    Index(['a', 'b', 'c'], dtype='object', name='level_1')
    >>> mi.get_level_values('level_2')
    Index(['d', 'e', 'f'], dtype='object', name='level_2')
    """
    position = self._get_level_number(level)
    return self._get_level_values(position)
@Appender(_index_shared_docs['index_unique'] % _index_doc_kwargs)
def unique(self, level=None):
    if level is not None:
        # de-duplicate a single level only
        position = self._get_level_number(level)
        return self._get_level_values(level=position, unique=True)
    return super(MultiIndex, self).unique()
- def _to_safe_for_reshape(self):
- """ convert to object if we are a categorical """
- return self.set_levels([i._to_safe_for_reshape() for i in self.levels])
def to_frame(self, index=True, name=None):
    """
    Create a DataFrame with the levels of the MultiIndex as columns.

    Column ordering follows the level order (an ``OrderedDict`` is
    handed to the DataFrame constructor).

    .. versionadded:: 0.24.0

    Parameters
    ----------
    index : boolean, default True
        Set the index of the returned DataFrame as the original MultiIndex.
    name : list / sequence of strings, optional
        The passed names should substitute index level names.

    Returns
    -------
    DataFrame : a DataFrame containing the original MultiIndex data.

    Raises
    ------
    TypeError
        If ``name`` is given but is not list-like.
    ValueError
        If ``name`` does not have one entry per level.

    See Also
    --------
    DataFrame
    """
    from pandas import DataFrame

    if name is None:
        idx_names = self.names
    else:
        if not is_list_like(name):
            raise TypeError("'name' must be a list / sequence "
                            "of column names.")
        if len(name) != len(self.levels):
            raise ValueError("'name' should have same length as "
                             "number of levels on index.")
        idx_names = name

    # Guarantee resulting column order; unnamed levels use their position
    columns = OrderedDict()
    for lvlname, position in zip(idx_names, range(len(self.levels))):
        column_key = position if lvlname is None else lvlname
        columns[column_key] = self._get_level_values(position)
    result = DataFrame(columns, copy=False)

    if index:
        result.index = self
    return result
def to_hierarchical(self, n_repeat, n_shuffle=1):
    """
    Return a MultiIndex reshaped to conform to the
    shapes given by n_repeat and n_shuffle.

    .. deprecated:: 0.24.0

    Useful to replicate and rearrange a MultiIndex for combination
    with another Index with n_repeat items.

    Parameters
    ----------
    n_repeat : int
        Number of times to repeat the labels on self
    n_shuffle : int
        Controls the reordering of the labels. If the result is going
        to be an inner level in a MultiIndex, n_shuffle will need to be
        greater than one. The size of each label must be divisible by
        n_shuffle.

    Returns
    -------
    MultiIndex

    Examples
    --------
    >>> idx = pd.MultiIndex.from_tuples([(1, u'one'), (1, u'two'),
    ...                                  (2, u'one'), (2, u'two')])
    >>> idx.to_hierarchical(3)
    MultiIndex(levels=[[1, 2], [u'one', u'two']],
               codes=[[0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1],
                      [0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1]])
    """
    repeated = [np.repeat(level_codes, n_repeat)
                for level_codes in self.codes]
    # Assumes that each level_codes is divisible by n_shuffle
    shuffled = [arr.reshape(n_shuffle, -1).ravel(order='F')
                for arr in repeated]

    # the deprecation warning is only reached once the reshape succeeded
    warnings.warn("Method .to_hierarchical is deprecated and will "
                  "be removed in a future version",
                  FutureWarning, stacklevel=2)
    return MultiIndex(levels=self.levels, codes=shuffled, names=self.names)
def to_flat_index(self):
    """
    Convert a MultiIndex to an Index of Tuples containing the level values.

    .. versionadded:: 0.24.0

    Returns
    -------
    pd.Index
        Index with the MultiIndex data represented in Tuples.

    Notes
    -----
    This method will simply return the caller if called by anything other
    than a MultiIndex.

    Examples
    --------
    >>> index = pd.MultiIndex.from_product(
    ...     [['foo', 'bar'], ['baz', 'qux']],
    ...     names=['a', 'b'])
    >>> index.to_flat_index()
    Index([('foo', 'baz'), ('foo', 'qux'),
           ('bar', 'baz'), ('bar', 'qux')],
          dtype='object')
    """
    tuples = self.values
    # tupleize_cols=False keeps the tuples as plain Index entries
    return Index(tuples, tupleize_cols=False)
@property
def is_all_dates(self):
    """Always False for this class."""
    return False
def is_lexsorted(self):
    """
    Return True if the codes are lexicographically sorted, i.e. the
    lexsort depth equals the number of levels.
    """
    depth = self.lexsort_depth
    return depth == self.nlevels
@cache_readonly
def lexsort_depth(self):
    """
    Number of leading levels whose codes are lexicographically sorted.

    When ``sortorder`` is set it decides the answer: ``nlevels`` for a
    sortorder of 0, otherwise 0.
    """
    if self.sortorder is not None:
        return self.nlevels if self.sortorder == 0 else 0

    int64_codes = [ensure_int64(level_codes) for level_codes in self.codes]
    # try the deepest prefix first and shrink until one is sorted
    depth = self.nlevels
    while depth > 0:
        if libalgos.is_lexsorted(int64_codes[:depth]):
            return depth
        depth -= 1
    return 0
def _sort_levels_monotonic(self):
    """
    .. versionadded:: 0.20.0

    This is an *internal* function.

    Create a new MultiIndex from the current to monotonically sorted
    items IN the levels. This does not actually make the entire MultiIndex
    monotonic, JUST the levels.

    The resulting MultiIndex will have the same outward
    appearance, meaning the same .values and ordering. It will also
    be .equals() to the original.

    Returns
    -------
    MultiIndex
        ``self`` is returned unchanged when it is already lexsorted and
        monotonic.

    Examples
    --------

    >>> i = pd.MultiIndex(levels=[['a', 'b'], ['bb', 'aa']],
    ...                   codes=[[0, 0, 1, 1], [0, 1, 0, 1]])
    >>> i
    MultiIndex(levels=[['a', 'b'], ['bb', 'aa']],
               codes=[[0, 0, 1, 1], [0, 1, 0, 1]])

    >>> i._sort_levels_monotonic()
    MultiIndex(levels=[['a', 'b'], ['aa', 'bb']],
               codes=[[0, 0, 1, 1], [1, 0, 1, 0]])
    """

    if self.is_lexsorted() and self.is_monotonic:
        return self

    new_levels = []
    new_codes = []

    for lev, level_codes in zip(self.levels, self.codes):

        if not lev.is_monotonic:
            try:
                # indexer to reorder the levels
                indexer = lev.argsort()
            except TypeError:
                # argsort failed with TypeError: keep this level as-is
                pass
            else:
                lev = lev.take(indexer)

                # indexer to reorder the level codes
                indexer = ensure_int64(indexer)
                # ri is the reverse of indexer; it is used to translate
                # the existing codes to the reordered level
                ri = lib.get_reverse_indexer(indexer, len(indexer))
                level_codes = algos.take_1d(ri, level_codes)

        new_levels.append(lev)
        new_codes.append(level_codes)

    return MultiIndex(new_levels, new_codes,
                      names=self.names, sortorder=self.sortorder,
                      verify_integrity=False)
def remove_unused_levels(self):
    """
    Create a new MultiIndex from the current that removes
    unused levels, meaning that they are not expressed in the codes.

    The resulting MultiIndex will have the same outward
    appearance, meaning the same .values and ordering. It will also
    be .equals() to the original.

    .. versionadded:: 0.20.0

    Returns
    -------
    MultiIndex

    Examples
    --------
    >>> i = pd.MultiIndex.from_product([range(2), list('ab')])
    >>> i
    MultiIndex(levels=[[0, 1], ['a', 'b']],
               codes=[[0, 0, 1, 1], [0, 1, 0, 1]])

    >>> i[2:]
    MultiIndex(levels=[[0, 1], ['a', 'b']],
               codes=[[1, 1], [0, 1]])

    The 0 from the first level is not represented
    and can be removed

    >>> i[2:].remove_unused_levels()
    MultiIndex(levels=[[1], ['a', 'b']],
               codes=[[0, 0], [0, 1]])
    """

    new_levels = []
    new_codes = []

    changed = False
    for lev, level_codes in zip(self.levels, self.codes):

        # Since few levels are typically unused, bincount() is more
        # efficient than unique() - however it only accepts positive values
        # (and drops order):
        # codes are shifted by +1 so that -1 (NA) lands in bin 0
        uniques = np.where(np.bincount(level_codes + 1) > 0)[0] - 1
        # 1 when the NA code -1 is present among the uniques, else 0
        has_na = int(len(uniques) and (uniques[0] == -1))

        if len(uniques) != len(lev) + has_na:
            # We have unused levels
            changed = True

            # Recalculate uniques, now preserving order.
            # Can easily be cythonized by exploiting the already existing
            # "uniques" and stop parsing "level_codes" when all items
            # are found:
            uniques = algos.unique(level_codes)
            if has_na:
                na_idx = np.where(uniques == -1)[0]
                # Just ensure that -1 is in first position:
                uniques[[0, na_idx[0]]] = uniques[[na_idx[0], 0]]

            # codes get mapped from uniques to 0:len(uniques)
            # -1 (if present) is mapped to last position
            # NOTE(review): np.zeros defaults to a float dtype, so the
            # remapped codes are floats here -- presumably coerced when
            # _set_codes freezes them; confirm.
            code_mapping = np.zeros(len(lev) + has_na)
            # ... and reassigned value -1:
            code_mapping[uniques] = np.arange(len(uniques)) - has_na

            level_codes = code_mapping[level_codes]

            # new levels are simple
            lev = lev.take(uniques[has_na:])

        new_levels.append(lev)
        new_codes.append(level_codes)

    result = self._shallow_copy()

    # only touch identity/levels/codes when something was removed
    if changed:
        result._reset_identity()
        result._set_levels(new_levels, validate=False)
        result._set_codes(new_codes, validate=False)

    return result
@property
def nlevels(self):
    """Integer number of levels in this MultiIndex."""
    level_count = len(self.levels)
    return level_count
@property
def levshape(self):
    """A tuple with the length of each level."""
    return tuple(map(len, self.levels))
def __reduce__(self):
    """Necessary for making this object picklable"""
    state = {
        'levels': list(self.levels),
        'codes': list(self.codes),
        'sortorder': self.sortorder,
        'names': list(self.names),
    }
    return ibase._new_Index, (self.__class__, state), None
def __setstate__(self, state):
    """
    Necessary for making this object picklable.

    ``state`` is either a dict with the keys 'levels', 'codes',
    'sortorder' and 'names', or a 2-tuple whose second item is the
    4-tuple ``(levels, codes, sortorder, names)``.
    """
    if isinstance(state, dict):
        levels, codes, sortorder, names = (
            state.get(field)
            for field in ('levels', 'codes', 'sortorder', 'names'))
    elif isinstance(state, tuple):
        _nd_state, own_state = state
        levels, codes, sortorder, names = own_state

    self._set_levels([Index(lev) for lev in levels], validate=False)
    self._set_codes(codes)
    self._set_names(names)
    self.sortorder = sortorder
    self._verify_integrity()
    self._reset_identity()
def __getitem__(self, key):
    """
    Scalar keys return one tuple of level values (``np.nan`` where the
    code is -1); any other key returns a new MultiIndex built from the
    selected codes.
    """
    if is_scalar(key):
        position = com.cast_scalar_indexer(key)
        entries = []
        for lev, level_codes in zip(self.levels, self.codes):
            code = level_codes[position]
            entries.append(np.nan if code == -1 else lev[code])
        return tuple(entries)

    if com.is_bool_indexer(key):
        key = np.asarray(key, dtype=bool)
        sortorder = self.sortorder
    else:
        # cannot be sure whether the result will be sorted
        sortorder = None
        if isinstance(key, Index):
            key = np.asarray(key)

    selected_codes = [level_codes[key] for level_codes in self.codes]
    return MultiIndex(levels=self.levels, codes=selected_codes,
                      names=self.names, sortorder=sortorder,
                      verify_integrity=False)
@Appender(_index_shared_docs['take'] % _index_doc_kwargs)
def take(self, indices, axis=0, allow_fill=True,
         fill_value=None, **kwargs):
    nv.validate_take(tuple(), kwargs)
    positions = ensure_platform_int(indices)
    # filled positions get the code -1
    taken_codes = self._assert_take_fillable(
        self.codes, positions, allow_fill=allow_fill,
        fill_value=fill_value, na_value=-1)
    return MultiIndex(levels=self.levels, codes=taken_codes,
                      names=self.names, verify_integrity=False)
- def _assert_take_fillable(self, values, indices, allow_fill=True,
- fill_value=None, na_value=None):
- """ Internal method to handle NA filling of take """
- # only fill if we are passing a non-None fill_value
- if allow_fill and fill_value is not None:
- if (indices < -1).any():
- msg = ('When allow_fill=True and fill_value is not None, '
- 'all indices must be >= -1')
- raise ValueError(msg)
- taken = [lab.take(indices) for lab in self.codes]
- mask = indices == -1
- if mask.any():
- masked = []
- for new_label in taken:
- label_values = new_label.values()
- label_values[mask] = na_value
- masked.append(np.asarray(label_values))
- taken = masked
- else:
- taken = [lab.take(indices) for lab in self.codes]
- return taken
def append(self, other):
    """
    Append a collection of Index options together

    Parameters
    ----------
    other : Index or list/tuple of indices

    Returns
    -------
    appended : Index
        A MultiIndex when the combined data allows it, otherwise a flat
        Index of the concatenated values.
    """
    if not isinstance(other, (list, tuple)):
        other = [other]

    level_wise = all(
        (isinstance(o, MultiIndex) and o.nlevels >= self.nlevels)
        for o in other)
    if level_wise:
        # append level by level so each level's values stay together
        arrays = [
            self._get_level_values(i).append(
                [o._get_level_values(i) for o in other])
            for i in range(self.nlevels)
        ]
        return MultiIndex.from_arrays(arrays, names=self.names)

    pieces = (self.values, ) + tuple(k._values for k in other)
    new_tuples = np.concatenate(pieces)

    try:
        return MultiIndex.from_tuples(new_tuples, names=self.names)
    except (TypeError, IndexError):
        # the concatenated values cannot form a MultiIndex
        return Index(new_tuples)
def argsort(self, *args, **kwargs):
    """
    Return the positions that would sort the index values.

    All arguments are forwarded unchanged to the ``argsort`` method of
    ``self.values``.
    """
    tuples = self.values
    return tuples.argsort(*args, **kwargs)
@Appender(_index_shared_docs['repeat'] % _index_doc_kwargs)
def repeat(self, repeats, axis=None):
    # No docstring here on purpose: the shared 'repeat' documentation is
    # attached by the Appender decorator.
    nv.validate_repeat(tuple(), dict(axis=axis))

    # Repeat each level's codes as plain ndarrays; levels are reused as-is.
    repeated_codes = []
    for level_codes in self.codes:
        raw = level_codes.view(np.ndarray)
        repeated_codes.append(raw.repeat(repeats))

    return MultiIndex(levels=self.levels,
                      codes=repeated_codes,
                      names=self.names, sortorder=self.sortorder,
                      verify_integrity=False)
def where(self, cond, other=None):
    """
    Unsupported for MultiIndex; always raises.

    Raises
    ------
    NotImplementedError
        Unconditionally.
    """
    msg = (".where is not supported for "
           "MultiIndex operations")
    raise NotImplementedError(msg)
@deprecate_kwarg(old_arg_name='labels', new_arg_name='codes')
def drop(self, codes, level=None, errors='raise'):
    """
    Make new MultiIndex with passed list of codes deleted

    Parameters
    ----------
    codes : array-like
        Must be a list of tuples
    level : int or level name, default None
        When given, the work is delegated to ``_drop_from_level`` and
        ``errors`` is not consulted.
    errors : {'raise', 'ignore'}, default 'raise'
        With 'ignore', keys that ``get_loc`` cannot find are skipped
        instead of raising.

    Returns
    -------
    dropped : MultiIndex

    Raises
    ------
    KeyError
        If a key is not found and ``errors`` is not 'ignore'.
    AssertionError
        If ``get_loc`` returns something other than an integer, a slice
        or a boolean mask.
    """
    if level is not None:
        return self._drop_from_level(codes, level)

    # Best-effort normalisation of the keys into an array. A failed
    # conversion is tolerated: the keys are then iterated as given.
    # (No up-front ``get_indexer`` membership pass is done here: missing
    # keys are reported by ``get_loc`` below as KeyError.)
    if not isinstance(codes, (np.ndarray, Index)):
        try:
            codes = com.index_labels_to_array(codes)
        except (TypeError, ValueError):
            pass

    inds = []
    for level_codes in codes:
        try:
            loc = self.get_loc(level_codes)
        except KeyError:
            if errors != 'ignore':
                raise
            continue

        # get_loc returns either an integer, a slice, or a boolean mask
        if isinstance(loc, (int, np.integer)):
            inds.append(loc)
        elif isinstance(loc, slice):
            inds.extend(lrange(loc.start, loc.stop))
        elif com.is_bool_indexer(loc):
            if self.lexsort_depth == 0:
                warnings.warn('dropping on a non-lexsorted multi-index'
                              ' without a level parameter may impact '
                              'performance.',
                              PerformanceWarning,
                              stacklevel=3)
            inds.extend(loc.nonzero()[0])
        else:
            msg = 'unsupported indexer of type {}'.format(type(loc))
            raise AssertionError(msg)

    return self.delete(inds)
def _drop_from_level(self, codes, level):
    """
    Drop every entry whose label on ``level`` is one of ``codes``.

    Parameters
    ----------
    codes : label or list-like of labels
    level : int or level name

    Returns
    -------
    MultiIndex
        ``self`` restricted to the entries that are kept.
    """
    labels = com.index_labels_to_array(codes)
    level_num = self._get_level_number(level)

    # Translate labels to their integer codes in the level, then keep
    # only the rows whose code is not among them.
    positions = self.levels[level_num].get_indexer(labels)
    keep = ~algos.isin(self.codes[level_num], positions)

    return self[keep]
def swaplevel(self, i=-2, j=-1):
    """
    Exchange two levels of the MultiIndex.

    Only the positions of the two levels change; the order of the
    entries is left untouched.

    Parameters
    ----------
    i : int, str, default -2
        First level to swap, given by position or by name.
    j : int, str, default -1
        Second level to swap, given by position or by name. ``i`` and
        ``j`` do not have to be given in the same way.

    Returns
    -------
    MultiIndex
        A new MultiIndex

        .. versionchanged:: 0.18.1

           The indexes ``i`` and ``j`` are now optional, and default to
           the two innermost levels of the index.

    See Also
    --------
    Series.swaplevel : Swap levels i and j in a MultiIndex.
    DataFrame.swaplevel : Swap levels i and j in a MultiIndex on a
        particular axis.

    Examples
    --------
    >>> mi = pd.MultiIndex(levels=[['a', 'b'], ['bb', 'aa']],
    ...                    codes=[[0, 0, 1, 1], [0, 1, 0, 1]])
    >>> mi
    MultiIndex(levels=[['a', 'b'], ['bb', 'aa']],
               codes=[[0, 0, 1, 1], [0, 1, 0, 1]])
    >>> mi.swaplevel(0, 1)
    MultiIndex(levels=[['bb', 'aa'], ['a', 'b']],
               codes=[[0, 1, 0, 1], [0, 0, 1, 1]])
    """
    first = self._get_level_number(i)
    second = self._get_level_number(j)

    def _swapped(seq):
        # Copy so the index's own containers are never mutated.
        out = list(seq)
        out[first], out[second] = out[second], out[first]
        return out

    return MultiIndex(levels=_swapped(self.levels),
                      codes=_swapped(self.codes),
                      names=_swapped(self.names),
                      verify_integrity=False)
def reorder_levels(self, order):
    """
    Rearrange levels using input order. May not drop or duplicate levels

    Parameters
    ----------
    order : list of int or str
        Levels, by position or by name, in the desired new order. Its
        length must equal the number of levels.

    Returns
    -------
    MultiIndex
        A new MultiIndex with levels, codes and names permuted.

    Raises
    ------
    AssertionError
        If ``order`` does not contain exactly ``nlevels`` entries.
    """
    positions = [self._get_level_number(i) for i in order]
    if len(positions) != self.nlevels:
        raise AssertionError('Length of order must be same as '
                             'number of levels (%d), got %d' %
                             (self.nlevels, len(positions)))

    def _permuted(seq):
        return [seq[pos] for pos in positions]

    return MultiIndex(levels=_permuted(self.levels),
                      codes=_permuted(self.codes),
                      names=_permuted(self.names),
                      verify_integrity=False)
def __getslice__(self, i, j):
    """Return ``self[i:j]`` by delegating to ``__getitem__``."""
    window = slice(i, j)
    return self.__getitem__(window)
def _get_codes_for_sorting(self):
    """
    Wrap each level's codes in an ordered Categorical for sorting.

    For every level the categories are the integers from 0 up to the
    largest code present (none when the level has no codes), so the
    missing marker (-1) is never a valid category. This lets sorting
    tell missing entries apart from real ones.

    Returns
    -------
    list of Categorical
        One ordered Categorical per level.
    """
    from pandas.core.arrays import Categorical

    sortable = []
    for level_codes in self.codes:
        if len(level_codes):
            n_categories = np.array(level_codes).max() + 1
        else:
            n_categories = 0
        categories = np.arange(n_categories, dtype=level_codes.dtype)
        sortable.append(Categorical.from_codes(level_codes, categories,
                                               ordered=True))
    return sortable
def sortlevel(self, level=0, ascending=True, sort_remaining=True):
    """
    Sort MultiIndex at the requested level. The result will respect the
    original ordering of the associated factor at that level.

    Parameters
    ----------
    level : list-like, int or str, default 0
        If a string is given, must be a name of the level
        If list-like must be names or ints of levels.
    ascending : boolean, default True
        False to sort in descending order
        Can also be a list to specify a directed ordering
    sort_remaining : boolean, default True
        Sort by the remaining levels after ``level``. Not consulted
        when ``ascending`` is a list.

    Returns
    -------
    sorted_index : pd.MultiIndex
        Resulting index
    indexer : np.ndarray
        Indices of output values in original index

    Raises
    ------
    ValueError
        If ``ascending`` is a list whose length differs from ``level``.
    """
    from pandas.core.sorting import indexer_from_factorized

    if isinstance(level, (compat.string_types, int)):
        level = [level]
    level = [self._get_level_number(lev) for lev in level]
    sortorder = None

    # we have a directed ordering via ascending
    if isinstance(ascending, list):
        if not len(level) == len(ascending):
            raise ValueError("level must have same length as ascending")

        from pandas.core.sorting import lexsort_indexer
        indexer = lexsort_indexer([self.codes[lev] for lev in level],
                                  orders=ascending)

    # level ordering
    else:
        # Primary sort keys are the requested levels, in the order given.
        # They are looked up by position so that the selection is correct
        # regardless of the order in which the levels are listed.
        primary = tuple(self.codes[lev] for lev in level)
        primshp = tuple(self.levshape[lev] for lev in level)

        if sort_remaining:
            # Break ties with the levels that were not requested, in
            # their natural order. Each key is included once; repeating
            # the primary keys would not change the resulting order.
            requested = set(level)
            rest = [num for num in range(self.nlevels)
                    if num not in requested]
            primary += tuple(self.codes[num] for num in rest)
            primshp += tuple(self.levshape[num] for num in rest)
        else:
            sortorder = level[0]

        indexer = indexer_from_factorized(primary, primshp,
                                          compress=False)

        if not ascending:
            indexer = indexer[::-1]

    indexer = ensure_platform_int(indexer)
    new_codes = [level_codes.take(indexer) for level_codes in self.codes]

    new_index = MultiIndex(codes=new_codes, levels=self.levels,
                           names=self.names, sortorder=sortorder,
                           verify_integrity=False)

    return new_index, indexer
def _convert_listlike_indexer(self, keyarr, kind=None):
    """
    Convert a list-like of keys into a positional indexer.

    Parameters
    ----------
    keyarr : list-like
        Indexer to convert.
    kind : optional
        Forwarded to the parent implementation.

    Returns
    -------
    tuple (indexer, keyarr)
        indexer is an ndarray or None if cannot convert
        keyarr are tuple-safe keys

    Raises
    ------
    KeyError
        If the keys are treated as first-level labels and some of them
        are not present in the first level.
    """
    indexer, keyarr = super(MultiIndex, self)._convert_listlike_indexer(
        keyarr, kind=kind)

    # Nothing more to do if the parent resolved the keys, there are no
    # keys, or the keys are tuples (i.e. not labels of a single level).
    if indexer is not None or not len(keyarr) or isinstance(keyarr[0],
                                                            tuple):
        return indexer, keyarr

    # Otherwise the keys are labels of the first level.
    _, indexer = self.reindex(keyarr, level=0)

    # take all
    if indexer is None:
        indexer = np.arange(len(self))

    absent = self.levels[0].get_indexer(keyarr) == -1
    if absent.any():
        raise KeyError('%s not in index' % keyarr[absent])

    return indexer, keyarr
@Appender(_index_shared_docs['get_indexer'] % _index_doc_kwargs)
def get_indexer(self, target, method=None, limit=None, tolerance=None):
    # No docstring here on purpose: the shared 'get_indexer'
    # documentation is attached by the Appender decorator.
    method = missing.clean_reindex_fill_method(method)
    target = ensure_index(target)

    # empty indexer
    if is_list_like(target) and not len(target):
        return ensure_platform_int(np.array([]))

    if not isinstance(target, MultiIndex):
        try:
            target = MultiIndex.from_tuples(target)
        except (TypeError, ValueError):
            # The target cannot be read as tuples. Without a fill method
            # fall back to matching against a flat Index of our values;
            # with one, carry on with the target as it is.
            if method is None:
                flat = Index(self.values)
                return flat.get_indexer(target,
                                        method=method,
                                        limit=limit,
                                        tolerance=tolerance)

    if not self.is_unique:
        raise ValueError('Reindexing only valid with uniquely valued '
                         'Index objects')

    if method == 'nearest':
        raise NotImplementedError("method='nearest' not implemented yet "
                                  'for MultiIndex; see GitHub issue 9365')

    if method in ('pad', 'backfill'):
        if tolerance is not None:
            raise NotImplementedError("tolerance not implemented yet "
                                      'for MultiIndex')
        indexer = self._engine.get_indexer(target, method, limit)
    else:
        indexer = self._engine.get_indexer(target)

    return ensure_platform_int(indexer)
@Appender(_index_shared_docs['get_indexer_non_unique'] % _index_doc_kwargs)
def get_indexer_non_unique(self, target):
    # Pure delegation to the parent class; defined here so the shared
    # documentation is attached by the Appender decorator.
    parent = super(MultiIndex, self)
    return parent.get_indexer_non_unique(target)
def reindex(self, target, method=None, level=None, limit=None,
            tolerance=None):
    """
    Create index with target's values (move/add/delete values as necessary)

    Parameters
    ----------
    target : iterable
    method : optional
        Fill method forwarded to ``get_indexer``; not allowed together
        with ``level``.
    level : optional
        When given, ``target`` is joined against that single level.
    limit : optional
        Forwarded to ``get_indexer``.
    tolerance : optional
        Forwarded to ``get_indexer``.

    Returns
    -------
    new_index : pd.MultiIndex
        Resulting index
    indexer : np.ndarray or None
        Indices of output values in original index

    Raises
    ------
    TypeError
        If both ``level`` and ``method`` are given.
    ValueError
        If no level is given, the target differs from this index and
        this index is not unique.
    """
    # GH6552: preserve names when reindexing to non-named target
    # (i.e. neither Index nor Series).
    keep_own_names = not hasattr(target, 'names')

    if level is not None:
        if method is not None:
            raise TypeError('Fill method not supported if level passed')

        # GH7774: preserve dtype/tz if target is empty and not an Index.
        # target may be an iterator
        target = ibase._ensure_has_len(target)
        if len(target) == 0 and not isinstance(target, Index):
            level_index = self.levels[level]
            attrs = level_index._get_attributes_dict()
            attrs.pop('freq', None)  # don't preserve freq
            empty_values = np.empty(0, dtype=level_index.dtype)
            target = type(level_index)._simple_new(empty_values, **attrs)
        else:
            target = ensure_index(target)
        target, indexer, _ = self._join_level(target, level, how='right',
                                              return_indexers=True,
                                              keep_order=False)
    else:
        target = ensure_index(target)
        if self.equals(target):
            indexer = None
        elif not self.is_unique:
            raise ValueError("cannot handle a non-unique multi-index!")
        else:
            indexer = self.get_indexer(target, method=method,
                                       limit=limit,
                                       tolerance=tolerance)

    if not isinstance(target, MultiIndex):
        if indexer is None:
            target = self
        elif (indexer >= 0).all():
            # Every target value was found, so the result is a take.
            target = self.take(indexer)
        else:
            # hopefully?
            target = MultiIndex.from_tuples(target)

    names_differ = (keep_own_names and
                    target.nlevels == self.nlevels and
                    target.names != self.names)
    if names_differ:
        # Shallow copy so the caller's object is not renamed in place.
        target = target.copy(deep=False)
        target.names = self.names

    return target, indexer
def get_slice_bound(self, label, side, kind):
    """
    Return the slice bound position for ``label``.

    A non-tuple label is wrapped in a 1-tuple and the work is delegated
    to ``_partial_tup_index``. ``kind`` is accepted but not used here.
    """
    key = label if isinstance(label, tuple) else (label,)
    return self._partial_tup_index(key, side=side)
def slice_locs(self, start=None, end=None, step=None, kind=None):
    """
    For an ordered MultiIndex, compute the slice locations for input
    labels.

    The input labels can be tuples representing partial levels, e.g. for a
    MultiIndex with 3 levels, you can pass a single value (corresponding to
    the first level), or a 1-, 2-, or 3-tuple.

    Parameters
    ----------
    start : label or tuple, default None
        If None, defaults to the beginning
    end : label or tuple
        If None, defaults to the end
    step : int or None
        Slice step
    kind : string, optional, defaults None

    Returns
    -------
    (start, end) : (int, int)

    Notes
    -----
    This method only works if the MultiIndex is properly lexsorted. So,
    if only the first 2 levels of a 3-level MultiIndex are lexsorted,
    you can only pass two levels to ``.slice_locs``.

    Examples
    --------
    >>> mi = pd.MultiIndex.from_arrays([list('abbd'), list('deff')],
    ...                                names=['A', 'B'])

    Get the slice locations from the beginning of 'b' in the first level
    until the end of the multiindex:

    >>> mi.slice_locs(start='b')
    (1, 4)

    Like above, but stop at the end of 'b' in the first level and 'f' in
    the second level:

    >>> mi.slice_locs(start='b', end=('b', 'f'))
    (1, 3)

    See Also
    --------
    MultiIndex.get_loc : Get location for a label or a tuple of labels.
    MultiIndex.get_locs : Get location for a label/slice/list/mask or a
                          sequence of such.
    """
    # Overridden only to carry MultiIndex-specific documentation; the
    # tuple handling itself lives in get_slice_bound.
    parent = super(MultiIndex, self)
    return parent.slice_locs(start, end, step, kind=kind)
def _partial_tup_index(self, tup, side='left'):
    """
    Locate the ``side`` bound of a (possibly partial) key tuple.

    The key is resolved one level at a time, narrowing the
    ``[start, end)`` window of candidate positions as it goes.

    Parameters
    ----------
    tup : tuple
        One label per leading level.
    side : {'left', 'right'}, default 'left'

    Returns
    -------
    int
        Position of the bound.

    Raises
    ------
    UnsortedIndexError
        If the key is longer than the lexsort depth.
    TypeError
        If a label is absent from its level and its inferred type is not
        compatible with that level.
    """
    n = len(tup)
    if n > self.lexsort_depth:
        raise UnsortedIndexError(
            'Key length (%d) was greater than MultiIndex'
            ' lexsort depth (%d)' %
            (n, self.lexsort_depth))

    start, end = 0, len(self)
    last = n - 1

    for k, (lab, lev, labs) in enumerate(zip(tup, self.levels,
                                             self.codes)):
        section = labs[start:end]

        if lab not in lev:
            inferred = lib.infer_dtype([lab], skipna=False)
            if not lev.is_type_compatible(inferred):
                raise TypeError('Level type mismatch: %s' % lab)

            # short circuit
            loc = lev.searchsorted(lab, side=side)
            if side == 'right' and loc >= 0:
                loc -= 1
            return start + section.searchsorted(loc, side=side)

        idx = lev.get_loc(lab)
        if k == last:
            # Final key component: its bound is the answer.
            return start + section.searchsorted(idx, side=side)

        # Narrow the window to the block carrying this label; ``end``
        # must be computed before ``start`` is moved.
        end = start + section.searchsorted(idx, side='right')
        start = start + section.searchsorted(idx, side='left')
def get_loc(self, key, method=None):
    """
    Get location for a label or a tuple of labels as an integer, slice or
    boolean mask.

    Parameters
    ----------
    key : label or tuple of labels (one for each level)
    method : None

    Returns
    -------
    loc : int, slice object or boolean mask
        If the key is past the lexsort depth, the return may be a
        boolean mask array, otherwise it is always a slice or int.

    Raises
    ------
    NotImplementedError
        If ``method`` is not None.
    KeyError
        If the key has more elements than the index has levels, or if
        it is not found.

    Examples
    --------
    >>> mi = pd.MultiIndex.from_arrays([list('abb'), list('def')])

    >>> mi.get_loc('b')
    slice(1, 3, None)

    >>> mi.get_loc(('b', 'e'))
    1

    Notes
    -----
    The key cannot be a slice, list of same-level labels, a boolean mask,
    or a sequence of such. If you want to use those, use
    :meth:`MultiIndex.get_locs` instead.

    See Also
    --------
    Index.get_loc : The get_loc method for (single-level) index.
    MultiIndex.slice_locs : Get slice location given start label(s) and
                            end label(s).
    MultiIndex.get_locs : Get location for a label/slice/list/mask or a
                          sequence of such.
    """
    if method is not None:
        raise NotImplementedError('only the default get_loc method is '
                                  'currently supported for MultiIndex')

    def _maybe_to_slice(loc):
        """convert integer indexer to boolean mask or slice if possible"""
        if not isinstance(loc, np.ndarray) or loc.dtype != 'int64':
            return loc

        loc = lib.maybe_indices_to_slice(loc, len(self))
        if isinstance(loc, slice):
            return loc

        mask = np.zeros(len(self), dtype='bool')
        mask[loc] = True
        return mask

    if not isinstance(key, tuple):
        return _maybe_to_slice(self._get_level_indexer(key, level=0))

    keylen = len(key)
    if self.nlevels < keylen:
        raise KeyError('Key length ({0}) exceeds index depth ({1})'
                       ''.format(keylen, self.nlevels))

    if keylen == self.nlevels and self.is_unique:
        return self._engine.get_loc(key)

    # -- partial selection or non-unique index
    # break the key into 2 parts based on the lexsort_depth of the index;
    # the first part returns a continuous slice of the index; the 2nd part
    # needs linear search within the slice
    depth = self.lexsort_depth
    lead_key, follow_key = key[:depth], key[depth:]
    if lead_key:
        start, stop = self.slice_locs(lead_key, lead_key)
    else:
        start, stop = 0, len(self)

    if start == stop:
        raise KeyError(key)

    if not follow_key:
        return slice(start, stop)

    warnings.warn('indexing past lexsort depth may impact performance.',
                  PerformanceWarning, stacklevel=10)

    loc = np.arange(start, stop, dtype='int64')

    for lev_num, label in enumerate(follow_key, len(lead_key)):
        matches = (self.codes[lev_num][loc] ==
                   self.levels[lev_num].get_loc(label))
        if not matches.all():
            loc = loc[matches]
        if not len(loc):
            raise KeyError(key)

    # Every candidate survived: the whole contiguous range matches.
    if len(loc) == stop - start:
        return slice(start, stop)
    return _maybe_to_slice(loc)
def get_loc_level(self, key, level=0, drop_level=True):
    """
    Get both the location for the requested label(s) and the
    resulting sliced index.

    Parameters
    ----------
    key : label or sequence of labels
    level : int/level name or list thereof, optional
    drop_level : bool, default True
        if ``False``, the resulting index will not drop any level.

    Returns
    -------
    loc : A 2-tuple where the elements are:
          Element 0: int, slice object or boolean array
          Element 1: The resulting sliced multiindex/index. If the key
          contains all levels, this will be ``None``.

    Raises
    ------
    AssertionError
        If ``level`` is a list/tuple whose length differs from ``key``.
    TypeError
        If a tuple ``key`` looked up at level 0 contains (or resolves
        to) a slice that does not select everything.

    Examples
    --------
    >>> mi = pd.MultiIndex.from_arrays([list('abb'), list('def')],
    ...                                names=['A', 'B'])

    >>> mi.get_loc_level('b')
    (slice(1, 3, None), Index(['e', 'f'], dtype='object', name='B'))

    >>> mi.get_loc_level('e', level='B')
    (array([False,  True, False], dtype=bool),
    Index(['b'], dtype='object', name='A'))

    >>> mi.get_loc_level(['b', 'e'])
    (1, None)

    See Also
    --------
    MultiIndex.get_loc  : Get location for a label or a tuple of labels.
    MultiIndex.get_locs : Get location for a label/slice/list/mask or a
                          sequence of such.
    """

    def maybe_droplevels(indexer, levels, drop_level):
        # Slice self with ``indexer`` and, when requested, remove the
        # levels that were fixed by the key.
        if not drop_level:
            return self[indexer]
        # kludgearound
        orig_index = new_index = self[indexer]
        levels = [self._get_level_number(i) for i in levels]
        # Drop from the highest level number down so that the remaining
        # level numbers stay valid while dropping.
        for i in sorted(levels, reverse=True):
            try:
                new_index = new_index.droplevel(i)
            except ValueError:

                # no dropping here
                return orig_index
        return new_index

    if isinstance(level, (tuple, list)):
        # Several levels: resolve each (level, key) pair on its own and
        # combine the resulting locations with a logical AND.
        if len(key) != len(level):
            raise AssertionError('Key for location must have same '
                                 'length as number of levels')
        result = None
        for lev, k in zip(level, key):
            loc, new_index = self.get_loc_level(k, level=lev)
            if isinstance(loc, slice):
                # Turn the slice into a boolean mask so it can be AND-ed.
                mask = np.zeros(len(self), dtype=bool)
                mask[loc] = True
                loc = mask

            result = loc if result is None else result & loc

        return result, maybe_droplevels(result, level, drop_level)

    level = self._get_level_number(level)

    # kludge for #1796
    if isinstance(key, list):
        key = tuple(key)

    if isinstance(key, tuple) and level == 0:

        # The tuple may itself be a single label of the first level.
        try:
            if key in self.levels[0]:
                indexer = self._get_level_indexer(key, level=level)
                new_index = maybe_droplevels(indexer, [0], drop_level)
                return indexer, new_index
        except TypeError:
            pass

        if not any(isinstance(k, slice) for k in key):

            # partial selection
            # optionally get indexer to avoid re-calculation
            def partial_selection(key, indexer=None):
                if indexer is None:
                    indexer = self.get_loc(key)
                ilevels = [i for i in range(len(key))
                           if key[i] != slice(None, None)]
                return indexer, maybe_droplevels(indexer, ilevels,
                                                 drop_level)

            if len(key) == self.nlevels and self.is_unique:
                # Complete key in unique index -> standard get_loc
                return (self._engine.get_loc(key), None)
            else:
                return partial_selection(key)
        else:
            # The key contains at least one slice: resolve the non-slice
            # elements level by level and AND their indexers together.
            indexer = None
            for i, k in enumerate(key):
                if not isinstance(k, slice):
                    k = self._get_level_indexer(k, level=i)
                    if isinstance(k, slice):
                        # everything
                        if k.start == 0 and k.stop == len(self):
                            k = slice(None, None)
                    else:
                        k_index = k

                if isinstance(k, slice):
                    # Only the "select everything" slice is accepted
                    # here; it places no restriction on the result.
                    if k == slice(None, None):
                        continue
                    else:
                        raise TypeError(key)

                if indexer is None:
                    indexer = k_index
                else:  # pragma: no cover
                    indexer &= k_index
            if indexer is None:
                indexer = slice(None, None)
            ilevels = [i for i in range(len(key))
                       if key[i] != slice(None, None)]
            return indexer, maybe_droplevels(indexer, ilevels, drop_level)
    else:
        # Single label on a single level.
        indexer = self._get_level_indexer(key, level=level)
        return indexer, maybe_droplevels(indexer, [level], drop_level)
def _get_level_indexer(self, key, level=0, indexer=None):
    """
    Locate ``key`` within a single level.

    Parameters
    ----------
    key : label or slice of labels
    level : int, default 0
        Position of the level to search.
    indexer : optional
        Positions already selected by earlier levels. Only consulted
        for slice keys, to restrict the search to those positions.

    Returns
    -------
    slice, boolean ndarray or ndarray of positions
        A slice is returned when ``level`` is 0 and the lexsort depth is
        non-zero (and, for slice keys, no step is given); otherwise a
        boolean array, or mapped positions when a restricting
        ``indexer`` is supplied.

    Raises
    ------
    KeyError
        If a single label is not found, or is present in the level but
        not used by any entry.
    """
    # return an indexer, boolean array or a slice showing where the key is
    # in the totality of values
    # if the indexer is provided, then use this

    level_index = self.levels[level]
    level_codes = self.codes[level]

    def convert_indexer(start, stop, step, indexer=indexer,
                        codes=level_codes):
        # given the inputs and the codes/indexer, compute an indexer set
        # if we have a provided indexer, then this need not consider
        # the entire labels set

        r = np.arange(start, stop, step)
        if indexer is not None and len(indexer) != len(codes):

            # we have an indexer which maps the locations in the labels
            # that we have already selected (and is not an indexer for the
            # entire set) otherwise this is wasteful so we only need to
            # examine locations that are in this set the only magic here is
            # that the result are the mappings to the set that we have
            # selected
            from pandas import Series
            mapper = Series(indexer)
            indexer = codes.take(ensure_platform_int(indexer))
            result = Series(Index(indexer).isin(r).nonzero()[0])
            m = result.map(mapper)._ndarray_values

        else:
            # No restriction: flag every position whose code is in range.
            m = np.zeros(len(codes), dtype=bool)
            m[np.in1d(codes, r,
                      assume_unique=Index(codes).is_unique)] = True

        return m

    if isinstance(key, slice):
        # handle a slice, returning a slice if we can
        # otherwise a boolean indexer

        try:
            if key.start is not None:
                start = level_index.get_loc(key.start)
            else:
                start = 0
            if key.stop is not None:
                stop = level_index.get_loc(key.stop)
            else:
                stop = len(level_index) - 1
            step = key.step
        except KeyError:

            # we have a partial slice (like looking up a partial date
            # string)
            start = stop = level_index.slice_indexer(key.start, key.stop,
                                                     key.step, kind='loc')
            step = start.step

        if isinstance(start, slice) or isinstance(stop, slice):
            # we have a slice for start and/or stop
            # a partial date slicer on a DatetimeIndex generates a slice
            # note that the stop ALREADY includes the stopped point (if
            # it was a string sliced)
            return convert_indexer(start.start, stop.stop, step)

        elif level > 0 or self.lexsort_depth == 0 or step is not None:
            # need to have like semantics here to right
            # searching as when we are using a slice
            # so include the stop+1 (so we include stop)
            return convert_indexer(start, stop + 1, step)
        else:
            # sorted, so can return slice object -> view
            i = level_codes.searchsorted(start, side='left')
            j = level_codes.searchsorted(stop, side='right')
            return slice(i, j, step)

    else:

        # Single label: translate it to its integer code in the level.
        code = level_index.get_loc(key)

        if level > 0 or self.lexsort_depth == 0:
            # Desired level is not sorted
            locs = np.array(level_codes == code, dtype=bool, copy=False)
            if not locs.any():
                # The label is present in self.levels[level] but unused:
                raise KeyError(key)
            return locs

        # Sorted first level: the matches form one contiguous run.
        i = level_codes.searchsorted(code, side='left')
        j = level_codes.searchsorted(code, side='right')
        if i == j:
            # The label is present in self.levels[level] but unused:
            raise KeyError(key)
        return slice(i, j)
def get_locs(self, seq):
    """
    Get location for a given label/slice/list/mask or a sequence of such as
    an array of integers.

    Parameters
    ----------
    seq : label/slice/list/mask or a sequence of such
       You should use one of the above for each level.
       If a level should not be used, set it to ``slice(None)``.

    Returns
    -------
    locs : array of integers suitable for passing to iloc

    Raises
    ------
    UnsortedIndexError
        If a level is sliced that lies at or beyond the lexsort depth.
    ValueError
        If a boolean indexer does not have the same length as the index.

    Examples
    ---------
    >>> mi = pd.MultiIndex.from_arrays([list('abb'), list('def')])

    >>> mi.get_locs('b')
    array([1, 2], dtype=int64)

    >>> mi.get_locs([slice(None), ['e', 'f']])
    array([1, 2], dtype=int64)

    >>> mi.get_locs([[True, False, True], slice('e', 'f')])
    array([2], dtype=int64)

    See Also
    --------
    MultiIndex.get_loc : Get location for a label or a tuple of labels.
    MultiIndex.slice_locs : Get slice location given start label(s) and
                            end label(s).
    """
    from .numeric import Int64Index

    # must be lexsorted to at least as many levels
    true_slices = [i for (i, s) in enumerate(com.is_true_slices(seq)) if s]
    if true_slices and true_slices[-1] >= self.lexsort_depth:
        raise UnsortedIndexError('MultiIndex slicing requires the index '
                                 'to be lexsorted: slicing on levels {0}, '
                                 'lexsort depth {1}'
                                 .format(true_slices, self.lexsort_depth))
    # indexer
    # this is the list of all values that we want to select
    n = len(self)
    indexer = None

    def _convert_to_indexer(r):
        # return an indexer
        # (slices and boolean masks are expanded to integer positions)
        if isinstance(r, slice):
            m = np.zeros(n, dtype=bool)
            m[r] = True
            r = m.nonzero()[0]
        elif com.is_bool_indexer(r):
            if len(r) != n:
                raise ValueError("cannot index with a boolean indexer "
                                 "that is not the same length as the "
                                 "index")
            r = r.nonzero()[0]
        return Int64Index(r)

    def _update_indexer(idxr, indexer=indexer):
        # Intersect the positions selected so far with ``idxr``. The
        # default for ``indexer`` is bound to None when this helper is
        # defined, so every call below passes the current value
        # explicitly.
        if indexer is None:
            indexer = Index(np.arange(n))
        if idxr is None:
            return indexer
        return indexer & idxr

    for i, k in enumerate(seq):

        if com.is_bool_indexer(k):
            # a boolean indexer, must be the same length!
            k = np.asarray(k)
            indexer = _update_indexer(_convert_to_indexer(k),
                                      indexer=indexer)

        elif is_list_like(k):
            # a collection of labels to include from this level (these
            # are or'd)
            indexers = None
            for x in k:
                try:
                    idxrs = _convert_to_indexer(
                        self._get_level_indexer(x, level=i,
                                                indexer=indexer))
                    indexers = (idxrs if indexers is None
                                else indexers | idxrs)
                except KeyError:

                    # ignore not founds
                    continue

            if indexers is not None:
                indexer = _update_indexer(indexers, indexer=indexer)
            else:
                # no matches we are done
                return Int64Index([])._ndarray_values

        elif com.is_null_slice(k):
            # empty slice
            indexer = _update_indexer(None, indexer=indexer)

        elif isinstance(k, slice):

            # a slice, include BOTH of the labels
            indexer = _update_indexer(_convert_to_indexer(
                self._get_level_indexer(k, level=i, indexer=indexer)),
                indexer=indexer)
        else:
            # a single label
            indexer = _update_indexer(_convert_to_indexer(
                self.get_loc_level(k, level=i, drop_level=False)[0]),
                indexer=indexer)

    # empty indexer
    if indexer is None:
        return Int64Index([])._ndarray_values
    return indexer._ndarray_values
def truncate(self, before=None, after=None):
    """
    Slice index between two labels / tuples, return new MultiIndex

    Parameters
    ----------
    before : label or tuple, can be partial. Default None
        None defaults to start
    after : label or tuple, can be partial. Default None
        None defaults to end

    Returns
    -------
    truncated : MultiIndex
        Keeps the level names of this index.

    Raises
    ------
    ValueError
        If both bounds are given (and truthy) and ``after < before``.
    """
    if after and before and after < before:
        raise ValueError('after < before')

    # Bounds within the first level, and within the index as a whole.
    i, j = self.levels[0].slice_locs(before, after)
    left, right = self.slice_locs(before, after)

    new_levels = list(self.levels)
    new_levels[0] = new_levels[0][i:j]

    new_codes = [level_codes[left:right] for level_codes in self.codes]
    # The first level lost its first ``i`` labels, so shift its codes to
    # keep pointing at the same labels.
    new_codes[0] = new_codes[0] - i

    # Pass the names through so truncation does not discard them.
    return MultiIndex(levels=new_levels, codes=new_codes,
                      names=self.names, verify_integrity=False)
def equals(self, other):
    """
    Determines if two MultiIndex objects have the same labeling information
    (the levels themselves do not necessarily have to be the same)

    Parameters
    ----------
    other : object

    Returns
    -------
    bool

    See Also
    --------
    equal_levels
    """
    if self.is_(other):
        return True

    if not isinstance(other, Index):
        return False

    if not isinstance(other, MultiIndex):
        # Compare against the flat values of a non-MultiIndex.
        other_vals = com.values_from_object(ensure_index(other))
        return array_equivalent(self._ndarray_values, other_vals)

    if self.nlevels != other.nlevels or len(self) != len(other):
        return False

    def _present_values(index, lev):
        # Labels of level ``lev`` for the non-missing entries of ``index``.
        lev_codes = index.codes[lev]
        lev_codes = lev_codes[lev_codes != -1]
        return algos.take_nd(np.asarray(index.levels[lev]._values),
                             lev_codes, allow_fill=False)

    for lev in range(self.nlevels):
        mine = _present_values(self, lev)
        theirs = _present_values(other, lev)

        # since we use NaT both datetime64 and timedelta64
        # we can have a situation where a level is typed say
        # timedelta64 in self (IOW it has other values than NaT)
        # but types datetime64 in other (where its all NaT)
        # but these are equivalent
        if len(mine) == 0 and len(theirs) == 0:
            continue

        if not array_equivalent(mine, theirs):
            return False

    return True
def equal_levels(self, other):
    """
    Return True if the levels of both MultiIndex objects are the same

    Parameters
    ----------
    other : MultiIndex

    Returns
    -------
    bool
        False when the number of levels differs or any pair of levels
        at the same position is not equal.
    """
    if self.nlevels != other.nlevels:
        return False

    return all(self.levels[i].equals(other.levels[i])
               for i in range(self.nlevels))
def union(self, other, sort=None):
    """
    Form the union of two MultiIndex objects

    Parameters
    ----------
    other : MultiIndex or array / Index of tuples
    sort : False or None, default None
        Whether to sort the resulting Index.

        * None : Sort the result, except when

          1. `self` and `other` are equal.
          2. `self` has length 0.
          3. Some values in `self` or `other` cannot be compared.
             A RuntimeWarning is issued in this case.

        * False : do not sort the result.

        .. versionadded:: 0.24.0

        .. versionchanged:: 0.24.1

           Changed the default value from ``True`` to ``None``
           (without change in behaviour).

    Returns
    -------
    Index

    Examples
    --------
    >>> index.union(index2)
    """
    self._validate_sort_keyword(sort)
    self._assert_can_do_setop(other)
    other, result_names = self._convert_can_do_setop(other)

    # Nothing to add: hand back this very object.
    if len(other) == 0 or self.equals(other):
        return self

    # TODO: Index.union returns other when `len(self)` is 0.

    both = [self._ndarray_values, other._ndarray_values]
    uniq_tuples = lib.fast_unique_multiple(both, sort=sort)
    arrays = lzip(*uniq_tuples)

    return MultiIndex.from_arrays(arrays, sortorder=0,
                                  names=result_names)
def intersection(self, other, sort=False):
    """
    Form the intersection of two MultiIndex objects.

    Parameters
    ----------
    other : MultiIndex or array / Index of tuples
    sort : False or None, default False
        Sort the resulting MultiIndex if possible

        .. versionadded:: 0.24.0

        .. versionchanged:: 0.24.1

           Changed the default from ``True`` to ``False``, to match
           behaviour from before 0.24.0

    Returns
    -------
    Index
    """
    self._validate_sort_keyword(sort)
    self._assert_can_do_setop(other)
    other, result_names = self._convert_can_do_setop(other)

    if self.equals(other):
        return self

    common = set(self._ndarray_values) & set(other._ndarray_values)
    if sort is None:
        common = sorted(common)

    if common:
        return MultiIndex.from_arrays(lzip(*common), sortorder=0,
                                      names=result_names)

    # Empty result: keep this index's levels but carry no entries.
    return MultiIndex(levels=self.levels,
                      codes=[[]] * self.nlevels,
                      names=result_names, verify_integrity=False)
def difference(self, other, sort=None):
    """
    Compute set difference of two MultiIndex objects

    Parameters
    ----------
    other : MultiIndex
    sort : False or None, default None
        Sort the resulting MultiIndex if possible

        .. versionadded:: 0.24.0

        .. versionchanged:: 0.24.1

           Changed the default value from ``True`` to ``None``
           (without change in behaviour).

    Returns
    -------
    diff : MultiIndex
    """
    self._validate_sort_keyword(sort)
    self._assert_can_do_setop(other)
    other, result_names = self._convert_can_do_setop(other)

    if len(other) == 0:
        return self

    nlevels = self.nlevels
    if self.equals(other):
        return MultiIndex(levels=self.levels,
                          codes=[[]] * nlevels,
                          names=result_names, verify_integrity=False)

    this = self._get_unique_index()

    # Positions in ``this`` that also occur in ``other`` ...
    found = this.get_indexer(other)
    found = found.take((found != -1).nonzero()[0])

    # ... and the complementary positions, which form the difference.
    kept_positions = np.setdiff1d(np.arange(this.size), found,
                                  assume_unique=True)
    difference = this.values.take(kept_positions)
    if sort is None:
        difference = sorted(difference)

    if len(difference) == 0:
        return MultiIndex(levels=[[]] * nlevels,
                          codes=[[]] * nlevels,
                          names=result_names, verify_integrity=False)

    return MultiIndex.from_tuples(difference, sortorder=0,
                                  names=result_names)
@Appender(_index_shared_docs['astype'])
def astype(self, dtype, copy=True):
    # Deliberately no docstring: the Appender decorator above attaches the
    # shared 'astype' documentation to this method.
    dtype = pandas_dtype(dtype)

    if is_categorical_dtype(dtype):
        raise NotImplementedError(
            '> 1 ndim Categorical are not supported at this time')

    # Object is the only dtype this method accepts.
    if not is_object_dtype(dtype):
        msg = ('Setting {cls} dtype to anything other than object '
               'is not supported').format(cls=self.__class__)
        raise TypeError(msg)

    # Only the literal ``True`` requests a copy; any other value returns
    # the index itself.
    return self._shallow_copy() if copy is True else self
- def _convert_can_do_setop(self, other):
- result_names = self.names
- if not hasattr(other, 'names'):
- if len(other) == 0:
- other = MultiIndex(levels=[[]] * self.nlevels,
- codes=[[]] * self.nlevels,
- verify_integrity=False)
- else:
- msg = 'other must be a MultiIndex or a list of tuples'
- try:
- other = MultiIndex.from_tuples(other)
- except TypeError:
- raise TypeError(msg)
- else:
- result_names = self.names if self.names == other.names else None
- return other, result_names
def insert(self, loc, item):
    """
    Make new MultiIndex inserting new item at location

    Parameters
    ----------
    loc : int
    item : tuple
        Must be same length as number of levels in the MultiIndex.
        A non-tuple is taken as the key of the first level and padded
        with empty strings for the remaining levels.

    Returns
    -------
    new_index : Index

    Raises
    ------
    ValueError
        If ``item`` is a tuple whose length differs from the number of
        levels.
    """
    # Imported locally so this method does not depend on the module-level
    # import block.
    from pandas.core.dtypes.common import is_scalar
    from pandas.core.dtypes.missing import isna

    # Pad the key with empty strings if lower levels of the key
    # aren't specified:
    if not isinstance(item, tuple):
        item = (item, ) + ('', ) * (self.nlevels - 1)
    elif len(item) != self.nlevels:
        raise ValueError('Item must have length equal to number of '
                         'levels.')

    new_levels = []
    new_codes = []
    for k, level, level_codes in zip(item, self.levels, self.codes):
        if k in level:
            lev_loc = level.get_loc(k)
        elif is_scalar(k) and isna(k):
            # A missing key must not become a level value; it is
            # represented by the code -1 and the level stays untouched.
            lev_loc = -1
        else:
            # have to insert into level
            # must insert at end otherwise you have to recompute all the
            # other codes
            lev_loc = len(level)
            level = level.insert(lev_loc, k)

        new_levels.append(level)
        new_codes.append(np.insert(
            ensure_int64(level_codes), loc, lev_loc))

    return MultiIndex(levels=new_levels, codes=new_codes,
                      names=self.names, verify_integrity=False)
def delete(self, loc):
    """
    Return a new MultiIndex with the entries at ``loc`` removed.

    Parameters
    ----------
    loc : position(s) to drop
        Forwarded unchanged to ``numpy.delete`` for every level's codes.

    Returns
    -------
    new_index : MultiIndex
        Shares the levels and names of the calling index.
    """
    trimmed_codes = []
    for level_codes in self.codes:
        trimmed_codes.append(np.delete(level_codes, loc))

    return MultiIndex(levels=self.levels, codes=trimmed_codes,
                      names=self.names, verify_integrity=False)
- def _wrap_joined_index(self, joined, other):
- names = self.names if self.names == other.names else None
- return MultiIndex.from_tuples(joined, names=names)
@Appender(Index.isin.__doc__)
def isin(self, values, level=None):
    # Deliberately no docstring: the Appender decorator above attaches
    # ``Index.isin``'s documentation to this method.
    if level is not None:
        # Level-wise membership: find which entries of the level are in
        # ``values``, then test the level's codes against those positions.
        num = self._get_level_number(level)
        levs = self.levels[num]
        level_codes = self.codes[num]

        sought_labels = levs.isin(values).nonzero()[0]
        if levs.size == 0:
            return np.zeros(len(level_codes), dtype=np.bool_)
        return np.lib.arraysetops.in1d(level_codes, sought_labels)

    # Whole-tuple membership: compare against ``values`` converted to the
    # same tuple representation as our own entries.
    values = MultiIndex.from_tuples(values,
                                    names=self.names).values
    return algos.isin(self.values, values)
- MultiIndex._add_numeric_methods_disabled()
- MultiIndex._add_numeric_methods_add_sub_disabled()
- MultiIndex._add_logical_methods_disabled()
- def _sparsify(label_list, start=0, sentinel=''):
- pivoted = lzip(*label_list)
- k = len(label_list)
- result = pivoted[:start + 1]
- prev = pivoted[start]
- for cur in pivoted[start + 1:]:
- sparse_cur = []
- for i, (p, t) in enumerate(zip(prev, cur)):
- if i == k - 1:
- sparse_cur.append(t)
- result.append(sparse_cur)
- break
- if p == t:
- sparse_cur.append(sentinel)
- else:
- sparse_cur.extend(cur[i:])
- result.append(sparse_cur)
- break
- prev = cur
- return lzip(*result)
- def _get_na_rep(dtype):
- return {np.datetime64: 'NaT', np.timedelta64: 'NaT'}.get(dtype, 'NaN')
|