# pandas/core/indexes/base.py

from datetime import datetime, timedelta
import operator
from textwrap import dedent
import warnings

import numpy as np

from pandas._libs import (
    Timedelta, algos as libalgos, index as libindex, join as libjoin, lib,
    tslibs)
from pandas._libs.lib import is_datetime_array

import pandas.compat as compat
from pandas.compat import range, set_function_name, u
from pandas.compat.numpy import function as nv
from pandas.util._decorators import Appender, Substitution, cache_readonly

from pandas.core.dtypes.cast import maybe_cast_to_integer_array
from pandas.core.dtypes.common import (
    ensure_categorical, ensure_int64, ensure_object, ensure_platform_int,
    is_bool, is_bool_dtype, is_categorical, is_categorical_dtype,
    is_datetime64_any_dtype, is_datetime64tz_dtype, is_dtype_equal,
    is_dtype_union_equal, is_extension_array_dtype, is_float, is_float_dtype,
    is_hashable, is_integer, is_integer_dtype, is_interval_dtype, is_iterator,
    is_list_like, is_object_dtype, is_period_dtype, is_scalar,
    is_signed_integer_dtype, is_timedelta64_dtype, is_unsigned_integer_dtype,
    pandas_dtype)
import pandas.core.dtypes.concat as _concat
from pandas.core.dtypes.generic import (
    ABCDataFrame, ABCDateOffset, ABCDatetimeArray, ABCIndexClass,
    ABCMultiIndex, ABCPandasArray, ABCPeriodIndex, ABCSeries,
    ABCTimedeltaArray, ABCTimedeltaIndex)
from pandas.core.dtypes.missing import array_equivalent, isna

from pandas.core import ops
from pandas.core.accessor import CachedAccessor, DirNamesMixin
import pandas.core.algorithms as algos
from pandas.core.arrays import ExtensionArray
from pandas.core.base import IndexOpsMixin, PandasObject
import pandas.core.common as com
from pandas.core.indexes.frozen import FrozenList
import pandas.core.missing as missing
from pandas.core.ops import get_op_result_name, make_invalid_op
import pandas.core.sorting as sorting
from pandas.core.strings import StringMethods
from pandas.io.formats.printing import (
    default_pprint, format_object_attrs, format_object_summary, pprint_thing)

__all__ = ['Index']

_unsortable_types = frozenset(('mixed', 'mixed-integer'))

_index_doc_kwargs = dict(klass='Index', inplace='',
                         target_klass='Index',
                         unique='Index', duplicated='np.ndarray')
_index_shared_docs = dict()


def _try_get_item(x):
    try:
        return x.item()
    except AttributeError:
        return x


def _make_comparison_op(op, cls):
    def cmp_method(self, other):
        if isinstance(other, (np.ndarray, Index, ABCSeries)):
            if other.ndim > 0 and len(self) != len(other):
                raise ValueError('Lengths must match to compare')

        if is_object_dtype(self) and not isinstance(self, ABCMultiIndex):
            # don't pass MultiIndex
            with np.errstate(all='ignore'):
                result = ops._comp_method_OBJECT_ARRAY(op, self.values, other)
        else:
            # numpy will show a DeprecationWarning on invalid elementwise
            # comparisons, this will raise in the future
            with warnings.catch_warnings(record=True):
                warnings.filterwarnings("ignore", "elementwise", FutureWarning)
                with np.errstate(all='ignore'):
                    result = op(self.values, np.asarray(other))

        # technically we could support bool dtyped Index
        # for now just return the indexing array directly
        if is_bool_dtype(result):
            return result
        try:
            return Index(result)
        except TypeError:
            return result

    name = '__{name}__'.format(name=op.__name__)
    # TODO: docstring?
    return set_function_name(cmp_method, name, cls)
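
# These factories are not invoked here; further down this module (outside this
# excerpt) the Index class wires them onto itself, roughly as sketched below.
# The exact attachment lives in Index._add_comparison_methods; this is an
# illustration, not a verbatim copy:
#
#     cls.__eq__ = _make_comparison_op(operator.eq, cls)
#     cls.__ne__ = _make_comparison_op(operator.ne, cls)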


def _make_arithmetic_op(op, cls):
    def index_arithmetic_method(self, other):
        if isinstance(other, (ABCSeries, ABCDataFrame)):
            return NotImplemented
        elif isinstance(other, ABCTimedeltaIndex):
            # Defer to subclass implementation
            return NotImplemented
        elif (isinstance(other, (np.ndarray, ABCTimedeltaArray)) and
              is_timedelta64_dtype(other)):
            # GH#22390; wrap in Series for op, this will in turn wrap in
            # TimedeltaIndex, but will correctly raise TypeError instead of
            # NullFrequencyError for add/sub ops
            from pandas import Series
            other = Series(other)
            out = op(self, other)
            return Index(out, name=self.name)

        other = self._validate_for_numeric_binop(other, op)

        # handle time-based others
        if isinstance(other, (ABCDateOffset, np.timedelta64, timedelta)):
            return self._evaluate_with_timedelta_like(other, op)
        elif isinstance(other, (datetime, np.datetime64)):
            return self._evaluate_with_datetime_like(other, op)

        values = self.values
        with np.errstate(all='ignore'):
            result = op(values, other)

        result = missing.dispatch_missing(op, values, other, result)

        attrs = self._get_attributes_dict()
        attrs = self._maybe_update_attributes(attrs)
        if op is divmod:
            result = (Index(result[0], **attrs), Index(result[1], **attrs))
        else:
            result = Index(result, **attrs)
        return result

    name = '__{name}__'.format(name=op.__name__)
    # TODO: docstring?
    return set_function_name(index_arithmetic_method, name, cls)


class InvalidIndexError(Exception):
    pass


_o_dtype = np.dtype(object)
_Identity = object


def _new_Index(cls, d):
    """
    This is called upon unpickling, rather than the default which doesn't
    have arguments and breaks __new__.
    """
    # required for backward compat, because PI can't be instantiated with
    # ordinals through __new__ GH #13277
    if issubclass(cls, ABCPeriodIndex):
        from pandas.core.indexes.period import _new_PeriodIndex
        return _new_PeriodIndex(cls, **d)
    return cls.__new__(cls, **d)
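
# _new_Index pairs with Index.__reduce__ (defined later in this file, outside
# this excerpt), which pickles an Index roughly as
#     return _new_Index, (self.__class__, d), None
# where ``d`` carries ``data`` plus the attribute dict. Sketch only; see the
# actual __reduce__ implementation for the exact form.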


class Index(IndexOpsMixin, PandasObject):
    """
    Immutable ndarray implementing an ordered, sliceable set. The basic object
    storing axis labels for all pandas objects.

    Parameters
    ----------
    data : array-like (1-dimensional)
    dtype : NumPy dtype (default: object)
        If dtype is None, we find the dtype that best fits the data.
        If an actual dtype is provided, we coerce to that dtype if it's safe.
        Otherwise, an error will be raised.
    copy : bool
        Make a copy of input ndarray
    name : object
        Name to be stored in the index
    tupleize_cols : bool (default: True)
        When True, attempt to create a MultiIndex if possible

    See Also
    --------
    RangeIndex : Index implementing a monotonic integer range.
    CategoricalIndex : Index of :class:`Categorical` s.
    MultiIndex : A multi-level, or hierarchical, Index.
    IntervalIndex : An Index of :class:`Interval` s.
    DatetimeIndex, TimedeltaIndex, PeriodIndex
    Int64Index, UInt64Index, Float64Index

    Notes
    -----
    An Index instance can **only** contain hashable objects

    Examples
    --------
    >>> pd.Index([1, 2, 3])
    Int64Index([1, 2, 3], dtype='int64')

    >>> pd.Index(list('abc'))
    Index(['a', 'b', 'c'], dtype='object')
    """
    # tolist is not actually deprecated, just suppressed in the __dir__
    _deprecations = DirNamesMixin._deprecations | frozenset(['tolist'])

    # To hand over control to subclasses
    _join_precedence = 1

    # Cython methods; see github.com/cython/cython/issues/2647
    # for why we need to wrap these instead of making them class attributes
    # Moreover, cython will choose the appropriate-dtyped sub-function
    # given the dtypes of the passed arguments
    def _left_indexer_unique(self, left, right):
        return libjoin.left_join_indexer_unique(left, right)

    def _left_indexer(self, left, right):
        return libjoin.left_join_indexer(left, right)

    def _inner_indexer(self, left, right):
        return libjoin.inner_join_indexer(left, right)

    def _outer_indexer(self, left, right):
        return libjoin.outer_join_indexer(left, right)

    _typ = 'index'
    _data = None
    _id = None
    name = None
    asi8 = None
    _comparables = ['name']
    _attributes = ['name']
    _is_numeric_dtype = False
    _can_hold_na = True

    # would we like our indexing holder to defer to us
    _defer_to_indexing = False

    # prioritize current class for _shallow_copy_with_infer,
    # used to infer integers as datetime-likes
    _infer_as_myclass = False

    _engine_type = libindex.ObjectEngine

    _accessors = {'str'}

    str = CachedAccessor("str", StringMethods)

    # --------------------------------------------------------------------
    # Constructors

    def __new__(cls, data=None, dtype=None, copy=False, name=None,
                fastpath=None, tupleize_cols=True, **kwargs):

        if name is None and hasattr(data, 'name'):
            name = data.name

        if fastpath is not None:
            warnings.warn("The 'fastpath' keyword is deprecated, and will be "
                          "removed in a future version.",
                          FutureWarning, stacklevel=2)
            if fastpath:
                return cls._simple_new(data, name)

        from .range import RangeIndex
        if isinstance(data, ABCPandasArray):
            # ensure users don't accidentally put a PandasArray in an index.
            data = data.to_numpy()

        # range
        if isinstance(data, RangeIndex):
            return RangeIndex(start=data, copy=copy, dtype=dtype, name=name)
        elif isinstance(data, range):
            return RangeIndex.from_range(data, copy=copy, dtype=dtype,
                                         name=name)

        # categorical
        elif is_categorical_dtype(data) or is_categorical_dtype(dtype):
            from .category import CategoricalIndex
            return CategoricalIndex(data, dtype=dtype, copy=copy, name=name,
                                    **kwargs)

        # interval
        elif ((is_interval_dtype(data) or is_interval_dtype(dtype)) and
              not is_object_dtype(dtype)):
            from .interval import IntervalIndex
            closed = kwargs.get('closed', None)
            return IntervalIndex(data, dtype=dtype, name=name, copy=copy,
                                 closed=closed)

        elif (is_datetime64_any_dtype(data) or
              (dtype is not None and is_datetime64_any_dtype(dtype)) or
              'tz' in kwargs):
            from pandas import DatetimeIndex

            if dtype is not None and is_dtype_equal(_o_dtype, dtype):
                # GH#23524 passing `dtype=object` to DatetimeIndex is invalid,
                # will raise in the case where `data` is already tz-aware. So
                # we leave it out of this step and cast to object-dtype after
                # the DatetimeIndex construction.
                # Note we can pass copy=False because the .astype below
                # will always make a copy
                result = DatetimeIndex(data, copy=False, name=name, **kwargs)
                return result.astype(object)
            else:
                result = DatetimeIndex(data, copy=copy, name=name,
                                       dtype=dtype, **kwargs)
                return result

        elif (is_timedelta64_dtype(data) or
              (dtype is not None and is_timedelta64_dtype(dtype))):
            from pandas import TimedeltaIndex
            if dtype is not None and is_dtype_equal(_o_dtype, dtype):
                # Note we can pass copy=False because the .astype below
                # will always make a copy
                result = TimedeltaIndex(data, copy=False, name=name, **kwargs)
                return result.astype(object)
            else:
                result = TimedeltaIndex(data, copy=copy, name=name,
                                        dtype=dtype, **kwargs)
                return result

        elif is_period_dtype(data) and not is_object_dtype(dtype):
            from pandas import PeriodIndex
            result = PeriodIndex(data, copy=copy, name=name, **kwargs)
            return result

        # extension dtype
        elif is_extension_array_dtype(data) or is_extension_array_dtype(dtype):
            data = np.asarray(data)
            if not (dtype is None or is_object_dtype(dtype)):
                # coerce to the provided dtype
                data = dtype.construct_array_type()._from_sequence(
                    data, dtype=dtype, copy=False)

            # coerce to the object dtype
            data = data.astype(object)
            return Index(data, dtype=object, copy=copy, name=name,
                         **kwargs)

        # index-like
        elif isinstance(data, (np.ndarray, Index, ABCSeries)):
            if dtype is not None:
                try:
                    # we need to avoid having numpy coerce
                    # things that look like ints/floats to ints unless
                    # they are actually ints, e.g. '0' and 0.0
                    # should not be coerced
                    # GH 11836
                    if is_integer_dtype(dtype):
                        inferred = lib.infer_dtype(data, skipna=False)
                        if inferred == 'integer':
                            data = maybe_cast_to_integer_array(data, dtype,
                                                               copy=copy)
                        elif inferred in ['floating', 'mixed-integer-float']:
                            if isna(data).any():
                                raise ValueError('cannot convert float '
                                                 'NaN to integer')

                            if inferred == "mixed-integer-float":
                                data = maybe_cast_to_integer_array(data, dtype)

                            # If we are actually all equal to integers,
                            # then coerce to integer.
                            try:
                                return cls._try_convert_to_int_index(
                                    data, copy, name, dtype)
                            except ValueError:
                                pass

                            # Return an actual float index.
                            from .numeric import Float64Index
                            return Float64Index(data, copy=copy, dtype=dtype,
                                                name=name)

                        elif inferred == 'string':
                            pass
                        else:
                            data = data.astype(dtype)
                    elif is_float_dtype(dtype):
                        inferred = lib.infer_dtype(data, skipna=False)
                        if inferred == 'string':
                            pass
                        else:
                            data = data.astype(dtype)
                    else:
                        data = np.array(data, dtype=dtype, copy=copy)

                except (TypeError, ValueError) as e:
                    msg = str(e)
                    if ("cannot convert float" in msg or
                            "Trying to coerce float values to integer" in msg):
                        raise

            # maybe coerce to a sub-class
            from pandas.core.indexes.period import (
                PeriodIndex, IncompatibleFrequency)

            if is_signed_integer_dtype(data.dtype):
                from .numeric import Int64Index
                return Int64Index(data, copy=copy, dtype=dtype, name=name)
            elif is_unsigned_integer_dtype(data.dtype):
                from .numeric import UInt64Index
                return UInt64Index(data, copy=copy, dtype=dtype, name=name)
            elif is_float_dtype(data.dtype):
                from .numeric import Float64Index
                return Float64Index(data, copy=copy, dtype=dtype, name=name)
            elif issubclass(data.dtype.type, np.bool) or is_bool_dtype(data):
                subarr = data.astype('object')
            else:
                subarr = com.asarray_tuplesafe(data, dtype=object)

            # asarray_tuplesafe does not always copy underlying data,
            # so need to make sure that this happens
            if copy:
                subarr = subarr.copy()

            if dtype is None:
                inferred = lib.infer_dtype(subarr, skipna=False)
                if inferred == 'integer':
                    try:
                        return cls._try_convert_to_int_index(
                            subarr, copy, name, dtype)
                    except ValueError:
                        pass

                    return Index(subarr, copy=copy,
                                 dtype=object, name=name)
                elif inferred in ['floating', 'mixed-integer-float']:
                    from .numeric import Float64Index
                    return Float64Index(subarr, copy=copy, name=name)
                elif inferred == 'interval':
                    from .interval import IntervalIndex
                    return IntervalIndex(subarr, name=name, copy=copy)
                elif inferred == 'boolean':
                    # don't support boolean explicitly ATM
                    pass
                elif inferred != 'string':
                    if inferred.startswith('datetime'):
                        if (lib.is_datetime_with_singletz_array(subarr) or
                                'tz' in kwargs):
                            # only when subarr has the same tz
                            from pandas import DatetimeIndex
                            try:
                                return DatetimeIndex(subarr, copy=copy,
                                                     name=name, **kwargs)
                            except tslibs.OutOfBoundsDatetime:
                                pass

                    elif inferred.startswith('timedelta'):
                        from pandas import TimedeltaIndex
                        return TimedeltaIndex(subarr, copy=copy, name=name,
                                              **kwargs)
                    elif inferred == 'period':
                        try:
                            return PeriodIndex(subarr, name=name, **kwargs)
                        except IncompatibleFrequency:
                            pass
            return cls._simple_new(subarr, name)

        elif hasattr(data, '__array__'):
            return Index(np.asarray(data), dtype=dtype, copy=copy, name=name,
                         **kwargs)
        elif data is None or is_scalar(data):
            cls._scalar_data_error(data)
        else:
            if tupleize_cols and is_list_like(data):
                # GH21470: convert iterable to list before determining if empty
                if is_iterator(data):
                    data = list(data)

                if data and all(isinstance(e, tuple) for e in data):
                    # we must be all tuples, otherwise don't construct
                    # 10697
                    from .multi import MultiIndex
                    return MultiIndex.from_tuples(
                        data, names=name or kwargs.get('names'))
            # other iterable of some kind
            subarr = com.asarray_tuplesafe(data, dtype=object)
            return Index(subarr, dtype=dtype, copy=copy, name=name, **kwargs)
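
    # Rough illustration of the dispatch performed by __new__ above; the exact
    # reprs depend on the pandas version this file ships with:
    #
    #     >>> pd.Index([1, 2, 3])             # integer data -> Int64Index
    #     Int64Index([1, 2, 3], dtype='int64')
    #     >>> pd.Index([1.0, 2.0])            # float data -> Float64Index
    #     Float64Index([1.0, 2.0], dtype='float64')
    #     >>> pd.Index([('a', 1), ('b', 2)])  # all tuples -> MultiIndex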

    """
    NOTE for new Index creation:

    - _simple_new: It returns new Index with the same type as the caller.
      All metadata (such as name) must be provided by caller's responsibility.
      Using _shallow_copy is recommended because it fills these metadata
      otherwise specified.

    - _shallow_copy: It returns new Index with the same type (using
      _simple_new), but fills caller's metadata otherwise specified. Passed
      kwargs will overwrite corresponding metadata.

    - _shallow_copy_with_infer: It returns new Index inferring its type
      from passed values. It fills caller's metadata otherwise specified as the
      same as _shallow_copy.

    See each method's docstring.
    """

    @classmethod
    def _simple_new(cls, values, name=None, dtype=None, **kwargs):
        """
        We require that we have a dtype compat for the values. If we are passed
        a non-dtype compat, then coerce using the constructor.

        Must be careful not to recurse.
        """
        if not hasattr(values, 'dtype'):
            if (values is None or not len(values)) and dtype is not None:
                values = np.empty(0, dtype=dtype)
            else:
                values = np.array(values, copy=False)
                if is_object_dtype(values):
                    values = cls(values, name=name, dtype=dtype,
                                 **kwargs)._ndarray_values

        if isinstance(values, (ABCSeries, ABCIndexClass)):
            # Index._data must always be an ndarray.
            # This is no-copy for when _values is an ndarray,
            # which should be always at this point.
            values = np.asarray(values._values)

        result = object.__new__(cls)
        result._data = values
        # _index_data is a (temporary?) fix to ensure that the direct data
        # manipulation we do in `_libs/reduction.pyx` continues to work.
        # We need access to the actual ndarray, since we're messing with
        # data buffers and strides. We don't re-use `_ndarray_values`, since
        # we actually set this value too.
        result._index_data = values
        result.name = name
        for k, v in compat.iteritems(kwargs):
            setattr(result, k, v)
        return result._reset_identity()

    @cache_readonly
    def _constructor(self):
        return type(self)

    # --------------------------------------------------------------------
    # Index Internals Methods

    def _get_attributes_dict(self):
        """
        Return an attributes dict for my class.
        """
        return {k: getattr(self, k, None) for k in self._attributes}

    _index_shared_docs['_shallow_copy'] = """
        Create a new Index with the same class as the caller, don't copy the
        data, use the same object attributes with passed in attributes taking
        precedence.

        *this is an internal non-public method*

        Parameters
        ----------
        values : the values to create the new Index, optional
        kwargs : updates the default attributes for this Index
        """

    @Appender(_index_shared_docs['_shallow_copy'])
    def _shallow_copy(self, values=None, **kwargs):
        if values is None:
            values = self.values

        attributes = self._get_attributes_dict()
        attributes.update(kwargs)

        if not len(values) and 'dtype' not in kwargs:
            attributes['dtype'] = self.dtype

        # _simple_new expects the type of self._data
        values = getattr(values, '_values', values)
        if isinstance(values, ABCDatetimeArray):
            # `self.values` returns `self` for tz-aware, so we need to unwrap
            # more specifically
            values = values.asi8

        return self._simple_new(values, **attributes)

    def _shallow_copy_with_infer(self, values, **kwargs):
        """
        Create a new Index inferring the class with passed value, don't copy
        the data, use the same object attributes with passed in attributes
        taking precedence.

        *this is an internal non-public method*

        Parameters
        ----------
        values : the values to create the new Index, optional
        kwargs : updates the default attributes for this Index
        """
        attributes = self._get_attributes_dict()
        attributes.update(kwargs)
        attributes['copy'] = False
        if not len(values) and 'dtype' not in kwargs:
            attributes['dtype'] = self.dtype
        if self._infer_as_myclass:
            try:
                return self._constructor(values, **attributes)
            except (TypeError, ValueError):
                pass
        return Index(values, **attributes)

    def _update_inplace(self, result, **kwargs):
        # guard when called from IndexOpsMixin
        raise TypeError("Index can't be updated inplace")

    def is_(self, other):
        """
        More flexible, faster check like ``is`` but that works through views.

        Note: this is *not* the same as ``Index.identical()``, which checks
        that metadata is also the same.

        Parameters
        ----------
        other : object
            other object to compare against.

        Returns
        -------
        True if both have same underlying data, False otherwise : bool
        """
        # use something other than None to be clearer
        return self._id is getattr(
            other, '_id', Ellipsis) and self._id is not None
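
    # For example (illustrative, not from the original source):
    # ``idx.is_(idx.view())`` is True because ``view`` propagates ``_id``,
    # whereas ``idx.is_(idx.copy())`` is False because the copy gets a fresh
    # identity via _reset_identity.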

    def _reset_identity(self):
        """
        Initializes or resets ``_id`` attribute with new object.
        """
        self._id = _Identity()
        return self

    def _cleanup(self):
        self._engine.clear_mapping()

    @cache_readonly
    def _engine(self):
        # property, for now, slow to look up
        return self._engine_type(lambda: self._ndarray_values, len(self))
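
    # The engine is the hash-table-backed lookup structure used by label
    # lookups such as get_loc/get_indexer defined later in this file; it is
    # built lazily from ``_ndarray_values`` and dropped by ``_cleanup`` above.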

    # --------------------------------------------------------------------
    # Array-Like Methods

    # ndarray compat
    def __len__(self):
        """
        Return the length of the Index.
        """
        return len(self._data)

    def __array__(self, dtype=None):
        """
        The array interface, return my values.
        """
        return np.asarray(self._data, dtype=dtype)

    def __array_wrap__(self, result, context=None):
        """
        Gets called after a ufunc.
        """
        result = lib.item_from_zerodim(result)
        if is_bool_dtype(result) or lib.is_scalar(result):
            return result

        attrs = self._get_attributes_dict()
        attrs = self._maybe_update_attributes(attrs)
        return Index(result, **attrs)

    @cache_readonly
    def dtype(self):
        """
        Return the dtype object of the underlying data.
        """
        return self._data.dtype

    @cache_readonly
    def dtype_str(self):
        """
        Return the dtype str of the underlying data.
        """
        return str(self.dtype)

    def ravel(self, order='C'):
        """
        Return an ndarray of the flattened values of the underlying data.

        See Also
        --------
        numpy.ndarray.ravel
        """
        return self._ndarray_values.ravel(order=order)

    def view(self, cls=None):

        # we need to see if we are subclassing an
        # index type here
        if cls is not None and not hasattr(cls, '_typ'):
            result = self._data.view(cls)
        else:
            result = self._shallow_copy()
        if isinstance(result, Index):
            result._id = self._id
        return result

    _index_shared_docs['astype'] = """
        Create an Index with values cast to dtypes. The class of a new Index
        is determined by dtype. When conversion is impossible, a ValueError
        exception is raised.

        Parameters
        ----------
        dtype : numpy dtype or pandas type
            Note that any signed integer `dtype` is treated as ``'int64'``,
            and any unsigned integer `dtype` is treated as ``'uint64'``,
            regardless of the size.
        copy : bool, default True
            By default, astype always returns a newly allocated object.
            If copy is set to False and internal requirements on dtype are
            satisfied, the original data is used to create a new Index
            or the original Index is returned.

            .. versionadded:: 0.19.0
        """

    @Appender(_index_shared_docs['astype'])
    def astype(self, dtype, copy=True):
        if is_dtype_equal(self.dtype, dtype):
            return self.copy() if copy else self

        elif is_categorical_dtype(dtype):
            from .category import CategoricalIndex
            return CategoricalIndex(self.values, name=self.name, dtype=dtype,
                                    copy=copy)

        elif is_datetime64tz_dtype(dtype):
            # TODO(GH-24559): Remove this block, use the following elif.
            # avoid FutureWarning from DatetimeIndex constructor.
            from pandas import DatetimeIndex
            tz = pandas_dtype(dtype).tz
            return (DatetimeIndex(np.asarray(self))
                    .tz_localize("UTC").tz_convert(tz))

        elif is_extension_array_dtype(dtype):
            return Index(np.asarray(self), dtype=dtype, copy=copy)

        try:
            if is_datetime64tz_dtype(dtype):
                from pandas import DatetimeIndex
                return DatetimeIndex(self.values, name=self.name, dtype=dtype,
                                     copy=copy)
            return Index(self.values.astype(dtype, copy=copy), name=self.name,
                         dtype=dtype)
        except (TypeError, ValueError):
            msg = 'Cannot cast {name} to dtype {dtype}'
            raise TypeError(msg.format(name=type(self).__name__, dtype=dtype))
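
    # Illustrative behaviour (results assume this pandas version's Index
    # classes; not taken from the original docstring):
    #
    #     >>> pd.Index([1, 2, 3]).astype('float64')
    #     Float64Index([1.0, 2.0, 3.0], dtype='float64')
    #     >>> pd.Index(['a', 'b']).astype('int64')
    #     TypeError: Cannot cast Index to dtype int64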

    _index_shared_docs['take'] = """
        Return a new %(klass)s of the values selected by the indices.

        For internal compatibility with numpy arrays.

        Parameters
        ----------
        indices : list
            Indices to be taken
        axis : int, optional
            The axis over which to select values, always 0.
        allow_fill : bool, default True
        fill_value : bool, default None
            If allow_fill=True and fill_value is not None, indices specified by
            -1 are regarded as NA. If Index doesn't hold NA, raise ValueError.

        See Also
        --------
        numpy.ndarray.take
        """

    @Appender(_index_shared_docs['take'] % _index_doc_kwargs)
    def take(self, indices, axis=0, allow_fill=True,
             fill_value=None, **kwargs):
        if kwargs:
            nv.validate_take(tuple(), kwargs)
        indices = ensure_platform_int(indices)
        if self._can_hold_na:
            taken = self._assert_take_fillable(self.values, indices,
                                               allow_fill=allow_fill,
                                               fill_value=fill_value,
                                               na_value=self._na_value)
        else:
            if allow_fill and fill_value is not None:
                msg = 'Unable to fill values because {0} cannot contain NA'
                raise ValueError(msg.format(self.__class__.__name__))
            taken = self.values.take(indices)
        return self._shallow_copy(taken)
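
    # Illustrative use (not part of the shared docstring above):
    #
    #     >>> pd.Index(['a', 'b', 'c']).take([2, 0, 1])
    #     Index(['c', 'a', 'b'], dtype='object')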

    def _assert_take_fillable(self, values, indices, allow_fill=True,
                              fill_value=None, na_value=np.nan):
        """
        Internal method to handle NA filling of take.
        """
        indices = ensure_platform_int(indices)

        # only fill if we are passing a non-None fill_value
        if allow_fill and fill_value is not None:
            if (indices < -1).any():
                msg = ('When allow_fill=True and fill_value is not None, '
                       'all indices must be >= -1')
                raise ValueError(msg)
            taken = algos.take(values,
                               indices,
                               allow_fill=allow_fill,
                               fill_value=na_value)
        else:
            taken = values.take(indices)
        return taken

    _index_shared_docs['repeat'] = """
        Repeat elements of a %(klass)s.

        Returns a new %(klass)s where each element of the current %(klass)s
        is repeated consecutively a given number of times.

        Parameters
        ----------
        repeats : int or array of ints
            The number of repetitions for each element. This should be a
            non-negative integer. Repeating 0 times will return an empty
            %(klass)s.
        axis : None
            Must be ``None``. Has no effect but is accepted for compatibility
            with numpy.

        Returns
        -------
        repeated_index : %(klass)s
            Newly created %(klass)s with repeated elements.

        See Also
        --------
        Series.repeat : Equivalent function for Series.
        numpy.repeat : Similar method for :class:`numpy.ndarray`.

        Examples
        --------
        >>> idx = pd.Index(['a', 'b', 'c'])
        >>> idx
        Index(['a', 'b', 'c'], dtype='object')
        >>> idx.repeat(2)
        Index(['a', 'a', 'b', 'b', 'c', 'c'], dtype='object')
        >>> idx.repeat([1, 2, 3])
        Index(['a', 'b', 'b', 'c', 'c', 'c'], dtype='object')
        """

    @Appender(_index_shared_docs['repeat'] % _index_doc_kwargs)
    def repeat(self, repeats, axis=None):
        nv.validate_repeat(tuple(), dict(axis=axis))
        return self._shallow_copy(self._values.repeat(repeats))

    # --------------------------------------------------------------------
    # Copying Methods

    _index_shared_docs['copy'] = """
        Make a copy of this object. Name and dtype set those attributes on
        the new object.

        Parameters
        ----------
        name : string, optional
        deep : boolean, default False
        dtype : numpy dtype or pandas type

        Returns
        -------
        copy : Index

        Notes
        -----
        In most cases, there should be no functional difference from using
        ``deep``, but if ``deep`` is passed it will attempt to deepcopy.
        """

    @Appender(_index_shared_docs['copy'])
    def copy(self, name=None, deep=False, dtype=None, **kwargs):
        if deep:
            new_index = self._shallow_copy(self._data.copy())
        else:
            new_index = self._shallow_copy()

        names = kwargs.get('names')
        names = self._validate_names(name=name, names=names, deep=deep)
        new_index = new_index.set_names(names)

        if dtype:
            new_index = new_index.astype(dtype)
        return new_index
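
    # Note that ``copy`` always returns a new Index object; ``deep=True``
    # additionally copies the underlying data buffer (via ``self._data.copy()``
    # above), whereas the default shallow copy reuses the existing values.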

    def __copy__(self, **kwargs):
        return self.copy(**kwargs)

    def __deepcopy__(self, memo=None):
        """
        Parameters
        ----------
        memo, default None
            Standard signature. Unused
        """
        if memo is None:
            memo = {}
        return self.copy(deep=True)

    # --------------------------------------------------------------------
    # Rendering Methods

    def __unicode__(self):
        """
        Return a string representation for this object.

        Invoked by unicode(df) in py2 only. Yields a Unicode String in both
        py2/py3.
        """
        klass = self.__class__.__name__
        data = self._format_data()
        attrs = self._format_attrs()
        space = self._format_space()

        prepr = (u(",%s") %
                 space).join(u("%s=%s") % (k, v) for k, v in attrs)

        # no data provided, just attributes
        if data is None:
            data = ''

        res = u("%s(%s%s)") % (klass, data, prepr)

        return res

    def _format_space(self):

        # using space here controls if the attributes
        # are line separated or not (the default)

        # max_seq_items = get_option('display.max_seq_items')
        # if len(self) > max_seq_items:
        #     space = "\n%s" % (' ' * (len(klass) + 1))
        return " "

    @property
    def _formatter_func(self):
        """
        Return the formatter function.
        """
        return default_pprint

    def _format_data(self, name=None):
        """
        Return the formatted data as a unicode string.
        """

        # do we want to justify (only do so for non-objects)
        is_justify = not (self.inferred_type in ('string', 'unicode') or
                          (self.inferred_type == 'categorical' and
                           is_object_dtype(self.categories)))

        return format_object_summary(self, self._formatter_func,
                                     is_justify=is_justify, name=name)

    def _format_attrs(self):
        """
        Return a list of tuples of the (attr, formatted_value).
        """
        return format_object_attrs(self)

    def _mpl_repr(self):
        # how to represent ourselves to matplotlib
        return self.values

    def format(self, name=False, formatter=None, **kwargs):
        """
        Render a string representation of the Index.
        """
        header = []
        if name:
            header.append(pprint_thing(self.name,
                                       escape_chars=('\t', '\r', '\n')) if
                          self.name is not None else '')

        if formatter is not None:
            return header + list(self.map(formatter))

        return self._format_with_header(header, **kwargs)

    def _format_with_header(self, header, na_rep='NaN', **kwargs):
        values = self.values

        from pandas.io.formats.format import format_array

        if is_categorical_dtype(values.dtype):
            values = np.array(values)

        elif is_object_dtype(values.dtype):
            values = lib.maybe_convert_objects(values, safe=1)

        if is_object_dtype(values.dtype):
            result = [pprint_thing(x, escape_chars=('\t', '\r', '\n'))
                      for x in values]

            # could have nans
            mask = isna(values)
            if mask.any():
                result = np.array(result)
                result[mask] = na_rep
                result = result.tolist()
        else:
            result = _trim_front(format_array(values, None, justify='left'))
        return header + result

    def to_native_types(self, slicer=None, **kwargs):
        """
        Format specified values of `self` and return them.

        Parameters
        ----------
        slicer : int, array-like
            An indexer into `self` that specifies which values
            are used in the formatting process.
        kwargs : dict
            Options for specifying how the values should be formatted.
            These options include the following:

            1) na_rep : str
                The value that serves as a placeholder for NULL values
            2) quoting : bool or None
                Whether or not there are quoted values in `self`
            3) date_format : str
                The format used to represent date-like values
        """
        values = self
        if slicer is not None:
            values = values[slicer]
        return values._format_native_types(**kwargs)

    def _format_native_types(self, na_rep='', quoting=None, **kwargs):
        """
        Actually format specific types of the index.
        """
        mask = isna(self)
        if not self.is_object() and not quoting:
            values = np.asarray(self).astype(str)
        else:
            values = np.array(self, dtype=object, copy=True)

        values[mask] = na_rep
        return values

    def _summary(self, name=None):
        """
        Return a summarized representation.

        Parameters
        ----------
        name : str
            name to use in the summary representation

        Returns
        -------
        String with a summarized representation of the index
        """
        if len(self) > 0:
            head = self[0]
            if (hasattr(head, 'format') and
                    not isinstance(head, compat.string_types)):
                head = head.format()
            tail = self[-1]
            if (hasattr(tail, 'format') and
                    not isinstance(tail, compat.string_types)):
                tail = tail.format()
            index_summary = ', %s to %s' % (pprint_thing(head),
                                            pprint_thing(tail))
        else:
            index_summary = ''

        if name is None:
            name = type(self).__name__
        return '%s: %s entries%s' % (name, len(self), index_summary)
  907. def summary(self, name=None):
  908. """
  909. Return a summarized representation.
  910. .. deprecated:: 0.23.0
  911. """
  912. warnings.warn("'summary' is deprecated and will be removed in a "
  913. "future version.", FutureWarning, stacklevel=2)
  914. return self._summary(name)
  915. # --------------------------------------------------------------------
  916. # Conversion Methods
  917. def to_flat_index(self):
  918. """
  919. Identity method.
  920. .. versionadded:: 0.24.0
This is implemented for compatibility with subclass implementations
  922. when chaining.
  923. Returns
  924. -------
  925. pd.Index
  926. Caller.
  927. See Also
  928. --------
  929. MultiIndex.to_flat_index : Subclass implementation.
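Examples
--------
Illustrative only; on a plain Index this simply returns the caller:
>>> idx = pd.Index([1, 2, 3])
>>> idx.to_flat_index() is idx
True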
  930. """
  931. return self
  932. def to_series(self, index=None, name=None):
  933. """
Create a Series with both index and values equal to the index keys,
useful with map for returning an indexer based on an index.
  936. Parameters
  937. ----------
  938. index : Index, optional
  939. index of resulting Series. If None, defaults to original index
  940. name : string, optional
  941. name of resulting Series. If None, defaults to name of original
  942. index
  943. Returns
  944. -------
  945. Series : dtype will be based on the type of the Index values.
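Examples
--------
A small sketch of the round trip (kept repr-free so it stays stable
across versions):
>>> idx = pd.Index(['Ant', 'Bear', 'Cow'], name='animal')
>>> s = idx.to_series()
>>> s.index.equals(idx)
True
>>> list(s)
['Ant', 'Bear', 'Cow']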
  946. """
  947. from pandas import Series
  948. if index is None:
  949. index = self._shallow_copy()
  950. if name is None:
  951. name = self.name
  952. return Series(self.values.copy(), index=index, name=name)
  953. def to_frame(self, index=True, name=None):
  954. """
  955. Create a DataFrame with a column containing the Index.
  956. .. versionadded:: 0.24.0
  957. Parameters
  958. ----------
  959. index : boolean, default True
  960. Set the index of the returned DataFrame as the original Index.
  961. name : object, default None
  962. The passed name should substitute for the index name (if it has
  963. one).
  964. Returns
  965. -------
  966. DataFrame
  967. DataFrame containing the original Index data.
  968. See Also
  969. --------
  970. Index.to_series : Convert an Index to a Series.
  971. Series.to_frame : Convert Series to DataFrame.
  972. Examples
  973. --------
  974. >>> idx = pd.Index(['Ant', 'Bear', 'Cow'], name='animal')
  975. >>> idx.to_frame()
  976. animal
  977. animal
  978. Ant Ant
  979. Bear Bear
  980. Cow Cow
  981. By default, the original Index is reused. To enforce a new Index:
  982. >>> idx.to_frame(index=False)
  983. animal
  984. 0 Ant
  985. 1 Bear
  986. 2 Cow
  987. To override the name of the resulting column, specify `name`:
  988. >>> idx.to_frame(index=False, name='zoo')
  989. zoo
  990. 0 Ant
  991. 1 Bear
  992. 2 Cow
  993. """
  994. from pandas import DataFrame
  995. if name is None:
  996. name = self.name or 0
  997. result = DataFrame({name: self.values.copy()})
  998. if index:
  999. result.index = self
  1000. return result
  1001. # --------------------------------------------------------------------
  1002. # Name-Centric Methods
  1003. def _validate_names(self, name=None, names=None, deep=False):
  1004. """
  1005. Handles the quirks of having a singular 'name' parameter for general
  1006. Index and plural 'names' parameter for MultiIndex.
  1007. """
  1008. from copy import deepcopy
  1009. if names is not None and name is not None:
  1010. raise TypeError("Can only provide one of `names` and `name`")
  1011. elif names is None and name is None:
  1012. return deepcopy(self.names) if deep else self.names
  1013. elif names is not None:
  1014. if not is_list_like(names):
  1015. raise TypeError("Must pass list-like as `names`.")
  1016. return names
  1017. else:
  1018. if not is_list_like(name):
  1019. return [name]
  1020. return name
  1021. def _get_names(self):
  1022. return FrozenList((self.name, ))
  1023. def _set_names(self, values, level=None):
  1024. """
  1025. Set new names on index. Each name has to be a hashable type.
  1026. Parameters
  1027. ----------
  1028. values : str or sequence
  1029. name(s) to set
  1030. level : int, level name, or sequence of int/level names (default None)
  1031. If the index is a MultiIndex (hierarchical), level(s) to set (None
  1032. for all levels). Otherwise level must be None
  1033. Raises
  1034. ------
  1035. TypeError if each name is not hashable.
  1036. """
  1037. if not is_list_like(values):
  1038. raise ValueError('Names must be a list-like')
  1039. if len(values) != 1:
  1040. raise ValueError('Length of new names must be 1, got %d' %
  1041. len(values))
  1042. # GH 20527
  1043. # All items in 'name' need to be hashable:
  1044. for name in values:
  1045. if not is_hashable(name):
  1046. raise TypeError('{}.name must be a hashable type'
  1047. .format(self.__class__.__name__))
  1048. self.name = values[0]
  1049. names = property(fset=_set_names, fget=_get_names)
  1050. def set_names(self, names, level=None, inplace=False):
  1051. """
  1052. Set Index or MultiIndex name.
  1053. Able to set new names partially and by level.
  1054. Parameters
  1055. ----------
  1056. names : label or list of label
  1057. Name(s) to set.
  1058. level : int, label or list of int or label, optional
  1059. If the index is a MultiIndex, level(s) to set (None for all
  1060. levels). Otherwise level must be None.
  1061. inplace : bool, default False
  1062. Modifies the object directly, instead of creating a new Index or
  1063. MultiIndex.
  1064. Returns
  1065. -------
  1066. Index
  1067. The same type as the caller or None if inplace is True.
  1068. See Also
  1069. --------
  1070. Index.rename : Able to set new names without level.
  1071. Examples
  1072. --------
  1073. >>> idx = pd.Index([1, 2, 3, 4])
  1074. >>> idx
  1075. Int64Index([1, 2, 3, 4], dtype='int64')
  1076. >>> idx.set_names('quarter')
  1077. Int64Index([1, 2, 3, 4], dtype='int64', name='quarter')
  1078. >>> idx = pd.MultiIndex.from_product([['python', 'cobra'],
  1079. ... [2018, 2019]])
  1080. >>> idx
  1081. MultiIndex(levels=[['cobra', 'python'], [2018, 2019]],
  1082. codes=[[1, 1, 0, 0], [0, 1, 0, 1]])
  1083. >>> idx.set_names(['kind', 'year'], inplace=True)
  1084. >>> idx
  1085. MultiIndex(levels=[['cobra', 'python'], [2018, 2019]],
  1086. codes=[[1, 1, 0, 0], [0, 1, 0, 1]],
  1087. names=['kind', 'year'])
  1088. >>> idx.set_names('species', level=0)
  1089. MultiIndex(levels=[['cobra', 'python'], [2018, 2019]],
  1090. codes=[[1, 1, 0, 0], [0, 1, 0, 1]],
  1091. names=['species', 'year'])
  1092. """
  1093. if level is not None and not isinstance(self, ABCMultiIndex):
  1094. raise ValueError('Level must be None for non-MultiIndex')
  1095. if level is not None and not is_list_like(level) and is_list_like(
  1096. names):
  1097. msg = "Names must be a string when a single level is provided."
  1098. raise TypeError(msg)
  1099. if not is_list_like(names) and level is None and self.nlevels > 1:
  1100. raise TypeError("Must pass list-like as `names`.")
  1101. if not is_list_like(names):
  1102. names = [names]
  1103. if level is not None and not is_list_like(level):
  1104. level = [level]
  1105. if inplace:
  1106. idx = self
  1107. else:
  1108. idx = self._shallow_copy()
  1109. idx._set_names(names, level=level)
  1110. if not inplace:
  1111. return idx
  1112. def rename(self, name, inplace=False):
  1113. """
  1114. Alter Index or MultiIndex name.
  1115. Able to set new names without level. Defaults to returning new index.
  1116. Length of names must match number of levels in MultiIndex.
  1117. Parameters
  1118. ----------
  1119. name : label or list of labels
  1120. Name(s) to set.
  1121. inplace : boolean, default False
  1122. Modifies the object directly, instead of creating a new Index or
  1123. MultiIndex.
  1124. Returns
  1125. -------
  1126. Index
  1127. The same type as the caller or None if inplace is True.
  1128. See Also
  1129. --------
  1130. Index.set_names : Able to set new names partially and by level.
  1131. Examples
  1132. --------
  1133. >>> idx = pd.Index(['A', 'C', 'A', 'B'], name='score')
  1134. >>> idx.rename('grade')
  1135. Index(['A', 'C', 'A', 'B'], dtype='object', name='grade')
  1136. >>> idx = pd.MultiIndex.from_product([['python', 'cobra'],
  1137. ... [2018, 2019]],
  1138. ... names=['kind', 'year'])
  1139. >>> idx
  1140. MultiIndex(levels=[['cobra', 'python'], [2018, 2019]],
  1141. codes=[[1, 1, 0, 0], [0, 1, 0, 1]],
  1142. names=['kind', 'year'])
  1143. >>> idx.rename(['species', 'year'])
  1144. MultiIndex(levels=[['cobra', 'python'], [2018, 2019]],
  1145. codes=[[1, 1, 0, 0], [0, 1, 0, 1]],
  1146. names=['species', 'year'])
  1147. >>> idx.rename('species')
  1148. Traceback (most recent call last):
  1149. TypeError: Must pass list-like as `names`.
  1150. """
  1151. return self.set_names([name], inplace=inplace)
  1152. # --------------------------------------------------------------------
  1153. # Level-Centric Methods
  1154. @property
  1155. def nlevels(self):
  1156. return 1
  1157. def _sort_levels_monotonic(self):
  1158. """
  1159. Compat with MultiIndex.
  1160. """
  1161. return self
  1162. def _validate_index_level(self, level):
  1163. """
  1164. Validate index level.
For a single-level Index, getting the level number is a no-op, but some
verification must be done, as in MultiIndex.
  1167. """
  1168. if isinstance(level, int):
  1169. if level < 0 and level != -1:
  1170. raise IndexError("Too many levels: Index has only 1 level,"
  1171. " %d is not a valid level number" % (level, ))
  1172. elif level > 0:
  1173. raise IndexError("Too many levels:"
  1174. " Index has only 1 level, not %d" %
  1175. (level + 1))
  1176. elif level != self.name:
  1177. raise KeyError('Level %s must be same as name (%s)' %
  1178. (level, self.name))
  1179. def _get_level_number(self, level):
  1180. self._validate_index_level(level)
  1181. return 0
  1182. def sortlevel(self, level=None, ascending=True, sort_remaining=None):
  1183. """
For internal compatibility with the Index API.
Sort the Index. This is for compat with MultiIndex.
  1186. Parameters
  1187. ----------
  1188. ascending : boolean, default True
  1189. False to sort in descending order
  1190. level, sort_remaining are compat parameters
  1191. Returns
  1192. -------
  1193. sorted_index : Index
  1194. """
  1195. return self.sort_values(return_indexer=True, ascending=ascending)
  1196. def _get_level_values(self, level):
  1197. """
  1198. Return an Index of values for requested level.
  1199. This is primarily useful to get an individual level of values from a
MultiIndex, but is provided on Index as well for compatibility.
  1201. Parameters
  1202. ----------
  1203. level : int or str
  1204. It is either the integer position or the name of the level.
  1205. Returns
  1206. -------
  1207. values : Index
  1208. Calling object, as there is only one level in the Index.
  1209. See Also
  1210. --------
  1211. MultiIndex.get_level_values : Get values for a level of a MultiIndex.
  1212. Notes
  1213. -----
  1214. For Index, level should be 0, since there are no multiple levels.
  1215. Examples
  1216. --------
  1217. >>> idx = pd.Index(list('abc'))
  1218. >>> idx
  1219. Index(['a', 'b', 'c'], dtype='object')
  1220. Get level values by supplying `level` as integer:
  1221. >>> idx.get_level_values(0)
  1222. Index(['a', 'b', 'c'], dtype='object')
  1223. """
  1224. self._validate_index_level(level)
  1225. return self
  1226. get_level_values = _get_level_values
  1227. def droplevel(self, level=0):
  1228. """
  1229. Return index with requested level(s) removed.
  1230. If resulting index has only 1 level left, the result will be
  1231. of Index type, not MultiIndex.
  1232. .. versionadded:: 0.23.1 (support for non-MultiIndex)
  1233. Parameters
  1234. ----------
  1235. level : int, str, or list-like, default 0
  1236. If a string is given, must be the name of a level
  1237. If list-like, elements must be names or indexes of levels.
  1238. Returns
  1239. -------
  1240. index : Index or MultiIndex
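Examples
--------
A minimal sketch (reprs are illustrative):
>>> mi = pd.MultiIndex.from_arrays(
...     [[1, 2], [3, 4], [5, 6]], names=['x', 'y', 'z'])
>>> mi.droplevel(['x', 'y'])
Int64Index([5, 6], dtype='int64', name='z')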
  1241. """
  1242. if not isinstance(level, (tuple, list)):
  1243. level = [level]
  1244. levnums = sorted(self._get_level_number(lev) for lev in level)[::-1]
  1245. if len(level) == 0:
  1246. return self
  1247. if len(level) >= self.nlevels:
  1248. raise ValueError("Cannot remove {} levels from an index with {} "
  1249. "levels: at least one level must be "
  1250. "left.".format(len(level), self.nlevels))
  1251. # The two checks above guarantee that here self is a MultiIndex
  1252. new_levels = list(self.levels)
  1253. new_codes = list(self.codes)
  1254. new_names = list(self.names)
  1255. for i in levnums:
  1256. new_levels.pop(i)
  1257. new_codes.pop(i)
  1258. new_names.pop(i)
  1259. if len(new_levels) == 1:
  1260. # set nan if needed
  1261. mask = new_codes[0] == -1
  1262. result = new_levels[0].take(new_codes[0])
  1263. if mask.any():
  1264. result = result.putmask(mask, np.nan)
  1265. result.name = new_names[0]
  1266. return result
  1267. else:
  1268. from .multi import MultiIndex
  1269. return MultiIndex(levels=new_levels, codes=new_codes,
  1270. names=new_names, verify_integrity=False)
  1271. _index_shared_docs['_get_grouper_for_level'] = """
  1272. Get index grouper corresponding to an index level
  1273. Parameters
  1274. ----------
  1275. mapper: Group mapping function or None
  1276. Function mapping index values to groups
  1277. level : int or None
  1278. Index level
  1279. Returns
  1280. -------
  1281. grouper : Index
  1282. Index of values to group on
  1283. labels : ndarray of int or None
  1284. Array of locations in level_index
  1285. uniques : Index or None
  1286. Index of unique values for level
  1287. """
  1288. @Appender(_index_shared_docs['_get_grouper_for_level'])
  1289. def _get_grouper_for_level(self, mapper, level=None):
  1290. assert level is None or level == 0
  1291. if mapper is None:
  1292. grouper = self
  1293. else:
  1294. grouper = self.map(mapper)
  1295. return grouper, None, None
  1296. # --------------------------------------------------------------------
  1297. # Introspection Methods
  1298. @property
  1299. def is_monotonic(self):
  1300. """
  1301. Alias for is_monotonic_increasing.
  1302. """
  1303. return self.is_monotonic_increasing
  1304. @property
  1305. def is_monotonic_increasing(self):
  1306. """
Return True if the index has monotonic increasing (only equal or
increasing) values.
  1309. Examples
  1310. --------
  1311. >>> Index([1, 2, 3]).is_monotonic_increasing
  1312. True
  1313. >>> Index([1, 2, 2]).is_monotonic_increasing
  1314. True
  1315. >>> Index([1, 3, 2]).is_monotonic_increasing
  1316. False
  1317. """
  1318. return self._engine.is_monotonic_increasing
  1319. @property
  1320. def is_monotonic_decreasing(self):
  1321. """
Return True if the index has monotonic decreasing (only equal or
decreasing) values.
  1324. Examples
  1325. --------
  1326. >>> Index([3, 2, 1]).is_monotonic_decreasing
  1327. True
  1328. >>> Index([3, 2, 2]).is_monotonic_decreasing
  1329. True
  1330. >>> Index([3, 1, 2]).is_monotonic_decreasing
  1331. False
  1332. """
  1333. return self._engine.is_monotonic_decreasing
  1334. @property
  1335. def _is_strictly_monotonic_increasing(self):
  1336. """
Return True if the index has strictly monotonic increasing
(only increasing) values.
  1339. Examples
  1340. --------
  1341. >>> Index([1, 2, 3])._is_strictly_monotonic_increasing
  1342. True
  1343. >>> Index([1, 2, 2])._is_strictly_monotonic_increasing
  1344. False
  1345. >>> Index([1, 3, 2])._is_strictly_monotonic_increasing
  1346. False
  1347. """
  1348. return self.is_unique and self.is_monotonic_increasing
  1349. @property
  1350. def _is_strictly_monotonic_decreasing(self):
  1351. """
Return True if the index has strictly monotonic decreasing
(only decreasing) values.
  1354. Examples
  1355. --------
  1356. >>> Index([3, 2, 1])._is_strictly_monotonic_decreasing
  1357. True
  1358. >>> Index([3, 2, 2])._is_strictly_monotonic_decreasing
  1359. False
  1360. >>> Index([3, 1, 2])._is_strictly_monotonic_decreasing
  1361. False
  1362. """
  1363. return self.is_unique and self.is_monotonic_decreasing
  1364. def is_lexsorted_for_tuple(self, tup):
  1365. return True
  1366. @cache_readonly
  1367. def is_unique(self):
  1368. """
  1369. Return if the index has unique values.
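Examples
--------
Illustrative sketch:
>>> pd.Index([1, 2, 3]).is_unique
True
>>> pd.Index([1, 1, 2]).is_unique
False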
  1370. """
  1371. return self._engine.is_unique
  1372. @property
  1373. def has_duplicates(self):
  1374. return not self.is_unique
  1375. def is_boolean(self):
  1376. return self.inferred_type in ['boolean']
  1377. def is_integer(self):
  1378. return self.inferred_type in ['integer']
  1379. def is_floating(self):
  1380. return self.inferred_type in ['floating', 'mixed-integer-float']
  1381. def is_numeric(self):
  1382. return self.inferred_type in ['integer', 'floating']
  1383. def is_object(self):
  1384. return is_object_dtype(self.dtype)
  1385. def is_categorical(self):
  1386. """
  1387. Check if the Index holds categorical data.
  1388. Returns
  1389. -------
  1390. boolean
  1391. True if the Index is categorical.
  1392. See Also
  1393. --------
  1394. CategoricalIndex : Index for categorical data.
  1395. Examples
  1396. --------
  1397. >>> idx = pd.Index(["Watermelon", "Orange", "Apple",
  1398. ... "Watermelon"]).astype("category")
  1399. >>> idx.is_categorical()
  1400. True
  1401. >>> idx = pd.Index([1, 3, 5, 7])
  1402. >>> idx.is_categorical()
  1403. False
  1404. >>> s = pd.Series(["Peter", "Victor", "Elisabeth", "Mar"])
  1405. >>> s
  1406. 0 Peter
  1407. 1 Victor
  1408. 2 Elisabeth
  1409. 3 Mar
  1410. dtype: object
  1411. >>> s.index.is_categorical()
  1412. False
  1413. """
  1414. return self.inferred_type in ['categorical']
  1415. def is_interval(self):
  1416. return self.inferred_type in ['interval']
  1417. def is_mixed(self):
  1418. return self.inferred_type in ['mixed']
  1419. def holds_integer(self):
  1420. return self.inferred_type in ['integer', 'mixed-integer']
  1421. @cache_readonly
  1422. def inferred_type(self):
  1423. """
  1424. Return a string of the type inferred from the values.
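Examples
--------
Illustrative sketch; the returned strings follow
``pandas.api.types.infer_dtype``:
>>> pd.Index([1, 2, 3]).inferred_type
'integer'
>>> pd.Index(['a', 'b']).inferred_type
'string'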
  1425. """
  1426. return lib.infer_dtype(self, skipna=False)
  1427. @cache_readonly
  1428. def is_all_dates(self):
  1429. if self._data is None:
  1430. return False
  1431. return is_datetime_array(ensure_object(self.values))
  1432. # --------------------------------------------------------------------
  1433. # Pickle Methods
  1434. def __reduce__(self):
  1435. d = dict(data=self._data)
  1436. d.update(self._get_attributes_dict())
  1437. return _new_Index, (self.__class__, d), None
  1438. def __setstate__(self, state):
  1439. """
  1440. Necessary for making this object picklable.
  1441. """
  1442. if isinstance(state, dict):
  1443. self._data = state.pop('data')
  1444. for k, v in compat.iteritems(state):
  1445. setattr(self, k, v)
  1446. elif isinstance(state, tuple):
  1447. if len(state) == 2:
  1448. nd_state, own_state = state
  1449. data = np.empty(nd_state[1], dtype=nd_state[2])
  1450. np.ndarray.__setstate__(data, nd_state)
  1451. self.name = own_state[0]
  1452. else: # pragma: no cover
  1453. data = np.empty(state)
  1454. np.ndarray.__setstate__(data, state)
  1455. self._data = data
  1456. self._reset_identity()
  1457. else:
  1458. raise Exception("invalid pickle state")
  1459. _unpickle_compat = __setstate__
  1460. # --------------------------------------------------------------------
  1461. # Null Handling Methods
  1462. _na_value = np.nan
  1463. """The expected NA value to use with this index."""
  1464. @cache_readonly
  1465. def _isnan(self):
  1466. """
  1467. Return if each value is NaN.
  1468. """
  1469. if self._can_hold_na:
  1470. return isna(self)
  1471. else:
  1472. # shouldn't reach to this condition by checking hasnans beforehand
  1473. values = np.empty(len(self), dtype=np.bool_)
  1474. values.fill(False)
  1475. return values
  1476. @cache_readonly
  1477. def _nan_idxs(self):
  1478. if self._can_hold_na:
  1479. w, = self._isnan.nonzero()
  1480. return w
  1481. else:
  1482. return np.array([], dtype=np.int64)
  1483. @cache_readonly
  1484. def hasnans(self):
  1485. """
Return True if there are any NaNs; enables various performance speedups.
  1487. """
  1488. if self._can_hold_na:
  1489. return bool(self._isnan.any())
  1490. else:
  1491. return False
  1492. def isna(self):
  1493. """
  1494. Detect missing values.
  1495. Return a boolean same-sized object indicating if the values are NA.
  1496. NA values, such as ``None``, :attr:`numpy.NaN` or :attr:`pd.NaT`, get
  1497. mapped to ``True`` values.
Everything else gets mapped to ``False`` values. Characters such as
  1499. empty strings `''` or :attr:`numpy.inf` are not considered NA values
  1500. (unless you set ``pandas.options.mode.use_inf_as_na = True``).
  1501. .. versionadded:: 0.20.0
  1502. Returns
  1503. -------
  1504. numpy.ndarray
  1505. A boolean array of whether my values are NA
  1506. See Also
  1507. --------
  1508. pandas.Index.notna : Boolean inverse of isna.
  1509. pandas.Index.dropna : Omit entries with missing values.
  1510. pandas.isna : Top-level isna.
  1511. Series.isna : Detect missing values in Series object.
  1512. Examples
  1513. --------
  1514. Show which entries in a pandas.Index are NA. The result is an
  1515. array.
  1516. >>> idx = pd.Index([5.2, 6.0, np.NaN])
  1517. >>> idx
  1518. Float64Index([5.2, 6.0, nan], dtype='float64')
  1519. >>> idx.isna()
  1520. array([False, False, True], dtype=bool)
  1521. Empty strings are not considered NA values. None is considered an NA
  1522. value.
  1523. >>> idx = pd.Index(['black', '', 'red', None])
  1524. >>> idx
  1525. Index(['black', '', 'red', None], dtype='object')
  1526. >>> idx.isna()
  1527. array([False, False, False, True], dtype=bool)
  1528. For datetimes, `NaT` (Not a Time) is considered as an NA value.
  1529. >>> idx = pd.DatetimeIndex([pd.Timestamp('1940-04-25'),
  1530. ... pd.Timestamp(''), None, pd.NaT])
  1531. >>> idx
  1532. DatetimeIndex(['1940-04-25', 'NaT', 'NaT', 'NaT'],
  1533. dtype='datetime64[ns]', freq=None)
  1534. >>> idx.isna()
  1535. array([False, True, True, True], dtype=bool)
  1536. """
  1537. return self._isnan
  1538. isnull = isna
  1539. def notna(self):
  1540. """
  1541. Detect existing (non-missing) values.
  1542. Return a boolean same-sized object indicating if the values are not NA.
  1543. Non-missing values get mapped to ``True``. Characters such as empty
  1544. strings ``''`` or :attr:`numpy.inf` are not considered NA values
  1545. (unless you set ``pandas.options.mode.use_inf_as_na = True``).
  1546. NA values, such as None or :attr:`numpy.NaN`, get mapped to ``False``
  1547. values.
  1548. .. versionadded:: 0.20.0
  1549. Returns
  1550. -------
  1551. numpy.ndarray
  1552. Boolean array to indicate which entries are not NA.
  1553. See Also
  1554. --------
  1555. Index.notnull : Alias of notna.
Index.isna : Inverse of notna.
  1557. pandas.notna : Top-level notna.
  1558. Examples
  1559. --------
  1560. Show which entries in an Index are not NA. The result is an
  1561. array.
  1562. >>> idx = pd.Index([5.2, 6.0, np.NaN])
  1563. >>> idx
  1564. Float64Index([5.2, 6.0, nan], dtype='float64')
  1565. >>> idx.notna()
  1566. array([ True, True, False])
Empty strings are not considered NA values. None is considered an NA
value.
  1569. >>> idx = pd.Index(['black', '', 'red', None])
  1570. >>> idx
  1571. Index(['black', '', 'red', None], dtype='object')
  1572. >>> idx.notna()
  1573. array([ True, True, True, False])
  1574. """
  1575. return ~self.isna()
  1576. notnull = notna
  1577. _index_shared_docs['fillna'] = """
  1578. Fill NA/NaN values with the specified value
  1579. Parameters
  1580. ----------
  1581. value : scalar
  1582. Scalar value to use to fill holes (e.g. 0).
This value cannot be a list-like.
  1584. downcast : dict, default is None
  1585. a dict of item->dtype of what to downcast if possible,
  1586. or the string 'infer' which will try to downcast to an appropriate
  1587. equal type (e.g. float64 to int64 if possible)
  1588. Returns
  1589. -------
  1590. filled : Index
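Examples
--------
A minimal sketch (repr is illustrative):
>>> idx = pd.Index([1.0, np.nan, 3.0])
>>> idx.fillna(2.0)
Float64Index([1.0, 2.0, 3.0], dtype='float64')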
  1591. """
  1592. @Appender(_index_shared_docs['fillna'])
  1593. def fillna(self, value=None, downcast=None):
  1594. self._assert_can_do_op(value)
  1595. if self.hasnans:
  1596. result = self.putmask(self._isnan, value)
  1597. if downcast is None:
# no need to preserve metadata other than name, because an index
# that contains missing values cannot have had a freq to carry over
  1600. return Index(result, name=self.name)
  1601. return self._shallow_copy()
  1602. _index_shared_docs['dropna'] = """
  1603. Return Index without NA/NaN values
  1604. Parameters
  1605. ----------
  1606. how : {'any', 'all'}, default 'any'
  1607. If the Index is a MultiIndex, drop the value when any or all levels
  1608. are NaN.
  1609. Returns
  1610. -------
  1611. valid : Index
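Examples
--------
A minimal sketch (repr is illustrative):
>>> pd.Index([1.0, np.nan, 3.0]).dropna()
Float64Index([1.0, 3.0], dtype='float64')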
  1612. """
  1613. @Appender(_index_shared_docs['dropna'])
  1614. def dropna(self, how='any'):
  1615. if how not in ('any', 'all'):
  1616. raise ValueError("invalid how option: {0}".format(how))
  1617. if self.hasnans:
  1618. return self._shallow_copy(self.values[~self._isnan])
  1619. return self._shallow_copy()
  1620. # --------------------------------------------------------------------
  1621. # Uniqueness Methods
  1622. _index_shared_docs['index_unique'] = (
  1623. """
Return unique values in the index. Uniques are returned in order
of appearance; this does NOT sort.
  1626. Parameters
  1627. ----------
  1628. level : int or str, optional, default None
  1629. Only return values from specified level (for MultiIndex)
  1630. .. versionadded:: 0.23.0
  1631. Returns
  1632. -------
  1633. Index without duplicates
  1634. See Also
  1635. --------
  1636. unique
  1637. Series.unique
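Examples
--------
A minimal sketch (repr is illustrative):
>>> pd.Index([1, 1, 2, 3, 3]).unique()
Int64Index([1, 2, 3], dtype='int64')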
  1638. """)
  1639. @Appender(_index_shared_docs['index_unique'] % _index_doc_kwargs)
  1640. def unique(self, level=None):
  1641. if level is not None:
  1642. self._validate_index_level(level)
  1643. result = super(Index, self).unique()
  1644. return self._shallow_copy(result)
  1645. def drop_duplicates(self, keep='first'):
  1646. """
  1647. Return Index with duplicate values removed.
  1648. Parameters
  1649. ----------
  1650. keep : {'first', 'last', ``False``}, default 'first'
  1651. - 'first' : Drop duplicates except for the first occurrence.
  1652. - 'last' : Drop duplicates except for the last occurrence.
  1653. - ``False`` : Drop all duplicates.
  1654. Returns
  1655. -------
  1656. deduplicated : Index
  1657. See Also
  1658. --------
  1659. Series.drop_duplicates : Equivalent method on Series.
  1660. DataFrame.drop_duplicates : Equivalent method on DataFrame.
  1661. Index.duplicated : Related method on Index, indicating duplicate
  1662. Index values.
  1663. Examples
  1664. --------
Generate a pandas.Index with duplicate values.
  1666. >>> idx = pd.Index(['lama', 'cow', 'lama', 'beetle', 'lama', 'hippo'])
  1667. The `keep` parameter controls which duplicate values are removed.
  1668. The value 'first' keeps the first occurrence for each
  1669. set of duplicated entries. The default value of keep is 'first'.
  1670. >>> idx.drop_duplicates(keep='first')
  1671. Index(['lama', 'cow', 'beetle', 'hippo'], dtype='object')
  1672. The value 'last' keeps the last occurrence for each set of duplicated
  1673. entries.
  1674. >>> idx.drop_duplicates(keep='last')
  1675. Index(['cow', 'beetle', 'lama', 'hippo'], dtype='object')
  1676. The value ``False`` discards all sets of duplicated entries.
  1677. >>> idx.drop_duplicates(keep=False)
  1678. Index(['cow', 'beetle', 'hippo'], dtype='object')
  1679. """
  1680. return super(Index, self).drop_duplicates(keep=keep)
  1681. def duplicated(self, keep='first'):
  1682. """
  1683. Indicate duplicate index values.
  1684. Duplicated values are indicated as ``True`` values in the resulting
  1685. array. Either all duplicates, all except the first, or all except the
  1686. last occurrence of duplicates can be indicated.
  1687. Parameters
  1688. ----------
  1689. keep : {'first', 'last', False}, default 'first'
  1690. The value or values in a set of duplicates to mark as missing.
  1691. - 'first' : Mark duplicates as ``True`` except for the first
  1692. occurrence.
  1693. - 'last' : Mark duplicates as ``True`` except for the last
  1694. occurrence.
  1695. - ``False`` : Mark all duplicates as ``True``.
  1696. Returns
  1697. -------
  1698. numpy.ndarray
  1699. See Also
  1700. --------
  1701. pandas.Series.duplicated : Equivalent method on pandas.Series.
  1702. pandas.DataFrame.duplicated : Equivalent method on pandas.DataFrame.
  1703. pandas.Index.drop_duplicates : Remove duplicate values from Index.
  1704. Examples
  1705. --------
  1706. By default, for each set of duplicated values, the first occurrence is
  1707. set to False and all others to True:
  1708. >>> idx = pd.Index(['lama', 'cow', 'lama', 'beetle', 'lama'])
  1709. >>> idx.duplicated()
  1710. array([False, False, True, False, True])
  1711. which is equivalent to
  1712. >>> idx.duplicated(keep='first')
  1713. array([False, False, True, False, True])
By using 'last', the last occurrence of each set of duplicated values
is set to False and all others to True:
  1716. >>> idx.duplicated(keep='last')
  1717. array([ True, False, True, False, False])
By setting keep to ``False``, all duplicates are True:
  1719. >>> idx.duplicated(keep=False)
  1720. array([ True, False, True, False, True])
  1721. """
  1722. return super(Index, self).duplicated(keep=keep)
  1723. def get_duplicates(self):
  1724. """
  1725. Extract duplicated index elements.
  1726. .. deprecated:: 0.23.0
  1727. Use idx[idx.duplicated()].unique() instead
  1728. Returns a sorted list of index elements which appear more than once in
  1729. the index.
  1730. Returns
  1731. -------
  1732. array-like
  1733. List of duplicated indexes.
  1734. See Also
  1735. --------
  1736. Index.duplicated : Return boolean array denoting duplicates.
  1737. Index.drop_duplicates : Return Index with duplicates removed.
  1738. Examples
  1739. --------
Works on different types of Index.
  1741. >>> pd.Index([1, 2, 2, 3, 3, 3, 4]).get_duplicates() # doctest: +SKIP
  1742. [2, 3]
  1743. Note that for a DatetimeIndex, it does not return a list but a new
  1744. DatetimeIndex:
  1745. >>> dates = pd.to_datetime(['2018-01-01', '2018-01-02', '2018-01-03',
  1746. ... '2018-01-03', '2018-01-04', '2018-01-04'],
  1747. ... format='%Y-%m-%d')
  1748. >>> pd.Index(dates).get_duplicates() # doctest: +SKIP
  1749. DatetimeIndex(['2018-01-03', '2018-01-04'],
  1750. dtype='datetime64[ns]', freq=None)
  1751. Sorts duplicated elements even when indexes are unordered.
  1752. >>> pd.Index([1, 2, 3, 2, 3, 4, 3]).get_duplicates() # doctest: +SKIP
  1753. [2, 3]
  1754. Return empty array-like structure when all elements are unique.
  1755. >>> pd.Index([1, 2, 3, 4]).get_duplicates() # doctest: +SKIP
  1756. []
  1757. >>> dates = pd.to_datetime(['2018-01-01', '2018-01-02', '2018-01-03'],
  1758. ... format='%Y-%m-%d')
  1759. >>> pd.Index(dates).get_duplicates() # doctest: +SKIP
  1760. DatetimeIndex([], dtype='datetime64[ns]', freq=None)
  1761. """
  1762. warnings.warn("'get_duplicates' is deprecated and will be removed in "
  1763. "a future release. You can use "
  1764. "idx[idx.duplicated()].unique() instead",
  1765. FutureWarning, stacklevel=2)
  1766. return self[self.duplicated()].unique()
  1767. def _get_unique_index(self, dropna=False):
  1768. """
  1769. Returns an index containing unique values.
  1770. Parameters
  1771. ----------
  1772. dropna : bool
  1773. If True, NaN values are dropped.
  1774. Returns
  1775. -------
  1776. uniques : index
  1777. """
  1778. if self.is_unique and not dropna:
  1779. return self
  1780. values = self.values
  1781. if not self.is_unique:
  1782. values = self.unique()
  1783. if dropna:
  1784. try:
  1785. if self.hasnans:
  1786. values = values[~isna(values)]
  1787. except NotImplementedError:
  1788. pass
  1789. return self._shallow_copy(values)
  1790. # --------------------------------------------------------------------
  1791. # Arithmetic & Logical Methods
  1792. def __add__(self, other):
  1793. if isinstance(other, (ABCSeries, ABCDataFrame)):
  1794. return NotImplemented
  1795. return Index(np.array(self) + other)
  1796. def __radd__(self, other):
  1797. return Index(other + np.array(self))
  1798. def __iadd__(self, other):
  1799. # alias for __add__
  1800. return self + other
  1801. def __sub__(self, other):
  1802. return Index(np.array(self) - other)
  1803. def __rsub__(self, other):
  1804. return Index(other - np.array(self))
  1805. def __and__(self, other):
  1806. return self.intersection(other)
  1807. def __or__(self, other):
  1808. return self.union(other)
  1809. def __xor__(self, other):
  1810. return self.symmetric_difference(other)
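# Illustrative sketch of the set-operation dunders defined above; the
# results simply mirror .intersection / .union / .symmetric_difference
# (reprs are approximate):
# >>> pd.Index([1, 2, 3]) & pd.Index([2, 3, 4])
# Int64Index([2, 3], dtype='int64')
# >>> pd.Index([1, 2, 3]) | pd.Index([2, 3, 4])
# Int64Index([1, 2, 3, 4], dtype='int64')
# >>> pd.Index([1, 2, 3]) ^ pd.Index([2, 3, 4])
# Int64Index([1, 4], dtype='int64')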
  1811. def __nonzero__(self):
  1812. raise ValueError("The truth value of a {0} is ambiguous. "
  1813. "Use a.empty, a.bool(), a.item(), a.any() or a.all()."
  1814. .format(self.__class__.__name__))
  1815. __bool__ = __nonzero__
  1816. # --------------------------------------------------------------------
  1817. # Set Operation Methods
  1818. def _get_reconciled_name_object(self, other):
  1819. """
  1820. If the result of a set operation will be self,
  1821. return self, unless the name changes, in which
  1822. case make a shallow copy of self.
  1823. """
  1824. name = get_op_result_name(self, other)
  1825. if self.name != name:
  1826. return self._shallow_copy(name=name)
  1827. return self
  1828. def _validate_sort_keyword(self, sort):
  1829. if sort not in [None, False]:
  1830. raise ValueError("The 'sort' keyword only takes the values of "
  1831. "None or False; {0} was passed.".format(sort))
  1832. def union(self, other, sort=None):
  1833. """
  1834. Form the union of two Index objects.
  1835. Parameters
  1836. ----------
  1837. other : Index or array-like
  1838. sort : bool or None, default None
  1839. Whether to sort the resulting Index.
  1840. * None : Sort the result, except when
  1841. 1. `self` and `other` are equal.
  1842. 2. `self` or `other` has length 0.
  1843. 3. Some values in `self` or `other` cannot be compared.
  1844. A RuntimeWarning is issued in this case.
  1845. * False : do not sort the result.
  1846. .. versionadded:: 0.24.0
  1847. .. versionchanged:: 0.24.1
  1848. Changed the default value from ``True`` to ``None``
  1849. (without change in behaviour).
  1850. Returns
  1851. -------
  1852. union : Index
  1853. Examples
  1854. --------
  1855. >>> idx1 = pd.Index([1, 2, 3, 4])
  1856. >>> idx2 = pd.Index([3, 4, 5, 6])
  1857. >>> idx1.union(idx2)
  1858. Int64Index([1, 2, 3, 4, 5, 6], dtype='int64')
  1859. """
  1860. self._validate_sort_keyword(sort)
  1861. self._assert_can_do_setop(other)
  1862. other = ensure_index(other)
  1863. if len(other) == 0 or self.equals(other):
  1864. return self._get_reconciled_name_object(other)
  1865. if len(self) == 0:
  1866. return other._get_reconciled_name_object(self)
  1867. # TODO: is_dtype_union_equal is a hack around
  1868. # 1. buggy set ops with duplicates (GH #13432)
  1869. # 2. CategoricalIndex lacking setops (GH #10186)
  1870. # Once those are fixed, this workaround can be removed
  1871. if not is_dtype_union_equal(self.dtype, other.dtype):
  1872. this = self.astype('O')
  1873. other = other.astype('O')
  1874. return this.union(other, sort=sort)
  1875. # TODO(EA): setops-refactor, clean all this up
  1876. if is_period_dtype(self) or is_datetime64tz_dtype(self):
  1877. lvals = self._ndarray_values
  1878. else:
  1879. lvals = self._values
  1880. if is_period_dtype(other) or is_datetime64tz_dtype(other):
  1881. rvals = other._ndarray_values
  1882. else:
  1883. rvals = other._values
  1884. if self.is_monotonic and other.is_monotonic:
  1885. try:
  1886. result = self._outer_indexer(lvals, rvals)[0]
  1887. except TypeError:
  1888. # incomparable objects
  1889. result = list(lvals)
  1890. # worth making this faster? a very unusual case
  1891. value_set = set(lvals)
  1892. result.extend([x for x in rvals if x not in value_set])
  1893. else:
  1894. indexer = self.get_indexer(other)
  1895. indexer, = (indexer == -1).nonzero()
  1896. if len(indexer) > 0:
  1897. other_diff = algos.take_nd(rvals, indexer,
  1898. allow_fill=False)
  1899. result = _concat._concat_compat((lvals, other_diff))
  1900. else:
  1901. result = lvals
  1902. if sort is None:
  1903. try:
  1904. result = sorting.safe_sort(result)
  1905. except TypeError as e:
  1906. warnings.warn("{}, sort order is undefined for "
  1907. "incomparable objects".format(e),
  1908. RuntimeWarning, stacklevel=3)
  1909. # for subclasses
  1910. return self._wrap_setop_result(other, result)
  1911. def _wrap_setop_result(self, other, result):
  1912. return self._constructor(result, name=get_op_result_name(self, other))
  1913. def intersection(self, other, sort=False):
  1914. """
  1915. Form the intersection of two Index objects.
  1916. This returns a new Index with elements common to the index and `other`.
  1917. Parameters
  1918. ----------
  1919. other : Index or array-like
  1920. sort : False or None, default False
  1921. Whether to sort the resulting index.
  1922. * False : do not sort the result.
  1923. * None : sort the result, except when `self` and `other` are equal
  1924. or when the values cannot be compared.
  1925. .. versionadded:: 0.24.0
  1926. .. versionchanged:: 0.24.1
  1927. Changed the default from ``True`` to ``False``, to match
  1928. the behaviour of 0.23.4 and earlier.
  1929. Returns
  1930. -------
  1931. intersection : Index
  1932. Examples
  1933. --------
  1934. >>> idx1 = pd.Index([1, 2, 3, 4])
  1935. >>> idx2 = pd.Index([3, 4, 5, 6])
  1936. >>> idx1.intersection(idx2)
  1937. Int64Index([3, 4], dtype='int64')
  1938. """
  1939. self._validate_sort_keyword(sort)
  1940. self._assert_can_do_setop(other)
  1941. other = ensure_index(other)
  1942. if self.equals(other):
  1943. return self._get_reconciled_name_object(other)
  1944. if not is_dtype_equal(self.dtype, other.dtype):
  1945. this = self.astype('O')
  1946. other = other.astype('O')
  1947. return this.intersection(other, sort=sort)
  1948. # TODO(EA): setops-refactor, clean all this up
  1949. if is_period_dtype(self):
  1950. lvals = self._ndarray_values
  1951. else:
  1952. lvals = self._values
  1953. if is_period_dtype(other):
  1954. rvals = other._ndarray_values
  1955. else:
  1956. rvals = other._values
  1957. if self.is_monotonic and other.is_monotonic:
  1958. try:
  1959. result = self._inner_indexer(lvals, rvals)[0]
  1960. return self._wrap_setop_result(other, result)
  1961. except TypeError:
  1962. pass
  1963. try:
  1964. indexer = Index(rvals).get_indexer(lvals)
  1965. indexer = indexer.take((indexer != -1).nonzero()[0])
  1966. except Exception:
  1967. # duplicates
  1968. indexer = algos.unique1d(
  1969. Index(rvals).get_indexer_non_unique(lvals)[0])
  1970. indexer = indexer[indexer != -1]
  1971. taken = other.take(indexer)
  1972. if sort is None:
  1973. taken = sorting.safe_sort(taken.values)
  1974. if self.name != other.name:
  1975. name = None
  1976. else:
  1977. name = self.name
  1978. return self._shallow_copy(taken, name=name)
  1979. if self.name != other.name:
  1980. taken.name = None
  1981. return taken
  1982. def difference(self, other, sort=None):
  1983. """
  1984. Return a new Index with elements from the index that are not in
  1985. `other`.
  1986. This is the set difference of two Index objects.
  1987. Parameters
  1988. ----------
  1989. other : Index or array-like
  1990. sort : False or None, default None
Whether to sort the resulting index. By default, pandas attempts
to sort the values, but any TypeError raised when comparing
incomparable elements is caught.
  1994. * None : Attempt to sort the result, but catch any TypeErrors
  1995. from comparing incomparable elements.
  1996. * False : Do not sort the result.
  1997. .. versionadded:: 0.24.0
  1998. .. versionchanged:: 0.24.1
  1999. Changed the default value from ``True`` to ``None``
  2000. (without change in behaviour).
  2001. Returns
  2002. -------
  2003. difference : Index
  2004. Examples
  2005. --------
  2006. >>> idx1 = pd.Index([2, 1, 3, 4])
  2007. >>> idx2 = pd.Index([3, 4, 5, 6])
  2008. >>> idx1.difference(idx2)
  2009. Int64Index([1, 2], dtype='int64')
  2010. >>> idx1.difference(idx2, sort=False)
  2011. Int64Index([2, 1], dtype='int64')
  2012. """
  2013. self._validate_sort_keyword(sort)
  2014. self._assert_can_do_setop(other)
  2015. if self.equals(other):
  2016. # pass an empty np.ndarray with the appropriate dtype
  2017. return self._shallow_copy(self._data[:0])
  2018. other, result_name = self._convert_can_do_setop(other)
  2019. this = self._get_unique_index()
  2020. indexer = this.get_indexer(other)
  2021. indexer = indexer.take((indexer != -1).nonzero()[0])
  2022. label_diff = np.setdiff1d(np.arange(this.size), indexer,
  2023. assume_unique=True)
  2024. the_diff = this.values.take(label_diff)
  2025. if sort is None:
  2026. try:
  2027. the_diff = sorting.safe_sort(the_diff)
  2028. except TypeError:
  2029. pass
  2030. return this._shallow_copy(the_diff, name=result_name, freq=None)
  2031. def symmetric_difference(self, other, result_name=None, sort=None):
  2032. """
  2033. Compute the symmetric difference of two Index objects.
  2034. Parameters
  2035. ----------
  2036. other : Index or array-like
  2037. result_name : str
  2038. sort : False or None, default None
Whether to sort the resulting index. By default, pandas attempts
to sort the values, but any TypeError raised when comparing
incomparable elements is caught.
  2042. * None : Attempt to sort the result, but catch any TypeErrors
  2043. from comparing incomparable elements.
  2044. * False : Do not sort the result.
  2045. .. versionadded:: 0.24.0
  2046. .. versionchanged:: 0.24.1
  2047. Changed the default value from ``True`` to ``None``
  2048. (without change in behaviour).
  2049. Returns
  2050. -------
  2051. symmetric_difference : Index
  2052. Notes
  2053. -----
  2054. ``symmetric_difference`` contains elements that appear in either
  2055. ``idx1`` or ``idx2`` but not both. Equivalent to the Index created by
  2056. ``idx1.difference(idx2) | idx2.difference(idx1)`` with duplicates
  2057. dropped.
  2058. Examples
  2059. --------
  2060. >>> idx1 = pd.Index([1, 2, 3, 4])
  2061. >>> idx2 = pd.Index([2, 3, 4, 5])
  2062. >>> idx1.symmetric_difference(idx2)
  2063. Int64Index([1, 5], dtype='int64')
  2064. You can also use the ``^`` operator:
  2065. >>> idx1 ^ idx2
  2066. Int64Index([1, 5], dtype='int64')
  2067. """
  2068. self._validate_sort_keyword(sort)
  2069. self._assert_can_do_setop(other)
  2070. other, result_name_update = self._convert_can_do_setop(other)
  2071. if result_name is None:
  2072. result_name = result_name_update
  2073. this = self._get_unique_index()
  2074. other = other._get_unique_index()
  2075. indexer = this.get_indexer(other)
  2076. # {this} minus {other}
  2077. common_indexer = indexer.take((indexer != -1).nonzero()[0])
  2078. left_indexer = np.setdiff1d(np.arange(this.size), common_indexer,
  2079. assume_unique=True)
  2080. left_diff = this.values.take(left_indexer)
  2081. # {other} minus {this}
  2082. right_indexer = (indexer == -1).nonzero()[0]
  2083. right_diff = other.values.take(right_indexer)
  2084. the_diff = _concat._concat_compat([left_diff, right_diff])
  2085. if sort is None:
  2086. try:
  2087. the_diff = sorting.safe_sort(the_diff)
  2088. except TypeError:
  2089. pass
  2090. attribs = self._get_attributes_dict()
  2091. attribs['name'] = result_name
  2092. if 'freq' in attribs:
  2093. attribs['freq'] = None
  2094. return self._shallow_copy_with_infer(the_diff, **attribs)
  2095. def _assert_can_do_setop(self, other):
  2096. if not is_list_like(other):
  2097. raise TypeError('Input must be Index or array-like')
  2098. return True
  2099. def _convert_can_do_setop(self, other):
  2100. if not isinstance(other, Index):
  2101. other = Index(other, name=self.name)
  2102. result_name = self.name
  2103. else:
  2104. result_name = get_op_result_name(self, other)
  2105. return other, result_name
  2106. # --------------------------------------------------------------------
  2107. # Indexing Methods
  2108. _index_shared_docs['get_loc'] = """
  2109. Get integer location, slice or boolean mask for requested label.
  2110. Parameters
  2111. ----------
  2112. key : label
  2113. method : {None, 'pad'/'ffill', 'backfill'/'bfill', 'nearest'}, optional
  2114. * default: exact matches only.
  2115. * pad / ffill: find the PREVIOUS index value if no exact match.
  2116. * backfill / bfill: use NEXT index value if no exact match
  2117. * nearest: use the NEAREST index value if no exact match. Tied
  2118. distances are broken by preferring the larger index value.
  2119. tolerance : optional
  2120. Maximum distance from index value for inexact matches. The value of
the index at the matching location must satisfy the equation
  2122. ``abs(index[loc] - key) <= tolerance``.
  2123. Tolerance may be a scalar
  2124. value, which applies the same tolerance to all values, or
  2125. list-like, which applies variable tolerance per element. List-like
  2126. includes list, tuple, array, Series, and must be the same size as
  2127. the index and its dtype must exactly match the index's type.
  2128. .. versionadded:: 0.21.0 (list-like tolerance)
  2129. Returns
  2130. -------
  2131. loc : int if unique index, slice if monotonic index, else mask
  2132. Examples
--------
  2134. >>> unique_index = pd.Index(list('abc'))
  2135. >>> unique_index.get_loc('b')
  2136. 1
  2137. >>> monotonic_index = pd.Index(list('abbc'))
  2138. >>> monotonic_index.get_loc('b')
  2139. slice(1, 3, None)
  2140. >>> non_monotonic_index = pd.Index(list('abcb'))
  2141. >>> non_monotonic_index.get_loc('b')
  2142. array([False, True, False, True], dtype=bool)
  2143. """
  2144. @Appender(_index_shared_docs['get_loc'])
  2145. def get_loc(self, key, method=None, tolerance=None):
  2146. if method is None:
  2147. if tolerance is not None:
  2148. raise ValueError('tolerance argument only valid if using pad, '
  2149. 'backfill or nearest lookups')
  2150. try:
  2151. return self._engine.get_loc(key)
  2152. except KeyError:
  2153. return self._engine.get_loc(self._maybe_cast_indexer(key))
  2154. indexer = self.get_indexer([key], method=method, tolerance=tolerance)
  2155. if indexer.ndim > 1 or indexer.size > 1:
  2156. raise TypeError('get_loc requires scalar valued input')
  2157. loc = indexer.item()
  2158. if loc == -1:
  2159. raise KeyError(key)
  2160. return loc
  2161. _index_shared_docs['get_indexer'] = """
  2162. Compute indexer and mask for new index given the current index. The
  2163. indexer should be then used as an input to ndarray.take to align the
  2164. current data to the new index.
  2165. Parameters
  2166. ----------
  2167. target : %(target_klass)s
  2168. method : {None, 'pad'/'ffill', 'backfill'/'bfill', 'nearest'}, optional
  2169. * default: exact matches only.
  2170. * pad / ffill: find the PREVIOUS index value if no exact match.
  2171. * backfill / bfill: use NEXT index value if no exact match
  2172. * nearest: use the NEAREST index value if no exact match. Tied
  2173. distances are broken by preferring the larger index value.
  2174. limit : int, optional
  2175. Maximum number of consecutive labels in ``target`` to match for
  2176. inexact matches.
  2177. tolerance : optional
  2178. Maximum distance between original and new labels for inexact
matches. The values of the index at the matching locations must
  2180. satisfy the equation ``abs(index[indexer] - target) <= tolerance``.
  2181. Tolerance may be a scalar value, which applies the same tolerance
  2182. to all values, or list-like, which applies variable tolerance per
  2183. element. List-like includes list, tuple, array, Series, and must be
  2184. the same size as the index and its dtype must exactly match the
  2185. index's type.
  2186. .. versionadded:: 0.21.0 (list-like tolerance)
  2187. Returns
  2188. -------
  2189. indexer : ndarray of int
  2190. Integers from 0 to n - 1 indicating that the index at these
  2191. positions matches the corresponding target values. Missing values
  2192. in the target are marked by -1.
  2193. Examples
  2194. --------
  2195. >>> index = pd.Index(['c', 'a', 'b'])
  2196. >>> index.get_indexer(['a', 'b', 'x'])
  2197. array([ 1, 2, -1])
  2198. Notice that the return value is an array of locations in ``index``
  2199. and ``x`` is marked by -1, as it is not in ``index``.
  2200. """
  2201. @Appender(_index_shared_docs['get_indexer'] % _index_doc_kwargs)
  2202. def get_indexer(self, target, method=None, limit=None, tolerance=None):
  2203. method = missing.clean_reindex_fill_method(method)
  2204. target = ensure_index(target)
  2205. if tolerance is not None:
  2206. tolerance = self._convert_tolerance(tolerance, target)
  2207. # Treat boolean labels passed to a numeric index as not found. Without
  2208. # this fix False and True would be treated as 0 and 1 respectively.
  2209. # (GH #16877)
  2210. if target.is_boolean() and self.is_numeric():
  2211. return ensure_platform_int(np.repeat(-1, target.size))
  2212. pself, ptarget = self._maybe_promote(target)
  2213. if pself is not self or ptarget is not target:
  2214. return pself.get_indexer(ptarget, method=method, limit=limit,
  2215. tolerance=tolerance)
  2216. if not is_dtype_equal(self.dtype, target.dtype):
  2217. this = self.astype(object)
  2218. target = target.astype(object)
  2219. return this.get_indexer(target, method=method, limit=limit,
  2220. tolerance=tolerance)
  2221. if not self.is_unique:
  2222. raise InvalidIndexError('Reindexing only valid with uniquely'
  2223. ' valued Index objects')
  2224. if method == 'pad' or method == 'backfill':
  2225. indexer = self._get_fill_indexer(target, method, limit, tolerance)
  2226. elif method == 'nearest':
  2227. indexer = self._get_nearest_indexer(target, limit, tolerance)
  2228. else:
  2229. if tolerance is not None:
  2230. raise ValueError('tolerance argument only valid if doing pad, '
  2231. 'backfill or nearest reindexing')
  2232. if limit is not None:
  2233. raise ValueError('limit argument only valid if doing pad, '
  2234. 'backfill or nearest reindexing')
  2235. indexer = self._engine.get_indexer(target._ndarray_values)
  2236. return ensure_platform_int(indexer)
  2237. def _convert_tolerance(self, tolerance, target):
  2238. # override this method on subclasses
  2239. tolerance = np.asarray(tolerance)
  2240. if target.size != tolerance.size and tolerance.size > 1:
  2241. raise ValueError('list-like tolerance size must match '
  2242. 'target index size')
  2243. return tolerance
  2244. def _get_fill_indexer(self, target, method, limit=None, tolerance=None):
  2245. if self.is_monotonic_increasing and target.is_monotonic_increasing:
  2246. method = (self._engine.get_pad_indexer if method == 'pad' else
  2247. self._engine.get_backfill_indexer)
  2248. indexer = method(target._ndarray_values, limit)
  2249. else:
  2250. indexer = self._get_fill_indexer_searchsorted(target, method,
  2251. limit)
  2252. if tolerance is not None:
  2253. indexer = self._filter_indexer_tolerance(target._ndarray_values,
  2254. indexer,
  2255. tolerance)
  2256. return indexer
  2257. def _get_fill_indexer_searchsorted(self, target, method, limit=None):
  2258. """
  2259. Fallback pad/backfill get_indexer that works for monotonic decreasing
  2260. indexes and non-monotonic targets.
  2261. """
  2262. if limit is not None:
  2263. raise ValueError('limit argument for %r method only well-defined '
  2264. 'if index and target are monotonic' % method)
  2265. side = 'left' if method == 'pad' else 'right'
  2266. # find exact matches first (this simplifies the algorithm)
  2267. indexer = self.get_indexer(target)
  2268. nonexact = (indexer == -1)
  2269. indexer[nonexact] = self._searchsorted_monotonic(target[nonexact],
  2270. side)
  2271. if side == 'left':
  2272. # searchsorted returns "indices into a sorted array such that,
  2273. # if the corresponding elements in v were inserted before the
  2274. # indices, the order of a would be preserved".
  2275. # Thus, we need to subtract 1 to find values to the left.
  2276. indexer[nonexact] -= 1
  2277. # This also mapped not found values (values of 0 from
  2278. # np.searchsorted) to -1, which conveniently is also our
  2279. # sentinel for missing values
  2280. else:
  2281. # Mark indices to the right of the largest value as not found
  2282. indexer[indexer == len(self)] = -1
  2283. return indexer
  2284. def _get_nearest_indexer(self, target, limit, tolerance):
  2285. """
  2286. Get the indexer for the nearest index labels; requires an index with
  2287. values that can be subtracted from each other (e.g., not strings or
  2288. tuples).
  2289. """
  2290. left_indexer = self.get_indexer(target, 'pad', limit=limit)
  2291. right_indexer = self.get_indexer(target, 'backfill', limit=limit)
  2292. target = np.asarray(target)
  2293. left_distances = abs(self.values[left_indexer] - target)
  2294. right_distances = abs(self.values[right_indexer] - target)
  2295. op = operator.lt if self.is_monotonic_increasing else operator.le
  2296. indexer = np.where(op(left_distances, right_distances) |
  2297. (right_indexer == -1), left_indexer, right_indexer)
  2298. if tolerance is not None:
  2299. indexer = self._filter_indexer_tolerance(target, indexer,
  2300. tolerance)
  2301. return indexer
  2302. def _filter_indexer_tolerance(self, target, indexer, tolerance):
  2303. distance = abs(self.values[indexer] - target)
  2304. indexer = np.where(distance <= tolerance, indexer, -1)
  2305. return indexer
  2306. # --------------------------------------------------------------------
  2307. # Indexer Conversion Methods
  2308. _index_shared_docs['_convert_scalar_indexer'] = """
  2309. Convert a scalar indexer.
  2310. Parameters
  2311. ----------
  2312. key : label of the slice bound
  2313. kind : {'ix', 'loc', 'getitem', 'iloc'} or None
  2314. """
  2315. @Appender(_index_shared_docs['_convert_scalar_indexer'])
  2316. def _convert_scalar_indexer(self, key, kind=None):
  2317. assert kind in ['ix', 'loc', 'getitem', 'iloc', None]
  2318. if kind == 'iloc':
  2319. return self._validate_indexer('positional', key, kind)
  2320. if len(self) and not isinstance(self, ABCMultiIndex,):
  2321. # we can raise here if we are definitive that this
  2322. # is positional indexing (eg. .ix on with a float)
  2323. # or label indexing if we are using a type able
  2324. # to be represented in the index
  2325. if kind in ['getitem', 'ix'] and is_float(key):
  2326. if not self.is_floating():
  2327. return self._invalid_indexer('label', key)
  2328. elif kind in ['loc'] and is_float(key):
  2329. # we want to raise KeyError on string/mixed here
  2330. # technically we *could* raise a TypeError
  2331. # on anything but mixed though
  2332. if self.inferred_type not in ['floating',
  2333. 'mixed-integer-float',
  2334. 'string',
  2335. 'unicode',
  2336. 'mixed']:
  2337. return self._invalid_indexer('label', key)
  2338. elif kind in ['loc'] and is_integer(key):
  2339. if not self.holds_integer():
  2340. return self._invalid_indexer('label', key)
  2341. return key
  2342. _index_shared_docs['_convert_slice_indexer'] = """
  2343. Convert a slice indexer.
  2344. By definition, these are labels unless 'iloc' is passed in.
  2345. Floats are not allowed as the start, step, or stop of the slice.
  2346. Parameters
  2347. ----------
  2348. key : label of the slice bound
  2349. kind : {'ix', 'loc', 'getitem', 'iloc'} or None
  2350. """
  2351. @Appender(_index_shared_docs['_convert_slice_indexer'])
  2352. def _convert_slice_indexer(self, key, kind=None):
  2353. assert kind in ['ix', 'loc', 'getitem', 'iloc', None]
  2354. # if we are not a slice, then we are done
  2355. if not isinstance(key, slice):
  2356. return key
  2357. # validate iloc
  2358. if kind == 'iloc':
  2359. return slice(self._validate_indexer('slice', key.start, kind),
  2360. self._validate_indexer('slice', key.stop, kind),
  2361. self._validate_indexer('slice', key.step, kind))
  2362. # potentially cast the bounds to integers
  2363. start, stop, step = key.start, key.stop, key.step
  2364. # figure out if this is a positional indexer
  2365. def is_int(v):
  2366. return v is None or is_integer(v)
  2367. is_null_slicer = start is None and stop is None
  2368. is_index_slice = is_int(start) and is_int(stop)
  2369. is_positional = is_index_slice and not self.is_integer()
  2370. if kind == 'getitem':
  2371. """
  2372. called from the getitem slicers, validate that we are in fact
  2373. integers
  2374. """
  2375. if self.is_integer() or is_index_slice:
  2376. return slice(self._validate_indexer('slice', key.start, kind),
  2377. self._validate_indexer('slice', key.stop, kind),
  2378. self._validate_indexer('slice', key.step, kind))
  2379. # convert the slice to an indexer here
  2380. # if we are mixed and have integers
  2381. try:
  2382. if is_positional and self.is_mixed():
  2383. # Validate start & stop
  2384. if start is not None:
  2385. self.get_loc(start)
  2386. if stop is not None:
  2387. self.get_loc(stop)
  2388. is_positional = False
  2389. except KeyError:
  2390. if self.inferred_type == 'mixed-integer-float':
  2391. raise
  2392. if is_null_slicer:
  2393. indexer = key
  2394. elif is_positional:
  2395. indexer = key
  2396. else:
  2397. try:
  2398. indexer = self.slice_indexer(start, stop, step, kind=kind)
  2399. except Exception:
  2400. if is_index_slice:
  2401. if self.is_integer():
  2402. raise
  2403. else:
  2404. indexer = key
  2405. else:
  2406. raise
  2407. return indexer
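# --- Illustrative sketch (editorial addition, not pandas source) ---------
# ``_convert_slice_indexer`` is what makes an integer slice on a
# non-integer index behave positionally, while label slices stay
# label-based and include both endpoints (representative output):
# >>> import pandas as pd
# >>> s = pd.Series([10, 20, 30], index=['a', 'b', 'c'])
# >>> s[0:2]        # integer slice on a non-integer index -> positional
# a    10
# b    20
# dtype: int64
# >>> s['a':'b']    # label slice -> both endpoints included
# a    10
# b    20
# dtype: int64
# --------------------------------------------------------------------------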
  2408. def _convert_listlike_indexer(self, keyarr, kind=None):
  2409. """
  2410. Parameters
  2411. ----------
  2412. keyarr : list-like
  2413. Indexer to convert.
  2414. Returns
  2415. -------
  2416. tuple (indexer, keyarr)
  2417. indexer is an ndarray or None if cannot convert
  2418. keyarr are tuple-safe keys
  2419. """
  2420. if isinstance(keyarr, Index):
  2421. keyarr = self._convert_index_indexer(keyarr)
  2422. else:
  2423. keyarr = self._convert_arr_indexer(keyarr)
  2424. indexer = self._convert_list_indexer(keyarr, kind=kind)
  2425. return indexer, keyarr
  2426. _index_shared_docs['_convert_arr_indexer'] = """
  2427. Convert an array-like indexer to the appropriate dtype.
  2428. Parameters
  2429. ----------
  2430. keyarr : array-like
  2431. Indexer to convert.
  2432. Returns
  2433. -------
  2434. converted_keyarr : array-like
  2435. """
  2436. @Appender(_index_shared_docs['_convert_arr_indexer'])
  2437. def _convert_arr_indexer(self, keyarr):
  2438. keyarr = com.asarray_tuplesafe(keyarr)
  2439. return keyarr
  2440. _index_shared_docs['_convert_index_indexer'] = """
  2441. Convert an Index indexer to the appropriate dtype.
  2442. Parameters
  2443. ----------
  2444. keyarr : Index (or sub-class)
  2445. Indexer to convert.
  2446. Returns
  2447. -------
  2448. converted_keyarr : Index (or sub-class)
  2449. """
  2450. @Appender(_index_shared_docs['_convert_index_indexer'])
  2451. def _convert_index_indexer(self, keyarr):
  2452. return keyarr
  2453. _index_shared_docs['_convert_list_indexer'] = """
  2454. Convert a list-like indexer to the appropriate dtype.
  2455. Parameters
  2456. ----------
  2457. keyarr : Index (or sub-class)
  2458. Indexer to convert.
  2459. kind : iloc, ix, loc, optional
  2460. Returns
  2461. -------
  2462. positional indexer or None
  2463. """
  2464. @Appender(_index_shared_docs['_convert_list_indexer'])
  2465. def _convert_list_indexer(self, keyarr, kind=None):
  2466. if (kind in [None, 'iloc', 'ix'] and
  2467. is_integer_dtype(keyarr) and not self.is_floating() and
  2468. not isinstance(keyarr, ABCPeriodIndex)):
  2469. if self.inferred_type == 'mixed-integer':
  2470. indexer = self.get_indexer(keyarr)
  2471. if (indexer >= 0).all():
  2472. return indexer
  2473. # missing values are flagged as -1 by get_indexer and negative
  2474. # indices are already converted to positive indices in the
  2475. # above if-statement, so the negative flags are changed to
  2476. # values outside the range of indices so as to trigger an
  2477. # IndexError in maybe_convert_indices
  2478. indexer[indexer < 0] = len(self)
  2479. from pandas.core.indexing import maybe_convert_indices
  2480. return maybe_convert_indices(indexer, len(self))
  2481. elif not self.inferred_type == 'integer':
  2482. keyarr = np.where(keyarr < 0, len(self) + keyarr, keyarr)
  2483. return keyarr
  2484. return None
  2485. def _invalid_indexer(self, form, key):
  2486. """
  2487. Consistent invalid indexer message.
  2488. """
  2489. raise TypeError("cannot do {form} indexing on {klass} with these "
  2490. "indexers [{key}] of {kind}".format(
  2491. form=form, klass=type(self), key=key,
  2492. kind=type(key)))
  2493. # --------------------------------------------------------------------
  2494. # Reindex Methods
  2495. def _can_reindex(self, indexer):
  2496. """
  2497. Check if we are allowing reindexing with this particular indexer.
  2498. Parameters
  2499. ----------
  2500. indexer : an integer indexer
  2501. Raises
  2502. ------
2503. ValueError if it is a duplicate axis
  2504. """
  2505. # trying to reindex on an axis with duplicates
  2506. if not self.is_unique and len(indexer):
  2507. raise ValueError("cannot reindex from a duplicate axis")
  2508. def reindex(self, target, method=None, level=None, limit=None,
  2509. tolerance=None):
  2510. """
  2511. Create index with target's values (move/add/delete values
  2512. as necessary).
  2513. Parameters
  2514. ----------
  2515. target : an iterable
  2516. Returns
  2517. -------
  2518. new_index : pd.Index
  2519. Resulting index
  2520. indexer : np.ndarray or None
  2521. Indices of output values in original index
  2522. """
  2523. # GH6552: preserve names when reindexing to non-named target
  2524. # (i.e. neither Index nor Series).
  2525. preserve_names = not hasattr(target, 'name')
  2526. # GH7774: preserve dtype/tz if target is empty and not an Index.
  2527. target = _ensure_has_len(target) # target may be an iterator
  2528. if not isinstance(target, Index) and len(target) == 0:
  2529. attrs = self._get_attributes_dict()
  2530. attrs.pop('freq', None) # don't preserve freq
  2531. values = self._data[:0] # appropriately-dtyped empty array
  2532. target = self._simple_new(values, dtype=self.dtype, **attrs)
  2533. else:
  2534. target = ensure_index(target)
  2535. if level is not None:
  2536. if method is not None:
  2537. raise TypeError('Fill method not supported if level passed')
  2538. _, indexer, _ = self._join_level(target, level, how='right',
  2539. return_indexers=True)
  2540. else:
  2541. if self.equals(target):
  2542. indexer = None
  2543. else:
  2544. if self.is_unique:
  2545. indexer = self.get_indexer(target, method=method,
  2546. limit=limit,
  2547. tolerance=tolerance)
  2548. else:
  2549. if method is not None or limit is not None:
  2550. raise ValueError("cannot reindex a non-unique index "
  2551. "with a method or limit")
  2552. indexer, missing = self.get_indexer_non_unique(target)
  2553. if preserve_names and target.nlevels == 1 and target.name != self.name:
  2554. target = target.copy()
  2555. target.name = self.name
  2556. return target, indexer
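# --- Illustrative sketch (editorial addition, not pandas source) ---------
# ``reindex`` returns both the new index and the indexer mapping it back
# into the original labels, with -1 marking labels not found:
# >>> import pandas as pd
# >>> pd.Index(['a', 'b', 'c']).reindex(['b', 'd'])
# (Index(['b', 'd'], dtype='object'), array([ 1, -1]))
# --------------------------------------------------------------------------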
  2557. def _reindex_non_unique(self, target):
  2558. """
2559. Create a new index with target's values (move/add/delete values as
2560. necessary). Use with a non-unique Index and a possibly non-unique target.
  2561. Parameters
  2562. ----------
  2563. target : an iterable
  2564. Returns
  2565. -------
  2566. new_index : pd.Index
  2567. Resulting index
  2568. indexer : np.ndarray or None
  2569. Indices of output values in original index
  2570. """
  2571. target = ensure_index(target)
  2572. indexer, missing = self.get_indexer_non_unique(target)
  2573. check = indexer != -1
  2574. new_labels = self.take(indexer[check])
  2575. new_indexer = None
  2576. if len(missing):
  2577. length = np.arange(len(indexer))
  2578. missing = ensure_platform_int(missing)
  2579. missing_labels = target.take(missing)
  2580. missing_indexer = ensure_int64(length[~check])
  2581. cur_labels = self.take(indexer[check]).values
  2582. cur_indexer = ensure_int64(length[check])
  2583. new_labels = np.empty(tuple([len(indexer)]), dtype=object)
  2584. new_labels[cur_indexer] = cur_labels
  2585. new_labels[missing_indexer] = missing_labels
  2586. # a unique indexer
  2587. if target.is_unique:
  2588. # see GH5553, make sure we use the right indexer
  2589. new_indexer = np.arange(len(indexer))
  2590. new_indexer[cur_indexer] = np.arange(len(cur_labels))
  2591. new_indexer[missing_indexer] = -1
  2592. # we have a non_unique selector, need to use the original
  2593. # indexer here
  2594. else:
  2595. # need to retake to have the same size as the indexer
  2596. indexer[~check] = -1
  2597. # reset the new indexer to account for the new size
  2598. new_indexer = np.arange(len(self.take(indexer)))
  2599. new_indexer[~check] = -1
  2600. new_index = self._shallow_copy_with_infer(new_labels, freq=None)
  2601. return new_index, indexer, new_indexer
  2602. # --------------------------------------------------------------------
  2603. # Join Methods
  2604. _index_shared_docs['join'] = """
  2605. Compute join_index and indexers to conform data
  2606. structures to the new index.
  2607. Parameters
  2608. ----------
  2609. other : Index
  2610. how : {'left', 'right', 'inner', 'outer'}
  2611. level : int or level name, default None
  2612. return_indexers : boolean, default False
  2613. sort : boolean, default False
  2614. Sort the join keys lexicographically in the result Index. If False,
  2615. the order of the join keys depends on the join type (how keyword)
  2616. .. versionadded:: 0.20.0
  2617. Returns
  2618. -------
  2619. join_index, (left_indexer, right_indexer)
  2620. """
  2621. @Appender(_index_shared_docs['join'])
  2622. def join(self, other, how='left', level=None, return_indexers=False,
  2623. sort=False):
  2624. self_is_mi = isinstance(self, ABCMultiIndex)
  2625. other_is_mi = isinstance(other, ABCMultiIndex)
  2626. # try to figure out the join level
  2627. # GH3662
  2628. if level is None and (self_is_mi or other_is_mi):
  2629. # have the same levels/names so a simple join
  2630. if self.names == other.names:
  2631. pass
  2632. else:
  2633. return self._join_multi(other, how=how,
  2634. return_indexers=return_indexers)
  2635. # join on the level
  2636. if level is not None and (self_is_mi or other_is_mi):
  2637. return self._join_level(other, level, how=how,
  2638. return_indexers=return_indexers)
  2639. other = ensure_index(other)
  2640. if len(other) == 0 and how in ('left', 'outer'):
  2641. join_index = self._shallow_copy()
  2642. if return_indexers:
  2643. rindexer = np.repeat(-1, len(join_index))
  2644. return join_index, None, rindexer
  2645. else:
  2646. return join_index
  2647. if len(self) == 0 and how in ('right', 'outer'):
  2648. join_index = other._shallow_copy()
  2649. if return_indexers:
  2650. lindexer = np.repeat(-1, len(join_index))
  2651. return join_index, lindexer, None
  2652. else:
  2653. return join_index
  2654. if self._join_precedence < other._join_precedence:
  2655. how = {'right': 'left', 'left': 'right'}.get(how, how)
  2656. result = other.join(self, how=how, level=level,
  2657. return_indexers=return_indexers)
  2658. if return_indexers:
  2659. x, y, z = result
  2660. result = x, z, y
  2661. return result
  2662. if not is_dtype_equal(self.dtype, other.dtype):
  2663. this = self.astype('O')
  2664. other = other.astype('O')
  2665. return this.join(other, how=how, return_indexers=return_indexers)
  2666. _validate_join_method(how)
  2667. if not self.is_unique and not other.is_unique:
  2668. return self._join_non_unique(other, how=how,
  2669. return_indexers=return_indexers)
  2670. elif not self.is_unique or not other.is_unique:
  2671. if self.is_monotonic and other.is_monotonic:
  2672. return self._join_monotonic(other, how=how,
  2673. return_indexers=return_indexers)
  2674. else:
  2675. return self._join_non_unique(other, how=how,
  2676. return_indexers=return_indexers)
  2677. elif self.is_monotonic and other.is_monotonic:
  2678. try:
  2679. return self._join_monotonic(other, how=how,
  2680. return_indexers=return_indexers)
  2681. except TypeError:
  2682. pass
  2683. if how == 'left':
  2684. join_index = self
  2685. elif how == 'right':
  2686. join_index = other
  2687. elif how == 'inner':
  2688. # TODO: sort=False here for backwards compat. It may
  2689. # be better to use the sort parameter passed into join
  2690. join_index = self.intersection(other, sort=False)
  2691. elif how == 'outer':
  2692. # TODO: sort=True here for backwards compat. It may
  2693. # be better to use the sort parameter passed into join
  2694. join_index = self.union(other)
  2695. if sort:
  2696. join_index = join_index.sort_values()
  2697. if return_indexers:
  2698. if join_index is self:
  2699. lindexer = None
  2700. else:
  2701. lindexer = self.get_indexer(join_index)
  2702. if join_index is other:
  2703. rindexer = None
  2704. else:
  2705. rindexer = other.get_indexer(join_index)
  2706. return join_index, lindexer, rindexer
  2707. else:
  2708. return join_index
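# --- Illustrative sketch (editorial addition, not pandas source) ---------
# A minimal ``join`` example on two unique, monotonic indexes; with
# ``return_indexers=True`` the positions of the joined labels in each
# input are returned (representative output):
# >>> import pandas as pd
# >>> left, right = pd.Index([1, 2, 3]), pd.Index([2, 3, 4])
# >>> left.join(right, how='inner')
# Int64Index([2, 3], dtype='int64')
# >>> left.join(right, how='inner', return_indexers=True)
# (Int64Index([2, 3], dtype='int64'), array([1, 2]), array([0, 1]))
# --------------------------------------------------------------------------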
  2709. def _join_multi(self, other, how, return_indexers=True):
  2710. from .multi import MultiIndex
  2711. from pandas.core.reshape.merge import _restore_dropped_levels_multijoin
  2712. # figure out join names
  2713. self_names = set(com._not_none(*self.names))
  2714. other_names = set(com._not_none(*other.names))
  2715. overlap = self_names & other_names
  2716. # need at least 1 in common
  2717. if not overlap:
  2718. raise ValueError("cannot join with no overlapping index names")
  2719. self_is_mi = isinstance(self, MultiIndex)
  2720. other_is_mi = isinstance(other, MultiIndex)
  2721. if self_is_mi and other_is_mi:
  2722. # Drop the non-matching levels from left and right respectively
  2723. ldrop_names = list(self_names - overlap)
  2724. rdrop_names = list(other_names - overlap)
  2725. self_jnlevels = self.droplevel(ldrop_names)
  2726. other_jnlevels = other.droplevel(rdrop_names)
  2727. # Join left and right
  2728. # Join on same leveled multi-index frames is supported
  2729. join_idx, lidx, ridx = self_jnlevels.join(other_jnlevels, how,
  2730. return_indexers=True)
  2731. # Restore the dropped levels
  2732. # Returned index level order is
  2733. # common levels, ldrop_names, rdrop_names
  2734. dropped_names = ldrop_names + rdrop_names
  2735. levels, codes, names = (
  2736. _restore_dropped_levels_multijoin(self, other,
  2737. dropped_names,
  2738. join_idx,
  2739. lidx, ridx))
  2740. # Re-create the multi-index
  2741. multi_join_idx = MultiIndex(levels=levels, codes=codes,
  2742. names=names, verify_integrity=False)
  2743. multi_join_idx = multi_join_idx.remove_unused_levels()
  2744. return multi_join_idx, lidx, ridx
  2745. jl = list(overlap)[0]
  2746. # Case where only one index is multi
  2747. # make the indices into mi's that match
  2748. flip_order = False
  2749. if self_is_mi:
  2750. self, other = other, self
  2751. flip_order = True
  2752. # flip if join method is right or left
  2753. how = {'right': 'left', 'left': 'right'}.get(how, how)
  2754. level = other.names.index(jl)
  2755. result = self._join_level(other, level, how=how,
  2756. return_indexers=return_indexers)
  2757. if flip_order:
  2758. if isinstance(result, tuple):
  2759. return result[0], result[2], result[1]
  2760. return result
  2761. def _join_non_unique(self, other, how='left', return_indexers=False):
  2762. from pandas.core.reshape.merge import _get_join_indexers
  2763. left_idx, right_idx = _get_join_indexers([self._ndarray_values],
  2764. [other._ndarray_values],
  2765. how=how,
  2766. sort=True)
  2767. left_idx = ensure_platform_int(left_idx)
  2768. right_idx = ensure_platform_int(right_idx)
  2769. join_index = np.asarray(self._ndarray_values.take(left_idx))
  2770. mask = left_idx == -1
  2771. np.putmask(join_index, mask, other._ndarray_values.take(right_idx))
  2772. join_index = self._wrap_joined_index(join_index, other)
  2773. if return_indexers:
  2774. return join_index, left_idx, right_idx
  2775. else:
  2776. return join_index
  2777. def _join_level(self, other, level, how='left', return_indexers=False,
  2778. keep_order=True):
  2779. """
  2780. The join method *only* affects the level of the resulting
  2781. MultiIndex. Otherwise it just exactly aligns the Index data to the
  2782. labels of the level in the MultiIndex.
2783. If ``keep_order == True``, the order of the data indexed by the
  2784. MultiIndex will not be changed; otherwise, it will tie out
  2785. with `other`.
  2786. """
  2787. from .multi import MultiIndex
  2788. def _get_leaf_sorter(labels):
  2789. """
2790. Returns sorter for the innermost level while preserving the
  2791. order of higher levels.
  2792. """
  2793. if labels[0].size == 0:
  2794. return np.empty(0, dtype='int64')
  2795. if len(labels) == 1:
  2796. lab = ensure_int64(labels[0])
  2797. sorter, _ = libalgos.groupsort_indexer(lab, 1 + lab.max())
  2798. return sorter
  2799. # find indexers of beginning of each set of
  2800. # same-key labels w.r.t all but last level
  2801. tic = labels[0][:-1] != labels[0][1:]
  2802. for lab in labels[1:-1]:
  2803. tic |= lab[:-1] != lab[1:]
  2804. starts = np.hstack(([True], tic, [True])).nonzero()[0]
  2805. lab = ensure_int64(labels[-1])
  2806. return lib.get_level_sorter(lab, ensure_int64(starts))
  2807. if isinstance(self, MultiIndex) and isinstance(other, MultiIndex):
  2808. raise TypeError('Join on level between two MultiIndex objects '
  2809. 'is ambiguous')
  2810. left, right = self, other
  2811. flip_order = not isinstance(self, MultiIndex)
  2812. if flip_order:
  2813. left, right = right, left
  2814. how = {'right': 'left', 'left': 'right'}.get(how, how)
  2815. level = left._get_level_number(level)
  2816. old_level = left.levels[level]
  2817. if not right.is_unique:
  2818. raise NotImplementedError('Index._join_level on non-unique index '
  2819. 'is not implemented')
  2820. new_level, left_lev_indexer, right_lev_indexer = \
  2821. old_level.join(right, how=how, return_indexers=True)
  2822. if left_lev_indexer is None:
  2823. if keep_order or len(left) == 0:
  2824. left_indexer = None
  2825. join_index = left
  2826. else: # sort the leaves
  2827. left_indexer = _get_leaf_sorter(left.codes[:level + 1])
  2828. join_index = left[left_indexer]
  2829. else:
  2830. left_lev_indexer = ensure_int64(left_lev_indexer)
  2831. rev_indexer = lib.get_reverse_indexer(left_lev_indexer,
  2832. len(old_level))
  2833. new_lev_codes = algos.take_nd(rev_indexer, left.codes[level],
  2834. allow_fill=False)
  2835. new_codes = list(left.codes)
  2836. new_codes[level] = new_lev_codes
  2837. new_levels = list(left.levels)
  2838. new_levels[level] = new_level
  2839. if keep_order: # just drop missing values. o.w. keep order
  2840. left_indexer = np.arange(len(left), dtype=np.intp)
  2841. mask = new_lev_codes != -1
  2842. if not mask.all():
  2843. new_codes = [lab[mask] for lab in new_codes]
  2844. left_indexer = left_indexer[mask]
  2845. else: # tie out the order with other
  2846. if level == 0: # outer most level, take the fast route
  2847. ngroups = 1 + new_lev_codes.max()
  2848. left_indexer, counts = libalgos.groupsort_indexer(
  2849. new_lev_codes, ngroups)
  2850. # missing values are placed first; drop them!
  2851. left_indexer = left_indexer[counts[0]:]
  2852. new_codes = [lab[left_indexer] for lab in new_codes]
  2853. else: # sort the leaves
  2854. mask = new_lev_codes != -1
  2855. mask_all = mask.all()
  2856. if not mask_all:
  2857. new_codes = [lab[mask] for lab in new_codes]
  2858. left_indexer = _get_leaf_sorter(new_codes[:level + 1])
  2859. new_codes = [lab[left_indexer] for lab in new_codes]
  2860. # left_indexers are w.r.t masked frame.
  2861. # reverse to original frame!
  2862. if not mask_all:
  2863. left_indexer = mask.nonzero()[0][left_indexer]
  2864. join_index = MultiIndex(levels=new_levels, codes=new_codes,
  2865. names=left.names, verify_integrity=False)
  2866. if right_lev_indexer is not None:
  2867. right_indexer = algos.take_nd(right_lev_indexer,
  2868. join_index.codes[level],
  2869. allow_fill=False)
  2870. else:
  2871. right_indexer = join_index.codes[level]
  2872. if flip_order:
  2873. left_indexer, right_indexer = right_indexer, left_indexer
  2874. if return_indexers:
  2875. left_indexer = (None if left_indexer is None
  2876. else ensure_platform_int(left_indexer))
  2877. right_indexer = (None if right_indexer is None
  2878. else ensure_platform_int(right_indexer))
  2879. return join_index, left_indexer, right_indexer
  2880. else:
  2881. return join_index
  2882. def _join_monotonic(self, other, how='left', return_indexers=False):
  2883. if self.equals(other):
  2884. ret_index = other if how == 'right' else self
  2885. if return_indexers:
  2886. return ret_index, None, None
  2887. else:
  2888. return ret_index
  2889. sv = self._ndarray_values
  2890. ov = other._ndarray_values
  2891. if self.is_unique and other.is_unique:
  2892. # We can perform much better than the general case
  2893. if how == 'left':
  2894. join_index = self
  2895. lidx = None
  2896. ridx = self._left_indexer_unique(sv, ov)
  2897. elif how == 'right':
  2898. join_index = other
  2899. lidx = self._left_indexer_unique(ov, sv)
  2900. ridx = None
  2901. elif how == 'inner':
  2902. join_index, lidx, ridx = self._inner_indexer(sv, ov)
  2903. join_index = self._wrap_joined_index(join_index, other)
  2904. elif how == 'outer':
  2905. join_index, lidx, ridx = self._outer_indexer(sv, ov)
  2906. join_index = self._wrap_joined_index(join_index, other)
  2907. else:
  2908. if how == 'left':
  2909. join_index, lidx, ridx = self._left_indexer(sv, ov)
  2910. elif how == 'right':
  2911. join_index, ridx, lidx = self._left_indexer(ov, sv)
  2912. elif how == 'inner':
  2913. join_index, lidx, ridx = self._inner_indexer(sv, ov)
  2914. elif how == 'outer':
  2915. join_index, lidx, ridx = self._outer_indexer(sv, ov)
  2916. join_index = self._wrap_joined_index(join_index, other)
  2917. if return_indexers:
  2918. lidx = None if lidx is None else ensure_platform_int(lidx)
  2919. ridx = None if ridx is None else ensure_platform_int(ridx)
  2920. return join_index, lidx, ridx
  2921. else:
  2922. return join_index
  2923. def _wrap_joined_index(self, joined, other):
  2924. name = get_op_result_name(self, other)
  2925. return Index(joined, name=name)
  2926. # --------------------------------------------------------------------
  2927. # Uncategorized Methods
  2928. @property
  2929. def values(self):
  2930. """
  2931. Return an array representing the data in the Index.
  2932. .. warning::
  2933. We recommend using :attr:`Index.array` or
  2934. :meth:`Index.to_numpy`, depending on whether you need
  2935. a reference to the underlying data or a NumPy array.
  2936. Returns
  2937. -------
  2938. array: numpy.ndarray or ExtensionArray
  2939. See Also
  2940. --------
  2941. Index.array : Reference to the underlying data.
  2942. Index.to_numpy : A NumPy array representing the underlying data.
  2943. Return the underlying data as an ndarray.
  2944. """
  2945. return self._data.view(np.ndarray)
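# --- Illustrative sketch (editorial addition, not pandas source) ---------
# ``.values`` hands the index data back as a NumPy ndarray view:
# >>> import pandas as pd
# >>> pd.Index([1, 2, 3]).values
# array([1, 2, 3])
# --------------------------------------------------------------------------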
  2946. @property
  2947. def _values(self):
  2948. # type: () -> Union[ExtensionArray, Index, np.ndarray]
  2949. # TODO(EA): remove index types as they become extension arrays
  2950. """
  2951. The best array representation.
  2952. This is an ndarray, ExtensionArray, or Index subclass. This differs
  2953. from ``_ndarray_values``, which always returns an ndarray.
  2954. Both ``_values`` and ``_ndarray_values`` are consistent between
  2955. ``Series`` and ``Index``.
  2956. It may differ from the public '.values' method.
  2957. index | values | _values | _ndarray_values |
  2958. ----------------- | --------------- | ------------- | --------------- |
  2959. Index | ndarray | ndarray | ndarray |
  2960. CategoricalIndex | Categorical | Categorical | ndarray[int] |
  2961. DatetimeIndex | ndarray[M8ns] | ndarray[M8ns] | ndarray[M8ns] |
  2962. DatetimeIndex[tz] | ndarray[M8ns] | DTI[tz] | ndarray[M8ns] |
  2963. PeriodIndex | ndarray[object] | PeriodArray | ndarray[int] |
  2964. IntervalIndex | IntervalArray | IntervalArray | ndarray[object] |
  2965. See Also
  2966. --------
  2967. values
  2968. _ndarray_values
  2969. """
  2970. return self._data
  2971. def get_values(self):
  2972. """
  2973. Return `Index` data as an `numpy.ndarray`.
  2974. Returns
  2975. -------
  2976. numpy.ndarray
  2977. A one-dimensional numpy array of the `Index` values.
  2978. See Also
  2979. --------
  2980. Index.values : The attribute that get_values wraps.
  2981. Examples
  2982. --------
  2983. Getting the `Index` values of a `DataFrame`:
  2984. >>> df = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]],
  2985. ... index=['a', 'b', 'c'], columns=['A', 'B', 'C'])
  2986. >>> df
  2987. A B C
  2988. a 1 2 3
  2989. b 4 5 6
  2990. c 7 8 9
  2991. >>> df.index.get_values()
  2992. array(['a', 'b', 'c'], dtype=object)
  2993. Standalone `Index` values:
  2994. >>> idx = pd.Index(['1', '2', '3'])
  2995. >>> idx.get_values()
  2996. array(['1', '2', '3'], dtype=object)
  2997. `MultiIndex` arrays also have only one dimension:
  2998. >>> midx = pd.MultiIndex.from_arrays([[1, 2, 3], ['a', 'b', 'c']],
  2999. ... names=('number', 'letter'))
  3000. >>> midx.get_values()
  3001. array([(1, 'a'), (2, 'b'), (3, 'c')], dtype=object)
  3002. >>> midx.get_values().ndim
  3003. 1
  3004. """
  3005. return self.values
  3006. @Appender(IndexOpsMixin.memory_usage.__doc__)
  3007. def memory_usage(self, deep=False):
  3008. result = super(Index, self).memory_usage(deep=deep)
  3009. # include our engine hashtable
  3010. result += self._engine.sizeof(deep=deep)
  3011. return result
  3012. _index_shared_docs['where'] = """
  3013. Return an Index of same shape as self and whose corresponding
  3014. entries are from self where cond is True and otherwise are from
  3015. other.
  3016. .. versionadded:: 0.19.0
  3017. Parameters
  3018. ----------
  3019. cond : boolean array-like with the same length as self
  3020. other : scalar, or array-like
  3021. """
  3022. @Appender(_index_shared_docs['where'])
  3023. def where(self, cond, other=None):
  3024. if other is None:
  3025. other = self._na_value
  3026. dtype = self.dtype
  3027. values = self.values
  3028. if is_bool(other) or is_bool_dtype(other):
  3029. # bools force casting
  3030. values = values.astype(object)
  3031. dtype = None
  3032. values = np.where(cond, values, other)
  3033. if self._is_numeric_dtype and np.any(isna(values)):
  3034. # We can't coerce to the numeric dtype of "self" (unless
  3035. # it's float) if there are NaN values in our output.
  3036. dtype = None
  3037. return self._shallow_copy_with_infer(values, dtype=dtype)
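# --- Illustrative sketch (editorial addition, not pandas source) ---------
# ``where`` keeps entries for which ``cond`` is True and replaces the
# rest with ``other`` (NaN when ``other`` is omitted):
# >>> import pandas as pd
# >>> idx = pd.Index(['car', 'bike', 'train'])
# >>> idx.where(idx.isin(['car', 'train']), other='other')
# Index(['car', 'other', 'train'], dtype='object')
# >>> idx.where([True, False, True])
# Index(['car', nan, 'train'], dtype='object')
# --------------------------------------------------------------------------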
  3038. # construction helpers
  3039. @classmethod
  3040. def _try_convert_to_int_index(cls, data, copy, name, dtype):
  3041. """
  3042. Attempt to convert an array of data into an integer index.
  3043. Parameters
  3044. ----------
  3045. data : The data to convert.
  3046. copy : Whether to copy the data or not.
  3047. name : The name of the index returned.
  3048. Returns
  3049. -------
  3050. int_index : data converted to either an Int64Index or a
  3051. UInt64Index
  3052. Raises
  3053. ------
  3054. ValueError if the conversion was not successful.
  3055. """
  3056. from .numeric import Int64Index, UInt64Index
  3057. if not is_unsigned_integer_dtype(dtype):
  3058. # skip int64 conversion attempt if uint-like dtype is passed, as
3059. # this could return Int64Index when UInt64Index is what's desired
  3060. try:
  3061. res = data.astype('i8', copy=False)
  3062. if (res == data).all():
  3063. return Int64Index(res, copy=copy, name=name)
  3064. except (OverflowError, TypeError, ValueError):
  3065. pass
  3066. # Conversion to int64 failed (possibly due to overflow) or was skipped,
  3067. # so let's try now with uint64.
  3068. try:
  3069. res = data.astype('u8', copy=False)
  3070. if (res == data).all():
  3071. return UInt64Index(res, copy=copy, name=name)
  3072. except (OverflowError, TypeError, ValueError):
  3073. pass
  3074. raise ValueError
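# --- Illustrative sketch (editorial addition, not pandas source) ---------
# ``_try_convert_to_int_index`` is part of what lets the Index
# constructor infer an integer index, falling back to uint64 when the
# values overflow int64 (representative output, default construction):
# >>> import pandas as pd
# >>> pd.Index([1, 2, 3])
# Int64Index([1, 2, 3], dtype='int64')
# >>> pd.Index([2**63, 2**63 + 1])
# UInt64Index([9223372036854775808, 9223372036854775809], dtype='uint64')
# --------------------------------------------------------------------------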
  3075. @classmethod
  3076. def _scalar_data_error(cls, data):
  3077. raise TypeError('{0}(...) must be called with a collection of some '
  3078. 'kind, {1} was passed'.format(cls.__name__,
  3079. repr(data)))
  3080. @classmethod
  3081. def _string_data_error(cls, data):
  3082. raise TypeError('String dtype not supported, you may need '
  3083. 'to explicitly cast to a numeric type')
  3084. @classmethod
  3085. def _coerce_to_ndarray(cls, data):
  3086. """
  3087. Coerces data to ndarray.
  3088. Converts other iterables to list first and then to array.
  3089. Does not touch ndarrays.
  3090. Raises
  3091. ------
  3092. TypeError
  3093. When the data passed in is a scalar.
  3094. """
  3095. if not isinstance(data, (np.ndarray, Index)):
  3096. if data is None or is_scalar(data):
  3097. cls._scalar_data_error(data)
  3098. # other iterable of some kind
  3099. if not isinstance(data, (ABCSeries, list, tuple)):
  3100. data = list(data)
  3101. data = np.asarray(data)
  3102. return data
  3103. def _coerce_scalar_to_index(self, item):
  3104. """
  3105. We need to coerce a scalar to a compat for our index type.
  3106. Parameters
  3107. ----------
  3108. item : scalar item to coerce
  3109. """
  3110. dtype = self.dtype
  3111. if self._is_numeric_dtype and isna(item):
  3112. # We can't coerce to the numeric dtype of "self" (unless
  3113. # it's float) if there are NaN values in our output.
  3114. dtype = None
  3115. return Index([item], dtype=dtype, **self._get_attributes_dict())
  3116. def _to_safe_for_reshape(self):
  3117. """
  3118. Convert to object if we are a categorical.
  3119. """
  3120. return self
  3121. def _convert_for_op(self, value):
  3122. """
  3123. Convert value to be insertable to ndarray.
  3124. """
  3125. return value
  3126. def _assert_can_do_op(self, value):
  3127. """
  3128. Check value is valid for scalar op.
  3129. """
  3130. if not is_scalar(value):
  3131. msg = "'value' must be a scalar, passed: {0}"
  3132. raise TypeError(msg.format(type(value).__name__))
  3133. @property
  3134. def _has_complex_internals(self):
  3135. # to disable groupby tricks in MultiIndex
  3136. return False
  3137. def _is_memory_usage_qualified(self):
  3138. """
  3139. Return a boolean if we need a qualified .info display.
  3140. """
  3141. return self.is_object()
  3142. def is_type_compatible(self, kind):
  3143. return kind == self.inferred_type
  3144. _index_shared_docs['contains'] = """
  3145. Return a boolean indicating whether the provided key is in the index.
  3146. Parameters
  3147. ----------
  3148. key : label
  3149. The key to check if it is present in the index.
  3150. Returns
  3151. -------
  3152. bool
3153. Whether the key is in the index.
  3154. See Also
  3155. --------
  3156. Index.isin : Returns an ndarray of boolean dtype indicating whether the
  3157. list-like key is in the index.
  3158. Examples
  3159. --------
  3160. >>> idx = pd.Index([1, 2, 3, 4])
  3161. >>> idx
  3162. Int64Index([1, 2, 3, 4], dtype='int64')
  3163. >>> idx.contains(2)
  3164. True
  3165. >>> idx.contains(6)
  3166. False
  3167. This is equivalent to:
  3168. >>> 2 in idx
  3169. True
  3170. >>> 6 in idx
  3171. False
  3172. """
  3173. @Appender(_index_shared_docs['contains'] % _index_doc_kwargs)
  3174. def __contains__(self, key):
  3175. hash(key)
  3176. try:
  3177. return key in self._engine
  3178. except (OverflowError, TypeError, ValueError):
  3179. return False
  3180. @Appender(_index_shared_docs['contains'] % _index_doc_kwargs)
  3181. def contains(self, key):
  3182. hash(key)
  3183. try:
  3184. return key in self._engine
  3185. except (TypeError, ValueError):
  3186. return False
  3187. def __hash__(self):
  3188. raise TypeError("unhashable type: %r" % type(self).__name__)
  3189. def __setitem__(self, key, value):
  3190. raise TypeError("Index does not support mutable operations")
  3191. def __getitem__(self, key):
  3192. """
  3193. Override numpy.ndarray's __getitem__ method to work as desired.
  3194. This function adds lists and Series as valid boolean indexers
  3195. (ndarrays only supports ndarray with dtype=bool).
  3196. If resulting ndim != 1, plain ndarray is returned instead of
  3197. corresponding `Index` subclass.
  3198. """
  3199. # There's no custom logic to be implemented in __getslice__, so it's
  3200. # not overloaded intentionally.
  3201. getitem = self._data.__getitem__
  3202. promote = self._shallow_copy
  3203. if is_scalar(key):
  3204. key = com.cast_scalar_indexer(key)
  3205. return getitem(key)
  3206. if isinstance(key, slice):
  3207. # This case is separated from the conditional above to avoid
  3208. # pessimization of basic indexing.
  3209. return promote(getitem(key))
  3210. if com.is_bool_indexer(key):
  3211. key = np.asarray(key, dtype=bool)
  3212. key = com.values_from_object(key)
  3213. result = getitem(key)
  3214. if not is_scalar(result):
  3215. return promote(result)
  3216. else:
  3217. return result
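# --- Illustrative sketch (editorial addition, not pandas source) ---------
# ``__getitem__`` accepts scalars, slices, and list/Series boolean masks
# (plain ndarrays only accept boolean ndarrays):
# >>> import pandas as pd
# >>> idx = pd.Index(['a', 'b', 'c', 'd'])
# >>> idx[1]
# 'b'
# >>> idx[1:3]
# Index(['b', 'c'], dtype='object')
# >>> idx[[True, False, True, False]]
# Index(['a', 'c'], dtype='object')
# --------------------------------------------------------------------------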
  3218. def _can_hold_identifiers_and_holds_name(self, name):
  3219. """
  3220. Faster check for ``name in self`` when we know `name` is a Python
  3221. identifier (e.g. in NDFrame.__getattr__, which hits this to support
  3222. . key lookup). For indexes that can't hold identifiers (everything
  3223. but object & categorical) we just return False.
  3224. https://github.com/pandas-dev/pandas/issues/19764
  3225. """
  3226. if self.is_object() or self.is_categorical():
  3227. return name in self
  3228. return False
  3229. def append(self, other):
  3230. """
  3231. Append a collection of Index options together.
  3232. Parameters
  3233. ----------
  3234. other : Index or list/tuple of indices
  3235. Returns
  3236. -------
  3237. appended : Index
  3238. """
  3239. to_concat = [self]
  3240. if isinstance(other, (list, tuple)):
  3241. to_concat = to_concat + list(other)
  3242. else:
  3243. to_concat.append(other)
  3244. for obj in to_concat:
  3245. if not isinstance(obj, Index):
  3246. raise TypeError('all inputs must be Index')
  3247. names = {obj.name for obj in to_concat}
  3248. name = None if len(names) > 1 else self.name
  3249. return self._concat(to_concat, name)
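# --- Illustrative sketch (editorial addition, not pandas source) ---------
# ``append`` concatenates indexes, falling back to object dtype when the
# inputs do not share a dtype:
# >>> import pandas as pd
# >>> pd.Index([1, 2]).append(pd.Index([3, 4]))
# Int64Index([1, 2, 3, 4], dtype='int64')
# >>> pd.Index([1, 2]).append(pd.Index(['a']))
# Index([1, 2, 'a'], dtype='object')
# --------------------------------------------------------------------------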
  3250. def _concat(self, to_concat, name):
  3251. typs = _concat.get_dtype_kinds(to_concat)
  3252. if len(typs) == 1:
  3253. return self._concat_same_dtype(to_concat, name=name)
  3254. return _concat._concat_index_asobject(to_concat, name=name)
  3255. def _concat_same_dtype(self, to_concat, name):
  3256. """
  3257. Concatenate to_concat which has the same class.
  3258. """
  3259. # must be overridden in specific classes
  3260. return _concat._concat_index_asobject(to_concat, name)
  3261. def putmask(self, mask, value):
  3262. """
  3263. Return a new Index of the values set with the mask.
  3264. See Also
  3265. --------
  3266. numpy.ndarray.putmask
  3267. """
  3268. values = self.values.copy()
  3269. try:
  3270. np.putmask(values, mask, self._convert_for_op(value))
  3271. return self._shallow_copy(values)
  3272. except (ValueError, TypeError) as err:
  3273. if is_object_dtype(self):
  3274. raise err
  3275. # coerces to object
  3276. return self.astype(object).putmask(mask, value)
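# --- Illustrative sketch (editorial addition, not pandas source) ---------
# ``putmask`` returns a new Index with masked positions replaced,
# coercing to object dtype when the value does not fit the current dtype:
# >>> import pandas as pd
# >>> pd.Index([1, 2, 3]).putmask([True, False, False], 99)
# Int64Index([99, 2, 3], dtype='int64')
# --------------------------------------------------------------------------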
  3277. def equals(self, other):
  3278. """
  3279. Determines if two Index objects contain the same elements.
  3280. """
  3281. if self.is_(other):
  3282. return True
  3283. if not isinstance(other, Index):
  3284. return False
  3285. if is_object_dtype(self) and not is_object_dtype(other):
  3286. # if other is not object, use other's logic for coercion
  3287. return other.equals(self)
  3288. try:
  3289. return array_equivalent(com.values_from_object(self),
  3290. com.values_from_object(other))
  3291. except Exception:
  3292. return False
  3293. def identical(self, other):
  3294. """
  3295. Similar to equals, but check that other comparable attributes are
  3296. also equal.
  3297. """
  3298. return (self.equals(other) and
  3299. all((getattr(self, c, None) == getattr(other, c, None)
  3300. for c in self._comparables)) and
  3301. type(self) == type(other))
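# --- Illustrative sketch (editorial addition, not pandas source) ---------
# ``equals`` compares elements only, while ``identical`` also requires
# matching type and comparable attributes such as ``name``:
# >>> import pandas as pd
# >>> pd.Index([1, 2, 3]).equals(pd.Index([1.0, 2.0, 3.0]))
# True
# >>> pd.Index([1, 2, 3]).identical(pd.Index([1.0, 2.0, 3.0]))
# False
# >>> pd.Index([1, 2], name='x').identical(pd.Index([1, 2], name='y'))
# False
# --------------------------------------------------------------------------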
  3302. def asof(self, label):
  3303. """
  3304. Return the label from the index, or, if not present, the previous one.
  3305. Assuming that the index is sorted, return the passed index label if it
  3306. is in the index, or return the previous index label if the passed one
  3307. is not in the index.
  3308. Parameters
  3309. ----------
  3310. label : object
  3311. The label up to which the method returns the latest index label.
  3312. Returns
  3313. -------
  3314. object
  3315. The passed label if it is in the index. The previous label if the
  3316. passed label is not in the sorted index or `NaN` if there is no
  3317. such label.
  3318. See Also
  3319. --------
  3320. Series.asof : Return the latest value in a Series up to the
  3321. passed index.
  3322. merge_asof : Perform an asof merge (similar to left join but it
  3323. matches on nearest key rather than equal key).
  3324. Index.get_loc : An `asof` is a thin wrapper around `get_loc`
  3325. with method='pad'.
  3326. Examples
  3327. --------
  3328. `Index.asof` returns the latest index label up to the passed label.
  3329. >>> idx = pd.Index(['2013-12-31', '2014-01-02', '2014-01-03'])
  3330. >>> idx.asof('2014-01-01')
  3331. '2013-12-31'
  3332. If the label is in the index, the method returns the passed label.
  3333. >>> idx.asof('2014-01-02')
  3334. '2014-01-02'
  3335. If all of the labels in the index are later than the passed label,
  3336. NaN is returned.
  3337. >>> idx.asof('1999-01-02')
  3338. nan
  3339. If the index is not sorted, an error is raised.
  3340. >>> idx_not_sorted = pd.Index(['2013-12-31', '2015-01-02',
  3341. ... '2014-01-03'])
  3342. >>> idx_not_sorted.asof('2013-12-31')
  3343. Traceback (most recent call last):
  3344. ValueError: index must be monotonic increasing or decreasing
  3345. """
  3346. try:
  3347. loc = self.get_loc(label, method='pad')
  3348. except KeyError:
  3349. return self._na_value
  3350. else:
  3351. if isinstance(loc, slice):
  3352. loc = loc.indices(len(self))[-1]
  3353. return self[loc]
  3354. def asof_locs(self, where, mask):
  3355. """
  3356. Finds the locations (indices) of the labels from the index for
  3357. every entry in the `where` argument.
  3358. As in the `asof` function, if the label (a particular entry in
3359. `where`) is not in the index, the latest index label up to the
  3360. passed label is chosen and its index returned.
  3361. If all of the labels in the index are later than a label in `where`,
  3362. -1 is returned.
  3363. `mask` is used to ignore NA values in the index during calculation.
  3364. Parameters
  3365. ----------
  3366. where : Index
  3367. An Index consisting of an array of timestamps.
  3368. mask : array-like
  3369. Array of booleans denoting where values in the original
  3370. data are not NA.
  3371. Returns
  3372. -------
  3373. numpy.ndarray
  3374. An array of locations (indices) of the labels from the Index
  3375. which correspond to the return values of the `asof` function
  3376. for every element in `where`.
  3377. """
  3378. locs = self.values[mask].searchsorted(where.values, side='right')
  3379. locs = np.where(locs > 0, locs - 1, 0)
  3380. result = np.arange(len(self))[mask].take(locs)
  3381. first = mask.argmax()
  3382. result[(locs == 0) & (where.values < self.values[first])] = -1
  3383. return result
  3384. def sort_values(self, return_indexer=False, ascending=True):
  3385. """
  3386. Return a sorted copy of the index.
  3387. Return a sorted copy of the index, and optionally return the indices
  3388. that sorted the index itself.
  3389. Parameters
  3390. ----------
  3391. return_indexer : bool, default False
  3392. Should the indices that would sort the index be returned.
  3393. ascending : bool, default True
  3394. Should the index values be sorted in an ascending order.
  3395. Returns
  3396. -------
  3397. sorted_index : pandas.Index
  3398. Sorted copy of the index.
  3399. indexer : numpy.ndarray, optional
  3400. The indices that the index itself was sorted by.
  3401. See Also
  3402. --------
  3403. pandas.Series.sort_values : Sort values of a Series.
  3404. pandas.DataFrame.sort_values : Sort values in a DataFrame.
  3405. Examples
  3406. --------
  3407. >>> idx = pd.Index([10, 100, 1, 1000])
  3408. >>> idx
  3409. Int64Index([10, 100, 1, 1000], dtype='int64')
  3410. Sort values in ascending order (default behavior).
  3411. >>> idx.sort_values()
  3412. Int64Index([1, 10, 100, 1000], dtype='int64')
  3413. Sort values in descending order, and also get the indices `idx` was
  3414. sorted by.
  3415. >>> idx.sort_values(ascending=False, return_indexer=True)
  3416. (Int64Index([1000, 100, 10, 1], dtype='int64'), array([3, 1, 0, 2]))
  3417. """
  3418. _as = self.argsort()
  3419. if not ascending:
  3420. _as = _as[::-1]
  3421. sorted_index = self.take(_as)
  3422. if return_indexer:
  3423. return sorted_index, _as
  3424. else:
  3425. return sorted_index
  3426. def sort(self, *args, **kwargs):
  3427. raise TypeError("cannot sort an Index object in-place, use "
  3428. "sort_values instead")
  3429. def shift(self, periods=1, freq=None):
  3430. """
  3431. Shift index by desired number of time frequency increments.
  3432. This method is for shifting the values of datetime-like indexes
  3433. by a specified time increment a given number of times.
  3434. Parameters
  3435. ----------
  3436. periods : int, default 1
  3437. Number of periods (or increments) to shift by,
  3438. can be positive or negative.
  3439. freq : pandas.DateOffset, pandas.Timedelta or string, optional
  3440. Frequency increment to shift by.
  3441. If None, the index is shifted by its own `freq` attribute.
  3442. Offset aliases are valid strings, e.g., 'D', 'W', 'M' etc.
  3443. Returns
  3444. -------
  3445. pandas.Index
  3446. shifted index
  3447. See Also
  3448. --------
  3449. Series.shift : Shift values of Series.
  3450. Notes
  3451. -----
  3452. This method is only implemented for datetime-like index classes,
  3453. i.e., DatetimeIndex, PeriodIndex and TimedeltaIndex.
  3454. Examples
  3455. --------
  3456. Put the first 5 month starts of 2011 into an index.
  3457. >>> month_starts = pd.date_range('1/1/2011', periods=5, freq='MS')
  3458. >>> month_starts
  3459. DatetimeIndex(['2011-01-01', '2011-02-01', '2011-03-01', '2011-04-01',
  3460. '2011-05-01'],
  3461. dtype='datetime64[ns]', freq='MS')
  3462. Shift the index by 10 days.
  3463. >>> month_starts.shift(10, freq='D')
  3464. DatetimeIndex(['2011-01-11', '2011-02-11', '2011-03-11', '2011-04-11',
  3465. '2011-05-11'],
  3466. dtype='datetime64[ns]', freq=None)
  3467. The default value of `freq` is the `freq` attribute of the index,
  3468. which is 'MS' (month start) in this example.
  3469. >>> month_starts.shift(10)
  3470. DatetimeIndex(['2011-11-01', '2011-12-01', '2012-01-01', '2012-02-01',
  3471. '2012-03-01'],
  3472. dtype='datetime64[ns]', freq='MS')
  3473. """
  3474. raise NotImplementedError("Not supported for type %s" %
  3475. type(self).__name__)
  3476. def argsort(self, *args, **kwargs):
  3477. """
  3478. Return the integer indices that would sort the index.
  3479. Parameters
  3480. ----------
  3481. *args
  3482. Passed to `numpy.ndarray.argsort`.
  3483. **kwargs
  3484. Passed to `numpy.ndarray.argsort`.
  3485. Returns
  3486. -------
  3487. numpy.ndarray
  3488. Integer indices that would sort the index if used as
  3489. an indexer.
  3490. See Also
  3491. --------
  3492. numpy.argsort : Similar method for NumPy arrays.
  3493. Index.sort_values : Return sorted copy of Index.
  3494. Examples
  3495. --------
  3496. >>> idx = pd.Index(['b', 'a', 'd', 'c'])
  3497. >>> idx
  3498. Index(['b', 'a', 'd', 'c'], dtype='object')
  3499. >>> order = idx.argsort()
  3500. >>> order
  3501. array([1, 0, 3, 2])
  3502. >>> idx[order]
  3503. Index(['a', 'b', 'c', 'd'], dtype='object')
  3504. """
  3505. result = self.asi8
  3506. if result is None:
  3507. result = np.array(self)
  3508. return result.argsort(*args, **kwargs)
  3509. def get_value(self, series, key):
  3510. """
  3511. Fast lookup of value from 1-dimensional ndarray. Only use this if you
  3512. know what you're doing.
  3513. """
  3514. # if we have something that is Index-like, then
  3515. # use this, e.g. DatetimeIndex
  3516. # Things like `Series._get_value` (via .at) pass the EA directly here.
  3517. s = getattr(series, '_values', series)
  3518. if isinstance(s, (ExtensionArray, Index)) and is_scalar(key):
  3519. # GH 20882, 21257
  3520. # Unify Index and ExtensionArray treatment
  3521. # First try to convert the key to a location
  3522. # If that fails, raise a KeyError if an integer
  3523. # index, otherwise, see if key is an integer, and
  3524. # try that
  3525. try:
  3526. iloc = self.get_loc(key)
  3527. return s[iloc]
  3528. except KeyError:
  3529. if (len(self) > 0 and
  3530. (self.holds_integer() or self.is_boolean())):
  3531. raise
  3532. elif is_integer(key):
  3533. return s[key]
  3534. s = com.values_from_object(series)
  3535. k = com.values_from_object(key)
  3536. k = self._convert_scalar_indexer(k, kind='getitem')
  3537. try:
  3538. return self._engine.get_value(s, k,
  3539. tz=getattr(series.dtype, 'tz', None))
  3540. except KeyError as e1:
  3541. if len(self) > 0 and (self.holds_integer() or self.is_boolean()):
  3542. raise
  3543. try:
  3544. return libindex.get_value_box(s, key)
  3545. except IndexError:
  3546. raise
  3547. except TypeError:
  3548. # generator/iterator-like
  3549. if is_iterator(key):
  3550. raise InvalidIndexError(key)
  3551. else:
  3552. raise e1
  3553. except Exception: # pragma: no cover
  3554. raise e1
  3555. except TypeError:
  3556. # python 3
  3557. if is_scalar(key): # pragma: no cover
  3558. raise IndexError(key)
  3559. raise InvalidIndexError(key)
  3560. def set_value(self, arr, key, value):
  3561. """
  3562. Fast lookup of value from 1-dimensional ndarray.
  3563. Notes
  3564. -----
  3565. Only use this if you know what you're doing.
  3566. """
  3567. self._engine.set_value(com.values_from_object(arr),
  3568. com.values_from_object(key), value)
  3569. _index_shared_docs['get_indexer_non_unique'] = """
  3570. Compute indexer and mask for new index given the current index. The
  3571. indexer should be then used as an input to ndarray.take to align the
  3572. current data to the new index.
  3573. Parameters
  3574. ----------
  3575. target : %(target_klass)s
  3576. Returns
  3577. -------
  3578. indexer : ndarray of int
  3579. Integers from 0 to n - 1 indicating that the index at these
  3580. positions matches the corresponding target values. Missing values
  3581. in the target are marked by -1.
  3582. missing : ndarray of int
  3583. An indexer into the target of the values not found.
  3584. These correspond to the -1 in the indexer array
  3585. """
  3586. @Appender(_index_shared_docs['get_indexer_non_unique'] % _index_doc_kwargs)
  3587. def get_indexer_non_unique(self, target):
  3588. target = ensure_index(target)
  3589. if is_categorical(target):
  3590. target = target.astype(target.dtype.categories.dtype)
  3591. pself, ptarget = self._maybe_promote(target)
  3592. if pself is not self or ptarget is not target:
  3593. return pself.get_indexer_non_unique(ptarget)
  3594. if self.is_all_dates:
  3595. self = Index(self.asi8)
  3596. tgt_values = target.asi8
  3597. else:
  3598. tgt_values = target._ndarray_values
  3599. indexer, missing = self._engine.get_indexer_non_unique(tgt_values)
  3600. return ensure_platform_int(indexer), missing
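# --- Illustrative sketch (editorial addition, not pandas source) ---------
# With a non-unique index every matching position is returned, and the
# second array indexes the target values that were not found
# (representative output):
# >>> import pandas as pd
# >>> pd.Index(['a', 'b', 'a']).get_indexer_non_unique(['a', 'c'])
# (array([ 0,  2, -1]), array([1]))
# --------------------------------------------------------------------------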
  3601. def get_indexer_for(self, target, **kwargs):
  3602. """
  3603. Guaranteed return of an indexer even when non-unique.
  3604. This dispatches to get_indexer or get_indexer_nonunique
  3605. as appropriate.
  3606. """
  3607. if self.is_unique:
  3608. return self.get_indexer(target, **kwargs)
  3609. indexer, _ = self.get_indexer_non_unique(target, **kwargs)
  3610. return indexer
  3611. def _maybe_promote(self, other):
  3612. # A hack, but it works
  3613. from pandas import DatetimeIndex
  3614. if self.inferred_type == 'date' and isinstance(other, DatetimeIndex):
  3615. return DatetimeIndex(self), other
  3616. elif self.inferred_type == 'boolean':
  3617. if not is_object_dtype(self.dtype):
  3618. return self.astype('object'), other.astype('object')
  3619. return self, other
  3620. def groupby(self, values):
  3621. """
  3622. Group the index labels by a given array of values.
  3623. Parameters
  3624. ----------
  3625. values : array
  3626. Values used to determine the groups.
  3627. Returns
  3628. -------
  3629. groups : dict
  3630. {group name -> group labels}
  3631. """
  3632. # TODO: if we are a MultiIndex, we can do better
  3633. # that converting to tuples
  3634. if isinstance(values, ABCMultiIndex):
  3635. values = values.values
  3636. values = ensure_categorical(values)
  3637. result = values._reverse_indexer()
  3638. # map to the label
  3639. result = {k: self.take(v) for k, v in compat.iteritems(result)}
  3640. return result
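# --- Illustrative sketch (editorial addition, not pandas source) ---------
# ``groupby`` maps each distinct grouping value to the index labels at
# the corresponding positions (representative output):
# >>> import pandas as pd
# >>> pd.Index(['a', 'b', 'c', 'd']).groupby(['x', 'y', 'x', 'y'])
# {'x': Index(['a', 'c'], dtype='object'),
#  'y': Index(['b', 'd'], dtype='object')}
# --------------------------------------------------------------------------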
  3641. def map(self, mapper, na_action=None):
  3642. """
  3643. Map values using input correspondence (a dict, Series, or function).
  3644. Parameters
  3645. ----------
  3646. mapper : function, dict, or Series
  3647. Mapping correspondence.
  3648. na_action : {None, 'ignore'}
  3649. If 'ignore', propagate NA values, without passing them to the
  3650. mapping correspondence.
  3651. Returns
  3652. -------
  3653. applied : Union[Index, MultiIndex], inferred
  3654. The output of the mapping function applied to the index.
  3655. If the function returns a tuple with more than one element
  3656. a MultiIndex will be returned.
  3657. """
  3658. from .multi import MultiIndex
  3659. new_values = super(Index, self)._map_values(
  3660. mapper, na_action=na_action)
  3661. attributes = self._get_attributes_dict()
  3662. # we can return a MultiIndex
  3663. if new_values.size and isinstance(new_values[0], tuple):
  3664. if isinstance(self, MultiIndex):
  3665. names = self.names
  3666. elif attributes.get('name'):
  3667. names = [attributes.get('name')] * len(new_values[0])
  3668. else:
  3669. names = None
  3670. return MultiIndex.from_tuples(new_values,
  3671. names=names)
  3672. attributes['copy'] = False
  3673. if not new_values.size:
  3674. # empty
  3675. attributes['dtype'] = self.dtype
  3676. return Index(new_values, **attributes)
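# --- Illustrative sketch (editorial addition, not pandas source) ---------
# ``map`` accepts a function, dict, or Series; keys missing from a dict
# mapper become NaN and the result dtype is inferred:
# >>> import pandas as pd
# >>> pd.Index([1, 2, 3]).map(lambda x: x * 10)
# Int64Index([10, 20, 30], dtype='int64')
# >>> pd.Index([1, 2, 3]).map({1: 'a', 2: 'b'})
# Index(['a', 'b', nan], dtype='object')
# --------------------------------------------------------------------------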
  3677. def isin(self, values, level=None):
  3678. """
  3679. Return a boolean array where the index values are in `values`.
  3680. Compute boolean array of whether each index value is found in the
  3681. passed set of values. The length of the returned boolean array matches
  3682. the length of the index.
  3683. Parameters
  3684. ----------
  3685. values : set or list-like
  3686. Sought values.
  3687. .. versionadded:: 0.18.1
  3688. Support for values as a set.
  3689. level : str or int, optional
  3690. Name or position of the index level to use (if the index is a
  3691. `MultiIndex`).
  3692. Returns
  3693. -------
  3694. is_contained : ndarray
  3695. NumPy array of boolean values.
  3696. See Also
  3697. --------
  3698. Series.isin : Same for Series.
  3699. DataFrame.isin : Same method for DataFrames.
  3700. Notes
  3701. -----
  3702. In the case of `MultiIndex` you must either specify `values` as a
  3703. list-like object containing tuples that are the same length as the
  3704. number of levels, or specify `level`. Otherwise it will raise a
  3705. ``ValueError``.
  3706. If `level` is specified:
  3707. - if it is the name of one *and only one* index level, use that level;
  3708. - otherwise it should be a number indicating level position.
  3709. Examples
  3710. --------
  3711. >>> idx = pd.Index([1,2,3])
  3712. >>> idx
  3713. Int64Index([1, 2, 3], dtype='int64')
  3714. Check whether each index value in a list of values.
  3715. >>> idx.isin([1, 4])
  3716. array([ True, False, False])
  3717. >>> midx = pd.MultiIndex.from_arrays([[1,2,3],
  3718. ... ['red', 'blue', 'green']],
  3719. ... names=('number', 'color'))
  3720. >>> midx
  3721. MultiIndex(levels=[[1, 2, 3], ['blue', 'green', 'red']],
  3722. codes=[[0, 1, 2], [2, 0, 1]],
  3723. names=['number', 'color'])
  3724. Check whether the strings in the 'color' level of the MultiIndex
  3725. are in a list of colors.
  3726. >>> midx.isin(['red', 'orange', 'yellow'], level='color')
  3727. array([ True, False, False])
  3728. To check across the levels of a MultiIndex, pass a list of tuples:
  3729. >>> midx.isin([(1, 'red'), (3, 'red')])
  3730. array([ True, False, False])
  3731. For a DatetimeIndex, string values in `values` are converted to
  3732. Timestamps.
  3733. >>> dates = ['2000-03-11', '2000-03-12', '2000-03-13']
  3734. >>> dti = pd.to_datetime(dates)
  3735. >>> dti
  3736. DatetimeIndex(['2000-03-11', '2000-03-12', '2000-03-13'],
  3737. dtype='datetime64[ns]', freq=None)
  3738. >>> dti.isin(['2000-03-11'])
  3739. array([ True, False, False])
  3740. """
  3741. if level is not None:
  3742. self._validate_index_level(level)
  3743. return algos.isin(self, values)
  3744. def _get_string_slice(self, key, use_lhs=True, use_rhs=True):
  3745. # this is for partial string indexing,
  3746. # overridden in DatetimeIndex, TimedeltaIndex and PeriodIndex
  3747. raise NotImplementedError
  3748. def slice_indexer(self, start=None, end=None, step=None, kind=None):
  3749. """
  3750. For an ordered or unique index, compute the slice indexer for input
  3751. labels and step.
  3752. Parameters
  3753. ----------
  3754. start : label, default None
  3755. If None, defaults to the beginning
  3756. end : label, default None
  3757. If None, defaults to the end
  3758. step : int, default None
  3759. kind : string, default None
  3760. Returns
  3761. -------
  3762. indexer : slice
  3763. Raises
  3764. ------
  3765. KeyError : If key does not exist, or key is not unique and index is
  3766. not ordered.
  3767. Notes
  3768. -----
  3769. This function assumes that the data is sorted, so use at your own peril
  3770. Examples
3771. --------
  3772. This is a method on all index types. For example you can do:
  3773. >>> idx = pd.Index(list('abcd'))
  3774. >>> idx.slice_indexer(start='b', end='c')
  3775. slice(1, 3)
  3776. >>> idx = pd.MultiIndex.from_arrays([list('abcd'), list('efgh')])
  3777. >>> idx.slice_indexer(start='b', end=('c', 'g'))
  3778. slice(1, 3)
  3779. """
  3780. start_slice, end_slice = self.slice_locs(start, end, step=step,
  3781. kind=kind)
  3782. # return a slice
  3783. if not is_scalar(start_slice):
  3784. raise AssertionError("Start slice bound is non-scalar")
  3785. if not is_scalar(end_slice):
  3786. raise AssertionError("End slice bound is non-scalar")
  3787. return slice(start_slice, end_slice, step)
  3788. def _maybe_cast_indexer(self, key):
  3789. """
  3790. If we have a float key and are not a floating index, then try to cast
  3791. to an int if equivalent.
  3792. """
  3793. if is_float(key) and not self.is_floating():
  3794. try:
  3795. ckey = int(key)
  3796. if ckey == key:
  3797. key = ckey
  3798. except (OverflowError, ValueError, TypeError):
  3799. pass
  3800. return key
  3801. def _validate_indexer(self, form, key, kind):
  3802. """
3803. If we are a positional indexer, validate that we have appropriately
3804. typed bounds (they must be integers).
  3805. """
  3806. assert kind in ['ix', 'loc', 'getitem', 'iloc']
  3807. if key is None:
  3808. pass
  3809. elif is_integer(key):
  3810. pass
  3811. elif kind in ['iloc', 'getitem']:
  3812. self._invalid_indexer(form, key)
  3813. return key
  3814. _index_shared_docs['_maybe_cast_slice_bound'] = """
  3815. This function should be overloaded in subclasses that allow non-trivial
  3816. casting on label-slice bounds, e.g. datetime-like indices allowing
  3817. strings containing formatted datetimes.
  3818. Parameters
  3819. ----------
  3820. label : object
  3821. side : {'left', 'right'}
  3822. kind : {'ix', 'loc', 'getitem'}
  3823. Returns
  3824. -------
  3825. label : object
  3826. Notes
  3827. -----
  3828. Value of `side` parameter should be validated in caller.
  3829. """
  3830. @Appender(_index_shared_docs['_maybe_cast_slice_bound'])
  3831. def _maybe_cast_slice_bound(self, label, side, kind):
  3832. assert kind in ['ix', 'loc', 'getitem', None]
  3833. # We are a plain index here (sub-class override this method if they
  3834. # wish to have special treatment for floats/ints, e.g. Float64Index and
  3835. # datetimelike Indexes
  3836. # reject them
  3837. if is_float(label):
  3838. if not (kind in ['ix'] and (self.holds_integer() or
  3839. self.is_floating())):
  3840. self._invalid_indexer('slice', label)
  3841. # we are trying to find integer bounds on a non-integer based index
  3842. # this is rejected (generally .loc gets you here)
  3843. elif is_integer(label):
  3844. self._invalid_indexer('slice', label)
  3845. return label
  3846. def _searchsorted_monotonic(self, label, side='left'):
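        # Illustrative sketch: on an increasing Index(list('aabc')), a call
        # like _searchsorted_monotonic('b', side='left') would give 2 and
        # side='right' would give 3, positions usable as slice bounds.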
        if self.is_monotonic_increasing:
            return self.searchsorted(label, side=side)
        elif self.is_monotonic_decreasing:
            # np.searchsorted expects ascending sort order, have to reverse
            # everything for it to work (element ordering, search side and
            # resulting value).
            pos = self[::-1].searchsorted(label, side='right' if side == 'left'
                                          else 'left')
            return len(self) - pos

        raise ValueError('index must be monotonic increasing or decreasing')

    def _get_loc_only_exact_matches(self, key):
        """
        This is overridden on subclasses (namely, IntervalIndex) to control
        get_slice_bound.
        """
        return self.get_loc(key)

    def get_slice_bound(self, label, side, kind):
        """
        Calculate slice bound that corresponds to given label.

        Returns leftmost (one-past-the-rightmost if ``side=='right'``) position
        of given label.

        Parameters
        ----------
        label : object
        side : {'left', 'right'}
        kind : {'ix', 'loc', 'getitem'}
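
        Examples
        --------
        A minimal sketch on a monotonic object-dtype Index (outputs are the
        values this implementation is expected to return):

        >>> idx = pd.Index(list('abbd'))
        >>> idx.get_slice_bound('b', side='left', kind='loc')
        1
        >>> idx.get_slice_bound('b', side='right', kind='loc')
        3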
  3873. """
  3874. assert kind in ['ix', 'loc', 'getitem', None]
  3875. if side not in ('left', 'right'):
  3876. raise ValueError("Invalid value for side kwarg,"
  3877. " must be either 'left' or 'right': %s" %
  3878. (side, ))
  3879. original_label = label
  3880. # For datetime indices label may be a string that has to be converted
  3881. # to datetime boundary according to its resolution.
  3882. label = self._maybe_cast_slice_bound(label, side, kind)
  3883. # we need to look up the label
  3884. try:
  3885. slc = self._get_loc_only_exact_matches(label)
  3886. except KeyError as err:
  3887. try:
  3888. return self._searchsorted_monotonic(label, side)
  3889. except ValueError:
  3890. # raise the original KeyError
  3891. raise err
  3892. if isinstance(slc, np.ndarray):
  3893. # get_loc may return a boolean array or an array of indices, which
  3894. # is OK as long as they are representable by a slice.
  3895. if is_bool_dtype(slc):
  3896. slc = lib.maybe_booleans_to_slice(slc.view('u1'))
  3897. else:
  3898. slc = lib.maybe_indices_to_slice(slc.astype('i8'), len(self))
  3899. if isinstance(slc, np.ndarray):
  3900. raise KeyError("Cannot get %s slice bound for non-unique "
  3901. "label: %r" % (side, original_label))
  3902. if isinstance(slc, slice):
  3903. if side == 'left':
  3904. return slc.start
  3905. else:
  3906. return slc.stop
  3907. else:
  3908. if side == 'right':
  3909. return slc + 1
  3910. else:
  3911. return slc
  3912. def slice_locs(self, start=None, end=None, step=None, kind=None):
  3913. """
  3914. Compute slice locations for input labels.
  3915. Parameters
  3916. ----------
  3917. start : label, default None
  3918. If None, defaults to the beginning
  3919. end : label, default None
  3920. If None, defaults to the end
  3921. step : int, defaults None
  3922. If None, defaults to 1
  3923. kind : {'ix', 'loc', 'getitem'} or None
  3924. Returns
  3925. -------
  3926. start, end : int
  3927. See Also
  3928. --------
  3929. Index.get_loc : Get location for a single label.
  3930. Notes
  3931. -----
  3932. This method only works if the index is monotonic or unique.
  3933. Examples
  3934. ---------
  3935. >>> idx = pd.Index(list('abcd'))
  3936. >>> idx.slice_locs(start='b', end='c')
  3937. (1, 3)
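
        With a negative ``step`` the bounds come back swapped, ready for a
        reverse slice (illustrative sketch, assuming the same index as
        above):

        >>> idx.slice_locs(start='c', end='b', step=-1)
        (2, 0)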
  3938. """
  3939. inc = (step is None or step >= 0)
  3940. if not inc:
  3941. # If it's a reverse slice, temporarily swap bounds.
  3942. start, end = end, start
  3943. start_slice = None
  3944. if start is not None:
  3945. start_slice = self.get_slice_bound(start, 'left', kind)
  3946. if start_slice is None:
  3947. start_slice = 0
  3948. end_slice = None
  3949. if end is not None:
  3950. end_slice = self.get_slice_bound(end, 'right', kind)
  3951. if end_slice is None:
  3952. end_slice = len(self)
  3953. if not inc:
  3954. # Bounds at this moment are swapped, swap them back and shift by 1.
  3955. #
  3956. # slice_locs('B', 'A', step=-1): s='B', e='A'
  3957. #
  3958. # s='A' e='B'
  3959. # AFTER SWAP: | |
  3960. # v ------------------> V
  3961. # -----------------------------------
  3962. # | | |A|A|A|A| | | | | |B|B| | | | |
  3963. # -----------------------------------
  3964. # ^ <------------------ ^
  3965. # SHOULD BE: | |
  3966. # end=s-1 start=e-1
  3967. #
  3968. end_slice, start_slice = start_slice - 1, end_slice - 1
  3969. # i == -1 triggers ``len(self) + i`` selection that points to the
  3970. # last element, not before-the-first one, subtracting len(self)
  3971. # compensates that.
  3972. if end_slice == -1:
  3973. end_slice -= len(self)
  3974. if start_slice == -1:
  3975. start_slice -= len(self)
  3976. return start_slice, end_slice
  3977. def delete(self, loc):
  3978. """
  3979. Make new Index with passed location(-s) deleted.
  3980. Returns
  3981. -------
  3982. new_index : Index
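
        Examples
        --------
        Illustrative only (an object-dtype Index is assumed; outputs are
        expected values):

        >>> idx = pd.Index(['a', 'b', 'c'])
        >>> idx.delete(1)
        Index(['a', 'c'], dtype='object')
        >>> idx.delete([0, 2])
        Index(['b'], dtype='object')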
  3983. """
  3984. return self._shallow_copy(np.delete(self._data, loc))
  3985. def insert(self, loc, item):
  3986. """
  3987. Make new Index inserting new item at location.
  3988. Follows Python list.append semantics for negative values.
  3989. Parameters
  3990. ----------
  3991. loc : int
  3992. item : object
  3993. Returns
  3994. -------
  3995. new_index : Index
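
        Examples
        --------
        A minimal sketch (an object-dtype Index is assumed; output is the
        expected value):

        >>> idx = pd.Index(['a', 'b', 'c'])
        >>> idx.insert(1, 'z')
        Index(['a', 'z', 'b', 'c'], dtype='object')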
  3996. """
  3997. _self = np.asarray(self)
  3998. item = self._coerce_scalar_to_index(item)._ndarray_values
  3999. idx = np.concatenate((_self[:loc], item, _self[loc:]))
  4000. return self._shallow_copy_with_infer(idx)
  4001. def drop(self, labels, errors='raise'):
  4002. """
  4003. Make new Index with passed list of labels deleted.
  4004. Parameters
  4005. ----------
  4006. labels : array-like
  4007. errors : {'ignore', 'raise'}, default 'raise'
  4008. If 'ignore', suppress error and existing labels are dropped.
  4009. Returns
  4010. -------
  4011. dropped : Index
  4012. Raises
  4013. ------
  4014. KeyError
  4015. If not all of the labels are found in the selected axis
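
        Examples
        --------
        Illustrative only (an object-dtype Index is assumed; output is the
        expected value):

        >>> idx = pd.Index(['a', 'b', 'c'])
        >>> idx.drop(['a'])
        Index(['b', 'c'], dtype='object')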
  4016. """
  4017. arr_dtype = 'object' if self.dtype == 'object' else None
  4018. labels = com.index_labels_to_array(labels, dtype=arr_dtype)
  4019. indexer = self.get_indexer(labels)
  4020. mask = indexer == -1
  4021. if mask.any():
  4022. if errors != 'ignore':
  4023. raise KeyError(
  4024. '{} not found in axis'.format(labels[mask]))
  4025. indexer = indexer[~mask]
  4026. return self.delete(indexer)
  4027. # --------------------------------------------------------------------
  4028. # Generated Arithmetic, Comparison, and Unary Methods
  4029. def _evaluate_with_timedelta_like(self, other, op):
  4030. # Timedelta knows how to operate with np.array, so dispatch to that
  4031. # operation and then wrap the results
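        # (e.g. multiplying an integer Index by a Timedelta would be handled
        # here, while add/sub against a numeric index is rejected just below;
        # illustrative note)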
        if self._is_numeric_dtype and op.__name__ in ['add', 'sub',
                                                      'radd', 'rsub']:
            raise TypeError("Operation {opname} between {cls} and {other} "
                            "is invalid".format(opname=op.__name__,
                                                cls=self.dtype,
                                                other=type(other).__name__))

        other = Timedelta(other)
        values = self.values

        with np.errstate(all='ignore'):
            result = op(values, other)

        attrs = self._get_attributes_dict()
        attrs = self._maybe_update_attributes(attrs)
        if op == divmod:
            return Index(result[0], **attrs), Index(result[1], **attrs)
        return Index(result, **attrs)

    def _evaluate_with_datetime_like(self, other, op):
        raise TypeError("can only perform ops with datetime like values")

    @classmethod
    def _add_comparison_methods(cls):
        """
        Add in comparison methods.
        """
        cls.__eq__ = _make_comparison_op(operator.eq, cls)
        cls.__ne__ = _make_comparison_op(operator.ne, cls)
        cls.__lt__ = _make_comparison_op(operator.lt, cls)
        cls.__gt__ = _make_comparison_op(operator.gt, cls)
        cls.__le__ = _make_comparison_op(operator.le, cls)
        cls.__ge__ = _make_comparison_op(operator.ge, cls)

    @classmethod
    def _add_numeric_methods_add_sub_disabled(cls):
        """
        Add in the numeric add/sub methods to disable.
        """
        cls.__add__ = make_invalid_op('__add__')
        cls.__radd__ = make_invalid_op('__radd__')
        cls.__iadd__ = make_invalid_op('__iadd__')
        cls.__sub__ = make_invalid_op('__sub__')
        cls.__rsub__ = make_invalid_op('__rsub__')
        cls.__isub__ = make_invalid_op('__isub__')

    @classmethod
    def _add_numeric_methods_disabled(cls):
        """
        Add in numeric methods to disable other than add/sub.
        """
        cls.__pow__ = make_invalid_op('__pow__')
        cls.__rpow__ = make_invalid_op('__rpow__')
        cls.__mul__ = make_invalid_op('__mul__')
        cls.__rmul__ = make_invalid_op('__rmul__')
        cls.__floordiv__ = make_invalid_op('__floordiv__')
        cls.__rfloordiv__ = make_invalid_op('__rfloordiv__')
        cls.__truediv__ = make_invalid_op('__truediv__')
        cls.__rtruediv__ = make_invalid_op('__rtruediv__')
        if not compat.PY3:
            cls.__div__ = make_invalid_op('__div__')
            cls.__rdiv__ = make_invalid_op('__rdiv__')
        cls.__mod__ = make_invalid_op('__mod__')
        cls.__divmod__ = make_invalid_op('__divmod__')
        cls.__neg__ = make_invalid_op('__neg__')
        cls.__pos__ = make_invalid_op('__pos__')
        cls.__abs__ = make_invalid_op('__abs__')
        cls.__inv__ = make_invalid_op('__inv__')
    def _maybe_update_attributes(self, attrs):
        """
        Update Index attributes (e.g. freq) depending on op.
        """
        return attrs

    def _validate_for_numeric_unaryop(self, op, opstr):
        """
        Validate if we can perform a numeric unary operation.
        """
        if not self._is_numeric_dtype:
            raise TypeError("cannot evaluate a numeric op "
                            "{opstr} for type: {typ}"
                            .format(opstr=opstr, typ=type(self).__name__))

    def _validate_for_numeric_binop(self, other, op):
        """
        Return valid other; evaluate or raise TypeError if we are not of
        the appropriate type.

        Notes
        -----
        This is an internal method called by ops.
        """
        opstr = '__{opname}__'.format(opname=op.__name__)

        # if we are an inheritor of numeric,
        # but not actually numeric (e.g. DatetimeIndex/PeriodIndex)
        if not self._is_numeric_dtype:
            raise TypeError("cannot evaluate a numeric op {opstr} "
                            "for type: {typ}"
                            .format(opstr=opstr, typ=type(self).__name__))

        if isinstance(other, Index):
            if not other._is_numeric_dtype:
                raise TypeError("cannot evaluate a numeric op "
                                "{opstr} with type: {typ}"
                                .format(opstr=opstr, typ=type(other)))
        elif isinstance(other, np.ndarray) and not other.ndim:
            other = other.item()

        if isinstance(other, (Index, ABCSeries, np.ndarray)):
            if len(self) != len(other):
                raise ValueError("cannot evaluate a numeric op with "
                                 "unequal lengths")
            other = com.values_from_object(other)
            if other.dtype.kind not in ['f', 'i', 'u']:
                raise TypeError("cannot evaluate a numeric op "
                                "with a non-numeric dtype")
        elif isinstance(other, (ABCDateOffset, np.timedelta64, timedelta)):
            # higher up to handle
            pass
        elif isinstance(other, (datetime, np.datetime64)):
            # higher up to handle
            pass
        else:
            if not (is_float(other) or is_integer(other)):
                raise TypeError("can only perform ops with scalar values")

        return other
    @classmethod
    def _add_numeric_methods_binary(cls):
        """
        Add in numeric methods.
        """
        cls.__add__ = _make_arithmetic_op(operator.add, cls)
        cls.__radd__ = _make_arithmetic_op(ops.radd, cls)
        cls.__sub__ = _make_arithmetic_op(operator.sub, cls)
        cls.__rsub__ = _make_arithmetic_op(ops.rsub, cls)
        cls.__rpow__ = _make_arithmetic_op(ops.rpow, cls)
        cls.__pow__ = _make_arithmetic_op(operator.pow, cls)

        cls.__truediv__ = _make_arithmetic_op(operator.truediv, cls)
        cls.__rtruediv__ = _make_arithmetic_op(ops.rtruediv, cls)
        if not compat.PY3:
            cls.__div__ = _make_arithmetic_op(operator.div, cls)
            cls.__rdiv__ = _make_arithmetic_op(ops.rdiv, cls)

        # TODO: rmod? rdivmod?
        cls.__mod__ = _make_arithmetic_op(operator.mod, cls)
        cls.__floordiv__ = _make_arithmetic_op(operator.floordiv, cls)
        cls.__rfloordiv__ = _make_arithmetic_op(ops.rfloordiv, cls)
        cls.__divmod__ = _make_arithmetic_op(divmod, cls)
        cls.__mul__ = _make_arithmetic_op(operator.mul, cls)
        cls.__rmul__ = _make_arithmetic_op(ops.rmul, cls)

    @classmethod
    def _add_numeric_methods_unary(cls):
        """
        Add in numeric unary methods.
        """
        def _make_evaluate_unary(op, opstr):

            def _evaluate_numeric_unary(self):

                self._validate_for_numeric_unaryop(op, opstr)
                attrs = self._get_attributes_dict()
                attrs = self._maybe_update_attributes(attrs)
                return Index(op(self.values), **attrs)

            _evaluate_numeric_unary.__name__ = opstr
            return _evaluate_numeric_unary

        cls.__neg__ = _make_evaluate_unary(operator.neg, '__neg__')
        cls.__pos__ = _make_evaluate_unary(operator.pos, '__pos__')
        cls.__abs__ = _make_evaluate_unary(np.abs, '__abs__')
        cls.__inv__ = _make_evaluate_unary(lambda x: -x, '__inv__')

    @classmethod
    def _add_numeric_methods(cls):
        cls._add_numeric_methods_unary()
        cls._add_numeric_methods_binary()
    @classmethod
    def _add_logical_methods(cls):
        """
        Add in logical methods.
        """
        _doc = """
        %(desc)s

        Parameters
        ----------
        *args
            These parameters will be passed to numpy.%(outname)s.
        **kwargs
            These parameters will be passed to numpy.%(outname)s.

        Returns
        -------
        %(outname)s : bool or array_like (if axis is specified)
            A single element array_like may be converted to bool."""

        _index_shared_docs['index_all'] = dedent("""
        See Also
        --------
        pandas.Index.any : Return whether any element in an Index is True.
        pandas.Series.any : Return whether any element in a Series is True.
        pandas.Series.all : Return whether all elements in a Series are True.

        Notes
        -----
        Not a Number (NaN), positive infinity and negative infinity
        evaluate to True because these are not equal to zero.

        Examples
        --------
        **all**

        True, because nonzero integers are considered True.

        >>> pd.Index([1, 2, 3]).all()
        True

        False, because ``0`` is considered False.

        >>> pd.Index([0, 1, 2]).all()
        False

        **any**

        True, because ``1`` is considered True.

        >>> pd.Index([0, 0, 1]).any()
        True

        False, because ``0`` is considered False.

        >>> pd.Index([0, 0, 0]).any()
        False
        """)

        _index_shared_docs['index_any'] = dedent("""
        See Also
        --------
        pandas.Index.all : Return whether all elements are True.
        pandas.Series.all : Return whether all elements are True.

        Notes
        -----
        Not a Number (NaN), positive infinity and negative infinity
        evaluate to True because these are not equal to zero.

        Examples
        --------
        >>> index = pd.Index([0, 1, 2])
        >>> index.any()
        True

        >>> index = pd.Index([0, 0, 0])
        >>> index.any()
        False
        """)

        def _make_logical_function(name, desc, f):
            @Substitution(outname=name, desc=desc)
            @Appender(_index_shared_docs['index_' + name])
            @Appender(_doc)
            def logical_func(self, *args, **kwargs):
                result = f(self.values)
                if (isinstance(result, (np.ndarray, ABCSeries, Index)) and
                        result.ndim == 0):
                    # return NumPy type
                    return result.dtype.type(result.item())
                else:  # pragma: no cover
                    return result

            logical_func.__name__ = name
            return logical_func

        cls.all = _make_logical_function('all', 'Return whether all elements '
                                                'are True.',
                                         np.all)
        cls.any = _make_logical_function('any',
                                         'Return whether any element is True.',
                                         np.any)

    @classmethod
    def _add_logical_methods_disabled(cls):
        """
        Add in logical methods to disable.
        """
        cls.all = make_invalid_op('all')
        cls.any = make_invalid_op('any')


Index._add_numeric_methods_disabled()
Index._add_logical_methods()
Index._add_comparison_methods()

def ensure_index_from_sequences(sequences, names=None):
    """
    Construct an index from sequences of data.

    A single sequence returns an Index. Many sequences returns a
    MultiIndex.

    Parameters
    ----------
    sequences : sequence of sequences
    names : sequence of str

    Returns
    -------
    index : Index or MultiIndex

    Examples
    --------
    >>> ensure_index_from_sequences([[1, 2, 3]], names=['name'])
    Int64Index([1, 2, 3], dtype='int64', name='name')

    >>> ensure_index_from_sequences([['a', 'a'], ['a', 'b']],
    ...                             names=['L1', 'L2'])
    MultiIndex(levels=[['a'], ['a', 'b']],
               codes=[[0, 0], [0, 1]],
               names=['L1', 'L2'])

    See Also
    --------
    ensure_index
    """
    from .multi import MultiIndex

    if len(sequences) == 1:
        if names is not None:
            names = names[0]
        return Index(sequences[0], name=names)
    else:
        return MultiIndex.from_arrays(sequences, names=names)

def ensure_index(index_like, copy=False):
    """
    Ensure that we have an index from some index-like object.

    Parameters
    ----------
    index : sequence
        An Index or other sequence
    copy : bool

    Returns
    -------
    index : Index or MultiIndex

    Examples
    --------
    >>> ensure_index(['a', 'b'])
    Index(['a', 'b'], dtype='object')

    >>> ensure_index([('a', 'a'), ('b', 'c')])
    Index([('a', 'a'), ('b', 'c')], dtype='object')

    >>> ensure_index([['a', 'a'], ['b', 'c']])
    MultiIndex(levels=[['a'], ['b', 'c']],
               codes=[[0, 0], [0, 1]])

    See Also
    --------
    ensure_index_from_sequences
    """
    if isinstance(index_like, Index):
        if copy:
            index_like = index_like.copy()
        return index_like

    if hasattr(index_like, 'name'):
        return Index(index_like, name=index_like.name, copy=copy)

    if is_iterator(index_like):
        index_like = list(index_like)

    # must check for exactly list here because of strict type
    # check in clean_index_list
    if isinstance(index_like, list):
        if type(index_like) != list:
            index_like = list(index_like)

        converted, all_arrays = lib.clean_index_list(index_like)

        if len(converted) > 0 and all_arrays:
            from .multi import MultiIndex
            return MultiIndex.from_arrays(converted)
        else:
            index_like = converted
    else:
        # clean_index_list does the equivalent of copying
        # so only need to do this if not list instance
        if copy:
            from copy import copy
            index_like = copy(index_like)

    return Index(index_like)

def _ensure_has_len(seq):
    """
    If seq is an iterator, put its values into a list.
    """
    try:
        len(seq)
    except TypeError:
        return list(seq)
    else:
        return seq


def _trim_front(strings):
    """
    Trims leading spaces evenly among all strings.
    """
    trimmed = strings
    while len(strings) > 0 and all(x[0] == ' ' for x in trimmed):
        trimmed = [x[1:] for x in trimmed]
    return trimmed


def _validate_join_method(method):
    if method not in ['left', 'right', 'inner', 'outer']:
        raise ValueError('do not recognize join method %s' % method)


def default_index(n):
    from pandas.core.index import RangeIndex
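    # e.g. default_index(3) is equivalent to RangeIndex(start=0, stop=3)
    # (illustrative note)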
    return RangeIndex(0, n, name=None)