# pylint: disable=W0231,E1101
import collections
from datetime import timedelta
import functools
import gc
import json
import operator
from textwrap import dedent
import warnings
import weakref

import numpy as np

from pandas._libs import Timestamp, iNaT, properties
import pandas.compat as compat
from pandas.compat import (
    cPickle as pkl, isidentifier, lrange, lzip, map, set_function_name,
    string_types, to_str, zip)
from pandas.compat.numpy import function as nv
from pandas.errors import AbstractMethodError
from pandas.util._decorators import (
    Appender, Substitution, rewrite_axis_style_signature)
from pandas.util._validators import validate_bool_kwarg, validate_fillna_kwargs
from pandas.core.dtypes.cast import maybe_promote, maybe_upcast_putmask
from pandas.core.dtypes.common import (
    ensure_int64, ensure_object, is_bool, is_bool_dtype,
    is_datetime64_any_dtype, is_datetime64tz_dtype, is_dict_like,
    is_extension_array_dtype, is_integer, is_list_like, is_number,
    is_numeric_dtype, is_object_dtype, is_period_arraylike, is_re_compilable,
    is_scalar, is_timedelta64_dtype, pandas_dtype)
from pandas.core.dtypes.generic import ABCDataFrame, ABCPanel, ABCSeries
from pandas.core.dtypes.inference import is_hashable
from pandas.core.dtypes.missing import isna, notna

import pandas as pd
from pandas.core import config, missing, nanops
import pandas.core.algorithms as algos
from pandas.core.base import PandasObject, SelectionMixin
import pandas.core.common as com
from pandas.core.index import (
    Index, InvalidIndexError, MultiIndex, RangeIndex, ensure_index)
from pandas.core.indexes.datetimes import DatetimeIndex
from pandas.core.indexes.period import Period, PeriodIndex
import pandas.core.indexing as indexing
from pandas.core.internals import BlockManager
from pandas.core.ops import _align_method_FRAME
from pandas.io.formats.format import DataFrameFormatter, format_percentiles
from pandas.io.formats.printing import pprint_thing
from pandas.tseries.frequencies import to_offset
# goal is to be able to define the docs close to function, while still being
# able to share
_shared_docs = dict()
_shared_doc_kwargs = dict(
    axes='keywords for axes', klass='NDFrame',
    axes_single_arg='int or labels for object',
    args_transpose='axes to permute (int or label for object)',
    optional_by="""
        by : str or list of str
            Name or list of names to sort by""")

# sentinel value to use as kwarg in place of None when None has special meaning
# and needs to be distinguished from a user explicitly passing None.
sentinel = object()


def _single_replace(self, to_replace, method, inplace, limit):
    """
    Replace values in a Series using the fill method specified when no
    replacement value is given in the ``replace`` method.
    """
    if self.ndim != 1:
        raise TypeError('cannot replace {0} with method {1} on a {2}'
                        .format(to_replace, method, type(self).__name__))

    orig_dtype = self.dtype
    result = self if inplace else self.copy()
    fill_f = missing.get_fill_func(method)

    mask = missing.mask_missing(result.values, to_replace)
    values = fill_f(result.values, limit=limit, mask=mask)

    if values.dtype == orig_dtype and inplace:
        return

    result = pd.Series(values, index=self.index,
                       dtype=self.dtype).__finalize__(self)

    if inplace:
        self._update_inplace(result._data)
        return

    return result
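
# Hedged usage sketch (not part of the original module): _single_replace
# backs Series.replace when a fill method is given instead of a replacement
# value, so matched positions are filled from their neighbours.
#
#     >>> import pandas as pd
#     >>> s = pd.Series([0, 1, 2, 3, 4])
#     >>> s.replace(2, method='pad')  # 2 takes the preceding value, 1
#     0    0
#     1    1
#     2    1
#     3    3
#     4    4
#     dtype: int64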


class NDFrame(PandasObject, SelectionMixin):
    """
    N-dimensional analogue of DataFrame. Stores multi-dimensional data in a
    size-mutable, labeled data structure.

    Parameters
    ----------
    data : BlockManager
    axes : list
    copy : boolean, default False
    """
    _internal_names = ['_data', '_cacher', '_item_cache', '_cache', '_is_copy',
                       '_subtyp', '_name', '_index', '_default_kind',
                       '_default_fill_value', '_metadata', '__array_struct__',
                       '__array_interface__']
    _internal_names_set = set(_internal_names)
    _accessors = frozenset()
    _deprecations = frozenset(['as_blocks', 'blocks',
                               'convert_objects', 'is_copy'])
    _metadata = []
    _is_copy = None

    # dummy attribute so that datetime.__eq__(Series/DataFrame) defers
    # by returning NotImplemented
    timetuple = None
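
    # Hedged illustration (not in the original source): datetime's rich
    # comparisons return NotImplemented for any object that exposes a
    # ``timetuple`` attribute, which makes Python fall back to the reflected
    # Series/DataFrame comparison instead of plain object equality.
    #
    #     >>> from datetime import datetime
    #     >>> s = pd.Series([datetime(2019, 1, 1)])
    #     >>> datetime(2019, 1, 1) == s  # dispatches to Series.__eq__
    #     0    True
    #     dtype: bool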

    # ----------------------------------------------------------------------
    # Constructors

    def __init__(self, data, axes=None, copy=False, dtype=None,
                 fastpath=False):

        if not fastpath:
            if dtype is not None:
                data = data.astype(dtype)
            elif copy:
                data = data.copy()

            if axes is not None:
                for i, ax in enumerate(axes):
                    data = data.reindex_axis(ax, axis=i)

        object.__setattr__(self, '_is_copy', None)
        object.__setattr__(self, '_data', data)
        object.__setattr__(self, '_item_cache', {})
    def _init_mgr(self, mgr, axes=None, dtype=None, copy=False):
        """ passed a manager and an axes dict """
        for a, axe in axes.items():
            if axe is not None:
                mgr = mgr.reindex_axis(axe,
                                       axis=self._get_block_manager_axis(a),
                                       copy=False)

        # make a copy if explicitly requested
        if copy:
            mgr = mgr.copy()
        if dtype is not None:
            # avoid further copies if we can
            if len(mgr.blocks) > 1 or mgr.blocks[0].values.dtype != dtype:
                mgr = mgr.astype(dtype=dtype)
        return mgr
    # ----------------------------------------------------------------------

    @property
    def is_copy(self):
        """
        Return the copy.
        """
        warnings.warn("Attribute 'is_copy' is deprecated and will be removed "
                      "in a future version.", FutureWarning, stacklevel=2)
        return self._is_copy

    @is_copy.setter
    def is_copy(self, msg):
        warnings.warn("Attribute 'is_copy' is deprecated and will be removed "
                      "in a future version.", FutureWarning, stacklevel=2)
        self._is_copy = msg
    def _validate_dtype(self, dtype):
        """ validate the passed dtype """

        if dtype is not None:
            dtype = pandas_dtype(dtype)

            # a compound dtype
            if dtype.kind == 'V':
                raise NotImplementedError("compound dtypes are not implemented"
                                          " in the {0} constructor"
                                          .format(self.__class__.__name__))

        return dtype
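
    # Hedged illustration (not part of the original source): structured
    # (compound) NumPy dtypes report kind 'V', which is exactly what the
    # check above rejects.
    #
    #     >>> import numpy as np
    #     >>> np.dtype([('x', 'i4'), ('y', 'f8')]).kind
    #     'V'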
    # ----------------------------------------------------------------------
    # Construction

    @property
    def _constructor(self):
        """Used when a manipulation result has the same dimensions as the
        original.
        """
        raise AbstractMethodError(self)

    @property
    def _constructor_sliced(self):
        """Used when a manipulation result has one dimension less than the
        original, such as slicing a single column out of a DataFrame.
        """
        raise AbstractMethodError(self)

    @property
    def _constructor_expanddim(self):
        """Used when a manipulation result has one dimension more than the
        original, such as Series.to_frame() and DataFrame.to_panel().
        """
        raise NotImplementedError
    # ----------------------------------------------------------------------
    # Axis

    @classmethod
    def _setup_axes(cls, axes, info_axis=None, stat_axis=None, aliases=None,
                    slicers=None, axes_are_reversed=False, build_axes=True,
                    ns=None, docs=None):
        """Provide axes setup for the major PandasObjects.

        Parameters
        ----------
        axes : the names of the axes in order (lowest to highest)
        info_axis : the axis of the selector dimension (int)
        stat_axis : the number of the axis for the default stats (int)
        aliases : other names for a single axis (dict)
        slicers : how axes slice to others (dict)
        axes_are_reversed : boolean whether to treat passed axes as
            reversed (DataFrame)
        build_axes : setup the axis properties (default True)
        docs : docstrings for the axis properties (dict)
        """

        cls._AXIS_ORDERS = axes
        cls._AXIS_NUMBERS = {a: i for i, a in enumerate(axes)}
        cls._AXIS_LEN = len(axes)
        cls._AXIS_ALIASES = aliases or dict()
        cls._AXIS_IALIASES = {v: k for k, v in cls._AXIS_ALIASES.items()}
        cls._AXIS_NAMES = dict(enumerate(axes))
        cls._AXIS_SLICEMAP = slicers or None
        cls._AXIS_REVERSED = axes_are_reversed

        # typ
        setattr(cls, '_typ', cls.__name__.lower())

        # indexing support
        cls._ix = None

        if info_axis is not None:
            cls._info_axis_number = info_axis
            cls._info_axis_name = axes[info_axis]

        if stat_axis is not None:
            cls._stat_axis_number = stat_axis
            cls._stat_axis_name = axes[stat_axis]

        # setup the actual axis
        if build_axes:

            def set_axis(a, i):
                setattr(cls, a, properties.AxisProperty(i, docs.get(a, a)))
                cls._internal_names_set.add(a)

            if axes_are_reversed:
                m = cls._AXIS_LEN - 1
                for i, a in cls._AXIS_NAMES.items():
                    set_axis(a, m - i)
            else:
                for i, a in cls._AXIS_NAMES.items():
                    set_axis(a, i)

        assert not isinstance(ns, dict)
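
    # Hedged illustration of how a subclass wires this up (adapted from the
    # DataFrame definition in frame.py; treat the docstrings below as
    # placeholders, not the originals):
    #
    #     DataFrame._setup_axes(['index', 'columns'], info_axis=1,
    #                           stat_axis=0, axes_are_reversed=True,
    #                           aliases={'rows': 0},
    #                           docs={'index': 'The index (row labels).',
    #                                 'columns': 'The column labels.'})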
    def _construct_axes_dict(self, axes=None, **kwargs):
        """Return an axes dictionary for myself."""
        d = {a: self._get_axis(a) for a in (axes or self._AXIS_ORDERS)}
        d.update(kwargs)
        return d

    @staticmethod
    def _construct_axes_dict_from(self, axes, **kwargs):
        """Return an axes dictionary for the passed axes."""
        d = {a: ax for a, ax in zip(self._AXIS_ORDERS, axes)}
        d.update(kwargs)
        return d

    def _construct_axes_dict_for_slice(self, axes=None, **kwargs):
        """Return an axes dictionary for myself."""
        d = {self._AXIS_SLICEMAP[a]: self._get_axis(a)
             for a in (axes or self._AXIS_ORDERS)}
        d.update(kwargs)
        return d
    def _construct_axes_from_arguments(
            self, args, kwargs, require_all=False, sentinel=None):
        """Construct and return axes if supplied in args/kwargs.

        If require_all, raise if not all axis arguments are supplied.
        Return a tuple of (axes, kwargs).

        sentinel specifies the default parameter when an axis is not
        supplied; useful to distinguish when a user explicitly passes None
        in scenarios where None has special meaning.
        """

        # construct the args
        args = list(args)
        for a in self._AXIS_ORDERS:

            # if we have an alias for this axis
            alias = self._AXIS_IALIASES.get(a)
            if alias is not None:
                if a in kwargs:
                    if alias in kwargs:
                        raise TypeError("arguments are mutually exclusive "
                                        "for [%s,%s]" % (a, alias))
                    continue
                if alias in kwargs:
                    kwargs[a] = kwargs.pop(alias)
                    continue

            # look for an argument by position
            if a not in kwargs:
                try:
                    kwargs[a] = args.pop(0)
                except IndexError:
                    if require_all:
                        raise TypeError("not enough/duplicate arguments "
                                        "specified!")

        axes = {a: kwargs.pop(a, sentinel) for a in self._AXIS_ORDERS}
        return axes, kwargs
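
    # Hedged illustration (not in the original source): for a DataFrame,
    # _AXIS_ORDERS is ['index', 'columns'], so positional and keyword axis
    # arguments resolve like this:
    #
    #     >>> df = pd.DataFrame({'A': [1]})
    #     >>> df._construct_axes_from_arguments((['r0'],), {'columns': ['B']})
    #     ({'index': ['r0'], 'columns': ['B']}, {})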
    @classmethod
    def _from_axes(cls, data, axes, **kwargs):
        # for construction from BlockManager
        if isinstance(data, BlockManager):
            return cls(data, **kwargs)
        else:
            if cls._AXIS_REVERSED:
                axes = axes[::-1]
            d = cls._construct_axes_dict_from(cls, axes, copy=False)
            d.update(kwargs)
            return cls(data, **d)
    @classmethod
    def _get_axis_number(cls, axis):
        axis = cls._AXIS_ALIASES.get(axis, axis)
        if is_integer(axis):
            if axis in cls._AXIS_NAMES:
                return axis
        else:
            try:
                return cls._AXIS_NUMBERS[axis]
            except KeyError:
                pass
        # report the class name itself; type(cls) would always be `type`
        raise ValueError('No axis named {0} for object type {1}'
                         .format(axis, cls.__name__))

    @classmethod
    def _get_axis_name(cls, axis):
        axis = cls._AXIS_ALIASES.get(axis, axis)
        if isinstance(axis, string_types):
            if axis in cls._AXIS_NUMBERS:
                return axis
        else:
            try:
                return cls._AXIS_NAMES[axis]
            except KeyError:
                pass
        raise ValueError('No axis named {0} for object type {1}'
                         .format(axis, cls.__name__))
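
    # Hedged doctest-style sketch (not in the original source) of the axis
    # resolution these two classmethods implement:
    #
    #     >>> pd.DataFrame._get_axis_number('columns')
    #     1
    #     >>> pd.DataFrame._get_axis_name(0)
    #     'index'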
    def _get_axis(self, axis):
        name = self._get_axis_name(axis)
        return getattr(self, name)

    @classmethod
    def _get_block_manager_axis(cls, axis):
        """Map the axis to the block_manager axis."""
        axis = cls._get_axis_number(axis)
        if cls._AXIS_REVERSED:
            m = cls._AXIS_LEN - 1
            return m - axis
        return axis
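    # A quick sketch of the axis-resolution helpers above, assuming a plain
    # DataFrame (values reflect standard pandas behavior):
    #
    #   >>> pd.DataFrame._get_axis_number('columns')
    #   1
    #   >>> pd.DataFrame._get_axis_name(0)
    #   'index'
    #   >>> pd.DataFrame._get_block_manager_axis(0)
    #   1
    #
    # DataFrame stores its blocks with axes reversed, so row axis 0 maps to
    # block-manager axis 1.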
    def _get_axis_resolvers(self, axis):
        # index or columns
        axis_index = getattr(self, axis)
        d = dict()
        prefix = axis[0]

        for i, name in enumerate(axis_index.names):
            if name is not None:
                key = level = name
            else:
                # prefix with 'i' or 'c' depending on the input axis
                # e.g., you must do ilevel_0 for the 0th level of an unnamed
                # MultiIndex
                key = '{prefix}level_{i}'.format(prefix=prefix, i=i)
                level = i

            level_values = axis_index.get_level_values(level)
            s = level_values.to_series()
            s.index = axis_index
            d[key] = s

        # put the index/columns itself in the dict
        if isinstance(axis_index, MultiIndex):
            dindex = axis_index
        else:
            dindex = axis_index.to_series()

        d[axis] = dindex
        return d

    def _get_index_resolvers(self):
        d = {}
        for axis_name in self._AXIS_ORDERS:
            d.update(self._get_axis_resolvers(axis_name))
        return d
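    # These resolvers back ``DataFrame.query``/``DataFrame.eval``. A small
    # sketch of the mapping they produce (standard pandas behavior; the frame
    # below is illustrative only):
    #
    #   >>> df = pd.DataFrame({'a': [1, 2]})    # unnamed RangeIndex
    #   >>> sorted(df._get_index_resolvers())
    #   ['clevel_0', 'columns', 'ilevel_0', 'index']
    #
    # Named levels resolve under their own names; unnamed levels are exposed
    # as 'ilevel_0'/'clevel_0' and so on, which is why
    # ``df.query('ilevel_0 > 0')`` can filter on an unnamed index.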
    @property
    def _info_axis(self):
        return getattr(self, self._info_axis_name)

    @property
    def _stat_axis(self):
        return getattr(self, self._stat_axis_name)

    @property
    def shape(self):
        """
        Return a tuple of axis dimensions.
        """
        return tuple(len(self._get_axis(a)) for a in self._AXIS_ORDERS)
    @property
    def axes(self):
        """
        Return index label(s) of the internal NDFrame.
        """
        # we do it this way because if we have reversed axes, then
        # the block manager shows them reversed
        return [self._get_axis(a) for a in self._AXIS_ORDERS]
    @property
    def ndim(self):
        """
        Return an int representing the number of axes / array dimensions.

        Return 1 if Series. Otherwise return 2 if DataFrame.

        See Also
        --------
        ndarray.ndim : Number of array dimensions.

        Examples
        --------
        >>> s = pd.Series({'a': 1, 'b': 2, 'c': 3})
        >>> s.ndim
        1

        >>> df = pd.DataFrame({'col1': [1, 2], 'col2': [3, 4]})
        >>> df.ndim
        2
        """
        return self._data.ndim

    @property
    def size(self):
        """
        Return an int representing the number of elements in this object.

        Return the number of rows if Series. Otherwise return the number of
        rows times number of columns if DataFrame.

        See Also
        --------
        ndarray.size : Number of elements in the array.

        Examples
        --------
        >>> s = pd.Series({'a': 1, 'b': 2, 'c': 3})
        >>> s.size
        3

        >>> df = pd.DataFrame({'col1': [1, 2], 'col2': [3, 4]})
        >>> df.size
        4
        """
        return np.prod(self.shape)

    @property
    def _selected_obj(self):
        """ internal compat with SelectionMixin """
        return self

    @property
    def _obj_with_exclusions(self):
        """ internal compat with SelectionMixin """
        return self
    def _expand_axes(self, key):
        new_axes = []
        for k, ax in zip(key, self.axes):
            if k not in ax:
                if type(k) != ax.dtype.type:
                    ax = ax.astype('O')
                new_axes.append(ax.insert(len(ax), k))
            else:
                new_axes.append(ax)

        return new_axes
    def set_axis(self, labels, axis=0, inplace=None):
        """
        Assign desired index to given axis.

        Indexes for column or row labels can be changed by assigning
        a list-like or Index.

        .. versionchanged:: 0.21.0

           The signature is now `labels` and `axis`, consistent with
           the rest of pandas API. Previously, the `axis` and `labels`
           arguments were respectively the first and second positional
           arguments.

        Parameters
        ----------
        labels : list-like, Index
            The values for the new index.
        axis : {0 or 'index', 1 or 'columns'}, default 0
            The axis to update. The value 0 identifies the rows, and 1
            identifies the columns.
        inplace : boolean, default None
            Whether to modify the object in place rather than returning
            a new %(klass)s instance.

            .. warning::

               ``inplace=None`` currently falls back to True, but in a
               future version will default to False. Use inplace=True
               explicitly rather than relying on the default.

        Returns
        -------
        renamed : %(klass)s or None
            An object of same type as caller if inplace=False, None otherwise.

        See Also
        --------
        DataFrame.rename_axis : Alter the name of the index or columns.

        Examples
        --------
        **Series**

        >>> s = pd.Series([1, 2, 3])
        >>> s
        0    1
        1    2
        2    3
        dtype: int64

        >>> s.set_axis(['a', 'b', 'c'], axis=0, inplace=False)
        a    1
        b    2
        c    3
        dtype: int64

        The original object is not modified.

        >>> s
        0    1
        1    2
        2    3
        dtype: int64

        **DataFrame**

        >>> df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})

        Change the row labels.

        >>> df.set_axis(['a', 'b', 'c'], axis='index', inplace=False)
           A  B
        a  1  4
        b  2  5
        c  3  6

        Change the column labels.

        >>> df.set_axis(['I', 'II'], axis='columns', inplace=False)
           I  II
        0  1   4
        1  2   5
        2  3   6

        Now, update the labels inplace.

        >>> df.set_axis(['i', 'ii'], axis='columns', inplace=True)
        >>> df
           i  ii
        0  1   4
        1  2   5
        2  3   6
        """
        if is_scalar(labels):
            warnings.warn(
                'set_axis now takes "labels" as first argument, and '
                '"axis" as named parameter. The old form, with "axis" as '
                'first parameter and \"labels\" as second, is still supported '
                'but will be deprecated in a future version of pandas.',
                FutureWarning, stacklevel=2)
            labels, axis = axis, labels

        if inplace is None:
            warnings.warn(
                'set_axis currently defaults to operating inplace.\nThis '
                'will change in a future version of pandas, use '
                'inplace=True to avoid this warning.',
                FutureWarning, stacklevel=2)
            inplace = True
        if inplace:
            setattr(self, self._get_axis_name(axis), labels)
        else:
            obj = self.copy()
            obj.set_axis(labels, axis=axis, inplace=True)
            return obj
    def _set_axis(self, axis, labels):
        self._data.set_axis(axis, labels)
        self._clear_item_cache()
    def transpose(self, *args, **kwargs):
        """
        Permute the dimensions of the %(klass)s.

        Parameters
        ----------
        args : %(args_transpose)s
        copy : boolean, default False
            Make a copy of the underlying data. Mixed-dtype data will
            always result in a copy

        Returns
        -------
        y : same as input

        Examples
        --------
        >>> p.transpose(2, 0, 1)
        >>> p.transpose(2, 0, 1, copy=True)
        """

        # construct the args
        axes, kwargs = self._construct_axes_from_arguments(args, kwargs,
                                                           require_all=True)
        axes_names = tuple(self._get_axis_name(axes[a])
                           for a in self._AXIS_ORDERS)
        axes_numbers = tuple(self._get_axis_number(axes[a])
                             for a in self._AXIS_ORDERS)

        # we must have unique axes; check the resolved axis numbers, since
        # the keys of the ``axes`` dict are always unique by construction
        if len(axes_numbers) != len(set(axes_numbers)):
            raise ValueError('Must specify %s unique axes' % self._AXIS_LEN)

        new_axes = self._construct_axes_dict_from(self, [self._get_axis(x)
                                                         for x in axes_names])
        new_values = self.values.transpose(axes_numbers)
        if kwargs.pop('copy', None) or (len(args) and args[-1]):
            new_values = new_values.copy()

        nv.validate_transpose_for_generic(self, kwargs)
        return self._constructor(new_values, **new_axes).__finalize__(self)
    def swapaxes(self, axis1, axis2, copy=True):
        """
        Interchange two axes, swapping the values along them appropriately.

        Returns
        -------
        y : same as input
        """
        i = self._get_axis_number(axis1)
        j = self._get_axis_number(axis2)

        if i == j:
            if copy:
                return self.copy()
            return self

        mapping = {i: j, j: i}

        new_axes = (self._get_axis(mapping.get(k, k))
                    for k in range(self._AXIS_LEN))
        new_values = self.values.swapaxes(i, j)
        if copy:
            new_values = new_values.copy()

        return self._constructor(new_values, *new_axes).__finalize__(self)
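    # For a 2-D object, ``swapaxes(0, 1)`` amounts to a transpose. A small
    # sketch (standard pandas behavior; the frame is illustrative only):
    #
    #   >>> df = pd.DataFrame({'A': [1, 2], 'B': [3, 4]})
    #   >>> df.swapaxes(0, 1)
    #      0  1
    #   A  1  3
    #   B  2  4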
    def droplevel(self, level, axis=0):
        """
        Return DataFrame with requested index / column level(s) removed.

        .. versionadded:: 0.24.0

        Parameters
        ----------
        level : int, str, or list-like
            If a string is given, must be the name of a level
            If list-like, elements must be names or positional indexes
            of levels.
        axis : {0 or 'index', 1 or 'columns'}, default 0

        Returns
        -------
        DataFrame
            DataFrame with the requested index / column level(s) removed.

        Examples
        --------
        >>> df = pd.DataFrame([
        ...     [1, 2, 3, 4],
        ...     [5, 6, 7, 8],
        ...     [9, 10, 11, 12]
        ... ]).set_index([0, 1]).rename_axis(['a', 'b'])

        >>> df.columns = pd.MultiIndex.from_tuples([
        ...     ('c', 'e'), ('d', 'f')
        ... ], names=['level_1', 'level_2'])

        >>> df
        level_1   c   d
        level_2   e   f
        a b
        1 2      3   4
        5 6      7   8
        9 10    11  12

        >>> df.droplevel('a')
        level_1   c   d
        level_2   e   f
        b
        2        3   4
        6        7   8
        10      11  12

        >>> df.droplevel('level_2', axis=1)
        level_1   c   d
        a b
        1 2      3   4
        5 6      7   8
        9 10    11  12
        """
        labels = self._get_axis(axis)
        new_labels = labels.droplevel(level)
        result = self.set_axis(new_labels, axis=axis, inplace=False)
        return result
    def pop(self, item):
        """
        Return item and drop from frame. Raise KeyError if not found.

        Parameters
        ----------
        item : str
            Column label to be popped

        Returns
        -------
        popped : Series

        Examples
        --------
        >>> df = pd.DataFrame([('falcon', 'bird', 389.0),
        ...                    ('parrot', 'bird', 24.0),
        ...                    ('lion', 'mammal', 80.5),
        ...                    ('monkey', 'mammal', np.nan)],
        ...                   columns=('name', 'class', 'max_speed'))
        >>> df
             name   class  max_speed
        0  falcon    bird      389.0
        1  parrot    bird       24.0
        2    lion  mammal       80.5
        3  monkey  mammal        NaN

        >>> df.pop('class')
        0      bird
        1      bird
        2    mammal
        3    mammal
        Name: class, dtype: object

        >>> df
             name  max_speed
        0  falcon      389.0
        1  parrot       24.0
        2    lion       80.5
        3  monkey        NaN
        """
        result = self[item]
        del self[item]
        try:
            result._reset_cacher()
        except AttributeError:
            pass

        return result
    def squeeze(self, axis=None):
        """
        Squeeze 1 dimensional axis objects into scalars.

        Series or DataFrames with a single element are squeezed to a scalar.
        DataFrames with a single column or a single row are squeezed to a
        Series. Otherwise the object is unchanged.

        This method is most useful when you don't know if your
        object is a Series or DataFrame, but you do know it has just a single
        column. In that case you can safely call `squeeze` to ensure you have a
        Series.

        Parameters
        ----------
        axis : {0 or 'index', 1 or 'columns', None}, default None
            A specific axis to squeeze. By default, all length-1 axes are
            squeezed.

            .. versionadded:: 0.20.0

        Returns
        -------
        DataFrame, Series, or scalar
            The projection after squeezing `axis` or all the axes.

        See Also
        --------
        Series.iloc : Integer-location based indexing for selecting scalars.
        DataFrame.iloc : Integer-location based indexing for selecting Series.
        Series.to_frame : Inverse of DataFrame.squeeze for a
            single-column DataFrame.

        Examples
        --------
        >>> primes = pd.Series([2, 3, 5, 7])

        Slicing might produce a Series with a single value:

        >>> even_primes = primes[primes % 2 == 0]
        >>> even_primes
        0    2
        dtype: int64

        >>> even_primes.squeeze()
        2

        Squeezing objects with more than one value in every axis does nothing:

        >>> odd_primes = primes[primes % 2 == 1]
        >>> odd_primes
        1    3
        2    5
        3    7
        dtype: int64

        >>> odd_primes.squeeze()
        1    3
        2    5
        3    7
        dtype: int64

        Squeezing is even more effective when used with DataFrames.

        >>> df = pd.DataFrame([[1, 2], [3, 4]], columns=['a', 'b'])
        >>> df
           a  b
        0  1  2
        1  3  4

        Slicing a single column will produce a DataFrame with the columns
        having only one value:

        >>> df_a = df[['a']]
        >>> df_a
           a
        0  1
        1  3

        So the columns can be squeezed down, resulting in a Series:

        >>> df_a.squeeze('columns')
        0    1
        1    3
        Name: a, dtype: int64

        Slicing a single row from a single column will produce a single
        scalar DataFrame:

        >>> df_0a = df.loc[df.index < 1, ['a']]
        >>> df_0a
           a
        0  1

        Squeezing the rows produces a single scalar Series:

        >>> df_0a.squeeze('rows')
        a    1
        Name: 0, dtype: int64

        Squeezing all axes will project directly into a scalar:

        >>> df_0a.squeeze()
        1
        """
        axis = (self._AXIS_NAMES if axis is None else
                (self._get_axis_number(axis),))
        try:
            return self.iloc[
                tuple(0 if i in axis and len(a) == 1 else slice(None)
                      for i, a in enumerate(self.axes))]
        except Exception:
            return self
    def swaplevel(self, i=-2, j=-1, axis=0):
        """
        Swap levels i and j in a MultiIndex on a particular axis.

        Parameters
        ----------
        i, j : int, string (can be mixed)
            Level of index to be swapped. Can pass level name as string.

        Returns
        -------
        swapped : same type as caller (new object)

        .. versionchanged:: 0.18.1

           The indexes ``i`` and ``j`` are now optional, and default to
           the two innermost levels of the index.
        """
        axis = self._get_axis_number(axis)
        result = self.copy()
        labels = result._data.axes[axis]
        result._data.set_axis(axis, labels.swaplevel(i, j))
        return result
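    # A brief sketch of the default behavior, which swaps the two innermost
    # index levels (standard pandas behavior; data illustrative only):
    #
    #   >>> idx = pd.MultiIndex.from_tuples([('a', 1), ('b', 2)],
    #   ...                                 names=['x', 'y'])
    #   >>> s = pd.Series([10, 20], index=idx)
    #   >>> s.swaplevel().index.names
    #   FrozenList(['y', 'x'])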
    # ----------------------------------------------------------------------
    # Rename

    def rename(self, *args, **kwargs):
        """
        Alter axes using input function or functions. Function / dict values
        must be unique (1-to-1). Labels not contained in a dict / Series will
        be left as-is. Extra labels listed don't throw an error. Alternatively,
        change ``Series.name`` with a scalar value (Series only).

        Parameters
        ----------
        %(axes)s : scalar, list-like, dict-like or function, optional
            Scalar or list-like will alter the ``Series.name`` attribute,
            and raise on DataFrame or Panel.
            dict-like or functions are transformations to apply to
            that axis' values
        copy : boolean, default True
            Also copy underlying data
        inplace : boolean, default False
            Whether to return a new %(klass)s. If True then value of copy is
            ignored.
        level : int or level name, default None
            In case of a MultiIndex, only rename labels in the specified
            level.

        Returns
        -------
        renamed : %(klass)s (new object)

        See Also
        --------
        pandas.NDFrame.rename_axis

        Examples
        --------
        >>> s = pd.Series([1, 2, 3])
        >>> s
        0    1
        1    2
        2    3
        dtype: int64
        >>> s.rename("my_name")  # scalar, changes Series.name
        0    1
        1    2
        2    3
        Name: my_name, dtype: int64
        >>> s.rename(lambda x: x ** 2)  # function, changes labels
        0    1
        1    2
        4    3
        dtype: int64
        >>> s.rename({1: 3, 2: 5})  # mapping, changes labels
        0    1
        3    2
        5    3
        dtype: int64

        Since ``DataFrame`` doesn't have a ``.name`` attribute,
        only mapping-type arguments are allowed.

        >>> df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
        >>> df.rename(2)
        Traceback (most recent call last):
        ...
        TypeError: 'int' object is not callable

        ``DataFrame.rename`` supports two calling conventions

        * ``(index=index_mapper, columns=columns_mapper, ...)``
        * ``(mapper, axis={'index', 'columns'}, ...)``

        We *highly* recommend using keyword arguments to clarify your
        intent.

        >>> df.rename(index=str, columns={"A": "a", "B": "c"})
           a  c
        0  1  4
        1  2  5
        2  3  6

        >>> df.rename(index=str, columns={"A": "a", "C": "c"})
           a  B
        0  1  4
        1  2  5
        2  3  6

        Using axis-style parameters

        >>> df.rename(str.lower, axis='columns')
           a  b
        0  1  4
        1  2  5
        2  3  6

        >>> df.rename({1: 2, 2: 4}, axis='index')
           A  B
        0  1  4
        2  2  5
        4  3  6

        See the :ref:`user guide <basics.rename>` for more.
        """
        axes, kwargs = self._construct_axes_from_arguments(args, kwargs)
        copy = kwargs.pop('copy', True)
        inplace = kwargs.pop('inplace', False)
        level = kwargs.pop('level', None)
        axis = kwargs.pop('axis', None)
        if axis is not None:
            # Validate the axis
            self._get_axis_number(axis)

        if kwargs:
            raise TypeError('rename() got an unexpected keyword '
                            'argument "{0}"'.format(list(kwargs.keys())[0]))

        if com.count_not_none(*axes.values()) == 0:
            raise TypeError('must pass an index to rename')

        self._consolidate_inplace()
        result = self if inplace else self.copy(deep=copy)

        # start in the axis order to eliminate too many copies
        for axis in lrange(self._AXIS_LEN):
            v = axes.get(self._AXIS_NAMES[axis])
            if v is None:
                continue
            f = com._get_rename_function(v)
            baxis = self._get_block_manager_axis(axis)
            if level is not None:
                level = self.axes[axis]._get_level_number(level)
            result._data = result._data.rename_axis(f, axis=baxis, copy=copy,
                                                    level=level)
            result._clear_item_cache()

        if inplace:
            self._update_inplace(result._data)
        else:
            return result.__finalize__(self)
    @rewrite_axis_style_signature('mapper', [('copy', True),
                                             ('inplace', False)])
    def rename_axis(self, mapper=sentinel, **kwargs):
        """
        Set the name of the axis for the index or columns.

        Parameters
        ----------
        mapper : scalar, list-like, optional
            Value to set the axis name attribute.
        index, columns : scalar, list-like, dict-like or function, optional
            A scalar, list-like, dict-like or functions transformations to
            apply to that axis' values.

            Use either ``mapper`` and ``axis`` to
            specify the axis to target with ``mapper``, or ``index``
            and/or ``columns``.

            .. versionchanged:: 0.24.0

        axis : {0 or 'index', 1 or 'columns'}, default 0
            The axis to rename.
        copy : bool, default True
            Also copy underlying data.
        inplace : bool, default False
            Modifies the object directly, instead of creating a new Series
            or DataFrame.

        Returns
        -------
        Series, DataFrame, or None
            The same type as the caller or None if `inplace` is True.

        See Also
        --------
        Series.rename : Alter Series index labels or name.
        DataFrame.rename : Alter DataFrame index labels or name.
        Index.rename : Set new names on index.

        Notes
        -----
        Prior to version 0.21.0, ``rename_axis`` could also be used to change
        the axis *labels* by passing a mapping or scalar. This behavior is
        deprecated and will be removed in a future version. Use ``rename``
        instead.

        ``DataFrame.rename_axis`` supports two calling conventions

        * ``(index=index_mapper, columns=columns_mapper, ...)``
        * ``(mapper, axis={'index', 'columns'}, ...)``

        The first calling convention will only modify the names of
        the index and/or the names of the Index object that is the columns.
        In this case, the parameter ``copy`` is ignored.

        The second calling convention will modify the names of the
        corresponding index if mapper is a list or a scalar.
        However, if mapper is dict-like or a function, it will use the
        deprecated behavior of modifying the axis *labels*.

        We *highly* recommend using keyword arguments to clarify your
        intent.

        Examples
        --------
        **Series**

        >>> s = pd.Series(["dog", "cat", "monkey"])
        >>> s
        0       dog
        1       cat
        2    monkey
        dtype: object
        >>> s.rename_axis("animal")
        animal
        0    dog
        1    cat
        2    monkey
        dtype: object

        **DataFrame**

        >>> df = pd.DataFrame({"num_legs": [4, 4, 2],
        ...                    "num_arms": [0, 0, 2]},
        ...                   ["dog", "cat", "monkey"])
        >>> df
                num_legs  num_arms
        dog            4         0
        cat            4         0
        monkey         2         2
        >>> df = df.rename_axis("animal")
        >>> df
                num_legs  num_arms
        animal
        dog            4         0
        cat            4         0
        monkey         2         2
        >>> df = df.rename_axis("limbs", axis="columns")
        >>> df
        limbs   num_legs  num_arms
        animal
        dog            4         0
        cat            4         0
        monkey         2         2

        **MultiIndex**

        >>> df.index = pd.MultiIndex.from_product([['mammal'],
        ...                                        ['dog', 'cat', 'monkey']],
        ...                                       names=['type', 'name'])
        >>> df
        limbs          num_legs  num_arms
        type   name
        mammal dog            4         0
               cat            4         0
               monkey         2         2

        >>> df.rename_axis(index={'type': 'class'})
        limbs          num_legs  num_arms
        class  name
        mammal dog            4         0
               cat            4         0
               monkey         2         2

        >>> df.rename_axis(columns=str.upper)
        LIMBS          num_legs  num_arms
        type   name
        mammal dog            4         0
               cat            4         0
               monkey         2         2
        """
        axes, kwargs = self._construct_axes_from_arguments(
            (), kwargs, sentinel=sentinel)
        copy = kwargs.pop('copy', True)
        inplace = kwargs.pop('inplace', False)
        axis = kwargs.pop('axis', 0)
        if axis is not None:
            axis = self._get_axis_number(axis)

        if kwargs:
            raise TypeError('rename_axis() got an unexpected keyword '
                            'argument "{0}"'.format(list(kwargs.keys())[0]))

        inplace = validate_bool_kwarg(inplace, 'inplace')

        if mapper is not sentinel:
            # Use v0.23 behavior if a scalar or list
            non_mapper = is_scalar(mapper) or (is_list_like(mapper) and not
                                               is_dict_like(mapper))
            if non_mapper:
                return self._set_axis_name(mapper, axis=axis, inplace=inplace)
            else:
                # Deprecated (v0.21) behavior is if mapper is specified,
                # and not a list or scalar, then call rename
                msg = ("Using 'rename_axis' to alter labels is deprecated. "
                       "Use '.rename' instead")
                warnings.warn(msg, FutureWarning, stacklevel=3)
                axis = self._get_axis_name(axis)
                d = {'copy': copy, 'inplace': inplace}
                d[axis] = mapper
                return self.rename(**d)
        else:
            # Use new behavior.  Means that index and/or columns
            # is specified
            result = self if inplace else self.copy(deep=copy)

            for axis in lrange(self._AXIS_LEN):
                v = axes.get(self._AXIS_NAMES[axis])
                if v is sentinel:
                    continue
                non_mapper = is_scalar(v) or (is_list_like(v) and not
                                              is_dict_like(v))
                if non_mapper:
                    newnames = v
                else:
                    f = com._get_rename_function(v)
                    curnames = self._get_axis(axis).names
                    newnames = [f(name) for name in curnames]
                result._set_axis_name(newnames, axis=axis,
                                      inplace=True)
            if not inplace:
                return result
    def _set_axis_name(self, name, axis=0, inplace=False):
        """
        Set the name(s) of the axis.

        Parameters
        ----------
        name : str or list of str
            Name(s) to set.
        axis : {0 or 'index', 1 or 'columns'}, default 0
            The axis to set the label. The value 0 or 'index' specifies index,
            and the value 1 or 'columns' specifies columns.
        inplace : bool, default False
            If `True`, do operation inplace and return None.

            .. versionadded:: 0.21.0

        Returns
        -------
        Series, DataFrame, or None
            The same type as the caller or `None` if `inplace` is `True`.

        See Also
        --------
        DataFrame.rename : Alter the axis labels of :class:`DataFrame`.
        Series.rename : Alter the index labels or set the index name
            of :class:`Series`.
        Index.rename : Set the name of :class:`Index` or :class:`MultiIndex`.

        Examples
        --------
        >>> df = pd.DataFrame({"num_legs": [4, 4, 2]},
        ...                   ["dog", "cat", "monkey"])
        >>> df
                num_legs
        dog            4
        cat            4
        monkey         2
        >>> df._set_axis_name("animal")
                num_legs
        animal
        dog            4
        cat            4
        monkey         2
        >>> df.index = pd.MultiIndex.from_product(
        ...                [["mammal"], ['dog', 'cat', 'monkey']])
        >>> df._set_axis_name(["type", "name"])
                       num_legs
        type   name
        mammal dog            4
               cat            4
               monkey         2
        """
        axis = self._get_axis_number(axis)
        idx = self._get_axis(axis).set_names(name)

        inplace = validate_bool_kwarg(inplace, 'inplace')
        renamed = self if inplace else self.copy()
        renamed.set_axis(idx, axis=axis, inplace=True)
        if not inplace:
            return renamed
    # ----------------------------------------------------------------------
    # Comparison Methods

    def _indexed_same(self, other):
        return all(self._get_axis(a).equals(other._get_axis(a))
                   for a in self._AXIS_ORDERS)
    def equals(self, other):
        """
        Test whether two objects contain the same elements.

        This function allows two Series or DataFrames to be compared against
        each other to see if they have the same shape and elements. NaNs in
        the same location are considered equal. The column headers do not
        need to have the same type, but the elements within the columns must
        be the same dtype.

        Parameters
        ----------
        other : Series or DataFrame
            The other Series or DataFrame to be compared with the first.

        Returns
        -------
        bool
            True if all elements are the same in both objects, False
            otherwise.

        See Also
        --------
        Series.eq : Compare two Series objects of the same length
            and return a Series where each element is True if the element
            in each Series is equal, False otherwise.
        DataFrame.eq : Compare two DataFrame objects of the same shape and
            return a DataFrame where each element is True if the respective
            element in each DataFrame is equal, False otherwise.
        assert_series_equal : Return True if left and right Series are equal,
            False otherwise.
        assert_frame_equal : Return True if left and right DataFrames are
            equal, False otherwise.
        numpy.array_equal : Return True if two arrays have the same shape
            and elements, False otherwise.

        Notes
        -----
        This function requires that the elements have the same dtype as their
        respective elements in the other Series or DataFrame. However, the
        column labels do not need to have the same type, as long as they are
        still considered equal.

        Examples
        --------
        >>> df = pd.DataFrame({1: [10], 2: [20]})
        >>> df
            1   2
        0  10  20

        DataFrames df and exactly_equal have the same types and values for
        their elements and column labels, which will return True.

        >>> exactly_equal = pd.DataFrame({1: [10], 2: [20]})
        >>> exactly_equal
            1   2
        0  10  20
        >>> df.equals(exactly_equal)
        True

        DataFrames df and different_column_type have the same element
        types and values, but have different types for the column labels,
        which will still return True.

        >>> different_column_type = pd.DataFrame({1.0: [10], 2.0: [20]})
        >>> different_column_type
           1.0  2.0
        0   10   20
        >>> df.equals(different_column_type)
        True

        DataFrames df and different_data_type have different types for the
        same values for their elements, and will return False even though
        their column labels are the same values and types.

        >>> different_data_type = pd.DataFrame({1: [10.0], 2: [20.0]})
        >>> different_data_type
              1     2
        0  10.0  20.0
        >>> df.equals(different_data_type)
        False
        """
        if not isinstance(other, self._constructor):
            return False
        return self._data.equals(other._data)
    # -------------------------------------------------------------------------
    # Unary Methods

    def __neg__(self):
        values = com.values_from_object(self)
        if is_bool_dtype(values):
            arr = operator.inv(values)
        elif (is_numeric_dtype(values) or is_timedelta64_dtype(values)
                or is_object_dtype(values)):
            arr = operator.neg(values)
        else:
            raise TypeError("Unary negative expects numeric dtype, not {}"
                            .format(values.dtype))
        return self.__array_wrap__(arr)

    def __pos__(self):
        values = com.values_from_object(self)
        if (is_bool_dtype(values) or is_period_arraylike(values)):
            arr = values
        elif (is_numeric_dtype(values) or is_timedelta64_dtype(values)
                or is_object_dtype(values)):
            arr = operator.pos(values)
        else:
            raise TypeError("Unary plus expects numeric dtype, not {}"
                            .format(values.dtype))
        return self.__array_wrap__(arr)

    def __invert__(self):
        try:
            arr = operator.inv(com.values_from_object(self))
            return self.__array_wrap__(arr)
        except Exception:
            # inv fails with 0 len
            if not np.prod(self.shape):
                return self
            raise
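    # One detail worth illustrating: unary minus on a boolean Series inverts
    # rather than negates (standard pandas behavior):
    #
    #   >>> -pd.Series([True, False])
    #   0    False
    #   1     True
    #   dtype: bool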
    def __nonzero__(self):
        raise ValueError("The truth value of a {0} is ambiguous. "
                         "Use a.empty, a.bool(), a.item(), a.any() or a.all()."
                         .format(self.__class__.__name__))

    __bool__ = __nonzero__

    def bool(self):
        """
        Return the bool of a single element PandasObject.

        This must be a boolean scalar value, either True or False. Raise a
        ValueError if the PandasObject does not have exactly 1 element, or
        that element is not boolean.
        """
        v = self.squeeze()
        if isinstance(v, (bool, np.bool_)):
            return bool(v)
        elif is_scalar(v):
            raise ValueError("bool cannot act on a non-boolean single element "
                             "{0}".format(self.__class__.__name__))

        self.__nonzero__()
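    # A short sketch of ``bool()`` (standard pandas behavior); more than one
    # element falls through to ``__nonzero__`` and raises:
    #
    #   >>> pd.Series([True]).bool()
    #   True
    #   >>> pd.Series([1, 2]).bool()          # doctest: +SKIP
    #   ValueError: The truth value of a Series is ambiguous. ...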
    def __abs__(self):
        return self.abs()

    def __round__(self, decimals=0):
        return self.round(decimals)

    # -------------------------------------------------------------------------
    # Label or Level Combination Helpers
    #
    # A collection of helper methods for DataFrame/Series operations that
    # accept a combination of column/index labels and levels.  All such
    # operations should utilize/extend these methods when possible so that we
    # have consistent precedence and validation logic throughout the library.
    def _is_level_reference(self, key, axis=0):
        """
        Test whether a key is a level reference for a given axis.

        To be considered a level reference, `key` must be a string that:
          - (axis=0): Matches the name of an index level and does NOT match
            a column label.
          - (axis=1): Matches the name of a column level and does NOT match
            an index label.

        Parameters
        ----------
        key : str
            Potential level name for the given axis
        axis : int, default 0
            Axis that levels are associated with (0 for index, 1 for columns)

        Returns
        -------
        is_level : bool
        """
        axis = self._get_axis_number(axis)

        if self.ndim > 2:
            raise NotImplementedError(
                "_is_level_reference is not implemented for {type}"
                .format(type=type(self)))

        return (key is not None and
                is_hashable(key) and
                key in self.axes[axis].names and
                not self._is_label_reference(key, axis=axis))

    def _is_label_reference(self, key, axis=0):
        """
        Test whether a key is a label reference for a given axis.

        To be considered a label reference, `key` must be a string that:
          - (axis=0): Matches a column label
          - (axis=1): Matches an index label

        Parameters
        ----------
        key: str
            Potential label name
        axis: int, default 0
            Axis perpendicular to the axis that labels are associated with
            (0 means search for column labels, 1 means search for index labels)

        Returns
        -------
        is_label: bool
        """
        axis = self._get_axis_number(axis)
        other_axes = [ax for ax in range(self._AXIS_LEN) if ax != axis]

        if self.ndim > 2:
            raise NotImplementedError(
                "_is_label_reference is not implemented for {type}"
                .format(type=type(self)))

        return (key is not None and
                is_hashable(key) and
                any(key in self.axes[ax] for ax in other_axes))

    def _is_label_or_level_reference(self, key, axis=0):
        """
        Test whether a key is a label or level reference for a given axis.

        To be considered either a label or a level reference, `key` must be a
        string that:
          - (axis=0): Matches a column label or an index level
          - (axis=1): Matches an index label or a column level

        Parameters
        ----------
        key: str
            Potential label or level name
        axis: int, default 0
            Axis that levels are associated with (0 for index, 1 for columns)

        Returns
        -------
        is_label_or_level: bool
        """
        if self.ndim > 2:
            raise NotImplementedError(
                "_is_label_or_level_reference is not implemented for {type}"
                .format(type=type(self)))

        return (self._is_level_reference(key, axis=axis) or
                self._is_label_reference(key, axis=axis))
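    # A compact sketch of how these reference checks interact (standard
    # pandas behavior; the frame is illustrative only):
    #
    #   >>> df = pd.DataFrame({'b': [1, 2]},
    #   ...                   index=pd.Index([10, 20], name='a'))
    #   >>> df._is_level_reference('a'), df._is_label_reference('b')
    #   (True, True)
    #   >>> df._is_label_or_level_reference('c')
    #   False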
    def _check_label_or_level_ambiguity(self, key, axis=0):
        """
        Check whether `key` is ambiguous.

        By ambiguous, we mean that it matches both a level of the input
        `axis` and a label of the other axis.

        Parameters
        ----------
        key: str or object
            label or level name
        axis: int, default 0
            Axis that levels are associated with (0 for index, 1 for columns)

        Raises
        ------
        ValueError: `key` is ambiguous
        """
        axis = self._get_axis_number(axis)
        other_axes = [ax for ax in range(self._AXIS_LEN) if ax != axis]

        if self.ndim > 2:
            raise NotImplementedError(
                "_check_label_or_level_ambiguity is not implemented for {type}"
                .format(type=type(self)))

        if (key is not None and
                is_hashable(key) and
                key in self.axes[axis].names and
                any(key in self.axes[ax] for ax in other_axes)):

            # Build an informative and grammatical error message
            level_article, level_type = (('an', 'index')
                                         if axis == 0 else
                                         ('a', 'column'))

            label_article, label_type = (('a', 'column')
                                         if axis == 0 else
                                         ('an', 'index'))

            msg = ("'{key}' is both {level_article} {level_type} level and "
                   "{label_article} {label_type} label, which is ambiguous."
                   ).format(key=key,
                            level_article=level_article,
                            level_type=level_type,
                            label_article=label_article,
                            label_type=label_type)
            raise ValueError(msg)
    def _get_label_or_level_values(self, key, axis=0):
        """
        Return a 1-D array of values associated with `key`, a label or level
        from the given `axis`.

        Retrieval logic:
          - (axis=0): Return column values if `key` matches a column label.
            Otherwise return index level values if `key` matches an index
            level.
          - (axis=1): Return row values if `key` matches an index label.
            Otherwise return column level values if 'key' matches a column
            level.

        Parameters
        ----------
        key: str
            Label or level name.
        axis: int, default 0
            Axis that levels are associated with (0 for index, 1 for columns)

        Returns
        -------
        values: np.ndarray

        Raises
        ------
        KeyError
            if `key` matches neither a label nor a level
        ValueError
            if `key` matches multiple labels
        FutureWarning
            if `key` is ambiguous. This will become an ambiguity error in a
            future version
        """
        axis = self._get_axis_number(axis)
        other_axes = [ax for ax in range(self._AXIS_LEN) if ax != axis]

        if self.ndim > 2:
            raise NotImplementedError(
                "_get_label_or_level_values is not implemented for {type}"
                .format(type=type(self)))

        if self._is_label_reference(key, axis=axis):
            self._check_label_or_level_ambiguity(key, axis=axis)
            values = self.xs(key, axis=other_axes[0])._values
        elif self._is_level_reference(key, axis=axis):
            values = self.axes[axis].get_level_values(key)._values
        else:
            raise KeyError(key)

        # Check for duplicates
        if values.ndim > 1:

            if other_axes and isinstance(
                    self._get_axis(other_axes[0]), MultiIndex):
                multi_message = ('\n'
                                 'For a multi-index, the label must be a '
                                 'tuple with elements corresponding to '
                                 'each level.')
            else:
                multi_message = ''

            label_axis_name = 'column' if axis == 0 else 'index'
            raise ValueError(("The {label_axis_name} label '{key}' "
                              "is not unique.{multi_message}")
                             .format(key=key,
                                     label_axis_name=label_axis_name,
                                     multi_message=multi_message))

        return values
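    # A minimal sketch of the retrieval precedence above: on axis=0 a column
    # label wins over an index level of the same name, and either source
    # yields a plain 1-D array (standard pandas behavior; frame illustrative):
    #
    #   >>> df = pd.DataFrame({'b': [3, 4]},
    #   ...                   index=pd.Index([1, 2], name='a'))
    #   >>> df._get_label_or_level_values('a')
    #   array([1, 2])
    #   >>> df._get_label_or_level_values('b')
    #   array([3, 4])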
    def _drop_labels_or_levels(self, keys, axis=0):
        """
        Drop labels and/or levels for the given `axis`.

        For each key in `keys`:
          - (axis=0): If key matches a column label then drop the column.
            Otherwise if key matches an index level then drop the level.
          - (axis=1): If key matches an index label then drop the row.
            Otherwise if key matches a column level then drop the level.

        Parameters
        ----------
        keys: str or list of str
            labels or levels to drop
        axis: int, default 0
            Axis that levels are associated with (0 for index, 1 for columns)

        Returns
        -------
        dropped: DataFrame

        Raises
        ------
        ValueError
            if any `keys` match neither a label nor a level
        """
        axis = self._get_axis_number(axis)

        if self.ndim > 2:
            raise NotImplementedError(
                "_drop_labels_or_levels is not implemented for {type}"
                .format(type=type(self)))

        # Validate keys
        keys = com.maybe_make_list(keys)
        invalid_keys = [k for k in keys if not
                        self._is_label_or_level_reference(k, axis=axis)]

        if invalid_keys:
            raise ValueError(("The following keys are not valid labels or "
                              "levels for axis {axis}: {invalid_keys}")
                             .format(axis=axis,
                                     invalid_keys=invalid_keys))

        # Compute levels and labels to drop
        levels_to_drop = [k for k in keys
                          if self._is_level_reference(k, axis=axis)]

        labels_to_drop = [k for k in keys
                          if not self._is_level_reference(k, axis=axis)]

        # Perform copy upfront and then use inplace operations below.
        # This ensures that we always perform exactly one copy.
        # ``copy`` and/or ``inplace`` options could be added in the future.
        dropped = self.copy()

        if axis == 0:
            # Handle dropping index levels
            if levels_to_drop:
                dropped.reset_index(levels_to_drop, drop=True, inplace=True)

            # Handle dropping columns labels
            if labels_to_drop:
                dropped.drop(labels_to_drop, axis=1, inplace=True)
        else:
            # Handle dropping column levels
            if levels_to_drop:
                if isinstance(dropped.columns, MultiIndex):
                    # Drop the specified levels from the MultiIndex
                    dropped.columns = dropped.columns.droplevel(levels_to_drop)
                else:
                    # Drop the last level of Index by replacing with
                    # a RangeIndex
                    dropped.columns = RangeIndex(dropped.columns.size)

            # Handle dropping index labels
            if labels_to_drop:
                dropped.drop(labels_to_drop, axis=0, inplace=True)

        return dropped
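    # A short sketch: dropping an index-level key resets that level away,
    # while a column-label key drops the column (standard pandas behavior;
    # frame illustrative only):
    #
    #   >>> df = pd.DataFrame({'b': [3, 4]},
    #   ...                   index=pd.Index([1, 2], name='a'))
    #   >>> df._drop_labels_or_levels('a').columns.tolist()
    #   ['b']
    #   >>> df._drop_labels_or_levels('b').columns.tolist()
    #   []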
    # ----------------------------------------------------------------------
    # Iteration

    def __hash__(self):
        raise TypeError('{0!r} objects are mutable, thus they cannot be'
                        ' hashed'.format(self.__class__.__name__))

    def __iter__(self):
        """Iterate over info axis"""
        return iter(self._info_axis)

    # can we get a better explanation of this?
    def keys(self):
        """Get the 'info axis' (see Indexing for more)

        This is index for Series, columns for DataFrame and major_axis for
        Panel.
        """
        return self._info_axis

    def iteritems(self):
        """Iterate over (label, values) on info axis

        This is index for Series, columns for DataFrame, major_axis for Panel,
        and so on.
        """
        for h in self._info_axis:
            yield h, self[h]
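    # A quick sketch of info-axis iteration for a DataFrame (standard pandas
    # behavior): iterating yields column labels, while ``iteritems`` yields
    # (label, column) pairs:
    #
    #   >>> df = pd.DataFrame({'A': [1], 'B': [2]})
    #   >>> list(df)
    #   ['A', 'B']
    #   >>> [(k, v.iloc[0]) for k, v in df.iteritems()]
    #   [('A', 1), ('B', 2)]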
    def __len__(self):
        """Returns length of info axis"""
        return len(self._info_axis)

    def __contains__(self, key):
        """True if the key is in the info axis"""
        return key in self._info_axis
    @property
    def empty(self):
        """
        Indicator whether DataFrame is empty.

        True if DataFrame is entirely empty (no items), meaning any of the
        axes are of length 0.

        Returns
        -------
        bool
            If DataFrame is empty, return True, if not return False.

        See Also
        --------
        pandas.Series.dropna
        pandas.DataFrame.dropna

        Notes
        -----
        If DataFrame contains only NaNs, it is still not considered empty. See
        the example below.

        Examples
        --------
        An example of an actual empty DataFrame. Notice the index is empty:

        >>> df_empty = pd.DataFrame({'A' : []})
        >>> df_empty
        Empty DataFrame
        Columns: [A]
        Index: []
        >>> df_empty.empty
        True

        If we only have NaNs in our DataFrame, it is not considered empty! We
        will need to drop the NaNs to make the DataFrame empty:

        >>> df = pd.DataFrame({'A' : [np.nan]})
        >>> df
            A
        0 NaN
        >>> df.empty
        False
        >>> df.dropna().empty
        True
        """
        return any(len(self._get_axis(a)) == 0 for a in self._AXIS_ORDERS)
    # ----------------------------------------------------------------------
    # Array Interface

    # This is also set in IndexOpsMixin
    # GH#23114 Ensure ndarray.__op__(DataFrame) returns NotImplemented
    __array_priority__ = 1000

    def __array__(self, dtype=None):
        return com.values_from_object(self)

    def __array_wrap__(self, result, context=None):
        d = self._construct_axes_dict(self._AXIS_ORDERS, copy=False)
        return self._constructor(result, **d).__finalize__(self)
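    # ``__array_wrap__`` is what lets NumPy ufuncs hand results back as
    # pandas objects carrying the original axes (standard behavior):
    #
    #   >>> df = pd.DataFrame({'A': [1.0, 4.0]})
    #   >>> np.sqrt(df)
    #        A
    #   0  1.0
    #   1  2.0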
    # ideally we would define this to avoid the getattr checks, but
    # it is slower
    # @property
    # def __array_interface__(self):
    #     """ provide numpy array interface method """
    #     values = self.values
    #     return dict(typestr=values.dtype.str,shape=values.shape,data=values)

    def to_dense(self):
        """
        Return dense representation of NDFrame (as opposed to sparse).
        """
        # compat
        return self
    # ----------------------------------------------------------------------
    # Picklability

    def __getstate__(self):
        meta = {k: getattr(self, k, None) for k in self._metadata}
        return dict(_data=self._data, _typ=self._typ, _metadata=self._metadata,
                    **meta)

    def __setstate__(self, state):

        if isinstance(state, BlockManager):
            self._data = state
        elif isinstance(state, dict):
            typ = state.get('_typ')
            if typ is not None:

                # set in the order of internal names
                # to avoid definitional recursion
                # e.g. say fill_value needing _data to be
                # defined
                meta = set(self._internal_names + self._metadata)
                for k in list(meta):
                    if k in state:
                        v = state[k]
                        object.__setattr__(self, k, v)

                for k, v in state.items():
                    if k not in meta:
                        object.__setattr__(self, k, v)

            else:
                self._unpickle_series_compat(state)
        elif isinstance(state[0], dict):
            if len(state) == 5:
                self._unpickle_sparse_frame_compat(state)
            else:
                self._unpickle_frame_compat(state)
        elif len(state) == 4:
            self._unpickle_panel_compat(state)
        elif len(state) == 2:
            self._unpickle_series_compat(state)
        else:  # pragma: no cover
            # old pickling format, for compatibility
            self._unpickle_matrix_compat(state)

        self._item_cache = {}
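    # The state dict above makes pickle round-trips straightforward; a
    # minimal sketch using only the standard library:
    #
    #   >>> import pickle
    #   >>> df = pd.DataFrame({'A': [1, 2]})
    #   >>> pickle.loads(pickle.dumps(df)).equals(df)
    #   True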
    # ----------------------------------------------------------------------
    # Rendering Methods

    def __unicode__(self):
        # unicode representation based upon iterating over self
        # (since, by definition, `PandasContainers` are iterable)
        prepr = '[%s]' % ','.join(map(pprint_thing, self))
        return '%s(%s)' % (self.__class__.__name__, prepr)

    def _repr_latex_(self):
        """
        Returns a LaTeX representation for a particular object.
        Mainly for use with nbconvert (jupyter notebook conversion to pdf).
        """
        if config.get_option('display.latex.repr'):
            return self.to_latex()
        else:
            return None

    def _repr_data_resource_(self):
        """
        Not a real Jupyter special repr method, but we use the same
        naming convention.
        """
        if config.get_option("display.html.table_schema"):
            data = self.head(config.get_option('display.max_rows'))
            payload = json.loads(data.to_json(orient='table'),
                                 object_pairs_hook=collections.OrderedDict)
            return payload
    # ----------------------------------------------------------------------
    # I/O Methods

    _shared_docs['to_excel'] = """
    Write %(klass)s to an Excel sheet.

    To write a single %(klass)s to an Excel .xlsx file it is only necessary to
    specify a target file name. To write to multiple sheets it is necessary to
    create an `ExcelWriter` object with a target file name, and specify a sheet
    in the file to write to.

    Multiple sheets may be written to by specifying unique `sheet_name`.
    With all data written to the file it is necessary to save the changes.
    Note that creating an `ExcelWriter` object with a file name that already
    exists will result in the contents of the existing file being erased.

    Parameters
    ----------
    excel_writer : str or ExcelWriter object
        File path or existing ExcelWriter.
    sheet_name : str, default 'Sheet1'
        Name of sheet which will contain DataFrame.
    na_rep : str, default ''
        Missing data representation.
    float_format : str, optional
        Format string for floating point numbers. For example
        ``float_format="%%.2f"`` will format 0.1234 to 0.12.
    columns : sequence or list of str, optional
        Columns to write.
    header : bool or list of str, default True
        Write out the column names. If a list of string is given it is
        assumed to be aliases for the column names.
    index : bool, default True
        Write row names (index).
    index_label : str or sequence, optional
        Column label for index column(s) if desired. If not specified, and
        `header` and `index` are True, then the index names are used. A
        sequence should be given if the DataFrame uses MultiIndex.
    startrow : int, default 0
        Upper left cell row to dump data frame.
    startcol : int, default 0
        Upper left cell column to dump data frame.
    engine : str, optional
        Write engine to use, 'openpyxl' or 'xlsxwriter'. You can also set this
        via the options ``io.excel.xlsx.writer``, ``io.excel.xls.writer``, and
        ``io.excel.xlsm.writer``.
    merge_cells : bool, default True
        Write MultiIndex and Hierarchical Rows as merged cells.
    encoding : str, optional
        Encoding of the resulting excel file. Only necessary for xlwt,
        other writers support unicode natively.
    inf_rep : str, default 'inf'
        Representation for infinity (there is no native representation for
        infinity in Excel).
    verbose : bool, default True
        Display more information in the error logs.
    freeze_panes : tuple of int (length 2), optional
        Specifies the one-based bottommost row and rightmost column that
        is to be frozen.

        .. versionadded:: 0.20.0

    See Also
    --------
    to_csv : Write DataFrame to a comma-separated values (csv) file.
    ExcelWriter : Class for writing DataFrame objects into excel sheets.
    read_excel : Read an Excel file into a pandas DataFrame.
    read_csv : Read a comma-separated values (csv) file into DataFrame.

    Notes
    -----
    For compatibility with :meth:`~DataFrame.to_csv`,
    to_excel serializes lists and dicts to strings before writing.

    Once a workbook has been saved it is not possible to write further data
    without rewriting the whole workbook.

    Examples
    --------
    Create, write to and save a workbook:

    >>> df1 = pd.DataFrame([['a', 'b'], ['c', 'd']],
    ...                    index=['row 1', 'row 2'],
    ...                    columns=['col 1', 'col 2'])
    >>> df1.to_excel("output.xlsx")  # doctest: +SKIP

    To specify the sheet name:

    >>> df1.to_excel("output.xlsx",
    ...              sheet_name='Sheet_name_1')  # doctest: +SKIP

    If you wish to write to more than one sheet in the workbook, it is
    necessary to specify an ExcelWriter object:

    >>> df2 = df1.copy()
    >>> with pd.ExcelWriter('output.xlsx') as writer:  # doctest: +SKIP
    ...     df1.to_excel(writer, sheet_name='Sheet_name_1')
    ...     df2.to_excel(writer, sheet_name='Sheet_name_2')

    To set the library that is used to write the Excel file,
    you can pass the `engine` keyword (the default engine is
    automatically chosen depending on the file extension):

    >>> df1.to_excel('output1.xlsx', engine='xlsxwriter')  # doctest: +SKIP
    """
    @Appender(_shared_docs["to_excel"] % dict(klass="object"))
    def to_excel(self, excel_writer, sheet_name="Sheet1", na_rep="",
                 float_format=None, columns=None, header=True, index=True,
                 index_label=None, startrow=0, startcol=0, engine=None,
                 merge_cells=True, encoding=None, inf_rep="inf", verbose=True,
                 freeze_panes=None):
        df = self if isinstance(self, ABCDataFrame) else self.to_frame()

        from pandas.io.formats.excel import ExcelFormatter
        formatter = ExcelFormatter(df, na_rep=na_rep, cols=columns,
                                   header=header,
                                   float_format=float_format, index=index,
                                   index_label=index_label,
                                   merge_cells=merge_cells,
                                   inf_rep=inf_rep)
        formatter.write(excel_writer, sheet_name=sheet_name, startrow=startrow,
                        startcol=startcol, freeze_panes=freeze_panes,
                        engine=engine)
  1754. def to_json(self, path_or_buf=None, orient=None, date_format=None,
  1755. double_precision=10, force_ascii=True, date_unit='ms',
  1756. default_handler=None, lines=False, compression='infer',
  1757. index=True):
  1758. """
  1759. Convert the object to a JSON string.
  1760. Note NaN's and None will be converted to null and datetime objects
  1761. will be converted to UNIX timestamps.
  1762. Parameters
  1763. ----------
  1764. path_or_buf : string or file handle, optional
  1765. File path or object. If not specified, the result is returned as
  1766. a string.
  1767. orient : string
  1768. Indication of expected JSON string format.
  1769. * Series
  1770. - default is 'index'
  1771. - allowed values are: {'split','records','index','table'}
  1772. * DataFrame
  1773. - default is 'columns'
  1774. - allowed values are:
  1775. {'split','records','index','columns','values','table'}
  1776. * The format of the JSON string
  1777. - 'split' : dict like {'index' -> [index],
  1778. 'columns' -> [columns], 'data' -> [values]}
  1779. - 'records' : list like
  1780. [{column -> value}, ... , {column -> value}]
  1781. - 'index' : dict like {index -> {column -> value}}
  1782. - 'columns' : dict like {column -> {index -> value}}
  1783. - 'values' : just the values array
  1784. - 'table' : dict like {'schema': {schema}, 'data': {data}}
  1785. describing the data, and the data component is
  1786. like ``orient='records'``.
  1787. .. versionchanged:: 0.20.0
  1788. date_format : {None, 'epoch', 'iso'}
  1789. Type of date conversion. 'epoch' = epoch milliseconds,
  1790. 'iso' = ISO8601. The default depends on the `orient`. For
  1791. ``orient='table'``, the default is 'iso'. For all other orients,
  1792. the default is 'epoch'.
  1793. double_precision : int, default 10
  1794. The number of decimal places to use when encoding
  1795. floating point values.
  1796. force_ascii : bool, default True
  1797. Force encoded string to be ASCII.
  1798. date_unit : string, default 'ms' (milliseconds)
  1799. The time unit to encode to, governs timestamp and ISO8601
  1800. precision. One of 's', 'ms', 'us', 'ns' for second, millisecond,
  1801. microsecond, and nanosecond respectively.
  1802. default_handler : callable, default None
  1803. Handler to call if object cannot otherwise be converted to a
  1804. suitable format for JSON. Should receive a single argument which is
  1805. the object to convert and return a serialisable object.
  1806. lines : bool, default False
  1807. If 'orient' is 'records' write out line delimited json format. Will
  1808. throw ValueError if incorrect 'orient' since others are not list
  1809. like.
  1810. .. versionadded:: 0.19.0
  1811. compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None}
  1812. A string representing the compression to use in the output file,
  1813. only used when the first argument is a filename. By default, the
  1814. compression is inferred from the filename.
  1815. .. versionadded:: 0.21.0
  1816. .. versionchanged:: 0.24.0
  1817. 'infer' option added and set to default
  1818. index : bool, default True
  1819. Whether to include the index values in the JSON string. Not
  1820. including the index (``index=False``) is only supported when
  1821. orient is 'split' or 'table'.
  1822. .. versionadded:: 0.23.0
  1823. See Also
  1824. --------
  1825. read_json
  1826. Examples
  1827. --------
  1828. >>> df = pd.DataFrame([['a', 'b'], ['c', 'd']],
  1829. ... index=['row 1', 'row 2'],
  1830. ... columns=['col 1', 'col 2'])
  1831. >>> df.to_json(orient='split')
  1832. '{"columns":["col 1","col 2"],
  1833. "index":["row 1","row 2"],
  1834. "data":[["a","b"],["c","d"]]}'
  1835. Encoding/decoding a Dataframe using ``'records'`` formatted JSON.
  1836. Note that index labels are not preserved with this encoding.
  1837. >>> df.to_json(orient='records')
  1838. '[{"col 1":"a","col 2":"b"},{"col 1":"c","col 2":"d"}]'
Encoding/decoding a DataFrame using ``'index'`` formatted JSON:
  1840. >>> df.to_json(orient='index')
  1841. '{"row 1":{"col 1":"a","col 2":"b"},"row 2":{"col 1":"c","col 2":"d"}}'
Encoding/decoding a DataFrame using ``'columns'`` formatted JSON:
  1843. >>> df.to_json(orient='columns')
  1844. '{"col 1":{"row 1":"a","row 2":"c"},"col 2":{"row 1":"b","row 2":"d"}}'
Encoding/decoding a DataFrame using ``'values'`` formatted JSON:
  1846. >>> df.to_json(orient='values')
  1847. '[["a","b"],["c","d"]]'
Encoding with Table Schema:
  1849. >>> df.to_json(orient='table')
  1850. '{"schema": {"fields": [{"name": "index", "type": "string"},
  1851. {"name": "col 1", "type": "string"},
  1852. {"name": "col 2", "type": "string"}],
  1853. "primaryKey": "index",
  1854. "pandas_version": "0.20.0"},
  1855. "data": [{"index": "row 1", "col 1": "a", "col 2": "b"},
  1856. {"index": "row 2", "col 1": "c", "col 2": "d"}]}'
  1857. """
  1858. from pandas.io import json
  1859. if date_format is None and orient == 'table':
  1860. date_format = 'iso'
  1861. elif date_format is None:
  1862. date_format = 'epoch'
  1863. return json.to_json(path_or_buf=path_or_buf, obj=self, orient=orient,
  1864. date_format=date_format,
  1865. double_precision=double_precision,
  1866. force_ascii=force_ascii, date_unit=date_unit,
  1867. default_handler=default_handler,
  1868. lines=lines, compression=compression,
  1869. index=index)
  1870. def to_hdf(self, path_or_buf, key, **kwargs):
  1871. """
  1872. Write the contained data to an HDF5 file using HDFStore.
  1873. Hierarchical Data Format (HDF) is self-describing, allowing an
  1874. application to interpret the structure and contents of a file with
  1875. no outside information. One HDF file can hold a mix of related objects
  1876. which can be accessed as a group or as individual objects.
In order to add another DataFrame or Series to an existing HDF file,
please use append mode and a different key.
  1879. For more information see the :ref:`user guide <io.hdf5>`.
  1880. Parameters
  1881. ----------
  1882. path_or_buf : str or pandas.HDFStore
  1883. File path or HDFStore object.
  1884. key : str
  1885. Identifier for the group in the store.
  1886. mode : {'a', 'w', 'r+'}, default 'a'
  1887. Mode to open file:
  1888. - 'w': write, a new file is created (an existing file with
  1889. the same name would be deleted).
  1890. - 'a': append, an existing file is opened for reading and
  1891. writing, and if the file does not exist it is created.
  1892. - 'r+': similar to 'a', but the file must already exist.
  1893. format : {'fixed', 'table'}, default 'fixed'
  1894. Possible values:
  1895. - 'fixed': Fixed format. Fast writing/reading. Not-appendable,
  1896. nor searchable.
  1897. - 'table': Table format. Write as a PyTables Table structure
  1898. which may perform worse but allow more flexible operations
  1899. like searching / selecting subsets of the data.
  1900. append : bool, default False
For Table formats, append the input data to the existing table.
  1902. data_columns : list of columns or True, optional
  1903. List of columns to create as indexed data columns for on-disk
  1904. queries, or True to use all columns. By default only the axes
  1905. of the object are indexed. See :ref:`io.hdf5-query-data-columns`.
  1906. Applicable only to format='table'.
  1907. complevel : {0-9}, optional
  1908. Specifies a compression level for data.
  1909. A value of 0 disables compression.
  1910. complib : {'zlib', 'lzo', 'bzip2', 'blosc'}, default 'zlib'
  1911. Specifies the compression library to be used.
  1912. As of v0.20.2 these additional compressors for Blosc are supported
  1913. (default if no compressor specified: 'blosc:blosclz'):
  1914. {'blosc:blosclz', 'blosc:lz4', 'blosc:lz4hc', 'blosc:snappy',
  1915. 'blosc:zlib', 'blosc:zstd'}.
Specifying a compression library which is not available raises
a ValueError.
  1918. fletcher32 : bool, default False
  1919. If applying compression use the fletcher32 checksum.
  1920. dropna : bool, default False
If True, rows that are entirely NaN will not be written to the store.
  1922. errors : str, default 'strict'
  1923. Specifies how encoding and decoding errors are to be handled.
  1924. See the errors argument for :func:`open` for a full list
  1925. of options.
  1926. See Also
  1927. --------
read_hdf : Read from an HDF file.
  1929. DataFrame.to_parquet : Write a DataFrame to the binary parquet format.
  1930. DataFrame.to_sql : Write to a sql table.
  1931. DataFrame.to_feather : Write out feather-format for DataFrames.
  1932. DataFrame.to_csv : Write out to a csv file.
  1933. Examples
  1934. --------
  1935. >>> df = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]},
  1936. ... index=['a', 'b', 'c'])
  1937. >>> df.to_hdf('data.h5', key='df', mode='w')
  1938. We can add another object to the same file:
  1939. >>> s = pd.Series([1, 2, 3, 4])
  1940. >>> s.to_hdf('data.h5', key='s')
  1941. Reading from HDF file:
  1942. >>> pd.read_hdf('data.h5', 'df')
  1943. A B
  1944. a 1 4
  1945. b 2 5
  1946. c 3 6
  1947. >>> pd.read_hdf('data.h5', 's')
  1948. 0 1
  1949. 1 2
  1950. 2 3
  1951. 3 4
  1952. dtype: int64
  1953. Deleting file with data:
  1954. >>> import os
  1955. >>> os.remove('data.h5')
  1956. """
  1957. from pandas.io import pytables
  1958. return pytables.to_hdf(path_or_buf, key, self, **kwargs)
  1959. def to_msgpack(self, path_or_buf=None, encoding='utf-8', **kwargs):
  1960. """
  1961. Serialize object to input file path using msgpack format.
  1962. THIS IS AN EXPERIMENTAL LIBRARY and the storage format
  1963. may not be stable until a future release.
  1964. Parameters
  1965. ----------
path_or_buf : string, buffer-like, or None
File path or buffer to write to. If None, the packed result is
returned instead of being written.
append : bool, default False
Whether to append to an existing msgpack file.
compress : {'zlib', 'blosc'}, optional
Type of compressor to use. By default no compression is applied.
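Examples
--------
A minimal round-trip sketch (the file name is illustrative):

>>> df = pd.DataFrame({'A': [1, 2]})
>>> df.to_msgpack('frame.msg')  # doctest: +SKIP
>>> pd.read_msgpack('frame.msg')  # doctest: +SKIP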
  1972. """
  1973. from pandas.io import packers
  1974. return packers.to_msgpack(path_or_buf, self, encoding=encoding,
  1975. **kwargs)
  1976. def to_sql(self, name, con, schema=None, if_exists='fail', index=True,
  1977. index_label=None, chunksize=None, dtype=None, method=None):
  1978. """
  1979. Write records stored in a DataFrame to a SQL database.
  1980. Databases supported by SQLAlchemy [1]_ are supported. Tables can be
  1981. newly created, appended to, or overwritten.
  1982. Parameters
  1983. ----------
  1984. name : string
  1985. Name of SQL table.
  1986. con : sqlalchemy.engine.Engine or sqlite3.Connection
  1987. Using SQLAlchemy makes it possible to use any DB supported by that
  1988. library. Legacy support is provided for sqlite3.Connection objects.
  1989. schema : string, optional
  1990. Specify the schema (if database flavor supports this). If None, use
  1991. default schema.
  1992. if_exists : {'fail', 'replace', 'append'}, default 'fail'
  1993. How to behave if the table already exists.
  1994. * fail: Raise a ValueError.
  1995. * replace: Drop the table before inserting new values.
  1996. * append: Insert new values to the existing table.
  1997. index : bool, default True
  1998. Write DataFrame index as a column. Uses `index_label` as the column
  1999. name in the table.
  2000. index_label : string or sequence, default None
  2001. Column label for index column(s). If None is given (default) and
  2002. `index` is True, then the index names are used.
  2003. A sequence should be given if the DataFrame uses MultiIndex.
  2004. chunksize : int, optional
  2005. Rows will be written in batches of this size at a time. By default,
  2006. all rows will be written at once.
  2007. dtype : dict, optional
  2008. Specifying the datatype for columns. The keys should be the column
  2009. names and the values should be the SQLAlchemy types or strings for
  2010. the sqlite3 legacy mode.
  2011. method : {None, 'multi', callable}, default None
  2012. Controls the SQL insertion clause used:
  2013. * None : Uses standard SQL ``INSERT`` clause (one per row).
  2014. * 'multi': Pass multiple values in a single ``INSERT`` clause.
  2015. * callable with signature ``(pd_table, conn, keys, data_iter)``.
  2016. Details and a sample callable implementation can be found in the
  2017. section :ref:`insert method <io.sql.method>`.
  2018. .. versionadded:: 0.24.0
  2019. Raises
  2020. ------
  2021. ValueError
  2022. When the table already exists and `if_exists` is 'fail' (the
  2023. default).
  2024. See Also
  2025. --------
  2026. read_sql : Read a DataFrame from a table.
  2027. Notes
  2028. -----
  2029. Timezone aware datetime columns will be written as
  2030. ``Timestamp with timezone`` type with SQLAlchemy if supported by the
  2031. database. Otherwise, the datetimes will be stored as timezone unaware
  2032. timestamps local to the original timezone.
  2033. .. versionadded:: 0.24.0
  2034. References
  2035. ----------
  2036. .. [1] http://docs.sqlalchemy.org
  2037. .. [2] https://www.python.org/dev/peps/pep-0249/
  2038. Examples
  2039. --------
  2040. Create an in-memory SQLite database.
  2041. >>> from sqlalchemy import create_engine
  2042. >>> engine = create_engine('sqlite://', echo=False)
  2043. Create a table from scratch with 3 rows.
  2044. >>> df = pd.DataFrame({'name' : ['User 1', 'User 2', 'User 3']})
  2045. >>> df
  2046. name
  2047. 0 User 1
  2048. 1 User 2
  2049. 2 User 3
  2050. >>> df.to_sql('users', con=engine)
  2051. >>> engine.execute("SELECT * FROM users").fetchall()
  2052. [(0, 'User 1'), (1, 'User 2'), (2, 'User 3')]
  2053. >>> df1 = pd.DataFrame({'name' : ['User 4', 'User 5']})
  2054. >>> df1.to_sql('users', con=engine, if_exists='append')
  2055. >>> engine.execute("SELECT * FROM users").fetchall()
  2056. [(0, 'User 1'), (1, 'User 2'), (2, 'User 3'),
  2057. (0, 'User 4'), (1, 'User 5')]
  2058. Overwrite the table with just ``df1``.
  2059. >>> df1.to_sql('users', con=engine, if_exists='replace',
  2060. ... index_label='id')
  2061. >>> engine.execute("SELECT * FROM users").fetchall()
  2062. [(0, 'User 4'), (1, 'User 5')]
  2063. Specify the dtype (especially useful for integers with missing values).
  2064. Notice that while pandas is forced to store the data as floating point,
  2065. the database supports nullable integers. When fetching the data with
  2066. Python, we get back integer scalars.
  2067. >>> df = pd.DataFrame({"A": [1, None, 2]})
  2068. >>> df
  2069. A
  2070. 0 1.0
  2071. 1 NaN
  2072. 2 2.0
  2073. >>> from sqlalchemy.types import Integer
  2074. >>> df.to_sql('integers', con=engine, index=False,
  2075. ... dtype={"A": Integer()})
  2076. >>> engine.execute("SELECT * FROM integers").fetchall()
  2077. [(1,), (None,), (2,)]
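A sketch of a custom insertion callable for ``method``, using the
documented ``(pd_table, conn, keys, data_iter)`` signature (the
function name is illustrative, not part of the API):

>>> def insert_rows(pd_table, conn, keys, data_iter):
...     # build one parameter dict per row and issue a single INSERT
...     data = [dict(zip(keys, row)) for row in data_iter]
...     conn.execute(pd_table.table.insert(), data)
>>> df.to_sql('integers', con=engine, if_exists='replace',
...           method=insert_rows)  # doctest: +SKIP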
  2078. """
  2079. from pandas.io import sql
  2080. sql.to_sql(self, name, con, schema=schema, if_exists=if_exists,
  2081. index=index, index_label=index_label, chunksize=chunksize,
  2082. dtype=dtype, method=method)
  2083. def to_pickle(self, path, compression='infer',
  2084. protocol=pkl.HIGHEST_PROTOCOL):
  2085. """
  2086. Pickle (serialize) object to file.
  2087. Parameters
  2088. ----------
  2089. path : str
  2090. File path where the pickled object will be stored.
  2091. compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None}, \
  2092. default 'infer'
  2093. A string representing the compression to use in the output file. By
  2094. default, infers from the file extension in specified path.
  2095. .. versionadded:: 0.20.0
  2096. protocol : int
  2097. Int which indicates which protocol should be used by the pickler,
  2098. default HIGHEST_PROTOCOL (see [1]_ paragraph 12.1.2). The possible
  2099. values for this parameter depend on the version of Python. For
Python 2.x, possible values are 0, 1, 2. For Python >= 3.0, 3 is a
  2101. valid value. For Python >= 3.4, 4 is a valid value. A negative
  2102. value for the protocol parameter is equivalent to setting its value
  2103. to HIGHEST_PROTOCOL.
  2104. .. [1] https://docs.python.org/3/library/pickle.html
  2105. .. versionadded:: 0.21.0
  2106. See Also
  2107. --------
  2108. read_pickle : Load pickled pandas object (or any object) from file.
  2109. DataFrame.to_hdf : Write DataFrame to an HDF5 file.
  2110. DataFrame.to_sql : Write DataFrame to a SQL database.
  2111. DataFrame.to_parquet : Write a DataFrame to the binary parquet format.
  2112. Examples
  2113. --------
  2114. >>> original_df = pd.DataFrame({"foo": range(5), "bar": range(5, 10)})
  2115. >>> original_df
  2116. foo bar
  2117. 0 0 5
  2118. 1 1 6
  2119. 2 2 7
  2120. 3 3 8
  2121. 4 4 9
  2122. >>> original_df.to_pickle("./dummy.pkl")
  2123. >>> unpickled_df = pd.read_pickle("./dummy.pkl")
  2124. >>> unpickled_df
  2125. foo bar
  2126. 0 0 5
  2127. 1 1 6
  2128. 2 2 7
  2129. 3 3 8
  2130. 4 4 9
  2131. >>> import os
  2132. >>> os.remove("./dummy.pkl")
  2133. """
  2134. from pandas.io.pickle import to_pickle
  2135. return to_pickle(self, path, compression=compression,
  2136. protocol=protocol)
  2137. def to_clipboard(self, excel=True, sep=None, **kwargs):
  2138. r"""
  2139. Copy object to the system clipboard.
  2140. Write a text representation of object to the system clipboard.
  2141. This can be pasted into Excel, for example.
  2142. Parameters
  2143. ----------
  2144. excel : bool, default True
  2145. - True, use the provided separator, writing in a csv format for
  2146. allowing easy pasting into excel.
  2147. - False, write a string representation of the object to the
  2148. clipboard.
  2149. sep : str, default ``'\t'``
  2150. Field delimiter.
  2151. **kwargs
  2152. These parameters will be passed to DataFrame.to_csv.
  2153. See Also
  2154. --------
  2155. DataFrame.to_csv : Write a DataFrame to a comma-separated values
  2156. (csv) file.
  2157. read_clipboard : Read text from clipboard and pass to read_table.
  2158. Notes
  2159. -----
Requirements for your platform:
  2161. - Linux : `xclip`, or `xsel` (with `gtk` or `PyQt4` modules)
  2162. - Windows : none
  2163. - OS X : none
  2164. Examples
  2165. --------
  2166. Copy the contents of a DataFrame to the clipboard.
  2167. >>> df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=['A', 'B', 'C'])
  2168. >>> df.to_clipboard(sep=',')
  2169. ... # Wrote the following to the system clipboard:
  2170. ... # ,A,B,C
  2171. ... # 0,1,2,3
  2172. ... # 1,4,5,6
We can omit the index by passing the keyword `index` and setting
it to False.
  2175. >>> df.to_clipboard(sep=',', index=False)
  2176. ... # Wrote the following to the system clipboard:
  2177. ... # A,B,C
  2178. ... # 1,2,3
  2179. ... # 4,5,6
  2180. """
  2181. from pandas.io import clipboards
  2182. clipboards.to_clipboard(self, excel=excel, sep=sep, **kwargs)
  2183. def to_xarray(self):
  2184. """
  2185. Return an xarray object from the pandas object.
  2186. Returns
  2187. -------
  2188. xarray.DataArray or xarray.Dataset
  2189. Data in the pandas structure converted to Dataset if the object is
  2190. a DataFrame, or a DataArray if the object is a Series.
  2191. See Also
  2192. --------
  2193. DataFrame.to_hdf : Write DataFrame to an HDF5 file.
  2194. DataFrame.to_parquet : Write a DataFrame to the binary parquet format.
  2195. Notes
  2196. -----
See the `xarray docs <http://xarray.pydata.org/en/stable/>`__ for
further details.
  2198. Examples
  2199. --------
  2200. >>> df = pd.DataFrame([('falcon', 'bird', 389.0, 2),
  2201. ... ('parrot', 'bird', 24.0, 2),
  2202. ... ('lion', 'mammal', 80.5, 4),
  2203. ... ('monkey', 'mammal', np.nan, 4)],
  2204. ... columns=['name', 'class', 'max_speed',
  2205. ... 'num_legs'])
  2206. >>> df
  2207. name class max_speed num_legs
  2208. 0 falcon bird 389.0 2
  2209. 1 parrot bird 24.0 2
  2210. 2 lion mammal 80.5 4
  2211. 3 monkey mammal NaN 4
  2212. >>> df.to_xarray()
  2213. <xarray.Dataset>
  2214. Dimensions: (index: 4)
  2215. Coordinates:
  2216. * index (index) int64 0 1 2 3
  2217. Data variables:
  2218. name (index) object 'falcon' 'parrot' 'lion' 'monkey'
  2219. class (index) object 'bird' 'bird' 'mammal' 'mammal'
  2220. max_speed (index) float64 389.0 24.0 80.5 nan
  2221. num_legs (index) int64 2 2 4 4
  2222. >>> df['max_speed'].to_xarray()
  2223. <xarray.DataArray 'max_speed' (index: 4)>
  2224. array([389. , 24. , 80.5, nan])
  2225. Coordinates:
  2226. * index (index) int64 0 1 2 3
  2227. >>> dates = pd.to_datetime(['2018-01-01', '2018-01-01',
  2228. ... '2018-01-02', '2018-01-02'])
  2229. >>> df_multiindex = pd.DataFrame({'date': dates,
  2230. ... 'animal': ['falcon', 'parrot', 'falcon',
  2231. ... 'parrot'],
  2232. ... 'speed': [350, 18, 361, 15]}).set_index(['date',
  2233. ... 'animal'])
  2234. >>> df_multiindex
  2235. speed
  2236. date animal
  2237. 2018-01-01 falcon 350
  2238. parrot 18
  2239. 2018-01-02 falcon 361
  2240. parrot 15
  2241. >>> df_multiindex.to_xarray()
  2242. <xarray.Dataset>
  2243. Dimensions: (animal: 2, date: 2)
  2244. Coordinates:
  2245. * date (date) datetime64[ns] 2018-01-01 2018-01-02
  2246. * animal (animal) object 'falcon' 'parrot'
  2247. Data variables:
  2248. speed (date, animal) int64 350 18 361 15
  2249. """
  2250. try:
  2251. import xarray
  2252. except ImportError:
  2253. # Give a nice error message
  2254. raise ImportError("the xarray library is not installed\n"
  2255. "you can install via conda\n"
  2256. "conda install xarray\n"
  2257. "or via pip\n"
  2258. "pip install xarray\n")
  2259. if self.ndim == 1:
  2260. return xarray.DataArray.from_series(self)
  2261. elif self.ndim == 2:
  2262. return xarray.Dataset.from_dataframe(self)
  2263. # > 2 dims
  2264. coords = [(a, self._get_axis(a)) for a in self._AXIS_ORDERS]
  2265. return xarray.DataArray(self,
  2266. coords=coords,
  2267. )
  2268. def to_latex(self, buf=None, columns=None, col_space=None, header=True,
  2269. index=True, na_rep='NaN', formatters=None, float_format=None,
  2270. sparsify=None, index_names=True, bold_rows=False,
  2271. column_format=None, longtable=None, escape=None,
  2272. encoding=None, decimal='.', multicolumn=None,
  2273. multicolumn_format=None, multirow=None):
  2274. r"""
  2275. Render an object to a LaTeX tabular environment table.
  2276. Render an object to a tabular environment table. You can splice
  2277. this into a LaTeX document. Requires \usepackage{booktabs}.
  2278. .. versionchanged:: 0.20.2
  2279. Added to Series
  2280. Parameters
  2281. ----------
  2282. buf : file descriptor or None
  2283. Buffer to write to. If None, the output is returned as a string.
  2284. columns : list of label, optional
  2285. The subset of columns to write. Writes all columns by default.
  2286. col_space : int, optional
  2287. The minimum width of each column.
  2288. header : bool or list of str, default True
  2289. Write out the column names. If a list of strings is given,
  2290. it is assumed to be aliases for the column names.
  2291. index : bool, default True
  2292. Write row names (index).
  2293. na_rep : str, default 'NaN'
  2294. Missing data representation.
  2295. formatters : list of functions or dict of {str: function}, optional
  2296. Formatter functions to apply to columns' elements by position or
  2297. name. The result of each function must be a unicode string.
  2298. List must be of length equal to the number of columns.
  2299. float_format : str, optional
  2300. Format string for floating point numbers.
  2301. sparsify : bool, optional
  2302. Set to False for a DataFrame with a hierarchical index to print
  2303. every multiindex key at each row. By default, the value will be
  2304. read from the config module.
  2305. index_names : bool, default True
  2306. Prints the names of the indexes.
  2307. bold_rows : bool, default False
  2308. Make the row labels bold in the output.
  2309. column_format : str, optional
  2310. The columns format as specified in `LaTeX table format
  2311. <https://en.wikibooks.org/wiki/LaTeX/Tables>`__ e.g. 'rcl' for 3
  2312. columns. By default, 'l' will be used for all columns except
  2313. columns of numbers, which default to 'r'.
  2314. longtable : bool, optional
  2315. By default, the value will be read from the pandas config
  2316. module. Use a longtable environment instead of tabular. Requires
  2317. adding a \usepackage{longtable} to your LaTeX preamble.
  2318. escape : bool, optional
  2319. By default, the value will be read from the pandas config
module. When set to False, prevents escaping of LaTeX special
characters in column names.
  2322. encoding : str, optional
  2323. A string representing the encoding to use in the output file,
  2324. defaults to 'ascii' on Python 2 and 'utf-8' on Python 3.
  2325. decimal : str, default '.'
  2326. Character recognized as decimal separator, e.g. ',' in Europe.
  2327. .. versionadded:: 0.18.0
  2328. multicolumn : bool, default True
  2329. Use \multicolumn to enhance MultiIndex columns.
  2330. The default will be read from the config module.
  2331. .. versionadded:: 0.20.0
  2332. multicolumn_format : str, default 'l'
  2333. The alignment for multicolumns, similar to `column_format`
  2334. The default will be read from the config module.
  2335. .. versionadded:: 0.20.0
  2336. multirow : bool, default False
  2337. Use \multirow to enhance MultiIndex rows. Requires adding a
  2338. \usepackage{multirow} to your LaTeX preamble. Will print
  2339. centered labels (instead of top-aligned) across the contained
  2340. rows, separating groups via clines. The default will be read
  2341. from the pandas config module.
  2342. .. versionadded:: 0.20.0
  2343. Returns
  2344. -------
  2345. str or None
If buf is None, returns the resulting LaTeX representation as a
string. Otherwise returns None.
  2348. See Also
  2349. --------
  2350. DataFrame.to_string : Render a DataFrame to a console-friendly
  2351. tabular output.
  2352. DataFrame.to_html : Render a DataFrame as an HTML table.
  2353. Examples
  2354. --------
  2355. >>> df = pd.DataFrame({'name': ['Raphael', 'Donatello'],
  2356. ... 'mask': ['red', 'purple'],
  2357. ... 'weapon': ['sai', 'bo staff']})
  2358. >>> df.to_latex(index=False) # doctest: +NORMALIZE_WHITESPACE
  2359. '\\begin{tabular}{lll}\n\\toprule\n name & mask & weapon
  2360. \\\\\n\\midrule\n Raphael & red & sai \\\\\n Donatello &
  2361. purple & bo staff \\\\\n\\bottomrule\n\\end{tabular}\n'
  2362. """
# a Series is rendered via an equivalent one-column DataFrame
if self.ndim == 1:
self = self.to_frame()
# Get defaults from the pandas config
  2366. if longtable is None:
  2367. longtable = config.get_option("display.latex.longtable")
  2368. if escape is None:
  2369. escape = config.get_option("display.latex.escape")
  2370. if multicolumn is None:
  2371. multicolumn = config.get_option("display.latex.multicolumn")
  2372. if multicolumn_format is None:
  2373. multicolumn_format = config.get_option(
  2374. "display.latex.multicolumn_format")
  2375. if multirow is None:
  2376. multirow = config.get_option("display.latex.multirow")
  2377. formatter = DataFrameFormatter(self, buf=buf, columns=columns,
  2378. col_space=col_space, na_rep=na_rep,
  2379. header=header, index=index,
  2380. formatters=formatters,
  2381. float_format=float_format,
  2382. bold_rows=bold_rows,
  2383. sparsify=sparsify,
  2384. index_names=index_names,
  2385. escape=escape, decimal=decimal)
  2386. formatter.to_latex(column_format=column_format, longtable=longtable,
  2387. encoding=encoding, multicolumn=multicolumn,
  2388. multicolumn_format=multicolumn_format,
  2389. multirow=multirow)
  2390. if buf is None:
  2391. return formatter.buf.getvalue()
  2392. def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None,
  2393. columns=None, header=True, index=True, index_label=None,
  2394. mode='w', encoding=None, compression='infer', quoting=None,
  2395. quotechar='"', line_terminator=None, chunksize=None,
  2396. tupleize_cols=None, date_format=None, doublequote=True,
  2397. escapechar=None, decimal='.'):
  2398. r"""
  2399. Write object to a comma-separated values (csv) file.
  2400. .. versionchanged:: 0.24.0
  2401. The order of arguments for Series was changed.
  2402. Parameters
  2403. ----------
  2404. path_or_buf : str or file handle, default None
  2405. File path or object, if None is provided the result is returned as
  2406. a string. If a file object is passed it should be opened with
  2407. `newline=''`, disabling universal newlines.
  2408. .. versionchanged:: 0.24.0
  2409. Was previously named "path" for Series.
  2410. sep : str, default ','
  2411. String of length 1. Field delimiter for the output file.
  2412. na_rep : str, default ''
  2413. Missing data representation.
  2414. float_format : str, default None
  2415. Format string for floating point numbers.
  2416. columns : sequence, optional
  2417. Columns to write.
  2418. header : bool or list of str, default True
  2419. Write out the column names. If a list of strings is given it is
  2420. assumed to be aliases for the column names.
  2421. .. versionchanged:: 0.24.0
  2422. Previously defaulted to False for Series.
  2423. index : bool, default True
  2424. Write row names (index).
  2425. index_label : str or sequence, or False, default None
  2426. Column label for index column(s) if desired. If None is given, and
  2427. `header` and `index` are True, then the index names are used. A
  2428. sequence should be given if the object uses MultiIndex. If
  2429. False do not print fields for index names. Use index_label=False
  2430. for easier importing in R.
  2431. mode : str
  2432. Python write mode, default 'w'.
  2433. encoding : str, optional
  2434. A string representing the encoding to use in the output file,
  2435. defaults to 'ascii' on Python 2 and 'utf-8' on Python 3.
  2436. compression : str, default 'infer'
  2437. Compression mode among the following possible values: {'infer',
  2438. 'gzip', 'bz2', 'zip', 'xz', None}. If 'infer' and `path_or_buf`
  2439. is path-like, then detect compression from the following
  2440. extensions: '.gz', '.bz2', '.zip' or '.xz'. (otherwise no
  2441. compression).
  2442. .. versionchanged:: 0.24.0
  2443. 'infer' option added and set to default.
  2444. quoting : optional constant from csv module
  2445. Defaults to csv.QUOTE_MINIMAL. If you have set a `float_format`
  2446. then floats are converted to strings and thus csv.QUOTE_NONNUMERIC
  2447. will treat them as non-numeric.
  2448. quotechar : str, default '\"'
  2449. String of length 1. Character used to quote fields.
  2450. line_terminator : string, optional
  2451. The newline character or character sequence to use in the output
file. Defaults to `os.linesep`, which depends on the OS in which
this method is called (e.g. '\n' for Linux, '\r\n' for Windows).
  2454. .. versionchanged:: 0.24.0
  2455. chunksize : int or None
  2456. Rows to write at a time.
  2457. tupleize_cols : bool, default False
  2458. Write MultiIndex columns as a list of tuples (if True) or in
  2459. the new, expanded format, where each MultiIndex column is a row
  2460. in the CSV (if False).
  2461. .. deprecated:: 0.21.0
  2462. This argument will be removed and will always write each row
  2463. of the multi-index as a separate row in the CSV file.
  2464. date_format : str, default None
  2465. Format string for datetime objects.
  2466. doublequote : bool, default True
  2467. Control quoting of `quotechar` inside a field.
  2468. escapechar : str, default None
  2469. String of length 1. Character used to escape `sep` and `quotechar`
  2470. when appropriate.
  2471. decimal : str, default '.'
  2472. Character recognized as decimal separator. E.g. use ',' for
  2473. European data.
  2474. Returns
  2475. -------
  2476. None or str
  2477. If path_or_buf is None, returns the resulting csv format as a
  2478. string. Otherwise returns None.
  2479. See Also
  2480. --------
  2481. read_csv : Load a CSV file into a DataFrame.
to_excel : Write DataFrame to an Excel file.
  2483. Examples
  2484. --------
  2485. >>> df = pd.DataFrame({'name': ['Raphael', 'Donatello'],
  2486. ... 'mask': ['red', 'purple'],
  2487. ... 'weapon': ['sai', 'bo staff']})
  2488. >>> df.to_csv(index=False)
  2489. 'name,mask,weapon\nRaphael,red,sai\nDonatello,purple,bo staff\n'
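To write to a compressed file instead, pass a path; with the default
``compression='infer'`` the '.gz' suffix selects gzip (path shown is
illustrative):

>>> df.to_csv('out.csv.gz', index=False)  # doctest: +SKIP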
  2490. """
  2491. df = self if isinstance(self, ABCDataFrame) else self.to_frame()
  2492. if tupleize_cols is not None:
  2493. warnings.warn("The 'tupleize_cols' parameter is deprecated and "
  2494. "will be removed in a future version",
  2495. FutureWarning, stacklevel=2)
  2496. else:
  2497. tupleize_cols = False
  2498. from pandas.io.formats.csvs import CSVFormatter
  2499. formatter = CSVFormatter(df, path_or_buf,
  2500. line_terminator=line_terminator, sep=sep,
  2501. encoding=encoding,
  2502. compression=compression, quoting=quoting,
  2503. na_rep=na_rep, float_format=float_format,
  2504. cols=columns, header=header, index=index,
  2505. index_label=index_label, mode=mode,
  2506. chunksize=chunksize, quotechar=quotechar,
  2507. tupleize_cols=tupleize_cols,
  2508. date_format=date_format,
  2509. doublequote=doublequote,
  2510. escapechar=escapechar, decimal=decimal)
  2511. formatter.save()
  2512. if path_or_buf is None:
  2513. return formatter.path_or_buf.getvalue()
  2514. # ----------------------------------------------------------------------
  2515. # Fancy Indexing
  2516. @classmethod
  2517. def _create_indexer(cls, name, indexer):
  2518. """Create an indexer like _name in the class."""
  2519. if getattr(cls, name, None) is None:
  2520. _indexer = functools.partial(indexer, name)
  2521. setattr(cls, name, property(_indexer, doc=indexer.__doc__))
  2522. def get(self, key, default=None):
  2523. """
  2524. Get item from object for given key (DataFrame column, Panel slice,
  2525. etc.). Returns default value if not found.
  2526. Parameters
  2527. ----------
  2528. key : object
  2529. Returns
  2530. -------
  2531. value : same type as items contained in object
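Examples
--------
A quick illustration with a small DataFrame:

>>> df = pd.DataFrame({'A': [1, 2]})
>>> df.get('A')
0    1
1    2
Name: A, dtype: int64
>>> df.get('Z', default='missing')
'missing'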
  2532. """
  2533. try:
  2534. return self[key]
  2535. except (KeyError, ValueError, IndexError):
  2536. return default
  2537. def __getitem__(self, item):
  2538. return self._get_item_cache(item)
  2539. def _get_item_cache(self, item):
  2540. """Return the cached item, item represents a label indexer."""
  2541. cache = self._item_cache
  2542. res = cache.get(item)
  2543. if res is None:
  2544. values = self._data.get(item)
  2545. res = self._box_item_values(item, values)
  2546. cache[item] = res
  2547. res._set_as_cached(item, self)
  2548. # for a chain
  2549. res._is_copy = self._is_copy
  2550. return res
  2551. def _set_as_cached(self, item, cacher):
  2552. """Set the _cacher attribute on the calling object with a weakref to
  2553. cacher.
  2554. """
  2555. self._cacher = (item, weakref.ref(cacher))
  2556. def _reset_cacher(self):
  2557. """Reset the cacher."""
  2558. if hasattr(self, '_cacher'):
  2559. del self._cacher
  2560. def _iget_item_cache(self, item):
  2561. """Return the cached item, item represents a positional indexer."""
  2562. ax = self._info_axis
  2563. if ax.is_unique:
  2564. lower = self._get_item_cache(ax[item])
  2565. else:
  2566. lower = self._take(item, axis=self._info_axis_number)
  2567. return lower
  2568. def _box_item_values(self, key, values):
  2569. raise AbstractMethodError(self)
  2570. def _maybe_cache_changed(self, item, value):
  2571. """The object has called back to us saying maybe it has changed.
  2572. """
  2573. self._data.set(item, value)
  2574. @property
  2575. def _is_cached(self):
  2576. """Return boolean indicating if self is cached or not."""
  2577. return getattr(self, '_cacher', None) is not None
  2578. def _get_cacher(self):
  2579. """return my cacher or None"""
  2580. cacher = getattr(self, '_cacher', None)
  2581. if cacher is not None:
  2582. cacher = cacher[1]()
  2583. return cacher
  2584. @property
  2585. def _is_view(self):
  2586. """Return boolean indicating if self is view of another array """
  2587. return self._data.is_view
  2588. def _maybe_update_cacher(self, clear=False, verify_is_copy=True):
  2589. """
  2590. See if we need to update our parent cacher if clear, then clear our
  2591. cache.
  2592. Parameters
  2593. ----------
  2594. clear : boolean, default False
  2595. clear the item cache
  2596. verify_is_copy : boolean, default True
  2597. provide is_copy checks
  2598. """
  2599. cacher = getattr(self, '_cacher', None)
  2600. if cacher is not None:
  2601. ref = cacher[1]()
# we are trying to reference a dead referent, hence
# a copy
  2604. if ref is None:
  2605. del self._cacher
  2606. else:
  2607. try:
  2608. ref._maybe_cache_changed(cacher[0], self)
  2609. except Exception:
  2610. pass
  2611. if verify_is_copy:
  2612. self._check_setitem_copy(stacklevel=5, t='referant')
  2613. if clear:
  2614. self._clear_item_cache()
  2615. def _clear_item_cache(self, i=None):
  2616. if i is not None:
  2617. self._item_cache.pop(i, None)
  2618. else:
  2619. self._item_cache.clear()
  2620. def _slice(self, slobj, axis=0, kind=None):
  2621. """
  2622. Construct a slice of this container.
  2623. kind parameter is maintained for compatibility with Series slicing.
  2624. """
  2625. axis = self._get_block_manager_axis(axis)
  2626. result = self._constructor(self._data.get_slice(slobj, axis=axis))
  2627. result = result.__finalize__(self)
  2628. # this could be a view
  2629. # but only in a single-dtyped view slicable case
  2630. is_copy = axis != 0 or result._is_view
  2631. result._set_is_copy(self, copy=is_copy)
  2632. return result
  2633. def _set_item(self, key, value):
  2634. self._data.set(key, value)
  2635. self._clear_item_cache()
  2636. def _set_is_copy(self, ref=None, copy=True):
  2637. if not copy:
  2638. self._is_copy = None
  2639. else:
  2640. if ref is not None:
  2641. self._is_copy = weakref.ref(ref)
  2642. else:
  2643. self._is_copy = None
  2644. def _check_is_chained_assignment_possible(self):
  2645. """
  2646. Check if we are a view, have a cacher, and are of mixed type.
  2647. If so, then force a setitem_copy check.
  2648. Should be called just near setting a value
Returns True if we are a view, are cached, and are of a single
dtype, meaning that the cacher should be updated following the
setting operation.
  2652. """
  2653. if self._is_view and self._is_cached:
  2654. ref = self._get_cacher()
  2655. if ref is not None and ref._is_mixed_type:
  2656. self._check_setitem_copy(stacklevel=4, t='referant',
  2657. force=True)
  2658. return True
  2659. elif self._is_copy:
  2660. self._check_setitem_copy(stacklevel=4, t='referant')
  2661. return False
  2662. def _check_setitem_copy(self, stacklevel=4, t='setting', force=False):
  2663. """
  2664. Parameters
  2665. ----------
  2666. stacklevel : integer, default 4
  2667. the level to show of the stack when the error is output
  2668. t : string, the type of setting error
  2669. force : boolean, default False
  2670. if True, then force showing an error
Validate if we are doing a setitem on a chained copy.
If you call this function, be sure to set the stacklevel such that the
user will see the error *at the level of setting*.
It is technically possible to figure out that we are setting on
a copy even WITH a multi-dtyped pandas object. In other words, some
blocks may be views while others are not. Currently _is_view will ALWAYS
return False for multi-blocks to avoid having to handle this case.
df = DataFrame(np.arange(0, 9), columns=['count'])
df['group'] = 'b'
# This technically need not raise SettingWithCopy if both are views
# (which is not generally guaranteed but is usually True). However,
# this is in general not a good practice and we recommend using .loc.
df.iloc[0:5]['group'] = 'a'
  2684. """
  2685. if force or self._is_copy:
  2686. value = config.get_option('mode.chained_assignment')
  2687. if value is None:
  2688. return
  2689. # see if the copy is not actually referred; if so, then dissolve
  2690. # the copy weakref
  2691. try:
  2692. gc.collect(2)
  2693. if not gc.get_referents(self._is_copy()):
  2694. self._is_copy = None
  2695. return
  2696. except Exception:
  2697. pass
  2698. # we might be a false positive
  2699. try:
  2700. if self._is_copy().shape == self.shape:
  2701. self._is_copy = None
  2702. return
  2703. except Exception:
  2704. pass
  2705. # a custom message
  2706. if isinstance(self._is_copy, string_types):
  2707. t = self._is_copy
  2708. elif t == 'referant':
  2709. t = ("\n"
  2710. "A value is trying to be set on a copy of a slice from a "
  2711. "DataFrame\n\n"
  2712. "See the caveats in the documentation: "
  2713. "http://pandas.pydata.org/pandas-docs/stable/"
  2714. "indexing.html#indexing-view-versus-copy"
  2715. )
  2716. else:
  2717. t = ("\n"
  2718. "A value is trying to be set on a copy of a slice from a "
  2719. "DataFrame.\n"
  2720. "Try using .loc[row_indexer,col_indexer] = value "
  2721. "instead\n\nSee the caveats in the documentation: "
  2722. "http://pandas.pydata.org/pandas-docs/stable/"
  2723. "indexing.html#indexing-view-versus-copy"
  2724. )
  2725. if value == 'raise':
  2726. raise com.SettingWithCopyError(t)
  2727. elif value == 'warn':
  2728. warnings.warn(t, com.SettingWithCopyWarning,
  2729. stacklevel=stacklevel)
  2730. def __delitem__(self, key):
  2731. """
  2732. Delete item
  2733. """
  2734. deleted = False
  2735. maybe_shortcut = False
  2736. if hasattr(self, 'columns') and isinstance(self.columns, MultiIndex):
  2737. try:
  2738. maybe_shortcut = key not in self.columns._engine
  2739. except TypeError:
  2740. pass
  2741. if maybe_shortcut:
  2742. # Allow shorthand to delete all columns whose first len(key)
  2743. # elements match key:
  2744. if not isinstance(key, tuple):
  2745. key = (key, )
  2746. for col in self.columns:
  2747. if isinstance(col, tuple) and col[:len(key)] == key:
  2748. del self[col]
  2749. deleted = True
  2750. if not deleted:
  2751. # If the above loop ran and didn't delete anything because
  2752. # there was no match, this call should raise the appropriate
  2753. # exception:
  2754. self._data.delete(key)
  2755. # delete from the caches
  2756. try:
  2757. del self._item_cache[key]
  2758. except KeyError:
  2759. pass
  2760. def _take(self, indices, axis=0, is_copy=True):
  2761. """
  2762. Return the elements in the given *positional* indices along an axis.
  2763. This means that we are not indexing according to actual values in
  2764. the index attribute of the object. We are indexing according to the
  2765. actual position of the element in the object.
  2766. This is the internal version of ``.take()`` and will contain a wider
  2767. selection of parameters useful for internal use but not as suitable
  2768. for public usage.
  2769. Parameters
  2770. ----------
  2771. indices : array-like
  2772. An array of ints indicating which positions to take.
  2773. axis : int, default 0
  2774. The axis on which to select elements. "0" means that we are
  2775. selecting rows, "1" means that we are selecting columns, etc.
  2776. is_copy : bool, default True
  2777. Whether to return a copy of the original object or not.
  2778. Returns
  2779. -------
  2780. taken : same type as caller
  2781. An array-like containing the elements taken from the object.
  2782. See Also
  2783. --------
  2784. numpy.ndarray.take
  2785. numpy.take
  2786. """
  2787. self._consolidate_inplace()
  2788. new_data = self._data.take(indices,
  2789. axis=self._get_block_manager_axis(axis),
  2790. verify=True)
  2791. result = self._constructor(new_data).__finalize__(self)
  2792. # Maybe set copy if we didn't actually change the index.
  2793. if is_copy:
  2794. if not result._get_axis(axis).equals(self._get_axis(axis)):
  2795. result._set_is_copy(self)
  2796. return result
  2797. def take(self, indices, axis=0, convert=None, is_copy=True, **kwargs):
  2798. """
  2799. Return the elements in the given *positional* indices along an axis.
  2800. This means that we are not indexing according to actual values in
  2801. the index attribute of the object. We are indexing according to the
  2802. actual position of the element in the object.
  2803. Parameters
  2804. ----------
  2805. indices : array-like
  2806. An array of ints indicating which positions to take.
  2807. axis : {0 or 'index', 1 or 'columns', None}, default 0
  2808. The axis on which to select elements. ``0`` means that we are
  2809. selecting rows, ``1`` means that we are selecting columns.
  2810. convert : bool, default True
  2811. Whether to convert negative indices into positive ones.
  2812. For example, ``-1`` would map to the ``len(axis) - 1``.
  2813. The conversions are similar to the behavior of indexing a
  2814. regular Python list.
  2815. .. deprecated:: 0.21.0
  2816. In the future, negative indices will always be converted.
  2817. is_copy : bool, default True
  2818. Whether to return a copy of the original object or not.
  2819. **kwargs
  2820. For compatibility with :meth:`numpy.take`. Has no effect on the
  2821. output.
  2822. Returns
  2823. -------
  2824. taken : same type as caller
  2825. An array-like containing the elements taken from the object.
  2826. See Also
  2827. --------
  2828. DataFrame.loc : Select a subset of a DataFrame by labels.
  2829. DataFrame.iloc : Select a subset of a DataFrame by positions.
  2830. numpy.take : Take elements from an array along an axis.
  2831. Examples
  2832. --------
  2833. >>> df = pd.DataFrame([('falcon', 'bird', 389.0),
  2834. ... ('parrot', 'bird', 24.0),
  2835. ... ('lion', 'mammal', 80.5),
  2836. ... ('monkey', 'mammal', np.nan)],
  2837. ... columns=['name', 'class', 'max_speed'],
  2838. ... index=[0, 2, 3, 1])
  2839. >>> df
  2840. name class max_speed
  2841. 0 falcon bird 389.0
  2842. 2 parrot bird 24.0
  2843. 3 lion mammal 80.5
  2844. 1 monkey mammal NaN
  2845. Take elements at positions 0 and 3 along the axis 0 (default).
  2846. Note how the actual indices selected (0 and 1) do not correspond to
  2847. our selected indices 0 and 3. That's because we are selecting the 0th
  2848. and 3rd rows, not rows whose indices equal 0 and 3.
  2849. >>> df.take([0, 3])
  2850. name class max_speed
  2851. 0 falcon bird 389.0
  2852. 1 monkey mammal NaN
  2853. Take elements at indices 1 and 2 along the axis 1 (column selection).
  2854. >>> df.take([1, 2], axis=1)
  2855. class max_speed
  2856. 0 bird 389.0
  2857. 2 bird 24.0
  2858. 3 mammal 80.5
  2859. 1 mammal NaN
We may take elements using negative integers, which are interpreted
as positions from the end of the object, just like with Python lists.
  2862. >>> df.take([-1, -2])
  2863. name class max_speed
  2864. 1 monkey mammal NaN
  2865. 3 lion mammal 80.5
  2866. """
  2867. if convert is not None:
  2868. msg = ("The 'convert' parameter is deprecated "
  2869. "and will be removed in a future version.")
  2870. warnings.warn(msg, FutureWarning, stacklevel=2)
  2871. nv.validate_take(tuple(), kwargs)
  2872. return self._take(indices, axis=axis, is_copy=is_copy)
  2873. def xs(self, key, axis=0, level=None, drop_level=True):
  2874. """
  2875. Return cross-section from the Series/DataFrame.
  2876. This method takes a `key` argument to select data at a particular
  2877. level of a MultiIndex.
  2878. Parameters
  2879. ----------
  2880. key : label or tuple of label
  2881. Label contained in the index, or partially in a MultiIndex.
  2882. axis : {0 or 'index', 1 or 'columns'}, default 0
  2883. Axis to retrieve cross-section on.
  2884. level : object, defaults to first n levels (n=1 or len(key))
  2885. In case of a key partially contained in a MultiIndex, indicate
  2886. which levels are used. Levels can be referred by label or position.
  2887. drop_level : bool, default True
  2888. If False, returns object with same levels as self.
  2889. Returns
  2890. -------
  2891. Series or DataFrame
  2892. Cross-section from the original Series or DataFrame
  2893. corresponding to the selected index levels.
  2894. See Also
  2895. --------
  2896. DataFrame.loc : Access a group of rows and columns
  2897. by label(s) or a boolean array.
  2898. DataFrame.iloc : Purely integer-location based indexing
  2899. for selection by position.
  2900. Notes
  2901. -----
`xs` cannot be used to set values.
  2903. MultiIndex Slicers is a generic way to get/set values on
  2904. any level or levels.
  2905. It is a superset of `xs` functionality, see
  2906. :ref:`MultiIndex Slicers <advanced.mi_slicers>`.
  2907. Examples
  2908. --------
  2909. >>> d = {'num_legs': [4, 4, 2, 2],
  2910. ... 'num_wings': [0, 0, 2, 2],
  2911. ... 'class': ['mammal', 'mammal', 'mammal', 'bird'],
  2912. ... 'animal': ['cat', 'dog', 'bat', 'penguin'],
  2913. ... 'locomotion': ['walks', 'walks', 'flies', 'walks']}
  2914. >>> df = pd.DataFrame(data=d)
  2915. >>> df = df.set_index(['class', 'animal', 'locomotion'])
  2916. >>> df
  2917. num_legs num_wings
  2918. class animal locomotion
  2919. mammal cat walks 4 0
  2920. dog walks 4 0
  2921. bat flies 2 2
  2922. bird penguin walks 2 2
  2923. Get values at specified index
  2924. >>> df.xs('mammal')
  2925. num_legs num_wings
  2926. animal locomotion
  2927. cat walks 4 0
  2928. dog walks 4 0
  2929. bat flies 2 2
  2930. Get values at several indexes
  2931. >>> df.xs(('mammal', 'dog'))
  2932. num_legs num_wings
  2933. locomotion
  2934. walks 4 0
  2935. Get values at specified index and level
  2936. >>> df.xs('cat', level=1)
  2937. num_legs num_wings
  2938. class locomotion
  2939. mammal walks 4 0
  2940. Get values at several indexes and levels
  2941. >>> df.xs(('bird', 'walks'),
  2942. ... level=[0, 'locomotion'])
  2943. num_legs num_wings
  2944. animal
  2945. penguin 2 2
  2946. Get values at specified column and axis
  2947. >>> df.xs('num_wings', axis=1)
  2948. class animal locomotion
  2949. mammal cat walks 0
  2950. dog walks 0
  2951. bat flies 2
  2952. bird penguin walks 2
  2953. Name: num_wings, dtype: int64
  2954. """
  2955. axis = self._get_axis_number(axis)
  2956. labels = self._get_axis(axis)
  2957. if level is not None:
  2958. loc, new_ax = labels.get_loc_level(key, level=level,
  2959. drop_level=drop_level)
  2960. # create the tuple of the indexer
  2961. indexer = [slice(None)] * self.ndim
  2962. indexer[axis] = loc
  2963. indexer = tuple(indexer)
  2964. result = self.iloc[indexer]
  2965. setattr(result, result._get_axis_name(axis), new_ax)
  2966. return result
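# on the column axis a cross-section without a level is just plain
# column selection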
  2967. if axis == 1:
  2968. return self[key]
  2969. self._consolidate_inplace()
  2970. index = self.index
  2971. if isinstance(index, MultiIndex):
  2972. loc, new_index = self.index.get_loc_level(key,
  2973. drop_level=drop_level)
  2974. else:
  2975. loc = self.index.get_loc(key)
  2976. if isinstance(loc, np.ndarray):
  2977. if loc.dtype == np.bool_:
  2978. inds, = loc.nonzero()
  2979. return self._take(inds, axis=axis)
  2980. else:
  2981. return self._take(loc, axis=axis)
  2982. if not is_scalar(loc):
  2983. new_index = self.index[loc]
  2984. if is_scalar(loc):
  2985. new_values = self._data.fast_xs(loc)
  2986. # may need to box a datelike-scalar
  2987. #
  2988. # if we encounter an array-like and we only have 1 dim
# that means that there are list/ndarrays inside the Series!
  2990. # so just return them (GH 6394)
  2991. if not is_list_like(new_values) or self.ndim == 1:
  2992. return com.maybe_box_datetimelike(new_values)
  2993. result = self._constructor_sliced(
  2994. new_values, index=self.columns,
  2995. name=self.index[loc], dtype=new_values.dtype)
  2996. else:
  2997. result = self.iloc[loc]
  2998. result.index = new_index
  2999. # this could be a view
  3000. # but only in a single-dtyped view slicable case
  3001. result._set_is_copy(self, copy=not result._is_view)
  3002. return result
  3003. _xs = xs
  3004. def select(self, crit, axis=0):
  3005. """
  3006. Return data corresponding to axis labels matching criteria.
  3007. .. deprecated:: 0.21.0
  3008. Use df.loc[df.index.map(crit)] to select via labels
  3009. Parameters
  3010. ----------
  3011. crit : function
  3012. To be called on each index (label). Should return True or False
  3013. axis : int
  3014. Returns
  3015. -------
  3016. selection : same type as caller
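Examples
--------
A sketch of the recommended label-based replacement (data is
illustrative):

>>> df = pd.DataFrame({'A': [1, 2, 3]}, index=['cat', 'dog', 'cow'])
>>> df.loc[df.index.map(lambda label: label.startswith('c'))]  # doctest: +SKIP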
  3017. """
  3018. warnings.warn("'select' is deprecated and will be removed in a "
  3019. "future release. You can use "
  3020. ".loc[labels.map(crit)] as a replacement",
  3021. FutureWarning, stacklevel=2)
  3022. axis = self._get_axis_number(axis)
  3023. axis_name = self._get_axis_name(axis)
  3024. axis_values = self._get_axis(axis)
  3025. if len(axis_values) > 0:
  3026. new_axis = axis_values[
  3027. np.asarray([bool(crit(label)) for label in axis_values])]
  3028. else:
  3029. new_axis = axis_values
  3030. return self.reindex(**{axis_name: new_axis})
  3031. def reindex_like(self, other, method=None, copy=True, limit=None,
  3032. tolerance=None):
  3033. """
  3034. Return an object with matching indices as other object.
  3035. Conform the object to the same index on all axes. Optional
  3036. filling logic, placing NaN in locations having no value
  3037. in the previous index. A new object is produced unless the
  3038. new index is equivalent to the current one and copy=False.
  3039. Parameters
  3040. ----------
  3041. other : Object of the same data type
  3042. Its row and column indices are used to define the new indices
  3043. of this object.
  3044. method : {None, 'backfill'/'bfill', 'pad'/'ffill', 'nearest'}
  3045. Method to use for filling holes in reindexed DataFrame.
  3046. Please note: this is only applicable to DataFrames/Series with a
  3047. monotonically increasing/decreasing index.
  3048. * None (default): don't fill gaps
  3049. * pad / ffill: propagate last valid observation forward to next
  3050. valid
  3051. * backfill / bfill: use next valid observation to fill gap
  3052. * nearest: use nearest valid observations to fill gap
  3053. copy : bool, default True
  3054. Return a new object, even if the passed indexes are the same.
  3055. limit : int, default None
  3056. Maximum number of consecutive labels to fill for inexact matches.
  3057. tolerance : optional
  3058. Maximum distance between original and new labels for inexact
matches. The values of the index at the matching locations must
  3060. satisfy the equation ``abs(index[indexer] - target) <= tolerance``.
  3061. Tolerance may be a scalar value, which applies the same tolerance
  3062. to all values, or list-like, which applies variable tolerance per
  3063. element. List-like includes list, tuple, array, Series, and must be
  3064. the same size as the index and its dtype must exactly match the
  3065. index's type.
  3066. .. versionadded:: 0.21.0 (list-like tolerance)
  3067. Returns
  3068. -------
  3069. Series or DataFrame
  3070. Same type as caller, but with changed indices on each axis.
  3071. See Also
  3072. --------
  3073. DataFrame.set_index : Set row labels.
  3074. DataFrame.reset_index : Remove row labels or move them to new columns.
  3075. DataFrame.reindex : Change to new indices or expand indices.
  3076. Notes
  3077. -----
  3078. Same as calling
  3079. ``.reindex(index=other.index, columns=other.columns,...)``.
  3080. Examples
  3081. --------
  3082. >>> df1 = pd.DataFrame([[24.3, 75.7, 'high'],
  3083. ... [31, 87.8, 'high'],
  3084. ... [22, 71.6, 'medium'],
  3085. ... [35, 95, 'medium']],
  3086. ... columns=['temp_celsius', 'temp_fahrenheit', 'windspeed'],
  3087. ... index=pd.date_range(start='2014-02-12',
  3088. ... end='2014-02-15', freq='D'))
  3089. >>> df1
  3090. temp_celsius temp_fahrenheit windspeed
  3091. 2014-02-12 24.3 75.7 high
  3092. 2014-02-13 31.0 87.8 high
  3093. 2014-02-14 22.0 71.6 medium
  3094. 2014-02-15 35.0 95.0 medium
  3095. >>> df2 = pd.DataFrame([[28, 'low'],
  3096. ... [30, 'low'],
  3097. ... [35.1, 'medium']],
  3098. ... columns=['temp_celsius', 'windspeed'],
  3099. ... index=pd.DatetimeIndex(['2014-02-12', '2014-02-13',
  3100. ... '2014-02-15']))
  3101. >>> df2
  3102. temp_celsius windspeed
  3103. 2014-02-12 28.0 low
  3104. 2014-02-13 30.0 low
  3105. 2014-02-15 35.1 medium
  3106. >>> df2.reindex_like(df1)
  3107. temp_celsius temp_fahrenheit windspeed
  3108. 2014-02-12 28.0 NaN low
  3109. 2014-02-13 30.0 NaN low
  3110. 2014-02-14 NaN NaN NaN
  3111. 2014-02-15 35.1 NaN medium
  3112. """
  3113. d = other._construct_axes_dict(axes=self._AXIS_ORDERS, method=method,
  3114. copy=copy, limit=limit,
  3115. tolerance=tolerance)
  3116. return self.reindex(**d)
  3117. def drop(self, labels=None, axis=0, index=None, columns=None, level=None,
  3118. inplace=False, errors='raise'):
  3119. inplace = validate_bool_kwarg(inplace, 'inplace')
  3120. if labels is not None:
  3121. if index is not None or columns is not None:
  3122. raise ValueError("Cannot specify both 'labels' and "
  3123. "'index'/'columns'")
  3124. axis_name = self._get_axis_name(axis)
  3125. axes = {axis_name: labels}
  3126. elif index is not None or columns is not None:
  3127. axes, _ = self._construct_axes_from_arguments((index, columns), {})
  3128. else:
  3129. raise ValueError("Need to specify at least one of 'labels', "
  3130. "'index' or 'columns'")
  3131. obj = self
  3132. for axis, labels in axes.items():
  3133. if labels is not None:
  3134. obj = obj._drop_axis(labels, axis, level=level, errors=errors)
  3135. if inplace:
  3136. self._update_inplace(obj)
  3137. else:
  3138. return obj
  3139. def _drop_axis(self, labels, axis, level=None, errors='raise'):
  3140. """
  3141. Drop labels from specified axis. Used in the ``drop`` method
  3142. internally.
  3143. Parameters
  3144. ----------
  3145. labels : single label or list-like
  3146. axis : int or axis name
  3147. level : int or level name, default None
  3148. For MultiIndex
  3149. errors : {'ignore', 'raise'}, default 'raise'
If 'ignore', suppress the error; only existing labels are dropped.
  3151. """
  3152. axis = self._get_axis_number(axis)
  3153. axis_name = self._get_axis_name(axis)
  3154. axis = self._get_axis(axis)
  3155. if axis.is_unique:
  3156. if level is not None:
  3157. if not isinstance(axis, MultiIndex):
  3158. raise AssertionError('axis must be a MultiIndex')
  3159. new_axis = axis.drop(labels, level=level, errors=errors)
  3160. else:
  3161. new_axis = axis.drop(labels, errors=errors)
  3162. result = self.reindex(**{axis_name: new_axis})
  3163. # Case for non-unique axis
  3164. else:
  3165. labels = ensure_object(com.index_labels_to_array(labels))
  3166. if level is not None:
  3167. if not isinstance(axis, MultiIndex):
  3168. raise AssertionError('axis must be a MultiIndex')
  3169. indexer = ~axis.get_level_values(level).isin(labels)
  3170. # GH 18561 MultiIndex.drop should raise if label is absent
  3171. if errors == 'raise' and indexer.all():
  3172. raise KeyError('{} not found in axis'.format(labels))
  3173. else:
  3174. indexer = ~axis.isin(labels)
  3175. # Check if label doesn't exist along axis
  3176. labels_missing = (axis.get_indexer_for(labels) == -1).any()
  3177. if errors == 'raise' and labels_missing:
  3178. raise KeyError('{} not found in axis'.format(labels))
  3179. slicer = [slice(None)] * self.ndim
  3180. slicer[self._get_axis_number(axis_name)] = indexer
  3181. result = self.loc[tuple(slicer)]
  3182. return result
  3183. def _update_inplace(self, result, verify_is_copy=True):
  3184. """
  3185. Replace self internals with result.
  3186. Parameters
  3187. ----------
  3188. verify_is_copy : boolean, default True
  3189. provide is_copy checks
  3190. """
  3191. # NOTE: This does *not* call __finalize__ and that's an explicit
  3192. # decision that we may revisit in the future.
  3193. self._reset_cache()
  3194. self._clear_item_cache()
  3195. self._data = getattr(result, '_data', result)
  3196. self._maybe_update_cacher(verify_is_copy=verify_is_copy)
  3197. def add_prefix(self, prefix):
  3198. """
  3199. Prefix labels with string `prefix`.
  3200. For Series, the row labels are prefixed.
  3201. For DataFrame, the column labels are prefixed.
  3202. Parameters
  3203. ----------
  3204. prefix : str
  3205. The string to add before each label.
  3206. Returns
  3207. -------
  3208. Series or DataFrame
  3209. New Series or DataFrame with updated labels.
  3210. See Also
  3211. --------
  3212. Series.add_suffix: Suffix row labels with string `suffix`.
  3213. DataFrame.add_suffix: Suffix column labels with string `suffix`.
  3214. Examples
  3215. --------
  3216. >>> s = pd.Series([1, 2, 3, 4])
  3217. >>> s
  3218. 0 1
  3219. 1 2
  3220. 2 3
  3221. 3 4
  3222. dtype: int64
  3223. >>> s.add_prefix('item_')
  3224. item_0 1
  3225. item_1 2
  3226. item_2 3
  3227. item_3 4
  3228. dtype: int64
  3229. >>> df = pd.DataFrame({'A': [1, 2, 3, 4], 'B': [3, 4, 5, 6]})
  3230. >>> df
  3231. A B
  3232. 0 1 3
  3233. 1 2 4
  3234. 2 3 5
  3235. 3 4 6
  3236. >>> df.add_prefix('col_')
  3237. col_A col_B
  3238. 0 1 3
  3239. 1 2 4
  3240. 2 3 5
  3241. 3 4 6
  3242. """
  3243. f = functools.partial('{prefix}{}'.format, prefix=prefix)
  3244. mapper = {self._info_axis_name: f}
  3245. return self.rename(**mapper)
  3246. def add_suffix(self, suffix):
  3247. """
  3248. Suffix labels with string `suffix`.
  3249. For Series, the row labels are suffixed.
  3250. For DataFrame, the column labels are suffixed.
  3251. Parameters
  3252. ----------
  3253. suffix : str
  3254. The string to add after each label.
  3255. Returns
  3256. -------
  3257. Series or DataFrame
  3258. New Series or DataFrame with updated labels.
  3259. See Also
  3260. --------
  3261. Series.add_prefix: Prefix row labels with string `prefix`.
  3262. DataFrame.add_prefix: Prefix column labels with string `prefix`.
  3263. Examples
  3264. --------
  3265. >>> s = pd.Series([1, 2, 3, 4])
  3266. >>> s
  3267. 0 1
  3268. 1 2
  3269. 2 3
  3270. 3 4
  3271. dtype: int64
  3272. >>> s.add_suffix('_item')
  3273. 0_item 1
  3274. 1_item 2
  3275. 2_item 3
  3276. 3_item 4
  3277. dtype: int64
  3278. >>> df = pd.DataFrame({'A': [1, 2, 3, 4], 'B': [3, 4, 5, 6]})
  3279. >>> df
  3280. A B
  3281. 0 1 3
  3282. 1 2 4
  3283. 2 3 5
  3284. 3 4 6
  3285. >>> df.add_suffix('_col')
  3286. A_col B_col
  3287. 0 1 3
  3288. 1 2 4
  3289. 2 3 5
  3290. 3 4 6
  3291. """
  3292. f = functools.partial('{}{suffix}'.format, suffix=suffix)
  3293. mapper = {self._info_axis_name: f}
  3294. return self.rename(**mapper)
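# Sketch of the renaming trick used by add_prefix/add_suffix above: a
# partially applied str.format becomes the mapper handed to ``rename``.
# Standalone example, not library code.
# >>> import functools
# >>> f = functools.partial('{}{suffix}'.format, suffix='_col')
# >>> f('A')
# 'A_col'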
  3295. def sort_values(self, by=None, axis=0, ascending=True, inplace=False,
  3296. kind='quicksort', na_position='last'):
  3297. """
  3298. Sort by the values along either axis
  3299. Parameters
  3300. ----------%(optional_by)s
  3301. axis : %(axes_single_arg)s, default 0
  3302. Axis to be sorted
  3303. ascending : bool or list of bool, default True
  3304. Sort ascending vs. descending. Specify list for multiple sort
  3305. orders. If this is a list of bools, must match the length of
  3306. the by.
  3307. inplace : bool, default False
  3308. if True, perform operation in-place
  3309. kind : {'quicksort', 'mergesort', 'heapsort'}, default 'quicksort'
Choice of sorting algorithm. See also :func:`numpy.sort` for more
  3311. information. `mergesort` is the only stable algorithm. For
  3312. DataFrames, this option is only applied when sorting on a single
  3313. column or label.
  3314. na_position : {'first', 'last'}, default 'last'
  3315. `first` puts NaNs at the beginning, `last` puts NaNs at the end
  3316. Returns
  3317. -------
  3318. sorted_obj : %(klass)s
  3319. Examples
  3320. --------
  3321. >>> df = pd.DataFrame({
  3322. ... 'col1' : ['A', 'A', 'B', np.nan, 'D', 'C'],
  3323. ... 'col2' : [2, 1, 9, 8, 7, 4],
  3324. ... 'col3': [0, 1, 9, 4, 2, 3],
  3325. ... })
  3326. >>> df
  3327. col1 col2 col3
  3328. 0 A 2 0
  3329. 1 A 1 1
  3330. 2 B 9 9
  3331. 3 NaN 8 4
  3332. 4 D 7 2
  3333. 5 C 4 3
  3334. Sort by col1
  3335. >>> df.sort_values(by=['col1'])
  3336. col1 col2 col3
  3337. 0 A 2 0
  3338. 1 A 1 1
  3339. 2 B 9 9
  3340. 5 C 4 3
  3341. 4 D 7 2
  3342. 3 NaN 8 4
  3343. Sort by multiple columns
  3344. >>> df.sort_values(by=['col1', 'col2'])
  3345. col1 col2 col3
  3346. 1 A 1 1
  3347. 0 A 2 0
  3348. 2 B 9 9
  3349. 5 C 4 3
  3350. 4 D 7 2
  3351. 3 NaN 8 4
  3352. Sort Descending
  3353. >>> df.sort_values(by='col1', ascending=False)
  3354. col1 col2 col3
  3355. 4 D 7 2
  3356. 5 C 4 3
  3357. 2 B 9 9
  3358. 0 A 2 0
  3359. 1 A 1 1
  3360. 3 NaN 8 4
  3361. Putting NAs first
  3362. >>> df.sort_values(by='col1', ascending=False, na_position='first')
  3363. col1 col2 col3
  3364. 3 NaN 8 4
  3365. 4 D 7 2
  3366. 5 C 4 3
  3367. 2 B 9 9
  3368. 0 A 2 0
  3369. 1 A 1 1
  3370. """
  3371. raise NotImplementedError("sort_values has not been implemented "
  3372. "on Panel or Panel4D objects.")
  3373. def sort_index(self, axis=0, level=None, ascending=True, inplace=False,
  3374. kind='quicksort', na_position='last', sort_remaining=True):
  3375. """
  3376. Sort object by labels (along an axis)
  3377. Parameters
  3378. ----------
  3379. axis : %(axes)s to direct sorting
  3380. level : int or level name or list of ints or list of level names
  3381. if not None, sort on values in specified index level(s)
  3382. ascending : boolean, default True
  3383. Sort ascending vs. descending
  3384. inplace : bool, default False
  3385. if True, perform operation in-place
  3386. kind : {'quicksort', 'mergesort', 'heapsort'}, default 'quicksort'
Choice of sorting algorithm. See also :func:`numpy.sort` for more
  3388. information. `mergesort` is the only stable algorithm. For
  3389. DataFrames, this option is only applied when sorting on a single
  3390. column or label.
  3391. na_position : {'first', 'last'}, default 'last'
  3392. `first` puts NaNs at the beginning, `last` puts NaNs at the end.
  3393. Not implemented for MultiIndex.
  3394. sort_remaining : bool, default True
  3395. if true and sorting by level and index is multilevel, sort by other
  3396. levels too (in order) after sorting by specified level
  3397. Returns
  3398. -------
  3399. sorted_obj : %(klass)s
  3400. """
  3401. inplace = validate_bool_kwarg(inplace, 'inplace')
  3402. axis = self._get_axis_number(axis)
  3403. axis_name = self._get_axis_name(axis)
  3404. labels = self._get_axis(axis)
  3405. if level is not None:
  3406. raise NotImplementedError("level is not implemented")
  3407. if inplace:
  3408. raise NotImplementedError("inplace is not implemented")
  3409. sort_index = labels.argsort()
  3410. if not ascending:
  3411. sort_index = sort_index[::-1]
  3412. new_axis = labels.take(sort_index)
  3413. return self.reindex(**{axis_name: new_axis})
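# A rough equivalent of the generic fallback above (toy Series, illustrative
# only): sorting by labels is an argsort over the index followed by a
# reindex with the reordered labels.
# >>> import pandas as pd
# >>> s = pd.Series([1, 2, 3], index=['b', 'c', 'a'])
# >>> s.reindex(s.index[s.index.argsort()]).equals(s.sort_index())
# True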
  3414. def reindex(self, *args, **kwargs):
  3415. """
  3416. Conform %(klass)s to new index with optional filling logic, placing
  3417. NA/NaN in locations having no value in the previous index. A new object
  3418. is produced unless the new index is equivalent to the current one and
  3419. ``copy=False``.
  3420. Parameters
  3421. ----------
  3422. %(optional_labels)s
  3423. %(axes)s : array-like, optional
  3424. New labels / index to conform to, should be specified using
  3425. keywords. Preferably an Index object to avoid duplicating data
  3426. %(optional_axis)s
  3427. method : {None, 'backfill'/'bfill', 'pad'/'ffill', 'nearest'}
  3428. Method to use for filling holes in reindexed DataFrame.
  3429. Please note: this is only applicable to DataFrames/Series with a
  3430. monotonically increasing/decreasing index.
  3431. * None (default): don't fill gaps
  3432. * pad / ffill: propagate last valid observation forward to next
  3433. valid
  3434. * backfill / bfill: use next valid observation to fill gap
  3435. * nearest: use nearest valid observations to fill gap
  3436. copy : bool, default True
  3437. Return a new object, even if the passed indexes are the same.
  3438. level : int or name
  3439. Broadcast across a level, matching Index values on the
  3440. passed MultiIndex level.
  3441. fill_value : scalar, default np.NaN
  3442. Value to use for missing values. Defaults to NaN, but can be any
  3443. "compatible" value.
  3444. limit : int, default None
  3445. Maximum number of consecutive elements to forward or backward fill.
  3446. tolerance : optional
  3447. Maximum distance between original and new labels for inexact
matches. The values of the index at the matching locations must
satisfy the equation ``abs(index[indexer] - target) <= tolerance``.
  3450. Tolerance may be a scalar value, which applies the same tolerance
  3451. to all values, or list-like, which applies variable tolerance per
  3452. element. List-like includes list, tuple, array, Series, and must be
  3453. the same size as the index and its dtype must exactly match the
  3454. index's type.
  3455. .. versionadded:: 0.21.0 (list-like tolerance)
  3456. Returns
  3457. -------
  3458. %(klass)s with changed index.
  3459. See Also
  3460. --------
  3461. DataFrame.set_index : Set row labels.
  3462. DataFrame.reset_index : Remove row labels or move them to new columns.
  3463. DataFrame.reindex_like : Change to same indices as other DataFrame.
  3464. Examples
  3465. --------
  3466. ``DataFrame.reindex`` supports two calling conventions
  3467. * ``(index=index_labels, columns=column_labels, ...)``
  3468. * ``(labels, axis={'index', 'columns'}, ...)``
  3469. We *highly* recommend using keyword arguments to clarify your
  3470. intent.
  3471. Create a dataframe with some fictional data.
  3472. >>> index = ['Firefox', 'Chrome', 'Safari', 'IE10', 'Konqueror']
  3473. >>> df = pd.DataFrame({
  3474. ... 'http_status': [200,200,404,404,301],
  3475. ... 'response_time': [0.04, 0.02, 0.07, 0.08, 1.0]},
  3476. ... index=index)
  3477. >>> df
  3478. http_status response_time
  3479. Firefox 200 0.04
  3480. Chrome 200 0.02
  3481. Safari 404 0.07
  3482. IE10 404 0.08
  3483. Konqueror 301 1.00
  3484. Create a new index and reindex the dataframe. By default
  3485. values in the new index that do not have corresponding
  3486. records in the dataframe are assigned ``NaN``.
>>> new_index = ['Safari', 'Iceweasel', 'Comodo Dragon', 'IE10',
  3488. ... 'Chrome']
  3489. >>> df.reindex(new_index)
  3490. http_status response_time
  3491. Safari 404.0 0.07
  3492. Iceweasel NaN NaN
  3493. Comodo Dragon NaN NaN
  3494. IE10 404.0 0.08
  3495. Chrome 200.0 0.02
  3496. We can fill in the missing values by passing a value to
  3497. the keyword ``fill_value``. Because the index is not monotonically
  3498. increasing or decreasing, we cannot use arguments to the keyword
  3499. ``method`` to fill the ``NaN`` values.
  3500. >>> df.reindex(new_index, fill_value=0)
  3501. http_status response_time
  3502. Safari 404 0.07
  3503. Iceweasel 0 0.00
  3504. Comodo Dragon 0 0.00
  3505. IE10 404 0.08
  3506. Chrome 200 0.02
  3507. >>> df.reindex(new_index, fill_value='missing')
  3508. http_status response_time
  3509. Safari 404 0.07
  3510. Iceweasel missing missing
  3511. Comodo Dragon missing missing
  3512. IE10 404 0.08
  3513. Chrome 200 0.02
  3514. We can also reindex the columns.
  3515. >>> df.reindex(columns=['http_status', 'user_agent'])
  3516. http_status user_agent
  3517. Firefox 200 NaN
  3518. Chrome 200 NaN
  3519. Safari 404 NaN
  3520. IE10 404 NaN
  3521. Konqueror 301 NaN
  3522. Or we can use "axis-style" keyword arguments
  3523. >>> df.reindex(['http_status', 'user_agent'], axis="columns")
  3524. http_status user_agent
  3525. Firefox 200 NaN
  3526. Chrome 200 NaN
  3527. Safari 404 NaN
  3528. IE10 404 NaN
  3529. Konqueror 301 NaN
  3530. To further illustrate the filling functionality in
  3531. ``reindex``, we will create a dataframe with a
  3532. monotonically increasing index (for example, a sequence
  3533. of dates).
  3534. >>> date_index = pd.date_range('1/1/2010', periods=6, freq='D')
  3535. >>> df2 = pd.DataFrame({"prices": [100, 101, np.nan, 100, 89, 88]},
  3536. ... index=date_index)
  3537. >>> df2
  3538. prices
  3539. 2010-01-01 100.0
  3540. 2010-01-02 101.0
  3541. 2010-01-03 NaN
  3542. 2010-01-04 100.0
  3543. 2010-01-05 89.0
  3544. 2010-01-06 88.0
  3545. Suppose we decide to expand the dataframe to cover a wider
  3546. date range.
  3547. >>> date_index2 = pd.date_range('12/29/2009', periods=10, freq='D')
  3548. >>> df2.reindex(date_index2)
  3549. prices
  3550. 2009-12-29 NaN
  3551. 2009-12-30 NaN
  3552. 2009-12-31 NaN
  3553. 2010-01-01 100.0
  3554. 2010-01-02 101.0
  3555. 2010-01-03 NaN
  3556. 2010-01-04 100.0
  3557. 2010-01-05 89.0
  3558. 2010-01-06 88.0
  3559. 2010-01-07 NaN
  3560. The index entries that did not have a value in the original data frame
  3561. (for example, '2009-12-29') are by default filled with ``NaN``.
  3562. If desired, we can fill in the missing values using one of several
  3563. options.
  3564. For example, to back-propagate the last valid value to fill the ``NaN``
  3565. values, pass ``bfill`` as an argument to the ``method`` keyword.
  3566. >>> df2.reindex(date_index2, method='bfill')
  3567. prices
  3568. 2009-12-29 100.0
  3569. 2009-12-30 100.0
  3570. 2009-12-31 100.0
  3571. 2010-01-01 100.0
  3572. 2010-01-02 101.0
  3573. 2010-01-03 NaN
  3574. 2010-01-04 100.0
  3575. 2010-01-05 89.0
  3576. 2010-01-06 88.0
  3577. 2010-01-07 NaN
  3578. Please note that the ``NaN`` value present in the original dataframe
  3579. (at index value 2010-01-03) will not be filled by any of the
  3580. value propagation schemes. This is because filling while reindexing
  3581. does not look at dataframe values, but only compares the original and
  3582. desired indexes. If you do want to fill in the ``NaN`` values present
  3583. in the original dataframe, use the ``fillna()`` method.
  3584. See the :ref:`user guide <basics.reindexing>` for more.
  3585. """
  3586. # TODO: Decide if we care about having different examples for different
  3587. # kinds
  3588. # construct the args
  3589. axes, kwargs = self._construct_axes_from_arguments(args, kwargs)
  3590. method = missing.clean_reindex_fill_method(kwargs.pop('method', None))
  3591. level = kwargs.pop('level', None)
  3592. copy = kwargs.pop('copy', True)
  3593. limit = kwargs.pop('limit', None)
  3594. tolerance = kwargs.pop('tolerance', None)
  3595. fill_value = kwargs.pop('fill_value', None)
  3596. # Series.reindex doesn't use / need the axis kwarg
  3597. # We pop and ignore it here, to make writing Series/Frame generic code
  3598. # easier
  3599. kwargs.pop("axis", None)
  3600. if kwargs:
  3601. raise TypeError('reindex() got an unexpected keyword '
  3602. 'argument "{0}"'.format(list(kwargs.keys())[0]))
  3603. self._consolidate_inplace()
  3604. # if all axes that are requested to reindex are equal, then only copy
  3605. # if indicated must have index names equal here as well as values
  3606. if all(self._get_axis(axis).identical(ax)
  3607. for axis, ax in axes.items() if ax is not None):
  3608. if copy:
  3609. return self.copy()
  3610. return self
  3611. # check if we are a multi reindex
  3612. if self._needs_reindex_multi(axes, method, level):
  3613. try:
  3614. return self._reindex_multi(axes, copy, fill_value)
  3615. except Exception:
  3616. pass
  3617. # perform the reindex on the axes
  3618. return self._reindex_axes(axes, level, limit, tolerance, method,
  3619. fill_value, copy).__finalize__(self)
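# Behavioural sketch of the identical-axes shortcut above (toy data, not
# library source): when every requested axis matches the current one,
# reindex reduces to ``self.copy()`` -- or ``self`` when ``copy=False``.
# >>> import pandas as pd
# >>> df = pd.DataFrame({'a': [1, 2]})
# >>> df.reindex(index=df.index) is df
# False
# >>> df.reindex(index=df.index, copy=False) is df
# True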
  3620. def _reindex_axes(self, axes, level, limit, tolerance, method, fill_value,
  3621. copy):
  3622. """Perform the reindex for all the axes."""
  3623. obj = self
  3624. for a in self._AXIS_ORDERS:
  3625. labels = axes[a]
  3626. if labels is None:
  3627. continue
  3628. ax = self._get_axis(a)
  3629. new_index, indexer = ax.reindex(labels, level=level, limit=limit,
  3630. tolerance=tolerance, method=method)
  3631. axis = self._get_axis_number(a)
  3632. obj = obj._reindex_with_indexers({axis: [new_index, indexer]},
  3633. fill_value=fill_value,
  3634. copy=copy, allow_dups=False)
  3635. return obj
  3636. def _needs_reindex_multi(self, axes, method, level):
  3637. """Check if we do need a multi reindex."""
  3638. return ((com.count_not_none(*axes.values()) == self._AXIS_LEN) and
  3639. method is None and level is None and not self._is_mixed_type)
  3640. def _reindex_multi(self, axes, copy, fill_value):
  3641. return NotImplemented
  3642. _shared_docs['reindex_axis'] = ("""
  3643. Conform input object to new index.
  3644. .. deprecated:: 0.21.0
  3645. Use `reindex` instead.
  3646. By default, places NaN in locations having no value in the
  3647. previous index. A new object is produced unless the new index
  3648. is equivalent to the current one and copy=False.
  3649. Parameters
  3650. ----------
  3651. labels : array-like
  3652. New labels / index to conform to. Preferably an Index object to
  3653. avoid duplicating data.
  3654. axis : %(axes_single_arg)s
  3655. Indicate whether to use rows or columns.
  3656. method : {None, 'backfill'/'bfill', 'pad'/'ffill', 'nearest'}, optional
  3657. Method to use for filling holes in reindexed DataFrame:
  3658. * default: don't fill gaps.
  3659. * pad / ffill: propagate last valid observation forward to next
  3660. valid.
  3661. * backfill / bfill: use next valid observation to fill gap.
  3662. * nearest: use nearest valid observations to fill gap.
  3663. level : int or str
  3664. Broadcast across a level, matching Index values on the
  3665. passed MultiIndex level.
  3666. copy : bool, default True
  3667. Return a new object, even if the passed indexes are the same.
  3668. limit : int, optional
  3669. Maximum number of consecutive elements to forward or backward fill.
  3670. fill_value : float, default NaN
  3671. Value used to fill in locations having no value in the previous
  3672. index.
  3674. Returns
  3675. -------
  3676. %(klass)s
  3677. Returns a new DataFrame object with new indices, unless the new
  3678. index is equivalent to the current one and copy=False.
  3679. See Also
  3680. --------
  3681. DataFrame.set_index : Set row labels.
  3682. DataFrame.reset_index : Remove row labels or move them to new columns.
  3683. DataFrame.reindex : Change to new indices or expand indices.
  3684. DataFrame.reindex_like : Change to same indices as other DataFrame.
  3685. Examples
  3686. --------
  3687. >>> df = pd.DataFrame({'num_legs': [4, 2], 'num_wings': [0, 2]},
  3688. ... index=['dog', 'hawk'])
  3689. >>> df
  3690. num_legs num_wings
  3691. dog 4 0
  3692. hawk 2 2
  3693. >>> df.reindex(['num_wings', 'num_legs', 'num_heads'],
  3694. ... axis='columns')
  3695. num_wings num_legs num_heads
  3696. dog 0 4 NaN
  3697. hawk 2 2 NaN
  3698. """)
  3699. @Appender(_shared_docs['reindex_axis'] % _shared_doc_kwargs)
  3700. def reindex_axis(self, labels, axis=0, method=None, level=None, copy=True,
  3701. limit=None, fill_value=None):
  3702. msg = ("'.reindex_axis' is deprecated and will be removed in a future "
  3703. "version. Use '.reindex' instead.")
  3704. self._consolidate_inplace()
  3705. axis_name = self._get_axis_name(axis)
  3706. axis_values = self._get_axis(axis_name)
  3707. method = missing.clean_reindex_fill_method(method)
  3708. warnings.warn(msg, FutureWarning, stacklevel=3)
  3709. new_index, indexer = axis_values.reindex(labels, method, level,
  3710. limit=limit)
  3711. return self._reindex_with_indexers({axis: [new_index, indexer]},
  3712. fill_value=fill_value, copy=copy)
  3713. def _reindex_with_indexers(self, reindexers, fill_value=None, copy=False,
  3714. allow_dups=False):
  3715. """allow_dups indicates an internal call here """
  3716. # reindex doing multiple operations on different axes if indicated
  3717. new_data = self._data
  3718. for axis in sorted(reindexers.keys()):
  3719. index, indexer = reindexers[axis]
  3720. baxis = self._get_block_manager_axis(axis)
  3721. if index is None:
  3722. continue
  3723. index = ensure_index(index)
  3724. if indexer is not None:
  3725. indexer = ensure_int64(indexer)
  3726. # TODO: speed up on homogeneous DataFrame objects
  3727. new_data = new_data.reindex_indexer(index, indexer, axis=baxis,
  3728. fill_value=fill_value,
  3729. allow_dups=allow_dups,
  3730. copy=copy)
  3731. if copy and new_data is self._data:
  3732. new_data = new_data.copy()
  3733. return self._constructor(new_data).__finalize__(self)
  3734. def filter(self, items=None, like=None, regex=None, axis=None):
  3735. """
  3736. Subset rows or columns of dataframe according to labels in
  3737. the specified index.
  3738. Note that this routine does not filter a dataframe on its
  3739. contents. The filter is applied to the labels of the index.
  3740. Parameters
  3741. ----------
  3742. items : list-like
List of axis labels to keep (the labels need not all be present).
  3744. like : string
  3745. Keep axis where "arg in col == True".
  3746. regex : string (regular expression)
  3747. Keep axis with re.search(regex, col) == True.
  3748. axis : int or string axis name
  3749. The axis to filter on. By default this is the info axis,
  3750. 'index' for Series, 'columns' for DataFrame.
  3751. Returns
  3752. -------
  3753. same type as input object
  3754. See Also
  3755. --------
  3756. DataFrame.loc
  3757. Notes
  3758. -----
  3759. The ``items``, ``like``, and ``regex`` parameters are
  3760. enforced to be mutually exclusive.
  3761. ``axis`` defaults to the info axis that is used when indexing
  3762. with ``[]``.
  3763. Examples
  3764. --------
  3765. >>> df = pd.DataFrame(np.array(([1,2,3], [4,5,6])),
  3766. ... index=['mouse', 'rabbit'],
  3767. ... columns=['one', 'two', 'three'])
  3768. >>> # select columns by name
  3769. >>> df.filter(items=['one', 'three'])
  3770. one three
  3771. mouse 1 3
  3772. rabbit 4 6
  3773. >>> # select columns by regular expression
  3774. >>> df.filter(regex='e$', axis=1)
  3775. one three
  3776. mouse 1 3
  3777. rabbit 4 6
  3778. >>> # select rows containing 'bbi'
  3779. >>> df.filter(like='bbi', axis=0)
  3780. one two three
  3781. rabbit 4 5 6
  3782. """
  3783. import re
  3784. nkw = com.count_not_none(items, like, regex)
  3785. if nkw > 1:
  3786. raise TypeError('Keyword arguments `items`, `like`, or `regex` '
  3787. 'are mutually exclusive')
  3788. if axis is None:
  3789. axis = self._info_axis_name
  3790. labels = self._get_axis(axis)
  3791. if items is not None:
  3792. name = self._get_axis_name(axis)
  3793. return self.reindex(
  3794. **{name: [r for r in items if r in labels]})
  3795. elif like:
  3796. def f(x):
  3797. return like in to_str(x)
  3798. values = labels.map(f)
  3799. return self.loc(axis=axis)[values]
  3800. elif regex:
  3801. def f(x):
  3802. return matcher.search(to_str(x)) is not None
  3803. matcher = re.compile(regex)
  3804. values = labels.map(f)
  3805. return self.loc(axis=axis)[values]
  3806. else:
  3807. raise TypeError('Must pass either `items`, `like`, or `regex`')
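# Rough equivalence for the ``like``/``regex`` branches above (toy data,
# illustrative only): both reduce to a boolean map over the axis labels.
# >>> import pandas as pd
# >>> df = pd.DataFrame([[1, 2]], columns=['one', 'two'])
# >>> df.filter(like='on').equals(df.loc[:, ['on' in c for c in df.columns]])
# True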
  3808. def head(self, n=5):
  3809. """
  3810. Return the first `n` rows.
  3811. This function returns the first `n` rows for the object based
  3812. on position. It is useful for quickly testing if your object
  3813. has the right type of data in it.
  3814. Parameters
  3815. ----------
  3816. n : int, default 5
  3817. Number of rows to select.
  3818. Returns
  3819. -------
  3820. obj_head : same type as caller
  3821. The first `n` rows of the caller object.
  3822. See Also
  3823. --------
  3824. DataFrame.tail: Returns the last `n` rows.
  3825. Examples
  3826. --------
  3827. >>> df = pd.DataFrame({'animal':['alligator', 'bee', 'falcon', 'lion',
  3828. ... 'monkey', 'parrot', 'shark', 'whale', 'zebra']})
  3829. >>> df
  3830. animal
  3831. 0 alligator
  3832. 1 bee
  3833. 2 falcon
  3834. 3 lion
  3835. 4 monkey
  3836. 5 parrot
  3837. 6 shark
  3838. 7 whale
  3839. 8 zebra
  3840. Viewing the first 5 lines
  3841. >>> df.head()
  3842. animal
  3843. 0 alligator
  3844. 1 bee
  3845. 2 falcon
  3846. 3 lion
  3847. 4 monkey
  3848. Viewing the first `n` lines (three in this case)
  3849. >>> df.head(3)
  3850. animal
  3851. 0 alligator
  3852. 1 bee
  3853. 2 falcon
  3854. """
  3855. return self.iloc[:n]
  3856. def tail(self, n=5):
  3857. """
  3858. Return the last `n` rows.
This function returns the last `n` rows from the object based on
  3860. position. It is useful for quickly verifying data, for example,
  3861. after sorting or appending rows.
  3862. Parameters
  3863. ----------
  3864. n : int, default 5
  3865. Number of rows to select.
  3866. Returns
  3867. -------
  3868. type of caller
  3869. The last `n` rows of the caller object.
  3870. See Also
  3871. --------
  3872. DataFrame.head : The first `n` rows of the caller object.
  3873. Examples
  3874. --------
  3875. >>> df = pd.DataFrame({'animal':['alligator', 'bee', 'falcon', 'lion',
  3876. ... 'monkey', 'parrot', 'shark', 'whale', 'zebra']})
  3877. >>> df
  3878. animal
  3879. 0 alligator
  3880. 1 bee
  3881. 2 falcon
  3882. 3 lion
  3883. 4 monkey
  3884. 5 parrot
  3885. 6 shark
  3886. 7 whale
  3887. 8 zebra
  3888. Viewing the last 5 lines
  3889. >>> df.tail()
  3890. animal
  3891. 4 monkey
  3892. 5 parrot
  3893. 6 shark
  3894. 7 whale
  3895. 8 zebra
  3896. Viewing the last `n` lines (three in this case)
  3897. >>> df.tail(3)
  3898. animal
  3899. 6 shark
  3900. 7 whale
  3901. 8 zebra
  3902. """
  3903. if n == 0:
  3904. return self.iloc[0:0]
  3905. return self.iloc[-n:]
  3906. def sample(self, n=None, frac=None, replace=False, weights=None,
  3907. random_state=None, axis=None):
  3908. """
  3909. Return a random sample of items from an axis of object.
  3910. You can use `random_state` for reproducibility.
  3911. Parameters
  3912. ----------
  3913. n : int, optional
  3914. Number of items from axis to return. Cannot be used with `frac`.
  3915. Default = 1 if `frac` = None.
  3916. frac : float, optional
  3917. Fraction of axis items to return. Cannot be used with `n`.
  3918. replace : bool, default False
  3919. Sample with or without replacement.
  3920. weights : str or ndarray-like, optional
  3921. Default 'None' results in equal probability weighting.
  3922. If passed a Series, will align with target object on index. Index
  3923. values in weights not found in sampled object will be ignored and
  3924. index values in sampled object not in weights will be assigned
  3925. weights of zero.
  3926. If called on a DataFrame, will accept the name of a column
  3927. when axis = 0.
  3928. Unless weights are a Series, weights must be same length as axis
  3929. being sampled.
  3930. If weights do not sum to 1, they will be normalized to sum to 1.
  3931. Missing values in the weights column will be treated as zero.
  3932. Infinite values not allowed.
  3933. random_state : int or numpy.random.RandomState, optional
  3934. Seed for the random number generator (if int), or numpy RandomState
  3935. object.
  3936. axis : int or string, optional
  3937. Axis to sample. Accepts axis number or name. Default is stat axis
  3938. for given data type (0 for Series and DataFrames, 1 for Panels).
  3939. Returns
  3940. -------
  3941. Series or DataFrame
  3942. A new object of same type as caller containing `n` items randomly
  3943. sampled from the caller object.
  3944. See Also
  3945. --------
  3946. numpy.random.choice: Generates a random sample from a given 1-D numpy
  3947. array.
  3948. Examples
  3949. --------
  3950. >>> df = pd.DataFrame({'num_legs': [2, 4, 8, 0],
  3951. ... 'num_wings': [2, 0, 0, 0],
  3952. ... 'num_specimen_seen': [10, 2, 1, 8]},
  3953. ... index=['falcon', 'dog', 'spider', 'fish'])
  3954. >>> df
  3955. num_legs num_wings num_specimen_seen
  3956. falcon 2 2 10
  3957. dog 4 0 2
  3958. spider 8 0 1
  3959. fish 0 0 8
  3960. Extract 3 random elements from the ``Series`` ``df['num_legs']``:
  3961. Note that we use `random_state` to ensure the reproducibility of
  3962. the examples.
  3963. >>> df['num_legs'].sample(n=3, random_state=1)
  3964. fish 0
  3965. spider 8
  3966. falcon 2
  3967. Name: num_legs, dtype: int64
  3968. A random 50% sample of the ``DataFrame`` with replacement:
  3969. >>> df.sample(frac=0.5, replace=True, random_state=1)
  3970. num_legs num_wings num_specimen_seen
  3971. dog 4 0 2
  3972. fish 0 0 8
  3973. Using a DataFrame column as weights. Rows with larger value in the
  3974. `num_specimen_seen` column are more likely to be sampled.
  3975. >>> df.sample(n=2, weights='num_specimen_seen', random_state=1)
  3976. num_legs num_wings num_specimen_seen
  3977. falcon 2 2 10
  3978. fish 0 0 8
  3979. """
  3980. if axis is None:
  3981. axis = self._stat_axis_number
  3982. axis = self._get_axis_number(axis)
  3983. axis_length = self.shape[axis]
  3984. # Process random_state argument
  3985. rs = com.random_state(random_state)
  3986. # Check weights for compliance
  3987. if weights is not None:
  3988. # If a series, align with frame
  3989. if isinstance(weights, pd.Series):
  3990. weights = weights.reindex(self.axes[axis])
  3991. # Strings acceptable if a dataframe and axis = 0
  3992. if isinstance(weights, string_types):
  3993. if isinstance(self, pd.DataFrame):
  3994. if axis == 0:
  3995. try:
  3996. weights = self[weights]
  3997. except KeyError:
  3998. raise KeyError("String passed to weights not a "
  3999. "valid column")
  4000. else:
  4001. raise ValueError("Strings can only be passed to "
  4002. "weights when sampling from rows on "
  4003. "a DataFrame")
  4004. else:
  4005. raise ValueError("Strings cannot be passed as weights "
  4006. "when sampling from a Series or Panel.")
  4007. weights = pd.Series(weights, dtype='float64')
  4008. if len(weights) != axis_length:
  4009. raise ValueError("Weights and axis to be sampled must be of "
  4010. "same length")
  4011. if (weights == np.inf).any() or (weights == -np.inf).any():
  4012. raise ValueError("weight vector may not include `inf` values")
  4013. if (weights < 0).any():
  4014. raise ValueError("weight vector many not include negative "
  4015. "values")
  4016. # If has nan, set to zero.
  4017. weights = weights.fillna(0)
  4018. # Renormalize if don't sum to 1
  4019. if weights.sum() != 1:
  4020. if weights.sum() != 0:
  4021. weights = weights / weights.sum()
  4022. else:
  4023. raise ValueError("Invalid weights: weights sum to zero")
  4024. weights = weights.values
  4025. # If no frac or n, default to n=1.
  4026. if n is None and frac is None:
  4027. n = 1
  4028. elif n is not None and frac is None and n % 1 != 0:
  4029. raise ValueError("Only integers accepted as `n` values")
  4030. elif n is None and frac is not None:
  4031. n = int(round(frac * axis_length))
  4032. elif n is not None and frac is not None:
  4033. raise ValueError('Please enter a value for `frac` OR `n`, not '
  4034. 'both')
  4035. # Check for negative sizes
  4036. if n < 0:
  4037. raise ValueError("A negative number of rows requested. Please "
  4038. "provide positive value.")
  4039. locs = rs.choice(axis_length, size=n, replace=replace, p=weights)
  4040. return self.take(locs, axis=axis, is_copy=False)
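# Weight-handling sketch (standalone, mirrors the renormalization above):
# weights that do not sum to 1 are rescaled before being passed to
# numpy.random.RandomState.choice as ``p``.
# >>> import numpy as np
# >>> w = np.array([1.0, 3.0])
# >>> (w / w.sum()).tolist()
# [0.25, 0.75]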
  4041. _shared_docs['pipe'] = (r"""
  4042. Apply func(self, \*args, \*\*kwargs).
  4043. Parameters
  4044. ----------
  4045. func : function
  4046. function to apply to the %(klass)s.
  4047. ``args``, and ``kwargs`` are passed into ``func``.
  4048. Alternatively a ``(callable, data_keyword)`` tuple where
  4049. ``data_keyword`` is a string indicating the keyword of
  4050. ``callable`` that expects the %(klass)s.
  4051. args : iterable, optional
  4052. positional arguments passed into ``func``.
  4053. kwargs : mapping, optional
  4054. a dictionary of keyword arguments passed into ``func``.
  4055. Returns
  4056. -------
  4057. object : the return type of ``func``.
  4058. See Also
  4059. --------
  4060. DataFrame.apply
  4061. DataFrame.applymap
  4062. Series.map
  4063. Notes
  4064. -----
  4065. Use ``.pipe`` when chaining together functions that expect
  4066. Series, DataFrames or GroupBy objects. Instead of writing
  4067. >>> f(g(h(df), arg1=a), arg2=b, arg3=c)
  4068. You can write
  4069. >>> (df.pipe(h)
  4070. ... .pipe(g, arg1=a)
  4071. ... .pipe(f, arg2=b, arg3=c)
  4072. ... )
  4073. If you have a function that takes the data as (say) the second
  4074. argument, pass a tuple indicating which keyword expects the
  4075. data. For example, suppose ``f`` takes its data as ``arg2``:
  4076. >>> (df.pipe(h)
  4077. ... .pipe(g, arg1=a)
  4078. ... .pipe((f, 'arg2'), arg1=a, arg3=c)
  4079. ... )
  4080. """)
  4081. @Appender(_shared_docs['pipe'] % _shared_doc_kwargs)
  4082. def pipe(self, func, *args, **kwargs):
  4083. return com._pipe(self, func, *args, **kwargs)
  4084. _shared_docs['aggregate'] = dedent("""
  4085. Aggregate using one or more operations over the specified axis.
  4086. %(versionadded)s
  4087. Parameters
  4088. ----------
  4089. func : function, str, list or dict
  4090. Function to use for aggregating the data. If a function, must either
  4091. work when passed a %(klass)s or when passed to %(klass)s.apply.
  4092. Accepted combinations are:
  4093. - function
  4094. - string function name
  4095. - list of functions and/or function names, e.g. ``[np.sum, 'mean']``
  4096. - dict of axis labels -> functions, function names or list of such.
  4097. %(axis)s
  4098. *args
  4099. Positional arguments to pass to `func`.
  4100. **kwargs
  4101. Keyword arguments to pass to `func`.
  4102. Returns
  4103. -------
  4104. DataFrame, Series or scalar
  4105. if DataFrame.agg is called with a single function, returns a Series
  4106. if DataFrame.agg is called with several functions, returns a DataFrame
  4107. if Series.agg is called with single function, returns a scalar
  4108. if Series.agg is called with several functions, returns a Series
  4109. %(see_also)s
  4110. Notes
  4111. -----
  4112. `agg` is an alias for `aggregate`. Use the alias.
  4113. A passed user-defined-function will be passed a Series for evaluation.
  4114. %(examples)s
  4115. """)
  4116. _shared_docs['transform'] = ("""
  4117. Call ``func`` on self producing a %(klass)s with transformed values
  4118. and that has the same axis length as self.
  4119. .. versionadded:: 0.20.0
  4120. Parameters
  4121. ----------
  4122. func : function, str, list or dict
  4123. Function to use for transforming the data. If a function, must either
  4124. work when passed a %(klass)s or when passed to %(klass)s.apply.
  4125. Accepted combinations are:
  4126. - function
  4127. - string function name
- list of functions and/or function names, e.g. ``[np.exp, 'sqrt']``
  4129. - dict of axis labels -> functions, function names or list of such.
  4130. %(axis)s
  4131. *args
  4132. Positional arguments to pass to `func`.
  4133. **kwargs
  4134. Keyword arguments to pass to `func`.
  4135. Returns
  4136. -------
  4137. %(klass)s
  4138. A %(klass)s that must have the same length as self.
  4139. Raises
  4140. ------
  4141. ValueError : If the returned %(klass)s has a different length than self.
  4142. See Also
  4143. --------
  4144. %(klass)s.agg : Only perform aggregating type operations.
  4145. %(klass)s.apply : Invoke function on a %(klass)s.
  4146. Examples
  4147. --------
  4148. >>> df = pd.DataFrame({'A': range(3), 'B': range(1, 4)})
  4149. >>> df
  4150. A B
  4151. 0 0 1
  4152. 1 1 2
  4153. 2 2 3
  4154. >>> df.transform(lambda x: x + 1)
  4155. A B
  4156. 0 1 2
  4157. 1 2 3
  4158. 2 3 4
  4159. Even though the resulting %(klass)s must have the same length as the
  4160. input %(klass)s, it is possible to provide several input functions:
  4161. >>> s = pd.Series(range(3))
  4162. >>> s
  4163. 0 0
  4164. 1 1
  4165. 2 2
  4166. dtype: int64
  4167. >>> s.transform([np.sqrt, np.exp])
  4168. sqrt exp
  4169. 0 0.000000 1.000000
  4170. 1 1.000000 2.718282
  4171. 2 1.414214 7.389056
  4172. """)
  4173. # ----------------------------------------------------------------------
  4174. # Attribute access
  4175. def __finalize__(self, other, method=None, **kwargs):
  4176. """
  4177. Propagate metadata from other to self.
  4178. Parameters
  4179. ----------
  4180. other : the object from which to get the attributes that we are going
  4181. to propagate
  4182. method : optional, a passed method name ; possibly to take different
  4183. types of propagation actions based on this
  4184. """
  4185. if isinstance(other, NDFrame):
  4186. for name in self._metadata:
  4187. object.__setattr__(self, name, getattr(other, name, None))
  4188. return self
  4189. def __getattr__(self, name):
  4190. """After regular attribute access, try looking up the name
  4191. This allows simpler access to columns for interactive use.
  4192. """
  4193. # Note: obj.x will always call obj.__getattribute__('x') prior to
  4194. # calling obj.__getattr__('x').
  4195. if (name in self._internal_names_set or name in self._metadata or
  4196. name in self._accessors):
  4197. return object.__getattribute__(self, name)
  4198. else:
  4199. if self._info_axis._can_hold_identifiers_and_holds_name(name):
  4200. return self[name]
  4201. return object.__getattribute__(self, name)
  4202. def __setattr__(self, name, value):
  4203. """After regular attribute access, try setting the name
  4204. This allows simpler access to columns for interactive use.
  4205. """
  4206. # first try regular attribute access via __getattribute__, so that
  4207. # e.g. ``obj.x`` and ``obj.x = 4`` will always reference/modify
  4208. # the same attribute.
  4209. try:
  4210. object.__getattribute__(self, name)
  4211. return object.__setattr__(self, name, value)
  4212. except AttributeError:
  4213. pass
  4214. # if this fails, go on to more involved attribute setting
  4215. # (note that this matches __getattr__, above).
  4216. if name in self._internal_names_set:
  4217. object.__setattr__(self, name, value)
  4218. elif name in self._metadata:
  4219. object.__setattr__(self, name, value)
  4220. else:
  4221. try:
  4222. existing = getattr(self, name)
  4223. if isinstance(existing, Index):
  4224. object.__setattr__(self, name, value)
  4225. elif name in self._info_axis:
  4226. self[name] = value
  4227. else:
  4228. object.__setattr__(self, name, value)
  4229. except (AttributeError, TypeError):
  4230. if isinstance(self, ABCDataFrame) and (is_list_like(value)):
  4231. warnings.warn("Pandas doesn't allow columns to be "
  4232. "created via a new attribute name - see "
  4233. "https://pandas.pydata.org/pandas-docs/"
  4234. "stable/indexing.html#attribute-access",
  4235. stacklevel=2)
  4236. object.__setattr__(self, name, value)
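# Consequence of the fallback above (illustrative only): assigning a
# list-like to a brand-new attribute on a DataFrame sets an instance
# attribute, it does NOT create a column -- hence the UserWarning.
# >>> import pandas as pd
# >>> df = pd.DataFrame({'a': [1, 2]})
# >>> df.b = [3, 4]          # warns; 'b' is an attribute, not a column
# >>> 'b' in df.columns
# False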
  4237. def _dir_additions(self):
  4238. """ add the string-like attributes from the info_axis.
If info_axis is a MultiIndex, its first-level values are used.
  4240. """
  4241. additions = {c for c in self._info_axis.unique(level=0)[:100]
  4242. if isinstance(c, string_types) and isidentifier(c)}
  4243. return super(NDFrame, self)._dir_additions().union(additions)
  4244. # ----------------------------------------------------------------------
  4245. # Getting and setting elements
  4246. # ----------------------------------------------------------------------
  4247. # Consolidation of internals
  4248. def _protect_consolidate(self, f):
  4249. """Consolidate _data -- if the blocks have changed, then clear the
  4250. cache
  4251. """
  4252. blocks_before = len(self._data.blocks)
  4253. result = f()
  4254. if len(self._data.blocks) != blocks_before:
  4255. self._clear_item_cache()
  4256. return result
  4257. def _consolidate_inplace(self):
  4258. """Consolidate data in place and return None"""
  4259. def f():
  4260. self._data = self._data.consolidate()
  4261. self._protect_consolidate(f)
  4262. def _consolidate(self, inplace=False):
  4263. """
  4264. Compute NDFrame with "consolidated" internals (data of each dtype
  4265. grouped together in a single ndarray).
  4266. Parameters
  4267. ----------
  4268. inplace : boolean, default False
  4269. If False return new object, otherwise modify existing object
  4270. Returns
  4271. -------
  4272. consolidated : same type as caller
  4273. """
  4274. inplace = validate_bool_kwarg(inplace, 'inplace')
  4275. if inplace:
  4276. self._consolidate_inplace()
  4277. else:
  4278. f = lambda: self._data.consolidate()
  4279. cons_data = self._protect_consolidate(f)
  4280. return self._constructor(cons_data).__finalize__(self)
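# Block-consolidation sketch (pokes at the internal ``_data`` BlockManager
# used above; internals, not public API, and counts may vary by version):
# >>> import pandas as pd
# >>> df = pd.DataFrame({'a': [1]})
# >>> df['b'] = 2            # inserting a column adds a second int64 block
# >>> len(df._data.blocks)
# 2
# >>> len(df._consolidate()._data.blocks)
# 1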
  4281. @property
  4282. def _is_mixed_type(self):
  4283. f = lambda: self._data.is_mixed_type
  4284. return self._protect_consolidate(f)
  4285. @property
  4286. def _is_numeric_mixed_type(self):
  4287. f = lambda: self._data.is_numeric_mixed_type
  4288. return self._protect_consolidate(f)
  4289. @property
  4290. def _is_datelike_mixed_type(self):
  4291. f = lambda: self._data.is_datelike_mixed_type
  4292. return self._protect_consolidate(f)
  4293. def _check_inplace_setting(self, value):
  4294. """ check whether we allow in-place setting with this type of value """
  4295. if self._is_mixed_type:
  4296. if not self._is_numeric_mixed_type:
  4297. # allow an actual np.nan thru
  4298. try:
  4299. if np.isnan(value):
  4300. return True
  4301. except Exception:
  4302. pass
  4303. raise TypeError('Cannot do inplace boolean setting on '
  4304. 'mixed-types with a non np.nan value')
  4305. return True
  4306. def _get_numeric_data(self):
  4307. return self._constructor(
  4308. self._data.get_numeric_data()).__finalize__(self)
  4309. def _get_bool_data(self):
  4310. return self._constructor(self._data.get_bool_data()).__finalize__(self)
  4311. # ----------------------------------------------------------------------
  4312. # Internal Interface Methods
  4313. def as_matrix(self, columns=None):
  4314. """
  4315. Convert the frame to its Numpy-array representation.
  4316. .. deprecated:: 0.23.0
  4317. Use :meth:`DataFrame.values` instead.
  4318. Parameters
  4319. ----------
  4320. columns : list, optional, default:None
  4321. If None, return all columns, otherwise, returns specified columns.
  4322. Returns
  4323. -------
  4324. values : ndarray
  4325. If the caller is heterogeneous and contains booleans or objects,
  4326. the result will be of dtype=object. See Notes.
  4327. See Also
  4328. --------
  4329. DataFrame.values
  4330. Notes
  4331. -----
  4332. Return is NOT a Numpy-matrix, rather, a Numpy-array.
  4333. The dtype will be a lower-common-denominator dtype (implicit
  4334. upcasting); that is to say if the dtypes (even of numeric types)
  4335. are mixed, the one that accommodates all will be chosen. Use this
  4336. with care if you are not dealing with the blocks.
  4337. e.g. If the dtypes are float16 and float32, dtype will be upcast to
float32. If dtypes are int32 and uint8, dtype will be upcast to
  4339. int32. By numpy.find_common_type convention, mixing int64 and uint64
  4340. will result in a float64 dtype.
  4341. This method is provided for backwards compatibility. Generally,
  4342. it is recommended to use '.values'.
  4343. """
  4344. warnings.warn("Method .as_matrix will be removed in a future version. "
  4345. "Use .values instead.", FutureWarning, stacklevel=2)
  4346. self._consolidate_inplace()
  4347. return self._data.as_array(transpose=self._AXIS_REVERSED,
  4348. items=columns)
  4349. @property
  4350. def values(self):
  4351. """
  4352. Return a Numpy representation of the DataFrame.
  4353. .. warning::
  4354. We recommend using :meth:`DataFrame.to_numpy` instead.
  4355. Only the values in the DataFrame will be returned, the axes labels
  4356. will be removed.
  4357. Returns
  4358. -------
  4359. numpy.ndarray
  4360. The values of the DataFrame.
  4361. See Also
  4362. --------
  4363. DataFrame.to_numpy : Recommended alternative to this method.
  4364. pandas.DataFrame.index : Retrieve the index labels.
  4365. pandas.DataFrame.columns : Retrieving the column names.
  4366. Notes
  4367. -----
  4368. The dtype will be a lower-common-denominator dtype (implicit
  4369. upcasting); that is to say if the dtypes (even of numeric types)
  4370. are mixed, the one that accommodates all will be chosen. Use this
  4371. with care if you are not dealing with the blocks.
  4372. e.g. If the dtypes are float16 and float32, dtype will be upcast to
  4373. float32. If dtypes are int32 and uint8, dtype will be upcast to
  4374. int32. By :func:`numpy.find_common_type` convention, mixing int64
  4375. and uint64 will result in a float64 dtype.
  4376. Examples
  4377. --------
  4378. A DataFrame where all columns are the same type (e.g., int64) results
  4379. in an array of the same type.
  4380. >>> df = pd.DataFrame({'age': [ 3, 29],
  4381. ... 'height': [94, 170],
  4382. ... 'weight': [31, 115]})
  4383. >>> df
  4384. age height weight
  4385. 0 3 94 31
  4386. 1 29 170 115
  4387. >>> df.dtypes
  4388. age int64
  4389. height int64
  4390. weight int64
  4391. dtype: object
  4392. >>> df.values
  4393. array([[ 3, 94, 31],
  4394. [ 29, 170, 115]], dtype=int64)
A DataFrame with mixed type columns (e.g., str/object, int64, float32)
  4396. results in an ndarray of the broadest type that accommodates these
  4397. mixed types (e.g., object).
  4398. >>> df2 = pd.DataFrame([('parrot', 24.0, 'second'),
  4399. ... ('lion', 80.5, 1),
  4400. ... ('monkey', np.nan, None)],
  4401. ... columns=('name', 'max_speed', 'rank'))
  4402. >>> df2.dtypes
  4403. name object
  4404. max_speed float64
  4405. rank object
  4406. dtype: object
  4407. >>> df2.values
  4408. array([['parrot', 24.0, 'second'],
  4409. ['lion', 80.5, 1],
  4410. ['monkey', nan, None]], dtype=object)
  4411. """
  4412. self._consolidate_inplace()
  4413. return self._data.as_array(transpose=self._AXIS_REVERSED)
  4414. @property
  4415. def _values(self):
  4416. """internal implementation"""
  4417. return self.values
  4418. @property
  4419. def _get_values(self):
  4420. # compat
  4421. return self.values
  4422. def get_values(self):
  4423. """
  4424. Return an ndarray after converting sparse values to dense.
  4425. This is the same as ``.values`` for non-sparse data. For sparse
  4426. data contained in a `pandas.SparseArray`, the data are first
  4427. converted to a dense representation.
  4428. Returns
  4429. -------
  4430. numpy.ndarray
  4431. Numpy representation of DataFrame
  4432. See Also
  4433. --------
  4434. values : Numpy representation of DataFrame.
  4435. pandas.SparseArray : Container for sparse data.
  4436. Examples
  4437. --------
  4438. >>> df = pd.DataFrame({'a': [1, 2], 'b': [True, False],
  4439. ... 'c': [1.0, 2.0]})
  4440. >>> df
  4441. a b c
  4442. 0 1 True 1.0
  4443. 1 2 False 2.0
  4444. >>> df.get_values()
  4445. array([[1, True, 1.0], [2, False, 2.0]], dtype=object)
  4446. >>> df = pd.DataFrame({"a": pd.SparseArray([1, None, None]),
  4447. ... "c": [1.0, 2.0, 3.0]})
  4448. >>> df
  4449. a c
  4450. 0 1.0 1.0
  4451. 1 NaN 2.0
  4452. 2 NaN 3.0
  4453. >>> df.get_values()
  4454. array([[ 1., 1.],
  4455. [nan, 2.],
  4456. [nan, 3.]])
  4457. """
  4458. return self.values
  4459. def get_dtype_counts(self):
  4460. """
  4461. Return counts of unique dtypes in this object.
  4462. Returns
  4463. -------
  4464. dtype : Series
  4465. Series with the count of columns with each dtype.
  4466. See Also
  4467. --------
  4468. dtypes : Return the dtypes in this object.
  4469. Examples
  4470. --------
  4471. >>> a = [['a', 1, 1.0], ['b', 2, 2.0], ['c', 3, 3.0]]
  4472. >>> df = pd.DataFrame(a, columns=['str', 'int', 'float'])
  4473. >>> df
  4474. str int float
  4475. 0 a 1 1.0
  4476. 1 b 2 2.0
  4477. 2 c 3 3.0
  4478. >>> df.get_dtype_counts()
  4479. float64 1
  4480. int64 1
  4481. object 1
  4482. dtype: int64
  4483. """
  4484. from pandas import Series
  4485. return Series(self._data.get_dtype_counts())
  4486. def get_ftype_counts(self):
  4487. """
  4488. Return counts of unique ftypes in this object.
  4489. .. deprecated:: 0.23.0
  4490. This is useful for SparseDataFrame or for DataFrames containing
  4491. sparse arrays.
  4492. Returns
  4493. -------
  4494. dtype : Series
  4495. Series with the count of columns with each type and
  4496. sparsity (dense/sparse)
  4497. See Also
  4498. --------
  4499. ftypes : Return ftypes (indication of sparse/dense and dtype) in
  4500. this object.
  4501. Examples
  4502. --------
  4503. >>> a = [['a', 1, 1.0], ['b', 2, 2.0], ['c', 3, 3.0]]
  4504. >>> df = pd.DataFrame(a, columns=['str', 'int', 'float'])
  4505. >>> df
  4506. str int float
  4507. 0 a 1 1.0
  4508. 1 b 2 2.0
  4509. 2 c 3 3.0
  4510. >>> df.get_ftype_counts() # doctest: +SKIP
  4511. float64:dense 1
  4512. int64:dense 1
  4513. object:dense 1
  4514. dtype: int64
  4515. """
  4516. warnings.warn("get_ftype_counts is deprecated and will "
  4517. "be removed in a future version",
  4518. FutureWarning, stacklevel=2)
  4519. from pandas import Series
  4520. return Series(self._data.get_ftype_counts())
  4521. @property
  4522. def dtypes(self):
  4523. """
  4524. Return the dtypes in the DataFrame.
  4525. This returns a Series with the data type of each column.
  4526. The result's index is the original DataFrame's columns. Columns
  4527. with mixed types are stored with the ``object`` dtype. See
  4528. :ref:`the User Guide <basics.dtypes>` for more.
  4529. Returns
  4530. -------
  4531. pandas.Series
  4532. The data type of each column.
  4533. See Also
  4534. --------
  4535. pandas.DataFrame.ftypes : Dtype and sparsity information.
  4536. Examples
  4537. --------
  4538. >>> df = pd.DataFrame({'float': [1.0],
  4539. ... 'int': [1],
  4540. ... 'datetime': [pd.Timestamp('20180310')],
  4541. ... 'string': ['foo']})
  4542. >>> df.dtypes
  4543. float float64
  4544. int int64
  4545. datetime datetime64[ns]
  4546. string object
  4547. dtype: object
  4548. """
  4549. from pandas import Series
  4550. return Series(self._data.get_dtypes(), index=self._info_axis,
  4551. dtype=np.object_)
  4552. @property
  4553. def ftypes(self):
  4554. """
  4555. Return the ftypes (indication of sparse/dense and dtype) in DataFrame.
  4556. This returns a Series with the data type of each column.
  4557. The result's index is the original DataFrame's columns. Columns
  4558. with mixed types are stored with the ``object`` dtype. See
  4559. :ref:`the User Guide <basics.dtypes>` for more.
  4560. Returns
  4561. -------
  4562. pandas.Series
  4563. The data type and indication of sparse/dense of each column.
  4564. See Also
  4565. --------
  4566. pandas.DataFrame.dtypes: Series with just dtype information.
  4567. pandas.SparseDataFrame : Container for sparse tabular data.
  4568. Notes
  4569. -----
  4570. Sparse data should have the same dtypes as its dense representation.
  4571. Examples
  4572. --------
  4573. >>> arr = np.random.RandomState(0).randn(100, 4)
  4574. >>> arr[arr < .8] = np.nan
  4575. >>> pd.DataFrame(arr).ftypes
  4576. 0 float64:dense
  4577. 1 float64:dense
  4578. 2 float64:dense
  4579. 3 float64:dense
  4580. dtype: object
  4581. >>> pd.SparseDataFrame(arr).ftypes
  4582. 0 float64:sparse
  4583. 1 float64:sparse
  4584. 2 float64:sparse
  4585. 3 float64:sparse
  4586. dtype: object
  4587. """
  4588. from pandas import Series
  4589. return Series(self._data.get_ftypes(), index=self._info_axis,
  4590. dtype=np.object_)
  4591. def as_blocks(self, copy=True):
  4592. """
  4593. Convert the frame to a dict of dtype -> Constructor Types that each has
  4594. a homogeneous dtype.
  4595. .. deprecated:: 0.21.0
  4596. NOTE: the dtypes of the blocks WILL BE PRESERVED HERE (unlike in
  4597. as_matrix)
  4598. Parameters
  4599. ----------
  4600. copy : boolean, default True
  4601. Returns
  4602. -------
  4603. values : a dict of dtype -> Constructor Types
  4604. """
  4605. warnings.warn("as_blocks is deprecated and will "
  4606. "be removed in a future version",
  4607. FutureWarning, stacklevel=2)
  4608. return self._to_dict_of_blocks(copy=copy)
  4609. @property
  4610. def blocks(self):
  4611. """
  4612. Internal property, property synonym for as_blocks().
  4613. .. deprecated:: 0.21.0
  4614. """
  4615. return self.as_blocks()
  4616. def _to_dict_of_blocks(self, copy=True):
  4617. """
  4618. Return a dict of dtype -> Constructor Types that
  4619. each is a homogeneous dtype.
  4620. Internal ONLY
  4621. """
  4622. return {k: self._constructor(v).__finalize__(self)
for k, v in self._data.to_dict(copy=copy).items()}
  4624. def astype(self, dtype, copy=True, errors='raise', **kwargs):
  4625. """
  4626. Cast a pandas object to a specified dtype ``dtype``.
  4627. Parameters
  4628. ----------
  4629. dtype : data type, or dict of column name -> data type
  4630. Use a numpy.dtype or Python type to cast entire pandas object to
  4631. the same type. Alternatively, use {col: dtype, ...}, where col is a
  4632. column label and dtype is a numpy.dtype or Python type to cast one
  4633. or more of the DataFrame's columns to column-specific types.
  4634. copy : bool, default True
  4635. Return a copy when ``copy=True`` (be very careful setting
  4636. ``copy=False`` as changes to values then may propagate to other
  4637. pandas objects).
  4638. errors : {'raise', 'ignore'}, default 'raise'
  4639. Control raising of exceptions on invalid data for provided dtype.
  4640. - ``raise`` : allow exceptions to be raised
  4641. - ``ignore`` : suppress exceptions. On error return original object
  4642. .. versionadded:: 0.20.0
  4643. kwargs : keyword arguments to pass on to the constructor
  4644. Returns
  4645. -------
  4646. casted : same type as caller
  4647. See Also
  4648. --------
  4649. to_datetime : Convert argument to datetime.
  4650. to_timedelta : Convert argument to timedelta.
  4651. to_numeric : Convert argument to a numeric type.
  4652. numpy.ndarray.astype : Cast a numpy array to a specified type.
  4653. Examples
  4654. --------
  4655. >>> ser = pd.Series([1, 2], dtype='int32')
  4656. >>> ser
  4657. 0 1
  4658. 1 2
  4659. dtype: int32
  4660. >>> ser.astype('int64')
  4661. 0 1
  4662. 1 2
  4663. dtype: int64
  4664. Convert to categorical type:
  4665. >>> ser.astype('category')
  4666. 0 1
  4667. 1 2
  4668. dtype: category
  4669. Categories (2, int64): [1, 2]
  4670. Convert to ordered categorical type with custom ordering:
  4671. >>> cat_dtype = pd.api.types.CategoricalDtype(
  4672. ... categories=[2, 1], ordered=True)
  4673. >>> ser.astype(cat_dtype)
  4674. 0 1
  4675. 1 2
  4676. dtype: category
  4677. Categories (2, int64): [2 < 1]
  4678. Note that using ``copy=False`` and changing data on a new
  4679. pandas object may propagate changes:
  4680. >>> s1 = pd.Series([1,2])
  4681. >>> s2 = s1.astype('int64', copy=False)
  4682. >>> s2[0] = 10
  4683. >>> s1 # note that s1[0] has changed too
  4684. 0 10
  4685. 1 2
  4686. dtype: int64
  4687. """
        if is_dict_like(dtype):
            if self.ndim == 1:  # i.e. Series
                if len(dtype) > 1 or self.name not in dtype:
                    raise KeyError('Only the Series name can be used for '
                                   'the key in Series dtype mappings.')
                new_type = dtype[self.name]
                return self.astype(new_type, copy, errors, **kwargs)
            elif self.ndim > 2:
                raise NotImplementedError(
                    'astype() only accepts a dtype arg of type dict when '
                    'invoked on Series and DataFrames. A single dtype must be '
                    'specified when invoked on a Panel.'
                )

            for col_name in dtype.keys():
                if col_name not in self:
                    raise KeyError('Only a column name can be used for the '
                                   'key in a dtype mappings argument.')

            results = []
            for col_name, col in self.iteritems():
                if col_name in dtype:
                    results.append(col.astype(dtype[col_name], copy=copy))
                else:
                    results.append(col.copy() if copy else col)

        elif is_extension_array_dtype(dtype) and self.ndim > 1:
            # GH 18099/22869: columnwise conversion to extension dtype
            # GH 24704: use iloc to handle duplicate column names
            results = (self.iloc[:, i].astype(dtype, copy=copy)
                       for i in range(len(self.columns)))

        else:
            # else, only a single dtype is given
            new_data = self._data.astype(dtype=dtype, copy=copy, errors=errors,
                                         **kwargs)
            return self._constructor(new_data).__finalize__(self)

        # GH 19920: retain column metadata after concat
        result = pd.concat(results, axis=1, copy=False)
        result.columns = self.columns
        return result
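
    # Illustrative sketch (not part of the implementation): a dict passed to
    # ``astype`` exercises the dict-like branch above, casting each named
    # column separately. The frame and column names are assumptions made up
    # for the example.
    #
    #     df = pd.DataFrame({'a': [1, 2], 'b': [3.0, 4.0]})
    #     df.astype({'a': 'float64', 'b': 'int32'}).dtypes
    #     # a    float64
    #     # b      int32
    #     # dtype: object
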
    def copy(self, deep=True):
        """
        Make a copy of this object's indices and data.

        When ``deep=True`` (default), a new object will be created with a
        copy of the calling object's data and indices. Modifications to
        the data or indices of the copy will not be reflected in the
        original object (see notes below).

        When ``deep=False``, a new object will be created without copying
        the calling object's data or index (only references to the data
        and index are copied). Any changes to the data of the original
        will be reflected in the shallow copy (and vice versa).

        Parameters
        ----------
        deep : bool, default True
            Make a deep copy, including a copy of the data and the indices.
            With ``deep=False`` neither the indices nor the data are copied.

        Returns
        -------
        copy : Series, DataFrame or Panel
            Object type matches caller.

        Notes
        -----
        When ``deep=True``, data is copied but actual Python objects
        will not be copied recursively, only the reference to the object.
        This is in contrast to `copy.deepcopy` in the Standard Library,
        which recursively copies object data (see examples below).

        While ``Index`` objects are copied when ``deep=True``, the underlying
        numpy array is not copied for performance reasons. Since ``Index`` is
        immutable, the underlying data can be safely shared and a copy
        is not needed.

        Examples
        --------
        >>> s = pd.Series([1, 2], index=["a", "b"])
        >>> s
        a    1
        b    2
        dtype: int64

        >>> s_copy = s.copy()
        >>> s_copy
        a    1
        b    2
        dtype: int64

        **Shallow copy versus default (deep) copy:**

        >>> s = pd.Series([1, 2], index=["a", "b"])
        >>> deep = s.copy()
        >>> shallow = s.copy(deep=False)

        Shallow copy shares data and index with original.

        >>> s is shallow
        False
        >>> s.values is shallow.values and s.index is shallow.index
        True

        Deep copy has own copy of data and index.

        >>> s is deep
        False
        >>> s.values is deep.values or s.index is deep.index
        False

        Updates to the data shared by shallow copy and original are
        reflected in both; deep copy remains unchanged.

        >>> s[0] = 3
        >>> shallow[1] = 4
        >>> s
        a    3
        b    4
        dtype: int64
        >>> shallow
        a    3
        b    4
        dtype: int64
        >>> deep
        a    1
        b    2
        dtype: int64

        Note that when copying an object containing Python objects, a deep
        copy will copy the data, but will not do so recursively. Updating a
        nested data object will be reflected in the deep copy.

        >>> s = pd.Series([[1, 2], [3, 4]])
        >>> deep = s.copy()
        >>> s[0][0] = 10
        >>> s
        0    [10, 2]
        1     [3, 4]
        dtype: object
        >>> deep
        0    [10, 2]
        1     [3, 4]
        dtype: object
        """
        data = self._data.copy(deep=deep)
        return self._constructor(data).__finalize__(self)
    def __copy__(self, deep=True):
        return self.copy(deep=deep)

    def __deepcopy__(self, memo=None):
        """
        Parameters
        ----------
        memo, default None
            Standard signature. Unused
        """
        if memo is None:
            memo = {}
        return self.copy(deep=True)
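
    # Usage sketch: the two hooks above wire NDFrame into the stdlib ``copy``
    # protocol, so the following calls are equivalent (the import and series
    # are assumptions for the example):
    #
    #     import copy
    #     s = pd.Series([1, 2])
    #     copy.copy(s)        # same as s.copy(deep=True); __copy__ defaults
    #     copy.deepcopy(s)    # same as s.copy(deep=True)
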
    def _convert(self, datetime=False, numeric=False, timedelta=False,
                 coerce=False, copy=True):
        """
        Attempt to infer better dtype for object columns.

        Parameters
        ----------
        datetime : boolean, default False
            If True, convert to date where possible.
        numeric : boolean, default False
            If True, attempt to convert to numbers (including strings), with
            unconvertible values becoming NaN.
        timedelta : boolean, default False
            If True, convert to timedelta where possible.
        coerce : boolean, default False
            If True, force conversion with unconvertible values converted to
            nulls (NaN or NaT).
        copy : boolean, default True
            If True, return a copy even if no copy is necessary (e.g. no
            conversion was done). Note: This is meant for internal use, and
            should not be confused with inplace.

        Returns
        -------
        converted : same as input object
        """
        return self._constructor(
            self._data.convert(datetime=datetime, numeric=numeric,
                               timedelta=timedelta, coerce=coerce,
                               copy=copy)).__finalize__(self)
    def convert_objects(self, convert_dates=True, convert_numeric=False,
                        convert_timedeltas=True, copy=True):
        """
        Attempt to infer better dtype for object columns.

        .. deprecated:: 0.21.0

        Parameters
        ----------
        convert_dates : boolean, default True
            If True, convert to date where possible. If 'coerce', force
            conversion, with unconvertible values becoming NaT.
        convert_numeric : boolean, default False
            If True, attempt to coerce to numbers (including strings), with
            unconvertible values becoming NaN.
        convert_timedeltas : boolean, default True
            If True, convert to timedelta where possible. If 'coerce', force
            conversion, with unconvertible values becoming NaT.
        copy : boolean, default True
            If True, return a copy even if no copy is necessary (e.g. no
            conversion was done). Note: This is meant for internal use, and
            should not be confused with inplace.

        Returns
        -------
        converted : same as input object

        See Also
        --------
        to_datetime : Convert argument to datetime.
        to_timedelta : Convert argument to timedelta.
        to_numeric : Convert argument to numeric type.
        """
        msg = ("convert_objects is deprecated. To re-infer data dtypes for "
               "object columns, use {klass}.infer_objects()\nFor all "
               "other conversions use the data-type specific converters "
               "pd.to_datetime, pd.to_timedelta and pd.to_numeric."
               ).format(klass=self.__class__.__name__)
        warnings.warn(msg, FutureWarning, stacklevel=2)

        return self._constructor(
            self._data.convert(convert_dates=convert_dates,
                               convert_numeric=convert_numeric,
                               convert_timedeltas=convert_timedeltas,
                               copy=copy)).__finalize__(self)
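
    # Migration sketch per the deprecation warning above. The frame and the
    # column names ('when', 'size') are hypothetical, made up for the example:
    #
    #     df = df.infer_objects()                                  # re-infer
    #     df['when'] = pd.to_datetime(df['when'])                  # explicit
    #     df['size'] = pd.to_numeric(df['size'], errors='coerce')  # converters
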
    def infer_objects(self):
        """
        Attempt to infer better dtypes for object columns.

        Attempts soft conversion of object-dtyped
        columns, leaving non-object and unconvertible
        columns unchanged. The inference rules are the
        same as during normal Series/DataFrame construction.

        .. versionadded:: 0.21.0

        Returns
        -------
        converted : same type as input object

        See Also
        --------
        to_datetime : Convert argument to datetime.
        to_timedelta : Convert argument to timedelta.
        to_numeric : Convert argument to numeric type.

        Examples
        --------
        >>> df = pd.DataFrame({"A": ["a", 1, 2, 3]})
        >>> df = df.iloc[1:]
        >>> df
           A
        1  1
        2  2
        3  3

        >>> df.dtypes
        A    object
        dtype: object

        >>> df.infer_objects().dtypes
        A    int64
        dtype: object
        """
        # numeric=False necessary to only soft convert;
        # python objects will still be converted to
        # native numpy numeric types
        return self._constructor(
            self._data.convert(datetime=True, numeric=False,
                               timedelta=True, coerce=False,
                               copy=True)).__finalize__(self)
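
    # Sketch of the "soft" limitation noted above: a column that still mixes
    # types after inference stays object dtype, unlike a hard ``astype``.
    # The frame is an assumption for the example.
    #
    #     df = pd.DataFrame({"A": ["a", 1, 2]})
    #     df.infer_objects().dtypes    # A is still object; nothing is coerced
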
    # ----------------------------------------------------------------------
    # Filling NA's

    def fillna(self, value=None, method=None, axis=None, inplace=False,
               limit=None, downcast=None):
        """
        Fill NA/NaN values using the specified method.

        Parameters
        ----------
        value : scalar, dict, Series, or DataFrame
            Value to use to fill holes (e.g. 0), alternately a
            dict/Series/DataFrame of values specifying which value to use for
            each index (for a Series) or column (for a DataFrame). (values not
            in the dict/Series/DataFrame will not be filled). This value cannot
            be a list.
        method : {'backfill', 'bfill', 'pad', 'ffill', None}, default None
            Method to use for filling holes in reindexed Series
            pad / ffill: propagate last valid observation forward to next valid
            backfill / bfill: use NEXT valid observation to fill gap
        axis : %(axes_single_arg)s
        inplace : boolean, default False
            If True, fill in place. Note: this will modify any
            other views on this object, (e.g. a no-copy slice for a column in a
            DataFrame).
        limit : int, default None
            If method is specified, this is the maximum number of consecutive
            NaN values to forward/backward fill. In other words, if there is
            a gap with more than this number of consecutive NaNs, it will only
            be partially filled. If method is not specified, this is the
            maximum number of entries along the entire axis where NaNs will be
            filled. Must be greater than 0 if not None.
        downcast : dict, default is None
            a dict of item->dtype of what to downcast if possible,
            or the string 'infer' which will try to downcast to an appropriate
            equal type (e.g. float64 to int64 if possible)

        Returns
        -------
        filled : %(klass)s

        See Also
        --------
        interpolate : Fill NaN values using interpolation.
        reindex, asfreq

        Examples
        --------
        >>> df = pd.DataFrame([[np.nan, 2, np.nan, 0],
        ...                    [3, 4, np.nan, 1],
        ...                    [np.nan, np.nan, np.nan, 5],
        ...                    [np.nan, 3, np.nan, 4]],
        ...                   columns=list('ABCD'))
        >>> df
             A    B   C  D
        0  NaN  2.0 NaN  0
        1  3.0  4.0 NaN  1
        2  NaN  NaN NaN  5
        3  NaN  3.0 NaN  4

        Replace all NaN elements with 0s.

        >>> df.fillna(0)
             A    B    C  D
        0  0.0  2.0  0.0  0
        1  3.0  4.0  0.0  1
        2  0.0  0.0  0.0  5
        3  0.0  3.0  0.0  4

        We can also propagate non-null values forward or backward.

        >>> df.fillna(method='ffill')
             A    B   C  D
        0  NaN  2.0 NaN  0
        1  3.0  4.0 NaN  1
        2  3.0  4.0 NaN  5
        3  3.0  3.0 NaN  4

        Replace all NaN elements in column 'A', 'B', 'C', and 'D', with 0, 1,
        2, and 3 respectively.

        >>> values = {'A': 0, 'B': 1, 'C': 2, 'D': 3}
        >>> df.fillna(value=values)
             A    B    C  D
        0  0.0  2.0  2.0  0
        1  3.0  4.0  2.0  1
        2  0.0  1.0  2.0  5
        3  0.0  3.0  2.0  4

        Only replace the first NaN element.

        >>> df.fillna(value=values, limit=1)
             A    B    C  D
        0  0.0  2.0  2.0  0
        1  3.0  4.0  NaN  1
        2  NaN  1.0  NaN  5
        3  NaN  3.0  NaN  4
        """
        inplace = validate_bool_kwarg(inplace, 'inplace')
        value, method = validate_fillna_kwargs(value, method)

        self._consolidate_inplace()

        # set the default here, so functions examining the signature
        # can detect if something was set (e.g. in groupby) (GH9221)
        if axis is None:
            axis = 0
        axis = self._get_axis_number(axis)

        from pandas import DataFrame
        if value is None:

            if self._is_mixed_type and axis == 1:
                if inplace:
                    raise NotImplementedError()
                result = self.T.fillna(method=method, limit=limit).T

                # need to downcast here because of all of the transposes
                result._data = result._data.downcast()

                return result

            # > 3d
            if self.ndim > 3:
                raise NotImplementedError('Cannot fillna with a method for > '
                                          '3dims')

            # 3d
            elif self.ndim == 3:
                # fill in 2d chunks
                result = {col: s.fillna(method=method, value=value)
                          for col, s in self.iteritems()}
                prelim_obj = self._constructor.from_dict(result)
                new_obj = prelim_obj.__finalize__(self)
                new_data = new_obj._data

            else:
                # 2d or less
                new_data = self._data.interpolate(method=method, axis=axis,
                                                  limit=limit, inplace=inplace,
                                                  coerce=True,
                                                  downcast=downcast)
        else:
            if len(self._get_axis(axis)) == 0:
                return self

            if self.ndim == 1:
                if isinstance(value, (dict, ABCSeries)):
                    from pandas import Series
                    value = Series(value)
                elif not is_list_like(value):
                    pass
                else:
                    raise TypeError('"value" parameter must be a scalar, dict '
                                    'or Series, but you passed a '
                                    '"{0}"'.format(type(value).__name__))

                new_data = self._data.fillna(value=value, limit=limit,
                                             inplace=inplace,
                                             downcast=downcast)

            elif isinstance(value, (dict, ABCSeries)):
                if axis == 1:
                    raise NotImplementedError('Currently only can fill '
                                              'with dict/Series column '
                                              'by column')

                result = self if inplace else self.copy()
                for k, v in compat.iteritems(value):
                    if k not in result:
                        continue
                    obj = result[k]
                    obj.fillna(v, limit=limit, inplace=True, downcast=downcast)
                return result if not inplace else None

            elif not is_list_like(value):
                new_data = self._data.fillna(value=value, limit=limit,
                                             inplace=inplace,
                                             downcast=downcast)
            elif isinstance(value, DataFrame) and self.ndim == 2:
                new_data = self.where(self.notna(), value)
            else:
                raise ValueError("invalid fill value with a %s" % type(value))

        if inplace:
            self._update_inplace(new_data)
        else:
            return self._constructor(new_data).__finalize__(self)
    def ffill(self, axis=None, inplace=False, limit=None, downcast=None):
        """
        Synonym for :meth:`DataFrame.fillna` with ``method='ffill'``.
        """
        return self.fillna(method='ffill', axis=axis, inplace=inplace,
                           limit=limit, downcast=downcast)

    def bfill(self, axis=None, inplace=False, limit=None, downcast=None):
        """
        Synonym for :meth:`DataFrame.fillna` with ``method='bfill'``.
        """
        return self.fillna(method='bfill', axis=axis, inplace=inplace,
                           limit=limit, downcast=downcast)
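
    # Quick sketch of the two synonyms above (the series is an assumption
    # made up for the example):
    #
    #     s = pd.Series([1.0, None, None, 4.0])
    #     s.ffill()   # -> [1.0, 1.0, 1.0, 4.0]; fillna(method='ffill')
    #     s.bfill()   # -> [1.0, 4.0, 4.0, 4.0]; fillna(method='bfill')
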
    _shared_docs['replace'] = ("""
        Replace values given in `to_replace` with `value`.

        Values of the %(klass)s are replaced with other values dynamically.
        This differs from updating with ``.loc`` or ``.iloc``, which require
        you to specify a location to update with some value.

        Parameters
        ----------
        to_replace : str, regex, list, dict, Series, int, float, or None
            How to find the values that will be replaced.

            * numeric, str or regex:

                - numeric: numeric values equal to `to_replace` will be
                  replaced with `value`
                - str: string exactly matching `to_replace` will be replaced
                  with `value`
                - regex: regexs matching `to_replace` will be replaced with
                  `value`

            * list of str, regex, or numeric:

                - First, if `to_replace` and `value` are both lists, they
                  **must** be the same length.
                - Second, if ``regex=True`` then all of the strings in **both**
                  lists will be interpreted as regexs otherwise they will match
                  directly. This doesn't matter much for `value` since there
                  are only a few possible substitution regexes you can use.
                - str, regex and numeric rules apply as above.

            * dict:

                - Dicts can be used to specify different replacement values
                  for different existing values. For example,
                  ``{'a': 'b', 'y': 'z'}`` replaces the value 'a' with 'b' and
                  'y' with 'z'. To use a dict in this way the `value`
                  parameter should be `None`.
                - For a DataFrame a dict can specify that different values
                  should be replaced in different columns. For example,
                  ``{'a': 1, 'b': 'z'}`` looks for the value 1 in column 'a'
                  and the value 'z' in column 'b' and replaces these values
                  with whatever is specified in `value`. The `value` parameter
                  should not be ``None`` in this case. You can treat this as a
                  special case of passing two lists except that you are
                  specifying the column to search in.
                - For a DataFrame nested dictionaries, e.g.,
                  ``{'a': {'b': np.nan}}``, are read as follows: look in column
                  'a' for the value 'b' and replace it with NaN. The `value`
                  parameter should be ``None`` to use a nested dict in this
                  way. You can nest regular expressions as well. Note that
                  column names (the top-level dictionary keys in a nested
                  dictionary) **cannot** be regular expressions.

            * None:

                - This means that the `regex` argument must be a string,
                  compiled regular expression, or list, dict, ndarray or
                  Series of such elements. If `value` is also ``None`` then
                  this **must** be a nested dictionary or Series.

            See the examples section for examples of each of these.
        value : scalar, dict, list, str, regex, default None
            Value to replace any values matching `to_replace` with.
            For a DataFrame a dict of values can be used to specify which
            value to use for each column (columns not in the dict will not be
            filled). Regular expressions, strings and lists or dicts of such
            objects are also allowed.
        inplace : bool, default False
            If True, replace in place. Note: this will modify any
            other views on this object (e.g. a column from a DataFrame).
            Returns the caller if this is True.
        limit : int, default None
            Maximum size gap to forward or backward fill.
        regex : bool or same types as `to_replace`, default False
            Whether to interpret `to_replace` and/or `value` as regular
            expressions. If this is ``True`` then `to_replace` *must* be a
            string. Alternatively, this could be a regular expression or a
            list, dict, or array of regular expressions in which case
            `to_replace` must be ``None``.
        method : {'pad', 'ffill', 'bfill', `None`}
            The method to use for replacement, when `to_replace` is a
            scalar, list or tuple and `value` is ``None``.

            .. versionchanged:: 0.23.0
                Added to DataFrame.

        Returns
        -------
        %(klass)s
            Object after replacement.

        Raises
        ------
        AssertionError
            * If `regex` is not a ``bool`` and `to_replace` is not
              ``None``.
        TypeError
            * If `to_replace` is a ``dict`` and `value` is not a ``list``,
              ``dict``, ``ndarray``, or ``Series``
            * If `to_replace` is ``None`` and `regex` is not compilable
              into a regular expression or is a list, dict, ndarray, or
              Series.
            * When replacing multiple ``bool`` or ``datetime64`` objects and
              the arguments to `to_replace` does not match the type of the
              value being replaced
        ValueError
            * If a ``list`` or an ``ndarray`` is passed to `to_replace` and
              `value` but they are not the same length.

        See Also
        --------
        %(klass)s.fillna : Fill NA values.
        %(klass)s.where : Replace values based on boolean condition.
        Series.str.replace : Simple string replacement.

        Notes
        -----
        * Regex substitution is performed under the hood with ``re.sub``. The
          rules for substitution for ``re.sub`` are the same.
        * Regular expressions will only substitute on strings, meaning you
          cannot provide, for example, a regular expression matching floating
          point numbers and expect the columns in your frame that have a
          numeric dtype to be matched. However, if those floating point
          numbers *are* strings, then you can do this.
        * This method has *a lot* of options. You are encouraged to experiment
          and play with this method to gain intuition about how it works.
        * When dict is used as the `to_replace` value, it is like
          key(s) in the dict are the to_replace part and
          value(s) in the dict are the value parameter.

        Examples
        --------

        **Scalar `to_replace` and `value`**

        >>> s = pd.Series([0, 1, 2, 3, 4])
        >>> s.replace(0, 5)
        0    5
        1    1
        2    2
        3    3
        4    4
        dtype: int64

        >>> df = pd.DataFrame({'A': [0, 1, 2, 3, 4],
        ...                    'B': [5, 6, 7, 8, 9],
        ...                    'C': ['a', 'b', 'c', 'd', 'e']})
        >>> df.replace(0, 5)
           A  B  C
        0  5  5  a
        1  1  6  b
        2  2  7  c
        3  3  8  d
        4  4  9  e

        **List-like `to_replace`**

        >>> df.replace([0, 1, 2, 3], 4)
           A  B  C
        0  4  5  a
        1  4  6  b
        2  4  7  c
        3  4  8  d
        4  4  9  e

        >>> df.replace([0, 1, 2, 3], [4, 3, 2, 1])
           A  B  C
        0  4  5  a
        1  3  6  b
        2  2  7  c
        3  1  8  d
        4  4  9  e

        >>> s.replace([1, 2], method='bfill')
        0    0
        1    3
        2    3
        3    3
        4    4
        dtype: int64

        **dict-like `to_replace`**

        >>> df.replace({0: 10, 1: 100})
             A  B  C
        0   10  5  a
        1  100  6  b
        2    2  7  c
        3    3  8  d
        4    4  9  e

        >>> df.replace({'A': 0, 'B': 5}, 100)
             A    B  C
        0  100  100  a
        1    1    6  b
        2    2    7  c
        3    3    8  d
        4    4    9  e

        >>> df.replace({'A': {0: 100, 4: 400}})
             A  B  C
        0  100  5  a
        1    1  6  b
        2    2  7  c
        3    3  8  d
        4  400  9  e

        **Regular expression `to_replace`**

        >>> df = pd.DataFrame({'A': ['bat', 'foo', 'bait'],
        ...                    'B': ['abc', 'bar', 'xyz']})
        >>> df.replace(to_replace=r'^ba.$', value='new', regex=True)
              A    B
        0   new  abc
        1   foo  new
        2  bait  xyz

        >>> df.replace({'A': r'^ba.$'}, {'A': 'new'}, regex=True)
              A    B
        0   new  abc
        1   foo  bar
        2  bait  xyz

        >>> df.replace(regex=r'^ba.$', value='new')
              A    B
        0   new  abc
        1   foo  new
        2  bait  xyz

        >>> df.replace(regex={r'^ba.$': 'new', 'foo': 'xyz'})
              A    B
        0   new  abc
        1   xyz  new
        2  bait  xyz

        >>> df.replace(regex=[r'^ba.$', 'foo'], value='new')
              A    B
        0   new  abc
        1   new  new
        2  bait  xyz

        Note that when replacing multiple ``bool`` or ``datetime64`` objects,
        the data types in the `to_replace` parameter must match the data
        type of the value being replaced:

        >>> df = pd.DataFrame({'A': [True, False, True],
        ...                    'B': [False, True, False]})
        >>> df.replace({'a string': 'new value', True: False})  # raises
        Traceback (most recent call last):
            ...
        TypeError: Cannot compare types 'ndarray(dtype=bool)' and 'str'

        This raises a ``TypeError`` because one of the ``dict`` keys is not of
        the correct type for replacement.

        Compare the behavior of ``s.replace({'a': None})`` and
        ``s.replace('a', None)`` to understand the peculiarities
        of the `to_replace` parameter:

        >>> s = pd.Series([10, 'a', 'a', 'b', 'a'])

        When one uses a dict as the `to_replace` value, it is like the
        value(s) in the dict are equal to the `value` parameter.
        ``s.replace({'a': None})`` is equivalent to
        ``s.replace(to_replace={'a': None}, value=None, method=None)``:

        >>> s.replace({'a': None})
        0      10
        1    None
        2    None
        3       b
        4    None
        dtype: object

        When ``value=None`` and `to_replace` is a scalar, list or
        tuple, `replace` uses the method parameter (default 'pad') to do the
        replacement. So this is why the 'a' values are being replaced by 10
        in rows 1 and 2 and 'b' in row 4 in this case.
        The command ``s.replace('a', None)`` is actually equivalent to
        ``s.replace(to_replace='a', value=None, method='pad')``:

        >>> s.replace('a', None)
        0    10
        1    10
        2    10
        3     b
        4     b
        dtype: object
    """)
    @Appender(_shared_docs['replace'] % _shared_doc_kwargs)
    def replace(self, to_replace=None, value=None, inplace=False, limit=None,
                regex=False, method='pad'):
        inplace = validate_bool_kwarg(inplace, 'inplace')
        if not is_bool(regex) and to_replace is not None:
            raise AssertionError("'to_replace' must be 'None' if 'regex' is "
                                 "not a bool")

        self._consolidate_inplace()

        if value is None:
            # passing a single value that is scalar like
            # when value is None (GH5319), for compat
            if not is_dict_like(to_replace) and not is_dict_like(regex):
                to_replace = [to_replace]

            if isinstance(to_replace, (tuple, list)):
                if isinstance(self, pd.DataFrame):
                    return self.apply(_single_replace,
                                      args=(to_replace, method, inplace,
                                            limit))
                return _single_replace(self, to_replace, method, inplace,
                                       limit)

            if not is_dict_like(to_replace):
                if not is_dict_like(regex):
                    raise TypeError('If "to_replace" and "value" are both None'
                                    ' and "to_replace" is not a list, then '
                                    'regex must be a mapping')
                to_replace = regex
                regex = True

            items = list(compat.iteritems(to_replace))
            keys, values = lzip(*items) or ([], [])

            are_mappings = [is_dict_like(v) for v in values]

            if any(are_mappings):
                if not all(are_mappings):
                    raise TypeError("If a nested mapping is passed, all values"
                                    " of the top level mapping must be "
                                    "mappings")
                # passed a nested dict/Series
                to_rep_dict = {}
                value_dict = {}

                for k, v in items:
                    keys, values = lzip(*v.items()) or ([], [])
                    if set(keys) & set(values):
                        raise ValueError("Replacement not allowed with "
                                         "overlapping keys and values")
                    to_rep_dict[k] = list(keys)
                    value_dict[k] = list(values)

                to_replace, value = to_rep_dict, value_dict
            else:
                to_replace, value = keys, values

            return self.replace(to_replace, value, inplace=inplace,
                                limit=limit, regex=regex)
        else:

            # need a non-zero len on all axes
            for a in self._AXIS_ORDERS:
                if not len(self._get_axis(a)):
                    return self

            new_data = self._data
            if is_dict_like(to_replace):
                if is_dict_like(value):  # {'A' : NA} -> {'A' : 0}
                    res = self if inplace else self.copy()
                    for c, src in compat.iteritems(to_replace):
                        if c in value and c in self:
                            # object conversion is handled in
                            # series.replace which is called recursively
                            res[c] = res[c].replace(to_replace=src,
                                                    value=value[c],
                                                    inplace=False,
                                                    regex=regex)
                    return None if inplace else res

                # {'A': NA} -> 0
                elif not is_list_like(value):
                    keys = [(k, src) for k, src in compat.iteritems(to_replace)
                            if k in self]
                    keys_len = len(keys) - 1
                    for i, (k, src) in enumerate(keys):
                        convert = i == keys_len
                        new_data = new_data.replace(to_replace=src,
                                                    value=value,
                                                    filter=[k],
                                                    inplace=inplace,
                                                    regex=regex,
                                                    convert=convert)
                else:
                    raise TypeError('value argument must be scalar, dict, or '
                                    'Series')

            elif is_list_like(to_replace):  # [NA, ''] -> [0, 'missing']
                if is_list_like(value):
                    if len(to_replace) != len(value):
                        raise ValueError('Replacement lists must match '
                                         'in length. Expecting %d got %d ' %
                                         (len(to_replace), len(value)))

                    new_data = self._data.replace_list(src_list=to_replace,
                                                       dest_list=value,
                                                       inplace=inplace,
                                                       regex=regex)

                else:  # [NA, ''] -> 0
                    new_data = self._data.replace(to_replace=to_replace,
                                                  value=value, inplace=inplace,
                                                  regex=regex)
            elif to_replace is None:
                if not (is_re_compilable(regex) or
                        is_list_like(regex) or is_dict_like(regex)):
                    raise TypeError("'regex' must be a string or a compiled "
                                    "regular expression or a list or dict of "
                                    "strings or regular expressions, you "
                                    "passed a"
                                    " {0!r}".format(type(regex).__name__))
                return self.replace(regex, value, inplace=inplace, limit=limit,
                                    regex=True)
            else:

                # dest iterable dict-like
                if is_dict_like(value):  # NA -> {'A' : 0, 'B' : -1}
                    new_data = self._data

                    for k, v in compat.iteritems(value):
                        if k in self:
                            new_data = new_data.replace(to_replace=to_replace,
                                                        value=v, filter=[k],
                                                        inplace=inplace,
                                                        regex=regex)

                elif not is_list_like(value):  # NA -> 0
                    new_data = self._data.replace(to_replace=to_replace,
                                                  value=value, inplace=inplace,
                                                  regex=regex)
                else:
                    msg = ('Invalid "to_replace" type: '
                           '{0!r}').format(type(to_replace).__name__)
                    raise TypeError(msg)  # pragma: no cover

        if inplace:
            self._update_inplace(new_data)
        else:
            return self._constructor(new_data).__finalize__(self)
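
    # Sketch of the nested-dict path handled above (the frame and values are
    # assumptions made up for the example): the outer key selects a column,
    # and the inner mapping is the per-column to_replace/value pair.
    #
    #     df = pd.DataFrame({'A': [0, 1], 'B': [5, 6]})
    #     df.replace({'A': {0: 100}})    # only column 'A' is searched
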
    _shared_docs['interpolate'] = """
        Please note that only ``method='linear'`` is supported for
        DataFrame/Series with a MultiIndex.

        Parameters
        ----------
        method : str, default 'linear'
            Interpolation technique to use. One of:

            * 'linear': Ignore the index and treat the values as equally
              spaced. This is the only method supported on MultiIndexes.
            * 'time': Works on daily and higher resolution data to interpolate
              given length of interval.
            * 'index', 'values': use the actual numerical values of the index.
            * 'pad': Fill in NaNs using existing values.
            * 'nearest', 'zero', 'slinear', 'quadratic', 'cubic', 'spline',
              'barycentric', 'polynomial': Passed to
              `scipy.interpolate.interp1d`. Both 'polynomial' and 'spline'
              require that you also specify an `order` (int),
              e.g. ``df.interpolate(method='polynomial', order=4)``.
              These use the numerical values of the index.
            * 'krogh', 'piecewise_polynomial', 'spline', 'pchip', 'akima':
              Wrappers around the SciPy interpolation methods of similar
              names. See `Notes`.
            * 'from_derivatives': Refers to
              `scipy.interpolate.BPoly.from_derivatives` which
              replaces 'piecewise_polynomial' interpolation method in
              scipy 0.18.

            .. versionadded:: 0.18.1
                Added support for the 'akima' method.
                Added interpolate method 'from_derivatives' which replaces
                'piecewise_polynomial' in SciPy 0.18; backwards-compatible with
                SciPy < 0.18

        axis : {0 or 'index', 1 or 'columns', None}, default None
            Axis to interpolate along.
        limit : int, optional
            Maximum number of consecutive NaNs to fill. Must be greater than
            0.
        inplace : bool, default False
            Update the data in place if possible.
        limit_direction : {'forward', 'backward', 'both'}, default 'forward'
            If limit is specified, consecutive NaNs will be filled in this
            direction.
        limit_area : {`None`, 'inside', 'outside'}, default None
            If limit is specified, consecutive NaNs will be filled with this
            restriction.

            * ``None``: No fill restriction.
            * 'inside': Only fill NaNs surrounded by valid values
              (interpolate).
            * 'outside': Only fill NaNs outside valid values (extrapolate).

            .. versionadded:: 0.21.0

        downcast : optional, 'infer' or None, defaults to None
            Downcast dtypes if possible.
        **kwargs
            Keyword arguments to pass on to the interpolating function.

        Returns
        -------
        Series or DataFrame
            Returns the same object type as the caller, interpolated at
            some or all ``NaN`` values.

        See Also
        --------
        fillna : Fill missing values using different methods.
        scipy.interpolate.Akima1DInterpolator : Piecewise cubic polynomials
            (Akima interpolator).
        scipy.interpolate.BPoly.from_derivatives : Piecewise polynomial in the
            Bernstein basis.
        scipy.interpolate.interp1d : Interpolate a 1-D function.
        scipy.interpolate.KroghInterpolator : Interpolate polynomial (Krogh
            interpolator).
        scipy.interpolate.PchipInterpolator : PCHIP 1-d monotonic cubic
            interpolation.
        scipy.interpolate.CubicSpline : Cubic spline data interpolator.

        Notes
        -----
        The 'krogh', 'piecewise_polynomial', 'spline', 'pchip' and 'akima'
        methods are wrappers around the respective SciPy implementations of
        similar names. These use the actual numerical values of the index.
        For more information on their behavior, see the
        `SciPy documentation
        <http://docs.scipy.org/doc/scipy/reference/interpolate.html#univariate-interpolation>`__
        and `SciPy tutorial
        <http://docs.scipy.org/doc/scipy/reference/tutorial/interpolate.html>`__.

        Examples
        --------
        Filling in ``NaN`` in a :class:`~pandas.Series` via linear
        interpolation.

        >>> s = pd.Series([0, 1, np.nan, 3])
        >>> s
        0    0.0
        1    1.0
        2    NaN
        3    3.0
        dtype: float64
        >>> s.interpolate()
        0    0.0
        1    1.0
        2    2.0
        3    3.0
        dtype: float64

        Filling in ``NaN`` in a Series by padding, but filling at most two
        consecutive ``NaN`` at a time.

        >>> s = pd.Series([np.nan, "single_one", np.nan,
        ...                "fill_two_more", np.nan, np.nan, np.nan,
        ...                4.71, np.nan])
        >>> s
        0              NaN
        1       single_one
        2              NaN
        3    fill_two_more
        4              NaN
        5              NaN
        6              NaN
        7             4.71
        8              NaN
        dtype: object
        >>> s.interpolate(method='pad', limit=2)
        0              NaN
        1       single_one
        2       single_one
        3    fill_two_more
        4    fill_two_more
        5    fill_two_more
        6              NaN
        7             4.71
        8             4.71
        dtype: object

        Filling in ``NaN`` in a Series via polynomial interpolation or splines:
        Both 'polynomial' and 'spline' methods require that you also specify
        an ``order`` (int).

        >>> s = pd.Series([0, 2, np.nan, 8])
        >>> s.interpolate(method='polynomial', order=2)
        0    0.000000
        1    2.000000
        2    4.666667
        3    8.000000
        dtype: float64

        Fill the DataFrame forward (that is, going down) along each column
        using linear interpolation.

        Note how the last entry in column 'a' is interpolated differently,
        because there is no entry after it to use for interpolation.
        Note how the first entry in column 'b' remains ``NaN``, because there
        is no entry before it to use for interpolation.

        >>> df = pd.DataFrame([(0.0, np.nan, -1.0, 1.0),
        ...                    (np.nan, 2.0, np.nan, np.nan),
        ...                    (2.0, 3.0, np.nan, 9.0),
        ...                    (np.nan, 4.0, -4.0, 16.0)],
        ...                   columns=list('abcd'))
        >>> df
             a    b    c     d
        0  0.0  NaN -1.0   1.0
        1  NaN  2.0  NaN   NaN
        2  2.0  3.0  NaN   9.0
        3  NaN  4.0 -4.0  16.0
        >>> df.interpolate(method='linear', limit_direction='forward', axis=0)
             a    b    c     d
        0  0.0  NaN -1.0   1.0
        1  1.0  2.0 -2.0   5.0
        2  2.0  3.0 -3.0   9.0
        3  2.0  4.0 -4.0  16.0

        Using polynomial interpolation.

        >>> df['d'].interpolate(method='polynomial', order=2)
        0     1.0
        1     4.0
        2     9.0
        3    16.0
        Name: d, dtype: float64
        """
    @Appender(_shared_docs['interpolate'] % _shared_doc_kwargs)
    def interpolate(self, method='linear', axis=0, limit=None, inplace=False,
                    limit_direction='forward', limit_area=None,
                    downcast=None, **kwargs):
        """
        Interpolate values according to different methods.
        """
        inplace = validate_bool_kwarg(inplace, 'inplace')

        if self.ndim > 2:
            raise NotImplementedError("Interpolate has not been implemented "
                                      "on Panel and Panel 4D objects.")

        if axis == 0:
            ax = self._info_axis_name
            _maybe_transposed_self = self
        elif axis == 1:
            _maybe_transposed_self = self.T
            ax = 1
        else:
            _maybe_transposed_self = self
            # normalize the user-supplied axis first, so the shared
            # normalization below always receives a valid value
            ax = _maybe_transposed_self._get_axis_number(axis)
        ax = _maybe_transposed_self._get_axis_number(ax)

        if _maybe_transposed_self.ndim == 2:
            alt_ax = 1 - ax
        else:
            alt_ax = ax

        if (isinstance(_maybe_transposed_self.index, MultiIndex) and
                method != 'linear'):
            raise ValueError("Only `method=linear` interpolation is supported "
                             "on MultiIndexes.")

        if _maybe_transposed_self._data.get_dtype_counts().get(
                'object') == len(_maybe_transposed_self.T):
            raise TypeError("Cannot interpolate with all object-dtype columns "
                            "in the DataFrame. Try setting at least one "
                            "column to a numeric dtype.")

        # create/use the index
        if method == 'linear':
            # prior default
            index = np.arange(len(_maybe_transposed_self._get_axis(alt_ax)))
        else:
            index = _maybe_transposed_self._get_axis(alt_ax)

        if isna(index).any():
            raise NotImplementedError("Interpolation with NaNs in the index "
                                      "has not been implemented. Try filling "
                                      "those NaNs before interpolating.")
        data = _maybe_transposed_self._data
        new_data = data.interpolate(method=method, axis=ax, index=index,
                                    values=_maybe_transposed_self, limit=limit,
                                    limit_direction=limit_direction,
                                    limit_area=limit_area,
                                    inplace=inplace, downcast=downcast,
                                    **kwargs)

        if inplace:
            if axis == 1:
                new_data = self._constructor(new_data).T._data
            self._update_inplace(new_data)
        else:
            res = self._constructor(new_data).__finalize__(self)
            if axis == 1:
                res = res.T
            return res
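
    # Minimal usage sketch for the method above (the series is an assumption
    # made up for the example):
    #
    #     s = pd.Series([0.0, np.nan, 2.0])
    #     s.interpolate()                # linear: -> [0.0, 1.0, 2.0]
    #     s.interpolate(method='pad')    # forward fill: -> [0.0, 0.0, 2.0]
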
    # ----------------------------------------------------------------------
    # Timeseries Methods
    def asof(self, where, subset=None):
        """
        Return the last row(s) without any NaNs before `where`.

        The last row (for each element in `where`, if list) without any
        NaN is taken.
        In case of a :class:`~pandas.DataFrame`, the last row without NaN
        considering only the subset of columns (if not `None`)

        .. versionadded:: 0.19.0 For DataFrame

        If there is no good value, NaN is returned for a Series or
        a Series of NaN values for a DataFrame

        Parameters
        ----------
        where : date or array-like of dates
            Date(s) before which the last row(s) are returned.
        subset : str or array-like of str, default `None`
            For DataFrame, if not `None`, only use these columns to
            check for NaNs.

        Returns
        -------
        scalar, Series, or DataFrame

            * scalar : when `self` is a Series and `where` is a scalar
            * Series: when `self` is a Series and `where` is an array-like,
              or when `self` is a DataFrame and `where` is a scalar
            * DataFrame : when `self` is a DataFrame and `where` is an
              array-like

        See Also
        --------
        merge_asof : Perform an asof merge. Similar to left join.

        Notes
        -----
        Dates are assumed to be sorted. Raises if this is not the case.

        Examples
        --------
        A Series and a scalar `where`.

        >>> s = pd.Series([1, 2, np.nan, 4], index=[10, 20, 30, 40])
        >>> s
        10    1.0
        20    2.0
        30    NaN
        40    4.0
        dtype: float64

        >>> s.asof(20)
        2.0

        For a sequence `where`, a Series is returned. The first value is
        NaN, because the first element of `where` is before the first
        index value.

        >>> s.asof([5, 20])
        5     NaN
        20    2.0
        dtype: float64

        Missing values are not considered. The following is ``2.0``, not
        NaN, even though NaN is at the index location for ``30``.

        >>> s.asof(30)
        2.0

        Take all columns into consideration

        >>> df = pd.DataFrame({'a': [10, 20, 30, 40, 50],
        ...                    'b': [None, None, None, None, 500]},
        ...                   index=pd.DatetimeIndex(['2018-02-27 09:01:00',
        ...                                           '2018-02-27 09:02:00',
        ...                                           '2018-02-27 09:03:00',
        ...                                           '2018-02-27 09:04:00',
        ...                                           '2018-02-27 09:05:00']))
        >>> df.asof(pd.DatetimeIndex(['2018-02-27 09:03:30',
        ...                           '2018-02-27 09:04:30']))
                              a   b
        2018-02-27 09:03:30 NaN NaN
        2018-02-27 09:04:30 NaN NaN

        Take a single column into consideration

        >>> df.asof(pd.DatetimeIndex(['2018-02-27 09:03:30',
        ...                           '2018-02-27 09:04:30']),
        ...         subset=['a'])
                                a   b
        2018-02-27 09:03:30  30.0 NaN
        2018-02-27 09:04:30  40.0 NaN
        """
        if isinstance(where, compat.string_types):
            from pandas import to_datetime
            where = to_datetime(where)

        if not self.index.is_monotonic:
            raise ValueError("asof requires a sorted index")

        is_series = isinstance(self, ABCSeries)
        if is_series:
            if subset is not None:
                raise ValueError("subset is not valid for Series")
        elif self.ndim > 2:
            raise NotImplementedError("asof is not implemented "
                                      "for {type}".format(type=type(self)))
        else:
            if subset is None:
                subset = self.columns
            if not is_list_like(subset):
                subset = [subset]

        is_list = is_list_like(where)
        if not is_list:
            start = self.index[0]
            if isinstance(self.index, PeriodIndex):
                where = Period(where, freq=self.index.freq).ordinal
                start = start.ordinal

            if where < start:
                if not is_series:
                    from pandas import Series
                    return Series(index=self.columns, name=where)
                return np.nan

            # It's always much faster to use a *while* loop here for
            # Series than pre-computing all the NAs. However a
            # *while* loop is extremely expensive for DataFrame
            # so we later pre-compute all the NAs and use the same
            # code path whether *where* is a scalar or list.
            # See PR: https://github.com/pandas-dev/pandas/pull/14476
            if is_series:
                loc = self.index.searchsorted(where, side='right')
                if loc > 0:
                    loc -= 1

                values = self._values
                while loc > 0 and isna(values[loc]):
                    loc -= 1
                return values[loc]

        if not isinstance(where, Index):
            where = Index(where) if is_list else Index([where])

        nulls = self.isna() if is_series else self[subset].isna().any(1)
        if nulls.all():
            if is_series:
                return self._constructor(np.nan, index=where, name=self.name)
            elif is_list:
                from pandas import DataFrame
                return DataFrame(np.nan, index=where, columns=self.columns)
            else:
                from pandas import Series
                return Series(np.nan, index=self.columns, name=where[0])

        locs = self.index.asof_locs(where, ~(nulls.values))

        # mask the missing
        missing = locs == -1
        data = self.take(locs, is_copy=False)
        data.index = where
        data.loc[missing] = np.nan
        return data if is_list else data.iloc[-1]
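
    # Minimal usage sketch (the series is an assumption for the example):
    # the scalar Series path above walks backwards from the searchsorted
    # position until it finds a non-NaN value.
    #
    #     s = pd.Series([1.0, np.nan], index=[10, 20])
    #     s.asof(25)    # -> 1.0: index 20 holds NaN, so the loop backs up
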
    # ----------------------------------------------------------------------
    # Action Methods

    _shared_docs['isna'] = """
        Detect missing values.

        Return a boolean same-sized object indicating if the values are NA.
        NA values, such as None or :attr:`numpy.NaN`, get mapped to True
        values.
        Everything else gets mapped to False values. Characters such as empty
        strings ``''`` or :attr:`numpy.inf` are not considered NA values
        (unless you set ``pandas.options.mode.use_inf_as_na = True``).

        Returns
        -------
        %(klass)s
            Mask of bool values for each element in %(klass)s that
            indicates whether an element is an NA value.

        See Also
        --------
        %(klass)s.isnull : Alias of isna.
        %(klass)s.notna : Boolean inverse of isna.
        %(klass)s.dropna : Omit axes labels with missing values.
        isna : Top-level isna.

        Examples
        --------
        Show which entries in a DataFrame are NA.

        >>> df = pd.DataFrame({'age': [5, 6, np.NaN],
        ...                    'born': [pd.NaT, pd.Timestamp('1939-05-27'),
        ...                             pd.Timestamp('1940-04-25')],
        ...                    'name': ['Alfred', 'Batman', ''],
        ...                    'toy': [None, 'Batmobile', 'Joker']})
        >>> df
           age       born    name        toy
        0  5.0        NaT  Alfred       None
        1  6.0 1939-05-27  Batman  Batmobile
        2  NaN 1940-04-25              Joker

        >>> df.isna()
             age   born   name    toy
        0  False   True  False   True
        1  False  False  False  False
        2   True  False  False  False

        Show which entries in a Series are NA.

        >>> ser = pd.Series([5, 6, np.NaN])
        >>> ser
        0    5.0
        1    6.0
        2    NaN
        dtype: float64

        >>> ser.isna()
        0    False
        1    False
        2     True
        dtype: bool
        """
    @Appender(_shared_docs['isna'] % _shared_doc_kwargs)
    def isna(self):
        return isna(self).__finalize__(self)

    @Appender(_shared_docs['isna'] % _shared_doc_kwargs)
    def isnull(self):
        return isna(self).__finalize__(self)
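
    # Sketch: ``isnull`` is a strict alias of ``isna`` (both defined above),
    # so the two calls are interchangeable (series assumed for the example):
    #
    #     s = pd.Series([1, None])
    #     s.isna().equals(s.isnull())    # True
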
    _shared_docs['notna'] = """
        Detect existing (non-missing) values.

        Return a boolean same-sized object indicating if the values are not
        NA. Non-missing values get mapped to True. Characters such as empty
        strings ``''`` or :attr:`numpy.inf` are not considered NA values
        (unless you set ``pandas.options.mode.use_inf_as_na = True``).
        NA values, such as None or :attr:`numpy.NaN`, get mapped to False
        values.

        Returns
        -------
        %(klass)s
            Mask of bool values for each element in %(klass)s that
            indicates whether an element is not an NA value.

        See Also
        --------
        %(klass)s.notnull : Alias of notna.
        %(klass)s.isna : Boolean inverse of notna.
        %(klass)s.dropna : Omit axes labels with missing values.
        notna : Top-level notna.

        Examples
        --------
        Show which entries in a DataFrame are not NA.

        >>> df = pd.DataFrame({'age': [5, 6, np.NaN],
        ...                    'born': [pd.NaT, pd.Timestamp('1939-05-27'),
        ...                             pd.Timestamp('1940-04-25')],
        ...                    'name': ['Alfred', 'Batman', ''],
        ...                    'toy': [None, 'Batmobile', 'Joker']})
        >>> df
           age       born    name        toy
        0  5.0        NaT  Alfred       None
        1  6.0 1939-05-27  Batman  Batmobile
        2  NaN 1940-04-25              Joker

        >>> df.notna()
             age   born  name    toy
        0   True  False  True  False
        1   True   True  True   True
        2  False   True  True   True

        Show which entries in a Series are not NA.

        >>> ser = pd.Series([5, 6, np.NaN])
        >>> ser
        0    5.0
        1    6.0
        2    NaN
        dtype: float64

        >>> ser.notna()
        0     True
        1     True
        2    False
        dtype: bool
        """
    @Appender(_shared_docs['notna'] % _shared_doc_kwargs)
    def notna(self):
        return notna(self).__finalize__(self)

    @Appender(_shared_docs['notna'] % _shared_doc_kwargs)
    def notnull(self):
        return notna(self).__finalize__(self)
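
    # Sketch: ``notna`` is the elementwise inverse of ``isna`` (series
    # assumed for the example):
    #
    #     s = pd.Series([1, np.nan])
    #     (s.notna() == ~s.isna()).all()    # True
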
    def _clip_with_scalar(self, lower, upper, inplace=False):
        if ((lower is not None and np.any(isna(lower))) or
                (upper is not None and np.any(isna(upper)))):
            raise ValueError("Cannot use an NA value as a clip threshold")

        result = self
        mask = isna(self.values)

        with np.errstate(all='ignore'):
            if upper is not None:
                subset = self.to_numpy() <= upper
                result = result.where(subset, upper, axis=None, inplace=False)
            if lower is not None:
                subset = self.to_numpy() >= lower
                result = result.where(subset, lower, axis=None, inplace=False)

        if np.any(mask):
            result[mask] = np.nan

        if inplace:
            self._update_inplace(result)
        else:
            return result

    def _clip_with_one_bound(self, threshold, method, axis, inplace):

        if axis is not None:
            axis = self._get_axis_number(axis)

        # method is self.le for upper bound and self.ge for lower bound
        if is_scalar(threshold) and is_number(threshold):
            if method.__name__ == 'le':
                return self._clip_with_scalar(None, threshold, inplace=inplace)
            return self._clip_with_scalar(threshold, None, inplace=inplace)

        subset = method(threshold, axis=axis) | isna(self)

        # GH #15390
        # In order for where method to work, the threshold must
        # be transformed to NDFrame from other array like structure.
        if (not isinstance(threshold, ABCSeries)) and is_list_like(threshold):
            if isinstance(self, ABCSeries):
                threshold = pd.Series(threshold, index=self.index)
            else:
                threshold = _align_method_FRAME(self, threshold,
                                                axis)
        return self.where(subset, threshold, axis=axis, inplace=inplace)
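
    # Sketch of what the bound helper above computes for an upper bound,
    # i.e. ``method=self.le`` (the series is an assumption for the example):
    # keep values where the comparison holds, replace the rest with the
    # threshold.
    #
    #     s = pd.Series([1, 5, 10])
    #     s.where(s.le(7) | s.isna(), 7)    # -> [1, 5, 7]
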
    def clip(self, lower=None, upper=None, axis=None, inplace=False,
             *args, **kwargs):
        """
        Trim values at input threshold(s).

        Assigns values outside boundary to boundary values. Thresholds
        can be singular values or array-like, and in the latter case
        the clipping is performed element-wise in the specified axis.

        Parameters
        ----------
        lower : float or array_like, default None
            Minimum threshold value. All values below this
            threshold will be set to it.
        upper : float or array_like, default None
            Maximum threshold value. All values above this
            threshold will be set to it.
        axis : int or string axis name, optional
            Align object with lower and upper along the given axis.
        inplace : boolean, default False
            Whether to perform the operation in place on the data.

            .. versionadded:: 0.21.0

        *args, **kwargs
            Additional keywords have no effect but might be accepted
            for compatibility with numpy.

        Returns
        -------
        Series or DataFrame
            Same type as calling object with the values outside the
            clip boundaries replaced.

        Examples
        --------
        >>> data = {'col_0': [9, -3, 0, -1, 5], 'col_1': [-2, -7, 6, 8, -5]}
        >>> df = pd.DataFrame(data)
        >>> df
           col_0  col_1
        0      9     -2
        1     -3     -7
        2      0      6
        3     -1      8
        4      5     -5

        Clips per column using lower and upper thresholds:

        >>> df.clip(-4, 6)
           col_0  col_1
        0      6     -2
        1     -3     -4
        2      0      6
        3     -1      6
        4      5     -4

        Clips using specific lower and upper thresholds per column element:

        >>> t = pd.Series([2, -4, -1, 6, 3])
        >>> t
        0    2
        1   -4
        2   -1
        3    6
        4    3
        dtype: int64

        >>> df.clip(t, t + 4, axis=0)
           col_0  col_1
        0      6      2
        1     -3     -4
        2      0      3
        3      6      8
        4      5      3
        """
        if isinstance(self, ABCPanel):
            raise NotImplementedError("clip is not supported yet for panels")

        inplace = validate_bool_kwarg(inplace, 'inplace')

        axis = nv.validate_clip_with_axis(axis, args, kwargs)
        if axis is not None:
            axis = self._get_axis_number(axis)

        # GH 17276
        # numpy doesn't like NaN as a clip value
        # so ignore
        # GH 19992
        # numpy doesn't drop a list-like bound containing NaN
        if not is_list_like(lower) and np.any(pd.isnull(lower)):
            lower = None
        if not is_list_like(upper) and np.any(pd.isnull(upper)):
            upper = None

        # GH 2747 (arguments were reversed)
        if lower is not None and upper is not None:
            if is_scalar(lower) and is_scalar(upper):
                lower, upper = min(lower, upper), max(lower, upper)

        # fast-path for scalars
        if ((lower is None or (is_scalar(lower) and is_number(lower))) and
                (upper is None or (is_scalar(upper) and is_number(upper)))):
            return self._clip_with_scalar(lower, upper, inplace=inplace)

        result = self
        if lower is not None:
            result = result._clip_with_one_bound(lower, method=self.ge,
                                                 axis=axis, inplace=inplace)
        if upper is not None:
            if inplace:
                result = self
            result = result._clip_with_one_bound(upper, method=self.le,
                                                 axis=axis, inplace=inplace)

        return result
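
    # Usage sketch for the scalar fast path above (the frame is an assumption
    # for the example):
    #
    #     df = pd.DataFrame({'x': [-2, 0, 9]})
    #     df.clip(lower=0, upper=5)    # -> [0, 0, 5]
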
    def clip_upper(self, threshold, axis=None, inplace=False):
        """
        Trim values above a given threshold.

        .. deprecated:: 0.24.0
            Use clip(upper=threshold) instead.

        Elements above the `threshold` will be changed to match the
        `threshold` value(s). Threshold can be a single value or an array,
        in the latter case it performs the truncation element-wise.

        Parameters
        ----------
        threshold : numeric or array-like
            Maximum value allowed. All values above threshold will be set to
            this value.

            * float : every value is compared to `threshold`.
            * array-like : The shape of `threshold` should match the object
              it's compared to. When `self` is a Series, `threshold` should be
              the same length. When `self` is a DataFrame, `threshold` should
              be 2-D and the same shape as `self` for ``axis=None``, or 1-D and
              the same length as the axis being compared.

        axis : {0 or 'index', 1 or 'columns'}, default 0
            Align object with `threshold` along the given axis.
        inplace : boolean, default False
            Whether to perform the operation in place on the data.

            .. versionadded:: 0.21.0

        Returns
        -------
        Series or DataFrame
            Original data with values trimmed.

        See Also
        --------
        Series.clip : General purpose method to trim Series values to given
            threshold(s).
        DataFrame.clip : General purpose method to trim DataFrame values to
            given threshold(s).

        Examples
        --------
        >>> s = pd.Series([1, 2, 3, 4, 5])
        >>> s
        0    1
        1    2
        2    3
        3    4
        4    5
        dtype: int64

        >>> s.clip(upper=3)
        0    1
        1    2
        2    3
        3    3
        4    3
        dtype: int64

        >>> elemwise_thresholds = [5, 4, 3, 2, 1]
        >>> elemwise_thresholds
        [5, 4, 3, 2, 1]

        >>> s.clip(upper=elemwise_thresholds)
        0    1
        1    2
        2    3
        3    2
        4    1
        dtype: int64
        """
        warnings.warn('clip_upper(threshold) is deprecated, '
                      'use clip(upper=threshold) instead',
                      FutureWarning, stacklevel=2)
        return self._clip_with_one_bound(threshold, method=self.le,
                                         axis=axis, inplace=inplace)
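
    # Migration sketch for the deprecation above (series assumed):
    #
    #     s = pd.Series([1, 5, 9])
    #     s.clip(upper=3)    # preferred; s.clip_upper(3) emits FutureWarning
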
    def clip_lower(self, threshold, axis=None, inplace=False):
        """
        Trim values below a given threshold.

        .. deprecated:: 0.24.0
            Use clip(lower=threshold) instead.

        Elements below the `threshold` will be changed to match the
        `threshold` value(s). Threshold can be a single value or an array,
        in the latter case it performs the truncation element-wise.

        Parameters
        ----------
        threshold : numeric or array-like
            Minimum value allowed. All values below threshold will be set to
            this value.

            * float : every value is compared to `threshold`.
            * array-like : The shape of `threshold` should match the object
              it's compared to. When `self` is a Series, `threshold` should be
              the same length. When `self` is a DataFrame, `threshold` should
              be 2-D and the same shape as `self` for ``axis=None``, or 1-D and
              the same length as the axis being compared.

        axis : {0 or 'index', 1 or 'columns'}, default 0
            Align `self` with `threshold` along the given axis.
        inplace : boolean, default False
            Whether to perform the operation in place on the data.

            .. versionadded:: 0.21.0

        Returns
        -------
        Series or DataFrame
            Original data with values trimmed.

        See Also
        --------
        Series.clip : General purpose method to trim Series values to given
            threshold(s).
        DataFrame.clip : General purpose method to trim DataFrame values to
            given threshold(s).

        Examples
        --------
        Series single threshold clipping:

        >>> s = pd.Series([5, 6, 7, 8, 9])
        >>> s.clip(lower=8)
        0    8
        1    8
        2    8
        3    8
        4    9
        dtype: int64

        Series clipping element-wise using an array of thresholds. `threshold`
        should be the same length as the Series.

        >>> elemwise_thresholds = [4, 8, 7, 2, 5]
        >>> s.clip(lower=elemwise_thresholds)
        0    5
        1    8
        2    7
        3    8
        4    9
        dtype: int64

        DataFrames can be compared to a scalar.

        >>> df = pd.DataFrame({"A": [1, 3, 5], "B": [2, 4, 6]})
        >>> df
           A  B
        0  1  2
        1  3  4
        2  5  6

        >>> df.clip(lower=3)
           A  B
        0  3  3
        1  3  4
        2  5  6

        Or to an array of values. By default, `threshold` should be the same
        shape as the DataFrame.

        >>> df.clip(lower=np.array([[3, 4], [2, 2], [6, 2]]))
           A  B
        0  3  4
        1  3  4
        2  6  6

        Control how `threshold` is broadcast with `axis`. In this case
        `threshold` should be the same length as the axis specified by
        `axis`.

        >>> df.clip(lower=[3, 3, 5], axis='index')
           A  B
        0  3  3
        1  3  4
        2  5  6

        >>> df.clip(lower=[4, 5], axis='columns')
           A  B
        0  4  5
        1  4  5
        2  5  6
        """
        warnings.warn('clip_lower(threshold) is deprecated, '
                      'use clip(lower=threshold) instead',
                      FutureWarning, stacklevel=2)
        return self._clip_with_one_bound(threshold, method=self.ge,
                                         axis=axis, inplace=inplace)
    def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True,
                group_keys=True, squeeze=False, observed=False, **kwargs):
        """
        Group DataFrame or Series using a mapper or by a Series of columns.

        A groupby operation involves some combination of splitting the
        object, applying a function, and combining the results. This can be
        used to group large amounts of data and compute operations on these
        groups.

        Parameters
        ----------
        by : mapping, function, label, or list of labels
            Used to determine the groups for the groupby.
            If ``by`` is a function, it's called on each value of the object's
            index. If a dict or Series is passed, the Series or dict VALUES
            will be used to determine the groups (the Series' values are first
            aligned; see ``.align()`` method). If an ndarray is passed, the
            values are used as-is to determine the groups. A label or list of
            labels may be passed to group by the columns in ``self``. Notice
            that a tuple is interpreted as a (single) key.
        axis : {0 or 'index', 1 or 'columns'}, default 0
            Split along rows (0) or columns (1).
        level : int, level name, or sequence of such, default None
            If the axis is a MultiIndex (hierarchical), group by a particular
            level or levels.
        as_index : bool, default True
            For aggregated output, return object with group labels as the
            index. Only relevant for DataFrame input. as_index=False is
            effectively "SQL-style" grouped output.
        sort : bool, default True
            Sort group keys. Get better performance by turning this off.
            Note this does not influence the order of observations within each
            group. Groupby preserves the order of rows within each group.
        group_keys : bool, default True
            When calling apply, add group keys to index to identify pieces.
        squeeze : bool, default False
            Reduce the dimensionality of the return type if possible,
            otherwise return a consistent type.
        observed : bool, default False
            This only applies if any of the groupers are Categoricals.
            If True: only show observed values for categorical groupers.
            If False: show all values for categorical groupers.

            .. versionadded:: 0.23.0

        **kwargs
            Optional, only accepts keyword argument 'mutated' and is passed
            to groupby.

        Returns
        -------
        DataFrameGroupBy or SeriesGroupBy
            Depends on the calling object and returns groupby object that
            contains information about the groups.

        See Also
        --------
        resample : Convenience method for frequency conversion and resampling
            of time series.

        Notes
        -----
        See the `user guide
        <http://pandas.pydata.org/pandas-docs/stable/groupby.html>`_ for more.

        Examples
        --------
        >>> df = pd.DataFrame({'Animal': ['Falcon', 'Falcon',
        ...                               'Parrot', 'Parrot'],
        ...                    'Max Speed': [380., 370., 24., 26.]})
        >>> df
           Animal  Max Speed
        0  Falcon      380.0
        1  Falcon      370.0
        2  Parrot       24.0
        3  Parrot       26.0
        >>> df.groupby(['Animal']).mean()
                Max Speed
        Animal
        Falcon      375.0
        Parrot       25.0

        **Hierarchical Indexes**

        We can groupby different levels of a hierarchical index
        using the `level` parameter:

        >>> arrays = [['Falcon', 'Falcon', 'Parrot', 'Parrot'],
        ...           ['Captive', 'Wild', 'Captive', 'Wild']]
        >>> index = pd.MultiIndex.from_arrays(arrays, names=('Animal', 'Type'))
        >>> df = pd.DataFrame({'Max Speed': [390., 350., 30., 20.]},
        ...                   index=index)
        >>> df
                        Max Speed
        Animal Type
        Falcon Captive      390.0
               Wild         350.0
        Parrot Captive       30.0
               Wild          20.0
        >>> df.groupby(level=0).mean()
                Max Speed
        Animal
        Falcon      370.0
        Parrot       25.0
        >>> df.groupby(level=1).mean()
                 Max Speed
        Type
        Captive      210.0
        Wild         185.0
        """
        from pandas.core.groupby.groupby import groupby

        if level is None and by is None:
            raise TypeError("You have to supply one of 'by' and 'level'")
        axis = self._get_axis_number(axis)
        return groupby(self, by=by, axis=axis, level=level, as_index=as_index,
                       sort=sort, group_keys=group_keys, squeeze=squeeze,
                       observed=observed, **kwargs)
    def asfreq(self, freq, method=None, how=None, normalize=False,
               fill_value=None):
        """
        Convert TimeSeries to specified frequency.

        Optionally provide filling method to pad/backfill missing values.

        Returns the original data conformed to a new index with the specified
        frequency. ``resample`` is more appropriate if an operation, such as
        summarization, is necessary to represent the data at the new frequency.

        Parameters
        ----------
        freq : DateOffset object, or string
        method : {'backfill'/'bfill', 'pad'/'ffill'}, default None
            Method to use for filling holes in reindexed Series (note this
            does not fill NaNs that already were present):

            * 'pad' / 'ffill': propagate last valid observation forward to
              next valid
            * 'backfill' / 'bfill': use NEXT valid observation to fill
        how : {'start', 'end'}, default 'end'
            For PeriodIndex only, see PeriodIndex.asfreq.
        normalize : bool, default False
            Whether to reset output index to midnight.
        fill_value : scalar, optional
            Value to use for missing values, applied during upsampling (note
            this does not fill NaNs that already were present).

            .. versionadded:: 0.20.0

        Returns
        -------
        converted : same type as caller

        See Also
        --------
        reindex

        Notes
        -----
        To learn more about the frequency strings, please see `this link
        <http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases>`__.

        Examples
        --------
        Start by creating a series with 4 one minute timestamps.

        >>> index = pd.date_range('1/1/2000', periods=4, freq='T')
        >>> series = pd.Series([0.0, None, 2.0, 3.0], index=index)
        >>> df = pd.DataFrame({'s': series})
        >>> df
                               s
        2000-01-01 00:00:00  0.0
        2000-01-01 00:01:00  NaN
        2000-01-01 00:02:00  2.0
        2000-01-01 00:03:00  3.0

        Upsample the series into 30 second bins.

        >>> df.asfreq(freq='30S')
                               s
        2000-01-01 00:00:00  0.0
        2000-01-01 00:00:30  NaN
        2000-01-01 00:01:00  NaN
        2000-01-01 00:01:30  NaN
        2000-01-01 00:02:00  2.0
        2000-01-01 00:02:30  NaN
        2000-01-01 00:03:00  3.0

        Upsample again, providing a ``fill_value``.

        >>> df.asfreq(freq='30S', fill_value=9.0)
                               s
        2000-01-01 00:00:00  0.0
        2000-01-01 00:00:30  9.0
        2000-01-01 00:01:00  NaN
        2000-01-01 00:01:30  9.0
        2000-01-01 00:02:00  2.0
        2000-01-01 00:02:30  9.0
        2000-01-01 00:03:00  3.0

        Upsample again, providing a ``method``.

        >>> df.asfreq(freq='30S', method='bfill')
                               s
        2000-01-01 00:00:00  0.0
        2000-01-01 00:00:30  NaN
        2000-01-01 00:01:00  NaN
        2000-01-01 00:01:30  2.0
        2000-01-01 00:02:00  2.0
        2000-01-01 00:02:30  3.0
        2000-01-01 00:03:00  3.0
        """
        from pandas.core.resample import asfreq
        return asfreq(self, freq, method=method, how=how, normalize=normalize,
                      fill_value=fill_value)
    def at_time(self, time, asof=False, axis=None):
        """
        Select values at particular time of day (e.g. 9:30AM).

        Parameters
        ----------
        time : datetime.time or string
        axis : {0 or 'index', 1 or 'columns'}, default 0

            .. versionadded:: 0.24.0

        Returns
        -------
        values_at_time : same type as caller

        Raises
        ------
        TypeError
            If the index is not a :class:`DatetimeIndex`

        See Also
        --------
        between_time : Select values between particular times of the day.
        first : Select initial periods of time series based on a date offset.
        last : Select final periods of time series based on a date offset.
        DatetimeIndex.indexer_at_time : Get just the index locations for
            values at particular time of the day.

        Examples
        --------
        >>> i = pd.date_range('2018-04-09', periods=4, freq='12H')
        >>> ts = pd.DataFrame({'A': [1, 2, 3, 4]}, index=i)
        >>> ts
                             A
        2018-04-09 00:00:00  1
        2018-04-09 12:00:00  2
        2018-04-10 00:00:00  3
        2018-04-10 12:00:00  4

        >>> ts.at_time('12:00')
                             A
        2018-04-09 12:00:00  2
        2018-04-10 12:00:00  4
        """
        if axis is None:
            axis = self._stat_axis_number
        axis = self._get_axis_number(axis)

        index = self._get_axis(axis)
        try:
            indexer = index.indexer_at_time(time, asof=asof)
        except AttributeError:
            raise TypeError('Index must be DatetimeIndex')

        return self._take(indexer, axis=axis)
    def between_time(self, start_time, end_time, include_start=True,
                     include_end=True, axis=None):
        """
        Select values between particular times of the day (e.g., 9:00-9:30 AM).

        By setting ``start_time`` to be later than ``end_time``,
        you can get the times that are *not* between the two times.

        Parameters
        ----------
        start_time : datetime.time or string
        end_time : datetime.time or string
        include_start : boolean, default True
        include_end : boolean, default True
        axis : {0 or 'index', 1 or 'columns'}, default 0

            .. versionadded:: 0.24.0

        Returns
        -------
        values_between_time : same type as caller

        Raises
        ------
        TypeError
            If the index is not a :class:`DatetimeIndex`

        See Also
        --------
        at_time : Select values at a particular time of the day.
        first : Select initial periods of time series based on a date offset.
        last : Select final periods of time series based on a date offset.
        DatetimeIndex.indexer_between_time : Get just the index locations for
            values between particular times of the day.

        Examples
        --------
        >>> i = pd.date_range('2018-04-09', periods=4, freq='1D20min')
        >>> ts = pd.DataFrame({'A': [1, 2, 3, 4]}, index=i)
        >>> ts
                             A
        2018-04-09 00:00:00  1
        2018-04-10 00:20:00  2
        2018-04-11 00:40:00  3
        2018-04-12 01:00:00  4

        >>> ts.between_time('0:15', '0:45')
                             A
        2018-04-10 00:20:00  2
        2018-04-11 00:40:00  3

        You get the times that are *not* between two times by setting
        ``start_time`` later than ``end_time``:

        >>> ts.between_time('0:45', '0:15')
                             A
        2018-04-09 00:00:00  1
        2018-04-12 01:00:00  4
        """
        if axis is None:
            axis = self._stat_axis_number
        axis = self._get_axis_number(axis)

        index = self._get_axis(axis)
        try:
            indexer = index.indexer_between_time(
                start_time, end_time, include_start=include_start,
                include_end=include_end)
        except AttributeError:
            raise TypeError('Index must be DatetimeIndex')

        return self._take(indexer, axis=axis)
    def resample(self, rule, how=None, axis=0, fill_method=None, closed=None,
                 label=None, convention='start', kind=None, loffset=None,
                 limit=None, base=0, on=None, level=None):
        """
        Resample time-series data.

        Convenience method for frequency conversion and resampling of time
        series. Object must have a datetime-like index (`DatetimeIndex`,
        `PeriodIndex`, or `TimedeltaIndex`), or pass datetime-like values
        to the `on` or `level` keyword.

        Parameters
        ----------
        rule : str
            The offset string or object representing target conversion.
        how : str
            Method for down/re-sampling, defaults to 'mean' for downsampling.

            .. deprecated:: 0.18.0
                The new syntax is ``.resample(...).mean()``, or
                ``.resample(...).apply(<func>)``
        axis : {0 or 'index', 1 or 'columns'}, default 0
            Which axis to use for up- or down-sampling. For `Series` this
            will default to 0, i.e. along the rows. The index of the chosen
            axis must be a `DatetimeIndex`, `TimedeltaIndex` or `PeriodIndex`.
        fill_method : str, default None
            Filling method for upsampling.

            .. deprecated:: 0.18.0
                The new syntax is ``.resample(...).<func>()``,
                e.g. ``.resample(...).pad()``
        closed : {'right', 'left'}, default None
            Which side of bin interval is closed. The default is 'left'
            for all frequency offsets except for 'M', 'A', 'Q', 'BM',
            'BA', 'BQ', and 'W' which all have a default of 'right'.
        label : {'right', 'left'}, default None
            Which bin edge label to label bucket with. The default is 'left'
            for all frequency offsets except for 'M', 'A', 'Q', 'BM',
            'BA', 'BQ', and 'W' which all have a default of 'right'.
        convention : {'start', 'end', 's', 'e'}, default 'start'
            For `PeriodIndex` only, controls whether to use the start or
            end of `rule`.
        kind : {'timestamp', 'period'}, optional, default None
            Pass 'timestamp' to convert the resulting index to a
            `DateTimeIndex` or 'period' to convert it to a `PeriodIndex`.
            By default the input representation is retained.
        loffset : timedelta, default None
            Adjust the resampled time labels.
        limit : int, default None
            Maximum size gap when reindexing with `fill_method`.

            .. deprecated:: 0.18.0
        base : int, default 0
            For frequencies that evenly subdivide 1 day, the "origin" of the
            aggregated intervals. For example, for '5min' frequency, base could
            range from 0 through 4. Defaults to 0.
        on : str, optional
            For a DataFrame, column to use instead of index for resampling.
            Column must be datetime-like.

            .. versionadded:: 0.19.0
        level : str or int, optional
            For a MultiIndex, level (name or number) to use for
            resampling. `level` must be datetime-like.

            .. versionadded:: 0.19.0

        Returns
        -------
        Resampler object

        See Also
        --------
        groupby : Group by mapping, function, label, or list of labels.
        Series.resample : Resample a Series.
        DataFrame.resample : Resample a DataFrame.

        Notes
        -----
        See the `user guide
        <http://pandas.pydata.org/pandas-docs/stable/timeseries.html#resampling>`_
        for more.

        To learn more about the offset strings, please see `this link
        <http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases>`__.

        Examples
        --------
        Start by creating a series with 9 one minute timestamps.

        >>> index = pd.date_range('1/1/2000', periods=9, freq='T')
        >>> series = pd.Series(range(9), index=index)
        >>> series
        2000-01-01 00:00:00    0
        2000-01-01 00:01:00    1
        2000-01-01 00:02:00    2
        2000-01-01 00:03:00    3
        2000-01-01 00:04:00    4
        2000-01-01 00:05:00    5
        2000-01-01 00:06:00    6
        2000-01-01 00:07:00    7
        2000-01-01 00:08:00    8
        Freq: T, dtype: int64

        Downsample the series into 3 minute bins and sum the values
        of the timestamps falling into a bin.

        >>> series.resample('3T').sum()
        2000-01-01 00:00:00     3
        2000-01-01 00:03:00    12
        2000-01-01 00:06:00    21
        Freq: 3T, dtype: int64

        Downsample the series into 3 minute bins as above, but label each
        bin using the right edge instead of the left. Please note that the
        value in the bucket used as the label is not included in the bucket
        it labels. For example, in the original series the bucket
        ``2000-01-01 00:03:00`` contains the value 3, but the summed value
        in the resampled bucket with the label ``2000-01-01 00:03:00`` does
        not include 3 (if it did, the summed value would be 6, not 3). To
        include this value, close the right side of the bin interval, as
        illustrated in the example below this one.

        >>> series.resample('3T', label='right').sum()
        2000-01-01 00:03:00     3
        2000-01-01 00:06:00    12
        2000-01-01 00:09:00    21
        Freq: 3T, dtype: int64

        Downsample the series into 3 minute bins as above, but close the right
        side of the bin interval.

        >>> series.resample('3T', label='right', closed='right').sum()
        2000-01-01 00:00:00     0
        2000-01-01 00:03:00     6
        2000-01-01 00:06:00    15
        2000-01-01 00:09:00    15
        Freq: 3T, dtype: int64

        Upsample the series into 30 second bins.

        >>> series.resample('30S').asfreq()[0:5]   # Select first 5 rows
        2000-01-01 00:00:00    0.0
        2000-01-01 00:00:30    NaN
        2000-01-01 00:01:00    1.0
        2000-01-01 00:01:30    NaN
        2000-01-01 00:02:00    2.0
        Freq: 30S, dtype: float64

        Upsample the series into 30 second bins and fill the ``NaN``
        values using the ``pad`` method.

        >>> series.resample('30S').pad()[0:5]
        2000-01-01 00:00:00    0
        2000-01-01 00:00:30    0
        2000-01-01 00:01:00    1
        2000-01-01 00:01:30    1
        2000-01-01 00:02:00    2
        Freq: 30S, dtype: int64

        Upsample the series into 30 second bins and fill the
        ``NaN`` values using the ``bfill`` method.

        >>> series.resample('30S').bfill()[0:5]
        2000-01-01 00:00:00    0
        2000-01-01 00:00:30    1
        2000-01-01 00:01:00    1
        2000-01-01 00:01:30    2
        2000-01-01 00:02:00    2
        Freq: 30S, dtype: int64

        Pass a custom function via ``apply``

        >>> def custom_resampler(array_like):
        ...     return np.sum(array_like) + 5
        ...
        >>> series.resample('3T').apply(custom_resampler)
        2000-01-01 00:00:00     8
        2000-01-01 00:03:00    17
        2000-01-01 00:06:00    26
        Freq: 3T, dtype: int64

        For a Series with a PeriodIndex, the keyword `convention` can be
        used to control whether to use the start or end of `rule`.

        Resample a year by quarter using 'start' `convention`. Values are
        assigned to the first quarter of the period.

        >>> s = pd.Series([1, 2], index=pd.period_range('2012-01-01',
        ...                                             freq='A',
        ...                                             periods=2))
        >>> s
        2012    1
        2013    2
        Freq: A-DEC, dtype: int64
        >>> s.resample('Q', convention='start').asfreq()
        2012Q1    1.0
        2012Q2    NaN
        2012Q3    NaN
        2012Q4    NaN
        2013Q1    2.0
        2013Q2    NaN
        2013Q3    NaN
        2013Q4    NaN
        Freq: Q-DEC, dtype: float64

        Resample quarters by month using 'end' `convention`. Values are
        assigned to the last month of the period.

        >>> q = pd.Series([1, 2, 3, 4], index=pd.period_range('2018-01-01',
        ...                                                   freq='Q',
        ...                                                   periods=4))
        >>> q
        2018Q1    1
        2018Q2    2
        2018Q3    3
        2018Q4    4
        Freq: Q-DEC, dtype: int64
        >>> q.resample('M', convention='end').asfreq()
        2018-03    1.0
        2018-04    NaN
        2018-05    NaN
        2018-06    2.0
        2018-07    NaN
        2018-08    NaN
        2018-09    3.0
        2018-10    NaN
        2018-11    NaN
        2018-12    4.0
        Freq: M, dtype: float64

        For DataFrame objects, the keyword `on` can be used to specify the
        column instead of the index for resampling.

        >>> d = {'price': [10, 11, 9, 13, 14, 18, 17, 19],
        ...      'volume': [50, 60, 40, 100, 50, 100, 40, 50]}
        >>> df = pd.DataFrame(d)
        >>> df['week_starting'] = pd.date_range('01/01/2018',
        ...                                     periods=8,
        ...                                     freq='W')
        >>> df
           price  volume week_starting
        0     10      50    2018-01-07
        1     11      60    2018-01-14
        2      9      40    2018-01-21
        3     13     100    2018-01-28
        4     14      50    2018-02-04
        5     18     100    2018-02-11
        6     17      40    2018-02-18
        7     19      50    2018-02-25
        >>> df.resample('M', on='week_starting').mean()
                       price  volume
        week_starting
        2018-01-31     10.75    62.5
        2018-02-28     17.00    60.0

        For a DataFrame with MultiIndex, the keyword `level` can be used to
        specify on which level the resampling needs to take place.

        >>> days = pd.date_range('1/1/2000', periods=4, freq='D')
        >>> d2 = {'price': [10, 11, 9, 13, 14, 18, 17, 19],
        ...       'volume': [50, 60, 40, 100, 50, 100, 40, 50]}
        >>> df2 = pd.DataFrame(d2,
        ...                    index=pd.MultiIndex.from_product([days,
        ...                                                     ['morning',
        ...                                                      'afternoon']]
        ...                                                     ))
        >>> df2
                              price  volume
        2000-01-01 morning       10      50
                   afternoon     11      60
        2000-01-02 morning        9      40
                   afternoon     13     100
        2000-01-03 morning       14      50
                   afternoon     18     100
        2000-01-04 morning       17      40
                   afternoon     19      50
        >>> df2.resample('D', level=0).sum()
                    price  volume
        2000-01-01     21     110
        2000-01-02     22     140
        2000-01-03     32     150
        2000-01-04     36      90
        """
        from pandas.core.resample import (resample,
                                          _maybe_process_deprecations)
        axis = self._get_axis_number(axis)
        r = resample(self, freq=rule, label=label, closed=closed,
                     axis=axis, kind=kind, loffset=loffset,
                     convention=convention,
                     base=base, key=on, level=level)
        return _maybe_process_deprecations(r,
                                           how=how,
                                           fill_method=fill_method,
                                           limit=limit)
    def first(self, offset):
        """
        Convenience method for subsetting initial periods of time series data
        based on a date offset.

        Parameters
        ----------
        offset : string, DateOffset, dateutil.relativedelta

        Returns
        -------
        subset : same type as caller

        Raises
        ------
        TypeError
            If the index is not a :class:`DatetimeIndex`

        See Also
        --------
        last : Select final periods of time series based on a date offset.
        at_time : Select values at a particular time of the day.
        between_time : Select values between particular times of the day.

        Examples
        --------
        >>> i = pd.date_range('2018-04-09', periods=4, freq='2D')
        >>> ts = pd.DataFrame({'A': [1, 2, 3, 4]}, index=i)
        >>> ts
                    A
        2018-04-09  1
        2018-04-11  2
        2018-04-13  3
        2018-04-15  4

        Get the rows for the first 3 days:

        >>> ts.first('3D')
                    A
        2018-04-09  1
        2018-04-11  2

        Notice that the data for the first 3 calendar days was returned, not
        the first 3 days observed in the dataset, and therefore data for
        2018-04-13 was not returned.
        """
        if not isinstance(self.index, DatetimeIndex):
            raise TypeError("'first' only supports a DatetimeIndex index")

        if len(self.index) == 0:
            return self

        offset = to_offset(offset)
        end_date = end = self.index[0] + offset

        # Tick-like, e.g. 3 weeks
        if not offset.isAnchored() and hasattr(offset, '_inc'):
            if end_date in self.index:
                end = self.index.searchsorted(end_date, side='left')
                return self.iloc[:end]

        return self.loc[:end]
    def last(self, offset):
        """
        Convenience method for subsetting final periods of time series data
        based on a date offset.

        Parameters
        ----------
        offset : string, DateOffset, dateutil.relativedelta

        Returns
        -------
        subset : same type as caller

        Raises
        ------
        TypeError
            If the index is not a :class:`DatetimeIndex`

        See Also
        --------
        first : Select initial periods of time series based on a date offset.
        at_time : Select values at a particular time of the day.
        between_time : Select values between particular times of the day.

        Examples
        --------
        >>> i = pd.date_range('2018-04-09', periods=4, freq='2D')
        >>> ts = pd.DataFrame({'A': [1, 2, 3, 4]}, index=i)
        >>> ts
                    A
        2018-04-09  1
        2018-04-11  2
        2018-04-13  3
        2018-04-15  4

        Get the rows for the last 3 days:

        >>> ts.last('3D')
                    A
        2018-04-13  3
        2018-04-15  4

        Notice that the data for the last 3 calendar days was returned, not
        the last 3 observed days in the dataset, and therefore data for
        2018-04-11 was not returned.
        """
        if not isinstance(self.index, DatetimeIndex):
            raise TypeError("'last' only supports a DatetimeIndex index")

        if len(self.index) == 0:
            return self

        offset = to_offset(offset)

        start_date = self.index[-1] - offset
        start = self.index.searchsorted(start_date, side='right')
        return self.iloc[start:]
    def rank(self, axis=0, method='average', numeric_only=None,
             na_option='keep', ascending=True, pct=False):
        """
        Compute numerical data ranks (1 through n) along axis.

        Equal values are assigned a rank that is the average of the ranks of
        those values.

        Parameters
        ----------
        axis : {0 or 'index', 1 or 'columns'}, default 0
            Index to direct ranking.
        method : {'average', 'min', 'max', 'first', 'dense'}
            * average: average rank of group
            * min: lowest rank in group
            * max: highest rank in group
            * first: ranks assigned in order they appear in the array
            * dense: like 'min', but rank always increases by 1 between groups
        numeric_only : boolean, default None
            Include only float, int, boolean data. Valid only for DataFrame or
            Panel objects.
        na_option : {'keep', 'top', 'bottom'}
            * keep: leave NA values where they are
            * top: smallest rank if ascending
            * bottom: smallest rank if descending
        ascending : boolean, default True
            False ranks by high (1) to low (N).
        pct : boolean, default False
            Computes percentage rank of data.

        Returns
        -------
        ranks : same type as caller
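
        Examples
        --------
        A minimal sketch of ranking a small Series with the default
        ``average`` method and with ``method='dense'`` (by default, tied
        values share the average of their ranks; ``dense`` increases the
        rank by 1 per distinct group):

        >>> s = pd.Series([1, 2, 2, 3])
        >>> s.rank()
        0    1.0
        1    2.5
        2    2.5
        3    4.0
        dtype: float64
        >>> s.rank(method='dense')
        0    1.0
        1    2.0
        2    2.0
        3    3.0
        dtype: float64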
  6932. """
  6933. axis = self._get_axis_number(axis)
  6934. if self.ndim > 2:
  6935. msg = "rank does not make sense when ndim > 2"
  6936. raise NotImplementedError(msg)
  6937. if na_option not in {'keep', 'top', 'bottom'}:
  6938. msg = "na_option must be one of 'keep', 'top', or 'bottom'"
  6939. raise ValueError(msg)
  6940. def ranker(data):
  6941. ranks = algos.rank(data.values, axis=axis, method=method,
  6942. ascending=ascending, na_option=na_option,
  6943. pct=pct)
  6944. ranks = self._constructor(ranks, **data._construct_axes_dict())
  6945. return ranks.__finalize__(self)
  6946. # if numeric_only is None, and we can't get anything, we try with
  6947. # numeric_only=True
  6948. if numeric_only is None:
  6949. try:
  6950. return ranker(self)
  6951. except TypeError:
  6952. numeric_only = True
  6953. if numeric_only:
  6954. data = self._get_numeric_data()
  6955. else:
  6956. data = self
  6957. return ranker(data)
    _shared_docs['align'] = ("""
    Align two objects on their axes with the
    specified join method for each axis Index.

    Parameters
    ----------
    other : DataFrame or Series
    join : {'outer', 'inner', 'left', 'right'}, default 'outer'
    axis : allowed axis of the other object, default None
        Align on index (0), columns (1), or both (None).
    level : int or level name, default None
        Broadcast across a level, matching Index values on the
        passed MultiIndex level.
    copy : boolean, default True
        Always returns new objects. If copy=False and no reindexing is
        required then original objects are returned.
    fill_value : scalar, default np.NaN
        Value to use for missing values. Defaults to NaN, but can be any
        "compatible" value.
    method : {'backfill', 'bfill', 'pad', 'ffill', None}, default None
        Method to use for filling holes in reindexed Series:

        - pad / ffill: propagate last valid observation forward to next valid
        - backfill / bfill: use NEXT valid observation to fill gap
    limit : int, default None
        If method is specified, this is the maximum number of consecutive
        NaN values to forward/backward fill. In other words, if there is
        a gap with more than this number of consecutive NaNs, it will only
        be partially filled. If method is not specified, this is the
        maximum number of entries along the entire axis where NaNs will be
        filled. Must be greater than 0 if not None.
    fill_axis : %(axes_single_arg)s, default 0
        Filling axis, method and limit.
    broadcast_axis : %(axes_single_arg)s, default None
        Broadcast values along this axis, if aligning two objects of
        different dimensions.

    Returns
    -------
    (left, right) : (%(klass)s, type of other)
        Aligned objects
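
    Examples
    --------
    A small illustration of the default outer join on two Series (a
    sketch; note that integer values are upcast to float where missing
    values are introduced by the alignment):

    >>> left = pd.Series([1, 2], index=['a', 'b'])
    >>> right = pd.Series([3, 4], index=['b', 'c'])
    >>> l, r = left.align(right, join='outer')
    >>> l
    a    1.0
    b    2.0
    c    NaN
    dtype: float64
    >>> r
    a    NaN
    b    3.0
    c    4.0
    dtype: float64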
  6996. """)
  6997. @Appender(_shared_docs['align'] % _shared_doc_kwargs)
  6998. def align(self, other, join='outer', axis=None, level=None, copy=True,
  6999. fill_value=None, method=None, limit=None, fill_axis=0,
  7000. broadcast_axis=None):
  7001. from pandas import DataFrame, Series
  7002. method = missing.clean_fill_method(method)
  7003. if broadcast_axis == 1 and self.ndim != other.ndim:
  7004. if isinstance(self, Series):
  7005. # this means other is a DataFrame, and we need to broadcast
  7006. # self
  7007. cons = self._constructor_expanddim
  7008. df = cons({c: self for c in other.columns},
  7009. **other._construct_axes_dict())
  7010. return df._align_frame(other, join=join, axis=axis,
  7011. level=level, copy=copy,
  7012. fill_value=fill_value, method=method,
  7013. limit=limit, fill_axis=fill_axis)
  7014. elif isinstance(other, Series):
  7015. # this means self is a DataFrame, and we need to broadcast
  7016. # other
  7017. cons = other._constructor_expanddim
  7018. df = cons({c: other for c in self.columns},
  7019. **self._construct_axes_dict())
  7020. return self._align_frame(df, join=join, axis=axis, level=level,
  7021. copy=copy, fill_value=fill_value,
  7022. method=method, limit=limit,
  7023. fill_axis=fill_axis)
  7024. if axis is not None:
  7025. axis = self._get_axis_number(axis)
  7026. if isinstance(other, DataFrame):
  7027. return self._align_frame(other, join=join, axis=axis, level=level,
  7028. copy=copy, fill_value=fill_value,
  7029. method=method, limit=limit,
  7030. fill_axis=fill_axis)
  7031. elif isinstance(other, Series):
  7032. return self._align_series(other, join=join, axis=axis, level=level,
  7033. copy=copy, fill_value=fill_value,
  7034. method=method, limit=limit,
  7035. fill_axis=fill_axis)
  7036. else: # pragma: no cover
  7037. raise TypeError('unsupported type: %s' % type(other))
    def _align_frame(self, other, join='outer', axis=None, level=None,
                     copy=True, fill_value=None, method=None, limit=None,
                     fill_axis=0):
        # defaults
        join_index, join_columns = None, None
        ilidx, iridx = None, None
        clidx, cridx = None, None

        is_series = isinstance(self, ABCSeries)

        if axis is None or axis == 0:
            if not self.index.equals(other.index):
                join_index, ilidx, iridx = self.index.join(
                    other.index, how=join, level=level, return_indexers=True)

        if axis is None or axis == 1:
            if not is_series and not self.columns.equals(other.columns):
                join_columns, clidx, cridx = self.columns.join(
                    other.columns, how=join, level=level,
                    return_indexers=True)

        if is_series:
            reindexers = {0: [join_index, ilidx]}
        else:
            reindexers = {0: [join_index, ilidx], 1: [join_columns, clidx]}

        left = self._reindex_with_indexers(reindexers, copy=copy,
                                           fill_value=fill_value,
                                           allow_dups=True)
        # other must always be a DataFrame
        right = other._reindex_with_indexers({0: [join_index, iridx],
                                              1: [join_columns, cridx]},
                                             copy=copy, fill_value=fill_value,
                                             allow_dups=True)

        if method is not None:
            left = left.fillna(axis=fill_axis, method=method, limit=limit)
            right = right.fillna(axis=fill_axis, method=method, limit=limit)

        # if DatetimeIndexes have different tz, convert both to the
        # joined index
        if is_datetime64tz_dtype(left.index):
            if left.index.tz != right.index.tz:
                if join_index is not None:
                    left.index = join_index
                    right.index = join_index

        return left.__finalize__(self), right.__finalize__(other)
    def _align_series(self, other, join='outer', axis=None, level=None,
                      copy=True, fill_value=None, method=None, limit=None,
                      fill_axis=0):

        is_series = isinstance(self, ABCSeries)

        # series/series compat, other must always be a Series
        if is_series:
            if axis:
                raise ValueError('cannot align series to a series other than '
                                 'axis 0')

            # equal
            if self.index.equals(other.index):
                join_index, lidx, ridx = None, None, None
            else:
                join_index, lidx, ridx = self.index.join(other.index,
                                                         how=join,
                                                         level=level,
                                                         return_indexers=True)

            left = self._reindex_indexer(join_index, lidx, copy)
            right = other._reindex_indexer(join_index, ridx, copy)

        else:
            # one has > 1 ndim
            fdata = self._data
            if axis == 0:
                join_index = self.index
                lidx, ridx = None, None
                if not self.index.equals(other.index):
                    join_index, lidx, ridx = self.index.join(
                        other.index, how=join, level=level,
                        return_indexers=True)

                if lidx is not None:
                    fdata = fdata.reindex_indexer(join_index, lidx, axis=1)

            elif axis == 1:
                join_index = self.columns
                lidx, ridx = None, None
                if not self.columns.equals(other.index):
                    join_index, lidx, ridx = self.columns.join(
                        other.index, how=join, level=level,
                        return_indexers=True)

                if lidx is not None:
                    fdata = fdata.reindex_indexer(join_index, lidx, axis=0)
            else:
                raise ValueError('Must specify axis=0 or 1')

            if copy and fdata is self._data:
                fdata = fdata.copy()

            left = self._constructor(fdata)

            if ridx is None:
                right = other
            else:
                right = other.reindex(join_index, level=level)

        # fill
        fill_na = notna(fill_value) or (method is not None)
        if fill_na:
            left = left.fillna(fill_value, method=method, limit=limit,
                               axis=fill_axis)
            right = right.fillna(fill_value, method=method, limit=limit)

        # if DatetimeIndexes have different tz, convert both to the
        # joined index
        if is_series or (not is_series and axis == 0):
            if is_datetime64tz_dtype(left.index):
                if left.index.tz != right.index.tz:
                    if join_index is not None:
                        left.index = join_index
                        right.index = join_index

        return left.__finalize__(self), right.__finalize__(other)
    def _where(self, cond, other=np.nan, inplace=False, axis=None, level=None,
               errors='raise', try_cast=False):
        """
        Equivalent to public method `where`, except that `other` is not
        applied as a function even if callable. Used in __setitem__.
        """
        inplace = validate_bool_kwarg(inplace, 'inplace')

        # align the cond to same shape as myself
        cond = com.apply_if_callable(cond, self)
        if isinstance(cond, NDFrame):
            cond, _ = cond.align(self, join='right', broadcast_axis=1)
        else:
            if not hasattr(cond, 'shape'):
                cond = np.asanyarray(cond)
            if cond.shape != self.shape:
                raise ValueError('Array conditional must be same shape as '
                                 'self')
            cond = self._constructor(cond, **self._construct_axes_dict())

        # make sure we are boolean
        fill_value = True if inplace else False
        cond = cond.fillna(fill_value)

        msg = "Boolean array expected for the condition, not {dtype}"

        if not isinstance(cond, pd.DataFrame):
            # This is a single-dimensional object.
            if not is_bool_dtype(cond):
                raise ValueError(msg.format(dtype=cond.dtype))
        elif not cond.empty:
            for dt in cond.dtypes:
                if not is_bool_dtype(dt):
                    raise ValueError(msg.format(dtype=dt))

        cond = ~cond if inplace else cond

        # try to align with other
        try_quick = True
        if hasattr(other, 'align'):

            # align with me
            if other.ndim <= self.ndim:

                _, other = self.align(other, join='left', axis=axis,
                                      level=level, fill_value=np.nan)

                # if we are NOT aligned, raise as we cannot where index
                if (axis is None and
                        not all(other._get_axis(i).equals(ax)
                                for i, ax in enumerate(self.axes))):
                    raise InvalidIndexError

            # slice me out of the other
            else:
                raise NotImplementedError("cannot align with a higher "
                                          "dimensional NDFrame")

        if isinstance(other, np.ndarray):

            if other.shape != self.shape:

                if self.ndim == 1:

                    icond = cond.values

                    # GH 2745 / GH 4192
                    # treat like a scalar
                    if len(other) == 1:
                        other = np.array(other[0])

                    # GH 3235
                    # match True cond to other
                    elif len(cond[icond]) == len(other):

                        # try to not change dtype at first (if try_quick)
                        if try_quick:
                            try:
                                new_other = com.values_from_object(self)
                                new_other = new_other.copy()
                                new_other[icond] = other
                                other = new_other
                            except Exception:
                                try_quick = False

                        # create a new object (if we failed at the above,
                        # or try_quick was disabled)
                        if not try_quick:
                            dtype, fill_value = maybe_promote(other.dtype)
                            new_other = np.empty(len(icond), dtype=dtype)
                            new_other.fill(fill_value)
                            maybe_upcast_putmask(new_other, icond, other)
                            other = new_other

                    else:
                        raise ValueError('Length of replacements must equal '
                                         'series length')

                else:
                    raise ValueError('other must be the same shape as self '
                                     'when an ndarray')

            # we are the same shape, so create an actual object for alignment
            else:
                other = self._constructor(other,
                                          **self._construct_axes_dict())

        if axis is None:
            axis = 0

        if self.ndim == getattr(other, 'ndim', 0):
            align = True
        else:
            align = (self._get_axis_number(axis) == 1)

        block_axis = self._get_block_manager_axis(axis)

        if inplace:
            # we may have different type blocks come out of putmask, so
            # reconstruct the block manager
            self._check_inplace_setting(other)
            new_data = self._data.putmask(mask=cond, new=other, align=align,
                                          inplace=True, axis=block_axis,
                                          transpose=self._AXIS_REVERSED)
            self._update_inplace(new_data)

        else:
            new_data = self._data.where(other=other, cond=cond, align=align,
                                        errors=errors,
                                        try_cast=try_cast, axis=block_axis,
                                        transpose=self._AXIS_REVERSED)

            return self._constructor(new_data).__finalize__(self)
    _shared_docs['where'] = ("""
    Replace values where the condition is %(cond_rev)s.

    Parameters
    ----------
    cond : boolean %(klass)s, array-like, or callable
        Where `cond` is %(cond)s, keep the original value. Where
        %(cond_rev)s, replace with corresponding value from `other`.
        If `cond` is callable, it is computed on the %(klass)s and
        should return boolean %(klass)s or array. The callable must
        not change input %(klass)s (though pandas doesn't check it).

        .. versionadded:: 0.18.1
            A callable can be used as cond.

    other : scalar, %(klass)s, or callable
        Entries where `cond` is %(cond_rev)s are replaced with
        corresponding value from `other`.
        If other is callable, it is computed on the %(klass)s and
        should return scalar or %(klass)s. The callable must not
        change input %(klass)s (though pandas doesn't check it).

        .. versionadded:: 0.18.1
            A callable can be used as other.

    inplace : boolean, default False
        Whether to perform the operation in place on the data.
    axis : int, default None
        Alignment axis if needed.
    level : int, default None
        Alignment level if needed.
    errors : str, {'raise', 'ignore'}, default 'raise'
        Note that currently this parameter won't affect
        the results and will always coerce to a suitable dtype.

        - 'raise' : allow exceptions to be raised.
        - 'ignore' : suppress exceptions. On error return original object.
    try_cast : boolean, default False
        Try to cast the result back to the input type (if possible).
    raise_on_error : boolean, default True
        Whether to raise on invalid data types (e.g. trying to where on
        strings).

        .. deprecated:: 0.21.0
            Use `errors`.

    Returns
    -------
    wh : same type as caller

    See Also
    --------
    :func:`DataFrame.%(name_other)s` : Return an object of same shape as
        self.

    Notes
    -----
    The %(name)s method is an application of the if-then idiom. For each
    element in the calling DataFrame, if ``cond`` is ``%(cond)s`` the
    element is used; otherwise the corresponding element from the DataFrame
    ``other`` is used.

    The signature for :func:`DataFrame.where` differs from
    :func:`numpy.where`. Roughly ``df1.where(m, df2)`` is equivalent to
    ``np.where(m, df1, df2)``.

    For further details and examples see the ``%(name)s`` documentation in
    :ref:`indexing <indexing.where_mask>`.

    Examples
    --------
    >>> s = pd.Series(range(5))
    >>> s.where(s > 0)
    0    NaN
    1    1.0
    2    2.0
    3    3.0
    4    4.0
    dtype: float64

    >>> s.mask(s > 0)
    0    0.0
    1    NaN
    2    NaN
    3    NaN
    4    NaN
    dtype: float64

    >>> s.where(s > 1, 10)
    0    10
    1    10
    2     2
    3     3
    4     4
    dtype: int64

    >>> df = pd.DataFrame(np.arange(10).reshape(-1, 2), columns=['A', 'B'])
    >>> m = df %% 3 == 0
    >>> df.where(m, -df)
       A  B
    0  0 -1
    1 -2  3
    2 -4 -5
    3  6 -7
    4 -8  9
    >>> df.where(m, -df) == np.where(m, df, -df)
          A     B
    0  True  True
    1  True  True
    2  True  True
    3  True  True
    4  True  True
    >>> df.where(m, -df) == df.mask(~m, -df)
          A     B
    0  True  True
    1  True  True
    2  True  True
    3  True  True
    4  True  True
    """)
    @Appender(_shared_docs['where'] % dict(_shared_doc_kwargs, cond="True",
                                           cond_rev="False", name='where',
                                           name_other='mask'))
    def where(self, cond, other=np.nan, inplace=False, axis=None, level=None,
              errors='raise', try_cast=False, raise_on_error=None):

        if raise_on_error is not None:
            warnings.warn(
                "raise_on_error is deprecated in "
                "favor of errors='raise|ignore'",
                FutureWarning, stacklevel=2)

            if raise_on_error:
                errors = 'raise'
            else:
                errors = 'ignore'

        other = com.apply_if_callable(other, self)
        return self._where(cond, other, inplace, axis, level,
                           errors=errors, try_cast=try_cast)

    @Appender(_shared_docs['where'] % dict(_shared_doc_kwargs, cond="False",
                                           cond_rev="True", name='mask',
                                           name_other='where'))
    def mask(self, cond, other=np.nan, inplace=False, axis=None, level=None,
             errors='raise', try_cast=False, raise_on_error=None):

        if raise_on_error is not None:
            warnings.warn(
                "raise_on_error is deprecated in "
                "favor of errors='raise|ignore'",
                FutureWarning, stacklevel=2)

            if raise_on_error:
                errors = 'raise'
            else:
                errors = 'ignore'

        inplace = validate_bool_kwarg(inplace, 'inplace')
        cond = com.apply_if_callable(cond, self)

        # see gh-21891
        if not hasattr(cond, "__invert__"):
            cond = np.array(cond)

        return self.where(~cond, other=other, inplace=inplace, axis=axis,
                          level=level, try_cast=try_cast,
                          errors=errors)
    _shared_docs['shift'] = ("""
    Shift index by desired number of periods with an optional time `freq`.

    When `freq` is not passed, shift the index without realigning the data.
    If `freq` is passed (in this case, the index must be date or datetime,
    or it will raise a `NotImplementedError`), the index will be
    increased using the periods and the `freq`.

    Parameters
    ----------
    periods : int
        Number of periods to shift. Can be positive or negative.
    freq : DateOffset, tseries.offsets, timedelta, or str, optional
        Offset to use from the tseries module or time rule (e.g. 'EOM').
        If `freq` is specified then the index values are shifted but the
        data is not realigned. That is, use `freq` if you would like to
        extend the index when shifting and preserve the original data.
    axis : {0 or 'index', 1 or 'columns', None}, default None
        Shift direction.
    fill_value : object, optional
        The scalar value to use for newly introduced missing values.
        The default depends on the dtype of `self`.
        For numeric data, ``np.nan`` is used.
        For datetime, timedelta, or period data, etc. :attr:`NaT` is used.
        For extension dtypes, ``self.dtype.na_value`` is used.

        .. versionchanged:: 0.24.0

    Returns
    -------
    %(klass)s
        Copy of input object, shifted.

    See Also
    --------
    Index.shift : Shift values of Index.
    DatetimeIndex.shift : Shift values of DatetimeIndex.
    PeriodIndex.shift : Shift values of PeriodIndex.
    tshift : Shift the time index, using the index's frequency if
        available.

    Examples
    --------
    >>> df = pd.DataFrame({'Col1': [10, 20, 15, 30, 45],
    ...                    'Col2': [13, 23, 18, 33, 48],
    ...                    'Col3': [17, 27, 22, 37, 52]})

    >>> df.shift(periods=3)
       Col1  Col2  Col3
    0   NaN   NaN   NaN
    1   NaN   NaN   NaN
    2   NaN   NaN   NaN
    3  10.0  13.0  17.0
    4  20.0  23.0  27.0

    >>> df.shift(periods=1, axis='columns')
       Col1  Col2  Col3
    0   NaN  10.0  13.0
    1   NaN  20.0  23.0
    2   NaN  15.0  18.0
    3   NaN  30.0  33.0
    4   NaN  45.0  48.0

    >>> df.shift(periods=3, fill_value=0)
       Col1  Col2  Col3
    0     0     0     0
    1     0     0     0
    2     0     0     0
    3    10    13    17
    4    20    23    27
    """)
    @Appender(_shared_docs['shift'] % _shared_doc_kwargs)
    def shift(self, periods=1, freq=None, axis=0, fill_value=None):
        if periods == 0:
            return self.copy()

        block_axis = self._get_block_manager_axis(axis)
        if freq is None:
            new_data = self._data.shift(periods=periods, axis=block_axis,
                                        fill_value=fill_value)
        else:
            return self.tshift(periods, freq)

        return self._constructor(new_data).__finalize__(self)
    def slice_shift(self, periods=1, axis=0):
        """
        Equivalent to `shift` without copying data.

        The shifted data will not include the dropped periods and the
        shifted axis will be smaller than the original.

        Parameters
        ----------
        periods : int
            Number of periods to move, can be positive or negative.

        Returns
        -------
        shifted : same type as caller

        Notes
        -----
        While `slice_shift` is faster than `shift`, you may pay for it
        later during alignment.
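
        Examples
        --------
        A minimal sketch: shifting by one period drops the last row and
        re-labels the remaining values with the shifted part of the index:

        >>> s = pd.Series([10, 20, 30, 40])
        >>> s.slice_shift(1)
        1    10
        2    20
        3    30
        dtype: int64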
  7475. """
  7476. if periods == 0:
  7477. return self
  7478. if periods > 0:
  7479. vslicer = slice(None, -periods)
  7480. islicer = slice(periods, None)
  7481. else:
  7482. vslicer = slice(-periods, None)
  7483. islicer = slice(None, periods)
  7484. new_obj = self._slice(vslicer, axis=axis)
  7485. shifted_axis = self._get_axis(axis)[islicer]
  7486. new_obj.set_axis(shifted_axis, axis=axis, inplace=True)
  7487. return new_obj.__finalize__(self)
    def tshift(self, periods=1, freq=None, axis=0):
        """
        Shift the time index, using the index's frequency if available.

        Parameters
        ----------
        periods : int
            Number of periods to move, can be positive or negative.
        freq : DateOffset, timedelta, or time rule string, default None
            Increment to use from the tseries module or time rule
            (e.g. 'EOM').
        axis : int or basestring
            Corresponds to the axis that contains the Index.

        Returns
        -------
        shifted : NDFrame

        Notes
        -----
        If freq is not specified then tries to use the freq or inferred_freq
        attributes of the index. If neither of those attributes exist, a
        ValueError is raised.
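
        Examples
        --------
        A small sketch of shifting only the index labels of a
        daily-frequency Series (the data itself is unchanged):

        >>> idx = pd.date_range('2018-01-01', periods=3, freq='D')
        >>> s = pd.Series([1, 2, 3], index=idx)
        >>> s.tshift(1)
        2018-01-02    1
        2018-01-03    2
        2018-01-04    3
        Freq: D, dtype: int64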
  7507. """
  7508. index = self._get_axis(axis)
  7509. if freq is None:
  7510. freq = getattr(index, 'freq', None)
  7511. if freq is None:
  7512. freq = getattr(index, 'inferred_freq', None)
  7513. if freq is None:
  7514. msg = 'Freq was not given and was not set in the index'
  7515. raise ValueError(msg)
  7516. if periods == 0:
  7517. return self
  7518. if isinstance(freq, string_types):
  7519. freq = to_offset(freq)
  7520. block_axis = self._get_block_manager_axis(axis)
  7521. if isinstance(index, PeriodIndex):
  7522. orig_freq = to_offset(index.freq)
  7523. if freq == orig_freq:
  7524. new_data = self._data.copy()
  7525. new_data.axes[block_axis] = index.shift(periods)
  7526. else:
  7527. msg = ('Given freq %s does not match PeriodIndex freq %s' %
  7528. (freq.rule_code, orig_freq.rule_code))
  7529. raise ValueError(msg)
  7530. else:
  7531. new_data = self._data.copy()
  7532. new_data.axes[block_axis] = index.shift(periods, freq)
  7533. return self._constructor(new_data).__finalize__(self)
  7534. def truncate(self, before=None, after=None, axis=None, copy=True):
  7535. """
  7536. Truncate a Series or DataFrame before and after some index value.
  7537. This is a useful shorthand for boolean indexing based on index
  7538. values above or below certain thresholds.
  7539. Parameters
  7540. ----------
  7541. before : date, string, int
  7542. Truncate all rows before this index value.
  7543. after : date, string, int
  7544. Truncate all rows after this index value.
  7545. axis : {0 or 'index', 1 or 'columns'}, optional
  7546. Axis to truncate. Truncates the index (rows) by default.
  7547. copy : boolean, default is True,
  7548. Return a copy of the truncated section.
  7549. Returns
  7550. -------
  7551. type of caller
  7552. The truncated Series or DataFrame.
  7553. See Also
  7554. --------
  7555. DataFrame.loc : Select a subset of a DataFrame by label.
  7556. DataFrame.iloc : Select a subset of a DataFrame by position.
  7557. Notes
  7558. -----
  7559. If the index being truncated contains only datetime values,
  7560. `before` and `after` may be specified as strings instead of
  7561. Timestamps.
  7562. Examples
  7563. --------
  7564. >>> df = pd.DataFrame({'A': ['a', 'b', 'c', 'd', 'e'],
  7565. ... 'B': ['f', 'g', 'h', 'i', 'j'],
  7566. ... 'C': ['k', 'l', 'm', 'n', 'o']},
  7567. ... index=[1, 2, 3, 4, 5])
  7568. >>> df
  7569. A B C
  7570. 1 a f k
  7571. 2 b g l
  7572. 3 c h m
  7573. 4 d i n
  7574. 5 e j o
  7575. >>> df.truncate(before=2, after=4)
  7576. A B C
  7577. 2 b g l
  7578. 3 c h m
  7579. 4 d i n
  7580. The columns of a DataFrame can be truncated.
  7581. >>> df.truncate(before="A", after="B", axis="columns")
  7582. A B
  7583. 1 a f
  7584. 2 b g
  7585. 3 c h
  7586. 4 d i
  7587. 5 e j
  7588. For Series, only rows can be truncated.
  7589. >>> df['A'].truncate(before=2, after=4)
  7590. 2 b
  7591. 3 c
  7592. 4 d
  7593. Name: A, dtype: object
  7594. The index values in ``truncate`` can be datetimes or string
  7595. dates.
  7596. >>> dates = pd.date_range('2016-01-01', '2016-02-01', freq='s')
  7597. >>> df = pd.DataFrame(index=dates, data={'A': 1})
  7598. >>> df.tail()
  7599. A
  7600. 2016-01-31 23:59:56 1
  7601. 2016-01-31 23:59:57 1
  7602. 2016-01-31 23:59:58 1
  7603. 2016-01-31 23:59:59 1
  7604. 2016-02-01 00:00:00 1
  7605. >>> df.truncate(before=pd.Timestamp('2016-01-05'),
  7606. ... after=pd.Timestamp('2016-01-10')).tail()
  7607. A
  7608. 2016-01-09 23:59:56 1
  7609. 2016-01-09 23:59:57 1
  7610. 2016-01-09 23:59:58 1
  7611. 2016-01-09 23:59:59 1
  7612. 2016-01-10 00:00:00 1
  7613. Because the index is a DatetimeIndex containing only dates, we can
  7614. specify `before` and `after` as strings. They will be coerced to
  7615. Timestamps before truncation.
  7616. >>> df.truncate('2016-01-05', '2016-01-10').tail()
  7617. A
  7618. 2016-01-09 23:59:56 1
  7619. 2016-01-09 23:59:57 1
  7620. 2016-01-09 23:59:58 1
  7621. 2016-01-09 23:59:59 1
  7622. 2016-01-10 00:00:00 1
  7623. Note that ``truncate`` assumes a 0 value for any unspecified time
  7624. component (midnight). This differs from partial string slicing, which
  7625. returns any partially matching dates.
  7626. >>> df.loc['2016-01-05':'2016-01-10', :].tail()
  7627. A
  7628. 2016-01-10 23:59:55 1
  7629. 2016-01-10 23:59:56 1
  7630. 2016-01-10 23:59:57 1
  7631. 2016-01-10 23:59:58 1
  7632. 2016-01-10 23:59:59 1
  7633. """
        if axis is None:
            axis = self._stat_axis_number
        axis = self._get_axis_number(axis)
        ax = self._get_axis(axis)

        # GH 17935
        # Check that index is sorted
        if not ax.is_monotonic_increasing and not ax.is_monotonic_decreasing:
            raise ValueError("truncate requires a sorted index")

        # if we have a date index, convert to dates, otherwise
        # treat like a slice
        if ax.is_all_dates:
            from pandas.core.tools.datetimes import to_datetime
            before = to_datetime(before)
            after = to_datetime(after)

        if before is not None and after is not None:
            if before > after:
                raise ValueError('Truncate: %s must be after %s' %
                                 (after, before))

        slicer = [slice(None, None)] * self._AXIS_LEN
        slicer[axis] = slice(before, after)
        result = self.loc[tuple(slicer)]

        if isinstance(ax, MultiIndex):
            setattr(result, self._get_axis_name(axis),
                    ax.truncate(before, after))

        if copy:
            result = result.copy()

        return result
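
    # Illustrative sketch (not part of the original source): ``truncate`` is
    # label-based, so on the integer-indexed frame from the docstring above it
    # is equivalent to inclusive label slicing with ``.loc``:
    #
    # >>> df.truncate(before=2, after=4).equals(df.loc[2:4])
    # True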
    def tz_convert(self, tz, axis=0, level=None, copy=True):
        """
        Convert tz-aware axis to target time zone.

        Parameters
        ----------
        tz : string or pytz.timezone object
        axis : the axis to convert
        level : int, str, default None
            If axis is a MultiIndex, convert a specific level. Otherwise
            must be None.
        copy : boolean, default True
            Also make a copy of the underlying data.

        Returns
        -------
        Series or DataFrame
            Same type as the input, with the axis converted.

        Raises
        ------
        TypeError
            If the axis is tz-naive.
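
        Examples
        --------
        >>> s = pd.Series([1],
        ...     index=pd.DatetimeIndex(['2018-09-15 01:30:00+02:00']))
        >>> s.tz_convert('Asia/Shanghai')
        2018-09-15 07:30:00+08:00    1
        dtype: int64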
  7679. """
        axis = self._get_axis_number(axis)
        ax = self._get_axis(axis)

        def _tz_convert(ax, tz):
            if not hasattr(ax, 'tz_convert'):
                if len(ax) > 0:
                    ax_name = self._get_axis_name(axis)
                    raise TypeError('%s is not a valid DatetimeIndex or '
                                    'PeriodIndex' % ax_name)
                else:
                    ax = DatetimeIndex([], tz=tz)
            else:
                ax = ax.tz_convert(tz)
            return ax

        # if a level is given it must be a MultiIndex level or
        # equivalent to the axis name
        if isinstance(ax, MultiIndex):
            level = ax._get_level_number(level)
            new_level = _tz_convert(ax.levels[level], tz)
            ax = ax.set_levels(new_level, level=level)
        else:
            if level not in (None, 0, ax.name):
                raise ValueError("The level {0} is not valid".format(level))
            ax = _tz_convert(ax, tz)

        result = self._constructor(self._data, copy=copy)
        result = result.set_axis(ax, axis=axis, inplace=False)
        return result.__finalize__(self)
    def tz_localize(self, tz, axis=0, level=None, copy=True,
                    ambiguous='raise', nonexistent='raise'):
        """
        Localize tz-naive index of a Series or DataFrame to target time zone.

        This operation localizes the Index. To localize the values in a
        timezone-naive Series, use :meth:`Series.dt.tz_localize`.

        Parameters
        ----------
        tz : string or pytz.timezone object
        axis : the axis to localize
        level : int, str, default None
            If axis is a MultiIndex, localize a specific level. Otherwise
            must be None.
        copy : boolean, default True
            Also make a copy of the underlying data.
        ambiguous : 'infer', bool-ndarray, 'NaT', default 'raise'
            When clocks moved backward due to DST, ambiguous times may arise.
            For example in Central European Time (UTC+01), when going from
            03:00 DST to 02:00 non-DST, 02:30:00 local time occurs both at
            00:30:00 UTC and at 01:30:00 UTC. In such a situation, the
            `ambiguous` parameter dictates how ambiguous times should be
            handled.

            - 'infer' will attempt to infer fall dst-transition hours based on
              order
            - bool-ndarray where True signifies a DST time, False designates
              a non-DST time (note that this flag is only applicable for
              ambiguous times)
            - 'NaT' will return NaT where there are ambiguous times
            - 'raise' will raise an AmbiguousTimeError if there are ambiguous
              times
        nonexistent : str, default 'raise'
            A nonexistent time does not exist in a particular timezone
            where clocks moved forward due to DST. Valid values are:

            - 'shift_forward' will shift the nonexistent time forward to the
              closest existing time
            - 'shift_backward' will shift the nonexistent time backward to the
              closest existing time
            - 'NaT' will return NaT where there are nonexistent times
            - timedelta objects will shift nonexistent times by the timedelta
            - 'raise' will raise a NonExistentTimeError if there are
              nonexistent times

            .. versionadded:: 0.24.0

        Returns
        -------
        Series or DataFrame
            Same type as the input.

        Raises
        ------
        TypeError
            If the TimeSeries is tz-aware and tz is not None.

        Examples
        --------
        Localize local times:

        >>> s = pd.Series([1],
        ...     index=pd.DatetimeIndex(['2018-09-15 01:30:00']))
        >>> s.tz_localize('CET')
        2018-09-15 01:30:00+02:00    1
        dtype: int64

        Be careful with DST changes. When there is sequential data, pandas
        can infer the DST time:

        >>> s = pd.Series(range(7), index=pd.DatetimeIndex([
        ...     '2018-10-28 01:30:00',
        ...     '2018-10-28 02:00:00',
        ...     '2018-10-28 02:30:00',
        ...     '2018-10-28 02:00:00',
        ...     '2018-10-28 02:30:00',
        ...     '2018-10-28 03:00:00',
        ...     '2018-10-28 03:30:00']))
        >>> s.tz_localize('CET', ambiguous='infer')
        2018-10-28 01:30:00+02:00    0
        2018-10-28 02:00:00+02:00    1
        2018-10-28 02:30:00+02:00    2
        2018-10-28 02:00:00+01:00    3
        2018-10-28 02:30:00+01:00    4
        2018-10-28 03:00:00+01:00    5
        2018-10-28 03:30:00+01:00    6
        dtype: int64

        In some cases, inferring the DST is impossible. In such cases, you can
        pass an ndarray to the ambiguous parameter to set the DST explicitly

        >>> s = pd.Series(range(3), index=pd.DatetimeIndex([
        ...     '2018-10-28 01:20:00',
        ...     '2018-10-28 02:36:00',
        ...     '2018-10-28 03:46:00']))
        >>> s.tz_localize('CET', ambiguous=np.array([True, True, False]))
        2018-10-28 01:20:00+02:00    0
        2018-10-28 02:36:00+02:00    1
        2018-10-28 03:46:00+01:00    2
        dtype: int64

        If the DST transition causes nonexistent times, you can shift these
        dates forward or backwards with a timedelta object or `'shift_forward'`
        or `'shift_backward'`.

        >>> s = pd.Series(range(2), index=pd.DatetimeIndex([
        ...     '2015-03-29 02:30:00',
        ...     '2015-03-29 03:30:00']))
        >>> s.tz_localize('Europe/Warsaw', nonexistent='shift_forward')
        2015-03-29 03:00:00+02:00    0
        2015-03-29 03:30:00+02:00    1
        dtype: int64
        >>> s.tz_localize('Europe/Warsaw', nonexistent='shift_backward')
        2015-03-29 01:59:59.999999999+01:00    0
        2015-03-29 03:30:00+02:00              1
        dtype: int64
        >>> s.tz_localize('Europe/Warsaw', nonexistent=pd.Timedelta('1H'))
        2015-03-29 03:30:00+02:00    0
        2015-03-29 03:30:00+02:00    1
        dtype: int64
        """
        nonexistent_options = ('raise', 'NaT', 'shift_forward',
                               'shift_backward')
        if nonexistent not in nonexistent_options and not isinstance(
                nonexistent, timedelta):
            raise ValueError("The nonexistent argument must be one of 'raise',"
                             " 'NaT', 'shift_forward', 'shift_backward' or"
                             " a timedelta object")

        axis = self._get_axis_number(axis)
        ax = self._get_axis(axis)

        def _tz_localize(ax, tz, ambiguous, nonexistent):
            if not hasattr(ax, 'tz_localize'):
                if len(ax) > 0:
                    ax_name = self._get_axis_name(axis)
                    raise TypeError('%s is not a valid DatetimeIndex or '
                                    'PeriodIndex' % ax_name)
                else:
                    ax = DatetimeIndex([], tz=tz)
            else:
                ax = ax.tz_localize(
                    tz, ambiguous=ambiguous, nonexistent=nonexistent
                )
            return ax

        # if a level is given it must be a MultiIndex level or
        # equivalent to the axis name
        if isinstance(ax, MultiIndex):
            level = ax._get_level_number(level)
            new_level = _tz_localize(
                ax.levels[level], tz, ambiguous, nonexistent
            )
            ax = ax.set_levels(new_level, level=level)
        else:
            if level not in (None, 0, ax.name):
                raise ValueError("The level {0} is not valid".format(level))
            ax = _tz_localize(ax, tz, ambiguous, nonexistent)

        result = self._constructor(self._data, copy=copy)
        result = result.set_axis(ax, axis=axis, inplace=False)
        return result.__finalize__(self)
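
    # Sketch (not part of the original source): ``tz_localize`` attaches a
    # time zone while keeping wall times, and passing ``tz=None`` strips it
    # again, so a round-trip restores the naive index:
    #
    # >>> s = pd.Series([1], index=pd.DatetimeIndex(['2018-09-15 01:30:00']))
    # >>> s.tz_localize('CET').tz_localize(None).equals(s)
    # True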
    # ----------------------------------------------------------------------
    # Numeric Methods
    def abs(self):
        """
        Return a Series/DataFrame with absolute numeric value of each element.

        This function only applies to elements that are all numeric.

        Returns
        -------
        abs
            Series/DataFrame containing the absolute value of each element.

        See Also
        --------
        numpy.absolute : Calculate the absolute value element-wise.

        Notes
        -----
        For ``complex`` inputs, ``1.2 + 1j``, the absolute value is
        :math:`\\sqrt{ a^2 + b^2 }`.

        Examples
        --------
        Absolute numeric values in a Series.

        >>> s = pd.Series([-1.10, 2, -3.33, 4])
        >>> s.abs()
        0    1.10
        1    2.00
        2    3.33
        3    4.00
        dtype: float64

        Absolute numeric values in a Series with complex numbers.

        >>> s = pd.Series([1.2 + 1j])
        >>> s.abs()
        0    1.56205
        dtype: float64

        Absolute numeric values in a Series with a Timedelta element.

        >>> s = pd.Series([pd.Timedelta('1 days')])
        >>> s.abs()
        0   1 days
        dtype: timedelta64[ns]

        Select rows with data closest to certain value using argsort (from
        `StackOverflow <https://stackoverflow.com/a/17758115>`__).

        >>> df = pd.DataFrame({
        ...     'a': [4, 5, 6, 7],
        ...     'b': [10, 20, 30, 40],
        ...     'c': [100, 50, -30, -50]
        ... })
        >>> df
           a   b    c
        0  4  10  100
        1  5  20   50
        2  6  30  -30
        3  7  40  -50
        >>> df.loc[(df.c - 43).abs().argsort()]
           a   b    c
        1  5  20   50
        0  4  10  100
        2  6  30  -30
        3  7  40  -50
        """
        return np.abs(self)
    def describe(self, percentiles=None, include=None, exclude=None):
        """
        Generate descriptive statistics that summarize the central tendency,
        dispersion and shape of a dataset's distribution, excluding
        ``NaN`` values.

        Analyzes both numeric and object series, as well
        as ``DataFrame`` column sets of mixed data types. The output
        will vary depending on what is provided. Refer to the notes
        below for more detail.

        Parameters
        ----------
        percentiles : list-like of numbers, optional
            The percentiles to include in the output. All should
            fall between 0 and 1. The default is
            ``[.25, .5, .75]``, which returns the 25th, 50th, and
            75th percentiles.
        include : 'all', list-like of dtypes or None (default), optional
            A white list of data types to include in the result. Ignored
            for ``Series``. Here are the options:

            - 'all' : All columns of the input will be included in the output.
            - A list-like of dtypes : Limits the results to the
              provided data types.
              To limit the result to numeric types submit
              ``numpy.number``. To limit it instead to object columns submit
              the ``numpy.object`` data type. Strings
              can also be used in the style of
              ``select_dtypes`` (e.g. ``df.describe(include=['O'])``). To
              select pandas categorical columns, use ``'category'``
            - None (default) : The result will include all numeric columns.
        exclude : list-like of dtypes or None (default), optional
            A black list of data types to omit from the result. Ignored
            for ``Series``. Here are the options:

            - A list-like of dtypes : Excludes the provided data types
              from the result. To exclude numeric types submit
              ``numpy.number``. To exclude object columns submit the data
              type ``numpy.object``. Strings can also be used in the style of
              ``select_dtypes`` (e.g. ``df.describe(include=['O'])``). To
              exclude pandas categorical columns, use ``'category'``
            - None (default) : The result will exclude nothing.

        Returns
        -------
        Series or DataFrame
            Summary statistics of the Series or DataFrame provided.

        See Also
        --------
        DataFrame.count: Count number of non-NA/null observations.
        DataFrame.max: Maximum of the values in the object.
        DataFrame.min: Minimum of the values in the object.
        DataFrame.mean: Mean of the values.
        DataFrame.std: Standard deviation of the observations.
        DataFrame.select_dtypes: Subset of a DataFrame including/excluding
            columns based on their dtype.

        Notes
        -----
        For numeric data, the result's index will include ``count``,
        ``mean``, ``std``, ``min``, ``max`` as well as lower, ``50`` and
        upper percentiles. By default the lower percentile is ``25`` and the
        upper percentile is ``75``. The ``50`` percentile is the
        same as the median.

        For object data (e.g. strings or timestamps), the result's index
        will include ``count``, ``unique``, ``top``, and ``freq``. The ``top``
        is the most common value. The ``freq`` is the most common value's
        frequency. Timestamps also include the ``first`` and ``last`` items.

        If multiple object values have the highest count, then the ``top``
        result will be arbitrarily chosen from among those with the highest
        count.

        For mixed data types provided via a ``DataFrame``, the default is to
        return only an analysis of numeric columns. If the dataframe consists
        only of object and categorical data without any numeric columns, the
        default is to return an analysis of both the object and categorical
        columns. If ``include='all'`` is provided as an option, the result
        will include a union of attributes of each type.

        The `include` and `exclude` parameters can be used to limit
        which columns in a ``DataFrame`` are analyzed for the output.
        The parameters are ignored when analyzing a ``Series``.

        Examples
        --------
        Describing a numeric ``Series``.

        >>> s = pd.Series([1, 2, 3])
        >>> s.describe()
        count    3.0
        mean     2.0
        std      1.0
        min      1.0
        25%      1.5
        50%      2.0
        75%      2.5
        max      3.0
        dtype: float64

        Describing a categorical ``Series``.

        >>> s = pd.Series(['a', 'a', 'b', 'c'])
        >>> s.describe()
        count     4
        unique    3
        top       a
        freq      2
        dtype: object

        Describing a timestamp ``Series``.

        >>> s = pd.Series([
        ...     np.datetime64("2000-01-01"),
        ...     np.datetime64("2010-01-01"),
        ...     np.datetime64("2010-01-01")
        ... ])
        >>> s.describe()
        count                       3
        unique                      2
        top       2010-01-01 00:00:00
        freq                        2
        first     2000-01-01 00:00:00
        last      2010-01-01 00:00:00
        dtype: object

        Describing a ``DataFrame``. By default only numeric fields
        are returned.

        >>> df = pd.DataFrame({'categorical': pd.Categorical(['d','e','f']),
        ...                    'numeric': [1, 2, 3],
        ...                    'object': ['a', 'b', 'c']
        ...                   })
        >>> df.describe()
               numeric
        count      3.0
        mean       2.0
        std        1.0
        min        1.0
        25%        1.5
        50%        2.0
        75%        2.5
        max        3.0

        Describing all columns of a ``DataFrame`` regardless of data type.

        >>> df.describe(include='all')
                categorical  numeric object
        count             3      3.0      3
        unique            3      NaN      3
        top               f      NaN      c
        freq              1      NaN      1
        mean            NaN      2.0    NaN
        std             NaN      1.0    NaN
        min             NaN      1.0    NaN
        25%             NaN      1.5    NaN
        50%             NaN      2.0    NaN
        75%             NaN      2.5    NaN
        max             NaN      3.0    NaN

        Describing a column from a ``DataFrame`` by accessing it as
        an attribute.

        >>> df.numeric.describe()
        count    3.0
        mean     2.0
        std      1.0
        min      1.0
        25%      1.5
        50%      2.0
        75%      2.5
        max      3.0
        Name: numeric, dtype: float64

        Including only numeric columns in a ``DataFrame`` description.

        >>> df.describe(include=[np.number])
               numeric
        count      3.0
        mean       2.0
        std        1.0
        min        1.0
        25%        1.5
        50%        2.0
        75%        2.5
        max        3.0

        Including only string columns in a ``DataFrame`` description.

        >>> df.describe(include=[np.object])
               object
        count       3
        unique      3
        top         c
        freq        1

        Including only categorical columns from a ``DataFrame`` description.

        >>> df.describe(include=['category'])
               categorical
        count            3
        unique           3
        top              f
        freq             1

        Excluding numeric columns from a ``DataFrame`` description.

        >>> df.describe(exclude=[np.number])
               categorical object
        count            3      3
        unique           3      3
        top              f      c
        freq             1      1

        Excluding object columns from a ``DataFrame`` description.

        >>> df.describe(exclude=[np.object])
               categorical  numeric
        count            3      3.0
        unique           3      NaN
        top              f      NaN
        freq             1      NaN
        mean           NaN      2.0
        std            NaN      1.0
        min            NaN      1.0
        25%            NaN      1.5
        50%            NaN      2.0
        75%            NaN      2.5
        max            NaN      3.0
        """
        if self.ndim >= 3:
            msg = "describe is not implemented on Panel objects."
            raise NotImplementedError(msg)
        elif self.ndim == 2 and self.columns.size == 0:
            raise ValueError("Cannot describe a DataFrame without columns")

        if percentiles is not None:
            # explicit conversion of `percentiles` to list
            percentiles = list(percentiles)

            # get them all to be in [0, 1]
            self._check_percentile(percentiles)

            # median should always be included
            if 0.5 not in percentiles:
                percentiles.append(0.5)
            percentiles = np.asarray(percentiles)
        else:
            percentiles = np.array([0.25, 0.5, 0.75])

        # sort and check for duplicates
        unique_pcts = np.unique(percentiles)
        if len(unique_pcts) < len(percentiles):
            raise ValueError("percentiles cannot contain duplicates")
        percentiles = unique_pcts

        formatted_percentiles = format_percentiles(percentiles)

        def describe_numeric_1d(series):
            stat_index = (['count', 'mean', 'std', 'min'] +
                          formatted_percentiles + ['max'])
            d = ([series.count(), series.mean(), series.std(), series.min()] +
                 series.quantile(percentiles).tolist() + [series.max()])
            return pd.Series(d, index=stat_index, name=series.name)

        def describe_categorical_1d(data):
            names = ['count', 'unique']
            objcounts = data.value_counts()
            count_unique = len(objcounts[objcounts != 0])
            result = [data.count(), count_unique]
            if result[1] > 0:
                top, freq = objcounts.index[0], objcounts.iloc[0]

                if is_datetime64_any_dtype(data):
                    tz = data.dt.tz
                    asint = data.dropna().values.view('i8')
                    top = Timestamp(top)
                    if top.tzinfo is not None and tz is not None:
                        # Don't tz_localize(None) if key is already tz-aware
                        top = top.tz_convert(tz)
                    else:
                        top = top.tz_localize(tz)
                    names += ['top', 'freq', 'first', 'last']
                    result += [top, freq,
                               Timestamp(asint.min(), tz=tz),
                               Timestamp(asint.max(), tz=tz)]
                else:
                    names += ['top', 'freq']
                    result += [top, freq]

            return pd.Series(result, index=names, name=data.name)

        def describe_1d(data):
            if is_bool_dtype(data):
                return describe_categorical_1d(data)
            elif is_numeric_dtype(data):
                return describe_numeric_1d(data)
            elif is_timedelta64_dtype(data):
                return describe_numeric_1d(data)
            else:
                return describe_categorical_1d(data)

        if self.ndim == 1:
            return describe_1d(self)
        elif (include is None) and (exclude is None):
            # when some numerics are found, keep only numerics
            data = self.select_dtypes(include=[np.number])
            if len(data.columns) == 0:
                data = self
        elif include == 'all':
            if exclude is not None:
                msg = "exclude must be None when include is 'all'"
                raise ValueError(msg)
            data = self
        else:
            data = self.select_dtypes(include=include, exclude=exclude)

        ldesc = [describe_1d(s) for _, s in data.iteritems()]
        # set a convenient order for rows
        names = []
        ldesc_indexes = sorted((x.index for x in ldesc), key=len)
        for idxnames in ldesc_indexes:
            for name in idxnames:
                if name not in names:
                    names.append(name)

        d = pd.concat(ldesc, join_axes=pd.Index([names]), axis=1)
        d.columns = data.columns.copy()
        return d
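
    # Sketch (not part of the original source): per the body above, the median
    # (0.5) is always appended to custom ``percentiles``, so both calls below
    # produce the same 10%/50%/90% rows:
    #
    # >>> s = pd.Series([1, 2, 3])
    # >>> s.describe(percentiles=[.1, .9]).equals(
    # ...     s.describe(percentiles=[.1, .5, .9]))
    # True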
    def _check_percentile(self, q):
        """
        Validate percentiles (used by describe and quantile).
        """
        msg = ("percentiles should all be in the interval [0, 1]. "
               "Try {0} instead.")
        q = np.asarray(q)
        if q.ndim == 0:
            if not 0 <= q <= 1:
                raise ValueError(msg.format(q / 100.0))
        else:
            if not all(0 <= qs <= 1 for qs in q):
                raise ValueError(msg.format(q / 100.0))
        return q
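
    # Sketch (not part of the original source): values outside [0, 1] are
    # rejected, with the message hinting at the fractional form:
    #
    # >>> pd.Series([1, 2, 3]).describe(percentiles=[25])
    # Traceback (most recent call last):
    #     ...
    # ValueError: percentiles should all be in the interval [0, 1]. ...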
    _shared_docs['pct_change'] = """
        Percentage change between the current and a prior element.

        Computes the percentage change from the immediately previous row by
        default. This is useful in comparing the percentage of change in a time
        series of elements.

        Parameters
        ----------
        periods : int, default 1
            Periods to shift for forming percent change.
        fill_method : str, default 'pad'
            How to handle NAs before computing percent changes.
        limit : int, default None
            The number of consecutive NAs to fill before stopping.
        freq : DateOffset, timedelta, or offset alias string, optional
            Increment to use from time series API (e.g. 'M' or BDay()).
        **kwargs
            Additional keyword arguments are passed into
            `DataFrame.shift` or `Series.shift`.

        Returns
        -------
        chg : Series or DataFrame
            The same type as the calling object.

        See Also
        --------
        Series.diff : Compute the difference of two elements in a Series.
        DataFrame.diff : Compute the difference of two elements in a DataFrame.
        Series.shift : Shift the index by some number of periods.
        DataFrame.shift : Shift the index by some number of periods.

        Examples
        --------
        **Series**

        >>> s = pd.Series([90, 91, 85])
        >>> s
        0    90
        1    91
        2    85
        dtype: int64

        >>> s.pct_change()
        0         NaN
        1    0.011111
        2   -0.065934
        dtype: float64

        >>> s.pct_change(periods=2)
        0         NaN
        1         NaN
        2   -0.055556
        dtype: float64

        See the percentage change in a Series where filling NAs with last
        valid observation forward to next valid.

        >>> s = pd.Series([90, 91, None, 85])
        >>> s
        0    90.0
        1    91.0
        2     NaN
        3    85.0
        dtype: float64

        >>> s.pct_change(fill_method='ffill')
        0         NaN
        1    0.011111
        2    0.000000
        3   -0.065934
        dtype: float64

        **DataFrame**

        Percentage change in French franc, Deutsche Mark, and Italian lira from
        1980-01-01 to 1980-03-01.

        >>> df = pd.DataFrame({
        ...     'FR': [4.0405, 4.0963, 4.3149],
        ...     'GR': [1.7246, 1.7482, 1.8519],
        ...     'IT': [804.74, 810.01, 860.13]},
        ...     index=['1980-01-01', '1980-02-01', '1980-03-01'])
        >>> df
                        FR      GR      IT
        1980-01-01  4.0405  1.7246  804.74
        1980-02-01  4.0963  1.7482  810.01
        1980-03-01  4.3149  1.8519  860.13

        >>> df.pct_change()
                          FR        GR        IT
        1980-01-01       NaN       NaN       NaN
        1980-02-01  0.013810  0.013684  0.006549
        1980-03-01  0.053365  0.059318  0.061876

        Percentage of change in GOOG and APPL stock volume. Shows computing
        the percentage change between columns.

        >>> df = pd.DataFrame({
        ...     '2016': [1769950, 30586265],
        ...     '2015': [1500923, 40912316],
        ...     '2014': [1371819, 41403351]},
        ...     index=['GOOG', 'APPL'])
        >>> df
                  2016      2015      2014
        GOOG   1769950   1500923   1371819
        APPL  30586265  40912316  41403351

        >>> df.pct_change(axis='columns')
              2016      2015      2014
        GOOG   NaN -0.151997 -0.086016
        APPL   NaN  0.337604  0.012002
        """
    @Appender(_shared_docs['pct_change'] % _shared_doc_kwargs)
    def pct_change(self, periods=1, fill_method='pad', limit=None, freq=None,
                   **kwargs):
        # TODO: Not sure if above is correct - need someone to confirm.
        axis = self._get_axis_number(kwargs.pop('axis', self._stat_axis_name))
        if fill_method is None:
            data = self
        else:
            data = self.fillna(method=fill_method, limit=limit, axis=axis)

        rs = (data.div(data.shift(periods=periods, freq=freq, axis=axis,
                                  **kwargs)) - 1)
        rs = rs.reindex_like(data)
        if freq is None:
            mask = isna(com.values_from_object(data))
            np.putmask(rs.values, mask, np.nan)
        return rs
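
    # Sketch (not part of the original source): after the default forward
    # fill, the computation reduces to a shifted division, so for a Series
    # without NAs:
    #
    # >>> s = pd.Series([90, 91, 85])
    # >>> s.pct_change().equals(s / s.shift(1) - 1)
    # True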
    def _agg_by_level(self, name, axis=0, level=0, skipna=True, **kwargs):
        if axis is None:
            raise ValueError("Must specify 'axis' when aggregating by level.")
        grouped = self.groupby(level=level, axis=axis, sort=False)
        if hasattr(grouped, name) and skipna:
            return getattr(grouped, name)(**kwargs)
        axis = self._get_axis_number(axis)
        method = getattr(type(self), name)
        applyf = lambda x: method(x, axis=axis, skipna=skipna, **kwargs)
        return grouped.aggregate(applyf)
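
    # Sketch (not part of the original source): ``level`` aggregations route
    # through ``groupby(..., sort=False)`` as above, so for a MultiIndexed
    # Series:
    #
    # >>> idx = pd.MultiIndex.from_arrays([['a', 'a', 'b'], [1, 2, 3]])
    # >>> s = pd.Series([1, 2, 3], index=idx)
    # >>> s.sum(level=0).equals(s.groupby(level=0, sort=False).sum())
    # True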
    @classmethod
    def _add_numeric_operations(cls):
        """
        Add the operations to the cls; evaluate the doc strings again
        """
        axis_descr, name, name2 = _doc_parms(cls)

        cls.any = _make_logical_function(
            cls, 'any', name, name2, axis_descr, _any_desc, nanops.nanany,
            _any_see_also, _any_examples, empty_value=False)
        cls.all = _make_logical_function(
            cls, 'all', name, name2, axis_descr, _all_desc, nanops.nanall,
            _all_see_also, _all_examples, empty_value=True)

        @Substitution(outname='mad',
                      desc="Return the mean absolute deviation of the values "
                           "for the requested axis.",
                      name1=name, name2=name2, axis_descr=axis_descr,
                      min_count='', see_also='', examples='')
        @Appender(_num_doc)
        def mad(self, axis=None, skipna=None, level=None):
            if skipna is None:
                skipna = True
            if axis is None:
                axis = self._stat_axis_number
            if level is not None:
                return self._agg_by_level('mad', axis=axis, level=level,
                                          skipna=skipna)

            data = self._get_numeric_data()
            if axis == 0:
                demeaned = data - data.mean(axis=0)
            else:
                demeaned = data.sub(data.mean(axis=1), axis=0)
            return np.abs(demeaned).mean(axis=axis, skipna=skipna)

        cls.mad = mad

        cls.sem = _make_stat_function_ddof(
            cls, 'sem', name, name2, axis_descr,
            "Return unbiased standard error of the mean over requested "
            "axis.\n\nNormalized by N-1 by default. This can be changed "
            "using the ddof argument",
            nanops.nansem)
        cls.var = _make_stat_function_ddof(
            cls, 'var', name, name2, axis_descr,
            "Return unbiased variance over requested axis.\n\nNormalized by "
            "N-1 by default. This can be changed using the ddof argument",
            nanops.nanvar)
        cls.std = _make_stat_function_ddof(
            cls, 'std', name, name2, axis_descr,
            "Return sample standard deviation over requested axis."
            "\n\nNormalized by N-1 by default. This can be changed using the "
            "ddof argument",
            nanops.nanstd)

        @Substitution(outname='compounded',
                      desc="Return the compound percentage of the values for "
                           "the requested axis.", name1=name, name2=name2,
                      axis_descr=axis_descr,
                      min_count='', see_also='', examples='')
        @Appender(_num_doc)
        def compound(self, axis=None, skipna=None, level=None):
            if skipna is None:
                skipna = True
            return (1 + self).prod(axis=axis, skipna=skipna, level=level) - 1

        cls.compound = compound

        cls.cummin = _make_cum_function(
            cls, 'cummin', name, name2, axis_descr, "minimum",
            lambda y, axis: np.minimum.accumulate(y, axis), "min",
            np.inf, np.nan, _cummin_examples)
        cls.cumsum = _make_cum_function(
            cls, 'cumsum', name, name2, axis_descr, "sum",
            lambda y, axis: y.cumsum(axis), "sum", 0.,
            np.nan, _cumsum_examples)
        cls.cumprod = _make_cum_function(
            cls, 'cumprod', name, name2, axis_descr, "product",
            lambda y, axis: y.cumprod(axis), "prod", 1.,
            np.nan, _cumprod_examples)
        cls.cummax = _make_cum_function(
            cls, 'cummax', name, name2, axis_descr, "maximum",
            lambda y, axis: np.maximum.accumulate(y, axis), "max",
            -np.inf, np.nan, _cummax_examples)

        cls.sum = _make_min_count_stat_function(
            cls, 'sum', name, name2, axis_descr,
            """Return the sum of the values for the requested axis.\n
            This is equivalent to the method ``numpy.sum``.""",
            nanops.nansum, _stat_func_see_also, _sum_examples)
        cls.mean = _make_stat_function(
            cls, 'mean', name, name2, axis_descr,
            'Return the mean of the values for the requested axis.',
            nanops.nanmean)
        cls.skew = _make_stat_function(
            cls, 'skew', name, name2, axis_descr,
            'Return unbiased skew over requested axis\nNormalized by N-1.',
            nanops.nanskew)
        cls.kurt = _make_stat_function(
            cls, 'kurt', name, name2, axis_descr,
            "Return unbiased kurtosis over requested axis using Fisher's "
            "definition of\nkurtosis (kurtosis of normal == 0.0). Normalized "
            "by N-1.",
            nanops.nankurt)
        cls.kurtosis = cls.kurt
        cls.prod = _make_min_count_stat_function(
            cls, 'prod', name, name2, axis_descr,
            'Return the product of the values for the requested axis.',
            nanops.nanprod, examples=_prod_examples)
        cls.product = cls.prod
        cls.median = _make_stat_function(
            cls, 'median', name, name2, axis_descr,
            'Return the median of the values for the requested axis.',
            nanops.nanmedian)
        cls.max = _make_stat_function(
            cls, 'max', name, name2, axis_descr,
            """Return the maximum of the values for the requested axis.\n
            If you want the *index* of the maximum, use ``idxmax``. This is
            the equivalent of the ``numpy.ndarray`` method ``argmax``.""",
            nanops.nanmax, _stat_func_see_also, _max_examples)
        cls.min = _make_stat_function(
            cls, 'min', name, name2, axis_descr,
            """Return the minimum of the values for the requested axis.\n
            If you want the *index* of the minimum, use ``idxmin``. This is
            the equivalent of the ``numpy.ndarray`` method ``argmin``.""",
            nanops.nanmin, _stat_func_see_also, _min_examples)
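
    # Sketch (not part of the original source): per the ``mad`` body above,
    # the column-wise mean absolute deviation is simply the mean of the
    # absolute demeaned values:
    #
    # >>> df = pd.DataFrame({'a': [1.0, 2.0, 3.0], 'b': [4.0, 8.0, 0.0]})
    # >>> df.mad().equals((df - df.mean()).abs().mean())
    # True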
    @classmethod
    def _add_series_only_operations(cls):
        """
        Add the series only operations to the cls; evaluate the doc
        strings again.
        """
        axis_descr, name, name2 = _doc_parms(cls)

        def nanptp(values, axis=0, skipna=True):
            nmax = nanops.nanmax(values, axis, skipna)
            nmin = nanops.nanmin(values, axis, skipna)
            warnings.warn("Method .ptp is deprecated and will be removed "
                          "in a future version. Use numpy.ptp instead.",
                          FutureWarning, stacklevel=4)
            return nmax - nmin

        cls.ptp = _make_stat_function(
            cls, 'ptp', name, name2, axis_descr,
            """Returns the difference between the maximum value and the
            minimum value in the object. This is the equivalent of the
            ``numpy.ndarray`` method ``ptp``.\n\n.. deprecated:: 0.24.0
                Use numpy.ptp instead""",
            nanptp)
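
    # Sketch (not part of the original source): ``ptp`` is just max minus min
    # with NA-skipping, which is why the deprecation points at ``numpy.ptp``:
    #
    # >>> s = pd.Series([2, np.nan, 5, -1, 0])
    # >>> s.ptp() == np.ptp(s.dropna().values)
    # True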
    @classmethod
    def _add_series_or_dataframe_operations(cls):
        """
        Add the series or dataframe only operations to the cls; evaluate
        the doc strings again.
        """
        from pandas.core import window as rwindow

        @Appender(rwindow.rolling.__doc__)
        def rolling(self, window, min_periods=None, center=False,
                    win_type=None, on=None, axis=0, closed=None):
            axis = self._get_axis_number(axis)
            return rwindow.rolling(self, window=window,
                                   min_periods=min_periods,
                                   center=center, win_type=win_type,
                                   on=on, axis=axis, closed=closed)

        cls.rolling = rolling

        @Appender(rwindow.expanding.__doc__)
        def expanding(self, min_periods=1, center=False, axis=0):
            axis = self._get_axis_number(axis)
            return rwindow.expanding(self, min_periods=min_periods,
                                     center=center, axis=axis)

        cls.expanding = expanding

        @Appender(rwindow.ewm.__doc__)
        def ewm(self, com=None, span=None, halflife=None, alpha=None,
                min_periods=0, adjust=True, ignore_na=False,
                axis=0):
            axis = self._get_axis_number(axis)
            return rwindow.ewm(self, com=com, span=span, halflife=halflife,
                               alpha=alpha, min_periods=min_periods,
                               adjust=adjust, ignore_na=ignore_na, axis=axis)

        cls.ewm = ewm
    @Appender(_shared_docs['transform'] % dict(axis="", **_shared_doc_kwargs))
    def transform(self, func, *args, **kwargs):
        result = self.agg(func, *args, **kwargs)
        if is_scalar(result) or len(result) != len(self):
            raise ValueError("transforms cannot produce "
                             "aggregated results")

        return result
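
    # Sketch (not part of the original source): ``transform`` delegates to
    # ``agg`` and then insists the result keep the caller's length, so
    # element-wise functions pass while reducers raise:
    #
    # >>> s = pd.Series([1, 2, 3])
    # >>> s.transform(lambda x: x + 1)   # same length: OK
    # >>> s.transform('sum')             # scalar result: ValueError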
    # ----------------------------------------------------------------------
    # Misc methods

    _shared_docs['valid_index'] = """
        Return index for %(position)s non-NA/null value.

        Returns
        -------
        scalar : type of index

        Notes
        -----
        If all elements are NA/null, returns None.
        Also returns None for empty %(klass)s.
        """
    def _find_valid_index(self, how):
        """
        Retrieves the index of the first or last valid value.

        Parameters
        ----------
        how : {'first', 'last'}
            Use this parameter to change between the first or last valid index.

        Returns
        -------
        idx_first_valid : type of index
        """
        assert how in ['first', 'last']

        if len(self) == 0:  # early stop
            return None
        is_valid = ~self.isna()

        if self.ndim == 2:
            is_valid = is_valid.any(1)  # reduce axis 1

        if how == 'first':
            idxpos = is_valid.values[::].argmax()

        if how == 'last':
            idxpos = len(self) - 1 - is_valid.values[::-1].argmax()

        chk_notna = is_valid.iat[idxpos]
        idx = self.index[idxpos]

        if not chk_notna:
            return None
        return idx
    @Appender(_shared_docs['valid_index'] % {'position': 'first',
                                             'klass': 'NDFrame'})
    def first_valid_index(self):
        return self._find_valid_index('first')

    @Appender(_shared_docs['valid_index'] % {'position': 'last',
                                             'klass': 'NDFrame'})
    def last_valid_index(self):
        return self._find_valid_index('last')
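
    # Sketch (not part of the original source): the positional argmax scan in
    # ``_find_valid_index`` maps back to index labels, e.g.
    #
    # >>> s = pd.Series([np.nan, 2.0, np.nan, 4.0])
    # >>> s.first_valid_index(), s.last_valid_index()
    # (1, 3)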
def _doc_parms(cls):
    """Return a tuple of the doc parms."""
    axis_descr = "{%s}" % ', '.join(["{0} ({1})".format(a, i)
                                     for i, a in enumerate(cls._AXIS_ORDERS)])
    name = (cls._constructor_sliced.__name__
            if cls._AXIS_LEN > 1 else 'scalar')
    name2 = cls.__name__
    return axis_descr, name, name2
_num_doc = """
%(desc)s

Parameters
----------
axis : %(axis_descr)s
    Axis for the function to be applied on.
skipna : bool, default True
    Exclude NA/null values when computing the result.
level : int or level name, default None
    If the axis is a MultiIndex (hierarchical), count along a
    particular level, collapsing into a %(name1)s.
numeric_only : bool, default None
    Include only float, int, boolean columns. If None, will attempt to use
    everything, then use only numeric data. Not implemented for Series.
%(min_count)s\
**kwargs
    Additional keyword arguments to be passed to the function.

Returns
-------
%(outname)s : %(name1)s or %(name2)s (if level specified)
%(see_also)s
%(examples)s\
"""
_num_ddof_doc = """
%(desc)s

Parameters
----------
axis : %(axis_descr)s
skipna : boolean, default True
    Exclude NA/null values. If an entire row/column is NA, the result
    will be NA.
level : int or level name, default None
    If the axis is a MultiIndex (hierarchical), count along a
    particular level, collapsing into a %(name1)s.
ddof : int, default 1
    Delta Degrees of Freedom. The divisor used in calculations is N - ddof,
    where N represents the number of elements.
numeric_only : boolean, default None
    Include only float, int, boolean columns. If None, will attempt to use
    everything, then use only numeric data. Not implemented for Series.

Returns
-------
%(outname)s : %(name1)s or %(name2)s (if level specified)\n"""
_bool_doc = """
%(desc)s

Parameters
----------
axis : {0 or 'index', 1 or 'columns', None}, default 0
    Indicate which axis or axes should be reduced.

    * 0 / 'index' : reduce the index, return a Series whose index is the
      original column labels.
    * 1 / 'columns' : reduce the columns, return a Series whose index is the
      original index.
    * None : reduce all axes, return a scalar.

bool_only : bool, default None
    Include only boolean columns. If None, will attempt to use everything,
    then use only boolean data. Not implemented for Series.
skipna : bool, default True
    Exclude NA/null values. If the entire row/column is NA and skipna is
    True, then the result will be %(empty_value)s, as for an empty row/column.
    If skipna is False, then NA are treated as True, because these are not
    equal to zero.
level : int or level name, default None
    If the axis is a MultiIndex (hierarchical), count along a
    particular level, collapsing into a %(name1)s.
**kwargs : any, default None
    Additional keywords have no effect but might be accepted for
    compatibility with NumPy.

Returns
-------
%(name1)s or %(name2)s
    If level is specified, then, %(name2)s is returned; otherwise, %(name1)s
    is returned.
%(see_also)s
%(examples)s"""
_all_desc = """\
Return whether all elements are True, potentially over an axis.

Returns True unless there is at least one element within a series or
along a DataFrame axis that is False or equivalent (e.g. zero or
empty)."""
_all_examples = """\
Examples
--------
**Series**

>>> pd.Series([True, True]).all()
True
>>> pd.Series([True, False]).all()
False
>>> pd.Series([]).all()
True
>>> pd.Series([np.nan]).all()
True
>>> pd.Series([np.nan]).all(skipna=False)
True

**DataFrames**

Create a dataframe from a dictionary.

>>> df = pd.DataFrame({'col1': [True, True], 'col2': [True, False]})
>>> df
   col1   col2
0  True   True
1  True  False

Default behaviour checks if column-wise values all return True.

>>> df.all()
col1     True
col2    False
dtype: bool

Specify ``axis='columns'`` to check if row-wise values all return True.

>>> df.all(axis='columns')
0     True
1    False
dtype: bool

Or ``axis=None`` for whether every value is True.

>>> df.all(axis=None)
False
"""
_all_see_also = """\
See Also
--------
Series.all : Return True if all elements are True.
DataFrame.any : Return True if one (or more) elements are True.
"""
_cnum_doc = """
Return cumulative %(desc)s over a DataFrame or Series axis.

Returns a DataFrame or Series of the same size containing the cumulative
%(desc)s.

Parameters
----------
axis : {0 or 'index', 1 or 'columns'}, default 0
    The index or the name of the axis. 0 is equivalent to None or 'index'.
skipna : boolean, default True
    Exclude NA/null values. If an entire row/column is NA, the result
    will be NA.
*args, **kwargs :
    Additional keywords have no effect but might be accepted for
    compatibility with NumPy.

Returns
-------
%(outname)s : %(name1)s or %(name2)s\n
See Also
--------
core.window.Expanding.%(accum_func_name)s : Similar functionality
    but ignores ``NaN`` values.
%(name2)s.%(accum_func_name)s : Return the %(desc)s over
    %(name2)s axis.
%(name2)s.cummax : Return cumulative maximum over %(name2)s axis.
%(name2)s.cummin : Return cumulative minimum over %(name2)s axis.
%(name2)s.cumsum : Return cumulative sum over %(name2)s axis.
%(name2)s.cumprod : Return cumulative product over %(name2)s axis.

%(examples)s
"""
_cummin_examples = """\
Examples
--------
**Series**

>>> s = pd.Series([2, np.nan, 5, -1, 0])
>>> s
0    2.0
1    NaN
2    5.0
3   -1.0
4    0.0
dtype: float64

By default, NA values are ignored.

>>> s.cummin()
0    2.0
1    NaN
2    2.0
3   -1.0
4   -1.0
dtype: float64

To include NA values in the operation, use ``skipna=False``

>>> s.cummin(skipna=False)
0    2.0
1    NaN
2    NaN
3    NaN
4    NaN
dtype: float64

**DataFrame**

>>> df = pd.DataFrame([[2.0, 1.0],
...                    [3.0, np.nan],
...                    [1.0, 0.0]],
...                   columns=list('AB'))
>>> df
     A    B
0  2.0  1.0
1  3.0  NaN
2  1.0  0.0

By default, iterates over rows and finds the minimum
in each column. This is equivalent to ``axis=None`` or ``axis='index'``.

>>> df.cummin()
     A    B
0  2.0  1.0
1  2.0  NaN
2  1.0  0.0

To iterate over columns and find the minimum in each row,
use ``axis=1``

>>> df.cummin(axis=1)
     A    B
0  2.0  1.0
1  3.0  NaN
2  1.0  0.0
"""
_cumsum_examples = """\
Examples
--------
**Series**

>>> s = pd.Series([2, np.nan, 5, -1, 0])
>>> s
0    2.0
1    NaN
2    5.0
3   -1.0
4    0.0
dtype: float64

By default, NA values are ignored.

>>> s.cumsum()
0    2.0
1    NaN
2    7.0
3    6.0
4    6.0
dtype: float64

To include NA values in the operation, use ``skipna=False``

>>> s.cumsum(skipna=False)
0    2.0
1    NaN
2    NaN
3    NaN
4    NaN
dtype: float64

**DataFrame**

>>> df = pd.DataFrame([[2.0, 1.0],
...                    [3.0, np.nan],
...                    [1.0, 0.0]],
...                   columns=list('AB'))
>>> df
     A    B
0  2.0  1.0
1  3.0  NaN
2  1.0  0.0

By default, iterates over rows and finds the sum
in each column. This is equivalent to ``axis=None`` or ``axis='index'``.

>>> df.cumsum()
     A    B
0  2.0  1.0
1  5.0  NaN
2  6.0  1.0

To iterate over columns and find the sum in each row,
use ``axis=1``

>>> df.cumsum(axis=1)
     A    B
0  2.0  3.0
1  3.0  NaN
2  1.0  1.0
"""
_cumprod_examples = """\
Examples
--------
**Series**

>>> s = pd.Series([2, np.nan, 5, -1, 0])
>>> s
0    2.0
1    NaN
2    5.0
3   -1.0
4    0.0
dtype: float64

By default, NA values are ignored.

>>> s.cumprod()
0     2.0
1     NaN
2    10.0
3   -10.0
4    -0.0
dtype: float64

To include NA values in the operation, use ``skipna=False``

>>> s.cumprod(skipna=False)
0    2.0
1    NaN
2    NaN
3    NaN
4    NaN
dtype: float64

**DataFrame**

>>> df = pd.DataFrame([[2.0, 1.0],
...                    [3.0, np.nan],
...                    [1.0, 0.0]],
...                   columns=list('AB'))
>>> df
     A    B
0  2.0  1.0
1  3.0  NaN
2  1.0  0.0

By default, iterates over rows and finds the product
in each column. This is equivalent to ``axis=None`` or ``axis='index'``.

>>> df.cumprod()
     A    B
0  2.0  1.0
1  6.0  NaN
2  6.0  0.0

To iterate over columns and find the product in each row,
use ``axis=1``

>>> df.cumprod(axis=1)
     A    B
0  2.0  2.0
1  3.0  NaN
2  1.0  0.0
"""
_cummax_examples = """\
Examples
--------
**Series**

>>> s = pd.Series([2, np.nan, 5, -1, 0])
>>> s
0    2.0
1    NaN
2    5.0
3   -1.0
4    0.0
dtype: float64

By default, NA values are ignored.

>>> s.cummax()
0    2.0
1    NaN
2    5.0
3    5.0
4    5.0
dtype: float64

To include NA values in the operation, use ``skipna=False``

>>> s.cummax(skipna=False)
0    2.0
1    NaN
2    NaN
3    NaN
4    NaN
dtype: float64

**DataFrame**

>>> df = pd.DataFrame([[2.0, 1.0],
...                    [3.0, np.nan],
...                    [1.0, 0.0]],
...                   columns=list('AB'))
>>> df
     A    B
0  2.0  1.0
1  3.0  NaN
2  1.0  0.0

By default, iterates over rows and finds the maximum
in each column. This is equivalent to ``axis=None`` or ``axis='index'``.

>>> df.cummax()
     A    B
0  2.0  1.0
1  3.0  NaN
2  3.0  1.0

To iterate over columns and find the maximum in each row,
use ``axis=1``

>>> df.cummax(axis=1)
     A    B
0  2.0  2.0
1  3.0  NaN
2  1.0  1.0
"""
_any_see_also = """\
See Also
--------
numpy.any : Numpy version of this method.
Series.any : Return whether any element is True.
Series.all : Return whether all elements are True.
DataFrame.any : Return whether any element is True over requested axis.
DataFrame.all : Return whether all elements are True over requested axis.
"""
_any_desc = """\
Return whether any element is True, potentially over an axis.

Returns False unless there is at least one element within a series or
along a DataFrame axis that is True or equivalent (e.g. non-zero or
non-empty)."""
_any_examples = """\
Examples
--------
**Series**

For Series input, the output is a scalar indicating whether any element
is True.

>>> pd.Series([False, False]).any()
False
>>> pd.Series([True, False]).any()
True
>>> pd.Series([]).any()
False
>>> pd.Series([np.nan]).any()
False
>>> pd.Series([np.nan]).any(skipna=False)
True

**DataFrame**

Whether each column contains at least one True element (the default).

>>> df = pd.DataFrame({"A": [1, 2], "B": [0, 2], "C": [0, 0]})
>>> df
   A  B  C
0  1  0  0
1  2  2  0

>>> df.any()
A     True
B     True
C    False
dtype: bool

Aggregating over the columns.

>>> df = pd.DataFrame({"A": [True, False], "B": [1, 2]})
>>> df
       A  B
0   True  1
1  False  2

>>> df.any(axis='columns')
0    True
1    True
dtype: bool

>>> df = pd.DataFrame({"A": [True, False], "B": [1, 0]})
>>> df
       A  B
0   True  1
1  False  0

>>> df.any(axis='columns')
0     True
1    False
dtype: bool

Aggregating over the entire DataFrame with ``axis=None``.

>>> df.any(axis=None)
True

`any` for an empty DataFrame is an empty Series.

>>> pd.DataFrame([]).any()
Series([], dtype: bool)
"""
_shared_docs['stat_func_example'] = """\
Examples
--------
>>> idx = pd.MultiIndex.from_arrays([
...     ['warm', 'warm', 'cold', 'cold'],
...     ['dog', 'falcon', 'fish', 'spider']],
...     names=['blooded', 'animal'])
>>> s = pd.Series([4, 2, 0, 8], name='legs', index=idx)
>>> s
blooded  animal
warm     dog       4
         falcon    2
cold     fish      0
         spider    8
Name: legs, dtype: int64

>>> s.{stat_func}()
{default_output}

{verb} using level names, as well as indices.

>>> s.{stat_func}(level='blooded')
blooded
warm    {level_output_0}
cold    {level_output_1}
Name: legs, dtype: int64

>>> s.{stat_func}(level=0)
blooded
warm    {level_output_0}
cold    {level_output_1}
Name: legs, dtype: int64
"""
_sum_examples = _shared_docs['stat_func_example'].format(
    stat_func='sum',
    verb='Sum',
    default_output=14,
    level_output_0=6,
    level_output_1=8)

_sum_examples += """
By default, the sum of an empty or all-NA Series is ``0``.

>>> pd.Series([]).sum()  # min_count=0 is the default
0.0

This can be controlled with the ``min_count`` parameter. For example, if
you'd like the sum of an empty series to be NaN, pass ``min_count=1``.

>>> pd.Series([]).sum(min_count=1)
nan

Thanks to the ``skipna`` parameter, ``min_count`` handles all-NA and
empty series identically.

>>> pd.Series([np.nan]).sum()
0.0

>>> pd.Series([np.nan]).sum(min_count=1)
nan
"""
_max_examples = _shared_docs['stat_func_example'].format(
    stat_func='max',
    verb='Max',
    default_output=8,
    level_output_0=4,
    level_output_1=8)

_min_examples = _shared_docs['stat_func_example'].format(
    stat_func='min',
    verb='Min',
    default_output=0,
    level_output_0=2,
    level_output_1=0)
_stat_func_see_also = """
See Also
--------
Series.sum : Return the sum.
Series.min : Return the minimum.
Series.max : Return the maximum.
Series.idxmin : Return the index of the minimum.
Series.idxmax : Return the index of the maximum.
DataFrame.sum : Return the sum over the requested axis.
DataFrame.min : Return the minimum over the requested axis.
DataFrame.max : Return the maximum over the requested axis.
DataFrame.idxmin : Return the index of the minimum over the requested axis.
DataFrame.idxmax : Return the index of the maximum over the requested axis.
"""
_prod_examples = """\
Examples
--------
By default, the product of an empty or all-NA Series is ``1``

>>> pd.Series([]).prod()
1.0

This can be controlled with the ``min_count`` parameter

>>> pd.Series([]).prod(min_count=1)
nan

Thanks to the ``skipna`` parameter, ``min_count`` handles all-NA and
empty series identically.

>>> pd.Series([np.nan]).prod()
1.0

>>> pd.Series([np.nan]).prod(min_count=1)
nan
"""
_min_count_stub = """\
min_count : int, default 0
    The required number of valid values to perform the operation. If fewer than
    ``min_count`` non-NA values are present the result will be NA.

    .. versionadded:: 0.22.0

       Added with the default being 0. This means the sum of an all-NA
       or empty Series is 0, and the product of an all-NA or empty
       Series is 1.
"""
def _make_min_count_stat_function(cls, name, name1, name2, axis_descr, desc,
                                  f, see_also='', examples=''):
    @Substitution(outname=name, desc=desc, name1=name1, name2=name2,
                  axis_descr=axis_descr, min_count=_min_count_stub,
                  see_also=see_also, examples=examples)
    @Appender(_num_doc)
    def stat_func(self, axis=None, skipna=None, level=None, numeric_only=None,
                  min_count=0,
                  **kwargs):
        if name == 'sum':
            nv.validate_sum(tuple(), kwargs)
        elif name == 'prod':
            nv.validate_prod(tuple(), kwargs)
        else:
            nv.validate_stat_func(tuple(), kwargs, fname=name)
        if skipna is None:
            skipna = True
        if axis is None:
            axis = self._stat_axis_number
        if level is not None:
            return self._agg_by_level(name, axis=axis, level=level,
                                      skipna=skipna, min_count=min_count)
        return self._reduce(f, name, axis=axis, skipna=skipna,
                            numeric_only=numeric_only, min_count=min_count)

    return set_function_name(stat_func, name, cls)
def _make_stat_function(cls, name, name1, name2, axis_descr, desc, f,
                        see_also='', examples=''):
    @Substitution(outname=name, desc=desc, name1=name1, name2=name2,
                  axis_descr=axis_descr, min_count='', see_also=see_also,
                  examples=examples)
    @Appender(_num_doc)
    def stat_func(self, axis=None, skipna=None, level=None, numeric_only=None,
                  **kwargs):
        if name == 'median':
            nv.validate_median(tuple(), kwargs)
        else:
            nv.validate_stat_func(tuple(), kwargs, fname=name)
        if skipna is None:
            skipna = True
        if axis is None:
            axis = self._stat_axis_number
        if level is not None:
            return self._agg_by_level(name, axis=axis, level=level,
                                      skipna=skipna)
        return self._reduce(f, name, axis=axis, skipna=skipna,
                            numeric_only=numeric_only)

    return set_function_name(stat_func, name, cls)
def _make_stat_function_ddof(cls, name, name1, name2, axis_descr, desc, f):
    @Substitution(outname=name, desc=desc, name1=name1, name2=name2,
                  axis_descr=axis_descr)
    @Appender(_num_ddof_doc)
    def stat_func(self, axis=None, skipna=None, level=None, ddof=1,
                  numeric_only=None, **kwargs):
        nv.validate_stat_ddof_func(tuple(), kwargs, fname=name)
        if skipna is None:
            skipna = True
        if axis is None:
            axis = self._stat_axis_number
        if level is not None:
            return self._agg_by_level(name, axis=axis, level=level,
                                      skipna=skipna, ddof=ddof)
        return self._reduce(f, name, axis=axis, numeric_only=numeric_only,
                            skipna=skipna, ddof=ddof)

    return set_function_name(stat_func, name, cls)
def _make_cum_function(cls, name, name1, name2, axis_descr, desc,
                       accum_func, accum_func_name, mask_a, mask_b, examples):
    @Substitution(outname=name, desc=desc, name1=name1, name2=name2,
                  axis_descr=axis_descr, accum_func_name=accum_func_name,
                  examples=examples)
    @Appender(_cnum_doc)
    def cum_func(self, axis=None, skipna=True, *args, **kwargs):
        skipna = nv.validate_cum_func_with_skipna(skipna, args, kwargs, name)
        if axis is None:
            axis = self._stat_axis_number
        else:
            axis = self._get_axis_number(axis)

        y = com.values_from_object(self).copy()

        if (skipna and
                issubclass(y.dtype.type, (np.datetime64, np.timedelta64))):
            result = accum_func(y, axis)
            mask = isna(self)
            np.putmask(result, mask, iNaT)
        elif skipna and not issubclass(y.dtype.type, (np.integer, np.bool_)):
            mask = isna(self)
            np.putmask(y, mask, mask_a)
            result = accum_func(y, axis)
            np.putmask(result, mask, mask_b)
        else:
            result = accum_func(y, axis)

        d = self._construct_axes_dict()
        d['copy'] = False
        return self._constructor(result, **d).__finalize__(self)

    return set_function_name(cum_func, name, cls)
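
# Sketch (not part of the original source): the ``mask_a``/``mask_b`` pair
# above is the NaN-skipping trick. For ``cummin``, NaNs are first replaced
# with the identity element ``np.inf`` (so they never win the running
# minimum), the accumulation runs, and the original NaN positions are
# restored afterwards:
#
# >>> y = np.array([2., np.nan, 5., -1.])
# >>> mask = np.isnan(y)
# >>> np.putmask(y, mask, np.inf)        # mask_a
# >>> result = np.minimum.accumulate(y)
# >>> np.putmask(result, mask, np.nan)   # mask_b
# >>> result
# array([ 2., nan,  2., -1.])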
def _make_logical_function(cls, name, name1, name2, axis_descr, desc, f,
                           see_also, examples, empty_value):
    @Substitution(outname=name, desc=desc, name1=name1, name2=name2,
                  axis_descr=axis_descr, see_also=see_also, examples=examples,
                  empty_value=empty_value)
    @Appender(_bool_doc)
    def logical_func(self, axis=0, bool_only=None, skipna=True, level=None,
                     **kwargs):
        nv.validate_logical_func(tuple(), kwargs, fname=name)
        if level is not None:
            if bool_only is not None:
                raise NotImplementedError("Option bool_only is not "
                                          "implemented with option level.")
            return self._agg_by_level(name, axis=axis, level=level,
                                      skipna=skipna)
        return self._reduce(f, name, axis=axis, skipna=skipna,
                            numeric_only=bool_only, filter_type='bool')

    return set_function_name(logical_func, name, cls)
# install the indexes
for _name, _indexer in indexing.get_indexers_list():
    NDFrame._create_indexer(_name, _indexer)