| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
70370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761
27712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768176917701771177217731774177517761
77717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148214921502151215221532154215521562157215821592160216121622163216421652166216721682169217021712172217321742175217621772178217921802181218221832184218521862187218821892190219121922193219421952196219721982199220022012202220322042205220622072208220922102211221222132214221522162217221822192220222122222223222422252226222722282229223022312232223322342235223622372238223922402241224222432244224522462247224822492250225122522253225422552256225722582259226022612262226322642265226622672268226922702271227222732274227522762
27722782279228022812282228322842285228622872288228922902291229222932294229522962297229822992300230123022303230423052306230723082309231023112312231323142315231623172318231923202321232223232324232523262327232823292330233123322333233423352336233723382339234023412342234323442345234623472348234923502351235223532354235523562357235823592360236123622363236423652366236723682369237023712372237323742375237623772378237923802381238223832384238523862387238823892390239123922393239423952396239723982399240024012402240324042405240624072408240924102411241224132414241524162417241824192420242124222423242424252426242724282429243024312432243324342435243624372438243924402441244224432444244524462447244824492450245124522453245424552456245724582459246024612462246324642465246624672468246924702471247224732474247524762477247824792480248124822483248424852486248724882489249024912492249324942495249624972498249925002501250225032504250525062507250825092510251125122513251425152516251725182519252025212522252325242525252625272528252925302531253225332534253525362537253825392540254125422543254425452546254725482549255025512552255325542555255625572558255925602561256225632564256525662567256825692570257125722573257425752576257725782579258025812582258325842585258625872588258925902591259225932594259525962597259825992600260126022603260426052606260726082609261026112612261326142615261626172618261926202621262226232624262526262627262826292630263126322633263426352636263726382639264026412642264326442645264626472648264926502651265226532654265526562657265826592660266126622663266426652666266726682669267026712672267326742675267626772678267926802681268226832684268526862687268826892690269126922693269426952696269726982699270027012702270327042705270627072708270927102711271227132714271527162717271827192720272127222723272427252726272727282729273027312732273327342735273627372738273927402741274227432744274527462747274827492750275127522753275427552756275727582759276027612762276327642765276627672768276927702771277227732774277527762
77727782779278027812782278327842785278627872788278927902791279227932794279527962797279827992800280128022803280428052806280728082809281028112812281328142815281628172818281928202821282228232824282528262827282828292830283128322833283428352836283728382839284028412842284328442845284628472848284928502851285228532854285528562857285828592860286128622863286428652866286728682869287028712872287328742875287628772878287928802881288228832884288528862887288828892890289128922893289428952896289728982899290029012902290329042905290629072908290929102911291229132914291529162917291829192920292129222923292429252926292729282929293029312932293329342935293629372938293929402941294229432944294529462947294829492950295129522953295429552956295729582959296029612962296329642965296629672968296929702971297229732974297529762977297829792980298129822983298429852986298729882989299029912992299329942995299629972998299930003001300230033004300530063007300830093010301130123013301430153016301730183019302030213022302330243025302630273028302930303031303230333034303530363037303830393040304130423043304430453046304730483049305030513052305330543055305630573058305930603061306230633064306530663067306830693070307130723073307430753076307730783079308030813082308330843085308630873088308930903091309230933094309530963097309830993100310131023103310431053106310731083109311031113112311331143115311631173118311931203121312231233124312531263127312831293130313131323133313431353136313731383139314031413142314331443145314631473148314931503151315231533154315531563157315831593160316131623163316431653166316731683169317031713172317331743175317631773178317931803181318231833184318531863187318831893190319131923193319431953196319731983199320032013202320332043205320632073208320932103211321232133214321532163217321832193220322132223223322432253226322732283229323032313232323332343235323632373238323932403241324232433244324532463247324832493250325132523253325432553256325732583259326032613262326332643265326632673268326932703271327232733274327532763
27732783279328032813282328332843285328632873288328932903291329232933294329532963297329832993300330133023303330433053306330733083309331033113312331333143315331633173318331933203321332233233324332533263327332833293330333133323333333433353336333733383339334033413342334333443345334633473348334933503351335233533354335533563357335833593360336133623363336433653366336733683369337033713372337333743375337633773378337933803381338233833384338533863387338833893390339133923393339433953396339733983399340034013402340334043405340634073408340934103411341234133414341534163417341834193420342134223423342434253426342734283429343034313432343334343435343634373438343934403441344234433444344534463447344834493450345134523453345434553456345734583459346034613462346334643465346634673468346934703471347234733474347534763477347834793480348134823483348434853486348734883489349034913492349334943495349634973498349935003501350235033504350535063507350835093510351135123513351435153516351735183519352035213522352335243525352635273528352935303531353235333534353535363537353835393540354135423543354435453546354735483549355035513552355335543555355635573558355935603561356235633564356535663567356835693570357135723573357435753576357735783579358035813582358335843585358635873588358935903591359235933594359535963597359835993600360136023603360436053606360736083609361036113612361336143615361636173618361936203621362236233624362536263627362836293630363136323633363436353636363736383639364036413642364336443645364636473648364936503651365236533654365536563657365836593660366136623663366436653666366736683669367036713672367336743675367636773678367936803681368236833684368536863687368836893690369136923693369436953696369736983699370037013702370337043705370637073708370937103711371237133714371537163717371837193720372137223723372437253726372737283729373037313732373337343735373637373738373937403741374237433744374537463747374837493750375137523753375437553756375737583759376037613762376337643765376637673768376937703771377237733774377537763
77737783779378037813782378337843785378637873788378937903791379237933794379537963797379837993800380138023803380438053806380738083809381038113812381338143815381638173818381938203821382238233824382538263827382838293830383138323833383438353836383738383839384038413842384338443845384638473848384938503851385238533854385538563857385838593860386138623863386438653866386738683869387038713872387338743875387638773878387938803881388238833884388538863887388838893890389138923893389438953896389738983899390039013902390339043905390639073908390939103911391239133914391539163917391839193920392139223923392439253926392739283929393039313932393339343935393639373938393939403941394239433944394539463947394839493950395139523953395439553956395739583959396039613962396339643965396639673968396939703971397239733974397539763977397839793980398139823983398439853986398739883989399039913992399339943995399639973998399940004001400240034004400540064007400840094010401140124013401440154016401740184019402040214022402340244025402640274028402940304031403240334034403540364037403840394040404140424043404440454046404740484049405040514052405340544055405640574058405940604061406240634064406540664067406840694070407140724073407440754076407740784079408040814082408340844085408640874088408940904091409240934094409540964097409840994100410141024103410441054106410741084109411041114112411341144115411641174118411941204121412241234124412541264127412841294130413141324133413441354136413741384139414041414142414341444145414641474148414941504151415241534154415541564157415841594160416141624163416441654166416741684169417041714172417341744175417641774178417941804181418241834184418541864187418841894190419141924193419441954196419741984199420042014202420342044205420642074208420942104211421242134214421542164217421842194220422142224223422442254226422742284229423042314232423342344235423642374238423942404241424242434244424542464247424842494250425142524253425442554256425742584259426042614262426342644265426642674268426942704271427242734274427542764
27742784279428042814282428342844285428642874288428942904291429242934294429542964297429842994300430143024303430443054306430743084309431043114312431343144315431643174318431943204321432243234324432543264327432843294330433143324333433443354336433743384339434043414342434343444345434643474348434943504351435243534354435543564357435843594360436143624363436443654366436743684369437043714372437343744375437643774378437943804381438243834384438543864387438843894390439143924393439443954396439743984399440044014402440344044405440644074408440944104411441244134414441544164417441844194420442144224423442444254426442744284429443044314432443344344435443644374438443944404441444244434444444544464447444844494450445144524453445444554456445744584459446044614462446344644465446644674468446944704471447244734474447544764477447844794480448144824483448444854486448744884489449044914492449344944495449644974498449945004501450245034504450545064507450845094510451145124513451445154516451745184519452045214522452345244525452645274528452945304531453245334534453545364537453845394540454145424543454445454546454745484549455045514552455345544555455645574558455945604561456245634564456545664567456845694570457145724573457445754576457745784579458045814582458345844585458645874588458945904591459245934594459545964597459845994600460146024603460446054606460746084609461046114612461346144615461646174618461946204621462246234624462546264627462846294630463146324633463446354636463746384639464046414642464346444645464646474648464946504651465246534654465546564657465846594660466146624663466446654666466746684669467046714672467346744675467646774678467946804681468246834684468546864687468846894690469146924693469446954696469746984699470047014702470347044705470647074708470947104711471247134714471547164717471847194720472147224723472447254726472747284729473047314732473347344735473647374738473947404741474247434744474547464747474847494750475147524753475447554756475747584759476047614762476347644765476647674768476947704771477247734774477547764
77747784779478047814782478347844785478647874788478947904791479247934794479547964797479847994800480148024803480448054806480748084809481048114812481348144815481648174818481948204821482248234824482548264827482848294830483148324833483448354836483748384839484048414842484348444845484648474848484948504851485248534854485548564857485848594860486148624863486448654866486748684869487048714872487348744875487648774878487948804881488248834884488548864887488848894890489148924893489448954896489748984899490049014902490349044905490649074908490949104911491249134914491549164917491849194920492149224923492449254926492749284929493049314932493349344935493649374938493949404941494249434944494549464947494849494950495149524953495449554956495749584959496049614962496349644965496649674968496949704971497249734974497549764977497849794980498149824983498449854986498749884989499049914992499349944995499649974998499950005001500250035004500550065007500850095010501150125013501450155016501750185019502050215022502350245025502650275028502950305031503250335034503550365037503850395040504150425043504450455046504750485049505050515052505350545055505650575058505950605061506250635064506550665067506850695070507150725073507450755076507750785079508050815082508350845085508650875088508950905091509250935094509550965097509850995100510151025103510451055106510751085109511051115112511351145115511651175118511951205121512251235124512551265127512851295130513151325133513451355136513751385139514051415142514351445145514651475148514951505151515251535154515551565157515851595160516151625163516451655166516751685169517051715172517351745175517651775178517951805181518251835184518551865187518851895190519151925193519451955196519751985199520052015202520352045205520652075208520952105211521252135214521552165217521852195220522152225223522452255226522752285229523052315232523352345235523652375238523952405241524252435244524552465247524852495250525152525253525452555256525752585259526052615262526352645265526652675268526952705271527252735274527552765
277527852795280528152825283528452855286528752885289529052915292529352945295529652975298529953005301530253035304530553065307530853095310531153125313531453155316531753185319532053215322532353245325532653275328532953305331533253335334533553365337533853395340534153425343534453455346534753485349535053515352535353545355535653575358535953605361536253635364536553665367536853695370537153725373537453755376537753785379538053815382538353845385538653875388538953905391539253935394539553965397539853995400540154025403540454055406540754085409541054115412541354145415541654175418541954205421542254235424542554265427542854295430543154325433543454355436543754385439544054415442544354445445544654475448544954505451545254535454545554565457545854595460546154625463546454655466546754685469547054715472547354745475547654775478547954805481548254835484548554865487548854895490549154925493549454955496549754985499550055015502550355045505550655075508550955105511551255135514551555165517551855195520552155225523552455255526552755285529553055315532553355345535553655375538553955405541554255435544554555465547554855495550555155525553555455555556555755585559556055615562556355645565556655675568556955705571557255735574557555765577557855795580558155825583558455855586558755885589559055915592559355945595559655975598559956005601560256035604560556065607560856095610561156125613561456155616561756185619562056215622562356245625562656275628562956305631563256335634563556365637563856395640564156425643564456455646564756485649565056515652565356545655565656575658565956605661566256635664566556665667566856695670567156725673567456755676567756785679568056815682568356845685568656875688568956905691 |
- from contextlib import contextmanager
- import datetime
- from datetime import timedelta
- from distutils.version import LooseVersion
- import os
- import tempfile
- from warnings import catch_warnings, simplefilter
- import numpy as np
- import pytest
- from pandas.compat import (
- PY35, PY36, BytesIO, is_platform_little_endian, is_platform_windows,
- lrange, range, text_type, u)
- import pandas.util._test_decorators as td
- from pandas.core.dtypes.common import is_categorical_dtype
- import pandas as pd
- from pandas import (
- Categorical, DataFrame, DatetimeIndex, Index, Int64Index, MultiIndex,
- Panel, RangeIndex, Series, Timestamp, bdate_range, compat, concat,
- date_range, isna, timedelta_range)
- import pandas.util.testing as tm
- from pandas.util.testing import (
- assert_frame_equal, assert_panel_equal, assert_series_equal, set_timezone)
- from pandas.io import pytables as pytables # noqa:E402
- from pandas.io.formats.printing import pprint_thing
- from pandas.io.pytables import (
- ClosedFileError, HDFStore, PossibleDataLossError, Term, read_hdf)
- from pandas.io.pytables import TableIterator # noqa:E402
# Skip this whole module if PyTables is not installed.
tables = pytest.importorskip('tables')

# TODO:
# remove when gh-24839 is fixed; this affects numpy 1.16
# and pytables 3.4.4
# Marker for tests that fail because pytables <= 3.4.4 cannot write
# the non-writeable arrays numpy 1.16 started producing.
xfail_non_writeable = pytest.mark.xfail(
    LooseVersion(np.__version__) >= LooseVersion('1.16'),
    reason=('gh-25511, gh-24839. pytables needs a '
            'release beyong 3.4.4 to support numpy 1.16x'))

# Default compression library: 'blosc' is only available on pytables >= 2.2.
_default_compressor = ('blosc' if LooseVersion(tables.__version__) >=
                       LooseVersion('2.2') else 'zlib')

# Decorator to silence tables.NaturalNameWarning for keys that are not
# valid Python identifiers (e.g. 'node())').
ignore_natural_naming_warning = pytest.mark.filterwarnings(
    "ignore:object name:tables.exceptions.NaturalNameWarning"
)
# helpers used by the file-cleanup contextmanagers below
def safe_remove(path):
    """Best-effort deletion of *path*; ``None`` and missing files are no-ops."""
    if path is None:
        return
    try:
        os.remove(path)
    except OSError:
        # cleanup only -- the file may already have been removed
        pass
def safe_close(store):
    """Close a store-like object, ignoring ``IOError``; ``None`` is a no-op."""
    if store is None:
        return
    try:
        store.close()
    except IOError:
        # closing an already-broken handle during cleanup is fine
        pass
def create_tempfile(path):
    """ create an unopened named temporary file """
    tmpdir = tempfile.gettempdir()
    return os.path.join(tmpdir, path)
@contextmanager
def ensure_clean_store(path, mode='a', complevel=None, complib=None,
                       fletcher32=False):
    """Yield an open ``HDFStore`` at *path*, guaranteeing cleanup on exit.

    The store is always closed; for write/append modes the backing file
    is also removed. A bare filename is placed in the system temp dir.

    Parameters are passed through to ``HDFStore``.
    """
    store = None  # so the finally block is safe if HDFStore() raises
    try:
        # put in the temporary path if we don't have one already
        if not len(os.path.dirname(path)):
            path = create_tempfile(path)

        # bug fix: ``fletcher32=False`` was previously hard-coded here,
        # silently discarding the caller's fletcher32 argument
        store = HDFStore(path, mode=mode, complevel=complevel,
                         complib=complib, fletcher32=fletcher32)
        yield store
    finally:
        safe_close(store)
        if mode == 'w' or mode == 'a':
            safe_remove(path)
@contextmanager
def ensure_clean_path(path):
    """
    return essentially a named temporary file that is not opened
    and deleted on exiting; if path is a list, then create and
    return list of filenames
    """
    # bug fix: initialize first so the finally block never raises
    # NameError if create_tempfile itself fails
    filenames = []
    try:
        if isinstance(path, list):
            filenames = [create_tempfile(p) for p in path]
            yield filenames
        else:
            filenames = [create_tempfile(path)]
            yield filenames[0]
    finally:
        # best-effort removal of everything we handed out
        for f in filenames:
            safe_remove(f)
# set these parameters so we don't have file sharing
# (force single-threaded numexpr/blosc/pytables so concurrent test
# workers never contend on the same HDF5 file handles)
tables.parameters.MAX_NUMEXPR_THREADS = 1
tables.parameters.MAX_BLOSC_THREADS = 1
tables.parameters.MAX_THREADS = 1
- def _maybe_remove(store, key):
- """For tests using tables, try removing the table to be sure there is
- no content from previous tests using the same table name."""
- try:
- store.remove(key)
- except (ValueError, KeyError):
- pass
class Base(object):
    """Shared scaffolding for the HDFStore tests: toggles pandas' testing
    mode around the class (PyTables 3.0.0 emits many deprecation warnings)
    and gives every test a unique scratch filename in ``self.path``."""

    @classmethod
    def setup_class(cls):
        # silence the deprecation noise PyTables 3.0.0 introduced
        tm.reset_testing_mode()

    @classmethod
    def teardown_class(cls):
        # restore normal testing mode for subsequent test classes
        tm.set_testing_mode()

    def setup_method(self, method):
        # randomized per-test filename avoids cross-test file sharing
        self.path = 'tmp.__{0}__.h5'.format(tm.rands(10))

    def teardown_method(self, method):
        pass
- @pytest.mark.single
- @pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning")
- class TestHDFStore(Base):
- def test_format_kwarg_in_constructor(self):
- # GH 13291
- with ensure_clean_path(self.path) as path:
- pytest.raises(ValueError, HDFStore, path, format='table')
- def test_context(self):
- path = create_tempfile(self.path)
- try:
- with HDFStore(path) as tbl:
- raise ValueError('blah')
- except ValueError:
- pass
- finally:
- safe_remove(path)
- try:
- with HDFStore(path) as tbl:
- tbl['a'] = tm.makeDataFrame()
- with HDFStore(path) as tbl:
- assert len(tbl) == 1
- assert type(tbl['a']) == DataFrame
- finally:
- safe_remove(path)
    def test_conv_read_write(self):
        # round-trip several pandas containers through to_hdf/read_hdf
        path = create_tempfile(self.path)
        try:
            def roundtrip(key, obj, **kwargs):
                # write under *key*, then immediately read it back
                obj.to_hdf(path, key, **kwargs)
                return read_hdf(path, key)

            o = tm.makeTimeSeries()
            assert_series_equal(o, roundtrip('series', o))

            o = tm.makeStringSeries()
            assert_series_equal(o, roundtrip('string_series', o))

            o = tm.makeDataFrame()
            assert_frame_equal(o, roundtrip('frame', o))

            with catch_warnings(record=True):
                # Panel is deprecated; swallow its FutureWarning
                o = tm.makePanel()
                assert_panel_equal(o, roundtrip('panel', o))

            # table
            df = DataFrame(dict(A=lrange(5), B=lrange(5)))
            df.to_hdf(path, 'table', append=True)
            result = read_hdf(path, 'table', where=['index>2'])
            assert_frame_equal(df[df.index > 2], result)

        finally:
            safe_remove(path)
- def test_long_strings(self):
- # GH6166
- df = DataFrame({'a': tm.rands_array(100, size=10)},
- index=tm.rands_array(100, size=10))
- with ensure_clean_store(self.path) as store:
- store.append('df', df, data_columns=['a'])
- result = store.select('df')
- assert_frame_equal(df, result)
    def test_api(self):
        # GH4584
        # API issue when to_hdf doesn't accept append AND format args
        with ensure_clean_path(self.path) as path:

            df = tm.makeDataFrame()
            # append in two halves with an explicit table format
            df.iloc[:10].to_hdf(path, 'df', append=True, format='table')
            df.iloc[10:].to_hdf(path, 'df', append=True, format='table')
            assert_frame_equal(read_hdf(path, 'df'), df)

            # append to False
            df.iloc[:10].to_hdf(path, 'df', append=False, format='table')
            df.iloc[10:].to_hdf(path, 'df', append=True, format='table')
            assert_frame_equal(read_hdf(path, 'df'), df)

        with ensure_clean_path(self.path) as path:

            df = tm.makeDataFrame()
            # append=True should imply table format even with format omitted
            df.iloc[:10].to_hdf(path, 'df', append=True)
            df.iloc[10:].to_hdf(path, 'df', append=True, format='table')
            assert_frame_equal(read_hdf(path, 'df'), df)

            # append to False
            df.iloc[:10].to_hdf(path, 'df', append=False, format='table')
            df.iloc[10:].to_hdf(path, 'df', append=True)
            assert_frame_equal(read_hdf(path, 'df'), df)

        with ensure_clean_path(self.path) as path:

            df = tm.makeDataFrame()
            # 'fixed', its 'f' abbreviation, and the implicit default
            df.to_hdf(path, 'df', append=False, format='fixed')
            assert_frame_equal(read_hdf(path, 'df'), df)

            df.to_hdf(path, 'df', append=False, format='f')
            assert_frame_equal(read_hdf(path, 'df'), df)

            df.to_hdf(path, 'df', append=False)
            assert_frame_equal(read_hdf(path, 'df'), df)

            df.to_hdf(path, 'df')
            assert_frame_equal(read_hdf(path, 'df'), df)

        with ensure_clean_store(self.path) as store:

            path = store._path
            df = tm.makeDataFrame()

            _maybe_remove(store, 'df')
            store.append('df', df.iloc[:10], append=True, format='table')
            store.append('df', df.iloc[10:], append=True, format='table')
            assert_frame_equal(store.select('df'), df)

            # append to False
            _maybe_remove(store, 'df')
            store.append('df', df.iloc[:10], append=False, format='table')
            store.append('df', df.iloc[10:], append=True, format='table')
            assert_frame_equal(store.select('df'), df)

            # formats
            _maybe_remove(store, 'df')
            store.append('df', df.iloc[:10], append=False, format='table')
            store.append('df', df.iloc[10:], append=True, format='table')
            assert_frame_equal(store.select('df'), df)

            _maybe_remove(store, 'df')
            store.append('df', df.iloc[:10], append=False, format='table')
            store.append('df', df.iloc[10:], append=True, format=None)
            assert_frame_equal(store.select('df'), df)

        with ensure_clean_path(self.path) as path:
            # invalid
            df = tm.makeDataFrame()
            # fixed formats cannot be appended to
            pytest.raises(ValueError, df.to_hdf, path,
                          'df', append=True, format='f')
            pytest.raises(ValueError, df.to_hdf, path,
                          'df', append=True, format='fixed')

            # unknown format strings raise TypeError
            pytest.raises(TypeError, df.to_hdf, path,
                          'df', append=True, format='foo')
            pytest.raises(TypeError, df.to_hdf, path,
                          'df', append=False, format='bar')

        # File path doesn't exist
        path = ""
        pytest.raises(compat.FileNotFoundError,
                      read_hdf, path, 'df')
- def test_api_default_format(self):
- # default_format option
- with ensure_clean_store(self.path) as store:
- df = tm.makeDataFrame()
- pd.set_option('io.hdf.default_format', 'fixed')
- _maybe_remove(store, 'df')
- store.put('df', df)
- assert not store.get_storer('df').is_table
- pytest.raises(ValueError, store.append, 'df2', df)
- pd.set_option('io.hdf.default_format', 'table')
- _maybe_remove(store, 'df')
- store.put('df', df)
- assert store.get_storer('df').is_table
- _maybe_remove(store, 'df2')
- store.append('df2', df)
- assert store.get_storer('df').is_table
- pd.set_option('io.hdf.default_format', None)
- with ensure_clean_path(self.path) as path:
- df = tm.makeDataFrame()
- pd.set_option('io.hdf.default_format', 'fixed')
- df.to_hdf(path, 'df')
- with HDFStore(path) as store:
- assert not store.get_storer('df').is_table
- pytest.raises(ValueError, df.to_hdf, path, 'df2', append=True)
- pd.set_option('io.hdf.default_format', 'table')
- df.to_hdf(path, 'df3')
- with HDFStore(path) as store:
- assert store.get_storer('df3').is_table
- df.to_hdf(path, 'df4', append=True)
- with HDFStore(path) as store:
- assert store.get_storer('df4').is_table
- pd.set_option('io.hdf.default_format', None)
- def test_keys(self):
- with ensure_clean_store(self.path) as store:
- store['a'] = tm.makeTimeSeries()
- store['b'] = tm.makeStringSeries()
- store['c'] = tm.makeDataFrame()
- with catch_warnings(record=True):
- store['d'] = tm.makePanel()
- store['foo/bar'] = tm.makePanel()
- assert len(store) == 5
- expected = {'/a', '/b', '/c', '/d', '/foo/bar'}
- assert set(store.keys()) == expected
- assert set(store) == expected
- def test_keys_ignore_hdf_softlink(self):
- # GH 20523
- # Puts a softlink into HDF file and rereads
- with ensure_clean_store(self.path) as store:
- df = DataFrame(dict(A=lrange(5), B=lrange(5)))
- store.put("df", df)
- assert store.keys() == ["/df"]
- store._handle.create_soft_link(store._handle.root, "symlink", "df")
- # Should ignore the softlink
- assert store.keys() == ["/df"]
- def test_iter_empty(self):
- with ensure_clean_store(self.path) as store:
- # GH 12221
- assert list(store) == []
def test_repr(self):
    """Smoke-test repr/str/info on empty, populated, and storer objects."""
    with ensure_clean_store(self.path) as store:
        repr(store)
        store.info()
        store['a'] = tm.makeTimeSeries()
        store['b'] = tm.makeStringSeries()
        store['c'] = tm.makeDataFrame()

        # Panel is deprecated; silence its warnings.
        with catch_warnings(record=True):
            store['d'] = tm.makePanel()
            store['foo/bar'] = tm.makePanel()
            store.append('e', tm.makePanel())

        df = tm.makeDataFrame()
        df['obj1'] = 'foo'
        df['obj2'] = 'bar'
        df['bool1'] = df['A'] > 0
        df['bool2'] = df['B'] > 0
        df['bool3'] = True
        df['int1'] = 1
        df['int2'] = 2
        df['timestamp1'] = Timestamp('20010102')
        df['timestamp2'] = Timestamp('20010103')
        df['datetime1'] = datetime.datetime(2001, 1, 2, 0, 0)
        df['datetime2'] = datetime.datetime(2001, 1, 3, 0, 0)
        df.loc[3:6, ['obj1']] = np.nan
        df = df._consolidate()._convert(datetime=True)

        with catch_warnings(record=True):
            simplefilter("ignore", pd.errors.PerformanceWarning)
            store['df'] = df

        # make a random group in hdf space
        store._handle.create_group(store._handle.root, 'bah')

        assert store.filename in repr(store)
        assert store.filename in str(store)
        store.info()

    # storers
    with ensure_clean_store(self.path) as store:
        df = tm.makeDataFrame()
        store.append('df', df)
        s = store.get_storer('df')
        repr(s)
        str(s)
@ignore_natural_naming_warning
def test_contains(self):
    """`in` matches keys with or without a leading slash, nothing else."""
    with ensure_clean_store(self.path) as store:
        store['a'] = tm.makeTimeSeries()
        store['b'] = tm.makeDataFrame()
        store['foo/bar'] = tm.makeDataFrame()
        assert 'a' in store
        assert 'b' in store
        assert 'c' not in store
        assert 'foo/bar' in store
        assert '/foo/bar' in store
        assert '/foo/b' not in store
        assert 'bar' not in store

        # gh-2694: tables.NaturalNameWarning
        with catch_warnings(record=True):
            store['node())'] = tm.makeDataFrame()
        assert 'node())' in store
def test_versioning(self):
    """Written nodes carry a pandas_version attribute; wiping it breaks select."""
    with ensure_clean_store(self.path) as store:
        store['a'] = tm.makeTimeSeries()
        store['b'] = tm.makeDataFrame()
        df = tm.makeTimeDataFrame()
        _maybe_remove(store, 'df1')
        store.append('df1', df[:10])
        store.append('df1', df[10:])
        assert store.root.a._v_attrs.pandas_version == '0.15.2'
        assert store.root.b._v_attrs.pandas_version == '0.15.2'
        assert store.root.df1._v_attrs.pandas_version == '0.15.2'

        # write a file and wipe its versioning
        _maybe_remove(store, 'df2')
        store.append('df2', df)

        # this is an error because its table_type is appendable, but no
        # version info
        store.get_node('df2')._v_attrs.pandas_version = None
        with pytest.raises(Exception):
            store.select('df2')
def test_mode(self):
    """Exercise HDFStore/to_hdf/read_hdf across open modes r, r+, a, w."""
    df = tm.makeTimeDataFrame()

    def check(mode):
        with ensure_clean_path(self.path) as path:
            # constructor: read modes require an existing file
            if mode in ['r', 'r+']:
                pytest.raises(IOError, HDFStore, path, mode=mode)
            else:
                store = HDFStore(path, mode=mode)
                assert store._handle.mode == mode
                store.close()

        with ensure_clean_path(self.path) as path:
            # context manager form
            if mode in ['r', 'r+']:
                def f():
                    with HDFStore(path, mode=mode) as store:  # noqa
                        pass
                pytest.raises(IOError, f)
            else:
                with HDFStore(path, mode=mode) as store:
                    assert store._handle.mode == mode

        with ensure_clean_path(self.path) as path:
            # conv write
            if mode in ['r', 'r+']:
                pytest.raises(IOError, df.to_hdf,
                              path, 'df', mode=mode)
                df.to_hdf(path, 'df', mode='w')
            else:
                df.to_hdf(path, 'df', mode=mode)

            # conv read
            if mode in ['w']:
                pytest.raises(ValueError, read_hdf,
                              path, 'df', mode=mode)
            else:
                result = read_hdf(path, 'df', mode=mode)
                assert_frame_equal(result, df)

    def check_default_mode():
        # read_hdf uses default mode
        with ensure_clean_path(self.path) as path:
            df.to_hdf(path, 'df', mode='w')
            result = read_hdf(path, 'df')
            assert_frame_equal(result, df)

    check('r')
    check('r+')
    check('a')
    check('w')
    check_default_mode()
def test_reopen_handle(self):
    """open()/close() transitions: mode changes, truncation, and is_open state."""
    with ensure_clean_path(self.path) as path:
        store = HDFStore(path, mode='a')
        store['a'] = tm.makeTimeSeries()

        # invalid mode change
        pytest.raises(PossibleDataLossError, store.open, 'w')
        store.close()
        assert not store.is_open

        # truncation ok here
        store.open('w')
        assert store.is_open
        assert len(store) == 0
        store.close()
        assert not store.is_open

        store = HDFStore(path, mode='a')
        store['a'] = tm.makeTimeSeries()

        # reopen as read
        store.open('r')
        assert store.is_open
        assert len(store) == 1
        assert store._mode == 'r'
        store.close()
        assert not store.is_open

        # reopen as append
        store.open('a')
        assert store.is_open
        assert len(store) == 1
        assert store._mode == 'a'
        store.close()
        assert not store.is_open

        # reopen as append (again)
        store.open('a')
        assert store.is_open
        assert len(store) == 1
        assert store._mode == 'a'
        store.close()
        assert not store.is_open
def test_open_args(self):
    """Driver kwargs pass through: an in-core store never hits disk."""
    with ensure_clean_path(self.path) as path:
        df = tm.makeDataFrame()

        # create an in memory store
        store = HDFStore(path, mode='a', driver='H5FD_CORE',
                         driver_core_backing_store=0)
        store['df'] = df
        store.append('df2', df)

        tm.assert_frame_equal(store['df'], df)
        tm.assert_frame_equal(store['df2'], df)

        store.close()

        # the file should not have actually been written
        assert not os.path.exists(path)
def test_flush(self):
    """flush() works with and without fsync."""
    with ensure_clean_store(self.path) as store:
        store['a'] = tm.makeTimeSeries()
        store.flush()
        store.flush(fsync=True)
def test_get(self):
    """get() matches __getitem__, with or without leading slash; missing key raises."""
    with ensure_clean_store(self.path) as store:
        store['a'] = tm.makeTimeSeries()
        left = store.get('a')
        right = store['a']
        tm.assert_series_equal(left, right)

        left = store.get('/a')
        right = store['/a']
        tm.assert_series_equal(left, right)

        with pytest.raises(KeyError):
            store.get('b')
@pytest.mark.parametrize('where, expected', [
    ('/', {
        '': ({'first_group', 'second_group'}, set()),
        '/first_group': (set(), {'df1', 'df2'}),
        '/second_group': ({'third_group'}, {'df3', 's1'}),
        '/second_group/third_group': (set(), {'df4'}),
    }),
    ('/second_group', {
        '/second_group': ({'third_group'}, {'df3', 's1'}),
        '/second_group/third_group': (set(), {'df4'}),
    })
])
def test_walk(self, where, expected):
    """walk() yields (path, groups, leaves), ignoring non-pandas nodes (GH10143)."""
    objs = {
        'df1': pd.DataFrame([1, 2, 3]),
        'df2': pd.DataFrame([4, 5, 6]),
        'df3': pd.DataFrame([6, 7, 8]),
        'df4': pd.DataFrame([9, 10, 11]),
        's1': pd.Series([10, 9, 8]),
        # Next 3 items aren't pandas objects and should be ignored
        'a1': np.array([[1, 2, 3], [4, 5, 6]]),
        'tb1': np.array([(1, 2, 3), (4, 5, 6)], dtype='i,i,i'),
        'tb2': np.array([(7, 8, 9), (10, 11, 12)], dtype='i,i,i')
    }

    with ensure_clean_store('walk_groups.hdf', mode='w') as store:
        store.put('/first_group/df1', objs['df1'])
        store.put('/first_group/df2', objs['df2'])
        store.put('/second_group/df3', objs['df3'])
        store.put('/second_group/s1', objs['s1'])
        store.put('/second_group/third_group/df4', objs['df4'])
        # Create non-pandas objects
        store._handle.create_array('/first_group', 'a1', objs['a1'])
        store._handle.create_table('/first_group', 'tb1', obj=objs['tb1'])
        store._handle.create_table('/second_group', 'tb2', obj=objs['tb2'])

        assert len(list(store.walk(where=where))) == len(expected)
        for path, groups, leaves in store.walk(where=where):
            assert path in expected
            expected_groups, expected_frames = expected[path]
            assert expected_groups == set(groups)
            assert expected_frames == set(leaves)
            for leaf in leaves:
                frame_path = '/'.join([path, leaf])
                obj = store.get(frame_path)
                if 'df' in leaf:
                    tm.assert_frame_equal(obj, objs[leaf])
                else:
                    tm.assert_series_equal(obj, objs[leaf])
def test_getattr(self):
    """Stored keys are readable as attributes; private names are not exposed."""
    with ensure_clean_store(self.path) as store:
        s = tm.makeTimeSeries()
        store['a'] = s

        # test attribute access
        result = store.a
        tm.assert_series_equal(result, s)
        result = getattr(store, 'a')
        tm.assert_series_equal(result, s)

        df = tm.makeTimeDataFrame()
        store['df'] = df
        result = store.df
        tm.assert_frame_equal(result, df)

        # errors
        pytest.raises(AttributeError, getattr, store, 'd')
        for x in ['mode', 'path', 'handle', 'complib']:
            pytest.raises(AttributeError, getattr, store, x)

        # not stores: the underscored internals still exist
        for x in ['mode', 'path', 'handle', 'complib']:
            getattr(store, "_%s" % x)
def test_put(self):
    """put() semantics: fixed vs table format, append errors, and overwrite."""
    with ensure_clean_store(self.path) as store:
        ts = tm.makeTimeSeries()
        df = tm.makeTimeDataFrame()
        store['a'] = ts
        store['b'] = df[:10]
        store['foo/bar/bah'] = df[:10]
        store['foo'] = df[:10]
        store['/foo'] = df[:10]
        store.put('c', df[:10], format='table')

        # not OK, not a table
        pytest.raises(
            ValueError, store.put, 'b', df[10:], append=True)

        # node does not currently exist, test _is_table_type returns False
        # in this case
        # _maybe_remove(store, 'f')
        # pytest.raises(ValueError, store.put, 'f', df[10:],
        #               append=True)

        # can't put to a table (use append instead)
        pytest.raises(ValueError, store.put, 'c', df[10:], append=True)

        # overwrite table
        store.put('c', df[:10], format='table', append=False)
        tm.assert_frame_equal(df[:10], store['c'])
def test_put_string_index(self):
    """Round-trip Series/DataFrame with long (and mixed-length) string indexes."""
    with ensure_clean_store(self.path) as store:
        index = Index(
            ["I am a very long string index: %s" % i for i in range(20)])
        s = Series(np.arange(20), index=index)
        df = DataFrame({'A': s, 'B': s})

        store['a'] = s
        tm.assert_series_equal(store['a'], s)

        store['b'] = df
        tm.assert_frame_equal(store['b'], df)

        # mixed length
        index = Index(['abcdefghijklmnopqrstuvwxyz1234567890'] +
                      ["I am a very long string index: %s" % i
                       for i in range(20)])
        s = Series(np.arange(21), index=index)
        df = DataFrame({'A': s, 'B': s})
        store['a'] = s
        tm.assert_series_equal(store['a'], s)

        store['b'] = df
        tm.assert_frame_equal(store['b'], df)
def test_put_compression(self):
    """zlib compression works for table format and is rejected for fixed."""
    with ensure_clean_store(self.path) as store:
        df = tm.makeTimeDataFrame()

        store.put('c', df, format='table', complib='zlib')
        tm.assert_frame_equal(store['c'], df)

        # can't compress if format='fixed'
        pytest.raises(ValueError, store.put, 'b', df,
                      format='fixed', complib='zlib')
@td.skip_if_windows_python_3
def test_put_compression_blosc(self):
    """blosc compression works for table format and is rejected for fixed."""
    df = tm.makeTimeDataFrame()

    with ensure_clean_store(self.path) as store:
        # can't compress if format='fixed'
        pytest.raises(ValueError, store.put, 'b', df,
                      format='fixed', complib='blosc')

        store.put('c', df, format='table', complib='blosc')
        tm.assert_frame_equal(store['c'], df)
def test_complibs_default_settings(self):
    """Default complib/complevel behavior (GH15943).

    - complevel alone implies complib='zlib'
    - complib alone (no complevel) means no compression
    - neither set means no compression
    - per-table settings override the file default
    """
    # GH15943
    df = tm.makeDataFrame()

    # Set complevel and check if complib is automatically set to
    # default value
    with ensure_clean_path(self.path) as tmpfile:
        df.to_hdf(tmpfile, 'df', complevel=9)
        result = pd.read_hdf(tmpfile, 'df')
        tm.assert_frame_equal(result, df)

        with tables.open_file(tmpfile, mode='r') as h5file:
            for node in h5file.walk_nodes(where='/df', classname='Leaf'):
                assert node.filters.complevel == 9
                assert node.filters.complib == 'zlib'

    # Set complib and check to see if compression is disabled
    with ensure_clean_path(self.path) as tmpfile:
        df.to_hdf(tmpfile, 'df', complib='zlib')
        result = pd.read_hdf(tmpfile, 'df')
        tm.assert_frame_equal(result, df)

        with tables.open_file(tmpfile, mode='r') as h5file:
            for node in h5file.walk_nodes(where='/df', classname='Leaf'):
                assert node.filters.complevel == 0
                assert node.filters.complib is None

    # Check if not setting complib or complevel results in no compression
    with ensure_clean_path(self.path) as tmpfile:
        df.to_hdf(tmpfile, 'df')
        result = pd.read_hdf(tmpfile, 'df')
        tm.assert_frame_equal(result, df)

        with tables.open_file(tmpfile, mode='r') as h5file:
            for node in h5file.walk_nodes(where='/df', classname='Leaf'):
                assert node.filters.complevel == 0
                assert node.filters.complib is None

    # Check if file-defaults can be overridden on a per table basis
    with ensure_clean_path(self.path) as tmpfile:
        # Use a context manager so the store is closed even if an
        # append raises (the original left it open on failure).
        with pd.HDFStore(tmpfile) as store:
            store.append('dfc', df, complevel=9, complib='blosc')
            store.append('df', df)

        with tables.open_file(tmpfile, mode='r') as h5file:
            for node in h5file.walk_nodes(where='/df', classname='Leaf'):
                assert node.filters.complevel == 0
                assert node.filters.complib is None
            for node in h5file.walk_nodes(where='/dfc', classname='Leaf'):
                assert node.filters.complevel == 9
                assert node.filters.complib == 'blosc'
def test_complibs(self):
    """Round-trip every available complib at every complevel (GH14478)."""
    # GH14478
    df = tm.makeDataFrame()

    # Building list of all complibs and complevels tuples.
    # NOTE: copy the list — the original code aliased the PyTables
    # module-level list and then mutated it with .remove(), corrupting
    # tables.filters.all_complibs for every later caller in the process.
    all_complibs = list(tables.filters.all_complibs)
    # Remove lzo if its not available on this platform
    if not tables.which_lib_version('lzo'):
        all_complibs.remove('lzo')
    # Remove bzip2 if its not available on this platform
    if not tables.which_lib_version("bzip2"):
        all_complibs.remove("bzip2")

    all_levels = range(0, 10)
    all_tests = [(lib, lvl) for lib in all_complibs for lvl in all_levels]

    for (lib, lvl) in all_tests:
        with ensure_clean_path(self.path) as tmpfile:
            gname = 'foo'

            # Write and read file to see if data is consistent
            df.to_hdf(tmpfile, gname, complib=lib, complevel=lvl)
            result = pd.read_hdf(tmpfile, gname)
            tm.assert_frame_equal(result, df)

            # Open file and check metadata for correct amount of
            # compression.  Use a context manager so the handle is
            # released even if an assertion fails (the original
            # close() was skipped on failure).
            with tables.open_file(tmpfile, mode='r') as h5table:
                for node in h5table.walk_nodes(where='/' + gname,
                                               classname='Leaf'):
                    assert node.filters.complevel == lvl
                    if lvl == 0:
                        assert node.filters.complib is None
                    else:
                        assert node.filters.complib == lib
def test_put_integer(self):
    """Round-trip a frame with a plain integer (non-date, non-string) index."""
    df = DataFrame(np.random.randn(50, 100))
    self._check_roundtrip(df, tm.assert_frame_equal)
@xfail_non_writeable
def test_put_mixed_type(self):
    """put() a frame mixing object, bool, int, and datetime columns."""
    df = tm.makeTimeDataFrame()
    df['obj1'] = 'foo'
    df['obj2'] = 'bar'
    df['bool1'] = df['A'] > 0
    df['bool2'] = df['B'] > 0
    df['bool3'] = True
    df['int1'] = 1
    df['int2'] = 2
    df['timestamp1'] = Timestamp('20010102')
    df['timestamp2'] = Timestamp('20010103')
    df['datetime1'] = datetime.datetime(2001, 1, 2, 0, 0)
    df['datetime2'] = datetime.datetime(2001, 1, 3, 0, 0)
    df.loc[3:6, ['obj1']] = np.nan
    df = df._consolidate()._convert(datetime=True)

    with ensure_clean_store(self.path) as store:
        _maybe_remove(store, 'df')

        # PerformanceWarning
        with catch_warnings(record=True):
            simplefilter("ignore", pd.errors.PerformanceWarning)
            store.put('df', df)

        expected = store.get('df')
        tm.assert_frame_equal(expected, df)
@pytest.mark.filterwarnings(
    "ignore:object name:tables.exceptions.NaturalNameWarning"
)
def test_append(self):
    """append() across frames, odd key names, panels, mixed dtypes, and uints."""
    with ensure_clean_store(self.path) as store:
        # this is allowed by almost always don't want to do it
        # tables.NaturalNameWarning):
        with catch_warnings(record=True):

            df = tm.makeTimeDataFrame()
            _maybe_remove(store, 'df1')
            store.append('df1', df[:10])
            store.append('df1', df[10:])
            tm.assert_frame_equal(store['df1'], df)

            _maybe_remove(store, 'df2')
            store.put('df2', df[:10], format='table')
            store.append('df2', df[10:])
            tm.assert_frame_equal(store['df2'], df)

            _maybe_remove(store, 'df3')
            store.append('/df3', df[:10])
            store.append('/df3', df[10:])
            tm.assert_frame_equal(store['df3'], df)

            # this is allowed by almost always don't want to do it
            # tables.NaturalNameWarning
            _maybe_remove(store, '/df3 foo')
            store.append('/df3 foo', df[:10])
            store.append('/df3 foo', df[10:])
            tm.assert_frame_equal(store['df3 foo'], df)

            # panel
            wp = tm.makePanel()
            _maybe_remove(store, 'wp1')
            store.append('wp1', wp.iloc[:, :10, :])
            store.append('wp1', wp.iloc[:, 10:, :])
            assert_panel_equal(store['wp1'], wp)

            # test using differt order of items on the non-index axes
            _maybe_remove(store, 'wp1')
            wp_append1 = wp.iloc[:, :10, :]
            store.append('wp1', wp_append1)
            wp_append2 = wp.iloc[:, 10:, :].reindex(items=wp.items[::-1])
            store.append('wp1', wp_append2)
            assert_panel_equal(store['wp1'], wp)

            # dtype issues - mizxed type in a single object column
            df = DataFrame(data=[[1, 2], [0, 1], [1, 2], [0, 0]])
            df['mixed_column'] = 'testing'
            df.loc[2, 'mixed_column'] = np.nan
            _maybe_remove(store, 'df')
            store.append('df', df)
            tm.assert_frame_equal(store['df'], df)

            # uints - test storage of uints
            uint_data = DataFrame({
                'u08': Series(np.random.randint(0, high=255, size=5),
                              dtype=np.uint8),
                'u16': Series(np.random.randint(0, high=65535, size=5),
                              dtype=np.uint16),
                'u32': Series(np.random.randint(0, high=2**30, size=5),
                              dtype=np.uint32),
                'u64': Series([2**58, 2**59, 2**60, 2**61, 2**62],
                              dtype=np.uint64)}, index=np.arange(5))
            _maybe_remove(store, 'uints')
            store.append('uints', uint_data)
            tm.assert_frame_equal(store['uints'], uint_data)

            # uints - test storage of uints in indexable columns
            _maybe_remove(store, 'uints')
            # 64-bit indices not yet supported
            store.append('uints', uint_data, data_columns=[
                         'u08', 'u16', 'u32'])
            tm.assert_frame_equal(store['uints'], uint_data)
def test_append_series(self):
    """append() for Series: names, value/index selection, and MultiIndex."""
    with ensure_clean_store(self.path) as store:

        # basic
        ss = tm.makeStringSeries()
        ts = tm.makeTimeSeries()
        ns = Series(np.arange(100))

        store.append('ss', ss)
        result = store['ss']
        tm.assert_series_equal(result, ss)
        assert result.name is None

        store.append('ts', ts)
        result = store['ts']
        tm.assert_series_equal(result, ts)
        assert result.name is None

        ns.name = 'foo'
        store.append('ns', ns)
        result = store['ns']
        tm.assert_series_equal(result, ns)
        assert result.name == ns.name

        # select on the values
        expected = ns[ns > 60]
        result = store.select('ns', 'foo>60')
        tm.assert_series_equal(result, expected)

        # select on the index and values
        expected = ns[(ns > 70) & (ns.index < 90)]
        result = store.select('ns', 'foo>70 and index<90')
        tm.assert_series_equal(result, expected)

        # multi-index
        mi = DataFrame(np.random.randn(5, 1), columns=['A'])
        mi['B'] = np.arange(len(mi))
        mi['C'] = 'foo'
        mi.loc[3:5, 'C'] = 'bar'
        mi.set_index(['C', 'B'], inplace=True)
        s = mi.stack()
        s.index = s.index.droplevel(2)
        store.append('mi', s)
        tm.assert_series_equal(store['mi'], s)
def test_store_index_types(self):
    """Round-trip various index types in both fixed and table formats (GH5386)."""
    # GH5386
    with ensure_clean_store(self.path) as store:

        def check(format, index):
            df = DataFrame(np.random.randn(10, 2), columns=list('AB'))
            df.index = index(len(df))
            _maybe_remove(store, 'df')
            store.put('df', df, format=format)
            assert_frame_equal(df, store['df'])

        for index in [tm.makeFloatIndex, tm.makeStringIndex,
                      tm.makeIntIndex, tm.makeDateIndex]:
            check('table', index)
            check('fixed', index)

        # period index currently broken for table
        # seee GH7796 FIXME
        check('fixed', tm.makePeriodIndex)
        # check('table',tm.makePeriodIndex)

        # unicode
        index = tm.makeUnicodeIndex
        if compat.PY3:
            check('table', index)
            check('fixed', index)
        else:
            # only support for fixed types (and they have a perf warning)
            pytest.raises(TypeError, check, 'table', index)

            # PerformanceWarning
            with catch_warnings(record=True):
                simplefilter("ignore", pd.errors.PerformanceWarning)
                check('fixed', index)
@pytest.mark.skipif(not is_platform_little_endian(),
                    reason="reason platform is not little endian")
def test_encoding(self):
    """append/select with an explicit ascii encoding round-trips NaNs too."""
    with ensure_clean_store(self.path) as store:
        df = DataFrame(dict(A='foo', B='bar'), index=range(5))
        df.loc[2, 'A'] = np.nan
        df.loc[3, 'B'] = np.nan
        _maybe_remove(store, 'df')
        store.append('df', df, encoding='ascii')
        tm.assert_frame_equal(store['df'], df)

        expected = df.reindex(columns=['A'])
        result = store.select('df', Term('columns=A', encoding='ascii'))
        tm.assert_frame_equal(result, expected)
def test_latin_encoding(self):
    """Round-trip latin-1 encodable values (object and category dtypes)."""
    if compat.PY2:
        pytest.skip("[unicode] is not implemented as a table column")

    values = [[b'E\xc9, 17', b'', b'a', b'b', b'c'],
              [b'E\xc9, 17', b'a', b'b', b'c'],
              [b'EE, 17', b'', b'a', b'b', b'c'],
              [b'E\xc9, 17', b'\xf8\xfc', b'a', b'b', b'c'],
              [b'', b'a', b'b', b'c'],
              [b'\xf8\xfc', b'a', b'b', b'c'],
              [b'A\xf8\xfc', b'', b'a', b'b', b'c'],
              [np.nan, b'', b'b', b'c'],
              [b'A\xf8\xfc', np.nan, b'', b'b', b'c']]

    def _try_decode(x, encoding='latin-1'):
        try:
            return x.decode(encoding)
        except AttributeError:
            return x

    # not sure how to remove latin-1 from code in python 2 and 3
    values = [[_try_decode(x) for x in y] for y in values]

    examples = []
    for dtype in ['category', object]:
        for val in values:
            examples.append(pd.Series(val, dtype=dtype))

    def roundtrip(s, key='data', encoding='latin-1', nan_rep=''):
        with ensure_clean_path(self.path) as store:
            s.to_hdf(store, key, format='table', encoding=encoding,
                     nan_rep=nan_rep)
            retr = read_hdf(store, key)
            s_nan = s.replace(nan_rep, np.nan)
            if is_categorical_dtype(s_nan):
                assert is_categorical_dtype(retr)
                assert_series_equal(s_nan, retr, check_dtype=False,
                                    check_categorical=False)
            else:
                assert_series_equal(s_nan, retr)

    for s in examples:
        roundtrip(s)

    # fails:
    # for x in examples:
    #     roundtrip(s, nan_rep=b'\xf8\xfc')
def test_append_some_nans(self):
    """append() round-trips frames with NaNs in various column subsets."""
    with ensure_clean_store(self.path) as store:
        df = DataFrame({'A': Series(np.random.randn(20)).astype('int32'),
                        'A1': np.random.randn(20),
                        'A2': np.random.randn(20),
                        'B': 'foo', 'C': 'bar',
                        'D': Timestamp("20010101"),
                        'E': datetime.datetime(2001, 1, 2, 0, 0)},
                       index=np.arange(20))
        # some nans
        _maybe_remove(store, 'df1')
        df.loc[0:15, ['A1', 'B', 'D', 'E']] = np.nan
        store.append('df1', df[:10])
        store.append('df1', df[10:])
        tm.assert_frame_equal(store['df1'], df)

        # first column
        df1 = df.copy()
        df1.loc[:, 'A1'] = np.nan
        _maybe_remove(store, 'df1')
        store.append('df1', df1[:10])
        store.append('df1', df1[10:])
        tm.assert_frame_equal(store['df1'], df1)

        # 2nd column
        df2 = df.copy()
        df2.loc[:, 'A2'] = np.nan
        _maybe_remove(store, 'df2')
        store.append('df2', df2[:10])
        store.append('df2', df2[10:])
        tm.assert_frame_equal(store['df2'], df2)

        # datetimes
        df3 = df.copy()
        df3.loc[:, 'E'] = np.nan
        _maybe_remove(store, 'df3')
        store.append('df3', df3[:10])
        store.append('df3', df3[10:])
        tm.assert_frame_equal(store['df3'], df3)
def test_append_all_nans(self):
    """dropna behavior for all-NaN rows, with and without string/date columns."""
    with ensure_clean_store(self.path) as store:

        df = DataFrame({'A1': np.random.randn(20),
                        'A2': np.random.randn(20)},
                       index=np.arange(20))
        df.loc[0:15, :] = np.nan

        # nan some entire rows (dropna=True)
        _maybe_remove(store, 'df')
        store.append('df', df[:10], dropna=True)
        store.append('df', df[10:], dropna=True)
        tm.assert_frame_equal(store['df'], df[-4:])

        # nan some entire rows (dropna=False)
        _maybe_remove(store, 'df2')
        store.append('df2', df[:10], dropna=False)
        store.append('df2', df[10:], dropna=False)
        tm.assert_frame_equal(store['df2'], df)

        # tests the option io.hdf.dropna_table
        pd.set_option('io.hdf.dropna_table', False)
        _maybe_remove(store, 'df3')
        store.append('df3', df[:10])
        store.append('df3', df[10:])
        tm.assert_frame_equal(store['df3'], df)

        pd.set_option('io.hdf.dropna_table', True)
        _maybe_remove(store, 'df4')
        store.append('df4', df[:10])
        store.append('df4', df[10:])
        tm.assert_frame_equal(store['df4'], df[-4:])

        # nan some entire rows (string are still written!)
        df = DataFrame({'A1': np.random.randn(20),
                        'A2': np.random.randn(20),
                        'B': 'foo', 'C': 'bar'},
                       index=np.arange(20))

        df.loc[0:15, :] = np.nan

        _maybe_remove(store, 'df')
        store.append('df', df[:10], dropna=True)
        store.append('df', df[10:], dropna=True)
        tm.assert_frame_equal(store['df'], df)

        _maybe_remove(store, 'df2')
        store.append('df2', df[:10], dropna=False)
        store.append('df2', df[10:], dropna=False)
        tm.assert_frame_equal(store['df2'], df)

        # nan some entire rows (but since we have dates they are still
        # written!)
        df = DataFrame({'A1': np.random.randn(20),
                        'A2': np.random.randn(20),
                        'B': 'foo', 'C': 'bar',
                        'D': Timestamp("20010101"),
                        'E': datetime.datetime(2001, 1, 2, 0, 0)},
                       index=np.arange(20))

        df.loc[0:15, :] = np.nan

        _maybe_remove(store, 'df')
        store.append('df', df[:10], dropna=True)
        store.append('df', df[10:], dropna=True)
        tm.assert_frame_equal(store['df'], df)

        _maybe_remove(store, 'df2')
        store.append('df2', df[:10], dropna=False)
        store.append('df2', df[10:], dropna=False)
        tm.assert_frame_equal(store['df2'], df)

    # Test to make sure defaults are to not drop.
    # Corresponding to Issue 9382
    df_with_missing = DataFrame(
        {'col1': [0, np.nan, 2], 'col2': [1, np.nan, np.nan]})

    with ensure_clean_path(self.path) as path:
        df_with_missing.to_hdf(path, 'df_with_missing', format='table')
        reloaded = read_hdf(path, 'df_with_missing')
        tm.assert_frame_equal(df_with_missing, reloaded)

    matrix = [[[np.nan, np.nan, np.nan], [1, np.nan, np.nan]],
              [[np.nan, np.nan, np.nan], [np.nan, 5, 6]],
              [[np.nan, np.nan, np.nan], [np.nan, 3, np.nan]]]

    with catch_warnings(record=True):
        panel_with_missing = Panel(matrix,
                                   items=['Item1', 'Item2', 'Item3'],
                                   major_axis=[1, 2],
                                   minor_axis=['A', 'B', 'C'])

        with ensure_clean_path(self.path) as path:
            panel_with_missing.to_hdf(
                path, 'panel_with_missing', format='table')
            reloaded_panel = read_hdf(path, 'panel_with_missing')
            tm.assert_panel_equal(panel_with_missing, reloaded_panel)
def test_append_frame_column_oriented(self):
    """Column-axis appends plus selection; non-indexable index selection raises."""
    with ensure_clean_store(self.path) as store:

        # column oriented
        df = tm.makeTimeDataFrame()
        _maybe_remove(store, 'df1')
        store.append('df1', df.iloc[:, :2], axes=['columns'])
        store.append('df1', df.iloc[:, 2:])
        tm.assert_frame_equal(store['df1'], df)

        result = store.select('df1', 'columns=A')
        expected = df.reindex(columns=['A'])
        tm.assert_frame_equal(expected, result)

        # selection on the non-indexable
        result = store.select(
            'df1', ('columns=A', 'index=df.index[0:4]'))
        expected = df.reindex(columns=['A'], index=df.index[0:4])
        tm.assert_frame_equal(expected, result)

        # this isn't supported
        with pytest.raises(TypeError):
            store.select('df1',
                         'columns=A and index>df.index[4]')
def test_append_with_different_block_ordering(self):
    """Appending frames with differing internal block orderings (GH 4096)."""
    # GH 4096; using same frames, but different block orderings
    with ensure_clean_store(self.path) as store:

        for i in range(10):

            df = DataFrame(np.random.randn(10, 2), columns=list('AB'))
            df['index'] = range(10)
            df['index'] += i * 10
            df['int64'] = Series([1] * len(df), dtype='int64')
            df['int16'] = Series([1] * len(df), dtype='int16')

            if i % 2 == 0:
                del df['int64']
                df['int64'] = Series([1] * len(df), dtype='int64')
            if i % 3 == 0:
                a = df.pop('A')
                df['A'] = a

            df.set_index('index', inplace=True)

            store.append('df', df)

    # test a different ordering but with more fields (like invalid
    # combinate)
    with ensure_clean_store(self.path) as store:

        df = DataFrame(np.random.randn(10, 2),
                       columns=list('AB'), dtype='float64')
        df['int64'] = Series([1] * len(df), dtype='int64')
        df['int16'] = Series([1] * len(df), dtype='int16')
        store.append('df', df)

        # store additional fields in different blocks
        df['int16_2'] = Series([1] * len(df), dtype='int16')
        pytest.raises(ValueError, store.append, 'df', df)

        # store multile additional fields in different blocks
        df['float_3'] = Series([1.] * len(df), dtype='float64')
        pytest.raises(ValueError, store.append, 'df', df)
def test_append_with_strings(self):
    """min_itemsize handling for string columns, indexes, and data_columns."""
    with ensure_clean_store(self.path) as store:
        with catch_warnings(record=True):
            simplefilter("ignore", FutureWarning)
            wp = tm.makePanel()
            wp2 = wp.rename(
                minor_axis={x: "%s_extra" % x for x in wp.minor_axis})

            def check_col(key, name, size):
                assert getattr(store.get_storer(key)
                               .table.description, name).itemsize == size

            store.append('s1', wp, min_itemsize=20)
            store.append('s1', wp2)
            expected = concat([wp, wp2], axis=2)
            expected = expected.reindex(
                minor_axis=sorted(expected.minor_axis))
            assert_panel_equal(store['s1'], expected)
            check_col('s1', 'minor_axis', 20)

            # test dict format
            store.append('s2', wp, min_itemsize={'minor_axis': 20})
            store.append('s2', wp2)
            expected = concat([wp, wp2], axis=2)
            expected = expected.reindex(
                minor_axis=sorted(expected.minor_axis))
            assert_panel_equal(store['s2'], expected)
            check_col('s2', 'minor_axis', 20)

            # apply the wrong field (similar to #1)
            store.append('s3', wp, min_itemsize={'major_axis': 20})
            pytest.raises(ValueError, store.append, 's3', wp2)

            # test truncation of bigger strings
            store.append('s4', wp)
            pytest.raises(ValueError, store.append, 's4', wp2)

            # avoid truncation on elements
            df = DataFrame([[123, 'asdqwerty'], [345, 'dggnhebbsdfbdfb']])
            store.append('df_big', df)
            tm.assert_frame_equal(store.select('df_big'), df)
            check_col('df_big', 'values_block_1', 15)

            # appending smaller string ok
            df2 = DataFrame([[124, 'asdqy'], [346, 'dggnhefbdfb']])
            store.append('df_big', df2)
            expected = concat([df, df2])
            tm.assert_frame_equal(store.select('df_big'), expected)
            check_col('df_big', 'values_block_1', 15)

            # avoid truncation on elements
            df = DataFrame([[123, 'asdqwerty'], [345, 'dggnhebbsdfbdfb']])
            store.append('df_big2', df, min_itemsize={'values': 50})
            tm.assert_frame_equal(store.select('df_big2'), df)
            check_col('df_big2', 'values_block_1', 50)

            # bigger string on next append
            store.append('df_new', df)
            df_new = DataFrame(
                [[124, 'abcdefqhij'], [346, 'abcdefghijklmnopqrtsuvwxyz']])
            pytest.raises(ValueError, store.append, 'df_new', df_new)

            # min_itemsize on Series index (GH 11412)
            df = tm.makeMixedDataFrame().set_index('C')
            store.append('ss', df['B'], min_itemsize={'index': 4})
            tm.assert_series_equal(store.select('ss'), df['B'])

            # same as above, with data_columns=True
            store.append('ss2', df['B'], data_columns=True,
                         min_itemsize={'index': 4})
            tm.assert_series_equal(store.select('ss2'), df['B'])

            # min_itemsize in index without appending (GH 10381)
            store.put('ss3', df, format='table',
                      min_itemsize={'index': 6})
            # just make sure there is a longer string:
            df2 = df.copy().reset_index().assign(C='longer').set_index('C')
            store.append('ss3', df2)
            tm.assert_frame_equal(store.select('ss3'),
                                  pd.concat([df, df2]))

            # same as above, with a Series
            store.put('ss4', df['B'], format='table',
                      min_itemsize={'index': 6})
            store.append('ss4', df2['B'])
            tm.assert_series_equal(store.select('ss4'),
                                   pd.concat([df['B'], df2['B']]))

            # with nans
            _maybe_remove(store, 'df')
            df = tm.makeTimeDataFrame()
            df['string'] = 'foo'
            df.loc[1:4, 'string'] = np.nan
            df['string2'] = 'bar'
            df.loc[4:8, 'string2'] = np.nan
            df['string3'] = 'bah'
            df.loc[1:, 'string3'] = np.nan
            store.append('df', df)
            result = store.select('df')
            tm.assert_frame_equal(result, df)

    with ensure_clean_store(self.path) as store:

        def check_col(key, name, size):
            assert getattr(store.get_storer(key)
                           .table.description, name).itemsize, size

        df = DataFrame(dict(A='foo', B='bar'), index=range(10))

        # a min_itemsize that creates a data_column
        _maybe_remove(store, 'df')
        store.append('df', df, min_itemsize={'A': 200})
        check_col('df', 'A', 200)
        assert store.get_storer('df').data_columns == ['A']

        # a min_itemsize that creates a data_column2
        _maybe_remove(store, 'df')
        store.append('df', df, data_columns=['B'], min_itemsize={'A': 200})
        check_col('df', 'A', 200)
        assert store.get_storer('df').data_columns == ['B', 'A']

        # a min_itemsize that creates a data_column2
        _maybe_remove(store, 'df')
        store.append('df', df, data_columns=[
                     'B'], min_itemsize={'values': 200})
        check_col('df', 'B', 200)
        check_col('df', 'values_block_0', 200)
        assert store.get_storer('df').data_columns == ['B']

        # infer the .typ on subsequent appends
        _maybe_remove(store, 'df')
        store.append('df', df[:5], min_itemsize=200)
        store.append('df', df[5:], min_itemsize=200)
        tm.assert_frame_equal(store['df'], df)

        # invalid min_itemsize keys
        df = DataFrame(['foo', 'foo', 'foo', 'barh',
                        'barh', 'barh'], columns=['A'])
        _maybe_remove(store, 'df')
        pytest.raises(ValueError, store.append, 'df',
                      df, min_itemsize={'foo': 20, 'foobar': 20})
def test_append_with_empty_string(self):
    """All-empty-string column appends round-trip (GH 12242)."""
    with ensure_clean_store(self.path) as store:
        # with all empty strings (GH 12242)
        df = DataFrame({'x': ['a', 'b', 'c', 'd', 'e', 'f', '']})
        store.append('df', df[:-1], min_itemsize={'x': 1})
        store.append('df', df[-1:], min_itemsize={'x': 1})
        tm.assert_frame_equal(store.select('df'), df)
    def test_to_hdf_with_min_itemsize(self):
        """``min_itemsize`` for the index is honored by ``to_hdf`` so that a
        later append with longer index strings succeeds (GH 10381)."""

        with ensure_clean_path(self.path) as path:

            # min_itemsize in index with to_hdf (GH 10381)
            df = tm.makeMixedDataFrame().set_index('C')
            df.to_hdf(path, 'ss3', format='table', min_itemsize={'index': 6})
            # just make sure there is a longer string:
            df2 = df.copy().reset_index().assign(C='longer').set_index('C')
            df2.to_hdf(path, 'ss3', append=True, format='table')
            tm.assert_frame_equal(pd.read_hdf(path, 'ss3'),
                                  pd.concat([df, df2]))

            # same as above, with a Series
            df['B'].to_hdf(path, 'ss4', format='table',
                           min_itemsize={'index': 6})
            df2['B'].to_hdf(path, 'ss4', append=True, format='table')
            tm.assert_series_equal(pd.read_hdf(path, 'ss4'),
                                   pd.concat([df['B'], df2['B']]))
    @pytest.mark.parametrize(
        "format",
        [pytest.param('fixed', marks=xfail_non_writeable),
         'table'])
    def test_to_hdf_errors(self, format):
        """The ``errors`` encoding argument is passed through to both
        ``to_hdf`` and ``read_hdf`` (GH 20835): a lone surrogate code point
        round-trips with 'surrogatepass'."""

        data = ['\ud800foo']
        ser = pd.Series(data, index=pd.Index(data))
        with ensure_clean_path(self.path) as path:
            # GH 20835
            ser.to_hdf(path, 'table', format=format, errors='surrogatepass')

            result = pd.read_hdf(path, 'table', errors='surrogatepass')
            tm.assert_series_equal(result, ser)
    def test_append_with_data_columns(self):
        """Exercise ``data_columns`` on append: automatic index creation,
        where-clause searching on data columns, string data columns,
        ``min_itemsize`` interaction, and the doc examples (incl. panels,
        GH 5717)."""

        with ensure_clean_store(self.path) as store:
            df = tm.makeTimeDataFrame()
            df.iloc[0, df.columns.get_loc('B')] = 1.
            _maybe_remove(store, 'df')
            store.append('df', df[:2], data_columns=['B'])
            store.append('df', df[2:])
            tm.assert_frame_equal(store['df'], df)

            # check that we have indices created
            assert(store._handle.root.df.table.cols.index.is_indexed is True)
            assert(store._handle.root.df.table.cols.B.is_indexed is True)

            # data column searching
            result = store.select('df', 'B>0')
            expected = df[df.B > 0]
            tm.assert_frame_equal(result, expected)

            # data column searching (with an indexable and a data_columns)
            result = store.select(
                'df', 'B>0 and index>df.index[3]')
            df_new = df.reindex(index=df.index[4:])
            expected = df_new[df_new.B > 0]
            tm.assert_frame_equal(result, expected)

            # data column selection with a string data_column
            df_new = df.copy()
            df_new['string'] = 'foo'
            df_new.loc[1:4, 'string'] = np.nan
            df_new.loc[5:6, 'string'] = 'bar'
            _maybe_remove(store, 'df')
            store.append('df', df_new, data_columns=['string'])
            result = store.select('df', "string='foo'")
            expected = df_new[df_new.string == 'foo']
            tm.assert_frame_equal(result, expected)

        # using min_itemsize and a data column
        def check_col(key, name, size):
            # assert the on-disk itemsize of a stored column
            assert getattr(store.get_storer(key)
                           .table.description, name).itemsize == size

        with ensure_clean_store(self.path) as store:
            _maybe_remove(store, 'df')
            store.append('df', df_new, data_columns=['string'],
                         min_itemsize={'string': 30})
            check_col('df', 'string', 30)
            _maybe_remove(store, 'df')
            # scalar min_itemsize applies to all columns
            store.append(
                'df', df_new, data_columns=['string'], min_itemsize=30)
            check_col('df', 'string', 30)
            _maybe_remove(store, 'df')
            store.append('df', df_new, data_columns=['string'],
                         min_itemsize={'values': 30})
            check_col('df', 'string', 30)

        with ensure_clean_store(self.path) as store:
            df_new['string2'] = 'foobarbah'
            df_new['string_block1'] = 'foobarbah1'
            df_new['string_block2'] = 'foobarbah2'
            _maybe_remove(store, 'df')
            store.append('df', df_new, data_columns=['string', 'string2'],
                         min_itemsize={'string': 30, 'string2': 40,
                                       'values': 50})
            check_col('df', 'string', 30)
            check_col('df', 'string2', 40)
            # 'values' applies to the remaining (non-data-column) block
            check_col('df', 'values_block_1', 50)

        with ensure_clean_store(self.path) as store:
            # multiple data columns
            df_new = df.copy()
            df_new.iloc[0, df_new.columns.get_loc('A')] = 1.
            df_new.iloc[0, df_new.columns.get_loc('B')] = -1.
            df_new['string'] = 'foo'

            sl = df_new.columns.get_loc('string')
            df_new.iloc[1:4, sl] = np.nan
            df_new.iloc[5:6, sl] = 'bar'

            df_new['string2'] = 'foo'
            sl = df_new.columns.get_loc('string2')
            df_new.iloc[2:5, sl] = np.nan
            df_new.iloc[7:8, sl] = 'bar'
            _maybe_remove(store, 'df')
            store.append(
                'df', df_new, data_columns=['A', 'B', 'string', 'string2'])
            result = store.select('df',
                                  "string='foo' and string2='foo'"
                                  " and A>0 and B<0")
            expected = df_new[(df_new.string == 'foo') & (
                df_new.string2 == 'foo') & (df_new.A > 0) & (df_new.B < 0)]
            tm.assert_frame_equal(result, expected, check_index_type=False)

            # yield an empty frame
            result = store.select('df', "string='foo' and string2='cool'")
            expected = df_new[(df_new.string == 'foo') & (
                df_new.string2 == 'cool')]
            tm.assert_frame_equal(result, expected, check_index_type=False)

        with ensure_clean_store(self.path) as store:
            # doc example
            df_dc = df.copy()
            df_dc['string'] = 'foo'
            df_dc.loc[4:6, 'string'] = np.nan
            df_dc.loc[7:9, 'string'] = 'bar'
            df_dc['string2'] = 'cool'
            df_dc['datetime'] = Timestamp('20010102')
            df_dc = df_dc._convert(datetime=True)
            df_dc.loc[3:5, ['A', 'B', 'datetime']] = np.nan

            _maybe_remove(store, 'df_dc')
            store.append('df_dc', df_dc,
                         data_columns=['B', 'C', 'string',
                                       'string2', 'datetime'])
            result = store.select('df_dc', 'B>0')

            expected = df_dc[df_dc.B > 0]
            tm.assert_frame_equal(result, expected, check_index_type=False)

            result = store.select(
                'df_dc', ['B > 0', 'C > 0', 'string == foo'])
            expected = df_dc[(df_dc.B > 0) & (df_dc.C > 0) & (
                df_dc.string == 'foo')]
            tm.assert_frame_equal(result, expected, check_index_type=False)

        with ensure_clean_store(self.path) as store:
            # doc example part 2
            np.random.seed(1234)
            index = date_range('1/1/2000', periods=8)
            df_dc = DataFrame(np.random.randn(8, 3), index=index,
                              columns=['A', 'B', 'C'])
            df_dc['string'] = 'foo'
            df_dc.loc[4:6, 'string'] = np.nan
            df_dc.loc[7:9, 'string'] = 'bar'
            df_dc.loc[:, ['B', 'C']] = df_dc.loc[:, ['B', 'C']].abs()
            df_dc['string2'] = 'cool'

            # on-disk operations
            store.append('df_dc', df_dc, data_columns=[
                         'B', 'C', 'string', 'string2'])

            result = store.select('df_dc', 'B>0')
            expected = df_dc[df_dc.B > 0]
            tm.assert_frame_equal(result, expected)

            result = store.select(
                'df_dc', ['B > 0', 'C > 0', 'string == "foo"'])
            expected = df_dc[(df_dc.B > 0) & (df_dc.C > 0) &
                             (df_dc.string == 'foo')]
            tm.assert_frame_equal(result, expected)

        with ensure_clean_store(self.path) as store:
            with catch_warnings(record=True):
                # panel
                # GH5717 not handling data_columns
                np.random.seed(1234)
                p = tm.makePanel()

                store.append('p1', p)
                tm.assert_panel_equal(store.select('p1'), p)

                store.append('p2', p, data_columns=True)
                tm.assert_panel_equal(store.select('p2'), p)

                result = store.select('p2', where='ItemA>0')
                expected = p.to_frame()
                expected = expected[expected['ItemA'] > 0]
                tm.assert_frame_equal(result.to_frame(), expected)

                result = store.select(
                    'p2', where='ItemA>0 & minor_axis=["A","B"]')
                expected = p.to_frame()
                expected = expected[expected['ItemA'] > 0]
                expected = expected[expected.reset_index(
                    level=['major']).index.isin(['A', 'B'])]
                tm.assert_frame_equal(result.to_frame(), expected)
    def test_create_table_index(self):
        """``create_table_index`` controls which columns get PyTables
        indexes, and the ``optlevel``/``kind`` of those indexes."""

        with ensure_clean_store(self.path) as store:

            with catch_warnings(record=True):

                def col(t, column):
                    # shortcut to the PyTables column object of table `t`
                    return getattr(store.get_storer(t).table.cols, column)

                # index=False
                wp = tm.makePanel()
                store.append('p5', wp, index=False)
                store.create_table_index('p5', columns=['major_axis'])
                assert(col('p5', 'major_axis').is_indexed is True)
                assert(col('p5', 'minor_axis').is_indexed is False)

                # index=True
                store.append('p5i', wp, index=True)
                assert(col('p5i', 'major_axis').is_indexed is True)
                assert(col('p5i', 'minor_axis').is_indexed is True)

                # default optlevels
                store.get_storer('p5').create_index()
                assert(col('p5', 'major_axis').index.optlevel == 6)
                assert(col('p5', 'minor_axis').index.kind == 'medium')

                # let's change the indexing scheme
                store.create_table_index('p5')
                assert(col('p5', 'major_axis').index.optlevel == 6)
                assert(col('p5', 'minor_axis').index.kind == 'medium')
                store.create_table_index('p5', optlevel=9)
                assert(col('p5', 'major_axis').index.optlevel == 9)
                assert(col('p5', 'minor_axis').index.kind == 'medium')
                store.create_table_index('p5', kind='full')
                assert(col('p5', 'major_axis').index.optlevel == 9)
                assert(col('p5', 'minor_axis').index.kind == 'full')
                store.create_table_index('p5', optlevel=1, kind='light')
                assert(col('p5', 'major_axis').index.optlevel == 1)
                assert(col('p5', 'minor_axis').index.kind == 'light')

                # data columns
                df = tm.makeTimeDataFrame()
                df['string'] = 'foo'
                df['string2'] = 'bar'
                store.append('f', df, data_columns=['string', 'string2'])
                assert(col('f', 'index').is_indexed is True)
                assert(col('f', 'string').is_indexed is True)
                assert(col('f', 'string2').is_indexed is True)

                # specify index=columns
                store.append(
                    'f2', df, index=['string'],
                    data_columns=['string', 'string2'])
                assert(col('f2', 'index').is_indexed is False)
                assert(col('f2', 'string').is_indexed is True)
                assert(col('f2', 'string2').is_indexed is False)

                # try to index a non-table
                _maybe_remove(store, 'f2')
                store.put('f2', df)
                pytest.raises(TypeError, store.create_table_index, 'f2')
- def test_append_diff_item_order(self):
- with catch_warnings(record=True):
- wp = tm.makePanel()
- wp1 = wp.iloc[:, :10, :]
- wp2 = wp.iloc[wp.items.get_indexer(['ItemC', 'ItemB', 'ItemA']),
- 10:, :]
- with ensure_clean_store(self.path) as store:
- store.put('panel', wp1, format='table')
- pytest.raises(ValueError, store.put, 'panel', wp2,
- append=True)
    def test_append_hierarchical(self):
        """Frames with a MultiIndex round-trip through append/select, and
        column subsetting works (GH 3748)."""

        index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'],
                                   ['one', 'two', 'three']],
                           codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
                                  [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
                           names=['foo', 'bar'])
        df = DataFrame(np.random.randn(10, 3), index=index,
                       columns=['A', 'B', 'C'])

        with ensure_clean_store(self.path) as store:
            store.append('mi', df)
            result = store.select('mi')
            tm.assert_frame_equal(result, df)

            # GH 3748
            result = store.select('mi', columns=['A', 'B'])
            expected = df.reindex(columns=['A', 'B'])
            tm.assert_frame_equal(result, expected)

        with ensure_clean_path('test.hdf') as path:
            df.to_hdf(path, 'df', format='table')
            result = read_hdf(path, 'df', columns=['A', 'B'])
            expected = df.reindex(columns=['A', 'B'])
            tm.assert_frame_equal(result, expected)
    def test_column_multiindex(self):
        """MultiIndex *columns* are recreated properly on read, and
        data_columns are rejected for them (GH 4710, GH 6167)."""

        # GH 4710
        # recreate multi-indexes properly
        index = MultiIndex.from_tuples([('A', 'a'), ('A', 'b'),
                                        ('B', 'a'), ('B', 'b')],
                                       names=['first', 'second'])
        df = DataFrame(np.arange(12).reshape(3, 4), columns=index)
        expected = df.copy()
        if isinstance(expected.index, RangeIndex):
            expected.index = Int64Index(expected.index)

        with ensure_clean_store(self.path) as store:

            store.put('df', df)
            tm.assert_frame_equal(store['df'], expected,
                                  check_index_type=True,
                                  check_column_type=True)

            store.put('df1', df, format='table')
            tm.assert_frame_equal(store['df1'], expected,
                                  check_index_type=True,
                                  check_column_type=True)

            # data_columns are not supported with MultiIndex columns
            pytest.raises(ValueError, store.put, 'df2', df,
                          format='table', data_columns=['A'])
            pytest.raises(ValueError, store.put, 'df3', df,
                          format='table', data_columns=True)

        # appending multi-column on existing table (see GH 6167)
        with ensure_clean_store(self.path) as store:
            store.append('df2', df)
            store.append('df2', df)

            tm.assert_frame_equal(store['df2'], concat((df, df)))

        # non_index_axes name
        df = DataFrame(np.arange(12).reshape(3, 4),
                       columns=Index(list('ABCD'), name='foo'))
        expected = df.copy()
        if isinstance(expected.index, RangeIndex):
            expected.index = Int64Index(expected.index)

        with ensure_clean_store(self.path) as store:

            store.put('df1', df, format='table')
            tm.assert_frame_equal(store['df1'], expected,
                                  check_index_type=True,
                                  check_column_type=True)
    def test_store_multiindex(self):
        """Validate MultiIndex *name* handling on store/append: missing,
        partial, duplicate, and full names (GH 5527)."""

        # validate multi-index names
        # GH 5527
        with ensure_clean_store(self.path) as store:

            def make_index(names=None):
                # 12-row (date, int, int) MultiIndex with the given names
                return MultiIndex.from_tuples([(datetime.datetime(2013, 12, d),
                                                s, t)
                                               for d in range(1, 3)
                                               for s in range(2)
                                               for t in range(3)],
                                              names=names)

            # no names
            _maybe_remove(store, 'df')
            df = DataFrame(np.zeros((12, 2)), columns=[
                           'a', 'b'], index=make_index())
            store.append('df', df)
            tm.assert_frame_equal(store.select('df'), df)

            # partial names
            _maybe_remove(store, 'df')
            df = DataFrame(np.zeros((12, 2)), columns=[
                           'a', 'b'], index=make_index(['date', None, None]))
            store.append('df', df)
            tm.assert_frame_equal(store.select('df'), df)

            # series
            _maybe_remove(store, 's')
            s = Series(np.zeros(12), index=make_index(['date', None, None]))
            store.append('s', s)
            # unnamed levels come back with default level_N names
            xp = Series(np.zeros(12), index=make_index(
                ['date', 'level_1', 'level_2']))
            tm.assert_series_equal(store.select('s'), xp)

            # dup with column
            _maybe_remove(store, 'df')
            df = DataFrame(np.zeros((12, 2)), columns=[
                           'a', 'b'], index=make_index(['date', 'a', 't']))
            pytest.raises(ValueError, store.append, 'df', df)

            # dup within level
            _maybe_remove(store, 'df')
            df = DataFrame(np.zeros((12, 2)), columns=['a', 'b'],
                           index=make_index(['date', 'date', 'date']))
            pytest.raises(ValueError, store.append, 'df', df)

            # fully names
            _maybe_remove(store, 'df')
            df = DataFrame(np.zeros((12, 2)), columns=[
                           'a', 'b'], index=make_index(['date', 's', 't']))
            store.append('df', df)
            tm.assert_frame_equal(store.select('df'), df)
    def test_select_columns_in_where(self):
        """``columns=[...]`` inside the ``where`` argument selects columns
        and recreates the MultiIndex (GH 6169)."""

        # GH 6169
        # recreate multi-indexes when columns is passed
        # in the `where` argument
        index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'],
                                   ['one', 'two', 'three']],
                           codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
                                  [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
                           names=['foo_name', 'bar_name'])

        # With a DataFrame
        df = DataFrame(np.random.randn(10, 3), index=index,
                       columns=['A', 'B', 'C'])

        with ensure_clean_store(self.path) as store:
            store.put('df', df, format='table')
            expected = df[['A']]

            tm.assert_frame_equal(store.select('df', columns=['A']), expected)
            tm.assert_frame_equal(store.select(
                'df', where="columns=['A']"), expected)

        # With a Series
        s = Series(np.random.randn(10), index=index,
                   name='A')
        with ensure_clean_store(self.path) as store:
            store.put('s', s, format='table')
            tm.assert_series_equal(store.select('s', where="columns=['A']"), s)
- def test_mi_data_columns(self):
- # GH 14435
- idx = pd.MultiIndex.from_arrays([date_range('2000-01-01', periods=5),
- range(5)], names=['date', 'id'])
- df = pd.DataFrame({'a': [1.1, 1.2, 1.3, 1.4, 1.5]}, index=idx)
- with ensure_clean_store(self.path) as store:
- store.append('df', df, data_columns=True)
- actual = store.select('df', where='id == 1')
- expected = df.iloc[[1], :]
- tm.assert_frame_equal(actual, expected)
- def test_pass_spec_to_storer(self):
- df = tm.makeDataFrame()
- with ensure_clean_store(self.path) as store:
- store.put('df', df)
- pytest.raises(TypeError, store.select, 'df', columns=['A'])
- pytest.raises(TypeError, store.select,
- 'df', where=[('columns=A')])
    @xfail_non_writeable
    def test_append_misc(self):
        """Misc append options: ``chunksize``, ``expectedrows``, and
        zero-length frames/panels (GH 4273)."""

        with ensure_clean_store(self.path) as store:
            df = tm.makeDataFrame()
            store.append('df', df, chunksize=1)
            result = store.select('df')
            tm.assert_frame_equal(result, df)

            store.append('df1', df, expectedrows=10)
            result = store.select('df1')
            tm.assert_frame_equal(result, df)

        # more chunksize in append tests
        def check(obj, comparator):
            # round-trip obj at several chunksizes and compare
            for c in [10, 200, 1000]:
                with ensure_clean_store(self.path, mode='w') as store:
                    store.append('obj', obj, chunksize=c)
                    result = store.select('obj')
                    comparator(result, obj)

        df = tm.makeDataFrame()
        df['string'] = 'foo'
        df['float322'] = 1.
        df['float322'] = df['float322'].astype('float32')
        df['bool'] = df['float322'] > 0
        df['time1'] = Timestamp('20130101')
        df['time2'] = Timestamp('20130102')
        check(df, tm.assert_frame_equal)

        with catch_warnings(record=True):
            p = tm.makePanel()
            check(p, assert_panel_equal)

        # empty frame, GH4273
        with ensure_clean_store(self.path) as store:

            # 0 len
            df_empty = DataFrame(columns=list('ABC'))
            store.append('df', df_empty)
            # appending an empty frame creates no node
            pytest.raises(KeyError, store.select, 'df')

            # repeated append of 0/non-zero frames
            df = DataFrame(np.random.rand(10, 3), columns=list('ABC'))
            store.append('df', df)
            assert_frame_equal(store.select('df'), df)
            store.append('df', df_empty)
            assert_frame_equal(store.select('df'), df)

            # store
            df = DataFrame(columns=list('ABC'))
            store.put('df2', df)
            assert_frame_equal(store.select('df2'), df)

            with catch_warnings(record=True):

                # 0 len
                p_empty = Panel(items=list('ABC'))
                store.append('p', p_empty)
                pytest.raises(KeyError, store.select, 'p')

                # repeated append of 0/non-zero frames
                p = Panel(np.random.randn(3, 4, 5), items=list('ABC'))
                store.append('p', p)
                assert_panel_equal(store.select('p'), p)
                store.append('p', p_empty)
                assert_panel_equal(store.select('p'), p)

                # store
                store.put('p2', p_empty)
                assert_panel_equal(store.select('p2'), p_empty)
    def test_append_raise(self):
        """Invalid inputs to append raise informative TypeError/ValueError."""

        with ensure_clean_store(self.path) as store:

            # test append with invalid input to get good error messages

            # list in column
            df = tm.makeDataFrame()
            df['invalid'] = [['a']] * len(df)
            assert df.dtypes['invalid'] == np.object_
            pytest.raises(TypeError, store.append, 'df', df)

            # multiple invalid columns
            df['invalid2'] = [['a']] * len(df)
            df['invalid3'] = [['a']] * len(df)
            pytest.raises(TypeError, store.append, 'df', df)

            # datetime with embedded nans as object
            df = tm.makeDataFrame()
            s = Series(datetime.datetime(2001, 1, 2), index=df.index)
            s = s.astype(object)
            s[0:5] = np.nan
            df['invalid'] = s
            assert df.dtypes['invalid'] == np.object_
            pytest.raises(TypeError, store.append, 'df', df)

            # directly ndarray
            pytest.raises(TypeError, store.append, 'df', np.arange(10))

            # series directly
            pytest.raises(TypeError, store.append,
                          'df', Series(np.arange(10)))

            # appending an incompatible table
            df = tm.makeDataFrame()
            store.append('df', df)

            df['foo'] = 'foo'
            pytest.raises(ValueError, store.append, 'df', df)
- def test_table_index_incompatible_dtypes(self):
- df1 = DataFrame({'a': [1, 2, 3]})
- df2 = DataFrame({'a': [4, 5, 6]},
- index=date_range('1/1/2000', periods=3))
- with ensure_clean_store(self.path) as store:
- store.put('frame', df1, format='table')
- pytest.raises(TypeError, store.put, 'frame', df2,
- format='table', append=True)
    def test_table_values_dtypes_roundtrip(self):
        """Column dtypes survive a table round trip; appending an
        incompatible dtype raises."""

        with ensure_clean_store(self.path) as store:
            df1 = DataFrame({'a': [1, 2, 3]}, dtype='f8')
            store.append('df_f8', df1)
            assert_series_equal(df1.dtypes, store['df_f8'].dtypes)

            df2 = DataFrame({'a': [1, 2, 3]}, dtype='i8')
            store.append('df_i8', df2)
            assert_series_equal(df2.dtypes, store['df_i8'].dtypes)

            # incompatible dtype
            pytest.raises(ValueError, store.append, 'df_i8', df1)

            # check creation/storage/retrieval of float32 (a bit hacky to
            # actually create them thought)
            df1 = DataFrame(
                np.array([[1], [2], [3]], dtype='f4'), columns=['A'])
            store.append('df_f4', df1)
            assert_series_equal(df1.dtypes, store['df_f4'].dtypes)
            assert df1.dtypes[0] == 'float32'

            # check with mixed dtypes
            df1 = DataFrame({c: Series(np.random.randint(5), dtype=c)
                             for c in ['float32', 'float64', 'int32',
                                       'int64', 'int16', 'int8']})
            df1['string'] = 'foo'
            df1['float322'] = 1.
            df1['float322'] = df1['float322'].astype('float32')
            df1['bool'] = df1['float32'] > 0
            df1['time1'] = Timestamp('20130101')
            df1['time2'] = Timestamp('20130102')

            store.append('df_mixed_dtypes1', df1)
            result = store.select('df_mixed_dtypes1').get_dtype_counts()
            expected = Series({'float32': 2, 'float64': 1, 'int32': 1,
                               'bool': 1, 'int16': 1, 'int8': 1,
                               'int64': 1, 'object': 1, 'datetime64[ns]': 2})
            result = result.sort_index()
            expected = expected.sort_index()
            tm.assert_series_equal(result, expected)
    def test_table_mixed_dtypes(self):
        """Frames and panels with mixed dtypes append and select cleanly."""

        # frame
        df = tm.makeDataFrame()
        df['obj1'] = 'foo'
        df['obj2'] = 'bar'
        df['bool1'] = df['A'] > 0
        df['bool2'] = df['B'] > 0
        df['bool3'] = True
        df['int1'] = 1
        df['int2'] = 2
        df['timestamp1'] = Timestamp('20010102')
        df['timestamp2'] = Timestamp('20010103')
        df['datetime1'] = datetime.datetime(2001, 1, 2, 0, 0)
        df['datetime2'] = datetime.datetime(2001, 1, 3, 0, 0)
        df.loc[3:6, ['obj1']] = np.nan
        df = df._consolidate()._convert(datetime=True)

        with ensure_clean_store(self.path) as store:
            store.append('df1_mixed', df)
            tm.assert_frame_equal(store.select('df1_mixed'), df)

        with catch_warnings(record=True):

            # panel
            wp = tm.makePanel()
            wp['obj1'] = 'foo'
            wp['obj2'] = 'bar'
            wp['bool1'] = wp['ItemA'] > 0
            wp['bool2'] = wp['ItemB'] > 0
            wp['int1'] = 1
            wp['int2'] = 2
            wp = wp._consolidate()

        with catch_warnings(record=True):

            with ensure_clean_store(self.path) as store:
                store.append('p1_mixed', wp)
                assert_panel_equal(store.select('p1_mixed'), wp)
    def test_unimplemented_dtypes_table_columns(self):
        """Unsupported column dtypes (datetime.date, py2 unicode) raise
        TypeError on append."""

        with ensure_clean_store(self.path) as store:

            dtypes = [('date', datetime.date(2001, 1, 2))]

            # py3 ok for unicode
            if not compat.PY3:
                dtypes.append(('unicode', u('\\u03c3')))

            # currently not supported dtypes ####
            for n, f in dtypes:
                df = tm.makeDataFrame()
                df[n] = f
                pytest.raises(
                    TypeError, store.append, 'df1_%s' % n, df)

        # frame
        df = tm.makeDataFrame()
        df['obj1'] = 'foo'
        df['obj2'] = 'bar'
        df['datetime1'] = datetime.date(2001, 1, 2)
        df = df._consolidate()._convert(datetime=True)

        with ensure_clean_store(self.path) as store:
            # this fails because we have a date in the object block......
            pytest.raises(TypeError, store.append, 'df_unimplemented', df)
    @xfail_non_writeable
    @pytest.mark.skipif(
        LooseVersion(np.__version__) == LooseVersion('1.15.0'),
        reason=("Skipping pytables test when numpy version is "
                "exactly equal to 1.15.0: gh-22098"))
    def test_calendar_roundtrip_issue(self):
        """A Series indexed by a CustomBusinessDay calendar round-trips in
        both fixed and table formats (GH 8591)."""

        # 8591
        # doc example from tseries holiday section
        weekmask_egypt = 'Sun Mon Tue Wed Thu'
        holidays = ['2012-05-01',
                    datetime.datetime(2013, 5, 1), np.datetime64('2014-05-01')]
        bday_egypt = pd.offsets.CustomBusinessDay(
            holidays=holidays, weekmask=weekmask_egypt)
        dt = datetime.datetime(2013, 4, 30)
        dts = date_range(dt, periods=5, freq=bday_egypt)

        s = (Series(dts.weekday, dts).map(
            Series('Mon Tue Wed Thu Fri Sat Sun'.split())))

        with ensure_clean_store(self.path) as store:

            store.put('fixed', s)
            result = store.select('fixed')
            assert_series_equal(result, s)

            store.append('table', s)
            result = store.select('table')
            assert_series_equal(result, s)
- def test_roundtrip_tz_aware_index(self):
- # GH 17618
- time = pd.Timestamp('2000-01-01 01:00:00', tz='US/Eastern')
- df = pd.DataFrame(data=[0], index=[time])
- with ensure_clean_store(self.path) as store:
- store.put('frame', df, format='fixed')
- recons = store['frame']
- tm.assert_frame_equal(recons, df)
- assert recons.index[0].value == 946706400000000000
    def test_append_with_timedelta(self):
        """timedelta64 columns append and are queryable with Timedelta-style
        where strings (GH 3577)."""

        # GH 3577
        # append timedelta
        df = DataFrame(dict(A=Timestamp('20130101'), B=[Timestamp(
            '20130101') + timedelta(days=i, seconds=10) for i in range(10)]))
        df['C'] = df['A'] - df['B']
        df.loc[3:5, 'C'] = np.nan

        with ensure_clean_store(self.path) as store:

            # table
            _maybe_remove(store, 'df')
            store.append('df', df, data_columns=True)
            result = store.select('df')
            assert_frame_equal(result, df)

            result = store.select('df', where="C<100000")
            assert_frame_equal(result, df)

            result = store.select('df', where="C<pd.Timedelta('-3D')")
            assert_frame_equal(result, df.iloc[3:])

            result = store.select('df', "C<'-3D'")
            assert_frame_equal(result, df.iloc[3:])

            # a bit hacky here as we don't really deal with the NaT properly
            result = store.select('df', "C<'-500000s'")
            result = result.dropna(subset=['C'])
            assert_frame_equal(result, df.iloc[6:])

            result = store.select('df', "C<'-3.5D'")
            result = result.iloc[1:]
            assert_frame_equal(result, df.iloc[4:])

            # fixed
            _maybe_remove(store, 'df2')
            store.put('df2', df)
            result = store.select('df2')
            assert_frame_equal(result, df)
    def test_remove(self):
        """remove()/del on store keys, including path-like ('b/foo') keys."""

        with ensure_clean_store(self.path) as store:

            ts = tm.makeTimeSeries()
            df = tm.makeDataFrame()
            store['a'] = ts
            store['b'] = df
            _maybe_remove(store, 'a')
            assert len(store) == 1
            tm.assert_frame_equal(df, store['b'])

            _maybe_remove(store, 'b')
            assert len(store) == 0

            # nonexistence
            pytest.raises(KeyError, store.remove, 'a_nonexistent_store')

            # pathing
            store['a'] = ts
            store['b/foo'] = df
            # removing 'foo' must not touch 'b/foo'
            _maybe_remove(store, 'foo')
            _maybe_remove(store, 'b/foo')
            assert len(store) == 1

            store['a'] = ts
            store['b/foo'] = df
            # removing the parent node removes its children
            _maybe_remove(store, 'b')
            assert len(store) == 1

            # __delitem__
            store['a'] = ts
            store['b'] = df
            del store['a']
            del store['b']
            assert len(store) == 0
    def test_remove_where(self):
        """remove() with where criteria: missing key, valid criteria,
        empty where (deletes all rows), and invalid criteria."""

        with ensure_clean_store(self.path) as store:

            with catch_warnings(record=True):

                # non-existance
                crit1 = 'index>foo'
                pytest.raises(KeyError, store.remove, 'a', [crit1])

                # try to remove non-table (with crit)
                # non-table ok (where = None)
                wp = tm.makePanel(30)
                store.put('wp', wp, format='table')
                store.remove('wp', ["minor_axis=['A', 'D']"])
                rs = store.select('wp')
                expected = wp.reindex(minor_axis=['B', 'C'])
                assert_panel_equal(rs, expected)

                # empty where
                _maybe_remove(store, 'wp')
                store.put('wp', wp, format='table')

                # deleted number (entire table)
                n = store.remove('wp', [])
                assert n == 120

                # non - empty where
                _maybe_remove(store, 'wp')
                store.put('wp', wp, format='table')
                pytest.raises(ValueError, store.remove,
                              'wp', ['foo'])
    def test_remove_startstop(self):
        """remove() honors start/stop row bounds, including negative values
        and combination with a where clause (GH 4835, GH 6177)."""

        # GH #4835 and #6177
        with ensure_clean_store(self.path) as store:

            with catch_warnings(record=True):
                wp = tm.makePanel(30)

                # start
                _maybe_remove(store, 'wp1')
                store.put('wp1', wp, format='t')
                n = store.remove('wp1', start=32)
                assert n == 120 - 32
                result = store.select('wp1')
                expected = wp.reindex(major_axis=wp.major_axis[:32 // 4])
                assert_panel_equal(result, expected)

                _maybe_remove(store, 'wp2')
                store.put('wp2', wp, format='t')
                n = store.remove('wp2', start=-32)
                assert n == 32
                result = store.select('wp2')
                expected = wp.reindex(major_axis=wp.major_axis[:-32 // 4])
                assert_panel_equal(result, expected)

                # stop
                _maybe_remove(store, 'wp3')
                store.put('wp3', wp, format='t')
                n = store.remove('wp3', stop=32)
                assert n == 32
                result = store.select('wp3')
                expected = wp.reindex(major_axis=wp.major_axis[32 // 4:])
                assert_panel_equal(result, expected)

                _maybe_remove(store, 'wp4')
                store.put('wp4', wp, format='t')
                n = store.remove('wp4', stop=-32)
                assert n == 120 - 32
                result = store.select('wp4')
                expected = wp.reindex(major_axis=wp.major_axis[-32 // 4:])
                assert_panel_equal(result, expected)

                # start n stop
                _maybe_remove(store, 'wp5')
                store.put('wp5', wp, format='t')
                n = store.remove('wp5', start=16, stop=-16)
                assert n == 120 - 32
                result = store.select('wp5')
                expected = wp.reindex(
                    major_axis=(wp.major_axis[:16 // 4]
                                .union(wp.major_axis[-16 // 4:])))
                assert_panel_equal(result, expected)

                _maybe_remove(store, 'wp6')
                store.put('wp6', wp, format='t')
                # start == stop -> nothing removed
                n = store.remove('wp6', start=16, stop=16)
                assert n == 0
                result = store.select('wp6')
                expected = wp.reindex(major_axis=wp.major_axis)
                assert_panel_equal(result, expected)

                # with where
                _maybe_remove(store, 'wp7')

                # TODO: unused?
                date = wp.major_axis.take(np.arange(0, 30, 3))  # noqa

                crit = 'major_axis=date'
                store.put('wp7', wp, format='t')
                n = store.remove('wp7', where=[crit], stop=80)
                assert n == 28
                result = store.select('wp7')
                expected = wp.reindex(major_axis=wp.major_axis.difference(
                    wp.major_axis[np.arange(0, 20, 3)]))
                assert_panel_equal(result, expected)
    def test_remove_crit(self):
        """remove() with where criteria deletes exactly the matching rows."""

        with ensure_clean_store(self.path) as store:

            with catch_warnings(record=True):
                wp = tm.makePanel(30)

                # group row removal
                _maybe_remove(store, 'wp3')
                date4 = wp.major_axis.take([0, 1, 2, 4, 5, 6, 8, 9, 10])
                crit4 = 'major_axis=date4'
                store.put('wp3', wp, format='t')
                n = store.remove('wp3', where=[crit4])
                assert n == 36

                result = store.select('wp3')
                expected = wp.reindex(
                    major_axis=wp.major_axis.difference(date4))
                assert_panel_equal(result, expected)

                # upper half
                _maybe_remove(store, 'wp')
                store.put('wp', wp, format='table')
                date = wp.major_axis[len(wp.major_axis) // 2]

                crit1 = 'major_axis>date'
                crit2 = "minor_axis=['A', 'D']"
                n = store.remove('wp', where=[crit1])
                assert n == 56

                n = store.remove('wp', where=[crit2])
                assert n == 32

                result = store['wp']
                expected = wp.truncate(after=date).reindex(minor=['B', 'C'])
                assert_panel_equal(result, expected)

                # individual row elements
                _maybe_remove(store, 'wp2')
                store.put('wp2', wp, format='table')

                date1 = wp.major_axis[1:3]
                crit1 = 'major_axis=date1'
                store.remove('wp2', where=[crit1])
                result = store.select('wp2')
                expected = wp.reindex(
                    major_axis=wp.major_axis.difference(date1))
                assert_panel_equal(result, expected)

                date2 = wp.major_axis[5]
                crit2 = 'major_axis=date2'
                store.remove('wp2', where=[crit2])
                result = store['wp2']
                expected = wp.reindex(
                    major_axis=(wp.major_axis
                                .difference(date1)
                                .difference(Index([date2]))
                                ))
                assert_panel_equal(result, expected)

                date3 = [wp.major_axis[7], wp.major_axis[9]]
                crit3 = 'major_axis=date3'
                store.remove('wp2', where=[crit3])
                result = store['wp2']
                expected = wp.reindex(major_axis=wp.major_axis
                                      .difference(date1)
                                      .difference(Index([date2]))
                                      .difference(Index(date3)))
                assert_panel_equal(result, expected)

                # corners: criterion matching nothing removes nothing
                _maybe_remove(store, 'wp4')
                store.put('wp4', wp, format='table')
                n = store.remove(
                    'wp4', where="major_axis>wp.major_axis[-1]")
                result = store.select('wp4')
                assert_panel_equal(result, wp)
    def test_invalid_terms(self):
        """Malformed where terms raise ValueError/SyntaxError/TypeError;
        references to non-data-columns in where are rejected."""

        with ensure_clean_store(self.path) as store:

            with catch_warnings(record=True):

                df = tm.makeTimeDataFrame()
                df['string'] = 'foo'
                df.loc[0:4, 'string'] = 'bar'
                wp = tm.makePanel()

                store.put('df', df, format='table')
                store.put('wp', wp, format='table')

                # some invalid terms
                pytest.raises(ValueError, store.select,
                              'wp', "minor=['A', 'B']")
                pytest.raises(ValueError, store.select,
                              'wp', ["index=['20121114']"])
                pytest.raises(ValueError, store.select, 'wp', [
                    "index=['20121114', '20121114']"])
                pytest.raises(TypeError, Term)

                # more invalid
                pytest.raises(
                    ValueError, store.select, 'df', 'df.index[3]')
                pytest.raises(SyntaxError, store.select, 'df', 'index>')
                pytest.raises(
                    ValueError, store.select, 'wp',
                    "major_axis<'20000108' & minor_axis['A', 'B']")

        # from the docs
        with ensure_clean_path(self.path) as path:
            dfq = DataFrame(np.random.randn(10, 4), columns=list(
                'ABCD'), index=date_range('20130101', periods=10))
            dfq.to_hdf(path, 'dfq', format='table', data_columns=True)

            # check ok
            read_hdf(path, 'dfq',
                     where="index>Timestamp('20130104') & columns=['A', 'B']")
            read_hdf(path, 'dfq', where="A>0 or C>0")

        # catch the invalid reference
        with ensure_clean_path(self.path) as path:
            dfq = DataFrame(np.random.randn(10, 4), columns=list(
                'ABCD'), index=date_range('20130101', periods=10))
            # without data_columns=True, A/C are not queryable
            dfq.to_hdf(path, 'dfq', format='table')

            pytest.raises(ValueError, read_hdf, path,
                          'dfq', where="A>0 or C>0")
    def test_terms(self):
        """Valid where-term syntax variants for panel selection, plus
        rejection of lambdas and unary '+'."""

        with ensure_clean_store(self.path) as store:

            with catch_warnings(record=True):
                simplefilter("ignore", FutureWarning)

                wp = tm.makePanel()
                wpneg = Panel.fromDict({-1: tm.makeDataFrame(),
                                        0: tm.makeDataFrame(),
                                        1: tm.makeDataFrame()})
                store.put('wp', wp, format='table')
                store.put('wpneg', wpneg, format='table')

                # panel
                result = store.select(
                    'wp',
                    "major_axis<'20000108' and minor_axis=['A', 'B']")
                expected = wp.truncate(
                    after='20000108').reindex(minor=['A', 'B'])
                assert_panel_equal(result, expected)

                # with deprecation
                result = store.select(
                    'wp', where=("major_axis<'20000108' "
                                 "and minor_axis=['A', 'B']"))
                expected = wp.truncate(
                    after='20000108').reindex(minor=['A', 'B'])
                tm.assert_panel_equal(result, expected)

            with catch_warnings(record=True):

                # valid terms (strings and tuples of strings)
                terms = [('major_axis=20121114'),
                         ('major_axis>20121114'),
                         (("major_axis=['20121114', '20121114']"),),
                         ('major_axis=datetime.datetime(2012, 11, 14)'),
                         'major_axis> 20121114',
                         'major_axis >20121114',
                         'major_axis > 20121114',
                         (("minor_axis=['A', 'B']"),),
                         (("minor_axis=['A', 'B']"),),
                         ((("minor_axis==['A', 'B']"),),),
                         (("items=['ItemA', 'ItemB']"),),
                         ('items=ItemA'),
                         ]

                for t in terms:
                    store.select('wp', t)

                with pytest.raises(TypeError,
                                   match='Only named functions are supported'):
                    store.select(
                        'wp',
                        'major_axis == (lambda x: x)("20130101")')

            with catch_warnings(record=True):
                # check USub node parsing
                res = store.select('wpneg', 'items == -1')
                expected = Panel({-1: wpneg[-1]})
                tm.assert_panel_equal(res, expected)

                msg = 'Unary addition not supported'
                with pytest.raises(NotImplementedError, match=msg):
                    store.select('wpneg', 'items == +1')
- def test_term_compat(self):
- with ensure_clean_store(self.path) as store:
- with catch_warnings(record=True):
- wp = Panel(np.random.randn(2, 5, 4), items=['Item1', 'Item2'],
- major_axis=date_range('1/1/2000', periods=5),
- minor_axis=['A', 'B', 'C', 'D'])
- store.append('wp', wp)
- result = store.select(
- 'wp', where=("major_axis>20000102 "
- "and minor_axis=['A', 'B']"))
- expected = wp.loc[:, wp.major_axis >
- Timestamp('20000102'), ['A', 'B']]
- assert_panel_equal(result, expected)
- store.remove('wp', 'major_axis>20000103')
- result = store.select('wp')
- expected = wp.loc[:, wp.major_axis <= Timestamp('20000103'), :]
- assert_panel_equal(result, expected)
- with ensure_clean_store(self.path) as store:
- with catch_warnings(record=True):
- wp = Panel(np.random.randn(2, 5, 4),
- items=['Item1', 'Item2'],
- major_axis=date_range('1/1/2000', periods=5),
- minor_axis=['A', 'B', 'C', 'D'])
- store.append('wp', wp)
- # stringified datetimes
- result = store.select(
- 'wp', 'major_axis>datetime.datetime(2000, 1, 2)')
- expected = wp.loc[:, wp.major_axis > Timestamp('20000102')]
- assert_panel_equal(result, expected)
- result = store.select(
- 'wp', 'major_axis>datetime.datetime(2000, 1, 2)')
- expected = wp.loc[:, wp.major_axis > Timestamp('20000102')]
- assert_panel_equal(result, expected)
- result = store.select(
- 'wp',
- "major_axis=[datetime.datetime(2000, 1, 2, 0, 0), "
- "datetime.datetime(2000, 1, 3, 0, 0)]")
- expected = wp.loc[:, [Timestamp('20000102'),
- Timestamp('20000103')]]
- assert_panel_equal(result, expected)
- result = store.select(
- 'wp', "minor_axis=['A', 'B']")
- expected = wp.loc[:, :, ['A', 'B']]
- assert_panel_equal(result, expected)
- def test_same_name_scoping(self):
- with ensure_clean_store(self.path) as store:
- import pandas as pd
- df = DataFrame(np.random.randn(20, 2),
- index=pd.date_range('20130101', periods=20))
- store.put('df', df, format='table')
- expected = df[df.index > pd.Timestamp('20130105')]
- import datetime # noqa
- result = store.select('df', 'index>datetime.datetime(2013,1,5)')
- assert_frame_equal(result, expected)
- from datetime import datetime # noqa
- # technically an error, but allow it
- result = store.select('df', 'index>datetime.datetime(2013,1,5)')
- assert_frame_equal(result, expected)
- result = store.select('df', 'index>datetime(2013,1,5)')
- assert_frame_equal(result, expected)
- def test_series(self):
- s = tm.makeStringSeries()
- self._check_roundtrip(s, tm.assert_series_equal)
- ts = tm.makeTimeSeries()
- self._check_roundtrip(ts, tm.assert_series_equal)
- ts2 = Series(ts.index, Index(ts.index, dtype=object))
- self._check_roundtrip(ts2, tm.assert_series_equal)
- ts3 = Series(ts.values, Index(np.asarray(ts.index, dtype=object),
- dtype=object))
- self._check_roundtrip(ts3, tm.assert_series_equal,
- check_index_type=False)
- def test_sparse_series(self):
- s = tm.makeStringSeries()
- s.iloc[3:5] = np.nan
- ss = s.to_sparse()
- self._check_roundtrip(ss, tm.assert_series_equal,
- check_series_type=True)
- ss2 = s.to_sparse(kind='integer')
- self._check_roundtrip(ss2, tm.assert_series_equal,
- check_series_type=True)
- ss3 = s.to_sparse(fill_value=0)
- self._check_roundtrip(ss3, tm.assert_series_equal,
- check_series_type=True)
- def test_sparse_frame(self):
- s = tm.makeDataFrame()
- s.iloc[3:5, 1:3] = np.nan
- s.iloc[8:10, -2] = np.nan
- ss = s.to_sparse()
- self._check_double_roundtrip(ss, tm.assert_frame_equal,
- check_frame_type=True)
- ss2 = s.to_sparse(kind='integer')
- self._check_double_roundtrip(ss2, tm.assert_frame_equal,
- check_frame_type=True)
- ss3 = s.to_sparse(fill_value=0)
- self._check_double_roundtrip(ss3, tm.assert_frame_equal,
- check_frame_type=True)
- def test_float_index(self):
- # GH #454
- index = np.random.randn(10)
- s = Series(np.random.randn(10), index=index)
- self._check_roundtrip(s, tm.assert_series_equal)
- @xfail_non_writeable
- def test_tuple_index(self):
- # GH #492
- col = np.arange(10)
- idx = [(0., 1.), (2., 3.), (4., 5.)]
- data = np.random.randn(30).reshape((3, 10))
- DF = DataFrame(data, index=idx, columns=col)
- with catch_warnings(record=True):
- simplefilter("ignore", pd.errors.PerformanceWarning)
- self._check_roundtrip(DF, tm.assert_frame_equal)
- @xfail_non_writeable
- @pytest.mark.filterwarnings("ignore::pandas.errors.PerformanceWarning")
- def test_index_types(self):
- with catch_warnings(record=True):
- values = np.random.randn(2)
- func = lambda l, r: tm.assert_series_equal(l, r,
- check_dtype=True,
- check_index_type=True,
- check_series_type=True)
- with catch_warnings(record=True):
- ser = Series(values, [0, 'y'])
- self._check_roundtrip(ser, func)
- with catch_warnings(record=True):
- ser = Series(values, [datetime.datetime.today(), 0])
- self._check_roundtrip(ser, func)
- with catch_warnings(record=True):
- ser = Series(values, ['y', 0])
- self._check_roundtrip(ser, func)
- with catch_warnings(record=True):
- ser = Series(values, [datetime.date.today(), 'a'])
- self._check_roundtrip(ser, func)
- with catch_warnings(record=True):
- ser = Series(values, [0, 'y'])
- self._check_roundtrip(ser, func)
- ser = Series(values, [datetime.datetime.today(), 0])
- self._check_roundtrip(ser, func)
- ser = Series(values, ['y', 0])
- self._check_roundtrip(ser, func)
- ser = Series(values, [datetime.date.today(), 'a'])
- self._check_roundtrip(ser, func)
- ser = Series(values, [1.23, 'b'])
- self._check_roundtrip(ser, func)
- ser = Series(values, [1, 1.53])
- self._check_roundtrip(ser, func)
- ser = Series(values, [1, 5])
- self._check_roundtrip(ser, func)
- ser = Series(values, [datetime.datetime(
- 2012, 1, 1), datetime.datetime(2012, 1, 2)])
- self._check_roundtrip(ser, func)
- def test_timeseries_preepoch(self):
- dr = bdate_range('1/1/1940', '1/1/1960')
- ts = Series(np.random.randn(len(dr)), index=dr)
- try:
- self._check_roundtrip(ts, tm.assert_series_equal)
- except OverflowError:
- pytest.skip('known failer on some windows platforms')
- @xfail_non_writeable
- @pytest.mark.parametrize("compression", [
- False, pytest.param(True, marks=td.skip_if_windows_python_3)
- ])
- def test_frame(self, compression):
- df = tm.makeDataFrame()
- # put in some random NAs
- df.values[0, 0] = np.nan
- df.values[5, 3] = np.nan
- self._check_roundtrip_table(df, tm.assert_frame_equal,
- compression=compression)
- self._check_roundtrip(df, tm.assert_frame_equal,
- compression=compression)
- tdf = tm.makeTimeDataFrame()
- self._check_roundtrip(tdf, tm.assert_frame_equal,
- compression=compression)
- with ensure_clean_store(self.path) as store:
- # not consolidated
- df['foo'] = np.random.randn(len(df))
- store['df'] = df
- recons = store['df']
- assert recons._data.is_consolidated()
- # empty
- self._check_roundtrip(df[:0], tm.assert_frame_equal)
- @xfail_non_writeable
- def test_empty_series_frame(self):
- s0 = Series()
- s1 = Series(name='myseries')
- df0 = DataFrame()
- df1 = DataFrame(index=['a', 'b', 'c'])
- df2 = DataFrame(columns=['d', 'e', 'f'])
- self._check_roundtrip(s0, tm.assert_series_equal)
- self._check_roundtrip(s1, tm.assert_series_equal)
- self._check_roundtrip(df0, tm.assert_frame_equal)
- self._check_roundtrip(df1, tm.assert_frame_equal)
- self._check_roundtrip(df2, tm.assert_frame_equal)
- @xfail_non_writeable
- @pytest.mark.parametrize(
- 'dtype', [np.int64, np.float64, np.object, 'm8[ns]', 'M8[ns]'])
- def test_empty_series(self, dtype):
- s = Series(dtype=dtype)
- self._check_roundtrip(s, tm.assert_series_equal)
- def test_can_serialize_dates(self):
- rng = [x.date() for x in bdate_range('1/1/2000', '1/30/2000')]
- frame = DataFrame(np.random.randn(len(rng), 4), index=rng)
- self._check_roundtrip(frame, tm.assert_frame_equal)
- def test_store_hierarchical(self):
- index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'],
- ['one', 'two', 'three']],
- codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
- [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
- names=['foo', 'bar'])
- frame = DataFrame(np.random.randn(10, 3), index=index,
- columns=['A', 'B', 'C'])
- self._check_roundtrip(frame, tm.assert_frame_equal)
- self._check_roundtrip(frame.T, tm.assert_frame_equal)
- self._check_roundtrip(frame['A'], tm.assert_series_equal)
- # check that the names are stored
- with ensure_clean_store(self.path) as store:
- store['frame'] = frame
- recons = store['frame']
- tm.assert_frame_equal(recons, frame)
- def test_store_index_name(self):
- df = tm.makeDataFrame()
- df.index.name = 'foo'
- with ensure_clean_store(self.path) as store:
- store['frame'] = df
- recons = store['frame']
- tm.assert_frame_equal(recons, df)
- def test_store_index_name_with_tz(self):
- # GH 13884
- df = pd.DataFrame({'A': [1, 2]})
- df.index = pd.DatetimeIndex([1234567890123456787, 1234567890123456788])
- df.index = df.index.tz_localize('UTC')
- df.index.name = 'foo'
- with ensure_clean_store(self.path) as store:
- store.put('frame', df, format='table')
- recons = store['frame']
- tm.assert_frame_equal(recons, df)
- @pytest.mark.parametrize('table_format', ['table', 'fixed'])
- def test_store_index_name_numpy_str(self, table_format):
- # GH #13492
- idx = pd.Index(pd.to_datetime([datetime.date(2000, 1, 1),
- datetime.date(2000, 1, 2)]),
- name=u('cols\u05d2'))
- idx1 = pd.Index(pd.to_datetime([datetime.date(2010, 1, 1),
- datetime.date(2010, 1, 2)]),
- name=u('rows\u05d0'))
- df = pd.DataFrame(np.arange(4).reshape(2, 2), columns=idx, index=idx1)
- # This used to fail, returning numpy strings instead of python strings.
- with ensure_clean_path(self.path) as path:
- df.to_hdf(path, 'df', format=table_format)
- df2 = read_hdf(path, 'df')
- assert_frame_equal(df, df2, check_names=True)
- assert type(df2.index.name) == text_type
- assert type(df2.columns.name) == text_type
- def test_store_series_name(self):
- df = tm.makeDataFrame()
- series = df['A']
- with ensure_clean_store(self.path) as store:
- store['series'] = series
- recons = store['series']
- tm.assert_series_equal(recons, series)
- @xfail_non_writeable
- @pytest.mark.parametrize("compression", [
- False, pytest.param(True, marks=td.skip_if_windows_python_3)
- ])
- def test_store_mixed(self, compression):
- def _make_one():
- df = tm.makeDataFrame()
- df['obj1'] = 'foo'
- df['obj2'] = 'bar'
- df['bool1'] = df['A'] > 0
- df['bool2'] = df['B'] > 0
- df['int1'] = 1
- df['int2'] = 2
- return df._consolidate()
- df1 = _make_one()
- df2 = _make_one()
- self._check_roundtrip(df1, tm.assert_frame_equal)
- self._check_roundtrip(df2, tm.assert_frame_equal)
- with ensure_clean_store(self.path) as store:
- store['obj'] = df1
- tm.assert_frame_equal(store['obj'], df1)
- store['obj'] = df2
- tm.assert_frame_equal(store['obj'], df2)
- # check that can store Series of all of these types
- self._check_roundtrip(df1['obj1'], tm.assert_series_equal,
- compression=compression)
- self._check_roundtrip(df1['bool1'], tm.assert_series_equal,
- compression=compression)
- self._check_roundtrip(df1['int1'], tm.assert_series_equal,
- compression=compression)
- def test_wide(self):
- with catch_warnings(record=True):
- wp = tm.makePanel()
- self._check_roundtrip(wp, assert_panel_equal)
- @pytest.mark.filterwarnings(
- "ignore:\\nduplicate:pandas.io.pytables.DuplicateWarning"
- )
- def test_select_with_dups(self):
- # single dtypes
- df = DataFrame(np.random.randn(10, 4), columns=['A', 'A', 'B', 'B'])
- df.index = date_range('20130101 9:30', periods=10, freq='T')
- with ensure_clean_store(self.path) as store:
- store.append('df', df)
- result = store.select('df')
- expected = df
- assert_frame_equal(result, expected, by_blocks=True)
- result = store.select('df', columns=df.columns)
- expected = df
- assert_frame_equal(result, expected, by_blocks=True)
- result = store.select('df', columns=['A'])
- expected = df.loc[:, ['A']]
- assert_frame_equal(result, expected)
- # dups across dtypes
- df = concat([DataFrame(np.random.randn(10, 4),
- columns=['A', 'A', 'B', 'B']),
- DataFrame(np.random.randint(0, 10, size=20)
- .reshape(10, 2),
- columns=['A', 'C'])],
- axis=1)
- df.index = date_range('20130101 9:30', periods=10, freq='T')
- with ensure_clean_store(self.path) as store:
- store.append('df', df)
- result = store.select('df')
- expected = df
- assert_frame_equal(result, expected, by_blocks=True)
- result = store.select('df', columns=df.columns)
- expected = df
- assert_frame_equal(result, expected, by_blocks=True)
- expected = df.loc[:, ['A']]
- result = store.select('df', columns=['A'])
- assert_frame_equal(result, expected, by_blocks=True)
- expected = df.loc[:, ['B', 'A']]
- result = store.select('df', columns=['B', 'A'])
- assert_frame_equal(result, expected, by_blocks=True)
- # duplicates on both index and columns
- with ensure_clean_store(self.path) as store:
- store.append('df', df)
- store.append('df', df)
- expected = df.loc[:, ['B', 'A']]
- expected = concat([expected, expected])
- result = store.select('df', columns=['B', 'A'])
- assert_frame_equal(result, expected, by_blocks=True)
- @pytest.mark.filterwarnings(
- "ignore:\\nduplicate:pandas.io.pytables.DuplicateWarning"
- )
- def test_wide_table_dups(self):
- with ensure_clean_store(self.path) as store:
- with catch_warnings(record=True):
- wp = tm.makePanel()
- store.put('panel', wp, format='table')
- store.put('panel', wp, format='table', append=True)
- recons = store['panel']
- assert_panel_equal(recons, wp)
- def test_long(self):
- def _check(left, right):
- assert_panel_equal(left.to_panel(), right.to_panel())
- with catch_warnings(record=True):
- wp = tm.makePanel()
- self._check_roundtrip(wp.to_frame(), _check)
- def test_overwrite_node(self):
- with ensure_clean_store(self.path) as store:
- store['a'] = tm.makeTimeDataFrame()
- ts = tm.makeTimeSeries()
- store['a'] = ts
- tm.assert_series_equal(store['a'], ts)
- def test_sparse_with_compression(self):
- # GH 2931
- # make sparse dataframe
- arr = np.random.binomial(n=1, p=.01, size=(1000, 10))
- df = DataFrame(arr).to_sparse(fill_value=0)
- # case 1: store uncompressed
- self._check_double_roundtrip(df, tm.assert_frame_equal,
- compression=False,
- check_frame_type=True)
- # case 2: store compressed (works)
- self._check_double_roundtrip(df, tm.assert_frame_equal,
- compression='zlib',
- check_frame_type=True)
- # set one series to be completely sparse
- df[0] = np.zeros(1000)
- # case 3: store df with completely sparse series uncompressed
- self._check_double_roundtrip(df, tm.assert_frame_equal,
- compression=False,
- check_frame_type=True)
- # case 4: try storing df with completely sparse series compressed
- # (fails)
- self._check_double_roundtrip(df, tm.assert_frame_equal,
- compression='zlib',
- check_frame_type=True)
- def test_select(self):
- with ensure_clean_store(self.path) as store:
- with catch_warnings(record=True):
- wp = tm.makePanel()
- # put/select ok
- _maybe_remove(store, 'wp')
- store.put('wp', wp, format='table')
- store.select('wp')
- # non-table ok (where = None)
- _maybe_remove(store, 'wp')
- store.put('wp2', wp)
- store.select('wp2')
- # selection on the non-indexable with a large number of columns
- wp = Panel(np.random.randn(100, 100, 100),
- items=['Item%03d' % i for i in range(100)],
- major_axis=date_range('1/1/2000', periods=100),
- minor_axis=['E%03d' % i for i in range(100)])
- _maybe_remove(store, 'wp')
- store.append('wp', wp)
- items = ['Item%03d' % i for i in range(80)]
- result = store.select('wp', 'items=items')
- expected = wp.reindex(items=items)
- assert_panel_equal(expected, result)
- # selectin non-table with a where
- # pytest.raises(ValueError, store.select,
- # 'wp2', ('column', ['A', 'D']))
- # select with columns=
- df = tm.makeTimeDataFrame()
- _maybe_remove(store, 'df')
- store.append('df', df)
- result = store.select('df', columns=['A', 'B'])
- expected = df.reindex(columns=['A', 'B'])
- tm.assert_frame_equal(expected, result)
- # equivalentsly
- result = store.select('df', [("columns=['A', 'B']")])
- expected = df.reindex(columns=['A', 'B'])
- tm.assert_frame_equal(expected, result)
- # with a data column
- _maybe_remove(store, 'df')
- store.append('df', df, data_columns=['A'])
- result = store.select('df', ['A > 0'], columns=['A', 'B'])
- expected = df[df.A > 0].reindex(columns=['A', 'B'])
- tm.assert_frame_equal(expected, result)
- # all a data columns
- _maybe_remove(store, 'df')
- store.append('df', df, data_columns=True)
- result = store.select('df', ['A > 0'], columns=['A', 'B'])
- expected = df[df.A > 0].reindex(columns=['A', 'B'])
- tm.assert_frame_equal(expected, result)
- # with a data column, but different columns
- _maybe_remove(store, 'df')
- store.append('df', df, data_columns=['A'])
- result = store.select('df', ['A > 0'], columns=['C', 'D'])
- expected = df[df.A > 0].reindex(columns=['C', 'D'])
- tm.assert_frame_equal(expected, result)
- def test_select_dtypes(self):
- with ensure_clean_store(self.path) as store:
- # with a Timestamp data column (GH #2637)
- df = DataFrame(dict(
- ts=bdate_range('2012-01-01', periods=300),
- A=np.random.randn(300)))
- _maybe_remove(store, 'df')
- store.append('df', df, data_columns=['ts', 'A'])
- result = store.select('df', "ts>=Timestamp('2012-02-01')")
- expected = df[df.ts >= Timestamp('2012-02-01')]
- tm.assert_frame_equal(expected, result)
- # bool columns (GH #2849)
- df = DataFrame(np.random.randn(5, 2), columns=['A', 'B'])
- df['object'] = 'foo'
- df.loc[4:5, 'object'] = 'bar'
- df['boolv'] = df['A'] > 0
- _maybe_remove(store, 'df')
- store.append('df', df, data_columns=True)
- expected = (df[df.boolv == True] # noqa
- .reindex(columns=['A', 'boolv']))
- for v in [True, 'true', 1]:
- result = store.select('df', 'boolv == %s' % str(v),
- columns=['A', 'boolv'])
- tm.assert_frame_equal(expected, result)
- expected = (df[df.boolv == False] # noqa
- .reindex(columns=['A', 'boolv']))
- for v in [False, 'false', 0]:
- result = store.select(
- 'df', 'boolv == %s' % str(v), columns=['A', 'boolv'])
- tm.assert_frame_equal(expected, result)
- # integer index
- df = DataFrame(dict(A=np.random.rand(20), B=np.random.rand(20)))
- _maybe_remove(store, 'df_int')
- store.append('df_int', df)
- result = store.select(
- 'df_int', "index<10 and columns=['A']")
- expected = df.reindex(index=list(df.index)[0:10], columns=['A'])
- tm.assert_frame_equal(expected, result)
- # float index
- df = DataFrame(dict(A=np.random.rand(
- 20), B=np.random.rand(20), index=np.arange(20, dtype='f8')))
- _maybe_remove(store, 'df_float')
- store.append('df_float', df)
- result = store.select(
- 'df_float', "index<10.0 and columns=['A']")
- expected = df.reindex(index=list(df.index)[0:10], columns=['A'])
- tm.assert_frame_equal(expected, result)
- with ensure_clean_store(self.path) as store:
- # floats w/o NaN
- df = DataFrame(
- dict(cols=range(11), values=range(11)), dtype='float64')
- df['cols'] = (df['cols'] + 10).apply(str)
- store.append('df1', df, data_columns=True)
- result = store.select(
- 'df1', where='values>2.0')
- expected = df[df['values'] > 2.0]
- tm.assert_frame_equal(expected, result)
- # floats with NaN
- df.iloc[0] = np.nan
- expected = df[df['values'] > 2.0]
- store.append('df2', df, data_columns=True, index=False)
- result = store.select(
- 'df2', where='values>2.0')
- tm.assert_frame_equal(expected, result)
- # https://github.com/PyTables/PyTables/issues/282
- # bug in selection when 0th row has a np.nan and an index
- # store.append('df3',df,data_columns=True)
- # result = store.select(
- # 'df3', where='values>2.0')
- # tm.assert_frame_equal(expected, result)
- # not in first position float with NaN ok too
- df = DataFrame(
- dict(cols=range(11), values=range(11)), dtype='float64')
- df['cols'] = (df['cols'] + 10).apply(str)
- df.iloc[1] = np.nan
- expected = df[df['values'] > 2.0]
- store.append('df4', df, data_columns=True)
- result = store.select(
- 'df4', where='values>2.0')
- tm.assert_frame_equal(expected, result)
- # test selection with comparison against numpy scalar
- # GH 11283
- with ensure_clean_store(self.path) as store:
- df = tm.makeDataFrame()
- expected = df[df['A'] > 0]
- store.append('df', df, data_columns=True)
- np_zero = np.float64(0) # noqa
- result = store.select('df', where=["A>np_zero"])
- tm.assert_frame_equal(expected, result)
- def test_select_with_many_inputs(self):
- with ensure_clean_store(self.path) as store:
- df = DataFrame(dict(ts=bdate_range('2012-01-01', periods=300),
- A=np.random.randn(300),
- B=range(300),
- users=['a'] * 50 + ['b'] * 50 + ['c'] * 100 +
- ['a%03d' % i for i in range(100)]))
- _maybe_remove(store, 'df')
- store.append('df', df, data_columns=['ts', 'A', 'B', 'users'])
- # regular select
- result = store.select('df', "ts>=Timestamp('2012-02-01')")
- expected = df[df.ts >= Timestamp('2012-02-01')]
- tm.assert_frame_equal(expected, result)
- # small selector
- result = store.select(
- 'df',
- "ts>=Timestamp('2012-02-01') & users=['a','b','c']")
- expected = df[(df.ts >= Timestamp('2012-02-01')) &
- df.users.isin(['a', 'b', 'c'])]
- tm.assert_frame_equal(expected, result)
- # big selector along the columns
- selector = ['a', 'b', 'c'] + ['a%03d' % i for i in range(60)]
- result = store.select(
- 'df',
- "ts>=Timestamp('2012-02-01') and users=selector")
- expected = df[(df.ts >= Timestamp('2012-02-01')) &
- df.users.isin(selector)]
- tm.assert_frame_equal(expected, result)
- selector = range(100, 200)
- result = store.select('df', 'B=selector')
- expected = df[df.B.isin(selector)]
- tm.assert_frame_equal(expected, result)
- assert len(result) == 100
- # big selector along the index
- selector = Index(df.ts[0:100].values)
- result = store.select('df', 'ts=selector')
- expected = df[df.ts.isin(selector.values)]
- tm.assert_frame_equal(expected, result)
- assert len(result) == 100
- def test_select_iterator(self):
- # single table
- with ensure_clean_store(self.path) as store:
- df = tm.makeTimeDataFrame(500)
- _maybe_remove(store, 'df')
- store.append('df', df)
- expected = store.select('df')
- results = [s for s in store.select('df', iterator=True)]
- result = concat(results)
- tm.assert_frame_equal(expected, result)
- results = [s for s in store.select('df', chunksize=100)]
- assert len(results) == 5
- result = concat(results)
- tm.assert_frame_equal(expected, result)
- results = [s for s in store.select('df', chunksize=150)]
- result = concat(results)
- tm.assert_frame_equal(result, expected)
- with ensure_clean_path(self.path) as path:
- df = tm.makeTimeDataFrame(500)
- df.to_hdf(path, 'df_non_table')
- pytest.raises(TypeError, read_hdf, path,
- 'df_non_table', chunksize=100)
- pytest.raises(TypeError, read_hdf, path,
- 'df_non_table', iterator=True)
- with ensure_clean_path(self.path) as path:
- df = tm.makeTimeDataFrame(500)
- df.to_hdf(path, 'df', format='table')
- results = [s for s in read_hdf(path, 'df', chunksize=100)]
- result = concat(results)
- assert len(results) == 5
- tm.assert_frame_equal(result, df)
- tm.assert_frame_equal(result, read_hdf(path, 'df'))
- # multiple
- with ensure_clean_store(self.path) as store:
- df1 = tm.makeTimeDataFrame(500)
- store.append('df1', df1, data_columns=True)
- df2 = tm.makeTimeDataFrame(500).rename(
- columns=lambda x: "%s_2" % x)
- df2['foo'] = 'bar'
- store.append('df2', df2)
- df = concat([df1, df2], axis=1)
- # full selection
- expected = store.select_as_multiple(
- ['df1', 'df2'], selector='df1')
- results = [s for s in store.select_as_multiple(
- ['df1', 'df2'], selector='df1', chunksize=150)]
- result = concat(results)
- tm.assert_frame_equal(expected, result)
- def test_select_iterator_complete_8014(self):
- # GH 8014
- # using iterator and where clause
- chunksize = 1e4
- # no iterator
- with ensure_clean_store(self.path) as store:
- expected = tm.makeTimeDataFrame(100064, 'S')
- _maybe_remove(store, 'df')
- store.append('df', expected)
- beg_dt = expected.index[0]
- end_dt = expected.index[-1]
- # select w/o iteration and no where clause works
- result = store.select('df')
- tm.assert_frame_equal(expected, result)
- # select w/o iterator and where clause, single term, begin
- # of range, works
- where = "index >= '%s'" % beg_dt
- result = store.select('df', where=where)
- tm.assert_frame_equal(expected, result)
- # select w/o iterator and where clause, single term, end
- # of range, works
- where = "index <= '%s'" % end_dt
- result = store.select('df', where=where)
- tm.assert_frame_equal(expected, result)
- # select w/o iterator and where clause, inclusive range,
- # works
- where = "index >= '%s' & index <= '%s'" % (beg_dt, end_dt)
- result = store.select('df', where=where)
- tm.assert_frame_equal(expected, result)
- # with iterator, full range
- with ensure_clean_store(self.path) as store:
- expected = tm.makeTimeDataFrame(100064, 'S')
- _maybe_remove(store, 'df')
- store.append('df', expected)
- beg_dt = expected.index[0]
- end_dt = expected.index[-1]
- # select w/iterator and no where clause works
- results = [s for s in store.select('df', chunksize=chunksize)]
- result = concat(results)
- tm.assert_frame_equal(expected, result)
- # select w/iterator and where clause, single term, begin of range
- where = "index >= '%s'" % beg_dt
- results = [s for s in store.select(
- 'df', where=where, chunksize=chunksize)]
- result = concat(results)
- tm.assert_frame_equal(expected, result)
- # select w/iterator and where clause, single term, end of range
- where = "index <= '%s'" % end_dt
- results = [s for s in store.select(
- 'df', where=where, chunksize=chunksize)]
- result = concat(results)
- tm.assert_frame_equal(expected, result)
- # select w/iterator and where clause, inclusive range
- where = "index >= '%s' & index <= '%s'" % (beg_dt, end_dt)
- results = [s for s in store.select(
- 'df', where=where, chunksize=chunksize)]
- result = concat(results)
- tm.assert_frame_equal(expected, result)
- def test_select_iterator_non_complete_8014(self):
- # GH 8014
- # using iterator and where clause
- chunksize = 1e4
- # with iterator, non complete range
- with ensure_clean_store(self.path) as store:
- expected = tm.makeTimeDataFrame(100064, 'S')
- _maybe_remove(store, 'df')
- store.append('df', expected)
- beg_dt = expected.index[1]
- end_dt = expected.index[-2]
- # select w/iterator and where clause, single term, begin of range
- where = "index >= '%s'" % beg_dt
- results = [s for s in store.select(
- 'df', where=where, chunksize=chunksize)]
- result = concat(results)
- rexpected = expected[expected.index >= beg_dt]
- tm.assert_frame_equal(rexpected, result)
- # select w/iterator and where clause, single term, end of range
- where = "index <= '%s'" % end_dt
- results = [s for s in store.select(
- 'df', where=where, chunksize=chunksize)]
- result = concat(results)
- rexpected = expected[expected.index <= end_dt]
- tm.assert_frame_equal(rexpected, result)
- # select w/iterator and where clause, inclusive range
- where = "index >= '%s' & index <= '%s'" % (beg_dt, end_dt)
- results = [s for s in store.select(
- 'df', where=where, chunksize=chunksize)]
- result = concat(results)
- rexpected = expected[(expected.index >= beg_dt) &
- (expected.index <= end_dt)]
- tm.assert_frame_equal(rexpected, result)
- # with iterator, empty where
- with ensure_clean_store(self.path) as store:
- expected = tm.makeTimeDataFrame(100064, 'S')
- _maybe_remove(store, 'df')
- store.append('df', expected)
- end_dt = expected.index[-1]
- # select w/iterator and where clause, single term, begin of range
- where = "index > '%s'" % end_dt
- results = [s for s in store.select(
- 'df', where=where, chunksize=chunksize)]
- assert 0 == len(results)
- def test_select_iterator_many_empty_frames(self):
- # GH 8014
- # using iterator and where clause can return many empty
- # frames.
- chunksize = int(1e4)
- # with iterator, range limited to the first chunk
- with ensure_clean_store(self.path) as store:
- expected = tm.makeTimeDataFrame(100000, 'S')
- _maybe_remove(store, 'df')
- store.append('df', expected)
- beg_dt = expected.index[0]
- end_dt = expected.index[chunksize - 1]
- # select w/iterator and where clause, single term, begin of range
- where = "index >= '%s'" % beg_dt
- results = [s for s in store.select(
- 'df', where=where, chunksize=chunksize)]
- result = concat(results)
- rexpected = expected[expected.index >= beg_dt]
- tm.assert_frame_equal(rexpected, result)
- # select w/iterator and where clause, single term, end of range
- where = "index <= '%s'" % end_dt
- results = [s for s in store.select(
- 'df', where=where, chunksize=chunksize)]
- assert len(results) == 1
- result = concat(results)
- rexpected = expected[expected.index <= end_dt]
- tm.assert_frame_equal(rexpected, result)
- # select w/iterator and where clause, inclusive range
- where = "index >= '%s' & index <= '%s'" % (beg_dt, end_dt)
- results = [s for s in store.select(
- 'df', where=where, chunksize=chunksize)]
- # should be 1, is 10
- assert len(results) == 1
- result = concat(results)
- rexpected = expected[(expected.index >= beg_dt) &
- (expected.index <= end_dt)]
- tm.assert_frame_equal(rexpected, result)
- # select w/iterator and where clause which selects
- # *nothing*.
- #
- # To be consistent with Python idiom I suggest this should
- # return [] e.g. `for e in []: print True` never prints
- # True.
- where = "index <= '%s' & index >= '%s'" % (beg_dt, end_dt)
- results = [s for s in store.select(
- 'df', where=where, chunksize=chunksize)]
- # should be []
- assert len(results) == 0
- @pytest.mark.filterwarnings(
- "ignore:\\nthe :pandas.io.pytables.AttributeConflictWarning"
- )
- def test_retain_index_attributes(self):
- # GH 3499, losing frequency info on index recreation
- df = DataFrame(dict(
- A=Series(lrange(3),
- index=date_range('2000-1-1', periods=3, freq='H'))))
- with ensure_clean_store(self.path) as store:
- _maybe_remove(store, 'data')
- store.put('data', df, format='table')
- result = store.get('data')
- tm.assert_frame_equal(df, result)
- for attr in ['freq', 'tz', 'name']:
- for idx in ['index', 'columns']:
- assert (getattr(getattr(df, idx), attr, None) ==
- getattr(getattr(result, idx), attr, None))
- # try to append a table with a different frequency
- with catch_warnings(record=True):
- df2 = DataFrame(dict(
- A=Series(lrange(3),
- index=date_range('2002-1-1',
- periods=3, freq='D'))))
- store.append('data', df2)
- assert store.get_storer('data').info['index']['freq'] is None
- # this is ok
- _maybe_remove(store, 'df2')
- df2 = DataFrame(dict(
- A=Series(lrange(3),
- index=[Timestamp('20010101'), Timestamp('20010102'),
- Timestamp('20020101')])))
- store.append('df2', df2)
- df3 = DataFrame(dict(
- A=Series(lrange(3),
- index=date_range('2002-1-1', periods=3,
- freq='D'))))
- store.append('df2', df3)
- @pytest.mark.filterwarnings(
- "ignore:\\nthe :pandas.io.pytables.AttributeConflictWarning"
- )
- def test_retain_index_attributes2(self):
- with ensure_clean_path(self.path) as path:
- with catch_warnings(record=True):
- df = DataFrame(dict(
- A=Series(lrange(3),
- index=date_range('2000-1-1',
- periods=3, freq='H'))))
- df.to_hdf(path, 'data', mode='w', append=True)
- df2 = DataFrame(dict(
- A=Series(lrange(3),
- index=date_range('2002-1-1', periods=3,
- freq='D'))))
- df2.to_hdf(path, 'data', append=True)
- idx = date_range('2000-1-1', periods=3, freq='H')
- idx.name = 'foo'
- df = DataFrame(dict(A=Series(lrange(3), index=idx)))
- df.to_hdf(path, 'data', mode='w', append=True)
- assert read_hdf(path, 'data').index.name == 'foo'
- with catch_warnings(record=True):
- idx2 = date_range('2001-1-1', periods=3, freq='H')
- idx2.name = 'bar'
- df2 = DataFrame(dict(A=Series(lrange(3), index=idx2)))
- df2.to_hdf(path, 'data', append=True)
- assert read_hdf(path, 'data').index.name is None
- def test_panel_select(self):
- with ensure_clean_store(self.path) as store:
- with catch_warnings(record=True):
- wp = tm.makePanel()
- store.put('wp', wp, format='table')
- date = wp.major_axis[len(wp.major_axis) // 2]
- crit1 = ('major_axis>=date')
- crit2 = ("minor_axis=['A', 'D']")
- result = store.select('wp', [crit1, crit2])
- expected = wp.truncate(before=date).reindex(minor=['A', 'D'])
- assert_panel_equal(result, expected)
- result = store.select(
- 'wp', ['major_axis>="20000124"',
- ("minor_axis=['A', 'B']")])
- expected = wp.truncate(
- before='20000124').reindex(minor=['A', 'B'])
- assert_panel_equal(result, expected)
- def test_frame_select(self):
- df = tm.makeTimeDataFrame()
- with ensure_clean_store(self.path) as store:
- store.put('frame', df, format='table')
- date = df.index[len(df) // 2]
- crit1 = Term('index>=date')
- assert crit1.env.scope['date'] == date
- crit2 = ("columns=['A', 'D']")
- crit3 = ('columns=A')
- result = store.select('frame', [crit1, crit2])
- expected = df.loc[date:, ['A', 'D']]
- tm.assert_frame_equal(result, expected)
- result = store.select('frame', [crit3])
- expected = df.loc[:, ['A']]
- tm.assert_frame_equal(result, expected)
- # invalid terms
- df = tm.makeTimeDataFrame()
- store.append('df_time', df)
- pytest.raises(
- ValueError, store.select, 'df_time', "index>0")
- # can't select if not written as table
- # store['frame'] = df
- # pytest.raises(ValueError, store.select,
- # 'frame', [crit1, crit2])
- def test_frame_select_complex(self):
- # select via complex criteria
- df = tm.makeTimeDataFrame()
- df['string'] = 'foo'
- df.loc[df.index[0:4], 'string'] = 'bar'
- with ensure_clean_store(self.path) as store:
- store.put('df', df, format='table', data_columns=['string'])
- # empty
- result = store.select('df', 'index>df.index[3] & string="bar"')
- expected = df.loc[(df.index > df.index[3]) & (df.string == 'bar')]
- tm.assert_frame_equal(result, expected)
- result = store.select('df', 'index>df.index[3] & string="foo"')
- expected = df.loc[(df.index > df.index[3]) & (df.string == 'foo')]
- tm.assert_frame_equal(result, expected)
- # or
- result = store.select('df', 'index>df.index[3] | string="bar"')
- expected = df.loc[(df.index > df.index[3]) | (df.string == 'bar')]
- tm.assert_frame_equal(result, expected)
- result = store.select('df', '(index>df.index[3] & '
- 'index<=df.index[6]) | string="bar"')
- expected = df.loc[((df.index > df.index[3]) & (
- df.index <= df.index[6])) | (df.string == 'bar')]
- tm.assert_frame_equal(result, expected)
- # invert
- result = store.select('df', 'string!="bar"')
- expected = df.loc[df.string != 'bar']
- tm.assert_frame_equal(result, expected)
- # invert not implemented in numexpr :(
- pytest.raises(NotImplementedError,
- store.select, 'df', '~(string="bar")')
- # invert ok for filters
- result = store.select('df', "~(columns=['A','B'])")
- expected = df.loc[:, df.columns.difference(['A', 'B'])]
- tm.assert_frame_equal(result, expected)
- # in
- result = store.select(
- 'df', "index>df.index[3] & columns in ['A','B']")
- expected = df.loc[df.index > df.index[3]].reindex(columns=[
- 'A', 'B'])
- tm.assert_frame_equal(result, expected)
- def test_frame_select_complex2(self):
- with ensure_clean_path(['parms.hdf', 'hist.hdf']) as paths:
- pp, hh = paths
- # use non-trivial selection criteria
- parms = DataFrame({'A': [1, 1, 2, 2, 3]})
- parms.to_hdf(pp, 'df', mode='w',
- format='table', data_columns=['A'])
- selection = read_hdf(pp, 'df', where='A=[2,3]')
- hist = DataFrame(np.random.randn(25, 1),
- columns=['data'],
- index=MultiIndex.from_tuples(
- [(i, j) for i in range(5)
- for j in range(5)],
- names=['l1', 'l2']))
- hist.to_hdf(hh, 'df', mode='w', format='table')
- expected = read_hdf(hh, 'df', where='l1=[2, 3, 4]')
- # sccope with list like
- l = selection.index.tolist() # noqa
- store = HDFStore(hh)
- result = store.select('df', where='l1=l')
- assert_frame_equal(result, expected)
- store.close()
- result = read_hdf(hh, 'df', where='l1=l')
- assert_frame_equal(result, expected)
- # index
- index = selection.index # noqa
- result = read_hdf(hh, 'df', where='l1=index')
- assert_frame_equal(result, expected)
- result = read_hdf(hh, 'df', where='l1=selection.index')
- assert_frame_equal(result, expected)
- result = read_hdf(hh, 'df', where='l1=selection.index.tolist()')
- assert_frame_equal(result, expected)
- result = read_hdf(hh, 'df', where='l1=list(selection.index)')
- assert_frame_equal(result, expected)
- # sccope with index
- store = HDFStore(hh)
- result = store.select('df', where='l1=index')
- assert_frame_equal(result, expected)
- result = store.select('df', where='l1=selection.index')
- assert_frame_equal(result, expected)
- result = store.select('df', where='l1=selection.index.tolist()')
- assert_frame_equal(result, expected)
- result = store.select('df', where='l1=list(selection.index)')
- assert_frame_equal(result, expected)
- store.close()
- def test_invalid_filtering(self):
- # can't use more than one filter (atm)
- df = tm.makeTimeDataFrame()
- with ensure_clean_store(self.path) as store:
- store.put('df', df, format='table')
- # not implemented
- pytest.raises(NotImplementedError, store.select,
- 'df', "columns=['A'] | columns=['B']")
- # in theory we could deal with this
- pytest.raises(NotImplementedError, store.select,
- 'df', "columns=['A','B'] & columns=['C']")
- def test_string_select(self):
- # GH 2973
- with ensure_clean_store(self.path) as store:
- df = tm.makeTimeDataFrame()
- # test string ==/!=
- df['x'] = 'none'
- df.loc[2:7, 'x'] = ''
- store.append('df', df, data_columns=['x'])
- result = store.select('df', 'x=none')
- expected = df[df.x == 'none']
- assert_frame_equal(result, expected)
- try:
- result = store.select('df', 'x!=none')
- expected = df[df.x != 'none']
- assert_frame_equal(result, expected)
- except Exception as detail:
- pprint_thing("[{0}]".format(detail))
- pprint_thing(store)
- pprint_thing(expected)
- df2 = df.copy()
- df2.loc[df2.x == '', 'x'] = np.nan
- store.append('df2', df2, data_columns=['x'])
- result = store.select('df2', 'x!=none')
- expected = df2[isna(df2.x)]
- assert_frame_equal(result, expected)
- # int ==/!=
- df['int'] = 1
- df.loc[2:7, 'int'] = 2
- store.append('df3', df, data_columns=['int'])
- result = store.select('df3', 'int=2')
- expected = df[df.int == 2]
- assert_frame_equal(result, expected)
- result = store.select('df3', 'int!=2')
- expected = df[df.int != 2]
- assert_frame_equal(result, expected)
- def test_read_column(self):
- df = tm.makeTimeDataFrame()
- with ensure_clean_store(self.path) as store:
- _maybe_remove(store, 'df')
- # GH 17912
- # HDFStore.select_column should raise a KeyError
- # exception if the key is not a valid store
- with pytest.raises(KeyError,
- match='No object named df in the file'):
- store.select_column('df', 'index')
- store.append('df', df)
- # error
- pytest.raises(KeyError, store.select_column, 'df', 'foo')
- def f():
- store.select_column('df', 'index', where=['index>5'])
- pytest.raises(Exception, f)
- # valid
- result = store.select_column('df', 'index')
- tm.assert_almost_equal(result.values, Series(df.index).values)
- assert isinstance(result, Series)
- # not a data indexable column
- pytest.raises(
- ValueError, store.select_column, 'df', 'values_block_0')
- # a data column
- df2 = df.copy()
- df2['string'] = 'foo'
- store.append('df2', df2, data_columns=['string'])
- result = store.select_column('df2', 'string')
- tm.assert_almost_equal(result.values, df2['string'].values)
- # a data column with NaNs, result excludes the NaNs
- df3 = df.copy()
- df3['string'] = 'foo'
- df3.loc[4:6, 'string'] = np.nan
- store.append('df3', df3, data_columns=['string'])
- result = store.select_column('df3', 'string')
- tm.assert_almost_equal(result.values, df3['string'].values)
- # start/stop
- result = store.select_column('df3', 'string', start=2)
- tm.assert_almost_equal(result.values, df3['string'].values[2:])
- result = store.select_column('df3', 'string', start=-2)
- tm.assert_almost_equal(result.values, df3['string'].values[-2:])
- result = store.select_column('df3', 'string', stop=2)
- tm.assert_almost_equal(result.values, df3['string'].values[:2])
- result = store.select_column('df3', 'string', stop=-2)
- tm.assert_almost_equal(result.values, df3['string'].values[:-2])
- result = store.select_column('df3', 'string', start=2, stop=-2)
- tm.assert_almost_equal(result.values, df3['string'].values[2:-2])
- result = store.select_column('df3', 'string', start=-2, stop=2)
- tm.assert_almost_equal(result.values, df3['string'].values[-2:2])
- # GH 10392 - make sure column name is preserved
- df4 = DataFrame({'A': np.random.randn(10), 'B': 'foo'})
- store.append('df4', df4, data_columns=True)
- expected = df4['B']
- result = store.select_column('df4', 'B')
- tm.assert_series_equal(result, expected)
- def test_coordinates(self):
- df = tm.makeTimeDataFrame()
- with ensure_clean_store(self.path) as store:
- _maybe_remove(store, 'df')
- store.append('df', df)
- # all
- c = store.select_as_coordinates('df')
- assert((c.values == np.arange(len(df.index))).all())
- # get coordinates back & test vs frame
- _maybe_remove(store, 'df')
- df = DataFrame(dict(A=lrange(5), B=lrange(5)))
- store.append('df', df)
- c = store.select_as_coordinates('df', ['index<3'])
- assert((c.values == np.arange(3)).all())
- result = store.select('df', where=c)
- expected = df.loc[0:2, :]
- tm.assert_frame_equal(result, expected)
- c = store.select_as_coordinates('df', ['index>=3', 'index<=4'])
- assert((c.values == np.arange(2) + 3).all())
- result = store.select('df', where=c)
- expected = df.loc[3:4, :]
- tm.assert_frame_equal(result, expected)
- assert isinstance(c, Index)
- # multiple tables
- _maybe_remove(store, 'df1')
- _maybe_remove(store, 'df2')
- df1 = tm.makeTimeDataFrame()
- df2 = tm.makeTimeDataFrame().rename(columns=lambda x: "%s_2" % x)
- store.append('df1', df1, data_columns=['A', 'B'])
- store.append('df2', df2)
- c = store.select_as_coordinates('df1', ['A>0', 'B>0'])
- df1_result = store.select('df1', c)
- df2_result = store.select('df2', c)
- result = concat([df1_result, df2_result], axis=1)
- expected = concat([df1, df2], axis=1)
- expected = expected[(expected.A > 0) & (expected.B > 0)]
- tm.assert_frame_equal(result, expected)
- # pass array/mask as the coordinates
- with ensure_clean_store(self.path) as store:
- df = DataFrame(np.random.randn(1000, 2),
- index=date_range('20000101', periods=1000))
- store.append('df', df)
- c = store.select_column('df', 'index')
- where = c[DatetimeIndex(c).month == 5].index
- expected = df.iloc[where]
- # locations
- result = store.select('df', where=where)
- tm.assert_frame_equal(result, expected)
- # boolean
- result = store.select('df', where=where)
- tm.assert_frame_equal(result, expected)
- # invalid
- pytest.raises(ValueError, store.select, 'df',
- where=np.arange(len(df), dtype='float64'))
- pytest.raises(ValueError, store.select, 'df',
- where=np.arange(len(df) + 1))
- pytest.raises(ValueError, store.select, 'df',
- where=np.arange(len(df)), start=5)
- pytest.raises(ValueError, store.select, 'df',
- where=np.arange(len(df)), start=5, stop=10)
- # selection with filter
- selection = date_range('20000101', periods=500)
- result = store.select('df', where='index in selection')
- expected = df[df.index.isin(selection)]
- tm.assert_frame_equal(result, expected)
- # list
- df = DataFrame(np.random.randn(10, 2))
- store.append('df2', df)
- result = store.select('df2', where=[0, 3, 5])
- expected = df.iloc[[0, 3, 5]]
- tm.assert_frame_equal(result, expected)
- # boolean
- where = [True] * 10
- where[-2] = False
- result = store.select('df2', where=where)
- expected = df.loc[where]
- tm.assert_frame_equal(result, expected)
- # start/stop
- result = store.select('df2', start=5, stop=10)
- expected = df[5:10]
- tm.assert_frame_equal(result, expected)
- def test_append_to_multiple(self):
- df1 = tm.makeTimeDataFrame()
- df2 = tm.makeTimeDataFrame().rename(columns=lambda x: "%s_2" % x)
- df2['foo'] = 'bar'
- df = concat([df1, df2], axis=1)
- with ensure_clean_store(self.path) as store:
- # exceptions
- pytest.raises(ValueError, store.append_to_multiple,
- {'df1': ['A', 'B'], 'df2': None}, df,
- selector='df3')
- pytest.raises(ValueError, store.append_to_multiple,
- {'df1': None, 'df2': None}, df, selector='df3')
- pytest.raises(
- ValueError, store.append_to_multiple, 'df1', df, 'df1')
- # regular operation
- store.append_to_multiple(
- {'df1': ['A', 'B'], 'df2': None}, df, selector='df1')
- result = store.select_as_multiple(
- ['df1', 'df2'], where=['A>0', 'B>0'], selector='df1')
- expected = df[(df.A > 0) & (df.B > 0)]
- tm.assert_frame_equal(result, expected)
- def test_append_to_multiple_dropna(self):
- df1 = tm.makeTimeDataFrame()
- df2 = tm.makeTimeDataFrame().rename(columns=lambda x: "%s_2" % x)
- df1.iloc[1, df1.columns.get_indexer(['A', 'B'])] = np.nan
- df = concat([df1, df2], axis=1)
- with ensure_clean_store(self.path) as store:
- # dropna=True should guarantee rows are synchronized
- store.append_to_multiple(
- {'df1': ['A', 'B'], 'df2': None}, df, selector='df1',
- dropna=True)
- result = store.select_as_multiple(['df1', 'df2'])
- expected = df.dropna()
- tm.assert_frame_equal(result, expected)
- tm.assert_index_equal(store.select('df1').index,
- store.select('df2').index)
- @pytest.mark.xfail(run=False,
- reason="append_to_multiple_dropna_false "
- "is not raising as failed")
- def test_append_to_multiple_dropna_false(self):
- df1 = tm.makeTimeDataFrame()
- df2 = tm.makeTimeDataFrame().rename(columns=lambda x: "%s_2" % x)
- df1.iloc[1, df1.columns.get_indexer(['A', 'B'])] = np.nan
- df = concat([df1, df2], axis=1)
- with ensure_clean_store(self.path) as store:
- # dropna=False shouldn't synchronize row indexes
- store.append_to_multiple(
- {'df1a': ['A', 'B'], 'df2a': None}, df, selector='df1a',
- dropna=False)
- with pytest.raises(ValueError):
- store.select_as_multiple(['df1a', 'df2a'])
- assert not store.select('df1a').index.equals(
- store.select('df2a').index)
- def test_select_as_multiple(self):
- df1 = tm.makeTimeDataFrame()
- df2 = tm.makeTimeDataFrame().rename(columns=lambda x: "%s_2" % x)
- df2['foo'] = 'bar'
- with ensure_clean_store(self.path) as store:
- # no tables stored
- pytest.raises(Exception, store.select_as_multiple,
- None, where=['A>0', 'B>0'], selector='df1')
- store.append('df1', df1, data_columns=['A', 'B'])
- store.append('df2', df2)
- # exceptions
- pytest.raises(Exception, store.select_as_multiple,
- None, where=['A>0', 'B>0'], selector='df1')
- pytest.raises(Exception, store.select_as_multiple,
- [None], where=['A>0', 'B>0'], selector='df1')
- pytest.raises(KeyError, store.select_as_multiple,
- ['df1', 'df3'], where=['A>0', 'B>0'],
- selector='df1')
- pytest.raises(KeyError, store.select_as_multiple,
- ['df3'], where=['A>0', 'B>0'], selector='df1')
- pytest.raises(KeyError, store.select_as_multiple,
- ['df1', 'df2'], where=['A>0', 'B>0'],
- selector='df4')
- # default select
- result = store.select('df1', ['A>0', 'B>0'])
- expected = store.select_as_multiple(
- ['df1'], where=['A>0', 'B>0'], selector='df1')
- tm.assert_frame_equal(result, expected)
- expected = store.select_as_multiple(
- 'df1', where=['A>0', 'B>0'], selector='df1')
- tm.assert_frame_equal(result, expected)
- # multiple
- result = store.select_as_multiple(
- ['df1', 'df2'], where=['A>0', 'B>0'], selector='df1')
- expected = concat([df1, df2], axis=1)
- expected = expected[(expected.A > 0) & (expected.B > 0)]
- tm.assert_frame_equal(result, expected)
- # multiple (diff selector)
- result = store.select_as_multiple(
- ['df1', 'df2'], where='index>df2.index[4]', selector='df2')
- expected = concat([df1, df2], axis=1)
- expected = expected[5:]
- tm.assert_frame_equal(result, expected)
- # test excpection for diff rows
- store.append('df3', tm.makeTimeDataFrame(nper=50))
- pytest.raises(ValueError, store.select_as_multiple,
- ['df1', 'df3'], where=['A>0', 'B>0'],
- selector='df1')
- @pytest.mark.skipif(
- LooseVersion(tables.__version__) < LooseVersion('3.1.0'),
- reason=("tables version does not support fix for nan selection "
- "bug: GH 4858"))
- def test_nan_selection_bug_4858(self):
- with ensure_clean_store(self.path) as store:
- df = DataFrame(dict(cols=range(6), values=range(6)),
- dtype='float64')
- df['cols'] = (df['cols'] + 10).apply(str)
- df.iloc[0] = np.nan
- expected = DataFrame(dict(cols=['13.0', '14.0', '15.0'], values=[
- 3., 4., 5.]), index=[3, 4, 5])
- # write w/o the index on that particular column
- store.append('df', df, data_columns=True, index=['cols'])
- result = store.select('df', where='values>2.0')
- assert_frame_equal(result, expected)
- def test_start_stop_table(self):
- with ensure_clean_store(self.path) as store:
- # table
- df = DataFrame(dict(A=np.random.rand(20), B=np.random.rand(20)))
- store.append('df', df)
- result = store.select(
- 'df', "columns=['A']", start=0, stop=5)
- expected = df.loc[0:4, ['A']]
- tm.assert_frame_equal(result, expected)
- # out of range
- result = store.select(
- 'df', "columns=['A']", start=30, stop=40)
- assert len(result) == 0
- expected = df.loc[30:40, ['A']]
- tm.assert_frame_equal(result, expected)
- def test_start_stop_multiple(self):
- # GH 16209
- with ensure_clean_store(self.path) as store:
- df = DataFrame({"foo": [1, 2], "bar": [1, 2]})
- store.append_to_multiple({'selector': ['foo'], 'data': None}, df,
- selector='selector')
- result = store.select_as_multiple(['selector', 'data'],
- selector='selector', start=0,
- stop=1)
- expected = df.loc[[0], ['foo', 'bar']]
- tm.assert_frame_equal(result, expected)
- def test_start_stop_fixed(self):
- with ensure_clean_store(self.path) as store:
- # fixed, GH 8287
- df = DataFrame(dict(A=np.random.rand(20),
- B=np.random.rand(20)),
- index=pd.date_range('20130101', periods=20))
- store.put('df', df)
- result = store.select(
- 'df', start=0, stop=5)
- expected = df.iloc[0:5, :]
- tm.assert_frame_equal(result, expected)
- result = store.select(
- 'df', start=5, stop=10)
- expected = df.iloc[5:10, :]
- tm.assert_frame_equal(result, expected)
- # out of range
- result = store.select(
- 'df', start=30, stop=40)
- expected = df.iloc[30:40, :]
- tm.assert_frame_equal(result, expected)
- # series
- s = df.A
- store.put('s', s)
- result = store.select('s', start=0, stop=5)
- expected = s.iloc[0:5]
- tm.assert_series_equal(result, expected)
- result = store.select('s', start=5, stop=10)
- expected = s.iloc[5:10]
- tm.assert_series_equal(result, expected)
- # sparse; not implemented
- df = tm.makeDataFrame()
- df.iloc[3:5, 1:3] = np.nan
- df.iloc[8:10, -2] = np.nan
- dfs = df.to_sparse()
- store.put('dfs', dfs)
- with pytest.raises(NotImplementedError):
- store.select('dfs', start=0, stop=5)
- def test_select_filter_corner(self):
- df = DataFrame(np.random.randn(50, 100))
- df.index = ['%.3d' % c for c in df.index]
- df.columns = ['%.3d' % c for c in df.columns]
- with ensure_clean_store(self.path) as store:
- store.put('frame', df, format='table')
- crit = 'columns=df.columns[:75]'
- result = store.select('frame', [crit])
- tm.assert_frame_equal(result, df.loc[:, df.columns[:75]])
- crit = 'columns=df.columns[:75:2]'
- result = store.select('frame', [crit])
- tm.assert_frame_equal(result, df.loc[:, df.columns[:75:2]])
- def test_path_pathlib(self):
- df = tm.makeDataFrame()
- result = tm.round_trip_pathlib(
- lambda p: df.to_hdf(p, 'df'),
- lambda p: pd.read_hdf(p, 'df'))
- tm.assert_frame_equal(df, result)
- @pytest.mark.parametrize('start, stop', [(0, 2), (1, 2), (None, None)])
- def test_contiguous_mixed_data_table(self, start, stop):
- # GH 17021
- # ValueError when reading a contiguous mixed-data table ft. VLArray
- df = DataFrame({'a': Series([20111010, 20111011, 20111012]),
- 'b': Series(['ab', 'cd', 'ab'])})
- with ensure_clean_store(self.path) as store:
- store.append('test_dataset', df)
- result = store.select('test_dataset', start=start, stop=stop)
- assert_frame_equal(df[start:stop], result)
- def test_path_pathlib_hdfstore(self):
- df = tm.makeDataFrame()
- def writer(path):
- with pd.HDFStore(path) as store:
- df.to_hdf(store, 'df')
- def reader(path):
- with pd.HDFStore(path) as store:
- return pd.read_hdf(store, 'df')
- result = tm.round_trip_pathlib(writer, reader)
- tm.assert_frame_equal(df, result)
- def test_pickle_path_localpath(self):
- df = tm.makeDataFrame()
- result = tm.round_trip_pathlib(
- lambda p: df.to_hdf(p, 'df'),
- lambda p: pd.read_hdf(p, 'df'))
- tm.assert_frame_equal(df, result)
- def test_path_localpath_hdfstore(self):
- df = tm.makeDataFrame()
- def writer(path):
- with pd.HDFStore(path) as store:
- df.to_hdf(store, 'df')
- def reader(path):
- with pd.HDFStore(path) as store:
- return pd.read_hdf(store, 'df')
- result = tm.round_trip_localpath(writer, reader)
- tm.assert_frame_equal(df, result)
- def _check_roundtrip(self, obj, comparator, compression=False, **kwargs):
- options = {}
- if compression:
- options['complib'] = _default_compressor
- with ensure_clean_store(self.path, 'w', **options) as store:
- store['obj'] = obj
- retrieved = store['obj']
- comparator(retrieved, obj, **kwargs)
- def _check_double_roundtrip(self, obj, comparator, compression=False,
- **kwargs):
- options = {}
- if compression:
- options['complib'] = compression or _default_compressor
- with ensure_clean_store(self.path, 'w', **options) as store:
- store['obj'] = obj
- retrieved = store['obj']
- comparator(retrieved, obj, **kwargs)
- store['obj'] = retrieved
- again = store['obj']
- comparator(again, obj, **kwargs)
- def _check_roundtrip_table(self, obj, comparator, compression=False):
- options = {}
- if compression:
- options['complib'] = _default_compressor
- with ensure_clean_store(self.path, 'w', **options) as store:
- store.put('obj', obj, format='table')
- retrieved = store['obj']
- comparator(retrieved, obj)
- def test_multiple_open_close(self):
- # gh-4409: open & close multiple times
- with ensure_clean_path(self.path) as path:
- df = tm.makeDataFrame()
- df.to_hdf(path, 'df', mode='w', format='table')
- # single
- store = HDFStore(path)
- assert 'CLOSED' not in store.info()
- assert store.is_open
- store.close()
- assert 'CLOSED' in store.info()
- assert not store.is_open
- with ensure_clean_path(self.path) as path:
- if pytables._table_file_open_policy_is_strict:
- # multiples
- store1 = HDFStore(path)
- def f():
- HDFStore(path)
- pytest.raises(ValueError, f)
- store1.close()
- else:
- # multiples
- store1 = HDFStore(path)
- store2 = HDFStore(path)
- assert 'CLOSED' not in store1.info()
- assert 'CLOSED' not in store2.info()
- assert store1.is_open
- assert store2.is_open
- store1.close()
- assert 'CLOSED' in store1.info()
- assert not store1.is_open
- assert 'CLOSED' not in store2.info()
- assert store2.is_open
- store2.close()
- assert 'CLOSED' in store1.info()
- assert 'CLOSED' in store2.info()
- assert not store1.is_open
- assert not store2.is_open
- # nested close
- store = HDFStore(path, mode='w')
- store.append('df', df)
- store2 = HDFStore(path)
- store2.append('df2', df)
- store2.close()
- assert 'CLOSED' in store2.info()
- assert not store2.is_open
- store.close()
- assert 'CLOSED' in store.info()
- assert not store.is_open
- # double closing
- store = HDFStore(path, mode='w')
- store.append('df', df)
- store2 = HDFStore(path)
- store.close()
- assert 'CLOSED' in store.info()
- assert not store.is_open
- store2.close()
- assert 'CLOSED' in store2.info()
- assert not store2.is_open
- # ops on a closed store
- with ensure_clean_path(self.path) as path:
- df = tm.makeDataFrame()
- df.to_hdf(path, 'df', mode='w', format='table')
- store = HDFStore(path)
- store.close()
- pytest.raises(ClosedFileError, store.keys)
- pytest.raises(ClosedFileError, lambda: 'df' in store)
- pytest.raises(ClosedFileError, lambda: len(store))
- pytest.raises(ClosedFileError, lambda: store['df'])
- pytest.raises(AttributeError, lambda: store.df)
- pytest.raises(ClosedFileError, store.select, 'df')
- pytest.raises(ClosedFileError, store.get, 'df')
- pytest.raises(ClosedFileError, store.append, 'df2', df)
- pytest.raises(ClosedFileError, store.put, 'df3', df)
- pytest.raises(ClosedFileError, store.get_storer, 'df2')
- pytest.raises(ClosedFileError, store.remove, 'df2')
- with pytest.raises(ClosedFileError, match='file is not open'):
- store.select('df')
- def test_pytables_native_read(self, datapath):
- with ensure_clean_store(
- datapath('io', 'data', 'legacy_hdf/pytables_native.h5'),
- mode='r') as store:
- d2 = store['detector/readout']
- assert isinstance(d2, DataFrame)
- @pytest.mark.skipif(PY35 and is_platform_windows(),
- reason="native2 read fails oddly on windows / 3.5")
- def test_pytables_native2_read(self, datapath):
- with ensure_clean_store(
- datapath('io', 'data', 'legacy_hdf', 'pytables_native2.h5'),
- mode='r') as store:
- str(store)
- d1 = store['detector']
- assert isinstance(d1, DataFrame)
- @xfail_non_writeable
- def test_legacy_table_fixed_format_read_py2(self, datapath):
- # GH 24510
- # legacy table with fixed format written in Python 2
- with ensure_clean_store(
- datapath('io', 'data', 'legacy_hdf',
- 'legacy_table_fixed_py2.h5'),
- mode='r') as store:
- result = store.select('df')
- expected = pd.DataFrame([[1, 2, 3, 'D']],
- columns=['A', 'B', 'C', 'D'],
- index=pd.Index(['ABC'],
- name='INDEX_NAME'))
- assert_frame_equal(expected, result)
- def test_legacy_table_read_py2(self, datapath):
- # issue: 24925
- # legacy table written in Python 2
- with ensure_clean_store(
- datapath('io', 'data', 'legacy_hdf',
- 'legacy_table_py2.h5'),
- mode='r') as store:
- result = store.select('table')
- expected = pd.DataFrame({
- "a": ["a", "b"],
- "b": [2, 3]
- })
- assert_frame_equal(expected, result)
- def test_legacy_table_read(self, datapath):
- # legacy table types
- with ensure_clean_store(
- datapath('io', 'data', 'legacy_hdf', 'legacy_table.h5'),
- mode='r') as store:
- with catch_warnings():
- simplefilter("ignore", pd.io.pytables.IncompatibilityWarning)
- store.select('df1')
- store.select('df2')
- store.select('wp1')
- # force the frame
- store.select('df2', typ='legacy_frame')
- # old version warning
- pytest.raises(
- Exception, store.select, 'wp1', 'minor_axis=B')
- df2 = store.select('df2')
- result = store.select('df2', 'index>df2.index[2]')
- expected = df2[df2.index > df2.index[2]]
- assert_frame_equal(expected, result)
- def test_copy(self):
- with catch_warnings(record=True):
- def do_copy(f, new_f=None, keys=None,
- propindexes=True, **kwargs):
- try:
- store = HDFStore(f, 'r')
- if new_f is None:
- import tempfile
- fd, new_f = tempfile.mkstemp()
- tstore = store.copy(
- new_f, keys=keys, propindexes=propindexes, **kwargs)
- # check keys
- if keys is None:
- keys = store.keys()
- assert set(keys) == set(tstore.keys())
- # check indices & nrows
- for k in tstore.keys():
- if tstore.get_storer(k).is_table:
- new_t = tstore.get_storer(k)
- orig_t = store.get_storer(k)
- assert orig_t.nrows == new_t.nrows
- # check propindixes
- if propindexes:
- for a in orig_t.axes:
- if a.is_indexed:
- assert new_t[a.name].is_indexed
- finally:
- safe_close(store)
- safe_close(tstore)
- try:
- os.close(fd)
- except (OSError, ValueError):
- pass
- safe_remove(new_f)
- # new table
- df = tm.makeDataFrame()
- try:
- path = create_tempfile(self.path)
- st = HDFStore(path)
- st.append('df', df, data_columns=['A'])
- st.close()
- do_copy(f=path)
- do_copy(f=path, propindexes=False)
- finally:
- safe_remove(path)
- def test_store_datetime_fractional_secs(self):
- with ensure_clean_store(self.path) as store:
- dt = datetime.datetime(2012, 1, 2, 3, 4, 5, 123456)
- series = Series([0], [dt])
- store['a'] = series
- assert store['a'].index[0] == dt
- def test_tseries_indices_series(self):
- with ensure_clean_store(self.path) as store:
- idx = tm.makeDateIndex(10)
- ser = Series(np.random.randn(len(idx)), idx)
- store['a'] = ser
- result = store['a']
- tm.assert_series_equal(result, ser)
- assert result.index.freq == ser.index.freq
- tm.assert_class_equal(result.index, ser.index, obj="series index")
- idx = tm.makePeriodIndex(10)
- ser = Series(np.random.randn(len(idx)), idx)
- store['a'] = ser
- result = store['a']
- tm.assert_series_equal(result, ser)
- assert result.index.freq == ser.index.freq
- tm.assert_class_equal(result.index, ser.index, obj="series index")
- def test_tseries_indices_frame(self):
- with ensure_clean_store(self.path) as store:
- idx = tm.makeDateIndex(10)
- df = DataFrame(np.random.randn(len(idx), 3), index=idx)
- store['a'] = df
- result = store['a']
- assert_frame_equal(result, df)
- assert result.index.freq == df.index.freq
- tm.assert_class_equal(result.index, df.index,
- obj="dataframe index")
- idx = tm.makePeriodIndex(10)
- df = DataFrame(np.random.randn(len(idx), 3), idx)
- store['a'] = df
- result = store['a']
- assert_frame_equal(result, df)
- assert result.index.freq == df.index.freq
- tm.assert_class_equal(result.index, df.index,
- obj="dataframe index")
- def test_unicode_index(self):
- unicode_values = [u('\u03c3'), u('\u03c3\u03c3')]
- # PerformanceWarning
- with catch_warnings(record=True):
- simplefilter("ignore", pd.errors.PerformanceWarning)
- s = Series(np.random.randn(len(unicode_values)), unicode_values)
- self._check_roundtrip(s, tm.assert_series_equal)
- def test_unicode_longer_encoded(self):
- # GH 11234
- char = '\u0394'
- df = pd.DataFrame({'A': [char]})
- with ensure_clean_store(self.path) as store:
- store.put('df', df, format='table', encoding='utf-8')
- result = store.get('df')
- tm.assert_frame_equal(result, df)
- df = pd.DataFrame({'A': ['a', char], 'B': ['b', 'b']})
- with ensure_clean_store(self.path) as store:
- store.put('df', df, format='table', encoding='utf-8')
- result = store.get('df')
- tm.assert_frame_equal(result, df)
- @xfail_non_writeable
- def test_store_datetime_mixed(self):
- df = DataFrame(
- {'a': [1, 2, 3], 'b': [1., 2., 3.], 'c': ['a', 'b', 'c']})
- ts = tm.makeTimeSeries()
- df['d'] = ts.index[:3]
- self._check_roundtrip(df, tm.assert_frame_equal)
- # def test_cant_write_multiindex_table(self):
- # # for now, #1848
- # df = DataFrame(np.random.randn(10, 4),
- # index=[np.arange(5).repeat(2),
- # np.tile(np.arange(2), 5)])
- # pytest.raises(Exception, store.put, 'foo', df, format='table')
- def test_append_with_diff_col_name_types_raises_value_error(self):
- df = DataFrame(np.random.randn(10, 1))
- df2 = DataFrame({'a': np.random.randn(10)})
- df3 = DataFrame({(1, 2): np.random.randn(10)})
- df4 = DataFrame({('1', 2): np.random.randn(10)})
- df5 = DataFrame({('1', 2, object): np.random.randn(10)})
- with ensure_clean_store(self.path) as store:
- name = 'df_%s' % tm.rands(10)
- store.append(name, df)
- for d in (df2, df3, df4, df5):
- with pytest.raises(ValueError):
- store.append(name, d)
- def test_query_with_nested_special_character(self):
- df = DataFrame({'a': ['a', 'a', 'c', 'b',
- 'test & test', 'c', 'b', 'e'],
- 'b': [1, 2, 3, 4, 5, 6, 7, 8]})
- expected = df[df.a == 'test & test']
- with ensure_clean_store(self.path) as store:
- store.append('test', df, format='table', data_columns=True)
- result = store.select('test', 'a = "test & test"')
- tm.assert_frame_equal(expected, result)
- def test_categorical(self):
- with ensure_clean_store(self.path) as store:
- # Basic
- _maybe_remove(store, 's')
- s = Series(Categorical(['a', 'b', 'b', 'a', 'a', 'c'], categories=[
- 'a', 'b', 'c', 'd'], ordered=False))
- store.append('s', s, format='table')
- result = store.select('s')
- tm.assert_series_equal(s, result)
- _maybe_remove(store, 's_ordered')
- s = Series(Categorical(['a', 'b', 'b', 'a', 'a', 'c'], categories=[
- 'a', 'b', 'c', 'd'], ordered=True))
- store.append('s_ordered', s, format='table')
- result = store.select('s_ordered')
- tm.assert_series_equal(s, result)
- _maybe_remove(store, 'df')
- df = DataFrame({"s": s, "vals": [1, 2, 3, 4, 5, 6]})
- store.append('df', df, format='table')
- result = store.select('df')
- tm.assert_frame_equal(result, df)
- # Dtypes
- _maybe_remove(store, 'si')
- s = Series([1, 1, 2, 2, 3, 4, 5]).astype('category')
- store.append('si', s)
- result = store.select('si')
- tm.assert_series_equal(result, s)
- _maybe_remove(store, 'si2')
- s = Series([1, 1, np.nan, 2, 3, 4, 5]).astype('category')
- store.append('si2', s)
- result = store.select('si2')
- tm.assert_series_equal(result, s)
- # Multiple
- _maybe_remove(store, 'df2')
- df2 = df.copy()
- df2['s2'] = Series(list('abcdefg')).astype('category')
- store.append('df2', df2)
- result = store.select('df2')
- tm.assert_frame_equal(result, df2)
- # Make sure the metadata is OK
- info = store.info()
- assert '/df2 ' in info
- # assert '/df2/meta/values_block_0/meta' in info
- assert '/df2/meta/values_block_1/meta' in info
- # unordered
- _maybe_remove(store, 's2')
- s = Series(Categorical(['a', 'b', 'b', 'a', 'a', 'c'], categories=[
- 'a', 'b', 'c', 'd'], ordered=False))
- store.append('s2', s, format='table')
- result = store.select('s2')
- tm.assert_series_equal(result, s)
- # Query
- _maybe_remove(store, 'df3')
- store.append('df3', df, data_columns=['s'])
- expected = df[df.s.isin(['b', 'c'])]
- result = store.select('df3', where=['s in ["b","c"]'])
- tm.assert_frame_equal(result, expected)
- expected = df[df.s.isin(['b', 'c'])]
- result = store.select('df3', where=['s = ["b","c"]'])
- tm.assert_frame_equal(result, expected)
- expected = df[df.s.isin(['d'])]
- result = store.select('df3', where=['s in ["d"]'])
- tm.assert_frame_equal(result, expected)
- expected = df[df.s.isin(['f'])]
- result = store.select('df3', where=['s in ["f"]'])
- tm.assert_frame_equal(result, expected)
- # Appending with same categories is ok
- store.append('df3', df)
- df = concat([df, df])
- expected = df[df.s.isin(['b', 'c'])]
- result = store.select('df3', where=['s in ["b","c"]'])
- tm.assert_frame_equal(result, expected)
- # Appending must have the same categories
- df3 = df.copy()
- df3['s'].cat.remove_unused_categories(inplace=True)
- with pytest.raises(ValueError):
- store.append('df3', df3)
- # Remove, and make sure meta data is removed (its a recursive
- # removal so should be).
- result = store.select('df3/meta/s/meta')
- assert result is not None
- store.remove('df3')
- with pytest.raises(KeyError):
- store.select('df3/meta/s/meta')
- def test_categorical_conversion(self):
- # GH13322
- # Check that read_hdf with categorical columns doesn't return rows if
- # where criteria isn't met.
- obsids = ['ESP_012345_6789', 'ESP_987654_3210']
- imgids = ['APF00006np', 'APF0001imm']
- data = [4.3, 9.8]
- # Test without categories
- df = DataFrame(dict(obsids=obsids, imgids=imgids, data=data))
- # We are expecting an empty DataFrame matching types of df
- expected = df.iloc[[], :]
- with ensure_clean_path(self.path) as path:
- df.to_hdf(path, 'df', format='table', data_columns=True)
- result = read_hdf(path, 'df', where='obsids=B')
- tm.assert_frame_equal(result, expected)
- # Test with categories
- df.obsids = df.obsids.astype('category')
- df.imgids = df.imgids.astype('category')
- # We are expecting an empty DataFrame matching types of df
- expected = df.iloc[[], :]
- with ensure_clean_path(self.path) as path:
- df.to_hdf(path, 'df', format='table', data_columns=True)
- result = read_hdf(path, 'df', where='obsids=B')
- tm.assert_frame_equal(result, expected)
- def test_categorical_nan_only_columns(self):
- # GH18413
- # Check that read_hdf with categorical columns with NaN-only values can
- # be read back.
- df = pd.DataFrame({
- 'a': ['a', 'b', 'c', np.nan],
- 'b': [np.nan, np.nan, np.nan, np.nan],
- 'c': [1, 2, 3, 4],
- 'd': pd.Series([None] * 4, dtype=object)
- })
- df['a'] = df.a.astype('category')
- df['b'] = df.b.astype('category')
- df['d'] = df.b.astype('category')
- expected = df
- with ensure_clean_path(self.path) as path:
- df.to_hdf(path, 'df', format='table', data_columns=True)
- result = read_hdf(path, 'df')
- tm.assert_frame_equal(result, expected)
- def test_duplicate_column_name(self):
- df = DataFrame(columns=["a", "a"], data=[[0, 0]])
- with ensure_clean_path(self.path) as path:
- pytest.raises(ValueError, df.to_hdf,
- path, 'df', format='fixed')
- df.to_hdf(path, 'df', format='table')
- other = read_hdf(path, 'df')
- tm.assert_frame_equal(df, other)
- assert df.equals(other)
- assert other.equals(df)
- def test_round_trip_equals(self):
- # GH 9330
- df = DataFrame({"B": [1, 2], "A": ["x", "y"]})
- with ensure_clean_path(self.path) as path:
- df.to_hdf(path, 'df', format='table')
- other = read_hdf(path, 'df')
- tm.assert_frame_equal(df, other)
- assert df.equals(other)
- assert other.equals(df)
- def test_preserve_timedeltaindex_type(self):
- # GH9635
- # Storing TimedeltaIndexed DataFrames in fixed stores did not preserve
- # the type of the index.
- df = DataFrame(np.random.normal(size=(10, 5)))
- df.index = timedelta_range(
- start='0s', periods=10, freq='1s', name='example')
- with ensure_clean_store(self.path) as store:
- store['df'] = df
- assert_frame_equal(store['df'], df)
- def test_columns_multiindex_modified(self):
- # BUG: 7212
- # read_hdf store.select modified the passed columns parameters
- # when multi-indexed.
- df = DataFrame(np.random.rand(4, 5),
- index=list('abcd'),
- columns=list('ABCDE'))
- df.index.name = 'letters'
- df = df.set_index(keys='E', append=True)
- data_columns = df.index.names + df.columns.tolist()
- with ensure_clean_path(self.path) as path:
- df.to_hdf(path, 'df',
- mode='a',
- append=True,
- data_columns=data_columns,
- index=False)
- cols2load = list('BCD')
- cols2load_original = list(cols2load)
- df_loaded = read_hdf(path, 'df', columns=cols2load) # noqa
- assert cols2load_original == cols2load
- @ignore_natural_naming_warning
- def test_to_hdf_with_object_column_names(self):
- # GH9057
- # Writing HDF5 table format should only work for string-like
- # column types
- types_should_fail = [tm.makeIntIndex, tm.makeFloatIndex,
- tm.makeDateIndex, tm.makeTimedeltaIndex,
- tm.makePeriodIndex]
- types_should_run = [tm.makeStringIndex, tm.makeCategoricalIndex]
- if compat.PY3:
- types_should_run.append(tm.makeUnicodeIndex)
- else:
- # TODO: Add back to types_should_fail
- # https://github.com/pandas-dev/pandas/issues/20907
- pass
- for index in types_should_fail:
- df = DataFrame(np.random.randn(10, 2), columns=index(2))
- with ensure_clean_path(self.path) as path:
- with catch_warnings(record=True):
- msg = "cannot have non-object label DataIndexableCol"
- with pytest.raises(ValueError, match=msg):
- df.to_hdf(path, 'df', format='table',
- data_columns=True)
- for index in types_should_run:
- df = DataFrame(np.random.randn(10, 2), columns=index(2))
- with ensure_clean_path(self.path) as path:
- with catch_warnings(record=True):
- df.to_hdf(path, 'df', format='table', data_columns=True)
- result = pd.read_hdf(
- path, 'df', where="index = [{0}]".format(df.index[0]))
- assert(len(result))
- def test_read_hdf_open_store(self):
- # GH10330
- # No check for non-string path_or-buf, and no test of open store
- df = DataFrame(np.random.rand(4, 5),
- index=list('abcd'),
- columns=list('ABCDE'))
- df.index.name = 'letters'
- df = df.set_index(keys='E', append=True)
- with ensure_clean_path(self.path) as path:
- df.to_hdf(path, 'df', mode='w')
- direct = read_hdf(path, 'df')
- store = HDFStore(path, mode='r')
- indirect = read_hdf(store, 'df')
- tm.assert_frame_equal(direct, indirect)
- assert store.is_open
- store.close()
- def test_read_hdf_iterator(self):
- df = DataFrame(np.random.rand(4, 5),
- index=list('abcd'),
- columns=list('ABCDE'))
- df.index.name = 'letters'
- df = df.set_index(keys='E', append=True)
- with ensure_clean_path(self.path) as path:
- df.to_hdf(path, 'df', mode='w', format='t')
- direct = read_hdf(path, 'df')
- iterator = read_hdf(path, 'df', iterator=True)
- assert isinstance(iterator, TableIterator)
- indirect = next(iterator.__iter__())
- tm.assert_frame_equal(direct, indirect)
- iterator.store.close()
- def test_read_hdf_errors(self):
- df = DataFrame(np.random.rand(4, 5),
- index=list('abcd'),
- columns=list('ABCDE'))
- with ensure_clean_path(self.path) as path:
- pytest.raises(IOError, read_hdf, path, 'key')
- df.to_hdf(path, 'df')
- store = HDFStore(path, mode='r')
- store.close()
- pytest.raises(IOError, read_hdf, store, 'df')
- def test_read_hdf_generic_buffer_errors(self):
- pytest.raises(NotImplementedError, read_hdf, BytesIO(b''), 'df')
- def test_invalid_complib(self):
- df = DataFrame(np.random.rand(4, 5),
- index=list('abcd'),
- columns=list('ABCDE'))
- with ensure_clean_path(self.path) as path:
- with pytest.raises(ValueError):
- df.to_hdf(path, 'df', complib='foolib')
- # GH10443
- def test_read_nokey(self):
- df = DataFrame(np.random.rand(4, 5),
- index=list('abcd'),
- columns=list('ABCDE'))
- # Categorical dtype not supported for "fixed" format. So no need
- # to test with that dtype in the dataframe here.
- with ensure_clean_path(self.path) as path:
- df.to_hdf(path, 'df', mode='a')
- reread = read_hdf(path)
- assert_frame_equal(df, reread)
- df.to_hdf(path, 'df2', mode='a')
- pytest.raises(ValueError, read_hdf, path)
- def test_read_nokey_table(self):
- # GH13231
- df = DataFrame({'i': range(5),
- 'c': Series(list('abacd'), dtype='category')})
- with ensure_clean_path(self.path) as path:
- df.to_hdf(path, 'df', mode='a', format='table')
- reread = read_hdf(path)
- assert_frame_equal(df, reread)
- df.to_hdf(path, 'df2', mode='a', format='table')
- pytest.raises(ValueError, read_hdf, path)
- def test_read_nokey_empty(self):
- with ensure_clean_path(self.path) as path:
- store = HDFStore(path)
- store.close()
- pytest.raises(ValueError, read_hdf, path)
- @td.skip_if_no('pathlib')
- def test_read_from_pathlib_path(self):
- # GH11773
- from pathlib import Path
- expected = DataFrame(np.random.rand(4, 5),
- index=list('abcd'),
- columns=list('ABCDE'))
- with ensure_clean_path(self.path) as filename:
- path_obj = Path(filename)
- expected.to_hdf(path_obj, 'df', mode='a')
- actual = read_hdf(path_obj, 'df')
- tm.assert_frame_equal(expected, actual)
- @td.skip_if_no('py.path')
- def test_read_from_py_localpath(self):
- # GH11773
- from py.path import local as LocalPath
- expected = DataFrame(np.random.rand(4, 5),
- index=list('abcd'),
- columns=list('ABCDE'))
- with ensure_clean_path(self.path) as filename:
- path_obj = LocalPath(filename)
- expected.to_hdf(path_obj, 'df', mode='a')
- actual = read_hdf(path_obj, 'df')
- tm.assert_frame_equal(expected, actual)
- def test_query_long_float_literal(self):
- # GH 14241
- df = pd.DataFrame({'A': [1000000000.0009,
- 1000000000.0011,
- 1000000000.0015]})
- with ensure_clean_store(self.path) as store:
- store.append('test', df, format='table', data_columns=True)
- cutoff = 1000000000.0006
- result = store.select('test', "A < %.4f" % cutoff)
- assert result.empty
- cutoff = 1000000000.0010
- result = store.select('test', "A > %.4f" % cutoff)
- expected = df.loc[[1, 2], :]
- tm.assert_frame_equal(expected, result)
- exact = 1000000000.0011
- result = store.select('test', 'A == %.4f' % exact)
- expected = df.loc[[1], :]
- tm.assert_frame_equal(expected, result)
- def test_query_compare_column_type(self):
- # GH 15492
- df = pd.DataFrame({'date': ['2014-01-01', '2014-01-02'],
- 'real_date': date_range('2014-01-01', periods=2),
- 'float': [1.1, 1.2],
- 'int': [1, 2]},
- columns=['date', 'real_date', 'float', 'int'])
- with ensure_clean_store(self.path) as store:
- store.append('test', df, format='table', data_columns=True)
- ts = pd.Timestamp('2014-01-01') # noqa
- result = store.select('test', where='real_date > ts')
- expected = df.loc[[1], :]
- tm.assert_frame_equal(expected, result)
- for op in ['<', '>', '==']:
- # non strings to string column always fail
- for v in [2.1, True, pd.Timestamp('2014-01-01'),
- pd.Timedelta(1, 's')]:
- query = 'date {op} v'.format(op=op)
- with pytest.raises(TypeError):
- store.select('test', where=query)
- # strings to other columns must be convertible to type
- v = 'a'
- for col in ['int', 'float', 'real_date']:
- query = '{col} {op} v'.format(op=op, col=col)
- with pytest.raises(ValueError):
- store.select('test', where=query)
- for v, col in zip(['1', '1.1', '2014-01-01'],
- ['int', 'float', 'real_date']):
- query = '{col} {op} v'.format(op=op, col=col)
- result = store.select('test', where=query)
- if op == '==':
- expected = df.loc[[0], :]
- elif op == '>':
- expected = df.loc[[1], :]
- else:
- expected = df.loc[[], :]
- tm.assert_frame_equal(expected, result)
- @pytest.mark.parametrize('format', ['fixed', 'table'])
- def test_read_hdf_series_mode_r(self, format):
- # GH 16583
- # Tests that reading a Series saved to an HDF file
- # still works if a mode='r' argument is supplied
- series = tm.makeFloatSeries()
- with ensure_clean_path(self.path) as path:
- series.to_hdf(path, key='data', format=format)
- result = pd.read_hdf(path, key='data', mode='r')
- tm.assert_series_equal(result, series)
- @pytest.mark.skipif(not PY36, reason="Need python 3.6")
- def test_fspath(self):
- with tm.ensure_clean('foo.h5') as path:
- with pd.HDFStore(path) as store:
- assert os.fspath(store) == str(path)
- def test_read_py2_hdf_file_in_py3(self, datapath):
- # GH 16781
- # tests reading a PeriodIndex DataFrame written in Python2 in Python3
- # the file was generated in Python 2.7 like so:
- #
- # df = pd.DataFrame([1.,2,3], index=pd.PeriodIndex(
- # ['2015-01-01', '2015-01-02', '2015-01-05'], freq='B'))
- # df.to_hdf('periodindex_0.20.1_x86_64_darwin_2.7.13.h5', 'p')
- expected = pd.DataFrame([1., 2, 3], index=pd.PeriodIndex(
- ['2015-01-01', '2015-01-02', '2015-01-05'], freq='B'))
- with ensure_clean_store(
- datapath('io', 'data', 'legacy_hdf',
- 'periodindex_0.20.1_x86_64_darwin_2.7.13.h5'),
- mode='r') as store:
- result = store['p']
- assert_frame_equal(result, expected)
class TestHDFComplexValues(Base):
    # GH10447
    # round-tripping and error behaviour of complex-valued data

    def test_complex_fixed(self):
        """complex64/complex128 frames round-trip through fixed format."""
        df = DataFrame(np.random.rand(4, 5).astype(np.complex64),
                       index=list('abcd'),
                       columns=list('ABCDE'))

        with ensure_clean_path(self.path) as path:
            df.to_hdf(path, 'df')
            reread = read_hdf(path, 'df')
            assert_frame_equal(df, reread)

        df = DataFrame(np.random.rand(4, 5).astype(np.complex128),
                       index=list('abcd'),
                       columns=list('ABCDE'))
        with ensure_clean_path(self.path) as path:
            df.to_hdf(path, 'df')
            reread = read_hdf(path, 'df')
            assert_frame_equal(df, reread)

    def test_complex_table(self):
        """complex64/complex128 frames round-trip through table format."""
        df = DataFrame(np.random.rand(4, 5).astype(np.complex64),
                       index=list('abcd'),
                       columns=list('ABCDE'))

        with ensure_clean_path(self.path) as path:
            df.to_hdf(path, 'df', format='table')
            reread = read_hdf(path, 'df')
            assert_frame_equal(df, reread)

        df = DataFrame(np.random.rand(4, 5).astype(np.complex128),
                       index=list('abcd'),
                       columns=list('ABCDE'))

        with ensure_clean_path(self.path) as path:
            df.to_hdf(path, 'df', format='table', mode='w')
            reread = read_hdf(path, 'df')
            assert_frame_equal(df, reread)

    @xfail_non_writeable
    def test_complex_mixed_fixed(self):
        """Mixed-dtype frame containing complex columns, fixed format."""
        complex64 = np.array([1.0 + 1.0j, 1.0 + 1.0j,
                              1.0 + 1.0j, 1.0 + 1.0j], dtype=np.complex64)
        complex128 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j],
                              dtype=np.complex128)
        df = DataFrame({'A': [1, 2, 3, 4],
                        'B': ['a', 'b', 'c', 'd'],
                        'C': complex64,
                        'D': complex128,
                        'E': [1.0, 2.0, 3.0, 4.0]},
                       index=list('abcd'))
        with ensure_clean_path(self.path) as path:
            df.to_hdf(path, 'df')
            reread = read_hdf(path, 'df')
            assert_frame_equal(df, reread)

    def test_complex_mixed_table(self):
        """Mixed-dtype frame with complex columns: select + round trip."""
        complex64 = np.array([1.0 + 1.0j, 1.0 + 1.0j,
                              1.0 + 1.0j, 1.0 + 1.0j], dtype=np.complex64)
        complex128 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j],
                              dtype=np.complex128)
        df = DataFrame({'A': [1, 2, 3, 4],
                        'B': ['a', 'b', 'c', 'd'],
                        'C': complex64,
                        'D': complex128,
                        'E': [1.0, 2.0, 3.0, 4.0]},
                       index=list('abcd'))

        with ensure_clean_store(self.path) as store:
            store.append('df', df, data_columns=['A', 'B'])
            result = store.select('df', where='A>2')
            assert_frame_equal(df.loc[df.A > 2], result)

        with ensure_clean_path(self.path) as path:
            df.to_hdf(path, 'df', format='table')
            reread = read_hdf(path, 'df')
            assert_frame_equal(df, reread)

    @pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning")
    def test_complex_across_dimensions_fixed(self):
        """Series/DataFrame/Panel of complex data, fixed format."""
        with catch_warnings(record=True):
            complex128 = np.array(
                [1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j])
            s = Series(complex128, index=list('abcd'))
            df = DataFrame({'A': s, 'B': s})
            p = Panel({'One': df, 'Two': df})

            objs = [s, df, p]
            comps = [tm.assert_series_equal, tm.assert_frame_equal,
                     tm.assert_panel_equal]
            for obj, comp in zip(objs, comps):
                with ensure_clean_path(self.path) as path:
                    obj.to_hdf(path, 'obj', format='fixed')
                    reread = read_hdf(path, 'obj')
                    comp(obj, reread)

    @pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning")
    def test_complex_across_dimensions(self):
        """DataFrame/Panel of complex data, table format."""
        complex128 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j])
        s = Series(complex128, index=list('abcd'))
        df = DataFrame({'A': s, 'B': s})

        with catch_warnings(record=True):
            p = Panel({'One': df, 'Two': df})

            objs = [df, p]
            comps = [tm.assert_frame_equal, tm.assert_panel_equal]
            for obj, comp in zip(objs, comps):
                with ensure_clean_path(self.path) as path:
                    obj.to_hdf(path, 'obj', format='table')
                    reread = read_hdf(path, 'obj')
                    comp(obj, reread)

    def test_complex_indexing_error(self):
        """A complex column cannot be used as a data (indexable) column."""
        complex128 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j],
                              dtype=np.complex128)
        df = DataFrame({'A': [1, 2, 3, 4],
                        'B': ['a', 'b', 'c', 'd'],
                        'C': complex128},
                       index=list('abcd'))
        with ensure_clean_store(self.path) as store:
            # context-manager form for consistency with the rest of the file
            with pytest.raises(TypeError):
                store.append('df', df, data_columns=['C'])

    def test_complex_series_error(self):
        """A complex Series needs index=False to be stored in table format."""
        complex128 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j])
        s = Series(complex128, index=list('abcd'))

        with ensure_clean_path(self.path) as path:
            with pytest.raises(TypeError):
                s.to_hdf(path, 'obj', format='t')

        with ensure_clean_path(self.path) as path:
            s.to_hdf(path, 'obj', format='t', index=False)
            reread = read_hdf(path, 'obj')
            tm.assert_series_equal(s, reread)

    def test_complex_append(self):
        """Appending complex data twice doubles the stored frame."""
        df = DataFrame({'a': np.random.randn(100).astype(np.complex128),
                        'b': np.random.randn(100)})

        with ensure_clean_store(self.path) as store:
            store.append('df', df, data_columns=['b'])
            store.append('df', df)
            result = store.select('df')
            # spell out the axis keyword (positional axis is deprecated later)
            assert_frame_equal(pd.concat([df, df], axis=0), result)
- class TestTimezones(Base):
- def _compare_with_tz(self, a, b):
- tm.assert_frame_equal(a, b)
- # compare the zones on each element
- for c in a.columns:
- for i in a.index:
- a_e = a.loc[i, c]
- b_e = b.loc[i, c]
- if not (a_e == b_e and a_e.tz == b_e.tz):
- raise AssertionError(
- "invalid tz comparison [%s] [%s]" % (a_e, b_e))
- def test_append_with_timezones_dateutil(self):
- from datetime import timedelta
- # use maybe_get_tz instead of dateutil.tz.gettz to handle the windows
- # filename issues.
- from pandas._libs.tslibs.timezones import maybe_get_tz
- gettz = lambda x: maybe_get_tz('dateutil/' + x)
- # as columns
- with ensure_clean_store(self.path) as store:
- _maybe_remove(store, 'df_tz')
- df = DataFrame(dict(A=[Timestamp('20130102 2:00:00', tz=gettz(
- 'US/Eastern')) + timedelta(hours=1) * i for i in range(5)]))
- store.append('df_tz', df, data_columns=['A'])
- result = store['df_tz']
- self._compare_with_tz(result, df)
- assert_frame_equal(result, df)
- # select with tz aware
- expected = df[df.A >= df.A[3]]
- result = store.select('df_tz', where='A>=df.A[3]')
- self._compare_with_tz(result, expected)
- # ensure we include dates in DST and STD time here.
- _maybe_remove(store, 'df_tz')
- df = DataFrame(dict(A=Timestamp('20130102',
- tz=gettz('US/Eastern')),
- B=Timestamp('20130603',
- tz=gettz('US/Eastern'))),
- index=range(5))
- store.append('df_tz', df)
- result = store['df_tz']
- self._compare_with_tz(result, df)
- assert_frame_equal(result, df)
- df = DataFrame(dict(A=Timestamp('20130102',
- tz=gettz('US/Eastern')),
- B=Timestamp('20130102', tz=gettz('EET'))),
- index=range(5))
- pytest.raises(ValueError, store.append, 'df_tz', df)
- # this is ok
- _maybe_remove(store, 'df_tz')
- store.append('df_tz', df, data_columns=['A', 'B'])
- result = store['df_tz']
- self._compare_with_tz(result, df)
- assert_frame_equal(result, df)
- # can't append with diff timezone
- df = DataFrame(dict(A=Timestamp('20130102',
- tz=gettz('US/Eastern')),
- B=Timestamp('20130102', tz=gettz('CET'))),
- index=range(5))
- pytest.raises(ValueError, store.append, 'df_tz', df)
- # as index
- with ensure_clean_store(self.path) as store:
- # GH 4098 example
- df = DataFrame(dict(A=Series(lrange(3), index=date_range(
- '2000-1-1', periods=3, freq='H', tz=gettz('US/Eastern')))))
- _maybe_remove(store, 'df')
- store.put('df', df)
- result = store.select('df')
- assert_frame_equal(result, df)
- _maybe_remove(store, 'df')
- store.append('df', df)
- result = store.select('df')
- assert_frame_equal(result, df)
- def test_append_with_timezones_pytz(self):
- from datetime import timedelta
- # as columns
- with ensure_clean_store(self.path) as store:
- _maybe_remove(store, 'df_tz')
- df = DataFrame(dict(A=[Timestamp('20130102 2:00:00',
- tz='US/Eastern') +
- timedelta(hours=1) * i
- for i in range(5)]))
- store.append('df_tz', df, data_columns=['A'])
- result = store['df_tz']
- self._compare_with_tz(result, df)
- assert_frame_equal(result, df)
- # select with tz aware
- self._compare_with_tz(store.select(
- 'df_tz', where='A>=df.A[3]'), df[df.A >= df.A[3]])
- _maybe_remove(store, 'df_tz')
- # ensure we include dates in DST and STD time here.
- df = DataFrame(dict(A=Timestamp('20130102', tz='US/Eastern'),
- B=Timestamp('20130603', tz='US/Eastern')),
- index=range(5))
- store.append('df_tz', df)
- result = store['df_tz']
- self._compare_with_tz(result, df)
- assert_frame_equal(result, df)
- df = DataFrame(dict(A=Timestamp('20130102', tz='US/Eastern'),
- B=Timestamp('20130102', tz='EET')),
- index=range(5))
- pytest.raises(ValueError, store.append, 'df_tz', df)
- # this is ok
- _maybe_remove(store, 'df_tz')
- store.append('df_tz', df, data_columns=['A', 'B'])
- result = store['df_tz']
- self._compare_with_tz(result, df)
- assert_frame_equal(result, df)
- # can't append with diff timezone
- df = DataFrame(dict(A=Timestamp('20130102', tz='US/Eastern'),
- B=Timestamp('20130102', tz='CET')),
- index=range(5))
- pytest.raises(ValueError, store.append, 'df_tz', df)
- # as index
- with ensure_clean_store(self.path) as store:
- # GH 4098 example
- df = DataFrame(dict(A=Series(lrange(3), index=date_range(
- '2000-1-1', periods=3, freq='H', tz='US/Eastern'))))
- _maybe_remove(store, 'df')
- store.put('df', df)
- result = store.select('df')
- assert_frame_equal(result, df)
- _maybe_remove(store, 'df')
- store.append('df', df)
- result = store.select('df')
- assert_frame_equal(result, df)
- def test_tseries_select_index_column(self):
- # GH7777
- # selecting a UTC datetimeindex column did
- # not preserve UTC tzinfo set before storing
- # check that no tz still works
- rng = date_range('1/1/2000', '1/30/2000')
- frame = DataFrame(np.random.randn(len(rng), 4), index=rng)
- with ensure_clean_store(self.path) as store:
- store.append('frame', frame)
- result = store.select_column('frame', 'index')
- assert rng.tz == DatetimeIndex(result.values).tz
- # check utc
- rng = date_range('1/1/2000', '1/30/2000', tz='UTC')
- frame = DataFrame(np.random.randn(len(rng), 4), index=rng)
- with ensure_clean_store(self.path) as store:
- store.append('frame', frame)
- result = store.select_column('frame', 'index')
- assert rng.tz == result.dt.tz
- # double check non-utc
- rng = date_range('1/1/2000', '1/30/2000', tz='US/Eastern')
- frame = DataFrame(np.random.randn(len(rng), 4), index=rng)
- with ensure_clean_store(self.path) as store:
- store.append('frame', frame)
- result = store.select_column('frame', 'index')
- assert rng.tz == result.dt.tz
- def test_timezones_fixed(self):
- with ensure_clean_store(self.path) as store:
- # index
- rng = date_range('1/1/2000', '1/30/2000', tz='US/Eastern')
- df = DataFrame(np.random.randn(len(rng), 4), index=rng)
- store['df'] = df
- result = store['df']
- assert_frame_equal(result, df)
- # as data
- # GH11411
- _maybe_remove(store, 'df')
- df = DataFrame({'A': rng,
- 'B': rng.tz_convert('UTC').tz_localize(None),
- 'C': rng.tz_convert('CET'),
- 'D': range(len(rng))}, index=rng)
- store['df'] = df
- result = store['df']
- assert_frame_equal(result, df)
- def test_fixed_offset_tz(self):
- rng = date_range('1/1/2000 00:00:00-07:00', '1/30/2000 00:00:00-07:00')
- frame = DataFrame(np.random.randn(len(rng), 4), index=rng)
- with ensure_clean_store(self.path) as store:
- store['frame'] = frame
- recons = store['frame']
- tm.assert_index_equal(recons.index, rng)
- assert rng.tz == recons.index.tz
- @td.skip_if_windows
- def test_store_timezone(self):
- # GH2852
- # issue storing datetime.date with a timezone as it resets when read
- # back in a new timezone
- # original method
- with ensure_clean_store(self.path) as store:
- today = datetime.date(2013, 9, 10)
- df = DataFrame([1, 2, 3], index=[today, today, today])
- store['obj1'] = df
- result = store['obj1']
- assert_frame_equal(result, df)
- # with tz setting
- with ensure_clean_store(self.path) as store:
- with set_timezone('EST5EDT'):
- today = datetime.date(2013, 9, 10)
- df = DataFrame([1, 2, 3], index=[today, today, today])
- store['obj1'] = df
- with set_timezone('CST6CDT'):
- result = store['obj1']
- assert_frame_equal(result, df)
- def test_legacy_datetimetz_object(self, datapath):
- # legacy from < 0.17.0
- # 8260
- expected = DataFrame(dict(A=Timestamp('20130102', tz='US/Eastern'),
- B=Timestamp('20130603', tz='CET')),
- index=range(5))
- with ensure_clean_store(
- datapath('io', 'data', 'legacy_hdf', 'datetimetz_object.h5'),
- mode='r') as store:
- result = store['df']
- assert_frame_equal(result, expected)
- def test_dst_transitions(self):
- # make sure we are not failing on transaitions
- with ensure_clean_store(self.path) as store:
- times = pd.date_range("2013-10-26 23:00", "2013-10-27 01:00",
- tz="Europe/London",
- freq="H",
- ambiguous='infer')
- for i in [times, times + pd.Timedelta('10min')]:
- _maybe_remove(store, 'df')
- df = DataFrame({'A': range(len(i)), 'B': i}, index=i)
- store.append('df', df)
- result = store.select('df')
- assert_frame_equal(result, df)
|