# test_pytables.py

from contextlib import contextmanager
import datetime
from datetime import timedelta
from distutils.version import LooseVersion
import os
import tempfile
from warnings import catch_warnings, simplefilter

import numpy as np
import pytest

from pandas.compat import (
    PY35, PY36, BytesIO, is_platform_little_endian, is_platform_windows,
    lrange, range, text_type, u)
import pandas.util._test_decorators as td

from pandas.core.dtypes.common import is_categorical_dtype

import pandas as pd
from pandas import (
    Categorical, DataFrame, DatetimeIndex, Index, Int64Index, MultiIndex,
    Panel, RangeIndex, Series, Timestamp, bdate_range, compat, concat,
    date_range, isna, timedelta_range)
import pandas.util.testing as tm
from pandas.util.testing import (
    assert_frame_equal, assert_panel_equal, assert_series_equal, set_timezone)

from pandas.io import pytables as pytables  # noqa:E402
from pandas.io.formats.printing import pprint_thing
from pandas.io.pytables import (
    ClosedFileError, HDFStore, PossibleDataLossError, Term, read_hdf)
from pandas.io.pytables import TableIterator  # noqa:E402

tables = pytest.importorskip('tables')
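
# importorskip skips this entire module at collection time when PyTables is
# not installed, so everything below can assume 'tables' is importable.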

# TODO:
# remove when gh-24839 is fixed; this affects numpy 1.16
# and pytables 3.4.4
xfail_non_writeable = pytest.mark.xfail(
    LooseVersion(np.__version__) >= LooseVersion('1.16'),
    reason=('gh-25511, gh-24839. pytables needs a '
            'release beyond 3.4.4 to support numpy 1.16.x'))

_default_compressor = ('blosc' if LooseVersion(tables.__version__) >=
                       LooseVersion('2.2') else 'zlib')

ignore_natural_naming_warning = pytest.mark.filterwarnings(
    "ignore:object name:tables.exceptions.NaturalNameWarning"
)


# helpers to ensure file cleanup around each test

def safe_remove(path):
    if path is not None:
        try:
            os.remove(path)
        except OSError:
            pass


def safe_close(store):
    try:
        if store is not None:
            store.close()
    except IOError:
        pass


def create_tempfile(path):
    """ create an unopened named temporary file """
    return os.path.join(tempfile.gettempdir(), path)
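
# The two context managers below are the workhorse fixtures of this module:
# ensure_clean_store yields an open HDFStore backed by a temp file and cleans
# it up afterwards; ensure_clean_path yields just the temp path(s).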


@contextmanager
def ensure_clean_store(path, mode='a', complevel=None, complib=None,
                       fletcher32=False):

    try:

        # put in the temporary path if we don't have one already
        if not len(os.path.dirname(path)):
            path = create_tempfile(path)

        store = HDFStore(path, mode=mode, complevel=complevel,
                         complib=complib, fletcher32=fletcher32)
        yield store
    finally:
        safe_close(store)
        if mode == 'w' or mode == 'a':
            safe_remove(path)


@contextmanager
def ensure_clean_path(path):
    """
    return essentially a named temporary file that is not opened
    and deleted on exiting; if path is a list, then create and
    return list of filenames
    """
    try:
        if isinstance(path, list):
            filenames = [create_tempfile(p) for p in path]
            yield filenames
        else:
            filenames = [create_tempfile(path)]
            yield filenames[0]
    finally:
        for f in filenames:
            safe_remove(f)
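
# A minimal usage sketch of the fixtures above (illustrative only, not a
# test):
#
#   with ensure_clean_store('tmp.h5') as store:
#       store.put('df', tm.makeDataFrame())
#
#   with ensure_clean_path(['a.h5', 'b.h5']) as (path_a, path_b):
#       ...  # both files are removed on exit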


# set these parameters so we don't have file sharing
tables.parameters.MAX_NUMEXPR_THREADS = 1
tables.parameters.MAX_BLOSC_THREADS = 1
tables.parameters.MAX_THREADS = 1


def _maybe_remove(store, key):
    """For tests using tables, try removing the table to be sure there is
    no content from previous tests using the same table name."""
    try:
        store.remove(key)
    except (ValueError, KeyError):
        pass


class Base(object):

    @classmethod
    def setup_class(cls):

        # Pytables 3.0.0 deprecates lots of things
        tm.reset_testing_mode()

    @classmethod
    def teardown_class(cls):

        # Pytables 3.0.0 deprecates lots of things
        tm.set_testing_mode()

    def setup_method(self, method):
        self.path = 'tmp.__%s__.h5' % tm.rands(10)

    def teardown_method(self, method):
        pass


@pytest.mark.single
@pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning")
class TestHDFStore(Base):

    def test_format_kwarg_in_constructor(self):
        # GH 13291
        with ensure_clean_path(self.path) as path:
            pytest.raises(ValueError, HDFStore, path, format='table')

    def test_context(self):
        path = create_tempfile(self.path)
        try:
            with HDFStore(path) as tbl:
                raise ValueError('blah')
        except ValueError:
            pass
        finally:
            safe_remove(path)

        try:
            with HDFStore(path) as tbl:
                tbl['a'] = tm.makeDataFrame()

            with HDFStore(path) as tbl:
                assert len(tbl) == 1
                assert type(tbl['a']) == DataFrame
        finally:
            safe_remove(path)

    def test_conv_read_write(self):
        path = create_tempfile(self.path)
        try:
            def roundtrip(key, obj, **kwargs):
                obj.to_hdf(path, key, **kwargs)
                return read_hdf(path, key)

            o = tm.makeTimeSeries()
            assert_series_equal(o, roundtrip('series', o))

            o = tm.makeStringSeries()
            assert_series_equal(o, roundtrip('string_series', o))

            o = tm.makeDataFrame()
            assert_frame_equal(o, roundtrip('frame', o))

            with catch_warnings(record=True):
                o = tm.makePanel()
                assert_panel_equal(o, roundtrip('panel', o))

            # table
            df = DataFrame(dict(A=lrange(5), B=lrange(5)))
            df.to_hdf(path, 'table', append=True)
            result = read_hdf(path, 'table', where=['index>2'])
            assert_frame_equal(df[df.index > 2], result)
        finally:
            safe_remove(path)

    def test_long_strings(self):
        # GH6166
        df = DataFrame({'a': tm.rands_array(100, size=10)},
                       index=tm.rands_array(100, size=10))

        with ensure_clean_store(self.path) as store:
            store.append('df', df, data_columns=['a'])

            result = store.select('df')
            assert_frame_equal(df, result)

    def test_api(self):

        # GH4584
        # API issue when to_hdf doesn't accept append AND format args
        with ensure_clean_path(self.path) as path:

            df = tm.makeDataFrame()
            df.iloc[:10].to_hdf(path, 'df', append=True, format='table')
            df.iloc[10:].to_hdf(path, 'df', append=True, format='table')
            assert_frame_equal(read_hdf(path, 'df'), df)

            # append to False
            df.iloc[:10].to_hdf(path, 'df', append=False, format='table')
            df.iloc[10:].to_hdf(path, 'df', append=True, format='table')
            assert_frame_equal(read_hdf(path, 'df'), df)

        with ensure_clean_path(self.path) as path:

            df = tm.makeDataFrame()
            df.iloc[:10].to_hdf(path, 'df', append=True)
            df.iloc[10:].to_hdf(path, 'df', append=True, format='table')
            assert_frame_equal(read_hdf(path, 'df'), df)

            # append to False
            df.iloc[:10].to_hdf(path, 'df', append=False, format='table')
            df.iloc[10:].to_hdf(path, 'df', append=True)
            assert_frame_equal(read_hdf(path, 'df'), df)

        with ensure_clean_path(self.path) as path:

            df = tm.makeDataFrame()
            df.to_hdf(path, 'df', append=False, format='fixed')
            assert_frame_equal(read_hdf(path, 'df'), df)

            df.to_hdf(path, 'df', append=False, format='f')
            assert_frame_equal(read_hdf(path, 'df'), df)

            df.to_hdf(path, 'df', append=False)
            assert_frame_equal(read_hdf(path, 'df'), df)

            df.to_hdf(path, 'df')
            assert_frame_equal(read_hdf(path, 'df'), df)

        with ensure_clean_store(self.path) as store:

            path = store._path
            df = tm.makeDataFrame()

            _maybe_remove(store, 'df')
            store.append('df', df.iloc[:10], append=True, format='table')
            store.append('df', df.iloc[10:], append=True, format='table')
            assert_frame_equal(store.select('df'), df)

            # append to False
            _maybe_remove(store, 'df')
            store.append('df', df.iloc[:10], append=False, format='table')
            store.append('df', df.iloc[10:], append=True, format='table')
            assert_frame_equal(store.select('df'), df)

            # formats
            _maybe_remove(store, 'df')
            store.append('df', df.iloc[:10], append=False, format='table')
            store.append('df', df.iloc[10:], append=True, format='table')
            assert_frame_equal(store.select('df'), df)

            _maybe_remove(store, 'df')
            store.append('df', df.iloc[:10], append=False, format='table')
            store.append('df', df.iloc[10:], append=True, format=None)
            assert_frame_equal(store.select('df'), df)

        with ensure_clean_path(self.path) as path:
            # invalid
            df = tm.makeDataFrame()
            pytest.raises(ValueError, df.to_hdf, path,
                          'df', append=True, format='f')
            pytest.raises(ValueError, df.to_hdf, path,
                          'df', append=True, format='fixed')

            pytest.raises(TypeError, df.to_hdf, path,
                          'df', append=True, format='foo')
            pytest.raises(TypeError, df.to_hdf, path,
                          'df', append=False, format='bar')

        # File path doesn't exist
        path = ""
        pytest.raises(compat.FileNotFoundError,
                      read_hdf, path, 'df')
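
    # The rules exercised above, in short: format='fixed' (the default for
    # put/to_hdf) cannot be appended to, while format='table' supports
    # incremental appends. A minimal sketch (with a hypothetical key 'k'):
    #
    #   df.to_hdf(path, 'k', format='table')
    #   df.to_hdf(path, 'k', format='table', append=True)  # ok
    #   df.to_hdf(path, 'k', format='fixed', append=True)  # raises ValueError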

    def test_api_default_format(self):

        # default_format option
        with ensure_clean_store(self.path) as store:
            df = tm.makeDataFrame()

            pd.set_option('io.hdf.default_format', 'fixed')
            _maybe_remove(store, 'df')
            store.put('df', df)
            assert not store.get_storer('df').is_table
            pytest.raises(ValueError, store.append, 'df2', df)

            pd.set_option('io.hdf.default_format', 'table')
            _maybe_remove(store, 'df')
            store.put('df', df)
            assert store.get_storer('df').is_table
            _maybe_remove(store, 'df2')
            store.append('df2', df)
            assert store.get_storer('df').is_table

            pd.set_option('io.hdf.default_format', None)

        with ensure_clean_path(self.path) as path:

            df = tm.makeDataFrame()

            pd.set_option('io.hdf.default_format', 'fixed')
            df.to_hdf(path, 'df')
            with HDFStore(path) as store:
                assert not store.get_storer('df').is_table
            pytest.raises(ValueError, df.to_hdf, path, 'df2', append=True)

            pd.set_option('io.hdf.default_format', 'table')
            df.to_hdf(path, 'df3')
            with HDFStore(path) as store:
                assert store.get_storer('df3').is_table
            df.to_hdf(path, 'df4', append=True)
            with HDFStore(path) as store:
                assert store.get_storer('df4').is_table

            pd.set_option('io.hdf.default_format', None)
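
    # The same option can also be scoped with pd.option_context (a sketch):
    #
    #   with pd.option_context('io.hdf.default_format', 'table'):
    #       df.to_hdf(path, 'df')  # stored in table format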

    def test_keys(self):

        with ensure_clean_store(self.path) as store:
            store['a'] = tm.makeTimeSeries()
            store['b'] = tm.makeStringSeries()
            store['c'] = tm.makeDataFrame()
            with catch_warnings(record=True):
                store['d'] = tm.makePanel()
                store['foo/bar'] = tm.makePanel()

            assert len(store) == 5
            expected = {'/a', '/b', '/c', '/d', '/foo/bar'}
            assert set(store.keys()) == expected
            assert set(store) == expected

    def test_keys_ignore_hdf_softlink(self):

        # GH 20523
        # Puts a softlink into HDF file and rereads

        with ensure_clean_store(self.path) as store:

            df = DataFrame(dict(A=lrange(5), B=lrange(5)))
            store.put("df", df)

            assert store.keys() == ["/df"]

            store._handle.create_soft_link(store._handle.root, "symlink",
                                           "df")

            # Should ignore the softlink
            assert store.keys() == ["/df"]

    def test_iter_empty(self):

        with ensure_clean_store(self.path) as store:
            # GH 12221
            assert list(store) == []

    def test_repr(self):

        with ensure_clean_store(self.path) as store:
            repr(store)
            store.info()
            store['a'] = tm.makeTimeSeries()
            store['b'] = tm.makeStringSeries()
            store['c'] = tm.makeDataFrame()

            with catch_warnings(record=True):
                store['d'] = tm.makePanel()
                store['foo/bar'] = tm.makePanel()
                store.append('e', tm.makePanel())

            df = tm.makeDataFrame()
            df['obj1'] = 'foo'
            df['obj2'] = 'bar'
            df['bool1'] = df['A'] > 0
            df['bool2'] = df['B'] > 0
            df['bool3'] = True
            df['int1'] = 1
            df['int2'] = 2
            df['timestamp1'] = Timestamp('20010102')
            df['timestamp2'] = Timestamp('20010103')
            df['datetime1'] = datetime.datetime(2001, 1, 2, 0, 0)
            df['datetime2'] = datetime.datetime(2001, 1, 3, 0, 0)
            df.loc[3:6, ['obj1']] = np.nan
            df = df._consolidate()._convert(datetime=True)

            with catch_warnings(record=True):
                simplefilter("ignore", pd.errors.PerformanceWarning)
                store['df'] = df

            # make a random group in hdf space
            store._handle.create_group(store._handle.root, 'bah')

            assert store.filename in repr(store)
            assert store.filename in str(store)
            store.info()

        # storers
        with ensure_clean_store(self.path) as store:

            df = tm.makeDataFrame()
            store.append('df', df)

            s = store.get_storer('df')
            repr(s)
            str(s)

    @ignore_natural_naming_warning
    def test_contains(self):

        with ensure_clean_store(self.path) as store:
            store['a'] = tm.makeTimeSeries()
            store['b'] = tm.makeDataFrame()
            store['foo/bar'] = tm.makeDataFrame()
            assert 'a' in store
            assert 'b' in store
            assert 'c' not in store
            assert 'foo/bar' in store
            assert '/foo/bar' in store
            assert '/foo/b' not in store
            assert 'bar' not in store

            # gh-2694: tables.NaturalNameWarning
            with catch_warnings(record=True):
                store['node())'] = tm.makeDataFrame()
            assert 'node())' in store

    def test_versioning(self):

        with ensure_clean_store(self.path) as store:
            store['a'] = tm.makeTimeSeries()
            store['b'] = tm.makeDataFrame()
            df = tm.makeTimeDataFrame()
            _maybe_remove(store, 'df1')
            store.append('df1', df[:10])
            store.append('df1', df[10:])
            assert store.root.a._v_attrs.pandas_version == '0.15.2'
            assert store.root.b._v_attrs.pandas_version == '0.15.2'
            assert store.root.df1._v_attrs.pandas_version == '0.15.2'

            # write a file and wipe its versioning
            _maybe_remove(store, 'df2')
            store.append('df2', df)

            # this is an error because its table_type is appendable, but no
            # version info
            store.get_node('df2')._v_attrs.pandas_version = None
            pytest.raises(Exception, store.select, 'df2')

    def test_mode(self):

        df = tm.makeTimeDataFrame()

        def check(mode):

            with ensure_clean_path(self.path) as path:

                # constructor
                if mode in ['r', 'r+']:
                    pytest.raises(IOError, HDFStore, path, mode=mode)
                else:
                    store = HDFStore(path, mode=mode)
                    assert store._handle.mode == mode
                    store.close()

            with ensure_clean_path(self.path) as path:

                # context
                if mode in ['r', 'r+']:
                    def f():
                        with HDFStore(path, mode=mode) as store:  # noqa
                            pass
                    pytest.raises(IOError, f)
                else:
                    with HDFStore(path, mode=mode) as store:
                        assert store._handle.mode == mode

            with ensure_clean_path(self.path) as path:

                # conv write
                if mode in ['r', 'r+']:
                    pytest.raises(IOError, df.to_hdf,
                                  path, 'df', mode=mode)
                    df.to_hdf(path, 'df', mode='w')
                else:
                    df.to_hdf(path, 'df', mode=mode)

                # conv read
                if mode in ['w']:
                    pytest.raises(ValueError, read_hdf,
                                  path, 'df', mode=mode)
                else:
                    result = read_hdf(path, 'df', mode=mode)
                    assert_frame_equal(result, df)

        def check_default_mode():

            # read_hdf uses default mode
            with ensure_clean_path(self.path) as path:
                df.to_hdf(path, 'df', mode='w')
                result = read_hdf(path, 'df')
                assert_frame_equal(result, df)

        check('r')
        check('r+')
        check('a')
        check('w')
        check_default_mode()
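
    # Mode semantics covered by check() above: 'r'/'r+' require an existing
    # file, 'a' creates or appends, 'w' truncates; read_hdf rejects mode='w'
    # since reading never needs to write.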

    def test_reopen_handle(self):

        with ensure_clean_path(self.path) as path:

            store = HDFStore(path, mode='a')
            store['a'] = tm.makeTimeSeries()

            # invalid mode change
            pytest.raises(PossibleDataLossError, store.open, 'w')
            store.close()
            assert not store.is_open

            # truncation ok here
            store.open('w')
            assert store.is_open
            assert len(store) == 0
            store.close()
            assert not store.is_open

            store = HDFStore(path, mode='a')
            store['a'] = tm.makeTimeSeries()

            # reopen as read
            store.open('r')
            assert store.is_open
            assert len(store) == 1
            assert store._mode == 'r'
            store.close()
            assert not store.is_open

            # reopen as append
            store.open('a')
            assert store.is_open
            assert len(store) == 1
            assert store._mode == 'a'
            store.close()
            assert not store.is_open

            # reopen as append (again)
            store.open('a')
            assert store.is_open
            assert len(store) == 1
            assert store._mode == 'a'
            store.close()
            assert not store.is_open

    def test_open_args(self):

        with ensure_clean_path(self.path) as path:

            df = tm.makeDataFrame()

            # create an in memory store
            store = HDFStore(path, mode='a', driver='H5FD_CORE',
                             driver_core_backing_store=0)
            store['df'] = df
            store.append('df2', df)

            tm.assert_frame_equal(store['df'], df)
            tm.assert_frame_equal(store['df2'], df)

            store.close()

            # the file should not have actually been written
            assert not os.path.exists(path)
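
            # (The H5FD_CORE driver with the backing store disabled keeps
            # the whole file in memory, which is why nothing reaches disk.)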

    def test_flush(self):

        with ensure_clean_store(self.path) as store:
            store['a'] = tm.makeTimeSeries()
            store.flush()
            store.flush(fsync=True)

    def test_get(self):

        with ensure_clean_store(self.path) as store:
            store['a'] = tm.makeTimeSeries()
            left = store.get('a')
            right = store['a']
            tm.assert_series_equal(left, right)

            left = store.get('/a')
            right = store['/a']
            tm.assert_series_equal(left, right)

            pytest.raises(KeyError, store.get, 'b')

    @pytest.mark.parametrize('where, expected', [
        ('/', {
            '': ({'first_group', 'second_group'}, set()),
            '/first_group': (set(), {'df1', 'df2'}),
            '/second_group': ({'third_group'}, {'df3', 's1'}),
            '/second_group/third_group': (set(), {'df4'}),
        }),
        ('/second_group', {
            '/second_group': ({'third_group'}, {'df3', 's1'}),
            '/second_group/third_group': (set(), {'df4'}),
        })
    ])
    def test_walk(self, where, expected):
        # GH10143
        objs = {
            'df1': pd.DataFrame([1, 2, 3]),
            'df2': pd.DataFrame([4, 5, 6]),
            'df3': pd.DataFrame([6, 7, 8]),
            'df4': pd.DataFrame([9, 10, 11]),
            's1': pd.Series([10, 9, 8]),
            # Next 3 items aren't pandas objects and should be ignored
            'a1': np.array([[1, 2, 3], [4, 5, 6]]),
            'tb1': np.array([(1, 2, 3), (4, 5, 6)], dtype='i,i,i'),
            'tb2': np.array([(7, 8, 9), (10, 11, 12)], dtype='i,i,i')
        }

        with ensure_clean_store('walk_groups.hdf', mode='w') as store:
            store.put('/first_group/df1', objs['df1'])
            store.put('/first_group/df2', objs['df2'])
            store.put('/second_group/df3', objs['df3'])
            store.put('/second_group/s1', objs['s1'])
            store.put('/second_group/third_group/df4', objs['df4'])
            # Create non-pandas objects
            store._handle.create_array('/first_group', 'a1', objs['a1'])
            store._handle.create_table('/first_group', 'tb1',
                                       obj=objs['tb1'])
            store._handle.create_table('/second_group', 'tb2',
                                       obj=objs['tb2'])

            assert len(list(store.walk(where=where))) == len(expected)
            for path, groups, leaves in store.walk(where=where):
                assert path in expected
                expected_groups, expected_frames = expected[path]
                assert expected_groups == set(groups)
                assert expected_frames == set(leaves)
                for leaf in leaves:
                    frame_path = '/'.join([path, leaf])
                    obj = store.get(frame_path)
                    if 'df' in leaf:
                        tm.assert_frame_equal(obj, objs[leaf])
                    else:
                        tm.assert_series_equal(obj, objs[leaf])
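
    # store.walk mirrors os.walk: it yields (path, groups, leaves) triples
    # and silently skips nodes that are not pandas objects, as checked above.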

    def test_getattr(self):

        with ensure_clean_store(self.path) as store:

            s = tm.makeTimeSeries()
            store['a'] = s

            # test attribute access
            result = store.a
            tm.assert_series_equal(result, s)
            result = getattr(store, 'a')
            tm.assert_series_equal(result, s)

            df = tm.makeTimeDataFrame()
            store['df'] = df
            result = store.df
            tm.assert_frame_equal(result, df)

            # errors
            pytest.raises(AttributeError, getattr, store, 'd')

            for x in ['mode', 'path', 'handle', 'complib']:
                pytest.raises(AttributeError, getattr, store, x)

            # not stores
            for x in ['mode', 'path', 'handle', 'complib']:
                getattr(store, "_%s" % x)

    def test_put(self):

        with ensure_clean_store(self.path) as store:

            ts = tm.makeTimeSeries()
            df = tm.makeTimeDataFrame()
            store['a'] = ts
            store['b'] = df[:10]
            store['foo/bar/bah'] = df[:10]
            store['foo'] = df[:10]
            store['/foo'] = df[:10]
            store.put('c', df[:10], format='table')

            # not OK, not a table
            pytest.raises(
                ValueError, store.put, 'b', df[10:], append=True)

            # node does not currently exist, test _is_table_type returns False
            # in this case
            # _maybe_remove(store, 'f')
            # pytest.raises(ValueError, store.put, 'f', df[10:],
            #               append=True)

            # can't put to a table (use append instead)
            pytest.raises(ValueError, store.put, 'c', df[10:], append=True)

            # overwrite table
            store.put('c', df[:10], format='table', append=False)
            tm.assert_frame_equal(df[:10], store['c'])
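
    # As exercised above: put() writes a whole object at once and cannot
    # append; growing a node incrementally requires table format, via
    # store.append or an initial put(..., format='table').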

    def test_put_string_index(self):

        with ensure_clean_store(self.path) as store:

            index = Index(
                ["I am a very long string index: %s" % i for i in range(20)])
            s = Series(np.arange(20), index=index)
            df = DataFrame({'A': s, 'B': s})

            store['a'] = s
            tm.assert_series_equal(store['a'], s)

            store['b'] = df
            tm.assert_frame_equal(store['b'], df)

            # mixed length
            index = Index(['abcdefghijklmnopqrstuvwxyz1234567890'] +
                          ["I am a very long string index: %s" % i
                           for i in range(20)])
            s = Series(np.arange(21), index=index)
            df = DataFrame({'A': s, 'B': s})
            store['a'] = s
            tm.assert_series_equal(store['a'], s)

            store['b'] = df
            tm.assert_frame_equal(store['b'], df)

    def test_put_compression(self):

        with ensure_clean_store(self.path) as store:
            df = tm.makeTimeDataFrame()

            store.put('c', df, format='table', complib='zlib')
            tm.assert_frame_equal(store['c'], df)

            # can't compress if format='fixed'
            pytest.raises(ValueError, store.put, 'b', df,
                          format='fixed', complib='zlib')

    @td.skip_if_windows_python_3
    def test_put_compression_blosc(self):
        df = tm.makeTimeDataFrame()

        with ensure_clean_store(self.path) as store:

            # can't compress if format='fixed'
            pytest.raises(ValueError, store.put, 'b', df,
                          format='fixed', complib='blosc')

            store.put('c', df, format='table', complib='blosc')
            tm.assert_frame_equal(store['c'], df)

    def test_complibs_default_settings(self):
        # GH15943
        df = tm.makeDataFrame()

        # Set complevel and check if complib is automatically set to
        # default value
        with ensure_clean_path(self.path) as tmpfile:
            df.to_hdf(tmpfile, 'df', complevel=9)
            result = pd.read_hdf(tmpfile, 'df')
            tm.assert_frame_equal(result, df)

            with tables.open_file(tmpfile, mode='r') as h5file:
                for node in h5file.walk_nodes(where='/df', classname='Leaf'):
                    assert node.filters.complevel == 9
                    assert node.filters.complib == 'zlib'

        # Set complib and check to see if compression is disabled
        with ensure_clean_path(self.path) as tmpfile:
            df.to_hdf(tmpfile, 'df', complib='zlib')
            result = pd.read_hdf(tmpfile, 'df')
            tm.assert_frame_equal(result, df)

            with tables.open_file(tmpfile, mode='r') as h5file:
                for node in h5file.walk_nodes(where='/df', classname='Leaf'):
                    assert node.filters.complevel == 0
                    assert node.filters.complib is None

        # Check if not setting complib or complevel results in no compression
        with ensure_clean_path(self.path) as tmpfile:
            df.to_hdf(tmpfile, 'df')
            result = pd.read_hdf(tmpfile, 'df')
            tm.assert_frame_equal(result, df)

            with tables.open_file(tmpfile, mode='r') as h5file:
                for node in h5file.walk_nodes(where='/df', classname='Leaf'):
                    assert node.filters.complevel == 0
                    assert node.filters.complib is None

        # Check if file-defaults can be overridden on a per table basis
        with ensure_clean_path(self.path) as tmpfile:
            store = pd.HDFStore(tmpfile)
            store.append('dfc', df, complevel=9, complib='blosc')
            store.append('df', df)
            store.close()

            with tables.open_file(tmpfile, mode='r') as h5file:
                for node in h5file.walk_nodes(where='/df', classname='Leaf'):
                    assert node.filters.complevel == 0
                    assert node.filters.complib is None
                for node in h5file.walk_nodes(where='/dfc', classname='Leaf'):
                    assert node.filters.complevel == 9
                    assert node.filters.complib == 'blosc'
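
    # Defaults verified above, in short: complevel alone falls back to
    # complib='zlib'; complib alone (or neither) leaves compression off;
    # per-node complevel/complib override the file-wide settings.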

    def test_complibs(self):
        # GH14478
        df = tm.makeDataFrame()

        # Building list of all complibs and complevels tuples
        all_complibs = tables.filters.all_complibs
        # Remove lzo if it's not available on this platform
        if not tables.which_lib_version('lzo'):
            all_complibs.remove('lzo')
        # Remove bzip2 if it's not available on this platform
        if not tables.which_lib_version("bzip2"):
            all_complibs.remove("bzip2")

        all_levels = range(0, 10)
        all_tests = [(lib, lvl) for lib in all_complibs for lvl in all_levels]

        for (lib, lvl) in all_tests:
            with ensure_clean_path(self.path) as tmpfile:
                gname = 'foo'

                # Write and read file to see if data is consistent
                df.to_hdf(tmpfile, gname, complib=lib, complevel=lvl)
                result = pd.read_hdf(tmpfile, gname)
                tm.assert_frame_equal(result, df)

                # Open file and check metadata
                # for correct amount of compression
                h5table = tables.open_file(tmpfile, mode='r')
                for node in h5table.walk_nodes(where='/' + gname,
                                               classname='Leaf'):
                    assert node.filters.complevel == lvl
                    if lvl == 0:
                        assert node.filters.complib is None
                    else:
                        assert node.filters.complib == lib
                h5table.close()

    def test_put_integer(self):
        # non-date, non-string index
        df = DataFrame(np.random.randn(50, 100))
        self._check_roundtrip(df, tm.assert_frame_equal)

    @xfail_non_writeable
    def test_put_mixed_type(self):
        df = tm.makeTimeDataFrame()
        df['obj1'] = 'foo'
        df['obj2'] = 'bar'
        df['bool1'] = df['A'] > 0
        df['bool2'] = df['B'] > 0
        df['bool3'] = True
        df['int1'] = 1
        df['int2'] = 2
        df['timestamp1'] = Timestamp('20010102')
        df['timestamp2'] = Timestamp('20010103')
        df['datetime1'] = datetime.datetime(2001, 1, 2, 0, 0)
        df['datetime2'] = datetime.datetime(2001, 1, 3, 0, 0)
        df.loc[3:6, ['obj1']] = np.nan
        df = df._consolidate()._convert(datetime=True)

        with ensure_clean_store(self.path) as store:
            _maybe_remove(store, 'df')

            # PerformanceWarning
            with catch_warnings(record=True):
                simplefilter("ignore", pd.errors.PerformanceWarning)
                store.put('df', df)

            expected = store.get('df')
            tm.assert_frame_equal(expected, df)

    @pytest.mark.filterwarnings(
        "ignore:object name:tables.exceptions.NaturalNameWarning"
    )
    def test_append(self):

        with ensure_clean_store(self.path) as store:

            # this is allowed, but you almost always don't want to do it
            # tables.NaturalNameWarning):
            with catch_warnings(record=True):

                df = tm.makeTimeDataFrame()
                _maybe_remove(store, 'df1')
                store.append('df1', df[:10])
                store.append('df1', df[10:])
                tm.assert_frame_equal(store['df1'], df)

                _maybe_remove(store, 'df2')
                store.put('df2', df[:10], format='table')
                store.append('df2', df[10:])
                tm.assert_frame_equal(store['df2'], df)

                _maybe_remove(store, 'df3')
                store.append('/df3', df[:10])
                store.append('/df3', df[10:])
                tm.assert_frame_equal(store['df3'], df)

                # this is allowed, but you almost always don't want to do it
                # tables.NaturalNameWarning
                _maybe_remove(store, '/df3 foo')
                store.append('/df3 foo', df[:10])
                store.append('/df3 foo', df[10:])
                tm.assert_frame_equal(store['df3 foo'], df)

                # panel
                wp = tm.makePanel()
                _maybe_remove(store, 'wp1')
                store.append('wp1', wp.iloc[:, :10, :])
                store.append('wp1', wp.iloc[:, 10:, :])
                assert_panel_equal(store['wp1'], wp)

                # test using different order of items on the non-index axes
                _maybe_remove(store, 'wp1')
                wp_append1 = wp.iloc[:, :10, :]
                store.append('wp1', wp_append1)
                wp_append2 = wp.iloc[:, 10:, :].reindex(items=wp.items[::-1])
                store.append('wp1', wp_append2)
                assert_panel_equal(store['wp1'], wp)

                # dtype issues - mixed type in a single object column
                df = DataFrame(data=[[1, 2], [0, 1], [1, 2], [0, 0]])
                df['mixed_column'] = 'testing'
                df.loc[2, 'mixed_column'] = np.nan
                _maybe_remove(store, 'df')
                store.append('df', df)
                tm.assert_frame_equal(store['df'], df)

                # uints - test storage of uints
                uint_data = DataFrame({
                    'u08': Series(np.random.randint(0, high=255, size=5),
                                  dtype=np.uint8),
                    'u16': Series(np.random.randint(0, high=65535, size=5),
                                  dtype=np.uint16),
                    'u32': Series(np.random.randint(0, high=2**30, size=5),
                                  dtype=np.uint32),
                    'u64': Series([2**58, 2**59, 2**60, 2**61, 2**62],
                                  dtype=np.uint64)}, index=np.arange(5))
                _maybe_remove(store, 'uints')
                store.append('uints', uint_data)
                tm.assert_frame_equal(store['uints'], uint_data)

                # uints - test storage of uints in indexable columns
                _maybe_remove(store, 'uints')
                # 64-bit indices not yet supported
                store.append('uints', uint_data, data_columns=[
                             'u08', 'u16', 'u32'])
                tm.assert_frame_equal(store['uints'], uint_data)

    def test_append_series(self):

        with ensure_clean_store(self.path) as store:

            # basic
            ss = tm.makeStringSeries()
            ts = tm.makeTimeSeries()
            ns = Series(np.arange(100))

            store.append('ss', ss)
            result = store['ss']
            tm.assert_series_equal(result, ss)
            assert result.name is None

            store.append('ts', ts)
            result = store['ts']
            tm.assert_series_equal(result, ts)
            assert result.name is None

            ns.name = 'foo'
            store.append('ns', ns)
            result = store['ns']
            tm.assert_series_equal(result, ns)
            assert result.name == ns.name

            # select on the values
            expected = ns[ns > 60]
            result = store.select('ns', 'foo>60')
            tm.assert_series_equal(result, expected)

            # select on the index and values
            expected = ns[(ns > 70) & (ns.index < 90)]
            result = store.select('ns', 'foo>70 and index<90')
            tm.assert_series_equal(result, expected)

            # multi-index
            mi = DataFrame(np.random.randn(5, 1), columns=['A'])
            mi['B'] = np.arange(len(mi))
            mi['C'] = 'foo'
            mi.loc[3:5, 'C'] = 'bar'
            mi.set_index(['C', 'B'], inplace=True)
            s = mi.stack()
            s.index = s.index.droplevel(2)
            store.append('mi', s)
            tm.assert_series_equal(store['mi'], s)
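
    # Note from the selects above: a named Series stored in table format is
    # queryable on both its values (by name, e.g. 'foo>60') and its index.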

    def test_store_index_types(self):
        # GH5386
        # test storing various index types

        with ensure_clean_store(self.path) as store:

            def check(format, index):
                df = DataFrame(np.random.randn(10, 2), columns=list('AB'))
                df.index = index(len(df))
                _maybe_remove(store, 'df')
                store.put('df', df, format=format)
                assert_frame_equal(df, store['df'])

            for index in [tm.makeFloatIndex, tm.makeStringIndex,
                          tm.makeIntIndex, tm.makeDateIndex]:

                check('table', index)
                check('fixed', index)

            # period index currently broken for table
            # see GH7796 FIXME
            check('fixed', tm.makePeriodIndex)
            # check('table', tm.makePeriodIndex)

            # unicode
            index = tm.makeUnicodeIndex
            if compat.PY3:
                check('table', index)
                check('fixed', index)
            else:

                # only support for fixed types (and they have a perf warning)
                pytest.raises(TypeError, check, 'table', index)

                # PerformanceWarning
                with catch_warnings(record=True):
                    simplefilter("ignore", pd.errors.PerformanceWarning)
                    check('fixed', index)

    @pytest.mark.skipif(not is_platform_little_endian(),
                        reason="platform is not little endian")
    def test_encoding(self):

        with ensure_clean_store(self.path) as store:
            df = DataFrame(dict(A='foo', B='bar'), index=range(5))
            df.loc[2, 'A'] = np.nan
            df.loc[3, 'B'] = np.nan
            _maybe_remove(store, 'df')
            store.append('df', df, encoding='ascii')
            tm.assert_frame_equal(store['df'], df)

            expected = df.reindex(columns=['A'])
            result = store.select('df', Term('columns=A', encoding='ascii'))
            tm.assert_frame_equal(result, expected)

    def test_latin_encoding(self):

        if compat.PY2:
            pytest.skip("[unicode] is not implemented as a table column")

        values = [[b'E\xc9, 17', b'', b'a', b'b', b'c'],
                  [b'E\xc9, 17', b'a', b'b', b'c'],
                  [b'EE, 17', b'', b'a', b'b', b'c'],
                  [b'E\xc9, 17', b'\xf8\xfc', b'a', b'b', b'c'],
                  [b'', b'a', b'b', b'c'],
                  [b'\xf8\xfc', b'a', b'b', b'c'],
                  [b'A\xf8\xfc', b'', b'a', b'b', b'c'],
                  [np.nan, b'', b'b', b'c'],
                  [b'A\xf8\xfc', np.nan, b'', b'b', b'c']]

        def _try_decode(x, encoding='latin-1'):
            try:
                return x.decode(encoding)
            except AttributeError:
                return x

        # not sure how to remove latin-1 from code in python 2 and 3
        values = [[_try_decode(x) for x in y] for y in values]

        examples = []
        for dtype in ['category', object]:
            for val in values:
                examples.append(pd.Series(val, dtype=dtype))

        def roundtrip(s, key='data', encoding='latin-1', nan_rep=''):
            with ensure_clean_path(self.path) as store:
                s.to_hdf(store, key, format='table', encoding=encoding,
                         nan_rep=nan_rep)
                retr = read_hdf(store, key)
                s_nan = s.replace(nan_rep, np.nan)
                if is_categorical_dtype(s_nan):
                    assert is_categorical_dtype(retr)
                    assert_series_equal(s_nan, retr, check_dtype=False,
                                        check_categorical=False)
                else:
                    assert_series_equal(s_nan, retr)

        for s in examples:
            roundtrip(s)

        # fails:
        # for x in examples:
        #     roundtrip(s, nan_rep=b'\xf8\xfc')

    def test_append_some_nans(self):

        with ensure_clean_store(self.path) as store:
            df = DataFrame({'A': Series(np.random.randn(20)).astype('int32'),
                            'A1': np.random.randn(20),
                            'A2': np.random.randn(20),
                            'B': 'foo', 'C': 'bar',
                            'D': Timestamp("20010101"),
                            'E': datetime.datetime(2001, 1, 2, 0, 0)},
                           index=np.arange(20))

            # some nans
            _maybe_remove(store, 'df1')
            df.loc[0:15, ['A1', 'B', 'D', 'E']] = np.nan
            store.append('df1', df[:10])
            store.append('df1', df[10:])
            tm.assert_frame_equal(store['df1'], df)

            # first column
            df1 = df.copy()
            df1.loc[:, 'A1'] = np.nan
            _maybe_remove(store, 'df1')
            store.append('df1', df1[:10])
            store.append('df1', df1[10:])
            tm.assert_frame_equal(store['df1'], df1)

            # 2nd column
            df2 = df.copy()
            df2.loc[:, 'A2'] = np.nan
            _maybe_remove(store, 'df2')
            store.append('df2', df2[:10])
            store.append('df2', df2[10:])
            tm.assert_frame_equal(store['df2'], df2)

            # datetimes
            df3 = df.copy()
            df3.loc[:, 'E'] = np.nan
            _maybe_remove(store, 'df3')
            store.append('df3', df3[:10])
            store.append('df3', df3[10:])
            tm.assert_frame_equal(store['df3'], df3)
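
    # In the test below, append(..., dropna=True) drops only rows that are
    # entirely NaN across *all* columns; rows with any non-NaN value (for
    # example a constant string or timestamp column) are always written.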

    def test_append_all_nans(self):

        with ensure_clean_store(self.path) as store:
            df = DataFrame({'A1': np.random.randn(20),
                            'A2': np.random.randn(20)},
                           index=np.arange(20))
            df.loc[0:15, :] = np.nan

            # nan some entire rows (dropna=True)
            _maybe_remove(store, 'df')
            store.append('df', df[:10], dropna=True)
            store.append('df', df[10:], dropna=True)
            tm.assert_frame_equal(store['df'], df[-4:])

            # nan some entire rows (dropna=False)
            _maybe_remove(store, 'df2')
            store.append('df2', df[:10], dropna=False)
            store.append('df2', df[10:], dropna=False)
            tm.assert_frame_equal(store['df2'], df)

            # tests the option io.hdf.dropna_table
            pd.set_option('io.hdf.dropna_table', False)
            _maybe_remove(store, 'df3')
            store.append('df3', df[:10])
            store.append('df3', df[10:])
            tm.assert_frame_equal(store['df3'], df)

            pd.set_option('io.hdf.dropna_table', True)
            _maybe_remove(store, 'df4')
            store.append('df4', df[:10])
            store.append('df4', df[10:])
            tm.assert_frame_equal(store['df4'], df[-4:])

            # nan some entire rows (strings are still written!)
            df = DataFrame({'A1': np.random.randn(20),
                            'A2': np.random.randn(20),
                            'B': 'foo', 'C': 'bar'},
                           index=np.arange(20))
            df.loc[0:15, :] = np.nan

            _maybe_remove(store, 'df')
            store.append('df', df[:10], dropna=True)
            store.append('df', df[10:], dropna=True)
            tm.assert_frame_equal(store['df'], df)

            _maybe_remove(store, 'df2')
            store.append('df2', df[:10], dropna=False)
            store.append('df2', df[10:], dropna=False)
            tm.assert_frame_equal(store['df2'], df)

            # nan some entire rows (but since we have dates they are still
            # written!)
            df = DataFrame({'A1': np.random.randn(20),
                            'A2': np.random.randn(20),
                            'B': 'foo', 'C': 'bar',
                            'D': Timestamp("20010101"),
                            'E': datetime.datetime(2001, 1, 2, 0, 0)},
                           index=np.arange(20))
            df.loc[0:15, :] = np.nan

            _maybe_remove(store, 'df')
            store.append('df', df[:10], dropna=True)
            store.append('df', df[10:], dropna=True)
            tm.assert_frame_equal(store['df'], df)

            _maybe_remove(store, 'df2')
            store.append('df2', df[:10], dropna=False)
            store.append('df2', df[10:], dropna=False)
            tm.assert_frame_equal(store['df2'], df)

        # Test to make sure defaults are to not drop.
        # Corresponding to Issue 9382
        df_with_missing = DataFrame(
            {'col1': [0, np.nan, 2], 'col2': [1, np.nan, np.nan]})

        with ensure_clean_path(self.path) as path:
            df_with_missing.to_hdf(path, 'df_with_missing', format='table')
            reloaded = read_hdf(path, 'df_with_missing')
            tm.assert_frame_equal(df_with_missing, reloaded)

        matrix = [[[np.nan, np.nan, np.nan], [1, np.nan, np.nan]],
                  [[np.nan, np.nan, np.nan], [np.nan, 5, 6]],
                  [[np.nan, np.nan, np.nan], [np.nan, 3, np.nan]]]

        with catch_warnings(record=True):
            panel_with_missing = Panel(matrix,
                                       items=['Item1', 'Item2', 'Item3'],
                                       major_axis=[1, 2],
                                       minor_axis=['A', 'B', 'C'])

            with ensure_clean_path(self.path) as path:
                panel_with_missing.to_hdf(
                    path, 'panel_with_missing', format='table')
                reloaded_panel = read_hdf(path, 'panel_with_missing')
                tm.assert_panel_equal(panel_with_missing, reloaded_panel)
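
    # Passing axes=['columns'] below makes the columns the indexable table
    # axis, so subsequent appends add new columns rather than new rows.
    # Exact-match selection on the non-indexable row index still works,
    # but range comparisons against it raise TypeError.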

    def test_append_frame_column_oriented(self):

        with ensure_clean_store(self.path) as store:

            # column oriented
            df = tm.makeTimeDataFrame()
            _maybe_remove(store, 'df1')
            store.append('df1', df.iloc[:, :2], axes=['columns'])
            store.append('df1', df.iloc[:, 2:])
            tm.assert_frame_equal(store['df1'], df)

            result = store.select('df1', 'columns=A')
            expected = df.reindex(columns=['A'])
            tm.assert_frame_equal(expected, result)

            # selection on the non-indexable
            result = store.select(
                'df1', ('columns=A', 'index=df.index[0:4]'))
            expected = df.reindex(columns=['A'], index=df.index[0:4])
            tm.assert_frame_equal(expected, result)

            # this isn't supported
            with pytest.raises(TypeError):
                store.select('df1',
                             'columns=A and index>df.index[4]')

    def test_append_with_different_block_ordering(self):

        # GH 4096; using same frames, but different block orderings
        with ensure_clean_store(self.path) as store:

            for i in range(10):

                df = DataFrame(np.random.randn(10, 2), columns=list('AB'))
                df['index'] = range(10)
                df['index'] += i * 10
                df['int64'] = Series([1] * len(df), dtype='int64')
                df['int16'] = Series([1] * len(df), dtype='int16')

                if i % 2 == 0:
                    del df['int64']
                    df['int64'] = Series([1] * len(df), dtype='int64')
                if i % 3 == 0:
                    a = df.pop('A')
                    df['A'] = a

                df.set_index('index', inplace=True)

                store.append('df', df)

        # test a different ordering but with more fields (like an invalid
        # combination)
        with ensure_clean_store(self.path) as store:

            df = DataFrame(np.random.randn(10, 2),
                           columns=list('AB'), dtype='float64')
            df['int64'] = Series([1] * len(df), dtype='int64')
            df['int16'] = Series([1] * len(df), dtype='int16')
            store.append('df', df)

            # store additional fields in different blocks
            df['int16_2'] = Series([1] * len(df), dtype='int16')
            pytest.raises(ValueError, store.append, 'df', df)

            # store multiple additional fields in different blocks
            df['float_3'] = Series([1.] * len(df), dtype='float64')
            pytest.raises(ValueError, store.append, 'df', df)
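
    # min_itemsize below fixes the on-disk byte width of string columns at
    # table-creation time. Later appends with strings longer than the
    # reserved width raise ValueError rather than silently truncating.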

    def test_append_with_strings(self):

        with ensure_clean_store(self.path) as store:
            with catch_warnings(record=True):
                simplefilter("ignore", FutureWarning)

                wp = tm.makePanel()
                wp2 = wp.rename(
                    minor_axis={x: "%s_extra" % x for x in wp.minor_axis})

                def check_col(key, name, size):
                    assert getattr(store.get_storer(key)
                                   .table.description, name).itemsize == size

                store.append('s1', wp, min_itemsize=20)
                store.append('s1', wp2)
                expected = concat([wp, wp2], axis=2)
                expected = expected.reindex(
                    minor_axis=sorted(expected.minor_axis))
                assert_panel_equal(store['s1'], expected)
                check_col('s1', 'minor_axis', 20)

                # test dict format
                store.append('s2', wp, min_itemsize={'minor_axis': 20})
                store.append('s2', wp2)
                expected = concat([wp, wp2], axis=2)
                expected = expected.reindex(
                    minor_axis=sorted(expected.minor_axis))
                assert_panel_equal(store['s2'], expected)
                check_col('s2', 'minor_axis', 20)

                # apply the wrong field (similar to #1)
                store.append('s3', wp, min_itemsize={'major_axis': 20})
                pytest.raises(ValueError, store.append, 's3', wp2)

                # test truncation of bigger strings
                store.append('s4', wp)
                pytest.raises(ValueError, store.append, 's4', wp2)

                # avoid truncation on elements
                df = DataFrame([[123, 'asdqwerty'], [345, 'dggnhebbsdfbdfb']])
                store.append('df_big', df)
                tm.assert_frame_equal(store.select('df_big'), df)
                check_col('df_big', 'values_block_1', 15)

                # appending smaller string ok
                df2 = DataFrame([[124, 'asdqy'], [346, 'dggnhefbdfb']])
                store.append('df_big', df2)
                expected = concat([df, df2])
                tm.assert_frame_equal(store.select('df_big'), expected)
                check_col('df_big', 'values_block_1', 15)

                # avoid truncation on elements
                df = DataFrame([[123, 'asdqwerty'], [345, 'dggnhebbsdfbdfb']])
                store.append('df_big2', df, min_itemsize={'values': 50})
                tm.assert_frame_equal(store.select('df_big2'), df)
                check_col('df_big2', 'values_block_1', 50)

                # bigger string on next append
                store.append('df_new', df)
                df_new = DataFrame(
                    [[124, 'abcdefqhij'], [346, 'abcdefghijklmnopqrtsuvwxyz']])
                pytest.raises(ValueError, store.append, 'df_new', df_new)

                # min_itemsize on Series index (GH 11412)
                df = tm.makeMixedDataFrame().set_index('C')
                store.append('ss', df['B'], min_itemsize={'index': 4})
                tm.assert_series_equal(store.select('ss'), df['B'])

                # same as above, with data_columns=True
                store.append('ss2', df['B'], data_columns=True,
                             min_itemsize={'index': 4})
                tm.assert_series_equal(store.select('ss2'), df['B'])

                # min_itemsize in index without appending (GH 10381)
                store.put('ss3', df, format='table',
                          min_itemsize={'index': 6})
                # just make sure there is a longer string:
                df2 = df.copy().reset_index().assign(C='longer').set_index('C')
                store.append('ss3', df2)
                tm.assert_frame_equal(store.select('ss3'),
                                      pd.concat([df, df2]))

                # same as above, with a Series
                store.put('ss4', df['B'], format='table',
                          min_itemsize={'index': 6})
                store.append('ss4', df2['B'])
                tm.assert_series_equal(store.select('ss4'),
                                       pd.concat([df['B'], df2['B']]))

                # with nans
                _maybe_remove(store, 'df')
                df = tm.makeTimeDataFrame()
                df['string'] = 'foo'
                df.loc[1:4, 'string'] = np.nan
                df['string2'] = 'bar'
                df.loc[4:8, 'string2'] = np.nan
                df['string3'] = 'bah'
                df.loc[1:, 'string3'] = np.nan
                store.append('df', df)
                result = store.select('df')
                tm.assert_frame_equal(result, df)

        with ensure_clean_store(self.path) as store:

            def check_col(key, name, size):
                assert getattr(store.get_storer(key)
                               .table.description, name).itemsize == size

            df = DataFrame(dict(A='foo', B='bar'), index=range(10))

            # a min_itemsize that creates a data_column
            _maybe_remove(store, 'df')
            store.append('df', df, min_itemsize={'A': 200})
            check_col('df', 'A', 200)
            assert store.get_storer('df').data_columns == ['A']

            # a min_itemsize that creates a second data_column
            _maybe_remove(store, 'df')
            store.append('df', df, data_columns=['B'], min_itemsize={'A': 200})
            check_col('df', 'A', 200)
            assert store.get_storer('df').data_columns == ['B', 'A']

            # a min_itemsize on the values, sized for both the data_column
            # and the remaining value block
            _maybe_remove(store, 'df')
            store.append('df', df, data_columns=[
                         'B'], min_itemsize={'values': 200})
            check_col('df', 'B', 200)
            check_col('df', 'values_block_0', 200)
            assert store.get_storer('df').data_columns == ['B']

            # infer the .typ on subsequent appends
            _maybe_remove(store, 'df')
            store.append('df', df[:5], min_itemsize=200)
            store.append('df', df[5:], min_itemsize=200)
            tm.assert_frame_equal(store['df'], df)

            # invalid min_itemsize keys
            df = DataFrame(['foo', 'foo', 'foo', 'barh',
                            'barh', 'barh'], columns=['A'])
            _maybe_remove(store, 'df')
            pytest.raises(ValueError, store.append, 'df',
                          df, min_itemsize={'foo': 20, 'foobar': 20})

    def test_append_with_empty_string(self):

        with ensure_clean_store(self.path) as store:
            # with all empty strings (GH 12242)
            df = DataFrame({'x': ['a', 'b', 'c', 'd', 'e', 'f', '']})
            store.append('df', df[:-1], min_itemsize={'x': 1})
            store.append('df', df[-1:], min_itemsize={'x': 1})
            tm.assert_frame_equal(store.select('df'), df)

    def test_to_hdf_with_min_itemsize(self):

        with ensure_clean_path(self.path) as path:

            # min_itemsize in index with to_hdf (GH 10381)
            df = tm.makeMixedDataFrame().set_index('C')
            df.to_hdf(path, 'ss3', format='table', min_itemsize={'index': 6})
            # just make sure there is a longer string:
            df2 = df.copy().reset_index().assign(C='longer').set_index('C')
            df2.to_hdf(path, 'ss3', append=True, format='table')
            tm.assert_frame_equal(pd.read_hdf(path, 'ss3'),
                                  pd.concat([df, df2]))

            # same as above, with a Series
            df['B'].to_hdf(path, 'ss4', format='table',
                           min_itemsize={'index': 6})
            df2['B'].to_hdf(path, 'ss4', append=True, format='table')
            tm.assert_series_equal(pd.read_hdf(path, 'ss4'),
                                   pd.concat([df['B'], df2['B']]))

    @pytest.mark.parametrize(
        "format",
        [pytest.param('fixed', marks=xfail_non_writeable),
         'table'])
    def test_to_hdf_errors(self, format):

        data = ['\ud800foo']
        ser = pd.Series(data, index=pd.Index(data))
        with ensure_clean_path(self.path) as path:
            # GH 20835
            ser.to_hdf(path, 'table', format=format, errors='surrogatepass')

            result = pd.read_hdf(path, 'table', errors='surrogatepass')
            tm.assert_series_equal(result, ser)
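
    # data_columns materializes the named columns as their own queryable
    # table columns (instead of packing them into shared value blocks),
    # which is what allows where clauses such as "B>0" or "string='foo'"
    # in the selections below.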

    def test_append_with_data_columns(self):

        with ensure_clean_store(self.path) as store:
            df = tm.makeTimeDataFrame()
            df.iloc[0, df.columns.get_loc('B')] = 1.
            _maybe_remove(store, 'df')
            store.append('df', df[:2], data_columns=['B'])
            store.append('df', df[2:])
            tm.assert_frame_equal(store['df'], df)

            # check that we have indices created
            assert(store._handle.root.df.table.cols.index.is_indexed is True)
            assert(store._handle.root.df.table.cols.B.is_indexed is True)

            # data column searching
            result = store.select('df', 'B>0')
            expected = df[df.B > 0]
            tm.assert_frame_equal(result, expected)

            # data column searching (with an indexable and a data_columns)
            result = store.select(
                'df', 'B>0 and index>df.index[3]')
            df_new = df.reindex(index=df.index[4:])
            expected = df_new[df_new.B > 0]
            tm.assert_frame_equal(result, expected)

            # data column selection with a string data_column
            df_new = df.copy()
            df_new['string'] = 'foo'
            df_new.loc[1:4, 'string'] = np.nan
            df_new.loc[5:6, 'string'] = 'bar'
            _maybe_remove(store, 'df')
            store.append('df', df_new, data_columns=['string'])
            result = store.select('df', "string='foo'")
            expected = df_new[df_new.string == 'foo']
            tm.assert_frame_equal(result, expected)

        # using min_itemsize and a data column
        def check_col(key, name, size):
            assert getattr(store.get_storer(key)
                           .table.description, name).itemsize == size

        with ensure_clean_store(self.path) as store:
            _maybe_remove(store, 'df')
            store.append('df', df_new, data_columns=['string'],
                         min_itemsize={'string': 30})
            check_col('df', 'string', 30)
            _maybe_remove(store, 'df')
            store.append(
                'df', df_new, data_columns=['string'], min_itemsize=30)
            check_col('df', 'string', 30)
            _maybe_remove(store, 'df')
            store.append('df', df_new, data_columns=['string'],
                         min_itemsize={'values': 30})
            check_col('df', 'string', 30)

        with ensure_clean_store(self.path) as store:
            df_new['string2'] = 'foobarbah'
            df_new['string_block1'] = 'foobarbah1'
            df_new['string_block2'] = 'foobarbah2'
            _maybe_remove(store, 'df')
            store.append('df', df_new, data_columns=['string', 'string2'],
                         min_itemsize={'string': 30, 'string2': 40,
                                       'values': 50})
            check_col('df', 'string', 30)
            check_col('df', 'string2', 40)
            check_col('df', 'values_block_1', 50)

        with ensure_clean_store(self.path) as store:
            # multiple data columns
            df_new = df.copy()
            df_new.iloc[0, df_new.columns.get_loc('A')] = 1.
            df_new.iloc[0, df_new.columns.get_loc('B')] = -1.
            df_new['string'] = 'foo'

            sl = df_new.columns.get_loc('string')
            df_new.iloc[1:4, sl] = np.nan
            df_new.iloc[5:6, sl] = 'bar'

            df_new['string2'] = 'foo'
            sl = df_new.columns.get_loc('string2')
            df_new.iloc[2:5, sl] = np.nan
            df_new.iloc[7:8, sl] = 'bar'
            _maybe_remove(store, 'df')
            store.append(
                'df', df_new, data_columns=['A', 'B', 'string', 'string2'])
            result = store.select('df',
                                  "string='foo' and string2='foo'"
                                  " and A>0 and B<0")
            expected = df_new[(df_new.string == 'foo') & (
                df_new.string2 == 'foo') & (df_new.A > 0) & (df_new.B < 0)]
            tm.assert_frame_equal(result, expected, check_index_type=False)

            # yield an empty frame
            result = store.select('df', "string='foo' and string2='cool'")
            expected = df_new[(df_new.string == 'foo') & (
                df_new.string2 == 'cool')]
            tm.assert_frame_equal(result, expected, check_index_type=False)

        with ensure_clean_store(self.path) as store:
            # doc example
            df_dc = df.copy()
            df_dc['string'] = 'foo'
            df_dc.loc[4:6, 'string'] = np.nan
            df_dc.loc[7:9, 'string'] = 'bar'
            df_dc['string2'] = 'cool'
            df_dc['datetime'] = Timestamp('20010102')
            df_dc = df_dc._convert(datetime=True)
            df_dc.loc[3:5, ['A', 'B', 'datetime']] = np.nan

            _maybe_remove(store, 'df_dc')
            store.append('df_dc', df_dc,
                         data_columns=['B', 'C', 'string',
                                       'string2', 'datetime'])
            result = store.select('df_dc', 'B>0')

            expected = df_dc[df_dc.B > 0]
            tm.assert_frame_equal(result, expected, check_index_type=False)

            result = store.select(
                'df_dc', ['B > 0', 'C > 0', 'string == foo'])
            expected = df_dc[(df_dc.B > 0) & (df_dc.C > 0) & (
                df_dc.string == 'foo')]
            tm.assert_frame_equal(result, expected, check_index_type=False)

        with ensure_clean_store(self.path) as store:
            # doc example part 2
            np.random.seed(1234)
            index = date_range('1/1/2000', periods=8)
            df_dc = DataFrame(np.random.randn(8, 3), index=index,
                              columns=['A', 'B', 'C'])
            df_dc['string'] = 'foo'
            df_dc.loc[4:6, 'string'] = np.nan
            df_dc.loc[7:9, 'string'] = 'bar'
            df_dc.loc[:, ['B', 'C']] = df_dc.loc[:, ['B', 'C']].abs()
            df_dc['string2'] = 'cool'

            # on-disk operations
            store.append('df_dc', df_dc, data_columns=[
                         'B', 'C', 'string', 'string2'])

            result = store.select('df_dc', 'B>0')
            expected = df_dc[df_dc.B > 0]
            tm.assert_frame_equal(result, expected)

            result = store.select(
                'df_dc', ['B > 0', 'C > 0', 'string == "foo"'])
            expected = df_dc[(df_dc.B > 0) & (df_dc.C > 0) &
                             (df_dc.string == 'foo')]
            tm.assert_frame_equal(result, expected)

        with ensure_clean_store(self.path) as store:
            with catch_warnings(record=True):
                # panel
                # GH5717 not handling data_columns
                np.random.seed(1234)
                p = tm.makePanel()

                store.append('p1', p)
                tm.assert_panel_equal(store.select('p1'), p)

                store.append('p2', p, data_columns=True)
                tm.assert_panel_equal(store.select('p2'), p)

                result = store.select('p2', where='ItemA>0')
                expected = p.to_frame()
                expected = expected[expected['ItemA'] > 0]
                tm.assert_frame_equal(result.to_frame(), expected)

                result = store.select(
                    'p2', where='ItemA>0 & minor_axis=["A","B"]')
                expected = p.to_frame()
                expected = expected[expected['ItemA'] > 0]
                expected = expected[expected.reset_index(
                    level=['major']).index.isin(['A', 'B'])]
                tm.assert_frame_equal(result.to_frame(), expected)
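
    # The test below drives PyTables column indexes directly: index=False
    # skips index creation at append time, create_table_index builds them
    # afterwards, and optlevel/kind tune the PyTables index (the asserts
    # check the defaults of optlevel 6 and kind 'medium').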

    def test_create_table_index(self):

        with ensure_clean_store(self.path) as store:

            with catch_warnings(record=True):

                def col(t, column):
                    return getattr(store.get_storer(t).table.cols, column)

                # index=False
                wp = tm.makePanel()
                store.append('p5', wp, index=False)
                store.create_table_index('p5', columns=['major_axis'])
                assert(col('p5', 'major_axis').is_indexed is True)
                assert(col('p5', 'minor_axis').is_indexed is False)

                # index=True
                store.append('p5i', wp, index=True)
                assert(col('p5i', 'major_axis').is_indexed is True)
                assert(col('p5i', 'minor_axis').is_indexed is True)

                # default optlevels
                store.get_storer('p5').create_index()
                assert(col('p5', 'major_axis').index.optlevel == 6)
                assert(col('p5', 'minor_axis').index.kind == 'medium')

                # let's change the indexing scheme
                store.create_table_index('p5')
                assert(col('p5', 'major_axis').index.optlevel == 6)
                assert(col('p5', 'minor_axis').index.kind == 'medium')
                store.create_table_index('p5', optlevel=9)
                assert(col('p5', 'major_axis').index.optlevel == 9)
                assert(col('p5', 'minor_axis').index.kind == 'medium')
                store.create_table_index('p5', kind='full')
                assert(col('p5', 'major_axis').index.optlevel == 9)
                assert(col('p5', 'minor_axis').index.kind == 'full')
                store.create_table_index('p5', optlevel=1, kind='light')
                assert(col('p5', 'major_axis').index.optlevel == 1)
                assert(col('p5', 'minor_axis').index.kind == 'light')

                # data columns
                df = tm.makeTimeDataFrame()
                df['string'] = 'foo'
                df['string2'] = 'bar'
                store.append('f', df, data_columns=['string', 'string2'])
                assert(col('f', 'index').is_indexed is True)
                assert(col('f', 'string').is_indexed is True)
                assert(col('f', 'string2').is_indexed is True)

                # specify index=columns
                store.append(
                    'f2', df, index=['string'],
                    data_columns=['string', 'string2'])
                assert(col('f2', 'index').is_indexed is False)
                assert(col('f2', 'string').is_indexed is True)
                assert(col('f2', 'string2').is_indexed is False)

                # try to index a non-table
                _maybe_remove(store, 'f2')
                store.put('f2', df)
                pytest.raises(TypeError, store.create_table_index, 'f2')

    def test_append_diff_item_order(self):

        with catch_warnings(record=True):
            wp = tm.makePanel()
            wp1 = wp.iloc[:, :10, :]
            wp2 = wp.iloc[wp.items.get_indexer(['ItemC', 'ItemB', 'ItemA']),
                          10:, :]

            with ensure_clean_store(self.path) as store:
                store.put('panel', wp1, format='table')
                pytest.raises(ValueError, store.put, 'panel', wp2,
                              append=True)

    def test_append_hierarchical(self):
        index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'],
                                   ['one', 'two', 'three']],
                           codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
                                  [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
                           names=['foo', 'bar'])
        df = DataFrame(np.random.randn(10, 3), index=index,
                       columns=['A', 'B', 'C'])

        with ensure_clean_store(self.path) as store:
            store.append('mi', df)
            result = store.select('mi')
            tm.assert_frame_equal(result, df)

            # GH 3748
            result = store.select('mi', columns=['A', 'B'])
            expected = df.reindex(columns=['A', 'B'])
            tm.assert_frame_equal(result, expected)

        with ensure_clean_path('test.hdf') as path:
            df.to_hdf(path, 'df', format='table')
            result = read_hdf(path, 'df', columns=['A', 'B'])
            expected = df.reindex(columns=['A', 'B'])
            tm.assert_frame_equal(result, expected)

    def test_column_multiindex(self):
        # GH 4710
        # recreate multi-indexes properly

        index = MultiIndex.from_tuples([('A', 'a'), ('A', 'b'),
                                        ('B', 'a'), ('B', 'b')],
                                       names=['first', 'second'])
        df = DataFrame(np.arange(12).reshape(3, 4), columns=index)
        expected = df.copy()
        if isinstance(expected.index, RangeIndex):
            expected.index = Int64Index(expected.index)

        with ensure_clean_store(self.path) as store:

            store.put('df', df)
            tm.assert_frame_equal(store['df'], expected,
                                  check_index_type=True,
                                  check_column_type=True)

            store.put('df1', df, format='table')
            tm.assert_frame_equal(store['df1'], expected,
                                  check_index_type=True,
                                  check_column_type=True)

            pytest.raises(ValueError, store.put, 'df2', df,
                          format='table', data_columns=['A'])
            pytest.raises(ValueError, store.put, 'df3', df,
                          format='table', data_columns=True)

        # appending multi-column on existing table (see GH 6167)
        with ensure_clean_store(self.path) as store:
            store.append('df2', df)
            store.append('df2', df)

            tm.assert_frame_equal(store['df2'], concat((df, df)))

        # non_index_axes name
        df = DataFrame(np.arange(12).reshape(3, 4),
                       columns=Index(list('ABCD'), name='foo'))
        expected = df.copy()
        if isinstance(expected.index, RangeIndex):
            expected.index = Int64Index(expected.index)

        with ensure_clean_store(self.path) as store:

            store.put('df1', df, format='table')
            tm.assert_frame_equal(store['df1'], expected,
                                  check_index_type=True,
                                  check_column_type=True)

    def test_store_multiindex(self):

        # validate multi-index names
        # GH 5527
        with ensure_clean_store(self.path) as store:

            def make_index(names=None):
                return MultiIndex.from_tuples([(datetime.datetime(2013, 12, d),
                                                s, t)
                                               for d in range(1, 3)
                                               for s in range(2)
                                               for t in range(3)],
                                              names=names)

            # no names
            _maybe_remove(store, 'df')
            df = DataFrame(np.zeros((12, 2)), columns=[
                           'a', 'b'], index=make_index())
            store.append('df', df)
            tm.assert_frame_equal(store.select('df'), df)

            # partial names
            _maybe_remove(store, 'df')
            df = DataFrame(np.zeros((12, 2)), columns=[
                           'a', 'b'], index=make_index(['date', None, None]))
            store.append('df', df)
            tm.assert_frame_equal(store.select('df'), df)

            # series
            _maybe_remove(store, 's')
            s = Series(np.zeros(12), index=make_index(['date', None, None]))
            store.append('s', s)
            xp = Series(np.zeros(12), index=make_index(
                ['date', 'level_1', 'level_2']))
            tm.assert_series_equal(store.select('s'), xp)

            # dup with column
            _maybe_remove(store, 'df')
            df = DataFrame(np.zeros((12, 2)), columns=[
                           'a', 'b'], index=make_index(['date', 'a', 't']))
            pytest.raises(ValueError, store.append, 'df', df)

            # dup within level
            _maybe_remove(store, 'df')
            df = DataFrame(np.zeros((12, 2)), columns=['a', 'b'],
                           index=make_index(['date', 'date', 'date']))
            pytest.raises(ValueError, store.append, 'df', df)

            # fully named
            _maybe_remove(store, 'df')
            df = DataFrame(np.zeros((12, 2)), columns=[
                           'a', 'b'], index=make_index(['date', 's', 't']))
            store.append('df', df)
            tm.assert_frame_equal(store.select('df'), df)

    def test_select_columns_in_where(self):

        # GH 6169
        # recreate multi-indexes when columns is passed
        # in the `where` argument
        index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'],
                                   ['one', 'two', 'three']],
                           codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
                                  [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
                           names=['foo_name', 'bar_name'])

        # With a DataFrame
        df = DataFrame(np.random.randn(10, 3), index=index,
                       columns=['A', 'B', 'C'])

        with ensure_clean_store(self.path) as store:
            store.put('df', df, format='table')
            expected = df[['A']]

            tm.assert_frame_equal(store.select('df', columns=['A']), expected)
            tm.assert_frame_equal(store.select(
                'df', where="columns=['A']"), expected)

        # With a Series
        s = Series(np.random.randn(10), index=index,
                   name='A')
        with ensure_clean_store(self.path) as store:
            store.put('s', s, format='table')
            tm.assert_series_equal(store.select('s', where="columns=['A']"), s)

    def test_mi_data_columns(self):
        # GH 14435
        idx = pd.MultiIndex.from_arrays([date_range('2000-01-01', periods=5),
                                         range(5)], names=['date', 'id'])
        df = pd.DataFrame({'a': [1.1, 1.2, 1.3, 1.4, 1.5]}, index=idx)

        with ensure_clean_store(self.path) as store:
            store.append('df', df, data_columns=True)

            actual = store.select('df', where='id == 1')
            expected = df.iloc[[1], :]
            tm.assert_frame_equal(actual, expected)
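
    # put() defaults to the 'fixed' format, which is not queryable; asking
    # a fixed storer for a column subset or a where clause raises TypeError,
    # as the next test verifies.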

    def test_pass_spec_to_storer(self):

        df = tm.makeDataFrame()

        with ensure_clean_store(self.path) as store:
            store.put('df', df)
            pytest.raises(TypeError, store.select, 'df', columns=['A'])
            pytest.raises(TypeError, store.select,
                          'df', where=[('columns=A')])

    @xfail_non_writeable
    def test_append_misc(self):

        with ensure_clean_store(self.path) as store:
            df = tm.makeDataFrame()
            store.append('df', df, chunksize=1)
            result = store.select('df')
            tm.assert_frame_equal(result, df)

            store.append('df1', df, expectedrows=10)
            result = store.select('df1')
            tm.assert_frame_equal(result, df)

        # more chunksize in append tests
        def check(obj, comparator):
            for c in [10, 200, 1000]:
                with ensure_clean_store(self.path, mode='w') as store:
                    store.append('obj', obj, chunksize=c)
                    result = store.select('obj')
                    comparator(result, obj)

        df = tm.makeDataFrame()
        df['string'] = 'foo'
        df['float322'] = 1.
        df['float322'] = df['float322'].astype('float32')
        df['bool'] = df['float322'] > 0
        df['time1'] = Timestamp('20130101')
        df['time2'] = Timestamp('20130102')
        check(df, tm.assert_frame_equal)

        with catch_warnings(record=True):
            p = tm.makePanel()
            check(p, assert_panel_equal)

        # empty frame, GH4273
        with ensure_clean_store(self.path) as store:

            # 0 len
            df_empty = DataFrame(columns=list('ABC'))
            store.append('df', df_empty)
            pytest.raises(KeyError, store.select, 'df')

            # repeated append of 0/non-zero frames
            df = DataFrame(np.random.rand(10, 3), columns=list('ABC'))
            store.append('df', df)
            assert_frame_equal(store.select('df'), df)
            store.append('df', df_empty)
            assert_frame_equal(store.select('df'), df)

            # store
            df = DataFrame(columns=list('ABC'))
            store.put('df2', df)
            assert_frame_equal(store.select('df2'), df)

            with catch_warnings(record=True):

                # 0 len
                p_empty = Panel(items=list('ABC'))
                store.append('p', p_empty)
                pytest.raises(KeyError, store.select, 'p')

                # repeated append of 0/non-zero frames
                p = Panel(np.random.randn(3, 4, 5), items=list('ABC'))
                store.append('p', p)
                assert_panel_equal(store.select('p'), p)
                store.append('p', p_empty)
                assert_panel_equal(store.select('p'), p)

                # store
                store.put('p2', p_empty)
                assert_panel_equal(store.select('p2'), p_empty)

    def test_append_raise(self):

        with ensure_clean_store(self.path) as store:

            # test append with invalid input to get good error messages

            # list in column
            df = tm.makeDataFrame()
            df['invalid'] = [['a']] * len(df)
            assert df.dtypes['invalid'] == np.object_
            pytest.raises(TypeError, store.append, 'df', df)

            # multiple invalid columns
            df['invalid2'] = [['a']] * len(df)
            df['invalid3'] = [['a']] * len(df)
            pytest.raises(TypeError, store.append, 'df', df)

            # datetime with embedded nans as object
            df = tm.makeDataFrame()
            s = Series(datetime.datetime(2001, 1, 2), index=df.index)
            s = s.astype(object)
            s[0:5] = np.nan
            df['invalid'] = s
            assert df.dtypes['invalid'] == np.object_
            pytest.raises(TypeError, store.append, 'df', df)

            # directly ndarray
            pytest.raises(TypeError, store.append, 'df', np.arange(10))

            # series directly
            pytest.raises(TypeError, store.append,
                          'df', Series(np.arange(10)))

            # appending an incompatible table
            df = tm.makeDataFrame()
            store.append('df', df)

            df['foo'] = 'foo'
            pytest.raises(ValueError, store.append, 'df', df)

    def test_table_index_incompatible_dtypes(self):
        df1 = DataFrame({'a': [1, 2, 3]})
        df2 = DataFrame({'a': [4, 5, 6]},
                        index=date_range('1/1/2000', periods=3))

        with ensure_clean_store(self.path) as store:
            store.put('frame', df1, format='table')
            pytest.raises(TypeError, store.put, 'frame', df2,
                          format='table', append=True)

    def test_table_values_dtypes_roundtrip(self):

        with ensure_clean_store(self.path) as store:
            df1 = DataFrame({'a': [1, 2, 3]}, dtype='f8')
            store.append('df_f8', df1)
            assert_series_equal(df1.dtypes, store['df_f8'].dtypes)

            df2 = DataFrame({'a': [1, 2, 3]}, dtype='i8')
            store.append('df_i8', df2)
            assert_series_equal(df2.dtypes, store['df_i8'].dtypes)

            # incompatible dtype
            pytest.raises(ValueError, store.append, 'df_i8', df1)

            # check creation/storage/retrieval of float32 (a bit hacky to
            # actually create them though)
            df1 = DataFrame(
                np.array([[1], [2], [3]], dtype='f4'), columns=['A'])
            store.append('df_f4', df1)
            assert_series_equal(df1.dtypes, store['df_f4'].dtypes)
            assert df1.dtypes[0] == 'float32'

            # check with mixed dtypes
            df1 = DataFrame({c: Series(np.random.randint(5), dtype=c)
                             for c in ['float32', 'float64', 'int32',
                                       'int64', 'int16', 'int8']})
            df1['string'] = 'foo'
            df1['float322'] = 1.
            df1['float322'] = df1['float322'].astype('float32')
            df1['bool'] = df1['float32'] > 0
            df1['time1'] = Timestamp('20130101')
            df1['time2'] = Timestamp('20130102')

            store.append('df_mixed_dtypes1', df1)
            result = store.select('df_mixed_dtypes1').get_dtype_counts()
            expected = Series({'float32': 2, 'float64': 1, 'int32': 1,
                               'bool': 1, 'int16': 1, 'int8': 1,
                               'int64': 1, 'object': 1, 'datetime64[ns]': 2})
            result = result.sort_index()
            expected = expected.sort_index()
            tm.assert_series_equal(result, expected)

    def test_table_mixed_dtypes(self):

        # frame
        df = tm.makeDataFrame()
        df['obj1'] = 'foo'
        df['obj2'] = 'bar'
        df['bool1'] = df['A'] > 0
        df['bool2'] = df['B'] > 0
        df['bool3'] = True
        df['int1'] = 1
        df['int2'] = 2
        df['timestamp1'] = Timestamp('20010102')
        df['timestamp2'] = Timestamp('20010103')
        df['datetime1'] = datetime.datetime(2001, 1, 2, 0, 0)
        df['datetime2'] = datetime.datetime(2001, 1, 3, 0, 0)
        df.loc[3:6, ['obj1']] = np.nan
        df = df._consolidate()._convert(datetime=True)

        with ensure_clean_store(self.path) as store:
            store.append('df1_mixed', df)
            tm.assert_frame_equal(store.select('df1_mixed'), df)

        with catch_warnings(record=True):

            # panel
            wp = tm.makePanel()
            wp['obj1'] = 'foo'
            wp['obj2'] = 'bar'
            wp['bool1'] = wp['ItemA'] > 0
            wp['bool2'] = wp['ItemB'] > 0
            wp['int1'] = 1
            wp['int2'] = 2
            wp = wp._consolidate()

        with catch_warnings(record=True):
            with ensure_clean_store(self.path) as store:
                store.append('p1_mixed', wp)
                assert_panel_equal(store.select('p1_mixed'), wp)

    def test_unimplemented_dtypes_table_columns(self):

        with ensure_clean_store(self.path) as store:

            dtypes = [('date', datetime.date(2001, 1, 2))]

            # py3 ok for unicode
            if not compat.PY3:
                dtypes.append(('unicode', u('\\u03c3')))

            # currently not supported dtypes
            for n, f in dtypes:
                df = tm.makeDataFrame()
                df[n] = f
                pytest.raises(
                    TypeError, store.append, 'df1_%s' % n, df)

        # frame
        df = tm.makeDataFrame()
        df['obj1'] = 'foo'
        df['obj2'] = 'bar'
        df['datetime1'] = datetime.date(2001, 1, 2)
        df = df._consolidate()._convert(datetime=True)

        with ensure_clean_store(self.path) as store:
            # this fails because we have a date in the object block
            pytest.raises(TypeError, store.append, 'df_unimplemented', df)

    @xfail_non_writeable
    @pytest.mark.skipif(
        LooseVersion(np.__version__) == LooseVersion('1.15.0'),
        reason=("Skipping pytables test when numpy version is "
                "exactly equal to 1.15.0: gh-22098"))
    def test_calendar_roundtrip_issue(self):

        # GH 8591
        # doc example from tseries holiday section
        weekmask_egypt = 'Sun Mon Tue Wed Thu'
        holidays = ['2012-05-01',
                    datetime.datetime(2013, 5, 1), np.datetime64('2014-05-01')]
        bday_egypt = pd.offsets.CustomBusinessDay(
            holidays=holidays, weekmask=weekmask_egypt)
        dt = datetime.datetime(2013, 4, 30)
        dts = date_range(dt, periods=5, freq=bday_egypt)

        s = (Series(dts.weekday, dts).map(
            Series('Mon Tue Wed Thu Fri Sat Sun'.split())))

        with ensure_clean_store(self.path) as store:

            store.put('fixed', s)
            result = store.select('fixed')
            assert_series_equal(result, s)

            store.append('table', s)
            result = store.select('table')
            assert_series_equal(result, s)

    def test_roundtrip_tz_aware_index(self):
        # GH 17618
        time = pd.Timestamp('2000-01-01 01:00:00', tz='US/Eastern')
        df = pd.DataFrame(data=[0], index=[time])

        with ensure_clean_store(self.path) as store:
            store.put('frame', df, format='fixed')
            recons = store['frame']
            tm.assert_frame_equal(recons, df)
            assert recons.index[0].value == 946706400000000000
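
    # Timedelta columns are serialized as 64-bit nanosecond integers, so
    # the where clauses below can compare column C against strings such as
    # '-3D' or '-500000s'; the query layer coerces them through Timedelta
    # before filtering.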

    def test_append_with_timedelta(self):
        # GH 3577
        # append timedelta

        df = DataFrame(dict(A=Timestamp('20130101'), B=[Timestamp(
            '20130101') + timedelta(days=i, seconds=10) for i in range(10)]))
        df['C'] = df['A'] - df['B']
        df.loc[3:5, 'C'] = np.nan

        with ensure_clean_store(self.path) as store:

            # table
            _maybe_remove(store, 'df')
            store.append('df', df, data_columns=True)
            result = store.select('df')
            assert_frame_equal(result, df)

            result = store.select('df', where="C<100000")
            assert_frame_equal(result, df)

            result = store.select('df', where="C<pd.Timedelta('-3D')")
            assert_frame_equal(result, df.iloc[3:])

            result = store.select('df', "C<'-3D'")
            assert_frame_equal(result, df.iloc[3:])

            # a bit hacky here as we don't really deal with the NaT properly
            result = store.select('df', "C<'-500000s'")
            result = result.dropna(subset=['C'])
            assert_frame_equal(result, df.iloc[6:])

            result = store.select('df', "C<'-3.5D'")
            result = result.iloc[1:]
            assert_frame_equal(result, df.iloc[4:])

            # fixed
            _maybe_remove(store, 'df2')
            store.put('df2', df)
            result = store.select('df2')
            assert_frame_equal(result, df)

    def test_remove(self):

        with ensure_clean_store(self.path) as store:

            ts = tm.makeTimeSeries()
            df = tm.makeDataFrame()
            store['a'] = ts
            store['b'] = df
            _maybe_remove(store, 'a')
            assert len(store) == 1
            tm.assert_frame_equal(df, store['b'])

            _maybe_remove(store, 'b')
            assert len(store) == 0

            # nonexistent node
            pytest.raises(KeyError, store.remove, 'a_nonexistent_store')

            # pathing
            store['a'] = ts
            store['b/foo'] = df
            _maybe_remove(store, 'foo')
            _maybe_remove(store, 'b/foo')
            assert len(store) == 1

            store['a'] = ts
            store['b/foo'] = df
            _maybe_remove(store, 'b')
            assert len(store) == 1

            # __delitem__
            store['a'] = ts
            store['b'] = df
            del store['a']
            del store['b']
            assert len(store) == 0

    def test_remove_where(self):

        with ensure_clean_store(self.path) as store:
            with catch_warnings(record=True):

                # non-existence
                crit1 = 'index>foo'
                pytest.raises(KeyError, store.remove, 'a', [crit1])

                # try to remove non-table (with crit)
                # non-table ok (where = None)
                wp = tm.makePanel(30)
                store.put('wp', wp, format='table')
                store.remove('wp', ["minor_axis=['A', 'D']"])
                rs = store.select('wp')
                expected = wp.reindex(minor_axis=['B', 'C'])
                assert_panel_equal(rs, expected)

                # empty where
                _maybe_remove(store, 'wp')
                store.put('wp', wp, format='table')

                # deleted number (entire table)
                n = store.remove('wp', [])
                assert n == 120

                # non - empty where
                _maybe_remove(store, 'wp')
                store.put('wp', wp, format='table')
                pytest.raises(ValueError, store.remove,
                              'wp', ['foo'])
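
    # The row counts asserted below follow from the panel shape: 30 entries
    # on the major axis times 4 minor-axis labels flatten to 120 table rows,
    # so removing with start=32 deletes the remaining 120 - 32 rows, and the
    # slice arithmetic on major_axis divides by 4 to recover dates.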

    def test_remove_startstop(self):
        # GH #4835 and #6177

        with ensure_clean_store(self.path) as store:
            with catch_warnings(record=True):
                wp = tm.makePanel(30)

                # start
                _maybe_remove(store, 'wp1')
                store.put('wp1', wp, format='t')
                n = store.remove('wp1', start=32)
                assert n == 120 - 32
                result = store.select('wp1')
                expected = wp.reindex(major_axis=wp.major_axis[:32 // 4])
                assert_panel_equal(result, expected)

                _maybe_remove(store, 'wp2')
                store.put('wp2', wp, format='t')
                n = store.remove('wp2', start=-32)
                assert n == 32
                result = store.select('wp2')
                expected = wp.reindex(major_axis=wp.major_axis[:-32 // 4])
                assert_panel_equal(result, expected)

                # stop
                _maybe_remove(store, 'wp3')
                store.put('wp3', wp, format='t')
                n = store.remove('wp3', stop=32)
                assert n == 32
                result = store.select('wp3')
                expected = wp.reindex(major_axis=wp.major_axis[32 // 4:])
                assert_panel_equal(result, expected)

                _maybe_remove(store, 'wp4')
                store.put('wp4', wp, format='t')
                n = store.remove('wp4', stop=-32)
                assert n == 120 - 32
                result = store.select('wp4')
                expected = wp.reindex(major_axis=wp.major_axis[-32 // 4:])
                assert_panel_equal(result, expected)

                # start and stop
                _maybe_remove(store, 'wp5')
                store.put('wp5', wp, format='t')
                n = store.remove('wp5', start=16, stop=-16)
                assert n == 120 - 32
                result = store.select('wp5')
                expected = wp.reindex(
                    major_axis=(wp.major_axis[:16 // 4]
                                .union(wp.major_axis[-16 // 4:])))
                assert_panel_equal(result, expected)

                _maybe_remove(store, 'wp6')
                store.put('wp6', wp, format='t')
                n = store.remove('wp6', start=16, stop=16)
                assert n == 0
                result = store.select('wp6')
                expected = wp.reindex(major_axis=wp.major_axis)
                assert_panel_equal(result, expected)

                # with where
                _maybe_remove(store, 'wp7')

                # TODO: unused?
                date = wp.major_axis.take(np.arange(0, 30, 3))  # noqa

                crit = 'major_axis=date'
                store.put('wp7', wp, format='t')
                n = store.remove('wp7', where=[crit], stop=80)
                assert n == 28
                result = store.select('wp7')
                expected = wp.reindex(major_axis=wp.major_axis.difference(
                    wp.major_axis[np.arange(0, 20, 3)]))
                assert_panel_equal(result, expected)

    def test_remove_crit(self):

        with ensure_clean_store(self.path) as store:
            with catch_warnings(record=True):
                wp = tm.makePanel(30)

                # group row removal
                _maybe_remove(store, 'wp3')
                date4 = wp.major_axis.take([0, 1, 2, 4, 5, 6, 8, 9, 10])
                crit4 = 'major_axis=date4'
                store.put('wp3', wp, format='t')
                n = store.remove('wp3', where=[crit4])
                assert n == 36

                result = store.select('wp3')
                expected = wp.reindex(
                    major_axis=wp.major_axis.difference(date4))
                assert_panel_equal(result, expected)

                # upper half
                _maybe_remove(store, 'wp')
                store.put('wp', wp, format='table')
                date = wp.major_axis[len(wp.major_axis) // 2]

                crit1 = 'major_axis>date'
                crit2 = "minor_axis=['A', 'D']"
                n = store.remove('wp', where=[crit1])
                assert n == 56

                n = store.remove('wp', where=[crit2])
                assert n == 32

                result = store['wp']
                expected = wp.truncate(after=date).reindex(minor=['B', 'C'])
                assert_panel_equal(result, expected)

                # individual row elements
                _maybe_remove(store, 'wp2')
                store.put('wp2', wp, format='table')

                date1 = wp.major_axis[1:3]
                crit1 = 'major_axis=date1'
                store.remove('wp2', where=[crit1])
                result = store.select('wp2')
                expected = wp.reindex(
                    major_axis=wp.major_axis.difference(date1))
                assert_panel_equal(result, expected)

                date2 = wp.major_axis[5]
                crit2 = 'major_axis=date2'
                store.remove('wp2', where=[crit2])
                result = store['wp2']
                expected = wp.reindex(
                    major_axis=(wp.major_axis
                                .difference(date1)
                                .difference(Index([date2]))
                                ))
                assert_panel_equal(result, expected)

                date3 = [wp.major_axis[7], wp.major_axis[9]]
                crit3 = 'major_axis=date3'
                store.remove('wp2', where=[crit3])
                result = store['wp2']
                expected = wp.reindex(major_axis=wp.major_axis
                                      .difference(date1)
                                      .difference(Index([date2]))
                                      .difference(Index(date3)))
                assert_panel_equal(result, expected)

                # corners
                _maybe_remove(store, 'wp4')
                store.put('wp4', wp, format='table')
                n = store.remove(
                    'wp4', where="major_axis>wp.major_axis[-1]")
                result = store.select('wp4')
                assert_panel_equal(result, wp)

    def test_invalid_terms(self):

        with ensure_clean_store(self.path) as store:

            with catch_warnings(record=True):

                df = tm.makeTimeDataFrame()
                df['string'] = 'foo'
                df.loc[0:4, 'string'] = 'bar'
                wp = tm.makePanel()

                store.put('df', df, format='table')
                store.put('wp', wp, format='table')

                # some invalid terms
                pytest.raises(ValueError, store.select,
                              'wp', "minor=['A', 'B']")
                pytest.raises(ValueError, store.select,
                              'wp', ["index=['20121114']"])
                pytest.raises(ValueError, store.select, 'wp', [
                    "index=['20121114', '20121114']"])
                pytest.raises(TypeError, Term)

                # more invalid
                pytest.raises(
                    ValueError, store.select, 'df', 'df.index[3]')
                pytest.raises(SyntaxError, store.select, 'df', 'index>')
                pytest.raises(
                    ValueError, store.select, 'wp',
                    "major_axis<'20000108' & minor_axis['A', 'B']")

        # from the docs
        with ensure_clean_path(self.path) as path:
            dfq = DataFrame(np.random.randn(10, 4), columns=list(
                'ABCD'), index=date_range('20130101', periods=10))
            dfq.to_hdf(path, 'dfq', format='table', data_columns=True)

            # check ok
            read_hdf(path, 'dfq',
                     where="index>Timestamp('20130104') & columns=['A', 'B']")
            read_hdf(path, 'dfq', where="A>0 or C>0")

        # catch the invalid reference
        with ensure_clean_path(self.path) as path:
            dfq = DataFrame(np.random.randn(10, 4), columns=list(
                'ABCD'), index=date_range('20130101', periods=10))
            dfq.to_hdf(path, 'dfq', format='table')

            pytest.raises(ValueError, read_hdf, path,
                          'dfq', where="A>0 or C>0")
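
    # Query strings use a restricted expression grammar: comparisons
    # against indexables and data columns, 'and'/'or'/'&'/'|', list
    # membership, and plain names resolved from the calling scope.
    # Arbitrary Python (lambdas, attribute lookups like df.index[3]) is
    # rejected, as the invalid-terms test above shows.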

    def test_terms(self):

        with ensure_clean_store(self.path) as store:
            with catch_warnings(record=True):
                simplefilter("ignore", FutureWarning)

                wp = tm.makePanel()
                wpneg = Panel.fromDict({-1: tm.makeDataFrame(),
                                        0: tm.makeDataFrame(),
                                        1: tm.makeDataFrame()})

                store.put('wp', wp, format='table')
                store.put('wpneg', wpneg, format='table')

                # panel
                result = store.select(
                    'wp',
                    "major_axis<'20000108' and minor_axis=['A', 'B']")
                expected = wp.truncate(
                    after='20000108').reindex(minor=['A', 'B'])
                assert_panel_equal(result, expected)

                # with deprecation
                result = store.select(
                    'wp', where=("major_axis<'20000108' "
                                 "and minor_axis=['A', 'B']"))
                expected = wp.truncate(
                    after='20000108').reindex(minor=['A', 'B'])
                tm.assert_panel_equal(result, expected)

            with catch_warnings(record=True):

                # valid terms
                terms = ['major_axis=20121114',
                         'major_axis>20121114',
                         ("major_axis=['20121114', '20121114']",),
                         'major_axis=datetime.datetime(2012, 11, 14)',
                         'major_axis> 20121114',
                         'major_axis >20121114',
                         'major_axis > 20121114',
                         ("minor_axis=['A', 'B']",),
                         ("minor_axis=['A', 'B']",),
                         (("minor_axis==['A', 'B']",),),
                         ("items=['ItemA', 'ItemB']",),
                         'items=ItemA',
                         ]

                for t in terms:
                    store.select('wp', t)

                with pytest.raises(TypeError,
                                   match='Only named functions are supported'):
                    store.select(
                        'wp',
                        'major_axis == (lambda x: x)("20130101")')

            with catch_warnings(record=True):
                # check USub node parsing
                res = store.select('wpneg', 'items == -1')
                expected = Panel({-1: wpneg[-1]})
                tm.assert_panel_equal(res, expected)

                msg = 'Unary addition not supported'
                with pytest.raises(NotImplementedError, match=msg):
                    store.select('wpneg', 'items == +1')

    def test_term_compat(self):
        with ensure_clean_store(self.path) as store:
            with catch_warnings(record=True):
                wp = Panel(np.random.randn(2, 5, 4), items=['Item1', 'Item2'],
                           major_axis=date_range('1/1/2000', periods=5),
                           minor_axis=['A', 'B', 'C', 'D'])
                store.append('wp', wp)

                result = store.select(
                    'wp', where=("major_axis>20000102 "
                                 "and minor_axis=['A', 'B']"))
                expected = wp.loc[:, wp.major_axis >
                                  Timestamp('20000102'), ['A', 'B']]
                assert_panel_equal(result, expected)

                store.remove('wp', 'major_axis>20000103')
                result = store.select('wp')
                expected = wp.loc[:, wp.major_axis <= Timestamp('20000103'), :]
                assert_panel_equal(result, expected)

        with ensure_clean_store(self.path) as store:
            with catch_warnings(record=True):
                wp = Panel(np.random.randn(2, 5, 4),
                           items=['Item1', 'Item2'],
                           major_axis=date_range('1/1/2000', periods=5),
                           minor_axis=['A', 'B', 'C', 'D'])
                store.append('wp', wp)

                # stringified datetimes
                result = store.select(
                    'wp', 'major_axis>datetime.datetime(2000, 1, 2)')
                expected = wp.loc[:, wp.major_axis > Timestamp('20000102')]
                assert_panel_equal(result, expected)

                result = store.select(
                    'wp', 'major_axis>datetime.datetime(2000, 1, 2)')
                expected = wp.loc[:, wp.major_axis > Timestamp('20000102')]
                assert_panel_equal(result, expected)

                result = store.select(
                    'wp',
                    "major_axis=[datetime.datetime(2000, 1, 2, 0, 0), "
                    "datetime.datetime(2000, 1, 3, 0, 0)]")
                expected = wp.loc[:, [Timestamp('20000102'),
                                      Timestamp('20000103')]]
                assert_panel_equal(result, expected)

                result = store.select(
                    'wp', "minor_axis=['A', 'B']")
                expected = wp.loc[:, :, ['A', 'B']]
                assert_panel_equal(result, expected)

    def test_same_name_scoping(self):

        with ensure_clean_store(self.path) as store:

            import pandas as pd
            df = DataFrame(np.random.randn(20, 2),
                           index=pd.date_range('20130101', periods=20))
            store.put('df', df, format='table')
            expected = df[df.index > pd.Timestamp('20130105')]

            import datetime  # noqa
            result = store.select('df', 'index>datetime.datetime(2013,1,5)')
            assert_frame_equal(result, expected)

            from datetime import datetime  # noqa

            # technically an error, but allow it
            result = store.select('df', 'index>datetime.datetime(2013,1,5)')
            assert_frame_equal(result, expected)

            result = store.select('df', 'index>datetime(2013,1,5)')
            assert_frame_equal(result, expected)
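
    # The remaining roundtrip tests lean on the _check_roundtrip helpers
    # (defined elsewhere in this class), which write the object to a
    # temporary store, read it back, and run the given comparator on the
    # result.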

    def test_series(self):

        s = tm.makeStringSeries()
        self._check_roundtrip(s, tm.assert_series_equal)

        ts = tm.makeTimeSeries()
        self._check_roundtrip(ts, tm.assert_series_equal)

        ts2 = Series(ts.index, Index(ts.index, dtype=object))
        self._check_roundtrip(ts2, tm.assert_series_equal)

        ts3 = Series(ts.values, Index(np.asarray(ts.index, dtype=object),
                                      dtype=object))
        self._check_roundtrip(ts3, tm.assert_series_equal,
                              check_index_type=False)

    def test_sparse_series(self):

        s = tm.makeStringSeries()
        s.iloc[3:5] = np.nan
        ss = s.to_sparse()
        self._check_roundtrip(ss, tm.assert_series_equal,
                              check_series_type=True)

        ss2 = s.to_sparse(kind='integer')
        self._check_roundtrip(ss2, tm.assert_series_equal,
                              check_series_type=True)

        ss3 = s.to_sparse(fill_value=0)
        self._check_roundtrip(ss3, tm.assert_series_equal,
                              check_series_type=True)

    def test_sparse_frame(self):

        s = tm.makeDataFrame()
        s.iloc[3:5, 1:3] = np.nan
        s.iloc[8:10, -2] = np.nan
        ss = s.to_sparse()
        self._check_double_roundtrip(ss, tm.assert_frame_equal,
                                     check_frame_type=True)

        ss2 = s.to_sparse(kind='integer')
        self._check_double_roundtrip(ss2, tm.assert_frame_equal,
                                     check_frame_type=True)

        ss3 = s.to_sparse(fill_value=0)
        self._check_double_roundtrip(ss3, tm.assert_frame_equal,
                                     check_frame_type=True)

    def test_float_index(self):

        # GH #454
        index = np.random.randn(10)
        s = Series(np.random.randn(10), index=index)
        self._check_roundtrip(s, tm.assert_series_equal)

    @xfail_non_writeable
    def test_tuple_index(self):

        # GH #492
        col = np.arange(10)
        idx = [(0., 1.), (2., 3.), (4., 5.)]
        data = np.random.randn(30).reshape((3, 10))
        DF = DataFrame(data, index=idx, columns=col)

        with catch_warnings(record=True):
            simplefilter("ignore", pd.errors.PerformanceWarning)
            self._check_roundtrip(DF, tm.assert_frame_equal)

    @xfail_non_writeable
    @pytest.mark.filterwarnings("ignore::pandas.errors.PerformanceWarning")
    def test_index_types(self):

        with catch_warnings(record=True):
            values = np.random.randn(2)

            func = lambda l, r: tm.assert_series_equal(l, r,
                                                       check_dtype=True,
                                                       check_index_type=True,
                                                       check_series_type=True)

        with catch_warnings(record=True):
            ser = Series(values, [0, 'y'])
            self._check_roundtrip(ser, func)

        with catch_warnings(record=True):
            ser = Series(values, [datetime.datetime.today(), 0])
            self._check_roundtrip(ser, func)

        with catch_warnings(record=True):
            ser = Series(values, ['y', 0])
            self._check_roundtrip(ser, func)

        with catch_warnings(record=True):
            ser = Series(values, [datetime.date.today(), 'a'])
            self._check_roundtrip(ser, func)

        with catch_warnings(record=True):
            ser = Series(values, [0, 'y'])
            self._check_roundtrip(ser, func)

            ser = Series(values, [datetime.datetime.today(), 0])
            self._check_roundtrip(ser, func)

            ser = Series(values, ['y', 0])
            self._check_roundtrip(ser, func)

            ser = Series(values, [datetime.date.today(), 'a'])
            self._check_roundtrip(ser, func)

            ser = Series(values, [1.23, 'b'])
            self._check_roundtrip(ser, func)

            ser = Series(values, [1, 1.53])
            self._check_roundtrip(ser, func)

            ser = Series(values, [1, 5])
            self._check_roundtrip(ser, func)

            ser = Series(values, [datetime.datetime(
                2012, 1, 1), datetime.datetime(2012, 1, 2)])
            self._check_roundtrip(ser, func)
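
    # These mixed-type indexes have no native HDF5 representation, so the
    # fixed format presumably falls back to serialized object storage; that
    # is why each roundtrip is wrapped in ``catch_warnings`` and the test
    # filters ``pandas.errors.PerformanceWarning``.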

    def test_timeseries_preepoch(self):

        dr = bdate_range('1/1/1940', '1/1/1960')
        ts = Series(np.random.randn(len(dr)), index=dr)
        try:
            self._check_roundtrip(ts, tm.assert_series_equal)
        except OverflowError:
            pytest.skip('known failure on some windows platforms')

    @xfail_non_writeable
    @pytest.mark.parametrize("compression", [
        False, pytest.param(True, marks=td.skip_if_windows_python_3)
    ])
    def test_frame(self, compression):

        df = tm.makeDataFrame()

        # put in some random NAs
        df.values[0, 0] = np.nan
        df.values[5, 3] = np.nan

        self._check_roundtrip_table(df, tm.assert_frame_equal,
                                    compression=compression)
        self._check_roundtrip(df, tm.assert_frame_equal,
                              compression=compression)

        tdf = tm.makeTimeDataFrame()
        self._check_roundtrip(tdf, tm.assert_frame_equal,
                              compression=compression)

        with ensure_clean_store(self.path) as store:
            # not consolidated
            df['foo'] = np.random.randn(len(df))
            store['df'] = df
            recons = store['df']
            assert recons._data.is_consolidated()

        # empty
        self._check_roundtrip(df[:0], tm.assert_frame_equal)

    @xfail_non_writeable
    def test_empty_series_frame(self):
        s0 = Series()
        s1 = Series(name='myseries')
        df0 = DataFrame()
        df1 = DataFrame(index=['a', 'b', 'c'])
        df2 = DataFrame(columns=['d', 'e', 'f'])

        self._check_roundtrip(s0, tm.assert_series_equal)
        self._check_roundtrip(s1, tm.assert_series_equal)
        self._check_roundtrip(df0, tm.assert_frame_equal)
        self._check_roundtrip(df1, tm.assert_frame_equal)
        self._check_roundtrip(df2, tm.assert_frame_equal)

    @xfail_non_writeable
    @pytest.mark.parametrize(
        'dtype', [np.int64, np.float64, np.object, 'm8[ns]', 'M8[ns]'])
    def test_empty_series(self, dtype):
        s = Series(dtype=dtype)
        self._check_roundtrip(s, tm.assert_series_equal)

    def test_can_serialize_dates(self):

        rng = [x.date() for x in bdate_range('1/1/2000', '1/30/2000')]
        frame = DataFrame(np.random.randn(len(rng), 4), index=rng)

        self._check_roundtrip(frame, tm.assert_frame_equal)

    def test_store_hierarchical(self):
        index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'],
                                   ['one', 'two', 'three']],
                           codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
                                  [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
                           names=['foo', 'bar'])
        frame = DataFrame(np.random.randn(10, 3), index=index,
                          columns=['A', 'B', 'C'])

        self._check_roundtrip(frame, tm.assert_frame_equal)
        self._check_roundtrip(frame.T, tm.assert_frame_equal)
        self._check_roundtrip(frame['A'], tm.assert_series_equal)

        # check that the names are stored
        with ensure_clean_store(self.path) as store:
            store['frame'] = frame
            recons = store['frame']
            tm.assert_frame_equal(recons, frame)

    def test_store_index_name(self):
        df = tm.makeDataFrame()
        df.index.name = 'foo'

        with ensure_clean_store(self.path) as store:
            store['frame'] = df
            recons = store['frame']
            tm.assert_frame_equal(recons, df)

    def test_store_index_name_with_tz(self):
        # GH 13884
        df = pd.DataFrame({'A': [1, 2]})
        df.index = pd.DatetimeIndex([1234567890123456787, 1234567890123456788])
        df.index = df.index.tz_localize('UTC')
        df.index.name = 'foo'

        with ensure_clean_store(self.path) as store:
            store.put('frame', df, format='table')
            recons = store['frame']
            tm.assert_frame_equal(recons, df)

    @pytest.mark.parametrize('table_format', ['table', 'fixed'])
    def test_store_index_name_numpy_str(self, table_format):
        # GH #13492
        idx = pd.Index(pd.to_datetime([datetime.date(2000, 1, 1),
                                       datetime.date(2000, 1, 2)]),
                       name=u('cols\u05d2'))
        idx1 = pd.Index(pd.to_datetime([datetime.date(2010, 1, 1),
                                        datetime.date(2010, 1, 2)]),
                        name=u('rows\u05d0'))
        df = pd.DataFrame(np.arange(4).reshape(2, 2), columns=idx, index=idx1)

        # This used to fail, returning numpy strings instead of python strings.
        with ensure_clean_path(self.path) as path:
            df.to_hdf(path, 'df', format=table_format)
            df2 = read_hdf(path, 'df')

            assert_frame_equal(df, df2, check_names=True)

            assert type(df2.index.name) == text_type
            assert type(df2.columns.name) == text_type

    def test_store_series_name(self):
        df = tm.makeDataFrame()
        series = df['A']

        with ensure_clean_store(self.path) as store:
            store['series'] = series
            recons = store['series']
            tm.assert_series_equal(recons, series)

    @xfail_non_writeable
    @pytest.mark.parametrize("compression", [
        False, pytest.param(True, marks=td.skip_if_windows_python_3)
    ])
    def test_store_mixed(self, compression):

        def _make_one():
            df = tm.makeDataFrame()
            df['obj1'] = 'foo'
            df['obj2'] = 'bar'
            df['bool1'] = df['A'] > 0
            df['bool2'] = df['B'] > 0
            df['int1'] = 1
            df['int2'] = 2
            return df._consolidate()

        df1 = _make_one()
        df2 = _make_one()

        self._check_roundtrip(df1, tm.assert_frame_equal)
        self._check_roundtrip(df2, tm.assert_frame_equal)

        with ensure_clean_store(self.path) as store:
            store['obj'] = df1
            tm.assert_frame_equal(store['obj'], df1)
            store['obj'] = df2
            tm.assert_frame_equal(store['obj'], df2)

        # check that can store Series of all of these types
        self._check_roundtrip(df1['obj1'], tm.assert_series_equal,
                              compression=compression)
        self._check_roundtrip(df1['bool1'], tm.assert_series_equal,
                              compression=compression)
        self._check_roundtrip(df1['int1'], tm.assert_series_equal,
                              compression=compression)

    def test_wide(self):

        with catch_warnings(record=True):
            wp = tm.makePanel()
            self._check_roundtrip(wp, assert_panel_equal)

    @pytest.mark.filterwarnings(
        "ignore:\\nduplicate:pandas.io.pytables.DuplicateWarning"
    )
    def test_select_with_dups(self):

        # single dtypes
        df = DataFrame(np.random.randn(10, 4), columns=['A', 'A', 'B', 'B'])
        df.index = date_range('20130101 9:30', periods=10, freq='T')

        with ensure_clean_store(self.path) as store:
            store.append('df', df)

            result = store.select('df')
            expected = df
            assert_frame_equal(result, expected, by_blocks=True)

            result = store.select('df', columns=df.columns)
            expected = df
            assert_frame_equal(result, expected, by_blocks=True)

            result = store.select('df', columns=['A'])
            expected = df.loc[:, ['A']]
            assert_frame_equal(result, expected)

        # dups across dtypes
        df = concat([DataFrame(np.random.randn(10, 4),
                               columns=['A', 'A', 'B', 'B']),
                     DataFrame(np.random.randint(0, 10, size=20)
                               .reshape(10, 2),
                               columns=['A', 'C'])],
                    axis=1)
        df.index = date_range('20130101 9:30', periods=10, freq='T')

        with ensure_clean_store(self.path) as store:
            store.append('df', df)

            result = store.select('df')
            expected = df
            assert_frame_equal(result, expected, by_blocks=True)

            result = store.select('df', columns=df.columns)
            expected = df
            assert_frame_equal(result, expected, by_blocks=True)

            expected = df.loc[:, ['A']]
            result = store.select('df', columns=['A'])
            assert_frame_equal(result, expected, by_blocks=True)

            expected = df.loc[:, ['B', 'A']]
            result = store.select('df', columns=['B', 'A'])
            assert_frame_equal(result, expected, by_blocks=True)

        # duplicates on both index and columns
        with ensure_clean_store(self.path) as store:
            store.append('df', df)
            store.append('df', df)

            expected = df.loc[:, ['B', 'A']]
            expected = concat([expected, expected])
            result = store.select('df', columns=['B', 'A'])
            assert_frame_equal(result, expected, by_blocks=True)
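
    # ``by_blocks=True`` makes ``assert_frame_equal`` compare the frames one
    # dtype block at a time instead of column by column, which avoids the
    # ambiguity of positional column lookup under duplicate labels.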

    @pytest.mark.filterwarnings(
        "ignore:\\nduplicate:pandas.io.pytables.DuplicateWarning"
    )
    def test_wide_table_dups(self):
        with ensure_clean_store(self.path) as store:
            with catch_warnings(record=True):
                wp = tm.makePanel()
                store.put('panel', wp, format='table')
                store.put('panel', wp, format='table', append=True)

                recons = store['panel']

                assert_panel_equal(recons, wp)

    def test_long(self):
        def _check(left, right):
            assert_panel_equal(left.to_panel(), right.to_panel())

        with catch_warnings(record=True):
            wp = tm.makePanel()
            self._check_roundtrip(wp.to_frame(), _check)

    def test_overwrite_node(self):

        with ensure_clean_store(self.path) as store:
            store['a'] = tm.makeTimeDataFrame()
            ts = tm.makeTimeSeries()
            store['a'] = ts

            tm.assert_series_equal(store['a'], ts)

    def test_sparse_with_compression(self):

        # GH 2931
        # make sparse dataframe
        arr = np.random.binomial(n=1, p=.01, size=(1000, 10))
        df = DataFrame(arr).to_sparse(fill_value=0)

        # case 1: store uncompressed
        self._check_double_roundtrip(df, tm.assert_frame_equal,
                                     compression=False,
                                     check_frame_type=True)

        # case 2: store compressed (works)
        self._check_double_roundtrip(df, tm.assert_frame_equal,
                                     compression='zlib',
                                     check_frame_type=True)

        # set one series to be completely sparse
        df[0] = np.zeros(1000)

        # case 3: store df with completely sparse series uncompressed
        self._check_double_roundtrip(df, tm.assert_frame_equal,
                                     compression=False,
                                     check_frame_type=True)

        # case 4: try storing df with completely sparse series compressed
        # (fails)
        self._check_double_roundtrip(df, tm.assert_frame_equal,
                                     compression='zlib',
                                     check_frame_type=True)

    def test_select(self):

        with ensure_clean_store(self.path) as store:
            with catch_warnings(record=True):
                wp = tm.makePanel()

                # put/select ok
                _maybe_remove(store, 'wp')
                store.put('wp', wp, format='table')
                store.select('wp')

                # non-table ok (where = None)
                _maybe_remove(store, 'wp')
                store.put('wp2', wp)
                store.select('wp2')

                # selection on the non-indexable with a large number of
                # columns
                wp = Panel(np.random.randn(100, 100, 100),
                           items=['Item%03d' % i for i in range(100)],
                           major_axis=date_range('1/1/2000', periods=100),
                           minor_axis=['E%03d' % i for i in range(100)])

                _maybe_remove(store, 'wp')
                store.append('wp', wp)
                items = ['Item%03d' % i for i in range(80)]
                result = store.select('wp', 'items=items')
                expected = wp.reindex(items=items)
                assert_panel_equal(expected, result)

                # selecting a non-table with a where
                # pytest.raises(ValueError, store.select,
                #               'wp2', ('column', ['A', 'D']))

                # select with columns=
                df = tm.makeTimeDataFrame()
                _maybe_remove(store, 'df')
                store.append('df', df)
                result = store.select('df', columns=['A', 'B'])
                expected = df.reindex(columns=['A', 'B'])
                tm.assert_frame_equal(expected, result)

                # equivalently
                result = store.select('df', [("columns=['A', 'B']")])
                expected = df.reindex(columns=['A', 'B'])
                tm.assert_frame_equal(expected, result)

                # with a data column
                _maybe_remove(store, 'df')
                store.append('df', df, data_columns=['A'])
                result = store.select('df', ['A > 0'], columns=['A', 'B'])
                expected = df[df.A > 0].reindex(columns=['A', 'B'])
                tm.assert_frame_equal(expected, result)

                # all columns as data columns
                _maybe_remove(store, 'df')
                store.append('df', df, data_columns=True)
                result = store.select('df', ['A > 0'], columns=['A', 'B'])
                expected = df[df.A > 0].reindex(columns=['A', 'B'])
                tm.assert_frame_equal(expected, result)

                # with a data column, but different columns
                _maybe_remove(store, 'df')
                store.append('df', df, data_columns=['A'])
                result = store.select('df', ['A > 0'], columns=['C', 'D'])
                expected = df[df.A > 0].reindex(columns=['C', 'D'])
                tm.assert_frame_equal(expected, result)
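
    # Only columns written as ``data_columns`` are individually queryable in
    # a ``where`` clause; the remaining columns are packed into opaque
    # ``values_block_*`` arrays.  A minimal sketch (key and column names are
    # illustrative):
    #
    #     store.append('events', df, data_columns=['A'])
    #     store.select('events', 'A > 0')   # ok, A is a data column
    #     store.select('events', 'C > 0')   # raises, C is not a data column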

    def test_select_dtypes(self):

        with ensure_clean_store(self.path) as store:
            # with a Timestamp data column (GH #2637)
            df = DataFrame(dict(
                ts=bdate_range('2012-01-01', periods=300),
                A=np.random.randn(300)))
            _maybe_remove(store, 'df')
            store.append('df', df, data_columns=['ts', 'A'])

            result = store.select('df', "ts>=Timestamp('2012-02-01')")
            expected = df[df.ts >= Timestamp('2012-02-01')]
            tm.assert_frame_equal(expected, result)

            # bool columns (GH #2849)
            df = DataFrame(np.random.randn(5, 2), columns=['A', 'B'])
            df['object'] = 'foo'
            df.loc[4:5, 'object'] = 'bar'
            df['boolv'] = df['A'] > 0
            _maybe_remove(store, 'df')
            store.append('df', df, data_columns=True)

            expected = (df[df.boolv == True]  # noqa
                        .reindex(columns=['A', 'boolv']))
            for v in [True, 'true', 1]:
                result = store.select('df', 'boolv == %s' % str(v),
                                      columns=['A', 'boolv'])
                tm.assert_frame_equal(expected, result)

            expected = (df[df.boolv == False]  # noqa
                        .reindex(columns=['A', 'boolv']))
            for v in [False, 'false', 0]:
                result = store.select(
                    'df', 'boolv == %s' % str(v), columns=['A', 'boolv'])
                tm.assert_frame_equal(expected, result)

            # integer index
            df = DataFrame(dict(A=np.random.rand(20), B=np.random.rand(20)))
            _maybe_remove(store, 'df_int')
            store.append('df_int', df)
            result = store.select(
                'df_int', "index<10 and columns=['A']")
            expected = df.reindex(index=list(df.index)[0:10], columns=['A'])
            tm.assert_frame_equal(expected, result)

            # float index
            df = DataFrame(dict(A=np.random.rand(
                20), B=np.random.rand(20), index=np.arange(20, dtype='f8')))
            _maybe_remove(store, 'df_float')
            store.append('df_float', df)
            result = store.select(
                'df_float', "index<10.0 and columns=['A']")
            expected = df.reindex(index=list(df.index)[0:10], columns=['A'])
            tm.assert_frame_equal(expected, result)

        with ensure_clean_store(self.path) as store:

            # floats w/o NaN
            df = DataFrame(
                dict(cols=range(11), values=range(11)), dtype='float64')
            df['cols'] = (df['cols'] + 10).apply(str)

            store.append('df1', df, data_columns=True)
            result = store.select(
                'df1', where='values>2.0')
            expected = df[df['values'] > 2.0]
            tm.assert_frame_equal(expected, result)

            # floats with NaN
            df.iloc[0] = np.nan
            expected = df[df['values'] > 2.0]

            store.append('df2', df, data_columns=True, index=False)
            result = store.select(
                'df2', where='values>2.0')
            tm.assert_frame_equal(expected, result)

            # https://github.com/PyTables/PyTables/issues/282
            # bug in selection when 0th row has a np.nan and an index
            # store.append('df3', df, data_columns=True)
            # result = store.select(
            #     'df3', where='values>2.0')
            # tm.assert_frame_equal(expected, result)

            # not in first position float with NaN ok too
            df = DataFrame(
                dict(cols=range(11), values=range(11)), dtype='float64')
            df['cols'] = (df['cols'] + 10).apply(str)

            df.iloc[1] = np.nan
            expected = df[df['values'] > 2.0]

            store.append('df4', df, data_columns=True)
            result = store.select(
                'df4', where='values>2.0')
            tm.assert_frame_equal(expected, result)

        # test selection with comparison against numpy scalar
        # GH 11283
        with ensure_clean_store(self.path) as store:
            df = tm.makeDataFrame()

            expected = df[df['A'] > 0]

            store.append('df', df, data_columns=True)
            np_zero = np.float64(0)  # noqa
            result = store.select('df', where=["A>np_zero"])
            tm.assert_frame_equal(expected, result)
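
    # As with ``np_zero`` above, a ``where`` string may reference any local
    # variable; the expression engine looks the name up in the calling frame,
    # so ``"A>np_zero"`` compares against the ``np.float64`` scalar defined
    # just before the select.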

    def test_select_with_many_inputs(self):

        with ensure_clean_store(self.path) as store:

            df = DataFrame(dict(ts=bdate_range('2012-01-01', periods=300),
                                A=np.random.randn(300),
                                B=range(300),
                                users=['a'] * 50 + ['b'] * 50 + ['c'] * 100 +
                                      ['a%03d' % i for i in range(100)]))
            _maybe_remove(store, 'df')
            store.append('df', df, data_columns=['ts', 'A', 'B', 'users'])

            # regular select
            result = store.select('df', "ts>=Timestamp('2012-02-01')")
            expected = df[df.ts >= Timestamp('2012-02-01')]
            tm.assert_frame_equal(expected, result)

            # small selector
            result = store.select(
                'df',
                "ts>=Timestamp('2012-02-01') & users=['a','b','c']")
            expected = df[(df.ts >= Timestamp('2012-02-01')) &
                          df.users.isin(['a', 'b', 'c'])]
            tm.assert_frame_equal(expected, result)

            # big selector along the columns
            selector = ['a', 'b', 'c'] + ['a%03d' % i for i in range(60)]
            result = store.select(
                'df',
                "ts>=Timestamp('2012-02-01') and users=selector")
            expected = df[(df.ts >= Timestamp('2012-02-01')) &
                          df.users.isin(selector)]
            tm.assert_frame_equal(expected, result)

            selector = range(100, 200)
            result = store.select('df', 'B=selector')
            expected = df[df.B.isin(selector)]
            tm.assert_frame_equal(expected, result)
            assert len(result) == 100

            # big selector along the index
            selector = Index(df.ts[0:100].values)
            result = store.select('df', 'ts=selector')
            expected = df[df.ts.isin(selector.values)]
            tm.assert_frame_equal(expected, result)
            assert len(result) == 100
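
    # Membership terms such as ``users=selector`` above can only be inlined
    # into a numexpr condition for short lists; past a small cutoff the term
    # is evaluated as an ``isin``-style filter on the read result instead,
    # which is what the ``expected`` frames mirror with ``df.users.isin(...)``.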

    def test_select_iterator(self):

        # single table
        with ensure_clean_store(self.path) as store:

            df = tm.makeTimeDataFrame(500)
            _maybe_remove(store, 'df')
            store.append('df', df)

            expected = store.select('df')

            results = [s for s in store.select('df', iterator=True)]
            result = concat(results)
            tm.assert_frame_equal(expected, result)

            results = [s for s in store.select('df', chunksize=100)]
            assert len(results) == 5
            result = concat(results)
            tm.assert_frame_equal(expected, result)

            results = [s for s in store.select('df', chunksize=150)]
            result = concat(results)
            tm.assert_frame_equal(result, expected)

        with ensure_clean_path(self.path) as path:

            df = tm.makeTimeDataFrame(500)
            df.to_hdf(path, 'df_non_table')
            pytest.raises(TypeError, read_hdf, path,
                          'df_non_table', chunksize=100)
            pytest.raises(TypeError, read_hdf, path,
                          'df_non_table', iterator=True)

        with ensure_clean_path(self.path) as path:

            df = tm.makeTimeDataFrame(500)
            df.to_hdf(path, 'df', format='table')

            results = [s for s in read_hdf(path, 'df', chunksize=100)]
            result = concat(results)

            assert len(results) == 5
            tm.assert_frame_equal(result, df)
            tm.assert_frame_equal(result, read_hdf(path, 'df'))

        # multiple
        with ensure_clean_store(self.path) as store:

            df1 = tm.makeTimeDataFrame(500)
            store.append('df1', df1, data_columns=True)
            df2 = tm.makeTimeDataFrame(500).rename(
                columns=lambda x: "%s_2" % x)
            df2['foo'] = 'bar'
            store.append('df2', df2)

            df = concat([df1, df2], axis=1)

            # full selection
            expected = store.select_as_multiple(
                ['df1', 'df2'], selector='df1')
            results = [s for s in store.select_as_multiple(
                ['df1', 'df2'], selector='df1', chunksize=150)]
            result = concat(results)
            tm.assert_frame_equal(expected, result)
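
    # A minimal sketch of the chunked-read pattern exercised above (file and
    # key names are illustrative):
    #
    #     with pd.HDFStore('big.h5') as store:
    #         for chunk in store.select('df', chunksize=100):
    #             process(chunk)   # each chunk is a DataFrame
    #
    # ``iterator=True`` yields the same chunks with a default chunksize; both
    # forms require the table format, hence the TypeError for 'df_non_table'.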

    def test_select_iterator_complete_8014(self):

        # GH 8014
        # using iterator and where clause
        chunksize = 1e4

        # no iterator
        with ensure_clean_store(self.path) as store:

            expected = tm.makeTimeDataFrame(100064, 'S')
            _maybe_remove(store, 'df')
            store.append('df', expected)

            beg_dt = expected.index[0]
            end_dt = expected.index[-1]

            # select w/o iteration and no where clause works
            result = store.select('df')
            tm.assert_frame_equal(expected, result)

            # select w/o iterator and where clause, single term, begin
            # of range, works
            where = "index >= '%s'" % beg_dt
            result = store.select('df', where=where)
            tm.assert_frame_equal(expected, result)

            # select w/o iterator and where clause, single term, end
            # of range, works
            where = "index <= '%s'" % end_dt
            result = store.select('df', where=where)
            tm.assert_frame_equal(expected, result)

            # select w/o iterator and where clause, inclusive range,
            # works
            where = "index >= '%s' & index <= '%s'" % (beg_dt, end_dt)
            result = store.select('df', where=where)
            tm.assert_frame_equal(expected, result)

        # with iterator, full range
        with ensure_clean_store(self.path) as store:

            expected = tm.makeTimeDataFrame(100064, 'S')
            _maybe_remove(store, 'df')
            store.append('df', expected)

            beg_dt = expected.index[0]
            end_dt = expected.index[-1]

            # select w/iterator and no where clause works
            results = [s for s in store.select('df', chunksize=chunksize)]
            result = concat(results)
            tm.assert_frame_equal(expected, result)

            # select w/iterator and where clause, single term, begin of range
            where = "index >= '%s'" % beg_dt
            results = [s for s in store.select(
                'df', where=where, chunksize=chunksize)]
            result = concat(results)
            tm.assert_frame_equal(expected, result)

            # select w/iterator and where clause, single term, end of range
            where = "index <= '%s'" % end_dt
            results = [s for s in store.select(
                'df', where=where, chunksize=chunksize)]
            result = concat(results)
            tm.assert_frame_equal(expected, result)

            # select w/iterator and where clause, inclusive range
            where = "index >= '%s' & index <= '%s'" % (beg_dt, end_dt)
            results = [s for s in store.select(
                'df', where=where, chunksize=chunksize)]
            result = concat(results)
            tm.assert_frame_equal(expected, result)

    def test_select_iterator_non_complete_8014(self):

        # GH 8014
        # using iterator and where clause
        chunksize = 1e4

        # with iterator, non complete range
        with ensure_clean_store(self.path) as store:

            expected = tm.makeTimeDataFrame(100064, 'S')
            _maybe_remove(store, 'df')
            store.append('df', expected)

            beg_dt = expected.index[1]
            end_dt = expected.index[-2]

            # select w/iterator and where clause, single term, begin of range
            where = "index >= '%s'" % beg_dt
            results = [s for s in store.select(
                'df', where=where, chunksize=chunksize)]
            result = concat(results)
            rexpected = expected[expected.index >= beg_dt]
            tm.assert_frame_equal(rexpected, result)

            # select w/iterator and where clause, single term, end of range
            where = "index <= '%s'" % end_dt
            results = [s for s in store.select(
                'df', where=where, chunksize=chunksize)]
            result = concat(results)
            rexpected = expected[expected.index <= end_dt]
            tm.assert_frame_equal(rexpected, result)

            # select w/iterator and where clause, inclusive range
            where = "index >= '%s' & index <= '%s'" % (beg_dt, end_dt)
            results = [s for s in store.select(
                'df', where=where, chunksize=chunksize)]
            result = concat(results)
            rexpected = expected[(expected.index >= beg_dt) &
                                 (expected.index <= end_dt)]
            tm.assert_frame_equal(rexpected, result)

        # with iterator, empty where
        with ensure_clean_store(self.path) as store:

            expected = tm.makeTimeDataFrame(100064, 'S')
            _maybe_remove(store, 'df')
            store.append('df', expected)

            end_dt = expected.index[-1]

            # select w/iterator and where clause, single term, begin of range
            where = "index > '%s'" % end_dt
            results = [s for s in store.select(
                'df', where=where, chunksize=chunksize)]
            assert 0 == len(results)

    def test_select_iterator_many_empty_frames(self):

        # GH 8014
        # using iterator and where clause can return many empty
        # frames.
        chunksize = int(1e4)

        # with iterator, range limited to the first chunk
        with ensure_clean_store(self.path) as store:

            expected = tm.makeTimeDataFrame(100000, 'S')
            _maybe_remove(store, 'df')
            store.append('df', expected)

            beg_dt = expected.index[0]
            end_dt = expected.index[chunksize - 1]

            # select w/iterator and where clause, single term, begin of range
            where = "index >= '%s'" % beg_dt
            results = [s for s in store.select(
                'df', where=where, chunksize=chunksize)]
            result = concat(results)
            rexpected = expected[expected.index >= beg_dt]
            tm.assert_frame_equal(rexpected, result)

            # select w/iterator and where clause, single term, end of range
            where = "index <= '%s'" % end_dt
            results = [s for s in store.select(
                'df', where=where, chunksize=chunksize)]

            assert len(results) == 1
            result = concat(results)
            rexpected = expected[expected.index <= end_dt]
            tm.assert_frame_equal(rexpected, result)

            # select w/iterator and where clause, inclusive range
            where = "index >= '%s' & index <= '%s'" % (beg_dt, end_dt)
            results = [s for s in store.select(
                'df', where=where, chunksize=chunksize)]

            # should be 1, is 10
            assert len(results) == 1
            result = concat(results)
            rexpected = expected[(expected.index >= beg_dt) &
                                 (expected.index <= end_dt)]
            tm.assert_frame_equal(rexpected, result)

            # select w/iterator and where clause which selects
            # *nothing*.
            #
            # To be consistent with Python idiom I suggest this should
            # return [] e.g. `for e in []: print True` never prints
            # True.
            where = "index <= '%s' & index >= '%s'" % (beg_dt, end_dt)
            results = [s for s in store.select(
                'df', where=where, chunksize=chunksize)]

            # should be []
            assert len(results) == 0
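
    # The iterator computes the coordinates of the matching rows up front,
    # so a ``where`` that selects nothing yields no chunks at all (an empty
    # list above) rather than a stream of empty frames; that is the
    # behaviour GH 8014 settled on.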

    @pytest.mark.filterwarnings(
        "ignore:\\nthe :pandas.io.pytables.AttributeConflictWarning"
    )
    def test_retain_index_attributes(self):

        # GH 3499, losing frequency info on index recreation
        df = DataFrame(dict(
            A=Series(lrange(3),
                     index=date_range('2000-1-1', periods=3, freq='H'))))

        with ensure_clean_store(self.path) as store:
            _maybe_remove(store, 'data')
            store.put('data', df, format='table')

            result = store.get('data')
            tm.assert_frame_equal(df, result)

            for attr in ['freq', 'tz', 'name']:
                for idx in ['index', 'columns']:
                    assert (getattr(getattr(df, idx), attr, None) ==
                            getattr(getattr(result, idx), attr, None))

            # try to append a table with a different frequency
            with catch_warnings(record=True):
                df2 = DataFrame(dict(
                    A=Series(lrange(3),
                             index=date_range('2002-1-1',
                                              periods=3, freq='D'))))
                store.append('data', df2)

            assert store.get_storer('data').info['index']['freq'] is None

            # this is ok
            _maybe_remove(store, 'df2')
            df2 = DataFrame(dict(
                A=Series(lrange(3),
                         index=[Timestamp('20010101'), Timestamp('20010102'),
                                Timestamp('20020101')])))
            store.append('df2', df2)
            df3 = DataFrame(dict(
                A=Series(lrange(3),
                         index=date_range('2002-1-1', periods=3,
                                          freq='D'))))
            store.append('df2', df3)

    @pytest.mark.filterwarnings(
        "ignore:\\nthe :pandas.io.pytables.AttributeConflictWarning"
    )
    def test_retain_index_attributes2(self):
        with ensure_clean_path(self.path) as path:

            with catch_warnings(record=True):

                df = DataFrame(dict(
                    A=Series(lrange(3),
                             index=date_range('2000-1-1',
                                              periods=3, freq='H'))))
                df.to_hdf(path, 'data', mode='w', append=True)
                df2 = DataFrame(dict(
                    A=Series(lrange(3),
                             index=date_range('2002-1-1', periods=3,
                                              freq='D'))))
                df2.to_hdf(path, 'data', append=True)

                idx = date_range('2000-1-1', periods=3, freq='H')
                idx.name = 'foo'
                df = DataFrame(dict(A=Series(lrange(3), index=idx)))
                df.to_hdf(path, 'data', mode='w', append=True)

            assert read_hdf(path, 'data').index.name == 'foo'

            with catch_warnings(record=True):

                idx2 = date_range('2001-1-1', periods=3, freq='H')
                idx2.name = 'bar'
                df2 = DataFrame(dict(A=Series(lrange(3), index=idx2)))
                df2.to_hdf(path, 'data', append=True)

            assert read_hdf(path, 'data').index.name is None

    def test_panel_select(self):

        with ensure_clean_store(self.path) as store:

            with catch_warnings(record=True):

                wp = tm.makePanel()
                store.put('wp', wp, format='table')
                date = wp.major_axis[len(wp.major_axis) // 2]

                crit1 = ('major_axis>=date')
                crit2 = ("minor_axis=['A', 'D']")

                result = store.select('wp', [crit1, crit2])
                expected = wp.truncate(before=date).reindex(minor=['A', 'D'])
                assert_panel_equal(result, expected)

                result = store.select(
                    'wp', ['major_axis>="20000124"',
                           ("minor_axis=['A', 'B']")])
                expected = wp.truncate(
                    before='20000124').reindex(minor=['A', 'B'])
                assert_panel_equal(result, expected)

    def test_frame_select(self):

        df = tm.makeTimeDataFrame()

        with ensure_clean_store(self.path) as store:
            store.put('frame', df, format='table')
            date = df.index[len(df) // 2]

            crit1 = Term('index>=date')
            assert crit1.env.scope['date'] == date

            crit2 = ("columns=['A', 'D']")
            crit3 = ('columns=A')

            result = store.select('frame', [crit1, crit2])
            expected = df.loc[date:, ['A', 'D']]
            tm.assert_frame_equal(result, expected)

            result = store.select('frame', [crit3])
            expected = df.loc[:, ['A']]
            tm.assert_frame_equal(result, expected)

            # invalid terms
            df = tm.makeTimeDataFrame()
            store.append('df_time', df)
            pytest.raises(
                ValueError, store.select, 'df_time', "index>0")

            # can't select if not written as table
            # store['frame'] = df
            # pytest.raises(ValueError, store.select,
            #               'frame', [crit1, crit2])

    def test_frame_select_complex(self):
        # select via complex criteria

        df = tm.makeTimeDataFrame()
        df['string'] = 'foo'
        df.loc[df.index[0:4], 'string'] = 'bar'

        with ensure_clean_store(self.path) as store:
            store.put('df', df, format='table', data_columns=['string'])

            # empty
            result = store.select('df', 'index>df.index[3] & string="bar"')
            expected = df.loc[(df.index > df.index[3]) & (df.string == 'bar')]
            tm.assert_frame_equal(result, expected)

            result = store.select('df', 'index>df.index[3] & string="foo"')
            expected = df.loc[(df.index > df.index[3]) & (df.string == 'foo')]
            tm.assert_frame_equal(result, expected)

            # or
            result = store.select('df', 'index>df.index[3] | string="bar"')
            expected = df.loc[(df.index > df.index[3]) | (df.string == 'bar')]
            tm.assert_frame_equal(result, expected)

            result = store.select('df', '(index>df.index[3] & '
                                  'index<=df.index[6]) | string="bar"')
            expected = df.loc[((df.index > df.index[3]) & (
                df.index <= df.index[6])) | (df.string == 'bar')]
            tm.assert_frame_equal(result, expected)

            # invert
            result = store.select('df', 'string!="bar"')
            expected = df.loc[df.string != 'bar']
            tm.assert_frame_equal(result, expected)

            # invert not implemented in numexpr :(
            pytest.raises(NotImplementedError,
                          store.select, 'df', '~(string="bar")')

            # invert ok for filters
            result = store.select('df', "~(columns=['A','B'])")
            expected = df.loc[:, df.columns.difference(['A', 'B'])]
            tm.assert_frame_equal(result, expected)

            # in
            result = store.select(
                'df', "index>df.index[3] & columns in ['A','B']")
            expected = df.loc[df.index > df.index[3]].reindex(columns=[
                'A', 'B'])
            tm.assert_frame_equal(result, expected)

    def test_frame_select_complex2(self):

        with ensure_clean_path(['parms.hdf', 'hist.hdf']) as paths:

            pp, hh = paths

            # use non-trivial selection criteria
            parms = DataFrame({'A': [1, 1, 2, 2, 3]})
            parms.to_hdf(pp, 'df', mode='w',
                         format='table', data_columns=['A'])

            selection = read_hdf(pp, 'df', where='A=[2,3]')
            hist = DataFrame(np.random.randn(25, 1),
                             columns=['data'],
                             index=MultiIndex.from_tuples(
                                 [(i, j) for i in range(5)
                                  for j in range(5)],
                                 names=['l1', 'l2']))

            hist.to_hdf(hh, 'df', mode='w', format='table')

            expected = read_hdf(hh, 'df', where='l1=[2, 3, 4]')

            # scope with list like
            l = selection.index.tolist()  # noqa
            store = HDFStore(hh)
            result = store.select('df', where='l1=l')
            assert_frame_equal(result, expected)
            store.close()

            result = read_hdf(hh, 'df', where='l1=l')
            assert_frame_equal(result, expected)

            # index
            index = selection.index  # noqa
            result = read_hdf(hh, 'df', where='l1=index')
            assert_frame_equal(result, expected)

            result = read_hdf(hh, 'df', where='l1=selection.index')
            assert_frame_equal(result, expected)

            result = read_hdf(hh, 'df', where='l1=selection.index.tolist()')
            assert_frame_equal(result, expected)

            result = read_hdf(hh, 'df', where='l1=list(selection.index)')
            assert_frame_equal(result, expected)

            # scope with index
            store = HDFStore(hh)

            result = store.select('df', where='l1=index')
            assert_frame_equal(result, expected)

            result = store.select('df', where='l1=selection.index')
            assert_frame_equal(result, expected)

            result = store.select('df', where='l1=selection.index.tolist()')
            assert_frame_equal(result, expected)

            result = store.select('df', where='l1=list(selection.index)')
            assert_frame_equal(result, expected)

            store.close()
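
    # As exercised above, a ``where`` string may reference locals (``l``,
    # ``index``), attribute lookups (``selection.index``) and simple method
    # calls (``selection.index.tolist()``); the parser evaluates these in the
    # caller's namespace before building the query.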

    def test_invalid_filtering(self):

        # can't use more than one filter (atm)

        df = tm.makeTimeDataFrame()

        with ensure_clean_store(self.path) as store:
            store.put('df', df, format='table')

            # not implemented
            pytest.raises(NotImplementedError, store.select,
                          'df', "columns=['A'] | columns=['B']")

            # in theory we could deal with this
            pytest.raises(NotImplementedError, store.select,
                          'df', "columns=['A','B'] & columns=['C']")

    def test_string_select(self):
        # GH 2973
        with ensure_clean_store(self.path) as store:

            df = tm.makeTimeDataFrame()

            # test string ==/!=
            df['x'] = 'none'
            df.loc[2:7, 'x'] = ''

            store.append('df', df, data_columns=['x'])

            result = store.select('df', 'x=none')
            expected = df[df.x == 'none']
            assert_frame_equal(result, expected)

            try:
                result = store.select('df', 'x!=none')
                expected = df[df.x != 'none']
                assert_frame_equal(result, expected)
            except Exception as detail:
                pprint_thing("[{0}]".format(detail))
                pprint_thing(store)
                pprint_thing(expected)

            df2 = df.copy()
            df2.loc[df2.x == '', 'x'] = np.nan

            store.append('df2', df2, data_columns=['x'])
            result = store.select('df2', 'x!=none')
            expected = df2[isna(df2.x)]
            assert_frame_equal(result, expected)

            # int ==/!=
            df['int'] = 1
            df.loc[2:7, 'int'] = 2

            store.append('df3', df, data_columns=['int'])

            result = store.select('df3', 'int=2')
            expected = df[df.int == 2]
            assert_frame_equal(result, expected)

            result = store.select('df3', 'int!=2')
            expected = df[df.int != 2]
            assert_frame_equal(result, expected)

    def test_read_column(self):

        df = tm.makeTimeDataFrame()

        with ensure_clean_store(self.path) as store:
            _maybe_remove(store, 'df')

            # GH 17912
            # HDFStore.select_column should raise a KeyError
            # exception if the key is not a valid store
            with pytest.raises(KeyError,
                               match='No object named df in the file'):
                store.select_column('df', 'index')

            store.append('df', df)

            # error
            pytest.raises(KeyError, store.select_column, 'df', 'foo')

            def f():
                store.select_column('df', 'index', where=['index>5'])
            pytest.raises(Exception, f)

            # valid
            result = store.select_column('df', 'index')
            tm.assert_almost_equal(result.values, Series(df.index).values)
            assert isinstance(result, Series)

            # not a data indexable column
            pytest.raises(
                ValueError, store.select_column, 'df', 'values_block_0')

            # a data column
            df2 = df.copy()
            df2['string'] = 'foo'
            store.append('df2', df2, data_columns=['string'])
            result = store.select_column('df2', 'string')
            tm.assert_almost_equal(result.values, df2['string'].values)

            # a data column with NaNs, result excludes the NaNs
            df3 = df.copy()
            df3['string'] = 'foo'
            df3.loc[4:6, 'string'] = np.nan
            store.append('df3', df3, data_columns=['string'])
            result = store.select_column('df3', 'string')
            tm.assert_almost_equal(result.values, df3['string'].values)

            # start/stop
            result = store.select_column('df3', 'string', start=2)
            tm.assert_almost_equal(result.values, df3['string'].values[2:])

            result = store.select_column('df3', 'string', start=-2)
            tm.assert_almost_equal(result.values, df3['string'].values[-2:])

            result = store.select_column('df3', 'string', stop=2)
            tm.assert_almost_equal(result.values, df3['string'].values[:2])

            result = store.select_column('df3', 'string', stop=-2)
            tm.assert_almost_equal(result.values, df3['string'].values[:-2])

            result = store.select_column('df3', 'string', start=2, stop=-2)
            tm.assert_almost_equal(result.values, df3['string'].values[2:-2])

            result = store.select_column('df3', 'string', start=-2, stop=2)
            tm.assert_almost_equal(result.values, df3['string'].values[-2:2])

            # GH 10392 - make sure column name is preserved
            df4 = DataFrame({'A': np.random.randn(10), 'B': 'foo'})
            store.append('df4', df4, data_columns=True)
            expected = df4['B']
            result = store.select_column('df4', 'B')
            tm.assert_series_equal(result, expected)
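
    # ``select_column`` reads a single indexable or data column without
    # materializing the rest of the table.  A minimal sketch (key and column
    # names are illustrative):
    #
    #     idx = store.select_column('df2', 'index')    # the stored index
    #     s = store.select_column('df2', 'string')     # a data column
    #
    # Columns folded into ``values_block_*`` arrays are not reachable this
    # way, hence the ValueError above.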

    def test_coordinates(self):
        df = tm.makeTimeDataFrame()

        with ensure_clean_store(self.path) as store:

            _maybe_remove(store, 'df')
            store.append('df', df)

            # all
            c = store.select_as_coordinates('df')
            assert((c.values == np.arange(len(df.index))).all())

            # get coordinates back & test vs frame
            _maybe_remove(store, 'df')

            df = DataFrame(dict(A=lrange(5), B=lrange(5)))
            store.append('df', df)
            c = store.select_as_coordinates('df', ['index<3'])
            assert((c.values == np.arange(3)).all())
            result = store.select('df', where=c)
            expected = df.loc[0:2, :]
            tm.assert_frame_equal(result, expected)

            c = store.select_as_coordinates('df', ['index>=3', 'index<=4'])
            assert((c.values == np.arange(2) + 3).all())
            result = store.select('df', where=c)
            expected = df.loc[3:4, :]
            tm.assert_frame_equal(result, expected)
            assert isinstance(c, Index)

            # multiple tables
            _maybe_remove(store, 'df1')
            _maybe_remove(store, 'df2')
            df1 = tm.makeTimeDataFrame()
            df2 = tm.makeTimeDataFrame().rename(columns=lambda x: "%s_2" % x)
            store.append('df1', df1, data_columns=['A', 'B'])
            store.append('df2', df2)

            c = store.select_as_coordinates('df1', ['A>0', 'B>0'])
            df1_result = store.select('df1', c)
            df2_result = store.select('df2', c)
            result = concat([df1_result, df2_result], axis=1)

            expected = concat([df1, df2], axis=1)
            expected = expected[(expected.A > 0) & (expected.B > 0)]
            tm.assert_frame_equal(result, expected)

        # pass array/mask as the coordinates
        with ensure_clean_store(self.path) as store:

            df = DataFrame(np.random.randn(1000, 2),
                           index=date_range('20000101', periods=1000))
            store.append('df', df)
            c = store.select_column('df', 'index')
            where = c[DatetimeIndex(c).month == 5].index
            expected = df.iloc[where]

            # locations
            result = store.select('df', where=where)
            tm.assert_frame_equal(result, expected)

            # boolean
            result = store.select('df', where=where)
            tm.assert_frame_equal(result, expected)

            # invalid
            pytest.raises(ValueError, store.select, 'df',
                          where=np.arange(len(df), dtype='float64'))
            pytest.raises(ValueError, store.select, 'df',
                          where=np.arange(len(df) + 1))
            pytest.raises(ValueError, store.select, 'df',
                          where=np.arange(len(df)), start=5)
            pytest.raises(ValueError, store.select, 'df',
                          where=np.arange(len(df)), start=5, stop=10)

            # selection with filter
            selection = date_range('20000101', periods=500)
            result = store.select('df', where='index in selection')
            expected = df[df.index.isin(selection)]
            tm.assert_frame_equal(result, expected)

            # list
            df = DataFrame(np.random.randn(10, 2))
            store.append('df2', df)
            result = store.select('df2', where=[0, 3, 5])
            expected = df.iloc[[0, 3, 5]]
            tm.assert_frame_equal(result, expected)

            # boolean
            where = [True] * 10
            where[-2] = False
            result = store.select('df2', where=where)
            expected = df.loc[where]
            tm.assert_frame_equal(result, expected)

            # start/stop
            result = store.select('df2', start=5, stop=10)
            expected = df[5:10]
            tm.assert_frame_equal(result, expected)
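
    # ``select_as_coordinates`` returns the matching row numbers as an
    # ``Index`` that can be fed back as the ``where`` of later selects; as
    # exercised above, the same slot also accepts a list of row locations, a
    # boolean mask the length of the table, or a start/stop slice.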

    def test_append_to_multiple(self):
        df1 = tm.makeTimeDataFrame()
        df2 = tm.makeTimeDataFrame().rename(columns=lambda x: "%s_2" % x)
        df2['foo'] = 'bar'
        df = concat([df1, df2], axis=1)

        with ensure_clean_store(self.path) as store:

            # exceptions
            pytest.raises(ValueError, store.append_to_multiple,
                          {'df1': ['A', 'B'], 'df2': None}, df,
                          selector='df3')
            pytest.raises(ValueError, store.append_to_multiple,
                          {'df1': None, 'df2': None}, df, selector='df3')
            pytest.raises(
                ValueError, store.append_to_multiple, 'df1', df, 'df1')

            # regular operation
            store.append_to_multiple(
                {'df1': ['A', 'B'], 'df2': None}, df, selector='df1')
            result = store.select_as_multiple(
                ['df1', 'df2'], where=['A>0', 'B>0'], selector='df1')
            expected = df[(df.A > 0) & (df.B > 0)]
            tm.assert_frame_equal(result, expected)

    def test_append_to_multiple_dropna(self):
        df1 = tm.makeTimeDataFrame()
        df2 = tm.makeTimeDataFrame().rename(columns=lambda x: "%s_2" % x)
        df1.iloc[1, df1.columns.get_indexer(['A', 'B'])] = np.nan
        df = concat([df1, df2], axis=1)

        with ensure_clean_store(self.path) as store:

            # dropna=True should guarantee rows are synchronized
            store.append_to_multiple(
                {'df1': ['A', 'B'], 'df2': None}, df, selector='df1',
                dropna=True)
            result = store.select_as_multiple(['df1', 'df2'])
            expected = df.dropna()
            tm.assert_frame_equal(result, expected)
            tm.assert_index_equal(store.select('df1').index,
                                  store.select('df2').index)

    @pytest.mark.xfail(run=False,
                       reason="append_to_multiple_dropna_false "
                              "is not raising as failed")
    def test_append_to_multiple_dropna_false(self):
        df1 = tm.makeTimeDataFrame()
        df2 = tm.makeTimeDataFrame().rename(columns=lambda x: "%s_2" % x)
        df1.iloc[1, df1.columns.get_indexer(['A', 'B'])] = np.nan
        df = concat([df1, df2], axis=1)

        with ensure_clean_store(self.path) as store:

            # dropna=False shouldn't synchronize row indexes
            store.append_to_multiple(
                {'df1a': ['A', 'B'], 'df2a': None}, df, selector='df1a',
                dropna=False)

            with pytest.raises(ValueError):
                store.select_as_multiple(['df1a', 'df2a'])

            assert not store.select('df1a').index.equals(
                store.select('df2a').index)

    def test_select_as_multiple(self):

        df1 = tm.makeTimeDataFrame()
        df2 = tm.makeTimeDataFrame().rename(columns=lambda x: "%s_2" % x)
        df2['foo'] = 'bar'

        with ensure_clean_store(self.path) as store:

            # no tables stored
            pytest.raises(Exception, store.select_as_multiple,
                          None, where=['A>0', 'B>0'], selector='df1')

            store.append('df1', df1, data_columns=['A', 'B'])
            store.append('df2', df2)

            # exceptions
            pytest.raises(Exception, store.select_as_multiple,
                          None, where=['A>0', 'B>0'], selector='df1')
            pytest.raises(Exception, store.select_as_multiple,
                          [None], where=['A>0', 'B>0'], selector='df1')
            pytest.raises(KeyError, store.select_as_multiple,
                          ['df1', 'df3'], where=['A>0', 'B>0'],
                          selector='df1')
            pytest.raises(KeyError, store.select_as_multiple,
                          ['df3'], where=['A>0', 'B>0'], selector='df1')
            pytest.raises(KeyError, store.select_as_multiple,
                          ['df1', 'df2'], where=['A>0', 'B>0'],
                          selector='df4')

            # default select
            result = store.select('df1', ['A>0', 'B>0'])
            expected = store.select_as_multiple(
                ['df1'], where=['A>0', 'B>0'], selector='df1')
            tm.assert_frame_equal(result, expected)
            expected = store.select_as_multiple(
                'df1', where=['A>0', 'B>0'], selector='df1')
            tm.assert_frame_equal(result, expected)

            # multiple
            result = store.select_as_multiple(
                ['df1', 'df2'], where=['A>0', 'B>0'], selector='df1')
            expected = concat([df1, df2], axis=1)
            expected = expected[(expected.A > 0) & (expected.B > 0)]
            tm.assert_frame_equal(result, expected)

            # multiple (diff selector)
            result = store.select_as_multiple(
                ['df1', 'df2'], where='index>df2.index[4]', selector='df2')
            expected = concat([df1, df2], axis=1)
            expected = expected[5:]
            tm.assert_frame_equal(result, expected)

            # test exception for diff rows
            store.append('df3', tm.makeTimeDataFrame(nper=50))
            pytest.raises(ValueError, store.select_as_multiple,
                          ['df1', 'df3'], where=['A>0', 'B>0'],
                          selector='df1')

    @pytest.mark.skipif(
        LooseVersion(tables.__version__) < LooseVersion('3.1.0'),
        reason=("tables version does not support fix for nan selection "
                "bug: GH 4858"))
    def test_nan_selection_bug_4858(self):

        with ensure_clean_store(self.path) as store:

            df = DataFrame(dict(cols=range(6), values=range(6)),
                           dtype='float64')
            df['cols'] = (df['cols'] + 10).apply(str)
            df.iloc[0] = np.nan

            expected = DataFrame(dict(cols=['13.0', '14.0', '15.0'], values=[
                3., 4., 5.]), index=[3, 4, 5])

            # write w/o the index on that particular column
            store.append('df', df, data_columns=True, index=['cols'])
            result = store.select('df', where='values>2.0')
            assert_frame_equal(result, expected)

    def test_start_stop_table(self):

        with ensure_clean_store(self.path) as store:

            # table
            df = DataFrame(dict(A=np.random.rand(20), B=np.random.rand(20)))
            store.append('df', df)

            result = store.select(
                'df', "columns=['A']", start=0, stop=5)
            expected = df.loc[0:4, ['A']]
            tm.assert_frame_equal(result, expected)

            # out of range
            result = store.select(
                'df', "columns=['A']", start=30, stop=40)
            assert len(result) == 0
            expected = df.loc[30:40, ['A']]
            tm.assert_frame_equal(result, expected)

    def test_start_stop_multiple(self):

        # GH 16209
        with ensure_clean_store(self.path) as store:

            df = DataFrame({"foo": [1, 2], "bar": [1, 2]})

            store.append_to_multiple({'selector': ['foo'], 'data': None}, df,
                                     selector='selector')
            result = store.select_as_multiple(['selector', 'data'],
                                              selector='selector', start=0,
                                              stop=1)
            expected = df.loc[[0], ['foo', 'bar']]
            tm.assert_frame_equal(result, expected)

    def test_start_stop_fixed(self):

        with ensure_clean_store(self.path) as store:

            # fixed, GH 8287
            df = DataFrame(dict(A=np.random.rand(20),
                                B=np.random.rand(20)),
                           index=pd.date_range('20130101', periods=20))
            store.put('df', df)

            result = store.select(
                'df', start=0, stop=5)
            expected = df.iloc[0:5, :]
            tm.assert_frame_equal(result, expected)

            result = store.select(
                'df', start=5, stop=10)
            expected = df.iloc[5:10, :]
            tm.assert_frame_equal(result, expected)

            # out of range
            result = store.select(
                'df', start=30, stop=40)
            expected = df.iloc[30:40, :]
            tm.assert_frame_equal(result, expected)

            # series
            s = df.A
            store.put('s', s)
            result = store.select('s', start=0, stop=5)
            expected = s.iloc[0:5]
            tm.assert_series_equal(result, expected)

            result = store.select('s', start=5, stop=10)
            expected = s.iloc[5:10]
            tm.assert_series_equal(result, expected)

            # sparse; not implemented
            df = tm.makeDataFrame()
            df.iloc[3:5, 1:3] = np.nan
            df.iloc[8:10, -2] = np.nan
            dfs = df.to_sparse()
            store.put('dfs', dfs)
            with pytest.raises(NotImplementedError):
                store.select('dfs', start=0, stop=5)

    def test_select_filter_corner(self):

        df = DataFrame(np.random.randn(50, 100))
        df.index = ['%.3d' % c for c in df.index]
        df.columns = ['%.3d' % c for c in df.columns]

        with ensure_clean_store(self.path) as store:
            store.put('frame', df, format='table')

            crit = 'columns=df.columns[:75]'
            result = store.select('frame', [crit])
            tm.assert_frame_equal(result, df.loc[:, df.columns[:75]])

            crit = 'columns=df.columns[:75:2]'
            result = store.select('frame', [crit])
            tm.assert_frame_equal(result, df.loc[:, df.columns[:75:2]])

    def test_path_pathlib(self):
        df = tm.makeDataFrame()

        result = tm.round_trip_pathlib(
            lambda p: df.to_hdf(p, 'df'),
            lambda p: pd.read_hdf(p, 'df'))
        tm.assert_frame_equal(df, result)

    @pytest.mark.parametrize('start, stop', [(0, 2), (1, 2), (None, None)])
    def test_contiguous_mixed_data_table(self, start, stop):
        # GH 17021
        # ValueError when reading a contiguous mixed-data table ft. VLArray
        df = DataFrame({'a': Series([20111010, 20111011, 20111012]),
                        'b': Series(['ab', 'cd', 'ab'])})

        with ensure_clean_store(self.path) as store:
            store.append('test_dataset', df)

            result = store.select('test_dataset', start=start, stop=stop)
            assert_frame_equal(df[start:stop], result)

    def test_path_pathlib_hdfstore(self):
        df = tm.makeDataFrame()

        def writer(path):
            with pd.HDFStore(path) as store:
                df.to_hdf(store, 'df')

        def reader(path):
            with pd.HDFStore(path) as store:
                return pd.read_hdf(store, 'df')

        result = tm.round_trip_pathlib(writer, reader)
        tm.assert_frame_equal(df, result)

    def test_pickle_path_localpath(self):
        df = tm.makeDataFrame()
        result = tm.round_trip_localpath(
            lambda p: df.to_hdf(p, 'df'),
            lambda p: pd.read_hdf(p, 'df'))
        tm.assert_frame_equal(df, result)

    def test_path_localpath_hdfstore(self):
        df = tm.makeDataFrame()

        def writer(path):
            with pd.HDFStore(path) as store:
                df.to_hdf(store, 'df')

        def reader(path):
            with pd.HDFStore(path) as store:
                return pd.read_hdf(store, 'df')

        result = tm.round_trip_localpath(writer, reader)
        tm.assert_frame_equal(df, result)

    def _check_roundtrip(self, obj, comparator, compression=False, **kwargs):

        options = {}
        if compression:
            options['complib'] = _default_compressor

        with ensure_clean_store(self.path, 'w', **options) as store:
            store['obj'] = obj
            retrieved = store['obj']
            comparator(retrieved, obj, **kwargs)

    def _check_double_roundtrip(self, obj, comparator, compression=False,
                                **kwargs):
        options = {}
        if compression:
            options['complib'] = compression or _default_compressor

        with ensure_clean_store(self.path, 'w', **options) as store:
            store['obj'] = obj
            retrieved = store['obj']
            comparator(retrieved, obj, **kwargs)
            store['obj'] = retrieved
            again = store['obj']
            comparator(again, obj, **kwargs)

    def _check_roundtrip_table(self, obj, comparator, compression=False):
        options = {}
        if compression:
            options['complib'] = _default_compressor

        with ensure_clean_store(self.path, 'w', **options) as store:
            store.put('obj', obj, format='table')
            retrieved = store['obj']
            comparator(retrieved, obj)
    def test_multiple_open_close(self):
        # gh-4409: open & close multiple times

        with ensure_clean_path(self.path) as path:

            df = tm.makeDataFrame()
            df.to_hdf(path, 'df', mode='w', format='table')

            # single
            store = HDFStore(path)
            assert 'CLOSED' not in store.info()
            assert store.is_open

            store.close()
            assert 'CLOSED' in store.info()
            assert not store.is_open

        with ensure_clean_path(self.path) as path:
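            # Behavior depends on PyTables' file-open policy: under the
            # 'strict' policy a second handle on an open file raises
            # ValueError; otherwise multiple handles may coexist.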
            if pytables._table_file_open_policy_is_strict:

                # multiples
                store1 = HDFStore(path)

                def f():
                    HDFStore(path)

                pytest.raises(ValueError, f)
                store1.close()

            else:

                # multiples
                store1 = HDFStore(path)
                store2 = HDFStore(path)

                assert 'CLOSED' not in store1.info()
                assert 'CLOSED' not in store2.info()
                assert store1.is_open
                assert store2.is_open

                store1.close()
                assert 'CLOSED' in store1.info()
                assert not store1.is_open
                assert 'CLOSED' not in store2.info()
                assert store2.is_open

                store2.close()
                assert 'CLOSED' in store1.info()
                assert 'CLOSED' in store2.info()
                assert not store1.is_open
                assert not store2.is_open

                # nested close
                store = HDFStore(path, mode='w')
                store.append('df', df)

                store2 = HDFStore(path)
                store2.append('df2', df)
                store2.close()
                assert 'CLOSED' in store2.info()
                assert not store2.is_open

                store.close()
                assert 'CLOSED' in store.info()
                assert not store.is_open

                # double closing
                store = HDFStore(path, mode='w')
                store.append('df', df)

                store2 = HDFStore(path)
                store.close()
                assert 'CLOSED' in store.info()
                assert not store.is_open

                store2.close()
                assert 'CLOSED' in store2.info()
                assert not store2.is_open

        # ops on a closed store
        with ensure_clean_path(self.path) as path:

            df = tm.makeDataFrame()
            df.to_hdf(path, 'df', mode='w', format='table')

            store = HDFStore(path)
            store.close()

            pytest.raises(ClosedFileError, store.keys)
            pytest.raises(ClosedFileError, lambda: 'df' in store)
            pytest.raises(ClosedFileError, lambda: len(store))
            pytest.raises(ClosedFileError, lambda: store['df'])
            pytest.raises(AttributeError, lambda: store.df)
            pytest.raises(ClosedFileError, store.select, 'df')
            pytest.raises(ClosedFileError, store.get, 'df')
            pytest.raises(ClosedFileError, store.append, 'df2', df)
            pytest.raises(ClosedFileError, store.put, 'df3', df)
            pytest.raises(ClosedFileError, store.get_storer, 'df2')
            pytest.raises(ClosedFileError, store.remove, 'df2')

            with pytest.raises(ClosedFileError, match='file is not open'):
                store.select('df')
    def test_pytables_native_read(self, datapath):
        with ensure_clean_store(
                datapath('io', 'data', 'legacy_hdf/pytables_native.h5'),
                mode='r') as store:
            d2 = store['detector/readout']
            assert isinstance(d2, DataFrame)
    @pytest.mark.skipif(PY35 and is_platform_windows(),
                        reason="native2 read fails oddly on windows / 3.5")
    def test_pytables_native2_read(self, datapath):
        with ensure_clean_store(
                datapath('io', 'data', 'legacy_hdf', 'pytables_native2.h5'),
                mode='r') as store:
            str(store)
            d1 = store['detector']
            assert isinstance(d1, DataFrame)
    @xfail_non_writeable
    def test_legacy_table_fixed_format_read_py2(self, datapath):
        # GH 24510
        # legacy table with fixed format written in Python 2
        with ensure_clean_store(
                datapath('io', 'data', 'legacy_hdf',
                         'legacy_table_fixed_py2.h5'),
                mode='r') as store:
            result = store.select('df')
            expected = pd.DataFrame([[1, 2, 3, 'D']],
                                    columns=['A', 'B', 'C', 'D'],
                                    index=pd.Index(['ABC'],
                                                   name='INDEX_NAME'))
            assert_frame_equal(expected, result)
    def test_legacy_table_read_py2(self, datapath):
        # issue: 24925
        # legacy table written in Python 2
        with ensure_clean_store(
                datapath('io', 'data', 'legacy_hdf',
                         'legacy_table_py2.h5'),
                mode='r') as store:
            result = store.select('table')

        expected = pd.DataFrame({
            "a": ["a", "b"],
            "b": [2, 3]
        })
        assert_frame_equal(expected, result)
    def test_legacy_table_read(self, datapath):
        # legacy table types
        with ensure_clean_store(
                datapath('io', 'data', 'legacy_hdf', 'legacy_table.h5'),
                mode='r') as store:

            with catch_warnings():
                simplefilter("ignore", pd.io.pytables.IncompatibilityWarning)
                store.select('df1')
                store.select('df2')
                store.select('wp1')

                # force the frame
                store.select('df2', typ='legacy_frame')

                # old version warning
                pytest.raises(
                    Exception, store.select, 'wp1', 'minor_axis=B')

                df2 = store.select('df2')
                result = store.select('df2', 'index>df2.index[2]')
                expected = df2[df2.index > df2.index[2]]
                assert_frame_equal(expected, result)
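    # HDFStore.copy: do_copy below copies a store to a new file and checks
    # that keys, table row counts and (when propindexes=True) any created
    # indexes survive the copy.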
    def test_copy(self):

        with catch_warnings(record=True):

            def do_copy(f, new_f=None, keys=None,
                        propindexes=True, **kwargs):
                try:
                    store = HDFStore(f, 'r')

                    if new_f is None:
                        import tempfile
                        fd, new_f = tempfile.mkstemp()

                    tstore = store.copy(
                        new_f, keys=keys, propindexes=propindexes, **kwargs)

                    # check keys
                    if keys is None:
                        keys = store.keys()
                    assert set(keys) == set(tstore.keys())

                    # check indices & nrows
                    for k in tstore.keys():
                        if tstore.get_storer(k).is_table:
                            new_t = tstore.get_storer(k)
                            orig_t = store.get_storer(k)

                            assert orig_t.nrows == new_t.nrows

                            # check propindexes
                            if propindexes:
                                for a in orig_t.axes:
                                    if a.is_indexed:
                                        assert new_t[a.name].is_indexed
                finally:
                    safe_close(store)
                    safe_close(tstore)

                    try:
                        os.close(fd)
                    except (OSError, ValueError):
                        pass
                    safe_remove(new_f)

            # new table
            df = tm.makeDataFrame()

            try:
                path = create_tempfile(self.path)
                st = HDFStore(path)
                st.append('df', df, data_columns=['A'])
                st.close()

                do_copy(f=path)
                do_copy(f=path, propindexes=False)
            finally:
                safe_remove(path)
    def test_store_datetime_fractional_secs(self):

        with ensure_clean_store(self.path) as store:
            dt = datetime.datetime(2012, 1, 2, 3, 4, 5, 123456)
            series = Series([0], [dt])
            store['a'] = series
            assert store['a'].index[0] == dt
    def test_tseries_indices_series(self):

        with ensure_clean_store(self.path) as store:
            idx = tm.makeDateIndex(10)
            ser = Series(np.random.randn(len(idx)), idx)
            store['a'] = ser
            result = store['a']

            tm.assert_series_equal(result, ser)
            assert result.index.freq == ser.index.freq
            tm.assert_class_equal(result.index, ser.index, obj="series index")

            idx = tm.makePeriodIndex(10)
            ser = Series(np.random.randn(len(idx)), idx)
            store['a'] = ser
            result = store['a']

            tm.assert_series_equal(result, ser)
            assert result.index.freq == ser.index.freq
            tm.assert_class_equal(result.index, ser.index, obj="series index")
    def test_tseries_indices_frame(self):

        with ensure_clean_store(self.path) as store:
            idx = tm.makeDateIndex(10)
            df = DataFrame(np.random.randn(len(idx), 3), index=idx)
            store['a'] = df
            result = store['a']

            assert_frame_equal(result, df)
            assert result.index.freq == df.index.freq
            tm.assert_class_equal(result.index, df.index,
                                  obj="dataframe index")

            idx = tm.makePeriodIndex(10)
            df = DataFrame(np.random.randn(len(idx), 3), idx)
            store['a'] = df
            result = store['a']

            assert_frame_equal(result, df)
            assert result.index.freq == df.index.freq
            tm.assert_class_equal(result.index, df.index,
                                  obj="dataframe index")
    def test_unicode_index(self):

        unicode_values = [u('\u03c3'), u('\u03c3\u03c3')]

        # PerformanceWarning
        with catch_warnings(record=True):
            simplefilter("ignore", pd.errors.PerformanceWarning)
            s = Series(np.random.randn(len(unicode_values)), unicode_values)
            self._check_roundtrip(s, tm.assert_series_equal)
    def test_unicode_longer_encoded(self):
        # GH 11234
        char = '\u0394'
        df = pd.DataFrame({'A': [char]})
        with ensure_clean_store(self.path) as store:
            store.put('df', df, format='table', encoding='utf-8')
            result = store.get('df')
            tm.assert_frame_equal(result, df)

        df = pd.DataFrame({'A': ['a', char], 'B': ['b', 'b']})
        with ensure_clean_store(self.path) as store:
            store.put('df', df, format='table', encoding='utf-8')
            result = store.get('df')
            tm.assert_frame_equal(result, df)
    @xfail_non_writeable
    def test_store_datetime_mixed(self):

        df = DataFrame(
            {'a': [1, 2, 3], 'b': [1., 2., 3.], 'c': ['a', 'b', 'c']})
        ts = tm.makeTimeSeries()
        df['d'] = ts.index[:3]
        self._check_roundtrip(df, tm.assert_frame_equal)

    # def test_cant_write_multiindex_table(self):
    #     # for now, #1848
    #     df = DataFrame(np.random.randn(10, 4),
    #                    index=[np.arange(5).repeat(2),
    #                           np.tile(np.arange(2), 5)])
    #     pytest.raises(Exception, store.put, 'foo', df, format='table')
    def test_append_with_diff_col_name_types_raises_value_error(self):
        df = DataFrame(np.random.randn(10, 1))
        df2 = DataFrame({'a': np.random.randn(10)})
        df3 = DataFrame({(1, 2): np.random.randn(10)})
        df4 = DataFrame({('1', 2): np.random.randn(10)})
        df5 = DataFrame({('1', 2, object): np.random.randn(10)})

        with ensure_clean_store(self.path) as store:
            name = 'df_%s' % tm.rands(10)
            store.append(name, df)

            for d in (df2, df3, df4, df5):
                with pytest.raises(ValueError):
                    store.append(name, d)
    def test_query_with_nested_special_character(self):
        df = DataFrame({'a': ['a', 'a', 'c', 'b',
                              'test & test', 'c', 'b', 'e'],
                        'b': [1, 2, 3, 4, 5, 6, 7, 8]})
        expected = df[df.a == 'test & test']
        with ensure_clean_store(self.path) as store:
            store.append('test', df, format='table', data_columns=True)
            result = store.select('test', 'a = "test & test"')
            tm.assert_frame_equal(expected, result)
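    # Categoricals round-trip through the table format: the codes are stored
    # in the table itself while the categories live in a /<key>/meta/... node,
    # which the metadata checks below rely on.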
    def test_categorical(self):

        with ensure_clean_store(self.path) as store:

            # Basic
            _maybe_remove(store, 's')
            s = Series(Categorical(['a', 'b', 'b', 'a', 'a', 'c'], categories=[
                       'a', 'b', 'c', 'd'], ordered=False))
            store.append('s', s, format='table')
            result = store.select('s')
            tm.assert_series_equal(s, result)

            _maybe_remove(store, 's_ordered')
            s = Series(Categorical(['a', 'b', 'b', 'a', 'a', 'c'], categories=[
                       'a', 'b', 'c', 'd'], ordered=True))
            store.append('s_ordered', s, format='table')
            result = store.select('s_ordered')
            tm.assert_series_equal(s, result)

            _maybe_remove(store, 'df')
            df = DataFrame({"s": s, "vals": [1, 2, 3, 4, 5, 6]})
            store.append('df', df, format='table')
            result = store.select('df')
            tm.assert_frame_equal(result, df)

            # Dtypes
            _maybe_remove(store, 'si')
            s = Series([1, 1, 2, 2, 3, 4, 5]).astype('category')
            store.append('si', s)
            result = store.select('si')
            tm.assert_series_equal(result, s)

            _maybe_remove(store, 'si2')
            s = Series([1, 1, np.nan, 2, 3, 4, 5]).astype('category')
            store.append('si2', s)
            result = store.select('si2')
            tm.assert_series_equal(result, s)

            # Multiple
            _maybe_remove(store, 'df2')
            df2 = df.copy()
            df2['s2'] = Series(list('abcdefg')).astype('category')
            store.append('df2', df2)
            result = store.select('df2')
            tm.assert_frame_equal(result, df2)

            # Make sure the metadata is OK
            info = store.info()
            assert '/df2 ' in info
            # assert '/df2/meta/values_block_0/meta' in info
            assert '/df2/meta/values_block_1/meta' in info

            # unordered
            _maybe_remove(store, 's2')
            s = Series(Categorical(['a', 'b', 'b', 'a', 'a', 'c'], categories=[
                       'a', 'b', 'c', 'd'], ordered=False))
            store.append('s2', s, format='table')
            result = store.select('s2')
            tm.assert_series_equal(result, s)

            # Query
            _maybe_remove(store, 'df3')
            store.append('df3', df, data_columns=['s'])
            expected = df[df.s.isin(['b', 'c'])]
            result = store.select('df3', where=['s in ["b","c"]'])
            tm.assert_frame_equal(result, expected)
            expected = df[df.s.isin(['b', 'c'])]
            result = store.select('df3', where=['s = ["b","c"]'])
            tm.assert_frame_equal(result, expected)
            expected = df[df.s.isin(['d'])]
            result = store.select('df3', where=['s in ["d"]'])
            tm.assert_frame_equal(result, expected)
            expected = df[df.s.isin(['f'])]
            result = store.select('df3', where=['s in ["f"]'])
            tm.assert_frame_equal(result, expected)

            # Appending with the same categories is ok
            store.append('df3', df)
            df = concat([df, df])
            expected = df[df.s.isin(['b', 'c'])]
            result = store.select('df3', where=['s in ["b","c"]'])
            tm.assert_frame_equal(result, expected)

            # Appending must have the same categories
            df3 = df.copy()
            df3['s'].cat.remove_unused_categories(inplace=True)

            with pytest.raises(ValueError):
                store.append('df3', df3)

            # Remove, and make sure the metadata is removed as well (it's a
            # recursive removal, so it should be).
            result = store.select('df3/meta/s/meta')
            assert result is not None
            store.remove('df3')

            with pytest.raises(KeyError):
                store.select('df3/meta/s/meta')
    def test_categorical_conversion(self):

        # GH13322
        # Check that read_hdf with categorical columns doesn't return rows
        # when the where criterion isn't met.
        obsids = ['ESP_012345_6789', 'ESP_987654_3210']
        imgids = ['APF00006np', 'APF0001imm']
        data = [4.3, 9.8]

        # Test without categories
        df = DataFrame(dict(obsids=obsids, imgids=imgids, data=data))

        # We are expecting an empty DataFrame matching types of df
        expected = df.iloc[[], :]
        with ensure_clean_path(self.path) as path:
            df.to_hdf(path, 'df', format='table', data_columns=True)
            result = read_hdf(path, 'df', where='obsids=B')
            tm.assert_frame_equal(result, expected)

        # Test with categories
        df.obsids = df.obsids.astype('category')
        df.imgids = df.imgids.astype('category')

        # We are expecting an empty DataFrame matching types of df
        expected = df.iloc[[], :]
        with ensure_clean_path(self.path) as path:
            df.to_hdf(path, 'df', format='table', data_columns=True)
            result = read_hdf(path, 'df', where='obsids=B')
            tm.assert_frame_equal(result, expected)
    def test_categorical_nan_only_columns(self):
        # GH18413
        # Check that read_hdf with categorical columns with NaN-only values
        # can be read back.
        df = pd.DataFrame({
            'a': ['a', 'b', 'c', np.nan],
            'b': [np.nan, np.nan, np.nan, np.nan],
            'c': [1, 2, 3, 4],
            'd': pd.Series([None] * 4, dtype=object)
        })
        df['a'] = df.a.astype('category')
        df['b'] = df.b.astype('category')
        df['d'] = df.d.astype('category')
        expected = df
        with ensure_clean_path(self.path) as path:
            df.to_hdf(path, 'df', format='table', data_columns=True)
            result = read_hdf(path, 'df')
            tm.assert_frame_equal(result, expected)
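    # Duplicate column names cannot be stored in the fixed format, but the
    # table format round-trips them.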
    def test_duplicate_column_name(self):
        df = DataFrame(columns=["a", "a"], data=[[0, 0]])

        with ensure_clean_path(self.path) as path:
            pytest.raises(ValueError, df.to_hdf,
                          path, 'df', format='fixed')

            df.to_hdf(path, 'df', format='table')
            other = read_hdf(path, 'df')

            tm.assert_frame_equal(df, other)
            assert df.equals(other)
            assert other.equals(df)
    def test_round_trip_equals(self):
        # GH 9330
        df = DataFrame({"B": [1, 2], "A": ["x", "y"]})

        with ensure_clean_path(self.path) as path:
            df.to_hdf(path, 'df', format='table')
            other = read_hdf(path, 'df')
            tm.assert_frame_equal(df, other)
            assert df.equals(other)
            assert other.equals(df)
    def test_preserve_timedeltaindex_type(self):
        # GH9635
        # Storing TimedeltaIndexed DataFrames in fixed stores did not preserve
        # the type of the index.
        df = DataFrame(np.random.normal(size=(10, 5)))
        df.index = timedelta_range(
            start='0s', periods=10, freq='1s', name='example')

        with ensure_clean_store(self.path) as store:
            store['df'] = df
            assert_frame_equal(store['df'], df)
    def test_columns_multiindex_modified(self):
        # BUG: 7212
        # read_hdf store.select modified the passed columns parameters
        # when multi-indexed.
        df = DataFrame(np.random.rand(4, 5),
                       index=list('abcd'),
                       columns=list('ABCDE'))
        df.index.name = 'letters'
        df = df.set_index(keys='E', append=True)

        data_columns = df.index.names + df.columns.tolist()
        with ensure_clean_path(self.path) as path:
            df.to_hdf(path, 'df',
                      mode='a',
                      append=True,
                      data_columns=data_columns,
                      index=False)
            cols2load = list('BCD')
            cols2load_original = list(cols2load)
            df_loaded = read_hdf(path, 'df', columns=cols2load)  # noqa
            assert cols2load_original == cols2load
    @ignore_natural_naming_warning
    def test_to_hdf_with_object_column_names(self):
        # GH9057
        # Writing HDF5 table format should only work for string-like
        # column types
        types_should_fail = [tm.makeIntIndex, tm.makeFloatIndex,
                             tm.makeDateIndex, tm.makeTimedeltaIndex,
                             tm.makePeriodIndex]
        types_should_run = [tm.makeStringIndex, tm.makeCategoricalIndex]

        if compat.PY3:
            types_should_run.append(tm.makeUnicodeIndex)
        else:
            # TODO: Add back to types_should_fail
            # https://github.com/pandas-dev/pandas/issues/20907
            pass

        for index in types_should_fail:
            df = DataFrame(np.random.randn(10, 2), columns=index(2))
            with ensure_clean_path(self.path) as path:
                with catch_warnings(record=True):
                    msg = "cannot have non-object label DataIndexableCol"
                    with pytest.raises(ValueError, match=msg):
                        df.to_hdf(path, 'df', format='table',
                                  data_columns=True)

        for index in types_should_run:
            df = DataFrame(np.random.randn(10, 2), columns=index(2))
            with ensure_clean_path(self.path) as path:
                with catch_warnings(record=True):
                    df.to_hdf(path, 'df', format='table', data_columns=True)
                    result = pd.read_hdf(
                        path, 'df', where="index = [{0}]".format(df.index[0]))
                    assert len(result)
    def test_read_hdf_open_store(self):
        # GH10330
        # No check for non-string path_or_buf, and no test of open store
        df = DataFrame(np.random.rand(4, 5),
                       index=list('abcd'),
                       columns=list('ABCDE'))
        df.index.name = 'letters'
        df = df.set_index(keys='E', append=True)

        with ensure_clean_path(self.path) as path:
            df.to_hdf(path, 'df', mode='w')
            direct = read_hdf(path, 'df')
            store = HDFStore(path, mode='r')
            indirect = read_hdf(store, 'df')
            tm.assert_frame_equal(direct, indirect)
            assert store.is_open
            store.close()
    def test_read_hdf_iterator(self):
        df = DataFrame(np.random.rand(4, 5),
                       index=list('abcd'),
                       columns=list('ABCDE'))
        df.index.name = 'letters'
        df = df.set_index(keys='E', append=True)

        with ensure_clean_path(self.path) as path:
            df.to_hdf(path, 'df', mode='w', format='t')
            direct = read_hdf(path, 'df')
            iterator = read_hdf(path, 'df', iterator=True)
            assert isinstance(iterator, TableIterator)
            indirect = next(iterator.__iter__())
            tm.assert_frame_equal(direct, indirect)
            iterator.store.close()
    def test_read_hdf_errors(self):
        df = DataFrame(np.random.rand(4, 5),
                       index=list('abcd'),
                       columns=list('ABCDE'))

        with ensure_clean_path(self.path) as path:
            pytest.raises(IOError, read_hdf, path, 'key')
            df.to_hdf(path, 'df')
            store = HDFStore(path, mode='r')
            store.close()
            pytest.raises(IOError, read_hdf, store, 'df')
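    # read_hdf accepts only file paths and HDFStore objects; generic buffers
    # such as BytesIO are rejected because PyTables reads from paths.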
    def test_read_hdf_generic_buffer_errors(self):
        pytest.raises(NotImplementedError, read_hdf, BytesIO(b''), 'df')
    def test_invalid_complib(self):
        df = DataFrame(np.random.rand(4, 5),
                       index=list('abcd'),
                       columns=list('ABCDE'))
        with ensure_clean_path(self.path) as path:
            with pytest.raises(ValueError):
                df.to_hdf(path, 'df', complib='foolib')
    # GH10443
    def test_read_nokey(self):
        df = DataFrame(np.random.rand(4, 5),
                       index=list('abcd'),
                       columns=list('ABCDE'))

        # Categorical dtype not supported for "fixed" format. So no need
        # to test with that dtype in the dataframe here.
        with ensure_clean_path(self.path) as path:
            df.to_hdf(path, 'df', mode='a')
            reread = read_hdf(path)
            assert_frame_equal(df, reread)
            df.to_hdf(path, 'df2', mode='a')
            pytest.raises(ValueError, read_hdf, path)
    def test_read_nokey_table(self):
        # GH13231
        df = DataFrame({'i': range(5),
                        'c': Series(list('abacd'), dtype='category')})

        with ensure_clean_path(self.path) as path:
            df.to_hdf(path, 'df', mode='a', format='table')
            reread = read_hdf(path)
            assert_frame_equal(df, reread)
            df.to_hdf(path, 'df2', mode='a', format='table')
            pytest.raises(ValueError, read_hdf, path)
    def test_read_nokey_empty(self):
        with ensure_clean_path(self.path) as path:
            store = HDFStore(path)
            store.close()
            pytest.raises(ValueError, read_hdf, path)
    @td.skip_if_no('pathlib')
    def test_read_from_pathlib_path(self):
        # GH11773
        from pathlib import Path

        expected = DataFrame(np.random.rand(4, 5),
                             index=list('abcd'),
                             columns=list('ABCDE'))
        with ensure_clean_path(self.path) as filename:
            path_obj = Path(filename)

            expected.to_hdf(path_obj, 'df', mode='a')
            actual = read_hdf(path_obj, 'df')

        tm.assert_frame_equal(expected, actual)
    @td.skip_if_no('py.path')
    def test_read_from_py_localpath(self):
        # GH11773
        from py.path import local as LocalPath

        expected = DataFrame(np.random.rand(4, 5),
                             index=list('abcd'),
                             columns=list('ABCDE'))
        with ensure_clean_path(self.path) as filename:
            path_obj = LocalPath(filename)

            expected.to_hdf(path_obj, 'df', mode='a')
            actual = read_hdf(path_obj, 'df')

        tm.assert_frame_equal(expected, actual)
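    # Float literals in a where clause must keep their full precision when
    # parsed; the values below differ only well past the decimal point.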
    def test_query_long_float_literal(self):
        # GH 14241
        df = pd.DataFrame({'A': [1000000000.0009,
                                 1000000000.0011,
                                 1000000000.0015]})

        with ensure_clean_store(self.path) as store:
            store.append('test', df, format='table', data_columns=True)

            cutoff = 1000000000.0006
            result = store.select('test', "A < %.4f" % cutoff)
            assert result.empty

            cutoff = 1000000000.0010
            result = store.select('test', "A > %.4f" % cutoff)
            expected = df.loc[[1, 2], :]
            tm.assert_frame_equal(expected, result)

            exact = 1000000000.0011
            result = store.select('test', 'A == %.4f' % exact)
            expected = df.loc[[1], :]
            tm.assert_frame_equal(expected, result)
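    # where-clause comparisons must be type-compatible with the column:
    # non-string literals against a string column raise TypeError, and
    # strings that cannot be coerced to the column's type raise ValueError.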
    def test_query_compare_column_type(self):
        # GH 15492
        df = pd.DataFrame({'date': ['2014-01-01', '2014-01-02'],
                           'real_date': date_range('2014-01-01', periods=2),
                           'float': [1.1, 1.2],
                           'int': [1, 2]},
                          columns=['date', 'real_date', 'float', 'int'])

        with ensure_clean_store(self.path) as store:
            store.append('test', df, format='table', data_columns=True)

            ts = pd.Timestamp('2014-01-01')  # noqa
            result = store.select('test', where='real_date > ts')
            expected = df.loc[[1], :]
            tm.assert_frame_equal(expected, result)

            for op in ['<', '>', '==']:
                # non-strings compared to a string column always fail
                for v in [2.1, True, pd.Timestamp('2014-01-01'),
                          pd.Timedelta(1, 's')]:
                    query = 'date {op} v'.format(op=op)
                    with pytest.raises(TypeError):
                        store.select('test', where=query)

                # strings compared to other columns must be convertible to
                # the column's type
                v = 'a'
                for col in ['int', 'float', 'real_date']:
                    query = '{col} {op} v'.format(op=op, col=col)
                    with pytest.raises(ValueError):
                        store.select('test', where=query)

                for v, col in zip(['1', '1.1', '2014-01-01'],
                                  ['int', 'float', 'real_date']):
                    query = '{col} {op} v'.format(op=op, col=col)
                    result = store.select('test', where=query)

                    if op == '==':
                        expected = df.loc[[0], :]
                    elif op == '>':
                        expected = df.loc[[1], :]
                    else:
                        expected = df.loc[[], :]
                    tm.assert_frame_equal(expected, result)
    @pytest.mark.parametrize('format', ['fixed', 'table'])
    def test_read_hdf_series_mode_r(self, format):
        # GH 16583
        # Tests that reading a Series saved to an HDF file
        # still works if a mode='r' argument is supplied
        series = tm.makeFloatSeries()
        with ensure_clean_path(self.path) as path:
            series.to_hdf(path, key='data', format=format)
            result = pd.read_hdf(path, key='data', mode='r')
        tm.assert_series_equal(result, series)
    @pytest.mark.skipif(not PY36, reason="Need python 3.6")
    def test_fspath(self):
        with tm.ensure_clean('foo.h5') as path:
            with pd.HDFStore(path) as store:
                assert os.fspath(store) == str(path)
    def test_read_py2_hdf_file_in_py3(self, datapath):
        # GH 16781

        # tests reading a PeriodIndex DataFrame written in Python 2 from
        # Python 3
        # the file was generated in Python 2.7 like so:
        #
        # df = pd.DataFrame([1.,2,3], index=pd.PeriodIndex(
        #     ['2015-01-01', '2015-01-02', '2015-01-05'], freq='B'))
        # df.to_hdf('periodindex_0.20.1_x86_64_darwin_2.7.13.h5', 'p')
        expected = pd.DataFrame([1., 2, 3], index=pd.PeriodIndex(
            ['2015-01-01', '2015-01-02', '2015-01-05'], freq='B'))

        with ensure_clean_store(
                datapath('io', 'data', 'legacy_hdf',
                         'periodindex_0.20.1_x86_64_darwin_2.7.13.h5'),
                mode='r') as store:
            result = store['p']
            assert_frame_equal(result, expected)
class TestHDFComplexValues(Base):
    # GH10447

    def test_complex_fixed(self):
        df = DataFrame(np.random.rand(4, 5).astype(np.complex64),
                       index=list('abcd'),
                       columns=list('ABCDE'))

        with ensure_clean_path(self.path) as path:
            df.to_hdf(path, 'df')
            reread = read_hdf(path, 'df')
            assert_frame_equal(df, reread)

        df = DataFrame(np.random.rand(4, 5).astype(np.complex128),
                       index=list('abcd'),
                       columns=list('ABCDE'))
        with ensure_clean_path(self.path) as path:
            df.to_hdf(path, 'df')
            reread = read_hdf(path, 'df')
            assert_frame_equal(df, reread)
    def test_complex_table(self):
        df = DataFrame(np.random.rand(4, 5).astype(np.complex64),
                       index=list('abcd'),
                       columns=list('ABCDE'))

        with ensure_clean_path(self.path) as path:
            df.to_hdf(path, 'df', format='table')
            reread = read_hdf(path, 'df')
            assert_frame_equal(df, reread)

        df = DataFrame(np.random.rand(4, 5).astype(np.complex128),
                       index=list('abcd'),
                       columns=list('ABCDE'))

        with ensure_clean_path(self.path) as path:
            df.to_hdf(path, 'df', format='table', mode='w')
            reread = read_hdf(path, 'df')
            assert_frame_equal(df, reread)
    @xfail_non_writeable
    def test_complex_mixed_fixed(self):
        complex64 = np.array([1.0 + 1.0j, 1.0 + 1.0j,
                              1.0 + 1.0j, 1.0 + 1.0j], dtype=np.complex64)
        complex128 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j],
                              dtype=np.complex128)
        df = DataFrame({'A': [1, 2, 3, 4],
                        'B': ['a', 'b', 'c', 'd'],
                        'C': complex64,
                        'D': complex128,
                        'E': [1.0, 2.0, 3.0, 4.0]},
                       index=list('abcd'))
        with ensure_clean_path(self.path) as path:
            df.to_hdf(path, 'df')
            reread = read_hdf(path, 'df')
            assert_frame_equal(df, reread)
    def test_complex_mixed_table(self):
        complex64 = np.array([1.0 + 1.0j, 1.0 + 1.0j,
                              1.0 + 1.0j, 1.0 + 1.0j], dtype=np.complex64)
        complex128 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j],
                              dtype=np.complex128)
        df = DataFrame({'A': [1, 2, 3, 4],
                        'B': ['a', 'b', 'c', 'd'],
                        'C': complex64,
                        'D': complex128,
                        'E': [1.0, 2.0, 3.0, 4.0]},
                       index=list('abcd'))

        with ensure_clean_store(self.path) as store:
            store.append('df', df, data_columns=['A', 'B'])
            result = store.select('df', where='A>2')
            assert_frame_equal(df.loc[df.A > 2], result)

        with ensure_clean_path(self.path) as path:
            df.to_hdf(path, 'df', format='table')
            reread = read_hdf(path, 'df')
            assert_frame_equal(df, reread)
    @pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning")
    def test_complex_across_dimensions_fixed(self):
        with catch_warnings(record=True):
            complex128 = np.array(
                [1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j])
            s = Series(complex128, index=list('abcd'))
            df = DataFrame({'A': s, 'B': s})
            p = Panel({'One': df, 'Two': df})

            objs = [s, df, p]
            comps = [tm.assert_series_equal, tm.assert_frame_equal,
                     tm.assert_panel_equal]
            for obj, comp in zip(objs, comps):
                with ensure_clean_path(self.path) as path:
                    obj.to_hdf(path, 'obj', format='fixed')
                    reread = read_hdf(path, 'obj')
                    comp(obj, reread)
    @pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning")
    def test_complex_across_dimensions(self):
        complex128 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j])
        s = Series(complex128, index=list('abcd'))
        df = DataFrame({'A': s, 'B': s})

        with catch_warnings(record=True):
            p = Panel({'One': df, 'Two': df})

            objs = [df, p]
            comps = [tm.assert_frame_equal, tm.assert_panel_equal]
            for obj, comp in zip(objs, comps):
                with ensure_clean_path(self.path) as path:
                    obj.to_hdf(path, 'obj', format='table')
                    reread = read_hdf(path, 'obj')
                    comp(obj, reread)
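    # Complex values cannot back a queryable column, so asking for 'C' as a
    # data column is rejected at append time.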
    def test_complex_indexing_error(self):
        complex128 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j],
                              dtype=np.complex128)
        df = DataFrame({'A': [1, 2, 3, 4],
                        'B': ['a', 'b', 'c', 'd'],
                        'C': complex128},
                       index=list('abcd'))
        with ensure_clean_store(self.path) as store:
            pytest.raises(TypeError, store.append,
                          'df', df, data_columns=['C'])
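    # A complex-valued index column is likewise unsupported in the table
    # format; writing with index=False is the workaround exercised below.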
    def test_complex_series_error(self):
        complex128 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j])
        s = Series(complex128, index=list('abcd'))

        with ensure_clean_path(self.path) as path:
            pytest.raises(TypeError, s.to_hdf, path, 'obj', format='t')

        with ensure_clean_path(self.path) as path:
            s.to_hdf(path, 'obj', format='t', index=False)
            reread = read_hdf(path, 'obj')
            tm.assert_series_equal(s, reread)
    def test_complex_append(self):
        df = DataFrame({'a': np.random.randn(100).astype(np.complex128),
                        'b': np.random.randn(100)})

        with ensure_clean_store(self.path) as store:
            store.append('df', df, data_columns=['b'])
            store.append('df', df)
            result = store.select('df')
            assert_frame_equal(pd.concat([df, df], axis=0), result)
class TestTimezones(Base):

    def _compare_with_tz(self, a, b):
        tm.assert_frame_equal(a, b)

        # compare the zones on each element
        for c in a.columns:
            for i in a.index:
                a_e = a.loc[i, c]
                b_e = b.loc[i, c]
                if not (a_e == b_e and a_e.tz == b_e.tz):
                    raise AssertionError(
                        "invalid tz comparison [%s] [%s]" % (a_e, b_e))
    def test_append_with_timezones_dateutil(self):

        from datetime import timedelta

        # use maybe_get_tz instead of dateutil.tz.gettz to handle the windows
        # filename issues.
        from pandas._libs.tslibs.timezones import maybe_get_tz
        gettz = lambda x: maybe_get_tz('dateutil/' + x)

        # as columns
        with ensure_clean_store(self.path) as store:

            _maybe_remove(store, 'df_tz')
            df = DataFrame(dict(A=[Timestamp('20130102 2:00:00', tz=gettz(
                'US/Eastern')) + timedelta(hours=1) * i for i in range(5)]))

            store.append('df_tz', df, data_columns=['A'])
            result = store['df_tz']
            self._compare_with_tz(result, df)
            assert_frame_equal(result, df)

            # select with tz aware
            expected = df[df.A >= df.A[3]]
            result = store.select('df_tz', where='A>=df.A[3]')
            self._compare_with_tz(result, expected)

            # ensure we include dates in DST and STD time here.
            _maybe_remove(store, 'df_tz')
            df = DataFrame(dict(A=Timestamp('20130102',
                                            tz=gettz('US/Eastern')),
                                B=Timestamp('20130603',
                                            tz=gettz('US/Eastern'))),
                           index=range(5))
            store.append('df_tz', df)
            result = store['df_tz']
            self._compare_with_tz(result, df)
            assert_frame_equal(result, df)

            df = DataFrame(dict(A=Timestamp('20130102',
                                            tz=gettz('US/Eastern')),
                                B=Timestamp('20130102', tz=gettz('EET'))),
                           index=range(5))
            pytest.raises(ValueError, store.append, 'df_tz', df)

            # this is ok
            _maybe_remove(store, 'df_tz')
            store.append('df_tz', df, data_columns=['A', 'B'])
            result = store['df_tz']
            self._compare_with_tz(result, df)
            assert_frame_equal(result, df)

            # can't append with diff timezone
            df = DataFrame(dict(A=Timestamp('20130102',
                                            tz=gettz('US/Eastern')),
                                B=Timestamp('20130102', tz=gettz('CET'))),
                           index=range(5))
            pytest.raises(ValueError, store.append, 'df_tz', df)

        # as index
        with ensure_clean_store(self.path) as store:

            # GH 4098 example
            df = DataFrame(dict(A=Series(lrange(3), index=date_range(
                '2000-1-1', periods=3, freq='H', tz=gettz('US/Eastern')))))

            _maybe_remove(store, 'df')
            store.put('df', df)
            result = store.select('df')
            assert_frame_equal(result, df)

            _maybe_remove(store, 'df')
            store.append('df', df)
            result = store.select('df')
            assert_frame_equal(result, df)
    def test_append_with_timezones_pytz(self):

        from datetime import timedelta

        # as columns
        with ensure_clean_store(self.path) as store:

            _maybe_remove(store, 'df_tz')
            df = DataFrame(dict(A=[Timestamp('20130102 2:00:00',
                                             tz='US/Eastern') +
                                   timedelta(hours=1) * i
                                   for i in range(5)]))
            store.append('df_tz', df, data_columns=['A'])
            result = store['df_tz']
            self._compare_with_tz(result, df)
            assert_frame_equal(result, df)

            # select with tz aware
            self._compare_with_tz(store.select(
                'df_tz', where='A>=df.A[3]'), df[df.A >= df.A[3]])

            _maybe_remove(store, 'df_tz')
            # ensure we include dates in DST and STD time here.
            df = DataFrame(dict(A=Timestamp('20130102', tz='US/Eastern'),
                                B=Timestamp('20130603', tz='US/Eastern')),
                           index=range(5))
            store.append('df_tz', df)
            result = store['df_tz']
            self._compare_with_tz(result, df)
            assert_frame_equal(result, df)

            df = DataFrame(dict(A=Timestamp('20130102', tz='US/Eastern'),
                                B=Timestamp('20130102', tz='EET')),
                           index=range(5))
            pytest.raises(ValueError, store.append, 'df_tz', df)

            # this is ok
            _maybe_remove(store, 'df_tz')
            store.append('df_tz', df, data_columns=['A', 'B'])
            result = store['df_tz']
            self._compare_with_tz(result, df)
            assert_frame_equal(result, df)

            # can't append with diff timezone
            df = DataFrame(dict(A=Timestamp('20130102', tz='US/Eastern'),
                                B=Timestamp('20130102', tz='CET')),
                           index=range(5))
            pytest.raises(ValueError, store.append, 'df_tz', df)

        # as index
        with ensure_clean_store(self.path) as store:

            # GH 4098 example
            df = DataFrame(dict(A=Series(lrange(3), index=date_range(
                '2000-1-1', periods=3, freq='H', tz='US/Eastern'))))

            _maybe_remove(store, 'df')
            store.put('df', df)
            result = store.select('df')
            assert_frame_equal(result, df)

            _maybe_remove(store, 'df')
            store.append('df', df)
            result = store.select('df')
            assert_frame_equal(result, df)
    def test_tseries_select_index_column(self):
        # GH7777
        # selecting a UTC datetimeindex column did
        # not preserve UTC tzinfo set before storing

        # check that no tz still works
        rng = date_range('1/1/2000', '1/30/2000')
        frame = DataFrame(np.random.randn(len(rng), 4), index=rng)

        with ensure_clean_store(self.path) as store:
            store.append('frame', frame)
            result = store.select_column('frame', 'index')
            assert rng.tz == DatetimeIndex(result.values).tz

        # check utc
        rng = date_range('1/1/2000', '1/30/2000', tz='UTC')
        frame = DataFrame(np.random.randn(len(rng), 4), index=rng)

        with ensure_clean_store(self.path) as store:
            store.append('frame', frame)
            result = store.select_column('frame', 'index')
            assert rng.tz == result.dt.tz

        # double check non-utc
        rng = date_range('1/1/2000', '1/30/2000', tz='US/Eastern')
        frame = DataFrame(np.random.randn(len(rng), 4), index=rng)

        with ensure_clean_store(self.path) as store:
            store.append('frame', frame)
            result = store.select_column('frame', 'index')
            assert rng.tz == result.dt.tz
    def test_timezones_fixed(self):
        with ensure_clean_store(self.path) as store:

            # index
            rng = date_range('1/1/2000', '1/30/2000', tz='US/Eastern')
            df = DataFrame(np.random.randn(len(rng), 4), index=rng)
            store['df'] = df
            result = store['df']
            assert_frame_equal(result, df)

            # as data
            # GH11411
            _maybe_remove(store, 'df')
            df = DataFrame({'A': rng,
                            'B': rng.tz_convert('UTC').tz_localize(None),
                            'C': rng.tz_convert('CET'),
                            'D': range(len(rng))}, index=rng)
            store['df'] = df
            result = store['df']
            assert_frame_equal(result, df)
    def test_fixed_offset_tz(self):
        rng = date_range('1/1/2000 00:00:00-07:00', '1/30/2000 00:00:00-07:00')
        frame = DataFrame(np.random.randn(len(rng), 4), index=rng)

        with ensure_clean_store(self.path) as store:
            store['frame'] = frame
            recons = store['frame']
            tm.assert_index_equal(recons.index, rng)
            assert rng.tz == recons.index.tz
    @td.skip_if_windows
    def test_store_timezone(self):
        # GH2852
        # issue storing datetime.date with a timezone as it resets when read
        # back in a new timezone

        # original method
        with ensure_clean_store(self.path) as store:

            today = datetime.date(2013, 9, 10)
            df = DataFrame([1, 2, 3], index=[today, today, today])
            store['obj1'] = df
            result = store['obj1']
            assert_frame_equal(result, df)

        # with tz setting
        with ensure_clean_store(self.path) as store:

            with set_timezone('EST5EDT'):
                today = datetime.date(2013, 9, 10)
                df = DataFrame([1, 2, 3], index=[today, today, today])
                store['obj1'] = df

            with set_timezone('CST6CDT'):
                result = store['obj1']

            assert_frame_equal(result, df)
    def test_legacy_datetimetz_object(self, datapath):
        # legacy from < 0.17.0
        # GH 8260
        expected = DataFrame(dict(A=Timestamp('20130102', tz='US/Eastern'),
                                  B=Timestamp('20130603', tz='CET')),
                             index=range(5))
        with ensure_clean_store(
                datapath('io', 'data', 'legacy_hdf', 'datetimetz_object.h5'),
                mode='r') as store:
            result = store['df']
            assert_frame_equal(result, expected)
    def test_dst_transitions(self):
        # make sure we are not failing on transitions
        with ensure_clean_store(self.path) as store:
            times = pd.date_range("2013-10-26 23:00", "2013-10-27 01:00",
                                  tz="Europe/London",
                                  freq="H",
                                  ambiguous='infer')

            for i in [times, times + pd.Timedelta('10min')]:
                _maybe_remove(store, 'df')
                df = DataFrame({'A': range(len(i)), 'B': i}, index=i)
                store.append('df', df)
                result = store.select('df')
                assert_frame_equal(result, df)