test_indexing.py 128 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
732783279328032813282328332843285328632873288328932903291329232933294329532963297329832993300330133023303330433053306330733083309331033113312331333143315331633173318331933203321332233233324332533263327332833293330333133323333333433353336333733383339334033413342334333443345334633473348334933503351335233533354335533563357335833593360336133623363336433653366336733683369337033713372337333743375337633773378337933803381338233833384338533863387338833893390339133923393339433953396339733983399340034013402340334043405340634073408340934103411341234133414341534163417341834193420342134223423342434253426342734283429343034313432343334343435343634373438343934403441344234433444344534463447344834493450345134523453345434553456345734583459346034613462346334643465346634673468346934703471347234733474347534763477347834793480348134823483348434853486348734883489349034913492349334943495349634973498349935003501350235033504350535063507350835093510351135123513351435153516351735183519352035213522352335243525352635273528352935303531353235333534353535363537353835393540354135423543354435453546354735483549355035513552355335543555355635573558355935603561356235633564356535663567356835693570357135723573357435753576357735783579358035813582358335843585358635873588358935903591359235933594359535963597359835993600360136023603360436053606360736083609361036113612361336143615361636173618361936203621362236233624362536263627362836293630363136323633363436353636363736383639364036413642364336443645364636473648364936503651365236533654365536563657365836593660366136623663366436653666366736683669367036713672367336743675367636773678367936803681368236833684
  1. # -*- coding: utf-8 -*-
  2. from __future__ import print_function
  3. from datetime import date, datetime, time, timedelta
  4. from warnings import catch_warnings, simplefilter
  5. import numpy as np
  6. import pytest
  7. from pandas._libs.tslib import iNaT
  8. from pandas.compat import long, lrange, lzip, map, range, zip
  9. from pandas.core.dtypes.common import is_float_dtype, is_integer, is_scalar
  10. from pandas.core.dtypes.dtypes import CategoricalDtype
  11. import pandas as pd
  12. from pandas import (
  13. Categorical, DataFrame, DatetimeIndex, Index, MultiIndex, Series,
  14. Timestamp, compat, date_range, isna, notna)
  15. import pandas.core.common as com
  16. from pandas.core.indexing import IndexingError
  17. from pandas.tests.frame.common import TestData
  18. import pandas.util.testing as tm
  19. from pandas.util.testing import (
  20. assert_almost_equal, assert_frame_equal, assert_series_equal)
  21. from pandas.tseries.offsets import BDay
  22. class TestDataFrameIndexing(TestData):
  23. def test_getitem(self):
  24. # Slicing
  25. sl = self.frame[:20]
  26. assert len(sl.index) == 20
  27. # Column access
  28. for _, series in compat.iteritems(sl):
  29. assert len(series.index) == 20
  30. assert tm.equalContents(series.index, sl.index)
  31. for key, _ in compat.iteritems(self.frame._series):
  32. assert self.frame[key] is not None
  33. assert 'random' not in self.frame
  34. with pytest.raises(KeyError, match='random'):
  35. self.frame['random']
  36. df = self.frame.copy()
  37. df['$10'] = np.random.randn(len(df))
  38. ad = np.random.randn(len(df))
  39. df['@awesome_domain'] = ad
  40. with pytest.raises(KeyError):
  41. df.__getitem__('df["$10"]')
  42. res = df['@awesome_domain']
  43. tm.assert_numpy_array_equal(ad, res.values)
  44. def test_getitem_dupe_cols(self):
  45. df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=['a', 'a', 'b'])
  46. with pytest.raises(KeyError):
  47. df[['baf']]
  48. def test_get(self):
  49. b = self.frame.get('B')
  50. assert_series_equal(b, self.frame['B'])
  51. assert self.frame.get('foo') is None
  52. assert_series_equal(self.frame.get('foo', self.frame['B']),
  53. self.frame['B'])
  54. @pytest.mark.parametrize("df", [
  55. DataFrame(),
  56. DataFrame(columns=list("AB")),
  57. DataFrame(columns=list("AB"), index=range(3))
  58. ])
  59. def test_get_none(self, df):
  60. # see gh-5652
  61. assert df.get(None) is None
  62. def test_loc_iterable(self):
  63. idx = iter(['A', 'B', 'C'])
  64. result = self.frame.loc[:, idx]
  65. expected = self.frame.loc[:, ['A', 'B', 'C']]
  66. assert_frame_equal(result, expected)
  67. @pytest.mark.parametrize(
  68. "idx_type",
  69. [list, iter, Index, set,
  70. lambda l: dict(zip(l, range(len(l)))),
  71. lambda l: dict(zip(l, range(len(l)))).keys()],
  72. ids=["list", "iter", "Index", "set", "dict", "dict_keys"])
  73. @pytest.mark.parametrize("levels", [1, 2])
  74. def test_getitem_listlike(self, idx_type, levels):
  75. # GH 21294
  76. if levels == 1:
  77. frame, missing = self.frame, 'food'
  78. else:
  79. # MultiIndex columns
  80. frame = DataFrame(np.random.randn(8, 3),
  81. columns=Index([('foo', 'bar'), ('baz', 'qux'),
  82. ('peek', 'aboo')],
  83. name=('sth', 'sth2')))
  84. missing = ('good', 'food')
  85. keys = [frame.columns[1], frame.columns[0]]
  86. idx = idx_type(keys)
  87. idx_check = list(idx_type(keys))
  88. result = frame[idx]
  89. expected = frame.loc[:, idx_check]
  90. expected.columns.names = frame.columns.names
  91. assert_frame_equal(result, expected)
  92. idx = idx_type(keys + [missing])
  93. with pytest.raises(KeyError, match='not in index'):
  94. frame[idx]
  95. @pytest.mark.parametrize("val,expected", [
  96. (2**63 - 1, Series([1])),
  97. (2**63, Series([2])),
  98. ])
  99. def test_loc_uint64(self, val, expected):
  100. # see gh-19399
  101. df = DataFrame([1, 2], index=[2**63 - 1, 2**63])
  102. result = df.loc[val]
  103. expected.name = val
  104. tm.assert_series_equal(result, expected)
  105. def test_getitem_callable(self):
  106. # GH 12533
  107. result = self.frame[lambda x: 'A']
  108. tm.assert_series_equal(result, self.frame.loc[:, 'A'])
  109. result = self.frame[lambda x: ['A', 'B']]
  110. tm.assert_frame_equal(result, self.frame.loc[:, ['A', 'B']])
  111. df = self.frame[:3]
  112. result = df[lambda x: [True, False, True]]
  113. tm.assert_frame_equal(result, self.frame.iloc[[0, 2], :])
  114. def test_setitem_list(self):
  115. self.frame['E'] = 'foo'
  116. data = self.frame[['A', 'B']]
  117. self.frame[['B', 'A']] = data
  118. assert_series_equal(self.frame['B'], data['A'], check_names=False)
  119. assert_series_equal(self.frame['A'], data['B'], check_names=False)
  120. msg = 'Columns must be same length as key'
  121. with pytest.raises(ValueError, match=msg):
  122. data[['A']] = self.frame[['A', 'B']]
  123. msg = 'Length of values does not match length of index'
  124. with pytest.raises(ValueError, match=msg):
  125. data['A'] = range(len(data.index) - 1)
  126. df = DataFrame(0, lrange(3), ['tt1', 'tt2'], dtype=np.int_)
  127. df.loc[1, ['tt1', 'tt2']] = [1, 2]
  128. result = df.loc[df.index[1], ['tt1', 'tt2']]
  129. expected = Series([1, 2], df.columns, dtype=np.int_, name=1)
  130. assert_series_equal(result, expected)
  131. df['tt1'] = df['tt2'] = '0'
  132. df.loc[df.index[1], ['tt1', 'tt2']] = ['1', '2']
  133. result = df.loc[df.index[1], ['tt1', 'tt2']]
  134. expected = Series(['1', '2'], df.columns, name=1)
  135. assert_series_equal(result, expected)
  136. def test_setitem_list_not_dataframe(self):
  137. data = np.random.randn(len(self.frame), 2)
  138. self.frame[['A', 'B']] = data
  139. assert_almost_equal(self.frame[['A', 'B']].values, data)
  140. def test_setitem_list_of_tuples(self):
  141. tuples = lzip(self.frame['A'], self.frame['B'])
  142. self.frame['tuples'] = tuples
  143. result = self.frame['tuples']
  144. expected = Series(tuples, index=self.frame.index, name='tuples')
  145. assert_series_equal(result, expected)
  146. def test_setitem_mulit_index(self):
  147. # GH7655, test that assigning to a sub-frame of a frame
  148. # with multi-index columns aligns both rows and columns
  149. it = ['jim', 'joe', 'jolie'], ['first', 'last'], \
  150. ['left', 'center', 'right']
  151. cols = MultiIndex.from_product(it)
  152. index = pd.date_range('20141006', periods=20)
  153. vals = np.random.randint(1, 1000, (len(index), len(cols)))
  154. df = pd.DataFrame(vals, columns=cols, index=index)
  155. i, j = df.index.values.copy(), it[-1][:]
  156. np.random.shuffle(i)
  157. df['jim'] = df['jolie'].loc[i, ::-1]
  158. assert_frame_equal(df['jim'], df['jolie'])
  159. np.random.shuffle(j)
  160. df[('joe', 'first')] = df[('jolie', 'last')].loc[i, j]
  161. assert_frame_equal(df[('joe', 'first')], df[('jolie', 'last')])
  162. np.random.shuffle(j)
  163. df[('joe', 'last')] = df[('jolie', 'first')].loc[i, j]
  164. assert_frame_equal(df[('joe', 'last')], df[('jolie', 'first')])
  165. def test_setitem_callable(self):
  166. # GH 12533
  167. df = pd.DataFrame({'A': [1, 2, 3, 4], 'B': [5, 6, 7, 8]})
  168. df[lambda x: 'A'] = [11, 12, 13, 14]
  169. exp = pd.DataFrame({'A': [11, 12, 13, 14], 'B': [5, 6, 7, 8]})
  170. tm.assert_frame_equal(df, exp)
  171. def test_setitem_other_callable(self):
  172. # GH 13299
  173. def inc(x):
  174. return x + 1
  175. df = pd.DataFrame([[-1, 1], [1, -1]])
  176. df[df > 0] = inc
  177. expected = pd.DataFrame([[-1, inc], [inc, -1]])
  178. tm.assert_frame_equal(df, expected)
  179. def test_getitem_boolean(self):
  180. # boolean indexing
  181. d = self.tsframe.index[10]
  182. indexer = self.tsframe.index > d
  183. indexer_obj = indexer.astype(object)
  184. subindex = self.tsframe.index[indexer]
  185. subframe = self.tsframe[indexer]
  186. tm.assert_index_equal(subindex, subframe.index)
  187. with pytest.raises(ValueError, match='Item wrong length'):
  188. self.tsframe[indexer[:-1]]
  189. subframe_obj = self.tsframe[indexer_obj]
  190. assert_frame_equal(subframe_obj, subframe)
  191. with pytest.raises(ValueError, match='boolean values only'):
  192. self.tsframe[self.tsframe]
  193. # test that Series work
  194. indexer_obj = Series(indexer_obj, self.tsframe.index)
  195. subframe_obj = self.tsframe[indexer_obj]
  196. assert_frame_equal(subframe_obj, subframe)
  197. # test that Series indexers reindex
  198. # we are producing a warning that since the passed boolean
  199. # key is not the same as the given index, we will reindex
  200. # not sure this is really necessary
  201. with tm.assert_produces_warning(UserWarning, check_stacklevel=False):
  202. indexer_obj = indexer_obj.reindex(self.tsframe.index[::-1])
  203. subframe_obj = self.tsframe[indexer_obj]
  204. assert_frame_equal(subframe_obj, subframe)
  205. # test df[df > 0]
  206. for df in [self.tsframe, self.mixed_frame,
  207. self.mixed_float, self.mixed_int]:
  208. if compat.PY3 and df is self.mixed_frame:
  209. continue
  210. data = df._get_numeric_data()
  211. bif = df[df > 0]
  212. bifw = DataFrame({c: np.where(data[c] > 0, data[c], np.nan)
  213. for c in data.columns},
  214. index=data.index, columns=data.columns)
  215. # add back other columns to compare
  216. for c in df.columns:
  217. if c not in bifw:
  218. bifw[c] = df[c]
  219. bifw = bifw.reindex(columns=df.columns)
  220. assert_frame_equal(bif, bifw, check_dtype=False)
  221. for c in df.columns:
  222. if bif[c].dtype != bifw[c].dtype:
  223. assert bif[c].dtype == df[c].dtype
  224. def test_getitem_boolean_casting(self):
  225. # don't upcast if we don't need to
  226. df = self.tsframe.copy()
  227. df['E'] = 1
  228. df['E'] = df['E'].astype('int32')
  229. df['E1'] = df['E'].copy()
  230. df['F'] = 1
  231. df['F'] = df['F'].astype('int64')
  232. df['F1'] = df['F'].copy()
  233. casted = df[df > 0]
  234. result = casted.get_dtype_counts()
  235. expected = Series({'float64': 4, 'int32': 2, 'int64': 2})
  236. assert_series_equal(result, expected)
  237. # int block splitting
  238. df.loc[df.index[1:3], ['E1', 'F1']] = 0
  239. casted = df[df > 0]
  240. result = casted.get_dtype_counts()
  241. expected = Series({'float64': 6, 'int32': 1, 'int64': 1})
  242. assert_series_equal(result, expected)
  243. # where dtype conversions
  244. # GH 3733
  245. df = DataFrame(data=np.random.randn(100, 50))
  246. df = df.where(df > 0) # create nans
  247. bools = df > 0
  248. mask = isna(df)
  249. expected = bools.astype(float).mask(mask)
  250. result = bools.mask(mask)
  251. assert_frame_equal(result, expected)
  252. def test_getitem_boolean_list(self):
  253. df = DataFrame(np.arange(12).reshape(3, 4))
  254. def _checkit(lst):
  255. result = df[lst]
  256. expected = df.loc[df.index[lst]]
  257. assert_frame_equal(result, expected)
  258. _checkit([True, False, True])
  259. _checkit([True, True, True])
  260. _checkit([False, False, False])
  261. def test_getitem_boolean_iadd(self):
  262. arr = np.random.randn(5, 5)
  263. df = DataFrame(arr.copy(), columns=['A', 'B', 'C', 'D', 'E'])
  264. df[df < 0] += 1
  265. arr[arr < 0] += 1
  266. assert_almost_equal(df.values, arr)
  267. def test_boolean_index_empty_corner(self):
  268. # #2096
  269. blah = DataFrame(np.empty([0, 1]), columns=['A'],
  270. index=DatetimeIndex([]))
  271. # both of these should succeed trivially
  272. k = np.array([], bool)
  273. blah[k]
  274. blah[k] = 0
  275. def test_getitem_ix_mixed_integer(self):
  276. df = DataFrame(np.random.randn(4, 3),
  277. index=[1, 10, 'C', 'E'], columns=[1, 2, 3])
  278. result = df.iloc[:-1]
  279. expected = df.loc[df.index[:-1]]
  280. assert_frame_equal(result, expected)
  281. with catch_warnings(record=True):
  282. simplefilter("ignore", DeprecationWarning)
  283. result = df.ix[[1, 10]]
  284. expected = df.ix[Index([1, 10], dtype=object)]
  285. assert_frame_equal(result, expected)
  286. # 11320
  287. df = pd.DataFrame({"rna": (1.5, 2.2, 3.2, 4.5),
  288. -1000: [11, 21, 36, 40],
  289. 0: [10, 22, 43, 34],
  290. 1000: [0, 10, 20, 30]},
  291. columns=['rna', -1000, 0, 1000])
  292. result = df[[1000]]
  293. expected = df.iloc[:, [3]]
  294. assert_frame_equal(result, expected)
  295. result = df[[-1000]]
  296. expected = df.iloc[:, [1]]
  297. assert_frame_equal(result, expected)
  298. def test_getitem_setitem_ix_negative_integers(self):
  299. with catch_warnings(record=True):
  300. simplefilter("ignore", DeprecationWarning)
  301. result = self.frame.ix[:, -1]
  302. assert_series_equal(result, self.frame['D'])
  303. with catch_warnings(record=True):
  304. simplefilter("ignore", DeprecationWarning)
  305. result = self.frame.ix[:, [-1]]
  306. assert_frame_equal(result, self.frame[['D']])
  307. with catch_warnings(record=True):
  308. simplefilter("ignore", DeprecationWarning)
  309. result = self.frame.ix[:, [-1, -2]]
  310. assert_frame_equal(result, self.frame[['D', 'C']])
  311. with catch_warnings(record=True):
  312. simplefilter("ignore", DeprecationWarning)
  313. self.frame.ix[:, [-1]] = 0
  314. assert (self.frame['D'] == 0).all()
  315. df = DataFrame(np.random.randn(8, 4))
  316. # ix does label-based indexing when having an integer index
  317. with catch_warnings(record=True):
  318. simplefilter("ignore", DeprecationWarning)
  319. with pytest.raises(KeyError):
  320. df.ix[[-1]]
  321. with catch_warnings(record=True):
  322. simplefilter("ignore", DeprecationWarning)
  323. with pytest.raises(KeyError):
  324. df.ix[:, [-1]]
  325. # #1942
  326. a = DataFrame(np.random.randn(20, 2),
  327. index=[chr(x + 65) for x in range(20)])
  328. with catch_warnings(record=True):
  329. simplefilter("ignore", DeprecationWarning)
  330. a.ix[-1] = a.ix[-2]
  331. with catch_warnings(record=True):
  332. simplefilter("ignore", DeprecationWarning)
  333. assert_series_equal(a.ix[-1], a.ix[-2], check_names=False)
  334. assert a.ix[-1].name == 'T'
  335. assert a.ix[-2].name == 'S'
  336. def test_getattr(self):
  337. assert_series_equal(self.frame.A, self.frame['A'])
  338. pytest.raises(AttributeError, getattr, self.frame,
  339. 'NONEXISTENT_NAME')
  340. def test_setattr_column(self):
  341. df = DataFrame({'foobar': 1}, index=lrange(10))
  342. df.foobar = 5
  343. assert (df.foobar == 5).all()
  344. def test_setitem(self):
  345. # not sure what else to do here
  346. series = self.frame['A'][::2]
  347. self.frame['col5'] = series
  348. assert 'col5' in self.frame
  349. assert len(series) == 15
  350. assert len(self.frame) == 30
  351. exp = np.ravel(np.column_stack((series.values, [np.nan] * 15)))
  352. exp = Series(exp, index=self.frame.index, name='col5')
  353. tm.assert_series_equal(self.frame['col5'], exp)
  354. series = self.frame['A']
  355. self.frame['col6'] = series
  356. tm.assert_series_equal(series, self.frame['col6'], check_names=False)
  357. with pytest.raises(KeyError):
  358. self.frame[np.random.randn(len(self.frame) + 1)] = 1
  359. # set ndarray
  360. arr = np.random.randn(len(self.frame))
  361. self.frame['col9'] = arr
  362. assert (self.frame['col9'] == arr).all()
  363. self.frame['col7'] = 5
  364. assert((self.frame['col7'] == 5).all())
  365. self.frame['col0'] = 3.14
  366. assert((self.frame['col0'] == 3.14).all())
  367. self.frame['col8'] = 'foo'
  368. assert((self.frame['col8'] == 'foo').all())
  369. # this is partially a view (e.g. some blocks are view)
  370. # so raise/warn
  371. smaller = self.frame[:2]
  372. with pytest.raises(com.SettingWithCopyError):
  373. smaller['col10'] = ['1', '2']
  374. assert smaller['col10'].dtype == np.object_
  375. assert (smaller['col10'] == ['1', '2']).all()
  376. # dtype changing GH4204
  377. df = DataFrame([[0, 0]])
  378. df.iloc[0] = np.nan
  379. expected = DataFrame([[np.nan, np.nan]])
  380. assert_frame_equal(df, expected)
  381. df = DataFrame([[0, 0]])
  382. df.loc[0] = np.nan
  383. assert_frame_equal(df, expected)
  384. @pytest.mark.parametrize("dtype", ["int32", "int64", "float32", "float64"])
  385. def test_setitem_dtype(self, dtype):
  386. arr = np.random.randn(len(self.frame))
  387. self.frame[dtype] = np.array(arr, dtype=dtype)
  388. assert self.frame[dtype].dtype.name == dtype
  389. def test_setitem_tuple(self):
  390. self.frame['A', 'B'] = self.frame['A']
  391. assert_series_equal(self.frame['A', 'B'], self.frame[
  392. 'A'], check_names=False)
  393. def test_setitem_always_copy(self):
  394. s = self.frame['A'].copy()
  395. self.frame['E'] = s
  396. self.frame['E'][5:10] = np.nan
  397. assert notna(s[5:10]).all()
  398. def test_setitem_boolean(self):
  399. df = self.frame.copy()
  400. values = self.frame.values
  401. df[df['A'] > 0] = 4
  402. values[values[:, 0] > 0] = 4
  403. assert_almost_equal(df.values, values)
  404. # test that column reindexing works
  405. series = df['A'] == 4
  406. series = series.reindex(df.index[::-1])
  407. df[series] = 1
  408. values[values[:, 0] == 4] = 1
  409. assert_almost_equal(df.values, values)
  410. df[df > 0] = 5
  411. values[values > 0] = 5
  412. assert_almost_equal(df.values, values)
  413. df[df == 5] = 0
  414. values[values == 5] = 0
  415. assert_almost_equal(df.values, values)
  416. # a df that needs alignment first
  417. df[df[:-1] < 0] = 2
  418. np.putmask(values[:-1], values[:-1] < 0, 2)
  419. assert_almost_equal(df.values, values)
  420. # indexed with same shape but rows-reversed df
  421. df[df[::-1] == 2] = 3
  422. values[values == 2] = 3
  423. assert_almost_equal(df.values, values)
  424. msg = "Must pass DataFrame or 2-d ndarray with boolean values only"
  425. with pytest.raises(TypeError, match=msg):
  426. df[df * 0] = 2
  427. # index with DataFrame
  428. mask = df > np.abs(df)
  429. expected = df.copy()
  430. df[df > np.abs(df)] = np.nan
  431. expected.values[mask.values] = np.nan
  432. assert_frame_equal(df, expected)
  433. # set from DataFrame
  434. expected = df.copy()
  435. df[df > np.abs(df)] = df * 2
  436. np.putmask(expected.values, mask.values, df.values * 2)
  437. assert_frame_equal(df, expected)
  @pytest.mark.parametrize(
      "mask_type",
      [lambda df: df > np.abs(df) / 2,
       lambda df: (df > np.abs(df) / 2).values],
      ids=['dataframe', 'array'])
  def test_setitem_boolean_mask(self, mask_type):
      """Setting NaN through a boolean mask given as a frame or an ndarray."""
      # Test for issue #18582
      base = self.frame.copy()
      mask = mask_type(base)

      # expected outcome, written straight into the underlying array
      expected = base.copy()
      expected.values[np.array(mask)] = np.nan

      # same assignment through DataFrame.__setitem__
      result = base.copy()
      result[mask] = np.nan

      assert_frame_equal(result, expected)
  def test_setitem_cast(self):
      """Check the dtype a column ends up with after various assignments."""
      self.frame['D'] = self.frame['D'].astype('i8')
      assert self.frame['D'].dtype == np.int64

      # #669, should not cast?
      # The column is replaced by the assigned value, so the resulting dtype
      # comes from the value and has nothing to do with the previous dtype.
      self.frame['B'] = 0
      assert self.frame['B'].dtype == np.int64

      # passing an array casts to the array's kind
      self.frame['B'] = np.arange(len(self.frame))
      assert issubclass(self.frame['B'].dtype.type, np.integer)

      # overwrite a string column with an int, then with a float
      for scalar, wanted in ((0, np.int64), (2.5, np.float64)):
          self.frame['foo'] = 'bar'
          self.frame['foo'] = scalar
          assert self.frame['foo'].dtype == wanted

      # repeated scalar assignment to the same column
      for scalar, wanted in ((0, np.int64), (2, np.int64), (2.5, np.float64)):
          self.frame['something'] = scalar
          assert self.frame['something'].dtype == wanted

      # GH 7704
      # dtype conversion on setting
      df = DataFrame(np.random.rand(30, 3), columns=tuple('ABC'))
      df['event'] = np.nan
      df.loc[10, 'event'] = 'foo'
      counts = df.get_dtype_counts().sort_values()
      wanted_counts = Series({'float64': 3, 'object': 1}).sort_values()
      assert_series_equal(counts, wanted_counts)

      # Test that data type is preserved . #5782
      df = DataFrame({'one': np.arange(6, dtype=np.int8)})
      int8 = np.dtype(np.int8)
      df.loc[1, 'one'] = 6
      assert df.dtypes.one == int8
      df.one = np.int8(7)
      assert df.dtypes.one == int8
  def test_setitem_boolean_column(self):
      """``.loc[row_mask, column] = scalar`` writes only the masked rows."""
      expected = self.frame.copy()
      positive_a = self.frame['A'] > 0

      # column 'B' sits at position 1 of the underlying array
      expected.values[positive_a.values, 1] = 0
      self.frame.loc[positive_a, 'B'] = 0

      assert_frame_equal(self.frame, expected)
  def test_frame_setitem_timestamp(self):
      """A Timestamp can be used as a new column key on date-labelled columns."""
      # GH#2155
      columns = date_range(start='1/1/2012', end='2/1/2012', freq=BDay())
      data = DataFrame(columns=columns, index=lrange(10))

      ts = Timestamp(datetime(2012, 11, 1))
      data[ts] = np.nan  # works, mostly a smoke-test
      assert np.isnan(data[ts]).all()
  def test_setitem_corner(self):
      """Corner cases of column assignment: re-adding, scalars, empty frames."""
      # corner case: delete a column and add it back
      df = DataFrame({'B': [1., 2., 3.],
                      'C': ['a', 'b', 'c']},
                     index=np.arange(3))
      del df['B']
      df['B'] = [1., 2., 3.]
      assert 'B' in df
      assert len(df.columns) == 2

      # scalar assignment under string and datetime keys
      df['A'] = 'beginning'
      df['E'] = 'foo'
      df['D'] = 'bar'
      df[datetime.now()] = 'date'
      df[datetime.now()] = 5.

      # what to do when empty frame with index
      dm = DataFrame(index=self.frame.index)
      dm['A'] = 'foo'
      dm['B'] = 'bar'
      assert len(dm.columns) == 2
      assert dm.values.dtype == np.object_

      # upcast
      dm['C'] = 1
      assert dm['C'].dtype == np.int64

      dm['E'] = 1.
      assert dm['E'].dtype == np.float64

      # set existing column
      dm['A'] = 'bar'
      assert 'bar' == dm['A'][0]

      # delete and re-create a string column on a small integer-indexed frame
      dm = DataFrame(index=np.arange(3))
      dm['A'] = 1
      dm['foo'] = 'bar'
      del dm['foo']
      dm['foo'] = 'bar'
      assert dm['foo'].dtype == np.object_

      # numeric-looking strings stay as objects
      dm['coercable'] = ['1', '2', '3']
      assert dm['coercable'].dtype == np.object_
  541. def test_setitem_corner2(self):
  542. data = {"title": ['foobar', 'bar', 'foobar'] + ['foobar'] * 17,
  543. "cruft": np.random.random(20)}
  544. df = DataFrame(data)
  545. ix = df[df['title'] == 'bar'].index
  546. df.loc[ix, ['title']] = 'foobar'
  547. df.loc[ix, ['cruft']] = 0
  548. assert df.loc[1, 'title'] == 'foobar'
  549. assert df.loc[1, 'cruft'] == 0
  def test_setitem_ambig(self):
      """Assigning mixed-type columns must not change the number of columns."""
      # Difficulties with mixed-type data
      from decimal import Decimal

      labels = lrange(3)

      # Created as float type
      dm = DataFrame(index=lrange(3), columns=lrange(3))

      coercable_series = Series([Decimal(1) for _ in range(3)],
                                index=labels)
      uncoercable_series = Series(['foo', 'bzr', 'baz'], index=lrange(3))

      dm[0] = np.ones(3)
      assert len(dm.columns) == 3

      dm[1] = coercable_series
      assert len(dm.columns) == 3

      dm[2] = uncoercable_series
      assert len(dm.columns) == 3
      assert dm[2].dtype == np.object_
  def test_setitem_clear_caches(self):
      """A ``.loc`` write must invalidate a previously fetched column."""
      # see gh-304
      df = DataFrame({'x': [1.1, 2.1, 3.1, 4.1], 'y': [5.1, 6.1, 7.1, 8.1]},
                     index=[0, 1, 2, 3])
      df.insert(2, 'z', np.nan)

      # cache it
      cached = df['z']

      df.loc[df.index[2:], 'z'] = 42

      assert df['z'] is not cached

      expected = Series([np.nan, np.nan, 42, 42], index=df.index, name='z')
      tm.assert_series_equal(df['z'], expected)
  def test_setitem_None(self):
      """``None`` is usable as a column label."""
      # GH #766
      self.frame[None] = self.frame['A']

      # the new column must be reachable positionally, via .loc and via []
      fetched = (
          self.frame.iloc[:, -1],
          self.frame.loc[:, None],
          self.frame[None],
      )
      for column in fetched:
          assert_series_equal(column, self.frame['A'], check_names=False)

      # smoke test: repr must cope with the None label
      repr(self.frame)
  586. def test_setitem_empty(self):
  587. # GH 9596
  588. df = pd.DataFrame({'a': ['1', '2', '3'],
  589. 'b': ['11', '22', '33'],
  590. 'c': ['111', '222', '333']})
  591. result = df.copy()
  592. result.loc[result.b.isna(), 'a'] = result.a
  593. assert_frame_equal(result, df)
  @pytest.mark.parametrize("dtype", ["float", "int64"])
  @pytest.mark.parametrize("kwargs", [
      dict(),
      dict(index=[1]),
      dict(columns=["A"])
  ])
  def test_setitem_empty_frame_with_boolean(self, dtype, kwargs):
      """Boolean-frame setitem on a frame without values is a no-op.

      Parameters
      ----------
      dtype : str
          dtype passed to the DataFrame constructor.
      kwargs : dict
          Remaining constructor arguments (nothing, an index, or columns).
      """
      # see gh-10126
      # Pass dtype alongside kwargs instead of writing it into the dict:
      # pytest hands the very same dict object to every dtype
      # parametrization, so mutating it would leak state between runs.
      df = DataFrame(dtype=dtype, **kwargs)

      df2 = df.copy()
      df[df > df2] = 47
      assert_frame_equal(df, df2)
  def test_setitem_scalars_no_index(self):
      """A scalar assigned to an empty frame creates an empty int64 column."""
      # GH16823 / 17894
      expected = DataFrame(columns=['foo']).astype(np.int64)

      df = DataFrame()
      df['foo'] = 1

      assert_frame_equal(df, expected)
  613. def test_getitem_empty_frame_with_boolean(self):
  614. # Test for issue #11859
  615. df = pd.DataFrame()
  616. df2 = df[df > 0]
  617. assert_frame_equal(df, df2)
  def test_delitem_corner(self):
      """Deleting a column twice raises KeyError; other columns still delete."""
      f = self.frame.copy()

      del f['D']
      assert len(f.columns) == 3

      # 'D' is already gone
      with pytest.raises(KeyError):
          del f['D']

      del f['B']
      assert len(f.columns) == 2
  def test_getitem_fancy_2d(self):
      """Two-dimensional reads through the deprecated ``.ix`` indexer."""
      f = self.frame
      cols_ba = ['B', 'A']

      # column list only
      with catch_warnings(record=True):
          simplefilter("ignore", DeprecationWarning)
          assert_frame_equal(f.ix[:, cols_ba],
                             f.reindex(columns=cols_ba))

      # row labels plus column list
      subidx = self.frame.index[[5, 4, 1]]
      with catch_warnings(record=True):
          simplefilter("ignore", DeprecationWarning)
          assert_frame_equal(f.ix[subidx, cols_ba],
                             f.reindex(index=subidx, columns=cols_ba))

      # slicing rows, etc.
      with catch_warnings(record=True):
          simplefilter("ignore", DeprecationWarning)
          assert_frame_equal(f.ix[5:10], f[5:10])
          assert_frame_equal(f.ix[5:10, :], f[5:10])
          assert_frame_equal(f.ix[:5, ['A', 'B']],
                             f.reindex(index=f.index[:5],
                                       columns=['A', 'B']))

      # slice rows with labels, inclusive!
      with catch_warnings(record=True):
          simplefilter("ignore", DeprecationWarning)
          by_position = f.ix[5:11]
          by_label = f.ix[f.index[5]:f.index[10]]
      assert_frame_equal(by_position, by_label)

      # slice columns
      with catch_warnings(record=True):
          simplefilter("ignore", DeprecationWarning)
          assert_frame_equal(f.ix[:, :2], f.reindex(columns=['A', 'B']))

      # get view: writing into the slice's values must show up in the frame
      with catch_warnings(record=True):
          simplefilter("ignore", DeprecationWarning)
          exp = f.copy()
          f.ix[5:10].values[:] = 5
          exp.values[5:10] = 5
      assert_frame_equal(f, exp)

      # a boolean frame is not accepted as a key
      with catch_warnings(record=True):
          simplefilter("ignore", DeprecationWarning)
          with pytest.raises(ValueError):
              f.ix[f > 0.5]
  664. def test_slice_floats(self):
  665. index = [52195.504153, 52196.303147, 52198.369883]
  666. df = DataFrame(np.random.rand(3, 2), index=index)
  667. s1 = df.loc[52195.1:52196.5]
  668. assert len(s1) == 2
  669. s1 = df.loc[52195.1:52196.6]
  670. assert len(s1) == 2
  671. s1 = df.loc[52195.1:52198.9]
  672. assert len(s1) == 3
  673. def test_getitem_fancy_slice_integers_step(self):
  674. df = DataFrame(np.random.randn(10, 5))
  675. # this is OK
  676. result = df.iloc[:8:2] # noqa
  677. df.iloc[:8:2] = np.nan
  678. assert isna(df.iloc[:8:2]).values.all()
  def test_getitem_setitem_integer_slice_keyerrors(self):
      """Integer slices on an even-integer index, positional versus label."""
      df = DataFrame(np.random.randn(10, 5), index=lrange(0, 20, 2))

      # this is OK
      cp = df.copy()
      cp.iloc[4:10] = 0
      assert (cp.iloc[4:10] == 0).values.all()

      # so is this
      cp = df.copy()
      cp.iloc[3:11] = 0
      assert (cp.iloc[3:11] == 0).values.all()

      # positions 2..5 and labels 3..11 both select labels 4, 6, 8, 10
      expected = df.reindex([4, 6, 8, 10])
      by_position = df.iloc[2:6]
      by_label = df.loc[3:11]
      assert_frame_equal(by_position, expected)
      assert_frame_equal(by_label, expected)

      # non-monotonic, raise KeyError
      df2 = df.iloc[lrange(5) + lrange(5, 10)[::-1]]
      with pytest.raises(KeyError):
          df2.loc[3:11]
      with pytest.raises(KeyError):
          df2.loc[3:11] = 0
  def test_setitem_fancy_2d(self):
      """Two-dimensional writes through the deprecated ``.ix`` indexer."""
      # case 1: scalar into a list of columns
      frame = self.frame.copy()
      expected = frame.copy()

      with catch_warnings(record=True):
          simplefilter("ignore", DeprecationWarning)
          frame.ix[:, ['B', 'A']] = 1
      expected['B'] = 1.
      expected['A'] = 1.
      assert_frame_equal(frame, expected)

      # case 2: 2-d array into (rows, columns), rows by label and by position
      frame = self.frame.copy()
      frame2 = self.frame.copy()

      expected = frame.copy()

      subidx = self.frame.index[[5, 4, 1]]
      values = np.random.randn(3, 2)

      with catch_warnings(record=True):
          simplefilter("ignore", DeprecationWarning)
          frame.ix[subidx, ['B', 'A']] = values
          frame2.ix[[5, 4, 1], ['B', 'A']] = values

          expected['B'].ix[subidx] = values[:, 0]
          expected['A'].ix[subidx] = values[:, 1]

      assert_frame_equal(frame, expected)
      assert_frame_equal(frame2, expected)

      # case 3: slicing rows, etc.
      frame = self.frame.copy()

      with catch_warnings(record=True):
          simplefilter("ignore", DeprecationWarning)
          expected1 = self.frame.copy()
          frame.ix[5:10] = 1.
          expected1.values[5:10] = 1.
      assert_frame_equal(frame, expected1)

      with catch_warnings(record=True):
          simplefilter("ignore", DeprecationWarning)
          expected2 = self.frame.copy()
          arr = np.random.randn(5, len(frame.columns))
          frame.ix[5:10] = arr
          expected2.values[5:10] = arr
      assert_frame_equal(frame, expected2)

      # case 4: same as case 3 with an explicit full column slice
      with catch_warnings(record=True):
          simplefilter("ignore", DeprecationWarning)
          frame = self.frame.copy()
          frame.ix[5:10, :] = 1.
          assert_frame_equal(frame, expected1)
          frame.ix[5:10, :] = arr
      assert_frame_equal(frame, expected2)

      # case 5: row slice with columns by label, then by position
      with catch_warnings(record=True):
          simplefilter("ignore", DeprecationWarning)
          frame = self.frame.copy()
          frame2 = self.frame.copy()

          expected = self.frame.copy()
          values = np.random.randn(5, 2)

          frame.ix[:5, ['A', 'B']] = values
          expected['A'][:5] = values[:, 0]
          expected['B'][:5] = values[:, 1]
      assert_frame_equal(frame, expected)

      with catch_warnings(record=True):
          simplefilter("ignore", DeprecationWarning)
          frame2.ix[:5, [0, 1]] = values
      assert_frame_equal(frame2, expected)

      # case 6: slice rows with labels, inclusive!
      with catch_warnings(record=True):
          simplefilter("ignore", DeprecationWarning)
          frame = self.frame.copy()
          expected = self.frame.copy()

          frame.ix[frame.index[5]:frame.index[10]] = 5.
          expected.values[5:11] = 5
      assert_frame_equal(frame, expected)

      # case 7: slice columns
      with catch_warnings(record=True):
          simplefilter("ignore", DeprecationWarning)
          frame = self.frame.copy()
          frame2 = self.frame.copy()
          expected = self.frame.copy()

          # slice indices
          frame.ix[:, 1:3] = 4.
          expected.values[:, 1:3] = 4.
          assert_frame_equal(frame, expected)

          # slice with labels
          frame.ix[:, 'B':'C'] = 4.
          assert_frame_equal(frame, expected)

      # new corner case of boolean slicing / setting
      frame = DataFrame(lzip([2, 3, 9, 6, 7], [np.nan] * 5),
                        columns=['a', 'b'])
      b_column = [100] + [np.nan] * 4
      expected = DataFrame(lzip([100, 3, 9, 6, 7], b_column),
                           columns=['a', 'b'])
      frame[frame['a'] == 2] = 100
      assert_frame_equal(frame, expected)
  def test_fancy_getitem_slice_mixed(self):
      """Column slices through ``iloc`` on mixed and single-dtype frames."""
      mixed_tail = self.mixed_frame.iloc[:, -3:]
      assert mixed_tail['D'].dtype == np.float64

      # get view with single block
      # setting it triggers setting with copy
      sliced = self.frame.iloc[:, -3:]

      with pytest.raises(com.SettingWithCopyError):
          sliced['C'] = 4.

      # the write is expected to be visible in the parent frame
      assert (self.frame['C'] == 4).all()
  def test_fancy_setitem_int_labels(self):
      """``.ix`` writes on an integer index are resolved by label."""
      # integer index defers to label-based indexing
      df = DataFrame(np.random.randn(10, 5), index=np.arange(0, 20, 2))

      # list of row labels: 0, 2, 4 are the first three rows
      with catch_warnings(record=True):
          simplefilter("ignore", DeprecationWarning)
          tmp = df.copy()
          exp = df.copy()
          tmp.ix[[0, 2, 4]] = 5
          exp.values[:3] = 5
      assert_frame_equal(tmp, exp)

      # single row label: 6 is the fourth row
      with catch_warnings(record=True):
          simplefilter("ignore", DeprecationWarning)
          tmp = df.copy()
          exp = df.copy()
          tmp.ix[6] = 5
          exp.values[3] = 5
      assert_frame_equal(tmp, exp)

      # single column label
      with catch_warnings(record=True):
          simplefilter("ignore", DeprecationWarning)
          tmp = df.copy()
          exp = df.copy()
          tmp.ix[:, 2] = 5

          # tmp correctly sets the dtype
          # so match the exp way
          exp[2] = 5
      assert_frame_equal(tmp, exp)
  def test_fancy_getitem_int_labels(self):
      """``.ix`` reads on an integer index are resolved by label."""
      df = DataFrame(np.random.randn(10, 5), index=np.arange(0, 20, 2))
      rows = [4, 2, 0]

      # row labels and column labels
      with catch_warnings(record=True):
          simplefilter("ignore", DeprecationWarning)
          result = df.ix[rows, [2, 0]]
          expected = df.reindex(index=rows, columns=[2, 0])
          assert_frame_equal(result, expected)

      # row labels only
      with catch_warnings(record=True):
          simplefilter("ignore", DeprecationWarning)
          result = df.ix[rows]
          expected = df.reindex(index=rows)
          assert_frame_equal(result, expected)

      # single row label
      with catch_warnings(record=True):
          simplefilter("ignore", DeprecationWarning)
          result = df.ix[4]
          expected = df.xs(4)
          assert_series_equal(result, expected)

      # single column label
      with catch_warnings(record=True):
          simplefilter("ignore", DeprecationWarning)
          result = df.ix[:, 3]
          expected = df[3]
          assert_series_equal(result, expected)
  def test_fancy_index_int_labels_exceptions(self):
      """``.ix`` writes with labels missing from the frame raise KeyError."""
      df = DataFrame(np.random.randn(10, 5), index=np.arange(0, 20, 2))

      with catch_warnings(record=True):
          simplefilter("ignore", DeprecationWarning)

          # labels that aren't contained
          with pytest.raises(KeyError):
              df.ix[[0, 1, 2], [2, 3, 4]] = 5

          # try to set indices not contained in frame
          with pytest.raises(KeyError):
              self.frame.ix[['foo', 'bar', 'baz']] = 1
          with pytest.raises(KeyError):
              self.frame.ix[:, ['E']] = 1

          # partial setting now allows this GH2578
          # pytest.raises(KeyError, self.frame.ix.__setitem__,
          #               (slice(None, None), 'E'), 1)
  def test_setitem_fancy_mixed_2d(self):
      """``.ix`` writes on frames holding more than one dtype."""
      abc_reversed = ['C', 'B', 'A']

      with catch_warnings(record=True):
          simplefilter("ignore", DeprecationWarning)

          # scalar into a row slice / column list
          self.mixed_frame.ix[:5, abc_reversed] = 5
          result = self.mixed_frame.ix[:5, abc_reversed]
          assert (result.values == 5).all()

          # a whole row set to NaN
          self.mixed_frame.ix[5] = np.nan
          assert isna(self.mixed_frame.ix[5]).all()

          # a whole row copied from another row
          self.mixed_frame.ix[5] = self.mixed_frame.ix[6]
          assert_series_equal(self.mixed_frame.ix[5], self.mixed_frame.ix[6],
                              check_names=False)

      # #1432
      with catch_warnings(record=True):
          simplefilter("ignore", DeprecationWarning)
          df = DataFrame({1: [1., 2., 3.],
                          2: [3, 4, 5]})
          assert df._is_mixed_type

          df.ix[1] = [5, 10]

          expected = DataFrame({1: [1., 5., 3.],
                                2: [3, 10, 5]})

          assert_frame_equal(df, expected)
  def test_ix_align(self):
      """A Series assigned through ``.ix`` is matched up by label."""
      # b is sorted by value, so its index is no longer in order
      b = Series(np.random.randn(10), name=0).sort_values()
      df_orig = DataFrame(np.random.randn(10, 4))
      df = df_orig.copy()

      # whole column
      with catch_warnings(record=True):
          simplefilter("ignore", DeprecationWarning)
          df.ix[:, 0] = b
          assert_series_equal(df.ix[:, 0].reindex(b.index), b)

      # whole row of the transposed frame
      with catch_warnings(record=True):
          simplefilter("ignore", DeprecationWarning)
          dft = df_orig.T
          dft.ix[0, :] = b
          assert_series_equal(dft.ix[0, :].reindex(b.index), b)

      # column, row slice
      with catch_warnings(record=True):
          simplefilter("ignore", DeprecationWarning)
          df = df_orig.copy()
          df.ix[:5, 0] = b
          s = df.ix[:5, 0]
          assert_series_equal(s, b.reindex(s.index))

      # row of the transposed frame, column slice
      with catch_warnings(record=True):
          simplefilter("ignore", DeprecationWarning)
          dft = df_orig.T
          dft.ix[0, :5] = b
          s = dft.ix[0, :5]
          assert_series_equal(s, b.reindex(s.index))

      # column, list of row labels
      with catch_warnings(record=True):
          simplefilter("ignore", DeprecationWarning)
          df = df_orig.copy()
          idx = [0, 1, 3, 5]
          df.ix[idx, 0] = b
          s = df.ix[idx, 0]
          assert_series_equal(s, b.reindex(s.index))

      # row of the transposed frame, list of column labels
      with catch_warnings(record=True):
          simplefilter("ignore", DeprecationWarning)
          dft = df_orig.T
          dft.ix[0, idx] = b
          s = dft.ix[0, idx]
          assert_series_equal(s, b.reindex(s.index))
  def test_ix_frame_align(self):
      """A DataFrame assigned through ``.ix`` is matched up by label."""
      b = DataFrame(np.random.randn(3, 4))
      df_orig = DataFrame(np.random.randn(10, 4))
      df = df_orig.copy()

      # row slice: read the rows back from ``df`` (the frame that was
      # written to) and compare them with ``b`` aligned on those rows;
      # reading back from ``b`` would compare ``b`` with itself
      with catch_warnings(record=True):
          simplefilter("ignore", DeprecationWarning)
          df.ix[:3] = b
          out = df.ix[:3]
          assert_frame_equal(out, b.reindex(out.index))

      b.sort_index(inplace=True)

      # list of row labels
      with catch_warnings(record=True):
          simplefilter("ignore", DeprecationWarning)
          df = df_orig.copy()
          df.ix[[0, 1, 2]] = b
          out = df.ix[[0, 1, 2]].reindex(b.index)
          assert_frame_equal(out, b)

      # row slice again, after ``b`` has been sorted by index
      with catch_warnings(record=True):
          simplefilter("ignore", DeprecationWarning)
          df = df_orig.copy()
          df.ix[:3] = b
          out = df.ix[:3]
          assert_frame_equal(out, b.reindex(out.index))
  def test_getitem_setitem_non_ix_labels(self):
      """Label slices via ``.loc`` and ``[]`` match the positional slice 5:11."""
      df = tm.makeTimeDataFrame()

      start, end = df.index[[5, 10]]

      # reading
      expected = df[5:11]
      via_loc = df.loc[start:end]
      via_getitem = df[start:end]
      assert_frame_equal(via_loc, expected)
      assert_frame_equal(via_getitem, expected)

      # writing
      via_loc = df.copy()
      via_loc.loc[start:end] = 0
      via_setitem = df.copy()
      via_setitem[start:end] = 0
      expected = df.copy()
      expected[5:11] = 0
      assert_frame_equal(via_loc, expected)
      assert_frame_equal(via_setitem, expected)
  959. def test_ix_multi_take(self):
  960. df = DataFrame(np.random.randn(3, 2))
  961. rs = df.loc[df.index == 0, :]
  962. xp = df.reindex([0])
  963. assert_frame_equal(rs, xp)
  964. """ #1321
  965. df = DataFrame(np.random.randn(3, 2))
  966. rs = df.loc[df.index==0, df.columns==1]
  967. xp = df.reindex([0], [1])
  968. assert_frame_equal(rs, xp)
  969. """
  def test_ix_multi_take_nonint_index(self):
      """Integer lists passed to ``.ix`` are positional on string labels."""
      df = DataFrame(np.random.randn(3, 2), index=['x', 'y', 'z'],
                     columns=['a', 'b'])

      with catch_warnings(record=True):
          simplefilter("ignore", DeprecationWarning)
          rs = df.ix[[0], [0]]

      # position 0 is row 'x' / column 'a'
      xp = df.reindex(['x'], columns=['a'])
      assert_frame_equal(rs, xp)
  def test_ix_multi_take_multiindex(self):
      """Integer lists passed to ``.ix`` are positional on two-level columns."""
      df = DataFrame(np.random.randn(3, 2), index=['x', 'y', 'z'],
                     columns=[['a', 'b'], ['1', '2']])

      with catch_warnings(record=True):
          simplefilter("ignore", DeprecationWarning)
          rs = df.ix[[0], [0]]

      # position 0 is row 'x' / column ('a', '1')
      xp = df.reindex(['x'], columns=[('a', '1')])
      assert_frame_equal(rs, xp)
  def test_ix_dup(self):
      """Label slices through ``.ix`` on an index with repeated labels."""
      idx = Index(['a', 'a', 'b', 'c', 'd', 'd'])
      df = DataFrame(np.random.randn(len(idx), 3), idx)

      # open start: everything up to and including the last 'd'
      with catch_warnings(record=True):
          simplefilter("ignore", DeprecationWarning)
          sub = df.ix[:'d']
          assert_frame_equal(sub, df)

      # both 'a' rows through 'c'
      with catch_warnings(record=True):
          simplefilter("ignore", DeprecationWarning)
          sub = df.ix['a':'c']
          assert_frame_equal(sub, df.ix[0:4])

      # 'b' through both 'd' rows
      with catch_warnings(record=True):
          simplefilter("ignore", DeprecationWarning)
          sub = df.ix['b':'d']
          assert_frame_equal(sub, df.ix[2:])
  def test_getitem_fancy_1d(self):
      """One-dimensional reads through the deprecated ``.ix`` indexer."""
      f = self.frame

      # return self if no slicing...for now
      with catch_warnings(record=True):
          simplefilter("ignore", DeprecationWarning)
          assert f.ix[:, :] is f

      # low dimensional slice
      with catch_warnings(record=True):
          simplefilter("ignore", DeprecationWarning)
          xs1 = f.ix[2, ['C', 'B', 'A']]
      xs2 = f.xs(f.index[2]).reindex(['C', 'B', 'A'])
      tm.assert_series_equal(xs1, xs2)

      # row slice of a single column given by position
      with catch_warnings(record=True):
          simplefilter("ignore", DeprecationWarning)
          ts1 = f.ix[5:10, 2]
      ts2 = f[f.columns[2]][5:10]
      tm.assert_series_equal(ts1, ts2)

      # positional xs
      with catch_warnings(record=True):
          simplefilter("ignore", DeprecationWarning)
          xs1 = f.ix[0]
      xs2 = f.xs(f.index[0])
      tm.assert_series_equal(xs1, xs2)

      # xs by label
      with catch_warnings(record=True):
          simplefilter("ignore", DeprecationWarning)
          xs1 = f.ix[f.index[5]]
      xs2 = f.xs(f.index[5])
      tm.assert_series_equal(xs1, xs2)

      # single column
      with catch_warnings(record=True):
          simplefilter("ignore", DeprecationWarning)
          assert_series_equal(f.ix[:, 'A'], f['A'])

      # return view: writing into the selected row must change the frame
      with catch_warnings(record=True):
          simplefilter("ignore", DeprecationWarning)
          exp = f.copy()
          exp.values[5] = 4
          f.ix[5][:] = 4
      tm.assert_frame_equal(exp, f)

      # the same for a column selected by position
      with catch_warnings(record=True):
          simplefilter("ignore", DeprecationWarning)
          exp.values[:, 1] = 6
          f.ix[:, 1][:] = 6
      tm.assert_frame_equal(exp, f)

      # slice of mixed-frame
      with catch_warnings(record=True):
          simplefilter("ignore", DeprecationWarning)
          xs = self.mixed_frame.ix[5]
      exp = self.mixed_frame.xs(self.mixed_frame.index[5])
      tm.assert_series_equal(xs, exp)
  def test_setitem_fancy_1d(self):
      """One-dimensional writes through the deprecated ``.ix`` indexer.

      Cases 1 and 2 perform the same write twice, once with column labels
      and once with the equivalent column positions, and check both
      resulting frames against one expectation.
      """
      # case 1: set cross-section for indices
      frame = self.frame.copy()
      expected = self.frame.copy()

      with catch_warnings(record=True):
          simplefilter("ignore", DeprecationWarning)
          frame.ix[2, ['C', 'B', 'A']] = [1., 2., 3.]
      expected['C'][2] = 1.
      expected['B'][2] = 2.
      expected['A'][2] = 3.
      assert_frame_equal(frame, expected)

      with catch_warnings(record=True):
          simplefilter("ignore", DeprecationWarning)
          frame2 = self.frame.copy()
          # positions 2, 1, 0 are the columns 'C', 'B', 'A'
          frame2.ix[2, [2, 1, 0]] = [1., 2., 3.]
      # check the frame that was just written, not ``frame`` a second time
      assert_frame_equal(frame2, expected)

      # case 2, set a section of a column
      frame = self.frame.copy()
      expected = self.frame.copy()

      with catch_warnings(record=True):
          simplefilter("ignore", DeprecationWarning)
          vals = np.random.randn(5)
          expected.values[5:10, 2] = vals
          frame.ix[5:10, 2] = vals
      assert_frame_equal(frame, expected)

      with catch_warnings(record=True):
          simplefilter("ignore", DeprecationWarning)
          frame2 = self.frame.copy()
          # 'C' is the column at position 2 written above
          frame2.ix[5:10, 'C'] = vals
      assert_frame_equal(frame2, expected)

      # case 3: full xs
      frame = self.frame.copy()
      expected = self.frame.copy()

      with catch_warnings(record=True):
          simplefilter("ignore", DeprecationWarning)
          frame.ix[4] = 5.
          expected.values[4] = 5.
      assert_frame_equal(frame, expected)

      with catch_warnings(record=True):
          simplefilter("ignore", DeprecationWarning)
          frame.ix[frame.index[4]] = 6.
          expected.values[4] = 6.
      assert_frame_equal(frame, expected)

      # single column
      frame = self.frame.copy()
      expected = self.frame.copy()

      with catch_warnings(record=True):
          simplefilter("ignore", DeprecationWarning)
          frame.ix[:, 'A'] = 7.
      expected['A'] = 7.
      assert_frame_equal(frame, expected)
  def test_getitem_fancy_scalar(self):
      """``.loc[row, col]`` returns the same scalar as ``frame[col][row]``."""
      f = self.frame
      loc = f.loc

      # individual value
      for col in f.columns:
          column = f[col]
          # sample every fifth row label
          for label in f.index[::5]:
              assert loc[label, col] == column[label]
  def test_setitem_fancy_scalar(self):
      """Scalar writes through ``.loc`` match direct writes into ``.values``."""
      f = self.frame
      expected = self.frame.copy()
      loc = f.loc

      # individual value
      for col_pos, col in enumerate(f.columns):
          ts = f[col]  # noqa
          # sample every fifth row label
          for label in f.index[::5]:
              row_pos = f.index.get_loc(label)
              val = np.random.randn()
              expected.values[row_pos, col_pos] = val

              loc[label, col] = val
              assert_frame_equal(f, expected)
  def test_getitem_fancy_boolean(self):
      """Boolean lists and arrays as row or column selectors for ``.loc``."""
      f = self.frame
      loc = f.loc
      pick_b_d = [False, True, False, True]

      # boolean list over the columns
      expected = f.reindex(columns=['B', 'D'])
      result = loc[:, pick_b_d]
      assert_frame_equal(result, expected)

      # row labels combined with the boolean column list
      expected = f.reindex(index=f.index[5:10], columns=['B', 'D'])
      result = loc[f.index[5:10], pick_b_d]
      assert_frame_equal(result, expected)

      # boolean array over the rows, with and without a column slice
      boolvec = f.index > f.index[7]
      expected = f.reindex(index=f.index[boolvec])
      result = loc[boolvec]
      assert_frame_equal(result, expected)
      result = loc[boolvec, :]
      assert_frame_equal(result, expected)

      # boolean rows combined with column labels
      result = loc[boolvec, f.columns[2:]]
      expected = f.reindex(index=f.index[boolvec],
                           columns=['C', 'D'])
      assert_frame_equal(result, expected)
  def test_setitem_fancy_boolean(self):
      """Writes through ``.loc`` with a boolean row mask."""
      # from 2d, set with booleans
      frame = self.frame.copy()
      expected = self.frame.copy()

      # all columns of the masked rows
      mask = frame['A'] > 0
      frame.loc[mask] = 0.
      expected.values[mask.values] = 0.
      assert_frame_equal(frame, expected)

      # only 'A' and 'B' (the first two columns) of the masked rows
      frame = self.frame.copy()
      expected = self.frame.copy()
      frame.loc[mask, ['A', 'B']] = 0.
      expected.values[mask.values, :2] = 0.
      assert_frame_equal(frame, expected)
  def test_getitem_fancy_ints(self):
      """``iloc`` with integer lists matches ``loc`` with the same labels."""
      row_positions = [1, 4, 7]
      result = self.frame.iloc[row_positions]
      expected = self.frame.loc[self.frame.index[row_positions]]
      assert_frame_equal(result, expected)

      col_positions = [2, 0, 1]
      result = self.frame.iloc[:, col_positions]
      expected = self.frame.loc[:, self.frame.columns[col_positions]]
      assert_frame_equal(result, expected)
  def test_getitem_setitem_fancy_exceptions(self):
      """Three indexers on a two-dimensional frame raise IndexingError."""
      iloc = self.frame.iloc

      # reading
      with pytest.raises(IndexingError, match='Too many indexers'):
          iloc[:, :, :]

      # writing
      with pytest.raises(IndexingError):
          iloc[:, :, :] = 1
  def test_getitem_setitem_boolean_misaligned(self):
      """A boolean Series mask in reversed index order still selects by label."""
      # boolean index misaligned labels
      reversed_mask = self.frame['A'][::-1] > 1

      # reading
      result = self.frame.loc[reversed_mask]
      expected = self.frame.loc[reversed_mask[::-1]]
      assert_frame_equal(result, expected)

      # writing
      cp = self.frame.copy()
      expected = self.frame.copy()
      cp.loc[reversed_mask] = 0
      expected.loc[reversed_mask] = 0
      assert_frame_equal(cp, expected)
  1179. def test_getitem_setitem_boolean_multi(self):
  1180. df = DataFrame(np.random.randn(3, 2))
  1181. # get
  1182. k1 = np.array([True, False, True])
  1183. k2 = np.array([False, True])
  1184. result = df.loc[k1, k2]
  1185. expected = df.loc[[0, 2], [1]]
  1186. assert_frame_equal(result, expected)
  1187. expected = df.copy()
  1188. df.loc[np.array([True, False, True]),
  1189. np.array([False, True])] = 5
  1190. expected.loc[[0, 2], [1]] = 5
  1191. assert_frame_equal(df, expected)
  def test_getitem_setitem_float_labels(self):
      """Label and positional slicing on frames with a float index."""
      index = Index([1.5, 2, 3, 4, 5])
      df = DataFrame(np.random.randn(5, 5), index=index)

      result = df.loc[1.5:4]
      expected = df.reindex([1.5, 2, 3, 4])
      assert_frame_equal(result, expected)
      assert len(result) == 4

      result = df.loc[4:5]
      expected = df.reindex([4, 5])  # reindex with int
      assert_frame_equal(result, expected, check_index_type=False)
      assert len(result) == 2

      result = df.loc[4:5]
      expected = df.reindex([4.0, 5.0])  # reindex with float
      assert_frame_equal(result, expected)
      assert len(result) == 2

      # loc_float changes this to work properly
      result = df.loc[1:2]
      expected = df.iloc[0:2]
      assert_frame_equal(result, expected)

      df.loc[1:2] = 0
      result = df[1:2]
      assert (result == 0).all().all()

      # #2727
      index = Index([1.0, 2.5, 3.5, 4.5, 5.0])
      df = DataFrame(np.random.randn(5, 5), index=index)

      # positional slicing only via iloc!
      with pytest.raises(TypeError):
          df.iloc[1.0:5]

      result = df.iloc[4:5]
      expected = df.reindex([5.0])
      assert_frame_equal(result, expected)
      assert len(result) == 1

      cp = df.copy()

      with pytest.raises(TypeError):
          cp.iloc[1.0:5] = 0

      with pytest.raises(TypeError):
          cp.iloc[1.0:5] == 0  # noqa

      # Both statements above raise, so they produce no value to inspect.
      # What can be checked is that the rejected write left ``cp`` untouched.
      assert_frame_equal(cp, df)

      cp = df.copy()
      cp.iloc[4:5] = 0
      assert (cp.iloc[4:5] == 0).values.all()
      assert (cp.iloc[0:4] == df.iloc[0:4]).values.all()

      # float slicing
      result = df.loc[1.0:5]
      expected = df
      assert_frame_equal(result, expected)
      assert len(result) == 5

      result = df.loc[1.1:5]
      expected = df.reindex([2.5, 3.5, 4.5, 5.0])
      assert_frame_equal(result, expected)
      assert len(result) == 4

      result = df.loc[4.51:5]
      expected = df.reindex([5.0])
      assert_frame_equal(result, expected)
      assert len(result) == 1

      result = df.loc[1.0:5.0]
      expected = df.reindex([1.0, 2.5, 3.5, 4.5, 5.0])
      assert_frame_equal(result, expected)
      assert len(result) == 5

      cp = df.copy()
      cp.loc[1.0:5.0] = 0
      result = cp.loc[1.0:5.0]
      assert (result == 0).values.all()
  def test_setitem_single_column_mixed(self):
      """NaN can be written into part of a string column of a mixed frame."""
      df = DataFrame(np.random.randn(5, 3), index=['a', 'b', 'c', 'd', 'e'],
                     columns=['foo', 'bar', 'baz'])
      df['str'] = 'qux'

      # blank out every second row of the string column
      every_second = df.index[::2]
      df.loc[every_second, 'str'] = np.nan

      expected = np.array([np.nan, 'qux', np.nan, 'qux', np.nan],
                          dtype=object)
      assert_almost_equal(df['str'].values, expected)
  def test_setitem_single_column_mixed_datetime(self):
      """Missing values can be written into a datetime column of a mixed frame."""
      df = DataFrame(np.random.randn(5, 3), index=['a', 'b', 'c', 'd', 'e'],
                     columns=['foo', 'bar', 'baz'])

      df['timestamp'] = Timestamp('20010102')

      # check our dtypes
      counts = df.get_dtype_counts()
      wanted = Series({'float64': 3, 'datetime64[ns]': 1})
      assert_series_equal(counts, wanted)

      # set an allowable datetime64 type
      df.loc['b', 'timestamp'] = iNaT
      assert isna(df.loc['b', 'timestamp'])

      # allow this syntax
      df.loc['c', 'timestamp'] = np.nan
      assert isna(df.loc['c', 'timestamp'])

      # allow this syntax
      # NOTE(review): row 'd' is written but row 'c' is inspected; possibly
      # an intentional "other rows untouched" check - confirm.
      df.loc['d', :] = np.nan
      assert not isna(df.loc['c', :]).all()

      # as of GH 3216 this will now work!
      # try to set with a list like item
      # pytest.raises(
      #    Exception, df.loc.__setitem__, ('d', 'timestamp'), [np.nan])
  def test_setitem_mixed_datetime(self):
      """Mixed datetime/int/str assignments into one column (GH 9336)."""
      frame = pd.DataFrame(0, columns=list('ab'), index=range(6))
      frame['b'] = pd.NaT
      frame.loc[0, 'b'] = pd.datetime(2012, 1, 1)
      frame.loc[1, 'b'] = 1
      frame.loc[[2, 3], 'b'] = ('x', 'y')
      block = np.array([[13, np.datetime64('2013-01-01T00:00:00')],
                        [14, np.datetime64('2014-01-01T00:00:00')]])
      frame.loc[[4, 5], ['a', 'b']] = block

      b_values = [pd.datetime(2012, 1, 1), 1, 'x', 'y',
                  pd.datetime(2013, 1, 1), pd.datetime(2014, 1, 1)]
      wanted = DataFrame({'a': [0, 0, 0, 0, 13, 14], 'b': b_values})
      assert_frame_equal(frame, wanted)
  def test_setitem_frame(self):
      """Assign frames and ndarrays into ``.loc`` selections of a frame."""
      cols = ['A', 'B']

      # raw values copied from the first two rows onto the last two
      piece = self.frame.loc[self.frame.index[:2], cols]
      self.frame.loc[self.frame.index[-2]:, cols] = piece.values
      written = self.frame.loc[self.frame.index[-2:], cols].values
      assert_almost_equal(written, piece.values)

      # GH 3216
      # already aligned
      target = self.mixed_frame.copy()
      piece = DataFrame([[1., 2.], [3., 4.]],
                        index=target.index[0:2], columns=cols)
      target.loc[:2, cols] = piece
      assert_almost_equal(target.loc[target.index[0:2], cols].values,
                          piece.values)

      # rows unaligned
      target = self.mixed_frame.copy()
      piece = DataFrame([[1., 2.], [3., 4.], [5., 6.], [7., 8.]],
                        index=list(target.index[0:2]) + ['foo', 'bar'],
                        columns=cols)
      target.loc[:2, cols] = piece
      assert_almost_equal(target.loc[target.index[0:2], cols].values,
                          piece.values[0:2])

      # key is unaligned with values
      target = self.mixed_frame.copy()
      piece = target.loc[target.index[:2], ['A']]
      piece.index = target.index[-2:]
      target.loc[-2:, cols] = piece
      # column 'B' is absent from the assigned piece, so NaN is expected
      piece['B'] = np.nan
      assert_almost_equal(target.loc[target.index[-2:], cols].values,
                          piece.values)

      # ndarray
      target = self.mixed_frame.copy()
      piece = self.mixed_frame.loc[target.index[:2], cols]
      target.loc[-2:, cols] = piece.values
      assert_almost_equal(target.loc[target.index[-2:], cols].values,
                          piece.values)

      # needs upcasting
      ints = DataFrame([[1, 2, 'foo'], [3, 4, 'bar']],
                       columns=['A', 'B', 'C'])
      shifted = ints.copy()
      shifted.loc[:, cols] = ints.loc[:, cols] + 0.5
      wanted = ints.reindex(columns=cols)
      wanted += 0.5
      wanted['C'] = ints['C']
      assert_frame_equal(shifted, wanted)
  def test_setitem_frame_align(self):
      """Assign a relabelled frame piece onto the last two rows."""
      cols = ['A', 'B']
      tail = self.frame.index[-2:]

      # take the first two rows, then relabel them as the last two
      piece = self.frame.loc[self.frame.index[:2], cols]
      piece.index = tail
      piece.columns = ['A', 'B']

      self.frame.loc[tail, cols] = piece
      written = self.frame.loc[tail, cols].values
      assert_almost_equal(written, piece.values)
  1358. def test_getitem_setitem_ix_duplicates(self):
  1359. # #1201
  1360. df = DataFrame(np.random.randn(5, 3),
  1361. index=['foo', 'foo', 'bar', 'baz', 'bar'])
  1362. result = df.loc['foo']
  1363. expected = df[:2]
  1364. assert_frame_equal(result, expected)
  1365. result = df.loc['bar']
  1366. expected = df.iloc[[2, 4]]
  1367. assert_frame_equal(result, expected)
  1368. result = df.loc['baz']
  1369. expected = df.iloc[3]
  1370. assert_series_equal(result, expected)
  1371. def test_getitem_ix_boolean_duplicates_multiple(self):
  1372. # #1201
  1373. df = DataFrame(np.random.randn(5, 3),
  1374. index=['foo', 'foo', 'bar', 'baz', 'bar'])
  1375. result = df.loc[['bar']]
  1376. exp = df.iloc[[2, 4]]
  1377. assert_frame_equal(result, exp)
  1378. result = df.loc[df[1] > 0]
  1379. exp = df[df[1] > 0]
  1380. assert_frame_equal(result, exp)
  1381. result = df.loc[df[0] > 0]
  1382. exp = df[df[0] > 0]
  1383. assert_frame_equal(result, exp)
  def test_getitem_setitem_ix_bool_keyerror(self):
      """Boolean scalars used as ``.loc`` keys raise ``KeyError`` (#2199)."""
      frame = DataFrame({'a': [1, 2, 3]})

      # reading
      for flag in (False, True):
          with pytest.raises(KeyError):
              frame.loc[flag]

      # writing
      for flag in (False, True):
          with pytest.raises(KeyError):
              frame.loc[flag] = 0
  1391. def test_getitem_list_duplicates(self):
  1392. # #1943
  1393. df = DataFrame(np.random.randn(4, 4), columns=list('AABC'))
  1394. df.columns.name = 'foo'
  1395. result = df[['B', 'C']]
  1396. assert result.columns.name == 'foo'
  1397. expected = df.iloc[:, 2:]
  1398. assert_frame_equal(result, expected)
  def test_get_value(self):
      """``get_value`` warns and agrees with chained ``frame[col][idx]``."""
      for row_label in self.frame.index:
          for col_label in self.frame.columns:
              # each call is expected to emit a FutureWarning
              with tm.assert_produces_warning(FutureWarning,
                                              check_stacklevel=False):
                  fetched = self.frame.get_value(row_label, col_label)
              assert fetched == self.frame[col_label][row_label]
  def test_lookup(self):
      """Compare ``DataFrame.lookup`` against element-wise ``get_value``."""
      def by_get_value(df, rows, cols, dtype):
          # reference implementation: one get_value call per (row, col) pair
          with tm.assert_produces_warning(FutureWarning,
                                          check_stacklevel=False):
              picked = [df.get_value(r, c) for r, c in zip(rows, cols)]
          return np.array(picked, dtype=dtype)

      def check(df):
          rows = list(df.index) * len(df.columns)
          cols = list(df.columns) * len(df.index)
          got = df.lookup(rows, cols)
          want = by_get_value(df, rows, cols, dtype=np.object_)
          tm.assert_almost_equal(got, want, check_dtype=False)

      check(self.mixed_frame)
      check(self.frame)

      # pick, per row, the boolean from the column named by 'label'
      flags = DataFrame({'label': ['a', 'b', 'a', 'c'],
                         'mask_a': [True, True, False, True],
                         'mask_b': [True, False, False, False],
                         'mask_c': [False, True, False, True]})
      mask_columns = 'mask_' + flags['label']
      flags['mask'] = flags.lookup(flags.index, mask_columns)
      want_mask = by_get_value(flags, flags.index, mask_columns,
                               dtype=np.bool_)
      tm.assert_series_equal(flags['mask'],
                             pd.Series(want_mask, name='mask'))
      assert flags['mask'].dtype == np.bool_

      # unknown row label
      with pytest.raises(KeyError):
          self.frame.lookup(['xyz'], ['A'])

      # unknown column label
      with pytest.raises(KeyError):
          self.frame.lookup([self.frame.index[0]], ['xyz'])

      # row and column lists of different lengths
      with pytest.raises(ValueError, match='same size'):
          self.frame.lookup(['a', 'b', 'c'], ['a'])
  def test_set_value(self):
      """``set_value`` warns and stores the value in every cell."""
      for row_label in self.frame.index:
          for col_label in self.frame.columns:
              with tm.assert_produces_warning(FutureWarning,
                                              check_stacklevel=False):
                  self.frame.set_value(row_label, col_label, 1)
              assert self.frame[col_label][row_label] == 1
  def test_set_value_resize(self):
      """``set_value`` with a new row label enlarges the frame."""
      def warns():
          # each get_value/set_value call below must emit a FutureWarning
          return tm.assert_produces_warning(FutureWarning,
                                            check_stacklevel=False)

      with warns():
          grown = self.frame.set_value('foobar', 'B', 0)
      # the same object is returned, with the new label appended
      assert grown is self.frame
      assert grown.index[-1] == 'foobar'
      with warns():
          assert grown.get_value('foobar', 'B') == 0

      self.frame.loc['foobar', 'qux'] = 0
      with warns():
          assert self.frame.get_value('foobar', 'qux') == 0

      # a str or a bool placed in a new column yields object dtype
      for new_value in ('sam', True):
          scratch = self.frame.copy()
          with warns():
              updated = scratch.set_value('foobar', 'baz', new_value)
          assert updated['baz'].dtype == np.object_

      # an int yields a float column that is missing on every other row
      scratch = self.frame.copy()
      with warns():
          updated = scratch.set_value('foobar', 'baz', 5)
      assert is_float_dtype(updated['baz'])
      assert isna(updated['baz'].drop(['foobar'])).all()

      # a str can no longer be stored in that float column
      with warns():
          pytest.raises(ValueError, updated.set_value, 'foobar', 'baz', 'sam')
  def test_set_value_with_index_dtype_change(self):
      """Enlarge an integer-indexed frame with string labels."""
      base = DataFrame(np.random.randn(3, 3),
                       index=lrange(3), columns=list('ABC'))
      rows_after = list(base.index) + ['C']
      cols_after = list(base.columns) + ['D']

      # this is actually ambiguous as the 2 is interpreted as a positional
      # so column is not created; only the index is checked
      via_set_value = base.copy()
      with tm.assert_produces_warning(FutureWarning,
                                      check_stacklevel=False):
          via_set_value.set_value('C', 2, 1.0)
      assert list(via_set_value.index) == rows_after

      via_loc = base.copy()
      via_loc.loc['C', 2] = 1.0
      assert list(via_loc.index) == rows_after

      # create both new
      via_set_value = base.copy()
      with tm.assert_produces_warning(FutureWarning,
                                      check_stacklevel=False):
          via_set_value.set_value('C', 'D', 1.0)
      assert list(via_set_value.index) == rows_after
      assert list(via_set_value.columns) == cols_after

      via_loc = base.copy()
      via_loc.loc['C', 'D'] = 1.0
      assert list(via_loc.index) == rows_after
      assert list(via_loc.columns) == cols_after
  def test_get_set_value_no_partial_indexing(self):
      """``get_value`` with a partial MultiIndex key raises ``KeyError``."""
      pairs = [(0, 1), (0, 2), (1, 1), (1, 2)]
      frame = DataFrame(index=MultiIndex.from_tuples(pairs),
                        columns=lrange(4))

      # 0 names only the first level of the two-level row index
      with tm.assert_produces_warning(FutureWarning,
                                      check_stacklevel=False):
          pytest.raises(KeyError, frame.get_value, 0, 1)
  def test_single_element_ix_dont_upcast(self):
      """Scalar lookups in an integer column return integers."""
      self.frame['E'] = 1
      assert issubclass(self.frame['E'].dtype.type, (int, np.integer))

      sixth_label = self.frame.index[5]
      with catch_warnings(record=True):
          simplefilter("ignore", DeprecationWarning)
          via_ix = self.frame.ix[sixth_label, 'E']
          assert is_integer(via_ix)

      via_loc = self.frame.loc[sixth_label, 'E']
      assert is_integer(via_loc)

      # GH 11617
      mixed = pd.DataFrame(dict(a=[1.23]))
      mixed["b"] = 666

      with catch_warnings(record=True):
          simplefilter("ignore", DeprecationWarning)
          via_ix = mixed.ix[0, "b"]
      assert is_integer(via_ix)
      via_loc = mixed.loc[0, "b"]
      assert is_integer(via_loc)

      # list-like row selection returns a Series instead of a scalar
      wanted = Series([666], [0], name='b')
      with catch_warnings(record=True):
          simplefilter("ignore", DeprecationWarning)
          via_ix = mixed.ix[[0], "b"]
      assert_series_equal(via_ix, wanted)
      via_loc = mixed.loc[[0], "b"]
      assert_series_equal(via_loc, wanted)
  def test_iloc_row(self):
      """Positional row access with ``.iloc``: scalar, slice and list."""
      frame = DataFrame(np.random.randn(10, 4), index=lrange(0, 20, 2))

      # the index holds even labels, so position k carries label 2 * k
      for position, label in ((1, 2), (2, 4)):
          assert_series_equal(frame.iloc[position], frame.loc[label])

      # slice
      window = frame.iloc[4:8]
      assert_frame_equal(window, frame.loc[8:14])

      # verify slice is view
      # setting it makes it raise/warn
      with pytest.raises(com.SettingWithCopyError):
          window[2] = 0.

      column = frame[2].copy()
      column[4:8] = 0.
      assert_series_equal(frame[2], column)

      # list of integers
      positions = [1, 2, 4, 6]
      assert_frame_equal(frame.iloc[positions],
                         frame.reindex(frame.index[positions]))
  def test_iloc_col(self):
      """Positional column access with ``.iloc``: scalar, slice and list."""
      frame = DataFrame(np.random.randn(4, 10), columns=lrange(0, 20, 2))

      # the columns hold even labels, so position k carries label 2 * k
      for position, label in ((1, 2), (2, 4)):
          assert_series_equal(frame.iloc[:, position], frame.loc[:, label])

      # slice
      window = frame.iloc[:, 4:8]
      assert_frame_equal(window, frame.loc[:, 8:14])

      # verify slice is view
      # and that we are setting a copy
      with pytest.raises(com.SettingWithCopyError):
          window[8] = 0.

      assert (frame[8] == 0).all()

      # list of integers
      positions = [1, 2, 4, 6]
      assert_frame_equal(frame.iloc[:, positions],
                         frame.reindex(columns=frame.columns[positions]))
  def test_iloc_duplicates(self):
      """``.iloc`` agrees with ``.ix`` when labels are duplicated."""
      frame = DataFrame(np.random.rand(3, 3), columns=list('ABC'),
                        index=list('aab'))

      # first row, by position
      via_iloc = frame.iloc[0]
      with catch_warnings(record=True):
          simplefilter("ignore", DeprecationWarning)
          via_ix = frame.ix[0]
      assert isinstance(via_iloc, Series)
      assert_almost_equal(via_iloc.values, frame.values[0])
      assert_series_equal(via_iloc, via_ix)

      # the same data, reached as the first column of the transpose
      with catch_warnings(record=True):
          simplefilter("ignore", DeprecationWarning)
          via_iloc = frame.T.iloc[:, 0]
          via_ix = frame.T.ix[:, 0]
      assert isinstance(via_iloc, Series)
      assert_almost_equal(via_iloc.values, frame.values[0])
      assert_series_equal(via_iloc, via_ix)

      # multiindex
      nested = DataFrame(np.random.randn(3, 3),
                         columns=[['i', 'i', 'j'], ['A', 'A', 'B']],
                         index=[['i', 'i', 'j'], ['X', 'X', 'Y']])

      with catch_warnings(record=True):
          simplefilter("ignore", DeprecationWarning)
          via_iloc = nested.iloc[0]
          via_ix = nested.ix[0]
      assert_series_equal(via_iloc, via_ix)

      with catch_warnings(record=True):
          simplefilter("ignore", DeprecationWarning)
          via_iloc = nested.iloc[:, 0]
          via_ix = nested.T.ix[0]
      assert_series_equal(via_iloc, via_ix)

      with catch_warnings(record=True):
          simplefilter("ignore", DeprecationWarning)
          via_iloc = nested.iloc[:, [0]]
          via_ix = nested.ix[:, [0]]
      assert_frame_equal(via_iloc, via_ix)

      # #2259
      repeated = DataFrame([[1, 2, 3], [4, 5, 6]], columns=[1, 1, 2])
      assert_frame_equal(repeated.iloc[:, [0]],
                         repeated.take([0], axis=1))
  1617. def test_loc_duplicates(self):
  1618. # gh-17105
  1619. # insert a duplicate element to the index
  1620. trange = pd.date_range(start=pd.Timestamp(year=2017, month=1, day=1),
  1621. end=pd.Timestamp(year=2017, month=1, day=5))
  1622. trange = trange.insert(loc=5,
  1623. item=pd.Timestamp(year=2017, month=1, day=5))
  1624. df = pd.DataFrame(0, index=trange, columns=["A", "B"])
  1625. bool_idx = np.array([False, False, False, False, False, True])
  1626. # assignment
  1627. df.loc[trange[bool_idx], "A"] = 6
  1628. expected = pd.DataFrame({'A': [0, 0, 0, 0, 6, 6],
  1629. 'B': [0, 0, 0, 0, 0, 0]},
  1630. index=trange)
  1631. tm.assert_frame_equal(df, expected)
  1632. # in-place
  1633. df = pd.DataFrame(0, index=trange, columns=["A", "B"])
  1634. df.loc[trange[bool_idx], "A"] += 6
  1635. tm.assert_frame_equal(df, expected)
  def test_iloc_sparse_propegate_fill_value(self):
      """``.iloc`` column selection keeps the same number of sparse values."""
      from pandas.core.sparse.api import SparseDataFrame

      sparse = SparseDataFrame({'A': [999, 1]}, default_fill_value=999)
      by_label = sparse['A'].sp_values
      by_position = sparse.iloc[:, 0].sp_values
      assert len(by_label) == len(by_position)
  1640. def test_iat(self):
  1641. for i, row in enumerate(self.frame.index):
  1642. for j, col in enumerate(self.frame.columns):
  1643. result = self.frame.iat[i, j]
  1644. expected = self.frame.at[row, col]
  1645. assert result == expected
  def test_nested_exception(self):
      """``repr`` of a frame with a malformed index may fail, but cleanly.

      Any exception raised by ``repr`` is tolerated except an
      ``UnboundLocalError``, which would indicate that an outer exception
      was re-raised without a named argument.
      """
      # Ignore the strange way of triggering the problem
      # (which may get fixed), it's just a way to trigger
      # the issue or reraising an outer exception without
      # a named argument
      df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6],
                      "c": [7, 8, 9]}).set_index(["a", "b"])
      index = list(df.index)
      # replace the first tuple label with a list
      index[0] = ["a", "b"]
      df.index = index

      try:
          repr(df)
      except Exception as e:
          # isinstance also rejects subclasses, unlike a type() comparison
          assert not isinstance(e, UnboundLocalError)
  @pytest.mark.parametrize("method,expected_values", [
      ("nearest", [0, 1, 1, 2]),
      ("pad", [np.nan, 0, 1, 1]),
      ("backfill", [0, 1, 2, 2])
  ])
  def test_reindex_methods(self, method, expected_values):
      """Reindex with a fill ``method``, with and without a tolerance."""
      source = pd.DataFrame({"x": list(range(5))})
      target = np.array([-0.1, 0.9, 1.1, 1.5])
      wanted = pd.DataFrame({'x': expected_values}, index=target)

      assert_frame_equal(wanted, source.reindex(target, method=method))

      # reindexing onto itself with zero tolerance is the identity
      for zero_tol in (0, [0, 0, 0, 0]):
          same = source.reindex_like(source, method=method,
                                     tolerance=zero_tol)
          assert_frame_equal(source, same)

      # a tolerance of 1 leaves the expected values unchanged
      for unit_tol in (1, [1, 1, 1, 1]):
          filled = source.reindex(target, method=method,
                                  tolerance=unit_tol)
          assert_frame_equal(wanted, filled)

      # reversed target
      filled = source.reindex(target[::-1], method=method)
      assert_frame_equal(wanted[::-1], filled)

      # shuffled target
      new_order = [3, 0, 2, 1]
      filled = source.reindex(target[new_order], method=method)
      assert_frame_equal(wanted.iloc[new_order], filled)

      # with a reversed source, pad and backfill trade places
      opposite = {'pad': 'backfill', 'backfill': 'pad'}
      switched_method = opposite.get(method, method)
      filled = source[::-1].reindex(target, method=switched_method)
      assert_frame_equal(wanted, filled)
  1691. def test_reindex_methods_nearest_special(self):
  1692. df = pd.DataFrame({"x": list(range(5))})
  1693. target = np.array([-0.1, 0.9, 1.1, 1.5])
  1694. expected = pd.DataFrame({"x": [0, 1, 1, np.nan]}, index=target)
  1695. actual = df.reindex(target, method="nearest", tolerance=0.2)
  1696. assert_frame_equal(expected, actual)
  1697. expected = pd.DataFrame({"x": [0, np.nan, 1, np.nan]}, index=target)
  1698. actual = df.reindex(target, method="nearest",
  1699. tolerance=[0.5, 0.01, 0.4, 0.1])
  1700. assert_frame_equal(expected, actual)
  def test_reindex_frame_add_nat(self):
      """Rows added by reindex are missing in a datetime column."""
      stamps = date_range('1/1/2000 00:00:00', periods=10, freq='10s')
      frame = DataFrame({'A': np.random.randn(len(stamps)), 'B': stamps})

      # ten original rows plus five new ones
      enlarged = frame.reindex(lrange(15))
      assert np.issubdtype(enlarged['B'].dtype, np.dtype('M8[ns]'))

      missing = com.isna(enlarged)['B']
      assert missing[-5:].all()
      assert not missing[:-5].any()
  def test_set_dataframe_column_ns_dtype(self):
      """A column built from ``datetime`` objects has dtype ``M8[ns]``."""
      stamps = [datetime.now(), datetime.now()]
      frame = DataFrame(stamps)
      assert frame[0].dtype == np.dtype('M8[ns]')
  def test_non_monotonic_reindex_methods(self):
      """Fill methods raise when the source index is not monotonic."""
      dr = pd.date_range('2013-08-01', periods=6, freq='B')
      data = np.random.randn(6, 1)
      ordered = pd.DataFrame(data, index=dr, columns=list('A'))
      shuffled = pd.DataFrame(data, index=dr[[3, 4, 5] + [0, 1, 2]],
                              columns=list('A'))

      # index is not monotonic increasing or decreasing
      for fill_method in ('pad', 'ffill', 'bfill', 'nearest'):
          with pytest.raises(ValueError):
              shuffled.reindex(ordered.index, method=fill_method)
  def test_reindex_level(self):
      """Reindex a three-level MultiIndex frame on one level at a time."""
      from itertools import permutations
      icol = ['jim', 'joe', 'jolie']

      def verify_first_level(df, level, idx, check_index_type=True):
          # expected rows: for each requested value in turn, the positions
          # where the level column equals that value
          matches = [np.nonzero((df[level] == val).to_numpy())[0]
                     for val in idx]
          positions = np.concatenate(matches)
          left = df.set_index(icol).reindex(idx, level=level)
          right = df.iloc[positions].set_index(icol)
          assert_frame_equal(left, right,
                             check_index_type=check_index_type)

      def verify(df, level, idx, indexer, check_index_type=True):
          # expected rows are given explicitly as positions
          left = df.set_index(icol).reindex(idx, level=level)
          right = df.iloc[indexer].set_index(icol)
          assert_frame_equal(left, right,
                             check_index_type=check_index_type)

      df = pd.DataFrame({'jim': list('B' * 4 + 'A' * 2 + 'C' * 3),
                         'joe': list('abcdeabcd')[::-1],
                         'jolie': [10, 20, 30] * 3,
                         'joline': np.random.randint(0, 1000, 9)})

      target = [['C', 'B', 'A'], ['F', 'C', 'A', 'D'], ['A'],
                ['A', 'B', 'C'], ['C', 'A', 'B'], ['C', 'B'], ['C', 'A'],
                ['A', 'B'], ['B', 'A', 'C']]

      for idx in target:
          verify_first_level(df, 'jim', idx)

      # reindex by these causes different MultiIndex levels
      for idx in [['D', 'F'], ['A', 'C', 'B']]:
          verify_first_level(df, 'jim', idx, check_index_type=False)

      joe_cases = [
          ('abcde', [3, 2, 1, 0, 5, 4, 8, 7, 6]),
          ('abcd', [3, 2, 1, 0, 5, 8, 7, 6]),
          ('abc', [3, 2, 1, 8, 7, 6]),
          ('eca', [1, 3, 4, 6, 8]),
          ('edc', [0, 1, 4, 5, 6]),
          ('eadbc', [3, 0, 2, 1, 4, 5, 8, 7, 6]),
          ('edwq', [0, 4, 5]),
      ]
      for letters, indexer in joe_cases:
          verify(df, 'joe', list(letters), indexer)
      verify(df, 'joe', list('wq'), [], check_index_type=False)

      df = DataFrame({'jim': ['mid'] * 5 + ['btm'] * 8 + ['top'] * 7,
                      'joe': ['3rd'] * 2 + ['1st'] * 3 + ['2nd'] * 3 +
                      ['1st'] * 2 + ['3rd'] * 3 + ['1st'] * 2 +
                      ['3rd'] * 3 + ['2nd'] * 2,
                      # this needs to be jointly unique with jim and joe or
                      # reindexing will fail ~1.5% of the time, this works
                      # out to needing unique groups of same size as joe
                      'jolie': np.concatenate([
                          np.random.choice(1000, x, replace=False)
                          for x in [2, 3, 3, 2, 3, 2, 3, 2]]),
                      'joline': np.random.randn(20).round(3) * 10})

      # every non-empty prefix of every ordering of the 'jim' values
      for idx in permutations(df['jim'].unique()):
          for stop in range(1, 4):
              verify_first_level(df, 'jim', idx[:stop])

      ordinal_cases = [
          (['1st', '2nd', '3rd'],
           [2, 3, 4, 0, 1, 8, 9, 5, 6, 7, 10,
            11, 12, 13, 14, 18, 19, 15, 16, 17]),
          (['3rd', '2nd', '1st'],
           [0, 1, 2, 3, 4, 10, 11, 12, 5, 6,
            7, 8, 9, 15, 16, 17, 18, 19, 13, 14]),
          (['2nd', '3rd'],
           [0, 1, 5, 6, 7, 10, 11, 12, 18, 19, 15, 16, 17]),
          (['3rd', '1st'],
           [0, 1, 2, 3, 4, 10, 11, 12, 8, 9, 15, 16, 17, 13, 14]),
      ]
      for labels, indexer in ordinal_cases:
          verify(df, 'joe', labels, indexer)
  def test_getitem_ix_float_duplicates(self):
      """Looking up the duplicated float label 0.2 returns all its rows."""
      def check(df, rows):
          # ``rows`` is the positional selection the label must resolve to
          expect = df.iloc[rows]
          assert_frame_equal(df.loc[0.2], expect)
          with catch_warnings(record=True):
              simplefilter("ignore", DeprecationWarning)
              assert_frame_equal(df.ix[0.2], expect)

          expect = df.iloc[rows, 0]
          assert_series_equal(df.loc[0.2, 'a'], expect)

      frame = pd.DataFrame(np.random.randn(3, 3),
                           index=[0.1, 0.2, 0.2], columns=list('abc'))
      check(frame, slice(1, None))

      # same frame, now with an integer first label
      frame.index = [1, 0.2, 0.2]
      check(frame, slice(1, None))

      frame = pd.DataFrame(np.random.randn(4, 3),
                           index=[1, 0.2, 0.2, 1], columns=list('abc'))
      check(frame, slice(1, -1))

      # duplicates that are not adjacent
      frame.index = [0.1, 0.2, 2, 0.2]
      check(frame, [1, -1])
  def test_getitem_sparse_column(self):
      """Three ways of selecting a sparse column give the same Series."""
      # https://github.com/pandas-dev/pandas/issues/23559
      data = pd.SparseArray([0, 1])
      frame = pd.DataFrame({"A": data})
      wanted = pd.Series(data, name="A")

      tm.assert_series_equal(frame['A'], wanted)
      tm.assert_series_equal(frame.iloc[:, 0], wanted)
      tm.assert_series_equal(frame.loc[:, 'A'], wanted)
  def test_setitem_with_sparse_value(self):
      """Assign a ``SparseArray`` as a new column (GH8131)."""
      frame = pd.DataFrame({'c_1': ['a', 'b', 'c'], 'n_1': [1., 2., 3.]})
      sparse_values = pd.SparseArray([0, 0, 1])

      frame['new_column'] = sparse_values

      wanted = pd.Series(sparse_values, name='new_column')
      assert_series_equal(frame['new_column'], wanted, check_names=False)
  def test_setitem_with_unaligned_sparse_value(self):
      """A sparse Series is aligned on its index when assigned."""
      frame = pd.DataFrame({'c_1': ['a', 'b', 'c'], 'n_1': [1., 2., 3.]})

      # the series index runs in reverse relative to the frame's
      reversed_series = pd.Series(pd.SparseArray([0, 0, 1]),
                                  index=[2, 1, 0])
      frame['new_column'] = reversed_series

      wanted = pd.Series(pd.SparseArray([1, 0, 0]), name='new_column')
      assert_series_equal(frame['new_column'], wanted)
  1841. def test_setitem_with_unaligned_tz_aware_datetime_column(self):
  1842. # GH 12981
  1843. # Assignment of unaligned offset-aware datetime series.
  1844. # Make sure timezone isn't lost
  1845. column = pd.Series(pd.date_range('2015-01-01', periods=3, tz='utc'),
  1846. name='dates')
  1847. df = pd.DataFrame({'dates': column})
  1848. df['dates'] = column[[1, 0, 2]]
  1849. assert_series_equal(df['dates'], column)
  1850. df = pd.DataFrame({'dates': column})
  1851. df.loc[[0, 1, 2], 'dates'] = column[[1, 0, 2]]
  1852. assert_series_equal(df['dates'], column)
  def test_setitem_datetime_coercion(self):
      """Values set into a Timestamp column compare equal to Timestamps."""
      # gh-1048
      frame = pd.DataFrame({'c': [pd.Timestamp('2010-10-01')] * 3})

      # np.datetime64 written into rows 0 and 1
      frame.loc[0:1, 'c'] = np.datetime64('2008-08-08')
      august = pd.Timestamp('2008-08-08')
      assert august == frame.loc[0, 'c']
      assert august == frame.loc[1, 'c']

      # datetime.date written into row 2
      frame.loc[2, 'c'] = date(2005, 5, 5)
      assert pd.Timestamp('2005-05-05') == frame.loc[2, 'c']
  def test_setitem_datetimelike_with_inference(self):
      """New timedelta/datetime columns keep their dtype (GH 7592)."""
      # assignment of timedeltas with NaT
      one_hour = timedelta(hours=1)

      def hours(count, length):
          # ``length`` copies of ``count`` hours as an m8[ns] array
          return np.array([count * one_hour] * length, dtype='m8[ns]')

      frame = DataFrame(index=date_range('20130101', periods=4))
      frame['A'] = hours(1, 4)
      frame.loc[:, 'B'] = hours(2, 4)
      frame.loc[:3, 'C'] = hours(3, 3)
      frame.loc[:, 'D'] = hours(4, 4)
      frame.loc[frame.index[:3], 'E'] = hours(5, 3)
      frame['F'] = np.timedelta64('NaT')
      frame.loc[frame.index[:-1], 'F'] = hours(6, 3)
      frame.loc[frame.index[-3]:, 'G'] = date_range('20130101', periods=3)
      frame['H'] = np.datetime64('NaT')

      # A-F are timedeltas, G and H are datetimes
      wanted = Series([np.dtype('timedelta64[ns]')] * 6 +
                      [np.dtype('datetime64[ns]')] * 2,
                      index=list('ABCDEFGH'))
      assert_series_equal(frame.dtypes, wanted)
  @pytest.mark.parametrize('idxer', ['var', ['var']])
  def test_setitem_datetimeindex_tz(self, idxer, tz_naive_fixture):
      """Fill an empty frame on a tz DatetimeIndex via ``.loc`` (GH 11365)."""
      stamps = date_range(start='2015-07-12', periods=3, freq='H',
                          tz=tz_naive_fixture)
      wanted = DataFrame(1.2, index=stamps, columns=['var'])

      # start from a frame without data and assign the whole column
      filled = DataFrame(index=stamps, columns=['var'])
      filled.loc[:, idxer] = wanted
      tm.assert_frame_equal(filled, wanted)
  def test_at_time_between_time_datetimeindex(self):
      """``.loc`` with ``time`` keys mirrors ``at_time``/``between_time``."""
      index = date_range("2012-01-01", "2012-01-05", freq='30min')
      frame = DataFrame(np.random.randn(len(index), 5), index=index)
      noon = time(12, 0, 0)
      afternoon = slice(time(13, 0, 0), time(14, 0, 0))
      # row positions expected to match each key
      noon_rows = [24, 72, 120, 168]
      afternoon_rows = [26, 27, 28, 74, 75, 76,
                        122, 123, 124, 170, 171, 172]

      selected = frame.at_time(noon)
      assert_frame_equal(selected, frame.loc[noon])
      assert_frame_equal(selected, frame.iloc[noon_rows])
      assert len(selected) == 4

      selected = frame.between_time(afternoon.start, afternoon.stop)
      assert_frame_equal(selected, frame.loc[afternoon])
      assert_frame_equal(selected, frame.iloc[afternoon_rows])
      assert len(selected) == 12

      for key, rows in ((noon, noon_rows), (afternoon, afternoon_rows)):
          # rows selected by the key read back as zero after zeroing
          zeroed = frame.copy()
          zeroed.loc[key] = 0
          wanted = frame.loc[key].copy()
          wanted.loc[:] = 0
          assert_frame_equal(zeroed.loc[key], wanted)

          # writing the original rows back restores the frame
          restored = frame.copy()
          restored.loc[key] = 0
          restored.loc[key] = frame.iloc[rows]
          assert_frame_equal(restored, frame)
  def test_xs(self):
      """Cross-sections by row label and by column label."""
      idx = self.frame.index[5]
      row = self.frame.xs(idx)
      for item, value in compat.iteritems(row):
          cell = self.frame[item][idx]
          # NaN never equals itself, so missing cells are checked separately
          if np.isnan(value):
              assert np.isnan(cell)
          else:
              assert value == cell

      # mixed-type xs
      mixed = DataFrame({
          'A': {'1': 1, '2': 2},
          'B': {'1': '1', '2': '2', '3': '3'},
      })
      row = mixed.xs('1')
      assert row.dtype == np.object_
      assert row['A'] == 1
      assert row['B'] == '1'

      # a label one business day before the first one is not in the index
      with pytest.raises(KeyError):
          self.tsframe.xs(self.tsframe.index[0] - BDay())

      # xs get column
      column = self.frame['A']
      assert_series_equal(self.frame.xs('A', axis=1), column)

      # view is returned if possible
      view = self.frame.xs('A', axis=1)
      view[:] = 5
      assert (column == 5).all()
  def test_xs_corner(self):
      """Cross-sections of a mixed-dtype frame and of a column-less frame."""
      # pathological mixed-type reordering case
      frame = DataFrame(index=[0])
      for name, value in (('A', 1.), ('B', 'foo'), ('C', 2.),
                          ('D', 'bar'), ('E', 3.)):
          frame[name] = value

      wanted = pd.Series([1., 'foo', 2., 'bar', 3.],
                         index=list('ABCDE'), name=0)
      tm.assert_series_equal(frame.xs(0), wanted)

      # no columns but Index(dtype=object)
      frame = DataFrame(index=['a', 'b', 'c'])
      wanted = Series([], name='a', index=pd.Index([], dtype=object))
      assert_series_equal(frame.xs('a'), wanted)
  1975. def test_xs_duplicates(self):
  1976. df = DataFrame(np.random.randn(5, 2), index=['b', 'b', 'c', 'b', 'a'])
  1977. cross = df.xs('c')
  1978. exp = df.iloc[2]
  1979. assert_series_equal(cross, exp)
  def test_xs_keep_level(self):
      """``xs(..., drop_level=False)`` keeps the selected index levels."""
      records = DataFrame({'day': {0: 'sat', 1: 'sun'},
                           'flavour': {0: 'strawberry', 1: 'strawberry'},
                           'sales': {0: 10, 1: 12},
                           'year': {0: 2008, 1: 2008}})
      frame = records.set_index(['year', 'flavour', 'day'])
      wanted = frame[:1]

      # single level
      by_day = frame.xs('sat', level='day', drop_level=False)
      assert_frame_equal(by_day, wanted)

      # two levels at once
      by_year_day = frame.xs([2008, 'sat'], level=['year', 'day'],
                             drop_level=False)
      assert_frame_equal(by_year_day, wanted)
  def test_xs_view(self):
      """Writing through ``xs`` on a single-dtype frame is visible in it."""
      # in 0.14 this will return a view if possible a copy otherwise, but
      # this is numpy dependent
      values = np.arange(20.).reshape(4, 5)
      dm = DataFrame(values, index=lrange(4), columns=lrange(5))

      dm.xs(2)[:] = 10
      row_after = dm.xs(2)
      assert (row_after == 10).all()
  def test_index_namedtuple(self):
      """Namedtuple instances can be used as row labels with ix and loc."""
      from collections import namedtuple

      IndexType = namedtuple("IndexType", ["a", "b"])
      labels = [IndexType("foo", "bar"), IndexType("baz", "bof")]
      # tupleize_cols=False keeps the namedtuples as plain index elements
      index = Index(labels, name="composite_index", tupleize_cols=False)
      df = DataFrame([(1, 2), (3, 4)], index=index, columns=["A", "B"])

      with catch_warnings(record=True):
          simplefilter("ignore", DeprecationWarning)
          via_ix = df.ix[IndexType("foo", "bar")]["A"]
      assert via_ix == 1

      via_loc = df.loc[IndexType("foo", "bar")]["A"]
      assert via_loc == 1
  def test_boolean_indexing(self):
      """Setting through a boolean frame mask, plus a wrong-length mask."""
      rows = lrange(3)
      cols = ['A', 'B', 'C']

      def _frame(values):
          # every frame in this test shares the same index and columns
          return DataFrame(index=rows, columns=cols, data=values)

      df1 = _frame(np.array([[0.0, 0.5, 1.0],
                             [1.5, 2.0, 2.5],
                             [3.0, 3.5, 4.0]],
                            dtype=float))
      df2 = _frame(np.ones((len(rows), len(cols))))
      expected = _frame(np.array([[0.0, 0.5, 1.0],
                                  [1.5, 2.0, -1],
                                  [-1, -1, -1]], dtype=float))

      df1[df1 > 2.0 * df2] = -1
      assert_frame_equal(df1, expected)

      # a boolean array shorter than the index must be rejected
      with pytest.raises(ValueError, match='Item wrong length'):
          df1[df1.index[:-1] > 2] = -1
  def test_boolean_indexing_mixed(self):
      """Boolean-mask setting on int/float frames and on a frame with str."""
      data = {
          long(0): {35: np.nan, 40: np.nan, 43: np.nan,
                    49: np.nan, 50: np.nan},
          long(1): {35: np.nan,
                    40: 0.32632316859446198,
                    43: np.nan,
                    49: 0.32632316859446198,
                    50: 0.39114724480578139},
          long(2): {35: np.nan, 40: np.nan, 43: 0.29012581014105987,
                    49: np.nan, 50: np.nan},
          long(3): {35: np.nan, 40: np.nan, 43: np.nan, 49: np.nan,
                    50: np.nan},
          long(4): {35: 0.34215328467153283, 40: np.nan, 43: np.nan,
                    49: np.nan, 50: np.nan},
          'y': {35: 0, 40: 0, 43: 0, 49: 0, 50: 1}}
      df = DataFrame(data)

      # mixed int/float ok
      masked = df.copy()
      masked[masked > 0.3] = 1

      expected = df.copy()
      # the (row, column) cells whose value exceeds 0.3
      for row, col in [(40, 1), (49, 1), (50, 1), (35, 4)]:
          expected.loc[row, col] = 1
      assert_frame_equal(masked, expected)

      # adding a string column makes the boolean set raise
      df['foo'] = 'test'
      msg = ("boolean setting on mixed-type|"
             "not supported between|"
             "unorderable types")
      with pytest.raises(TypeError, match=msg):
          # TODO: This message should be the same in PY2/PY3
          df[df > 0.3] = 1
  def test_where(self):
      """Exercise ``DataFrame.where`` on the default and fixture frames.

      Covers plain selection, an upcasting case (GH 2794), alignment of
      ``cond`` and ``other``, invalid arguments, the ``inplace=True`` path
      and a boolean Series condition (GH 10218).
      """
      default_frame = DataFrame(np.random.randn(5, 3),
                                columns=['A', 'B', 'C'])

      def _comparable(frames):
          # Yield the frames on which ``frame > 0`` is evaluated by the
          # loops below.  On PY3 ``self.mixed_frame`` is only checked to
          # raise TypeError on that comparison and is then skipped.
          for frame in frames:
              if compat.PY3 and frame is self.mixed_frame:
                  with pytest.raises(TypeError):
                      frame > 0
                  continue
              yield frame

      def _safe_add(df):
          # only add to the numeric items
          def _is_numeric(col):
              numeric = issubclass(col.dtype.type,
                                   (np.integer, np.floating))
              return numeric and col.dtype != 'uint8'

          return DataFrame({name: col + 1 if _is_numeric(col) else col
                            for name, col in compat.iteritems(df)})

      def _check_get(df, cond, check_dtypes=True):
          other = _safe_add(df)
          from_frame = df.where(cond, other)
          from_array = df.where(cond.values, other)
          for name, column in from_frame.iteritems():
              wanted = Series(np.where(cond[name], df[name], other[name]),
                              index=column.index)
              assert_series_equal(column, wanted, check_names=False)
          # a frame condition and its raw values must give the same result
          assert_frame_equal(from_frame, from_array)

          # dtypes
          if check_dtypes:
              assert (from_frame.dtypes == df.dtypes).all()

      # check getting
      for df in _comparable([default_frame, self.mixed_frame,
                             self.mixed_float, self.mixed_int]):
          cond = df > 0
          _check_get(df, cond)

      # upcasting case (GH # 2794)
      dtype_names = ['float32', 'float64', 'int32', 'int64']
      df = DataFrame({name: Series([1] * 3, dtype=name)
                      for name in dtype_names})
      df.iloc[1, :] = 0
      result = df.where(df >= 0).get_dtype_counts()

      # when we don't preserve boolean casts
      #
      # expected = Series({ 'float32' : 1, 'float64' : 3 })

      expected = Series({'float32': 1, 'float64': 1, 'int32': 1, 'int64': 1})
      assert_series_equal(result, expected)

      # aligning
      def _check_align(df, cond, other, check_dtypes=True):
          rs = df.where(cond, other)
          for pos, name in enumerate(rs.columns):
              result = rs[name]
              data = df[name].values
              keep = (cond[name].reindex(df[name].index)
                      .fillna(False).values)

              if is_scalar(other):
                  replacement = other
              elif isinstance(other, np.ndarray):
                  replacement = Series(other[:, pos],
                                       index=result.index).values
              else:
                  replacement = other[name].values

              if keep.all():
                  new_values = data
              else:
                  new_values = np.where(keep, data, replacement)
              expected = Series(new_values, index=result.index, name=name)

              # since we can't always have the correct numpy dtype
              # as numpy doesn't know how to downcast, don't check
              assert_series_equal(result, expected, check_dtype=False)

          # dtypes
          # can't check dtype when other is an ndarray
          if check_dtypes and not isinstance(other, np.ndarray):
              assert (rs.dtypes == df.dtypes).all()

      for df in _comparable([self.mixed_frame, self.mixed_float,
                             self.mixed_int]):
          # other is a frame
          cond = (df > 0)[1:]
          _check_align(df, cond, _safe_add(df))

          # check other is ndarray
          cond = df > 0
          _check_align(df, cond, (_safe_add(df).values))

          # integers are upcast, so don't check the dtypes
          cond = df > 0
          check_dtypes = not any(issubclass(s.type, np.integer)
                                 for s in df.dtypes)
          _check_align(df, cond, np.nan, check_dtypes=check_dtypes)

      # invalid conditions
      # NOTE: ``cond`` is whatever the alignment loop above left behind;
      # it is not recomputed for ``default_frame``.
      df = default_frame
      err1 = (df + 1).values[0:2, :]
      with pytest.raises(ValueError):
          df.where(cond, err1)

      err2 = cond.iloc[:2, :].values
      other1 = _safe_add(df)
      with pytest.raises(ValueError):
          df.where(err2, other1)

      with pytest.raises(ValueError):
          df.mask(True)
      with pytest.raises(ValueError):
          df.mask(0)

      # where inplace
      def _check_set(df, cond, check_dtypes=True):
          dfi = df.copy()
          econd = cond.reindex_like(df).fillna(True)
          expected = dfi.mask(~econd)

          dfi.where(cond, np.nan, inplace=True)
          assert_frame_equal(dfi, expected)

          # dtypes (and confirm upcasts)
          if not check_dtypes:
              return
          for name, dtype in compat.iteritems(df.dtypes):
              # integer columns that had values replaced become float64
              if (issubclass(dtype.type, np.integer) and
                      not cond[name].all()):
                  dtype = np.dtype('float64')
              assert dfi[name].dtype == dtype

      for df in _comparable([default_frame, self.mixed_frame,
                             self.mixed_float, self.mixed_int]):
          cond = df > 0
          _check_set(df, cond)

          cond = df >= 0
          _check_set(df, cond)

          # aligning
          cond = (df >= 0)[1:]
          _check_set(df, cond)

      # GH 10218
      # test DataFrame.where with Series slicing
      df = DataFrame({'a': range(3), 'b': range(4, 7)})
      result = df.where(df['a'] == 1)
      expected = df[df['a'] == 1].reindex(df.index)
      assert_frame_equal(result, expected)
  @pytest.mark.parametrize("klass", [list, tuple, np.array])
  def test_where_array_like(self, klass):
      """``where`` accepts a boolean condition wrapped in ``klass``."""
      # see gh-15414
      df = DataFrame({"a": [1, 2, 3]})
      expected = DataFrame({"a": [np.nan, 2, 3]})

      one_col_cond = klass([[False], [True], [True]])
      assert_frame_equal(df.where(one_col_cond), expected)

      # same check after adding a second column to both frames
      df["b"] = 2
      expected["b"] = [2, np.nan, 2]

      two_col_cond = klass([[False, True], [True, False], [True, True]])
      assert_frame_equal(df.where(two_col_cond), expected)
  @pytest.mark.parametrize("cond", [
      [[1], [0], [1]],
      Series([[2], [5], [7]]),
      DataFrame({"a": [2, 5, 7]}),
      [["True"], ["False"], ["True"]],
      [[Timestamp("2017-01-01")],
       [pd.NaT], [Timestamp("2017-01-02")]]
  ])
  def test_where_invalid_input_single(self, cond):
      """Non-boolean conditions are rejected for a one-column frame."""
      # see gh-15414: only boolean arrays accepted
      frame = DataFrame({"a": [1, 2, 3]})

      with pytest.raises(ValueError,
                         match="Boolean array expected for the condition"):
          frame.where(cond)
  @pytest.mark.parametrize("cond", [
      [[0, 1], [1, 0], [1, 1]],
      Series([[0, 2], [5, 0], [4, 7]]),
      [["False", "True"], ["True", "False"],
       ["True", "True"]],
      DataFrame({"a": [2, 5, 7], "b": [4, 8, 9]}),
      [[pd.NaT, Timestamp("2017-01-01")],
       [Timestamp("2017-01-02"), pd.NaT],
       [Timestamp("2017-01-03"), Timestamp("2017-01-03")]]
  ])
  def test_where_invalid_input_multiple(self, cond):
      """Non-boolean conditions are rejected for a two-column frame."""
      # see gh-15414: only boolean arrays accepted
      frame = DataFrame({"a": [1, 2, 3], "b": [2, 2, 2]})

      with pytest.raises(ValueError,
                         match="Boolean array expected for the condition"):
          frame.where(cond)
  2227. def test_where_dataframe_col_match(self):
  2228. df = DataFrame([[1, 2, 3], [4, 5, 6]])
  2229. cond = DataFrame([[True, False, True], [False, False, True]])
  2230. result = df.where(cond)
  2231. expected = DataFrame([[1.0, np.nan, 3], [np.nan, np.nan, 6]])
  2232. tm.assert_frame_equal(result, expected)
  2233. # this *does* align, though has no matching columns
  2234. cond.columns = ["a", "b", "c"]
  2235. result = df.where(cond)
  2236. expected = DataFrame(np.nan, index=df.index, columns=df.columns)
  2237. tm.assert_frame_equal(result, expected)
  def test_where_ndframe_align(self):
      """Raw list/array conditions must match shape; Series ones align."""
      msg = "Array conditional must be same shape as self"
      df = DataFrame([[1, 2, 3], [4, 5, 6]])

      # a list that is too short
      short = [True]
      with pytest.raises(ValueError, match=msg):
          df.where(short)

      # wrapped in a Series the same values are accepted
      out = df.where(Series(short))
      expected = DataFrame([[1, 2, 3], [np.nan, np.nan, np.nan]])
      tm.assert_frame_equal(out, expected)

      # an ndarray that is too long
      too_long = np.array([False, True, False, True])
      with pytest.raises(ValueError, match=msg):
          df.where(too_long)

      out = df.where(Series(too_long))
      expected = DataFrame([[np.nan, np.nan, np.nan], [4, 5, 6]])
      tm.assert_frame_equal(out, expected)
  2253. def test_where_bug(self):
  2254. # see gh-2793
  2255. df = DataFrame({'a': [1.0, 2.0, 3.0, 4.0], 'b': [
  2256. 4.0, 3.0, 2.0, 1.0]}, dtype='float64')
  2257. expected = DataFrame({'a': [np.nan, np.nan, 3.0, 4.0], 'b': [
  2258. 4.0, 3.0, np.nan, np.nan]}, dtype='float64')
  2259. result = df.where(df > 2, np.nan)
  2260. assert_frame_equal(result, expected)
  2261. result = df.copy()
  2262. result.where(result > 2, np.nan, inplace=True)
  2263. assert_frame_equal(result, expected)
  2264. def test_where_bug_mixed(self, sint_dtype):
  2265. # see gh-2793
  2266. df = DataFrame({"a": np.array([1, 2, 3, 4], dtype=sint_dtype),
  2267. "b": np.array([4.0, 3.0, 2.0, 1.0],
  2268. dtype="float64")})
  2269. expected = DataFrame({"a": [np.nan, np.nan, 3.0, 4.0],
  2270. "b": [4.0, 3.0, np.nan, np.nan]},
  2271. dtype="float64")
  2272. result = df.where(df > 2, np.nan)
  2273. assert_frame_equal(result, expected)
  2274. result = df.copy()
  2275. result.where(result > 2, np.nan, inplace=True)
  2276. assert_frame_equal(result, expected)
  2277. def test_where_bug_transposition(self):
  2278. # see gh-7506
  2279. a = DataFrame({0: [1, 2], 1: [3, 4], 2: [5, 6]})
  2280. b = DataFrame({0: [np.nan, 8], 1: [9, np.nan], 2: [np.nan, np.nan]})
  2281. do_not_replace = b.isna() | (a > b)
  2282. expected = a.copy()
  2283. expected[~do_not_replace] = b
  2284. result = a.where(do_not_replace, b)
  2285. assert_frame_equal(result, expected)
  2286. a = DataFrame({0: [4, 6], 1: [1, 0]})
  2287. b = DataFrame({0: [np.nan, 3], 1: [3, np.nan]})
  2288. do_not_replace = b.isna() | (a > b)
  2289. expected = a.copy()
  2290. expected[~do_not_replace] = b
  2291. result = a.where(do_not_replace, b)
  2292. assert_frame_equal(result, expected)
  def test_where_datetime(self):
      """Boolean indexing with a datetime comparison on a partial frame."""
      # GH 3311
      df = DataFrame({'A': date_range('20130102', periods=5),
                      'B': date_range('20130104', periods=5),
                      'C': np.random.randn(5)})
      stamp = datetime(2013, 1, 3)

      # comparing the whole frame (including float column C) must fail
      with pytest.raises(TypeError):
          df > stamp

      # compare only the columns before the last one
      result = df[df.iloc[:, :-1] > stamp]

      expected = df.copy()
      expected.loc[[0, 1], 'A'] = np.nan
      expected.loc[:, 'C'] = np.nan
      assert_frame_equal(result, expected)
  def test_where_none(self):
      """Setting ``None`` through a mask, and ``where`` with ``None``."""
      # GH 4667
      # setting with None changes dtype
      floats = DataFrame({'series': Series(range(10))}).astype(float)
      floats[floats > 7] = None
      wanted = DataFrame(
          {'series': Series([0, 1, 2, 3, 4, 5, 6, 7, np.nan, np.nan])})
      assert_frame_equal(floats, wanted)

      # GH 7656
      records = [{'A': 1, 'B': np.nan, 'C': 'Test'},
                 {'A': np.nan, 'B': 'Test', 'C': np.nan}]
      mixed = DataFrame(records)
      with pytest.raises(TypeError, match='boolean setting on mixed-type'):
          mixed.where(~isna(mixed), None, inplace=True)
  def test_where_empty_df_and_empty_cond_having_non_bool_dtypes(self):
      """``where`` on an empty frame with an empty condition returns it."""
      # see gh-21947
      empty = pd.DataFrame(columns=["a"])
      # applymap over no rows yields an empty condition frame
      empty_cond = empty.applymap(lambda value: value > 0)

      tm.assert_frame_equal(empty.where(empty_cond), empty)
  2326. def test_where_align(self):
  2327. def create():
  2328. df = DataFrame(np.random.randn(10, 3))
  2329. df.iloc[3:5, 0] = np.nan
  2330. df.iloc[4:6, 1] = np.nan
  2331. df.iloc[5:8, 2] = np.nan
  2332. return df
  2333. # series
  2334. df = create()
  2335. expected = df.fillna(df.mean())
  2336. result = df.where(pd.notna(df), df.mean(), axis='columns')
  2337. assert_frame_equal(result, expected)
  2338. df.where(pd.notna(df), df.mean(), inplace=True, axis='columns')
  2339. assert_frame_equal(df, expected)
  2340. df = create().fillna(0)
  2341. expected = df.apply(lambda x, y: x.where(x > 0, y), y=df[0])
  2342. result = df.where(df > 0, df[0], axis='index')
  2343. assert_frame_equal(result, expected)
  2344. result = df.where(df > 0, df[0], axis='rows')
  2345. assert_frame_equal(result, expected)
  2346. # frame
  2347. df = create()
  2348. expected = df.fillna(1)
  2349. result = df.where(pd.notna(df), DataFrame(
  2350. 1, index=df.index, columns=df.columns))
  2351. assert_frame_equal(result, expected)
  2352. def test_where_complex(self):
  2353. # GH 6345
  2354. expected = DataFrame(
  2355. [[1 + 1j, 2], [np.nan, 4 + 1j]], columns=['a', 'b'])
  2356. df = DataFrame([[1 + 1j, 2], [5 + 1j, 4 + 1j]], columns=['a', 'b'])
  2357. df[df.abs() >= 5] = np.nan
  2358. assert_frame_equal(df, expected)
  def test_where_axis(self):
      """``where`` with a Series/DataFrame ``other`` along a given axis.

      Every case is checked both out of place and with ``inplace=True``
      on a copy of the input frame.
      """
      # GH 9736

      def _check(frame, cond, other, expected, **kwargs):
          # out-of-place ``where``
          assert_frame_equal(frame.where(cond, other, **kwargs), expected)

      def _check_inplace(frame, cond, other, expected, **kwargs):
          # ``where(..., inplace=True)`` applied to a copy of ``frame``
          target = frame.copy()
          target.where(cond, other, inplace=True, **kwargs)
          assert_frame_equal(target, expected)

      df = DataFrame(np.random.randn(2, 2))
      mask = DataFrame([[False, False], [False, False]])
      s = Series([0, 1])

      expected = DataFrame([[0, 0], [1, 1]], dtype='float64')
      _check(df, mask, s, expected, axis='index')
      _check_inplace(df, mask, s, expected, axis='index')

      expected = DataFrame([[0, 1], [0, 1]], dtype='float64')
      _check(df, mask, s, expected, axis='columns')
      _check_inplace(df, mask, s, expected, axis='columns')

      # Upcast needed
      df = DataFrame([[1, 2], [3, 4]], dtype='int64')
      mask = DataFrame([[False, False], [False, False]])
      s = Series([0, np.nan])

      expected = DataFrame([[0, 0], [np.nan, np.nan]], dtype='float64')
      _check(df, mask, s, expected, axis='index')
      _check_inplace(df, mask, s, expected, axis='index')

      expected = DataFrame([[0, np.nan], [0, np.nan]])
      _check(df, mask, s, expected, axis='columns')

      # in place, column 0 is expected to stay int64 and only column 1
      # becomes float64
      expected = DataFrame({0: np.array([0, 0], dtype='int64'),
                            1: np.array([np.nan, np.nan], dtype='float64')})
      _check_inplace(df, mask, s, expected, axis='columns')

      # Multiple dtypes (=> multiple Blocks)
      df = pd.concat([
          DataFrame(np.random.randn(10, 2)),
          DataFrame(np.random.randint(0, 10, size=(10, 2)), dtype='int64')],
          ignore_index=True, axis=1)
      mask = DataFrame(False, columns=df.columns, index=df.index)
      s1 = Series(1, index=df.columns)
      s2 = Series(2, index=df.index)

      for other, axis_name, fill in [(s1, 'columns', 1.0),
                                     (s2, 'index', 2.0)]:
          expected = DataFrame(fill, columns=df.columns, index=df.index)
          # columns 2 and 3 were built as int64 and are expected to stay so
          expected[2] = expected[2].astype('int64')
          expected[3] = expected[3].astype('int64')
          _check(df, mask, other, expected, axis=axis_name)
          _check_inplace(df, mask, other, expected, axis=axis_name)

      # DataFrame vs DataFrame
      d1 = df.copy().drop(1, axis=0)
      expected = df.copy()
      expected.loc[1, :] = np.nan

      _check(df, mask, d1, expected)
      _check(df, mask, d1, expected, axis='index')
      _check_inplace(df, mask, d1, expected)
      _check_inplace(df, mask, d1, expected, axis='index')

      d2 = df.copy().drop(1, axis=1)
      expected = df.copy()
      expected.loc[:, 1] = np.nan

      _check(df, mask, d2, expected)
      _check(df, mask, d2, expected, axis='columns')
      _check_inplace(df, mask, d2, expected)
      _check_inplace(df, mask, d2, expected, axis='columns')
  2445. def test_where_callable(self):
  2446. # GH 12533
  2447. df = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
  2448. result = df.where(lambda x: x > 4, lambda x: x + 1)
  2449. exp = DataFrame([[2, 3, 4], [5, 5, 6], [7, 8, 9]])
  2450. tm.assert_frame_equal(result, exp)
  2451. tm.assert_frame_equal(result, df.where(df > 4, df + 1))
  2452. # return ndarray and scalar
  2453. result = df.where(lambda x: (x % 2 == 0).values, lambda x: 99)
  2454. exp = DataFrame([[99, 2, 99], [4, 99, 6], [99, 8, 99]])
  2455. tm.assert_frame_equal(result, exp)
  2456. tm.assert_frame_equal(result, df.where(df % 2 == 0, 99))
  2457. # chain
  2458. result = (df + 2).where(lambda x: x > 8, lambda x: x + 10)
  2459. exp = DataFrame([[13, 14, 15], [16, 17, 18], [9, 10, 11]])
  2460. tm.assert_frame_equal(result, exp)
  2461. tm.assert_frame_equal(result,
  2462. (df + 2).where((df + 2) > 8, (df + 2) + 10))
  2463. def test_where_tz_values(self, tz_naive_fixture):
  2464. df1 = DataFrame(DatetimeIndex(['20150101', '20150102', '20150103'],
  2465. tz=tz_naive_fixture),
  2466. columns=['date'])
  2467. df2 = DataFrame(DatetimeIndex(['20150103', '20150104', '20150105'],
  2468. tz=tz_naive_fixture),
  2469. columns=['date'])
  2470. mask = DataFrame([True, True, False], columns=['date'])
  2471. exp = DataFrame(DatetimeIndex(['20150101', '20150102', '20150105'],
  2472. tz=tz_naive_fixture),
  2473. columns=['date'])
  2474. result = df1.where(mask, df2)
  2475. assert_frame_equal(exp, result)
  2476. def test_mask(self):
  2477. df = DataFrame(np.random.randn(5, 3))
  2478. cond = df > 0
  2479. rs = df.where(cond, np.nan)
  2480. assert_frame_equal(rs, df.mask(df <= 0))
  2481. assert_frame_equal(rs, df.mask(~cond))
  2482. other = DataFrame(np.random.randn(5, 3))
  2483. rs = df.where(cond, other)
  2484. assert_frame_equal(rs, df.mask(df <= 0, other))
  2485. assert_frame_equal(rs, df.mask(~cond, other))
  2486. # see gh-21891
  2487. df = DataFrame([1, 2])
  2488. res = df.mask([[True], [False]])
  2489. exp = DataFrame([np.nan, 2])
  2490. tm.assert_frame_equal(res, exp)
  2491. def test_mask_inplace(self):
  2492. # GH8801
  2493. df = DataFrame(np.random.randn(5, 3))
  2494. cond = df > 0
  2495. rdf = df.copy()
  2496. rdf.where(cond, inplace=True)
  2497. assert_frame_equal(rdf, df.where(cond))
  2498. assert_frame_equal(rdf, df.mask(~cond))
  2499. rdf = df.copy()
  2500. rdf.where(cond, -df, inplace=True)
  2501. assert_frame_equal(rdf, df.where(cond, -df))
  2502. assert_frame_equal(rdf, df.mask(~cond, -df))
  2503. def test_mask_edge_case_1xN_frame(self):
  2504. # GH4071
  2505. df = DataFrame([[1, 2]])
  2506. res = df.mask(DataFrame([[True, False]]))
  2507. expec = DataFrame([[np.nan, 2]])
  2508. assert_frame_equal(res, expec)
  2509. def test_mask_callable(self):
  2510. # GH 12533
  2511. df = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
  2512. result = df.mask(lambda x: x > 4, lambda x: x + 1)
  2513. exp = DataFrame([[1, 2, 3], [4, 6, 7], [8, 9, 10]])
  2514. tm.assert_frame_equal(result, exp)
  2515. tm.assert_frame_equal(result, df.mask(df > 4, df + 1))
  2516. # return ndarray and scalar
  2517. result = df.mask(lambda x: (x % 2 == 0).values, lambda x: 99)
  2518. exp = DataFrame([[1, 99, 3], [99, 5, 99], [7, 99, 9]])
  2519. tm.assert_frame_equal(result, exp)
  2520. tm.assert_frame_equal(result, df.mask(df % 2 == 0, 99))
  2521. # chain
  2522. result = (df + 2).mask(lambda x: x > 8, lambda x: x + 10)
  2523. exp = DataFrame([[3, 4, 5], [6, 7, 8], [19, 20, 21]])
  2524. tm.assert_frame_equal(result, exp)
  2525. tm.assert_frame_equal(result,
  2526. (df + 2).mask((df + 2) > 8, (df + 2) + 10))
  def test_head_tail(self):
      """``head``/``tail`` agree with the equivalent slices."""
      frame = self.frame

      # default count
      assert_frame_equal(frame.head(), frame[:5])
      assert_frame_equal(frame.tail(), frame[-5:])

      # zero rows
      assert_frame_equal(frame.head(0), frame[0:0])
      assert_frame_equal(frame.tail(0), frame[0:0])

      # negative counts
      assert_frame_equal(frame.head(-1), frame[:-1])
      assert_frame_equal(frame.tail(-1), frame[1:])

      # a single row
      assert_frame_equal(frame.head(1), frame[:1])
      assert_frame_equal(frame.tail(1), frame[-1:])

      # with a float index
      df = frame.copy()
      df.index = np.arange(len(frame)) + 0.1
      assert_frame_equal(df.head(), df.iloc[:5])
      assert_frame_equal(df.tail(), df.iloc[-5:])
      assert_frame_equal(df.head(0), df[0:0])
      assert_frame_equal(df.tail(0), df[0:0])
      assert_frame_equal(df.head(-1), df.iloc[:-1])
      assert_frame_equal(df.tail(-1), df.iloc[1:])

      # test empty dataframe
      empty_df = DataFrame()
      assert_frame_equal(empty_df.tail(), empty_df)
      assert_frame_equal(empty_df.head(), empty_df)
  def test_type_error_multiindex(self):
      """Invalid and valid ways to select from MultiIndex columns."""
      # See gh-12218
      rows = [[0, 0, 1, 2], [1, 0, 3, 4],
              [0, 1, 1, 2], [1, 1, 3, 4]]
      df = DataFrame(columns=['i', 'c', 'x', 'y'], data=rows)
      dg = df.pivot_table(index='i', columns='c', values=['x', 'y'])

      # a (slice, label) tuple is not a valid __getitem__ key
      with pytest.raises(TypeError, match="is an invalid key"):
          str(dg[:, 0])

      row_index = Index(range(2), name='i')
      columns = MultiIndex(levels=[['x', 'y'], [0, 1]],
                           codes=[[0, 1], [0, 0]],
                           names=[None, 'c'])
      expected = DataFrame([[1, 2], [3, 4]], columns=columns,
                           index=row_index)
      assert_frame_equal(dg.loc[:, (slice(None), 0)], expected)

      # selecting a single column by its full key gives a Series
      name = ('x', 0)
      row_index = Index(range(2), name='i')
      expected = Series([1, 3], index=row_index, name=name)
      assert_series_equal(dg['x', 0], expected)
  def test_interval_index(self):
      """``loc`` lookups on a frame indexed by an IntervalIndex."""
      # GH 19977
      data = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
      columns = ['A', 'B', 'C']

      # default closure: 0.5 selects a single scalar
      index = pd.interval_range(start=0, periods=3)
      df = pd.DataFrame(data, index=index, columns=columns)
      assert_almost_equal(df.loc[0.5, 'A'], 1)

      # closed on both sides: 1 is expected to match the first two rows
      index = pd.interval_range(start=0, periods=3, closed='both')
      df = pd.DataFrame(data, index=index, columns=columns)

      index_exp = pd.interval_range(start=0, periods=2,
                                    freq=1, closed='both')
      expected = pd.Series([1, 4], index=index_exp, name='A')
      assert_series_equal(df.loc[1, 'A'], expected)
  class TestDataFrameIndexingDatetimeWithTZ(TestData):
      """Indexing tests on a frame that holds a tz-aware datetime column."""

      def setup_method(self, method):
          # 'A' is built from a US/Eastern index, 'B' from a tz-naive range.
          self.idx = Index(date_range('20130101', periods=3, tz='US/Eastern'),
                           name='foo')
          self.dr = date_range('20130110', periods=3)
          self.df = DataFrame({'A': self.idx, 'B': self.dr})

      def test_setitem(self):
          """Assigning the tz-aware index as new columns of the frame."""
          df = self.df
          idx = self.idx

          # setitem
          df['C'] = idx
          assert_series_equal(df['C'], Series(idx, name='C'))

          # overwrite an existing string column with the tz-aware values
          df['D'] = 'foo'
          df['D'] = idx
          assert_series_equal(df['D'], Series(idx, name='D'))
          del df['D']

          # assert that A & C are not sharing the same base (e.g. they
          # are copies)
          b1 = df._data.blocks[1]
          b2 = df._data.blocks[2]
          tm.assert_extension_array_equal(b1.values, b2.values)
          assert id(b1.values._data.base) != id(b2.values._data.base)

          # with nan
          df2 = df.copy()
          df2.iloc[1, 1] = pd.NaT
          df2.iloc[1, 2] = pd.NaT
          result = df2['B']
          assert_series_equal(notna(result), Series(
              [True, False, True], name='B'))
          # inserting NaT must not change any column dtype
          assert_series_equal(df2.dtypes, df.dtypes)

      def test_set_reset(self):
          """``reset_index``/``set_index`` round-trip of a tz-aware index."""
          idx = self.idx

          # set/reset
          df = DataFrame({'A': [0, 1, 2]}, index=idx)
          result = df.reset_index()
          # Compare the dtype explicitly.  The previous form,
          # ``assert dtype, '<message>'``, only tested truthiness and
          # therefore could never fail.
          assert result['foo'].dtype == 'datetime64[ns, US/Eastern]'

          df = result.set_index('foo')
          tm.assert_index_equal(df.index, idx)

      def test_transpose(self):
          """Transposing equals a frame built from the transposed values."""
          result = self.df.T
          expected = DataFrame(self.df.values.T)
          expected.index = ['A', 'B']
          assert_frame_equal(result, expected)

      def test_scalar_assignment(self):
          """A tz-aware Timestamp scalar can be assigned as a new column."""
          # issue #19843
          df = pd.DataFrame(index=(0, 1, 2))
          df['now'] = pd.Timestamp('20130101', tz='UTC')
          expected = pd.DataFrame(
              {'now': pd.Timestamp('20130101', tz='UTC')}, index=[0, 1, 2])
          tm.assert_frame_equal(df, expected)
  class TestDataFrameIndexingUInt64(TestData):
      """Indexing tests on a frame that holds uint64 columns."""

      def setup_method(self, method):
          # values at and above 2**63 do not fit in int64
          base = 2**63
          self.ir = Index(np.arange(3), dtype=np.uint64)
          self.idx = Index([base, base + 5, base + 10], name='foo')
          self.df = DataFrame({'A': self.idx, 'B': self.ir})

      def test_setitem(self):
          """Assigning the uint64 index as new columns of the frame."""
          frame = self.df
          values = self.idx

          # setitem
          frame['C'] = values
          assert_series_equal(frame['C'], Series(values, name='C'))

          # overwrite an existing string column with the uint64 values
          frame['D'] = 'foo'
          frame['D'] = values
          assert_series_equal(frame['D'], Series(values, name='D'))
          del frame['D']

          # With NaN: because uint64 has no NaN element,
          # the column should be cast to object.
          with_nat = frame.copy()
          for col in (1, 2):
              with_nat.iloc[1, col] = pd.NaT

          present = notna(with_nat['B'])
          assert_series_equal(present,
                              Series([True, False, True], name='B'))

          expected_dtypes = Series([np.dtype('uint64'),
                                    np.dtype('O'), np.dtype('O')],
                                   index=['A', 'B', 'C'])
          assert_series_equal(with_nat.dtypes, expected_dtypes)

      def test_set_reset(self):
          """``reset_index``/``set_index`` round-trip of a uint64 index."""
          values = self.idx

          # set/reset
          frame = DataFrame({'A': [0, 1, 2]}, index=values)
          flat = frame.reset_index()
          assert flat['foo'].dtype == np.dtype('uint64')

          restored = flat.set_index('foo')
          tm.assert_index_equal(restored.index, values)

      def test_transpose(self):
          """Transposing equals a frame built from the transposed values."""
          expected = DataFrame(self.df.values.T)
          expected.index = ['A', 'B']
          assert_frame_equal(self.df.T, expected)
  2677. class TestDataFrameIndexingCategorical(object):
  def test_assignment(self):
      """Assigning categorical values and a categorical Series as columns."""
      # assignment
      raw = np.array(np.random.randint(0, 10000, 100), dtype='int32')
      df = DataFrame({'value': raw})
      labels = Categorical(["{0} - {1}".format(i, i + 499)
                            for i in range(0, 10000, 500)])
      cat_dtype_args = dict(categories=labels, ordered=False)

      df = df.sort_values(by=['value'], ascending=True)
      s = pd.cut(df.value, range(0, 10500, 500), right=False, labels=labels)
      d = s.values

      # assign the raw categorical values
      df['D'] = d
      str(df)

      expected = Series(
          [np.dtype('int32'), CategoricalDtype(**cat_dtype_args)],
          index=['value', 'D'])
      tm.assert_series_equal(df.dtypes, expected)

      # assign the categorical Series itself
      df['E'] = s
      str(df)

      expected = Series([np.dtype('int32'),
                         CategoricalDtype(**cat_dtype_args),
                         CategoricalDtype(**cat_dtype_args)],
                        index=['value', 'D', 'E'])
      tm.assert_series_equal(df.dtypes, expected)

      result1 = df['D']
      result2 = df['E']
      tm.assert_categorical_equal(result1._data._block.values, d)

      # sorting
      s.name = 'E'
      tm.assert_series_equal(result2.sort_index(), s.sort_index())

      # building a frame from a categorical Series; nothing is asserted
      cat = Categorical([1, 2, 3, 10], categories=[1, 2, 3, 4, 10])
      df = DataFrame(Series(cat))
  def test_assigning_ops(self):
      """Exercise assignment into a categorical column via each indexer.

      A seven-row frame with a categorical ``cats`` column (categories
      ``["a", "b"]``) and an integer ``values`` column is copied and then
      modified through ``iloc``, ``loc``, ``iat``, ``at`` and boolean-mask
      ``__setitem__``.  For every indexer the test checks that:

      * assigning values that are existing categories yields the matching
        expected frame, and
      * assigning a value outside the categories, or a ``Categorical``
        whose categories differ from the column's, raises ``ValueError``.

      The last section checks that assigning a ``Categorical`` into part of
      a non-categorical column stores the Categorical's values.
      """
      # systematically test the assigning operations:
      # for all slicing ops:
      #  for value in categories and value not in categories:
      #   - assign a single value -> exp_single_cats_value
      #   - assign a complete row (mixed values) -> exp_single_row
      #   - assign multiple rows (mixed values) (-> array) -> exp_multi_row
      #   - assign a part of a column with dtype == categorical ->
      #     exp_parts_cats_col
      #   - assign a part of a column with dtype != categorical ->
      #     exp_parts_cats_col

      cats = Categorical(["a", "a", "a", "a", "a", "a", "a"],
                         categories=["a", "b"])
      idx = Index(["h", "i", "j", "k", "l", "m", "n"])
      values = [1, 1, 1, 1, 1, 1, 1]
      orig = DataFrame({"cats": cats, "values": values}, index=idx)

      # the expected values
      # changed single row
      cats1 = Categorical(["a", "a", "b", "a", "a", "a", "a"],
                          categories=["a", "b"])
      idx1 = Index(["h", "i", "j", "k", "l", "m", "n"])
      values1 = [1, 1, 2, 1, 1, 1, 1]
      exp_single_row = DataFrame({"cats": cats1,
                                  "values": values1}, index=idx1)

      # changed multiple rows
      cats2 = Categorical(["a", "a", "b", "b", "a", "a", "a"],
                          categories=["a", "b"])
      idx2 = Index(["h", "i", "j", "k", "l", "m", "n"])
      values2 = [1, 1, 2, 2, 1, 1, 1]
      exp_multi_row = DataFrame({"cats": cats2,
                                 "values": values2}, index=idx2)

      # changed part of the cats column
      cats3 = Categorical(
          ["a", "a", "b", "b", "a", "a", "a"], categories=["a", "b"])
      idx3 = Index(["h", "i", "j", "k", "l", "m", "n"])
      values3 = [1, 1, 1, 1, 1, 1, 1]
      exp_parts_cats_col = DataFrame({"cats": cats3,
                                      "values": values3}, index=idx3)

      # changed single value in cats col
      cats4 = Categorical(
          ["a", "a", "b", "a", "a", "a", "a"], categories=["a", "b"])
      idx4 = Index(["h", "i", "j", "k", "l", "m", "n"])
      values4 = [1, 1, 1, 1, 1, 1, 1]
      exp_single_cats_value = DataFrame({"cats": cats4,
                                         "values": values4}, index=idx4)

      # NOTE: in every failure case below the frame is copied *before*
      # entering ``pytest.raises`` so that only the assignment itself can
      # satisfy the expected ValueError.

      # iloc
      # ###############
      # - assign a single value -> exp_single_cats_value
      df = orig.copy()
      df.iloc[2, 0] = "b"
      tm.assert_frame_equal(df, exp_single_cats_value)

      df = orig.copy()
      df.iloc[df.index == "j", 0] = "b"
      tm.assert_frame_equal(df, exp_single_cats_value)

      # - assign a single value not in the current categories set
      df = orig.copy()
      with pytest.raises(ValueError):
          df.iloc[2, 0] = "c"

      # - assign a complete row (mixed values) -> exp_single_row
      df = orig.copy()
      df.iloc[2, :] = ["b", 2]
      tm.assert_frame_equal(df, exp_single_row)

      # - assign a complete row (mixed values) not in categories set
      df = orig.copy()
      with pytest.raises(ValueError):
          df.iloc[2, :] = ["c", 2]

      # - assign multiple rows (mixed values) -> exp_multi_row
      df = orig.copy()
      df.iloc[2:4, :] = [["b", 2], ["b", 2]]
      tm.assert_frame_equal(df, exp_multi_row)

      df = orig.copy()
      with pytest.raises(ValueError):
          df.iloc[2:4, :] = [["c", 2], ["c", 2]]

      # assign a part of a column with dtype == categorical ->
      # exp_parts_cats_col
      df = orig.copy()
      df.iloc[2:4, 0] = Categorical(["b", "b"], categories=["a", "b"])
      tm.assert_frame_equal(df, exp_parts_cats_col)

      # different categories -> not sure if this should fail or pass
      df = orig.copy()
      with pytest.raises(ValueError):
          df.iloc[2:4, 0] = Categorical(list('bb'), categories=list('abc'))

      # different values
      df = orig.copy()
      with pytest.raises(ValueError):
          df.iloc[2:4, 0] = Categorical(list('cc'), categories=list('abc'))

      # assign a part of a column with dtype != categorical ->
      # exp_parts_cats_col
      df = orig.copy()
      df.iloc[2:4, 0] = ["b", "b"]
      tm.assert_frame_equal(df, exp_parts_cats_col)

      # deliberately reuses the frame modified just above (no fresh copy)
      with pytest.raises(ValueError):
          df.iloc[2:4, 0] = ["c", "c"]

      # loc (column addressed by its label "cats")
      # ##############
      # - assign a single value -> exp_single_cats_value
      df = orig.copy()
      df.loc["j", "cats"] = "b"
      tm.assert_frame_equal(df, exp_single_cats_value)

      df = orig.copy()
      df.loc[df.index == "j", "cats"] = "b"
      tm.assert_frame_equal(df, exp_single_cats_value)

      # - assign a single value not in the current categories set
      df = orig.copy()
      with pytest.raises(ValueError):
          df.loc["j", "cats"] = "c"

      # - assign a complete row (mixed values) -> exp_single_row
      df = orig.copy()
      df.loc["j", :] = ["b", 2]
      tm.assert_frame_equal(df, exp_single_row)

      # - assign a complete row (mixed values) not in categories set
      df = orig.copy()
      with pytest.raises(ValueError):
          df.loc["j", :] = ["c", 2]

      # - assign multiple rows (mixed values) -> exp_multi_row
      df = orig.copy()
      df.loc["j":"k", :] = [["b", 2], ["b", 2]]
      tm.assert_frame_equal(df, exp_multi_row)

      df = orig.copy()
      with pytest.raises(ValueError):
          df.loc["j":"k", :] = [["c", 2], ["c", 2]]

      # assign a part of a column with dtype == categorical ->
      # exp_parts_cats_col
      df = orig.copy()
      df.loc["j":"k", "cats"] = Categorical(
          ["b", "b"], categories=["a", "b"])
      tm.assert_frame_equal(df, exp_parts_cats_col)

      # different categories -> not sure if this should fail or pass
      df = orig.copy()
      with pytest.raises(ValueError):
          df.loc["j":"k", "cats"] = Categorical(
              ["b", "b"], categories=["a", "b", "c"])

      # different values
      df = orig.copy()
      with pytest.raises(ValueError):
          df.loc["j":"k", "cats"] = Categorical(
              ["c", "c"], categories=["a", "b", "c"])

      # assign a part of a column with dtype != categorical ->
      # exp_parts_cats_col
      df = orig.copy()
      df.loc["j":"k", "cats"] = ["b", "b"]
      tm.assert_frame_equal(df, exp_parts_cats_col)

      # deliberately reuses the frame modified just above (no fresh copy)
      with pytest.raises(ValueError):
          df.loc["j":"k", "cats"] = ["c", "c"]

      # loc again, this time with the column label taken from
      # df.columns[0] instead of being spelled out
      # ##############
      # - assign a single value -> exp_single_cats_value
      df = orig.copy()
      df.loc["j", df.columns[0]] = "b"
      tm.assert_frame_equal(df, exp_single_cats_value)

      df = orig.copy()
      df.loc[df.index == "j", df.columns[0]] = "b"
      tm.assert_frame_equal(df, exp_single_cats_value)

      # - assign a single value not in the current categories set
      df = orig.copy()
      with pytest.raises(ValueError):
          df.loc["j", df.columns[0]] = "c"

      # - assign a complete row (mixed values) -> exp_single_row
      df = orig.copy()
      df.loc["j", :] = ["b", 2]
      tm.assert_frame_equal(df, exp_single_row)

      # - assign a complete row (mixed values) not in categories set
      df = orig.copy()
      with pytest.raises(ValueError):
          df.loc["j", :] = ["c", 2]

      # - assign multiple rows (mixed values) -> exp_multi_row
      df = orig.copy()
      df.loc["j":"k", :] = [["b", 2], ["b", 2]]
      tm.assert_frame_equal(df, exp_multi_row)

      df = orig.copy()
      with pytest.raises(ValueError):
          df.loc["j":"k", :] = [["c", 2], ["c", 2]]

      # assign a part of a column with dtype == categorical ->
      # exp_parts_cats_col
      df = orig.copy()
      df.loc["j":"k", df.columns[0]] = Categorical(
          ["b", "b"], categories=["a", "b"])
      tm.assert_frame_equal(df, exp_parts_cats_col)

      # different categories -> not sure if this should fail or pass
      df = orig.copy()
      with pytest.raises(ValueError):
          df.loc["j":"k", df.columns[0]] = Categorical(
              ["b", "b"], categories=["a", "b", "c"])

      # different values
      df = orig.copy()
      with pytest.raises(ValueError):
          df.loc["j":"k", df.columns[0]] = Categorical(
              ["c", "c"], categories=["a", "b", "c"])

      # assign a part of a column with dtype != categorical ->
      # exp_parts_cats_col
      df = orig.copy()
      df.loc["j":"k", df.columns[0]] = ["b", "b"]
      tm.assert_frame_equal(df, exp_parts_cats_col)

      # deliberately reuses the frame modified just above (no fresh copy)
      with pytest.raises(ValueError):
          df.loc["j":"k", df.columns[0]] = ["c", "c"]

      # iat
      # - assign a single value -> exp_single_cats_value
      df = orig.copy()
      df.iat[2, 0] = "b"
      tm.assert_frame_equal(df, exp_single_cats_value)

      # - assign a single value not in the current categories set
      df = orig.copy()
      with pytest.raises(ValueError):
          df.iat[2, 0] = "c"

      # at
      # - assign a single value -> exp_single_cats_value
      df = orig.copy()
      df.at["j", "cats"] = "b"
      tm.assert_frame_equal(df, exp_single_cats_value)

      # - assign a single value not in the current categories set
      df = orig.copy()
      with pytest.raises(ValueError):
          df.at["j", "cats"] = "c"

      # fancy indexing (boolean-mask __setitem__)
      catsf = Categorical(["a", "a", "c", "c", "a", "a", "a"],
                          categories=["a", "b", "c"])
      idxf = Index(["h", "i", "j", "k", "l", "m", "n"])
      valuesf = [1, 1, 3, 3, 1, 1, 1]
      df = DataFrame({"cats": catsf, "values": valuesf}, index=idxf)

      exp_fancy = exp_multi_row.copy()
      # Re-assign the widened column explicitly rather than mutating the
      # column accessor with ``inplace=True``: the result is the same, it
      # does not depend on an in-place change propagating to the frame,
      # and it avoids a flag that newer pandas releases no longer accept.
      exp_fancy["cats"] = exp_fancy["cats"].cat.set_categories(
          ["a", "b", "c"])

      df[df["cats"] == "c"] = ["b", 2]
      # category c is kept in .categories
      tm.assert_frame_equal(df, exp_fancy)

      # .at once more (this section was labelled "set_value", but the
      # code exercises the .at accessor)
      df = orig.copy()
      df.at["j", "cats"] = "b"
      tm.assert_frame_equal(df, exp_single_cats_value)

      df = orig.copy()
      with pytest.raises(ValueError):
          df.at["j", "cats"] = "c"

      # Assigning a Categorical to parts of an int/... column uses the
      # values of the Categorical
      df = DataFrame({"a": [1, 1, 1, 1, 1], "b": list("aaaaa")})
      exp = DataFrame({"a": [1, "b", "b", 1, 1], "b": list("aabba")})
      df.loc[1:2, "a"] = Categorical(["b", "b"], categories=["a", "b"])
      df.loc[2:3, "b"] = Categorical(["b", "b"], categories=["a", "b"])
      tm.assert_frame_equal(df, exp)
  def test_functions_no_warnings(self):
      """Bin a random integer column with ``pd.cut`` and store the result.

      The binning and the column assignment run inside
      ``tm.assert_produces_warning(False)``.
      """
      frame = DataFrame({'value': np.random.randint(0, 100, 20)})

      # One label per ten-wide bin: "0 - 9", "10 - 19", ..., "90 - 99".
      bin_labels = []
      for lower in range(0, 100, 10):
          bin_labels.append("{0} - {1}".format(lower, lower + 9))

      bin_edges = range(0, 105, 10)
      with tm.assert_produces_warning(False):
          frame['group'] = pd.cut(frame.value, bin_edges, right=False,
                                  labels=bin_labels)