12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
722782279228022812282228322842285228622872288228922902291229222932294229522962297229822992300230123022303230423052306230723082309231023112312231323142315231623172318231923202321232223232324232523262327232823292330233123322333233423352336233723382339234023412342234323442345234623472348234923502351235223532354235523562357235823592360236123622363236423652366236723682369237023712372237323742375237623772378237923802381238223832384238523862387238823892390239123922393239423952396239723982399240024012402240324042405240624072408240924102411241224132414241524162417241824192420242124222423242424252426242724282429243024312432243324342435243624372438243924402441244224432444244524462447244824492450245124522453245424552456245724582459246024612462246324642465246624672468246924702471247224732474247524762477247824792480248124822483248424852486248724882489249024912492249324942495249624972498249925002501250225032504250525062507250825092510251125122513251425152516251725182519252025212522252325242525252625272528252925302531253225332534253525362537253825392540254125422543254425452546254725482549255025512552255325542555255625572558255925602561256225632564256525662567256825692570257125722573257425752576257725782579258025812582258325842585258625872588258925902591259225932594259525962597259825992600 |
- from collections import deque
- import datetime as dt
- from datetime import datetime
- from decimal import Decimal
- from itertools import combinations
- from warnings import catch_warnings, simplefilter
- import dateutil
- import numpy as np
- from numpy.random import randn
- import pytest
- from pandas.compat import PY2, Iterable, StringIO, iteritems
- from pandas.core.dtypes.dtypes import CategoricalDtype
- import pandas as pd
- from pandas import (
- Categorical, DataFrame, DatetimeIndex, Index, MultiIndex, Panel, Series,
- Timestamp, concat, date_range, isna, read_csv)
- from pandas.tests.extension.decimal import to_decimal
- from pandas.util import testing as tm
- from pandas.util.testing import assert_frame_equal, makeCustomDataframe as mkdf
@pytest.fixture(params=[True, False])
def sort(request):
    """
    Parametrize a test over both boolean values of the ``sort`` keyword.

    The value is intended to be forwarded to ``concat`` and
    ``DataFrame.append``.
    """
    sort_flag = request.param
    return sort_flag
@pytest.fixture(params=[True, False, None])
def sort_with_none(request):
    """
    Parametrize a test over the ``sort`` keyword, default included.

    Yields ``True``, ``False`` and ``None`` in turn; ``None`` is the
    keyword's default for ``concat`` and ``DataFrame.append``.
    """
    # TODO: Replace with sort once keyword changes.
    sort_flag = request.param
    return sort_flag
class ConcatenateBase(object):
    """Base class giving every test a numeric frame and a mixed frame."""

    def setup_method(self, method):
        """
        Create ``self.frame`` and ``self.mixed_frame`` before each test.

        ``mixed_frame`` is a copy of ``frame`` with an extra constant
        string column ``'foo'``.
        """
        numeric = DataFrame(tm.getSeriesData())
        mixed = numeric.copy()
        mixed['foo'] = 'bar'

        self.frame = numeric
        self.mixed_frame = mixed
- class TestConcatAppendCommon(ConcatenateBase):
- """
- Test common dtype coercion rules between concat and append.
- """
- def setup_method(self, method):
- dt_data = [pd.Timestamp('2011-01-01'),
- pd.Timestamp('2011-01-02'),
- pd.Timestamp('2011-01-03')]
- tz_data = [pd.Timestamp('2011-01-01', tz='US/Eastern'),
- pd.Timestamp('2011-01-02', tz='US/Eastern'),
- pd.Timestamp('2011-01-03', tz='US/Eastern')]
- td_data = [pd.Timedelta('1 days'),
- pd.Timedelta('2 days'),
- pd.Timedelta('3 days')]
- period_data = [pd.Period('2011-01', freq='M'),
- pd.Period('2011-02', freq='M'),
- pd.Period('2011-03', freq='M')]
- self.data = {'bool': [True, False, True],
- 'int64': [1, 2, 3],
- 'float64': [1.1, np.nan, 3.3],
- 'category': pd.Categorical(['X', 'Y', 'Z']),
- 'object': ['a', 'b', 'c'],
- 'datetime64[ns]': dt_data,
- 'datetime64[ns, US/Eastern]': tz_data,
- 'timedelta64[ns]': td_data,
- 'period[M]': period_data}
- def _check_expected_dtype(self, obj, label):
- """
- Check whether obj has expected dtype depending on label
- considering not-supported dtypes
- """
- if isinstance(obj, pd.Index):
- if label == 'bool':
- assert obj.dtype == 'object'
- else:
- assert obj.dtype == label
- elif isinstance(obj, pd.Series):
- if label.startswith('period'):
- assert obj.dtype == 'Period[M]'
- else:
- assert obj.dtype == label
- else:
- raise ValueError
- def test_dtypes(self):
- # to confirm test case covers intended dtypes
- for typ, vals in iteritems(self.data):
- self._check_expected_dtype(pd.Index(vals), typ)
- self._check_expected_dtype(pd.Series(vals), typ)
- def test_concatlike_same_dtypes(self):
- # GH 13660
- for typ1, vals1 in iteritems(self.data):
- vals2 = vals1
- vals3 = vals1
- if typ1 == 'category':
- exp_data = pd.Categorical(list(vals1) + list(vals2))
- exp_data3 = pd.Categorical(list(vals1) + list(vals2) +
- list(vals3))
- else:
- exp_data = vals1 + vals2
- exp_data3 = vals1 + vals2 + vals3
- # ----- Index ----- #
- # index.append
- res = pd.Index(vals1).append(pd.Index(vals2))
- exp = pd.Index(exp_data)
- tm.assert_index_equal(res, exp)
- # 3 elements
- res = pd.Index(vals1).append([pd.Index(vals2), pd.Index(vals3)])
- exp = pd.Index(exp_data3)
- tm.assert_index_equal(res, exp)
- # index.append name mismatch
- i1 = pd.Index(vals1, name='x')
- i2 = pd.Index(vals2, name='y')
- res = i1.append(i2)
- exp = pd.Index(exp_data)
- tm.assert_index_equal(res, exp)
- # index.append name match
- i1 = pd.Index(vals1, name='x')
- i2 = pd.Index(vals2, name='x')
- res = i1.append(i2)
- exp = pd.Index(exp_data, name='x')
- tm.assert_index_equal(res, exp)
- # cannot append non-index
- with pytest.raises(TypeError, match='all inputs must be Index'):
- pd.Index(vals1).append(vals2)
- with pytest.raises(TypeError, match='all inputs must be Index'):
- pd.Index(vals1).append([pd.Index(vals2), vals3])
- # ----- Series ----- #
- # series.append
- res = pd.Series(vals1).append(pd.Series(vals2),
- ignore_index=True)
- exp = pd.Series(exp_data)
- tm.assert_series_equal(res, exp, check_index_type=True)
- # concat
- res = pd.concat([pd.Series(vals1), pd.Series(vals2)],
- ignore_index=True)
- tm.assert_series_equal(res, exp, check_index_type=True)
- # 3 elements
- res = pd.Series(vals1).append([pd.Series(vals2), pd.Series(vals3)],
- ignore_index=True)
- exp = pd.Series(exp_data3)
- tm.assert_series_equal(res, exp)
- res = pd.concat([pd.Series(vals1), pd.Series(vals2),
- pd.Series(vals3)], ignore_index=True)
- tm.assert_series_equal(res, exp)
- # name mismatch
- s1 = pd.Series(vals1, name='x')
- s2 = pd.Series(vals2, name='y')
- res = s1.append(s2, ignore_index=True)
- exp = pd.Series(exp_data)
- tm.assert_series_equal(res, exp, check_index_type=True)
- res = pd.concat([s1, s2], ignore_index=True)
- tm.assert_series_equal(res, exp, check_index_type=True)
- # name match
- s1 = pd.Series(vals1, name='x')
- s2 = pd.Series(vals2, name='x')
- res = s1.append(s2, ignore_index=True)
- exp = pd.Series(exp_data, name='x')
- tm.assert_series_equal(res, exp, check_index_type=True)
- res = pd.concat([s1, s2], ignore_index=True)
- tm.assert_series_equal(res, exp, check_index_type=True)
- # cannot append non-index
- msg = (r'cannot concatenate object of type \"(.+?)\";'
- ' only pd.Series, pd.DataFrame, and pd.Panel'
- r' \(deprecated\) objs are valid')
- with pytest.raises(TypeError, match=msg):
- pd.Series(vals1).append(vals2)
- with pytest.raises(TypeError, match=msg):
- pd.Series(vals1).append([pd.Series(vals2), vals3])
- with pytest.raises(TypeError, match=msg):
- pd.concat([pd.Series(vals1), vals2])
- with pytest.raises(TypeError, match=msg):
- pd.concat([pd.Series(vals1), pd.Series(vals2), vals3])
- def test_concatlike_dtypes_coercion(self):
- # GH 13660
- for typ1, vals1 in iteritems(self.data):
- for typ2, vals2 in iteritems(self.data):
- vals3 = vals2
- # basically infer
- exp_index_dtype = None
- exp_series_dtype = None
- if typ1 == typ2:
- # same dtype is tested in test_concatlike_same_dtypes
- continue
- elif typ1 == 'category' or typ2 == 'category':
- # ToDo: suspicious
- continue
- # specify expected dtype
- if typ1 == 'bool' and typ2 in ('int64', 'float64'):
- # series coerces to numeric based on numpy rule
- # index doesn't because bool is object dtype
- exp_series_dtype = typ2
- elif typ2 == 'bool' and typ1 in ('int64', 'float64'):
- exp_series_dtype = typ1
- elif (typ1 == 'datetime64[ns, US/Eastern]' or
- typ2 == 'datetime64[ns, US/Eastern]' or
- typ1 == 'timedelta64[ns]' or
- typ2 == 'timedelta64[ns]'):
- exp_index_dtype = object
- exp_series_dtype = object
- exp_data = vals1 + vals2
- exp_data3 = vals1 + vals2 + vals3
- # ----- Index ----- #
- # index.append
- res = pd.Index(vals1).append(pd.Index(vals2))
- exp = pd.Index(exp_data, dtype=exp_index_dtype)
- tm.assert_index_equal(res, exp)
- # 3 elements
- res = pd.Index(vals1).append([pd.Index(vals2),
- pd.Index(vals3)])
- exp = pd.Index(exp_data3, dtype=exp_index_dtype)
- tm.assert_index_equal(res, exp)
- # ----- Series ----- #
- # series.append
- res = pd.Series(vals1).append(pd.Series(vals2),
- ignore_index=True)
- exp = pd.Series(exp_data, dtype=exp_series_dtype)
- tm.assert_series_equal(res, exp, check_index_type=True)
- # concat
- res = pd.concat([pd.Series(vals1), pd.Series(vals2)],
- ignore_index=True)
- tm.assert_series_equal(res, exp, check_index_type=True)
- # 3 elements
- res = pd.Series(vals1).append([pd.Series(vals2),
- pd.Series(vals3)],
- ignore_index=True)
- exp = pd.Series(exp_data3, dtype=exp_series_dtype)
- tm.assert_series_equal(res, exp)
- res = pd.concat([pd.Series(vals1), pd.Series(vals2),
- pd.Series(vals3)], ignore_index=True)
- tm.assert_series_equal(res, exp)
- def test_concatlike_common_coerce_to_pandas_object(self):
- # GH 13626
- # result must be Timestamp/Timedelta, not datetime.datetime/timedelta
- dti = pd.DatetimeIndex(['2011-01-01', '2011-01-02'])
- tdi = pd.TimedeltaIndex(['1 days', '2 days'])
- exp = pd.Index([pd.Timestamp('2011-01-01'),
- pd.Timestamp('2011-01-02'),
- pd.Timedelta('1 days'),
- pd.Timedelta('2 days')])
- res = dti.append(tdi)
- tm.assert_index_equal(res, exp)
- assert isinstance(res[0], pd.Timestamp)
- assert isinstance(res[-1], pd.Timedelta)
- dts = pd.Series(dti)
- tds = pd.Series(tdi)
- res = dts.append(tds)
- tm.assert_series_equal(res, pd.Series(exp, index=[0, 1, 0, 1]))
- assert isinstance(res.iloc[0], pd.Timestamp)
- assert isinstance(res.iloc[-1], pd.Timedelta)
- res = pd.concat([dts, tds])
- tm.assert_series_equal(res, pd.Series(exp, index=[0, 1, 0, 1]))
- assert isinstance(res.iloc[0], pd.Timestamp)
- assert isinstance(res.iloc[-1], pd.Timedelta)
- def test_concatlike_datetimetz(self, tz_aware_fixture):
- tz = tz_aware_fixture
- # GH 7795
- dti1 = pd.DatetimeIndex(['2011-01-01', '2011-01-02'], tz=tz)
- dti2 = pd.DatetimeIndex(['2012-01-01', '2012-01-02'], tz=tz)
- exp = pd.DatetimeIndex(['2011-01-01', '2011-01-02',
- '2012-01-01', '2012-01-02'], tz=tz)
- res = dti1.append(dti2)
- tm.assert_index_equal(res, exp)
- dts1 = pd.Series(dti1)
- dts2 = pd.Series(dti2)
- res = dts1.append(dts2)
- tm.assert_series_equal(res, pd.Series(exp, index=[0, 1, 0, 1]))
- res = pd.concat([dts1, dts2])
- tm.assert_series_equal(res, pd.Series(exp, index=[0, 1, 0, 1]))
- @pytest.mark.parametrize('tz',
- ['UTC', 'US/Eastern', 'Asia/Tokyo', 'EST5EDT'])
- def test_concatlike_datetimetz_short(self, tz):
- # GH#7795
- ix1 = pd.date_range(start='2014-07-15', end='2014-07-17',
- freq='D', tz=tz)
- ix2 = pd.DatetimeIndex(['2014-07-11', '2014-07-21'], tz=tz)
- df1 = pd.DataFrame(0, index=ix1, columns=['A', 'B'])
- df2 = pd.DataFrame(0, index=ix2, columns=['A', 'B'])
- exp_idx = pd.DatetimeIndex(['2014-07-15', '2014-07-16',
- '2014-07-17', '2014-07-11',
- '2014-07-21'], tz=tz)
- exp = pd.DataFrame(0, index=exp_idx, columns=['A', 'B'])
- tm.assert_frame_equal(df1.append(df2), exp)
- tm.assert_frame_equal(pd.concat([df1, df2]), exp)
- def test_concatlike_datetimetz_to_object(self, tz_aware_fixture):
- tz = tz_aware_fixture
- # GH 13660
- # different tz coerces to object
- dti1 = pd.DatetimeIndex(['2011-01-01', '2011-01-02'], tz=tz)
- dti2 = pd.DatetimeIndex(['2012-01-01', '2012-01-02'])
- exp = pd.Index([pd.Timestamp('2011-01-01', tz=tz),
- pd.Timestamp('2011-01-02', tz=tz),
- pd.Timestamp('2012-01-01'),
- pd.Timestamp('2012-01-02')], dtype=object)
- res = dti1.append(dti2)
- tm.assert_index_equal(res, exp)
- dts1 = pd.Series(dti1)
- dts2 = pd.Series(dti2)
- res = dts1.append(dts2)
- tm.assert_series_equal(res, pd.Series(exp, index=[0, 1, 0, 1]))
- res = pd.concat([dts1, dts2])
- tm.assert_series_equal(res, pd.Series(exp, index=[0, 1, 0, 1]))
- # different tz
- dti3 = pd.DatetimeIndex(['2012-01-01', '2012-01-02'],
- tz='US/Pacific')
- exp = pd.Index([pd.Timestamp('2011-01-01', tz=tz),
- pd.Timestamp('2011-01-02', tz=tz),
- pd.Timestamp('2012-01-01', tz='US/Pacific'),
- pd.Timestamp('2012-01-02', tz='US/Pacific')],
- dtype=object)
- res = dti1.append(dti3)
- # tm.assert_index_equal(res, exp)
- dts1 = pd.Series(dti1)
- dts3 = pd.Series(dti3)
- res = dts1.append(dts3)
- tm.assert_series_equal(res, pd.Series(exp, index=[0, 1, 0, 1]))
- res = pd.concat([dts1, dts3])
- tm.assert_series_equal(res, pd.Series(exp, index=[0, 1, 0, 1]))
- def test_concatlike_common_period(self):
- # GH 13660
- pi1 = pd.PeriodIndex(['2011-01', '2011-02'], freq='M')
- pi2 = pd.PeriodIndex(['2012-01', '2012-02'], freq='M')
- exp = pd.PeriodIndex(['2011-01', '2011-02', '2012-01',
- '2012-02'], freq='M')
- res = pi1.append(pi2)
- tm.assert_index_equal(res, exp)
- ps1 = pd.Series(pi1)
- ps2 = pd.Series(pi2)
- res = ps1.append(ps2)
- tm.assert_series_equal(res, pd.Series(exp, index=[0, 1, 0, 1]))
- res = pd.concat([ps1, ps2])
- tm.assert_series_equal(res, pd.Series(exp, index=[0, 1, 0, 1]))
- def test_concatlike_common_period_diff_freq_to_object(self):
- # GH 13221
- pi1 = pd.PeriodIndex(['2011-01', '2011-02'], freq='M')
- pi2 = pd.PeriodIndex(['2012-01-01', '2012-02-01'], freq='D')
- exp = pd.Index([pd.Period('2011-01', freq='M'),
- pd.Period('2011-02', freq='M'),
- pd.Period('2012-01-01', freq='D'),
- pd.Period('2012-02-01', freq='D')], dtype=object)
- res = pi1.append(pi2)
- tm.assert_index_equal(res, exp)
- ps1 = pd.Series(pi1)
- ps2 = pd.Series(pi2)
- res = ps1.append(ps2)
- tm.assert_series_equal(res, pd.Series(exp, index=[0, 1, 0, 1]))
- res = pd.concat([ps1, ps2])
- tm.assert_series_equal(res, pd.Series(exp, index=[0, 1, 0, 1]))
- def test_concatlike_common_period_mixed_dt_to_object(self):
- # GH 13221
- # different datetimelike
- pi1 = pd.PeriodIndex(['2011-01', '2011-02'], freq='M')
- tdi = pd.TimedeltaIndex(['1 days', '2 days'])
- exp = pd.Index([pd.Period('2011-01', freq='M'),
- pd.Period('2011-02', freq='M'),
- pd.Timedelta('1 days'),
- pd.Timedelta('2 days')], dtype=object)
- res = pi1.append(tdi)
- tm.assert_index_equal(res, exp)
- ps1 = pd.Series(pi1)
- tds = pd.Series(tdi)
- res = ps1.append(tds)
- tm.assert_series_equal(res, pd.Series(exp, index=[0, 1, 0, 1]))
- res = pd.concat([ps1, tds])
- tm.assert_series_equal(res, pd.Series(exp, index=[0, 1, 0, 1]))
- # inverse
- exp = pd.Index([pd.Timedelta('1 days'),
- pd.Timedelta('2 days'),
- pd.Period('2011-01', freq='M'),
- pd.Period('2011-02', freq='M')], dtype=object)
- res = tdi.append(pi1)
- tm.assert_index_equal(res, exp)
- ps1 = pd.Series(pi1)
- tds = pd.Series(tdi)
- res = tds.append(ps1)
- tm.assert_series_equal(res, pd.Series(exp, index=[0, 1, 0, 1]))
- res = pd.concat([tds, ps1])
- tm.assert_series_equal(res, pd.Series(exp, index=[0, 1, 0, 1]))
- def test_concat_categorical(self):
- # GH 13524
- # same categories -> category
- s1 = pd.Series([1, 2, np.nan], dtype='category')
- s2 = pd.Series([2, 1, 2], dtype='category')
- exp = pd.Series([1, 2, np.nan, 2, 1, 2], dtype='category')
- tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
- tm.assert_series_equal(s1.append(s2, ignore_index=True), exp)
- # partially different categories => not-category
- s1 = pd.Series([3, 2], dtype='category')
- s2 = pd.Series([2, 1], dtype='category')
- exp = pd.Series([3, 2, 2, 1])
- tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
- tm.assert_series_equal(s1.append(s2, ignore_index=True), exp)
- # completely different categories (same dtype) => not-category
- s1 = pd.Series([10, 11, np.nan], dtype='category')
- s2 = pd.Series([np.nan, 1, 3, 2], dtype='category')
- exp = pd.Series([10, 11, np.nan, np.nan, 1, 3, 2], dtype='object')
- tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
- tm.assert_series_equal(s1.append(s2, ignore_index=True), exp)
- def test_union_categorical_same_categories_different_order(self):
- # https://github.com/pandas-dev/pandas/issues/19096
- a = pd.Series(Categorical(['a', 'b', 'c'], categories=['a', 'b', 'c']))
- b = pd.Series(Categorical(['a', 'b', 'c'], categories=['b', 'a', 'c']))
- result = pd.concat([a, b], ignore_index=True)
- expected = pd.Series(Categorical(['a', 'b', 'c', 'a', 'b', 'c'],
- categories=['a', 'b', 'c']))
- tm.assert_series_equal(result, expected)
- def test_concat_categorical_coercion(self):
- # GH 13524
- # category + not-category => not-category
- s1 = pd.Series([1, 2, np.nan], dtype='category')
- s2 = pd.Series([2, 1, 2])
- exp = pd.Series([1, 2, np.nan, 2, 1, 2], dtype='object')
- tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
- tm.assert_series_equal(s1.append(s2, ignore_index=True), exp)
- # result shouldn't be affected by 1st elem dtype
- exp = pd.Series([2, 1, 2, 1, 2, np.nan], dtype='object')
- tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp)
- tm.assert_series_equal(s2.append(s1, ignore_index=True), exp)
- # all values are not in category => not-category
- s1 = pd.Series([3, 2], dtype='category')
- s2 = pd.Series([2, 1])
- exp = pd.Series([3, 2, 2, 1])
- tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
- tm.assert_series_equal(s1.append(s2, ignore_index=True), exp)
- exp = pd.Series([2, 1, 3, 2])
- tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp)
- tm.assert_series_equal(s2.append(s1, ignore_index=True), exp)
- # completely different categories => not-category
- s1 = pd.Series([10, 11, np.nan], dtype='category')
- s2 = pd.Series([1, 3, 2])
- exp = pd.Series([10, 11, np.nan, 1, 3, 2], dtype='object')
- tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
- tm.assert_series_equal(s1.append(s2, ignore_index=True), exp)
- exp = pd.Series([1, 3, 2, 10, 11, np.nan], dtype='object')
- tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp)
- tm.assert_series_equal(s2.append(s1, ignore_index=True), exp)
- # different dtype => not-category
- s1 = pd.Series([10, 11, np.nan], dtype='category')
- s2 = pd.Series(['a', 'b', 'c'])
- exp = pd.Series([10, 11, np.nan, 'a', 'b', 'c'])
- tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
- tm.assert_series_equal(s1.append(s2, ignore_index=True), exp)
- exp = pd.Series(['a', 'b', 'c', 10, 11, np.nan])
- tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp)
- tm.assert_series_equal(s2.append(s1, ignore_index=True), exp)
- # if normal series only contains NaN-likes => not-category
- s1 = pd.Series([10, 11], dtype='category')
- s2 = pd.Series([np.nan, np.nan, np.nan])
- exp = pd.Series([10, 11, np.nan, np.nan, np.nan])
- tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
- tm.assert_series_equal(s1.append(s2, ignore_index=True), exp)
- exp = pd.Series([np.nan, np.nan, np.nan, 10, 11])
- tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp)
- tm.assert_series_equal(s2.append(s1, ignore_index=True), exp)
- def test_concat_categorical_3elem_coercion(self):
- # GH 13524
- # mixed dtypes => not-category
- s1 = pd.Series([1, 2, np.nan], dtype='category')
- s2 = pd.Series([2, 1, 2], dtype='category')
- s3 = pd.Series([1, 2, 1, 2, np.nan])
- exp = pd.Series([1, 2, np.nan, 2, 1, 2, 1, 2, 1, 2, np.nan],
- dtype='object')
- tm.assert_series_equal(pd.concat([s1, s2, s3], ignore_index=True), exp)
- tm.assert_series_equal(s1.append([s2, s3], ignore_index=True), exp)
- exp = pd.Series([1, 2, 1, 2, np.nan, 1, 2, np.nan, 2, 1, 2],
- dtype='object')
- tm.assert_series_equal(pd.concat([s3, s1, s2], ignore_index=True), exp)
- tm.assert_series_equal(s3.append([s1, s2], ignore_index=True), exp)
- # values are all in either category => not-category
- s1 = pd.Series([4, 5, 6], dtype='category')
- s2 = pd.Series([1, 2, 3], dtype='category')
- s3 = pd.Series([1, 3, 4])
- exp = pd.Series([4, 5, 6, 1, 2, 3, 1, 3, 4])
- tm.assert_series_equal(pd.concat([s1, s2, s3], ignore_index=True), exp)
- tm.assert_series_equal(s1.append([s2, s3], ignore_index=True), exp)
- exp = pd.Series([1, 3, 4, 4, 5, 6, 1, 2, 3])
- tm.assert_series_equal(pd.concat([s3, s1, s2], ignore_index=True), exp)
- tm.assert_series_equal(s3.append([s1, s2], ignore_index=True), exp)
- # values are all in either category => not-category
- s1 = pd.Series([4, 5, 6], dtype='category')
- s2 = pd.Series([1, 2, 3], dtype='category')
- s3 = pd.Series([10, 11, 12])
- exp = pd.Series([4, 5, 6, 1, 2, 3, 10, 11, 12])
- tm.assert_series_equal(pd.concat([s1, s2, s3], ignore_index=True), exp)
- tm.assert_series_equal(s1.append([s2, s3], ignore_index=True), exp)
- exp = pd.Series([10, 11, 12, 4, 5, 6, 1, 2, 3])
- tm.assert_series_equal(pd.concat([s3, s1, s2], ignore_index=True), exp)
- tm.assert_series_equal(s3.append([s1, s2], ignore_index=True), exp)
- def test_concat_categorical_multi_coercion(self):
- # GH 13524
- s1 = pd.Series([1, 3], dtype='category')
- s2 = pd.Series([3, 4], dtype='category')
- s3 = pd.Series([2, 3])
- s4 = pd.Series([2, 2], dtype='category')
- s5 = pd.Series([1, np.nan])
- s6 = pd.Series([1, 3, 2], dtype='category')
- # mixed dtype, values are all in categories => not-category
- exp = pd.Series([1, 3, 3, 4, 2, 3, 2, 2, 1, np.nan, 1, 3, 2])
- res = pd.concat([s1, s2, s3, s4, s5, s6], ignore_index=True)
- tm.assert_series_equal(res, exp)
- res = s1.append([s2, s3, s4, s5, s6], ignore_index=True)
- tm.assert_series_equal(res, exp)
- exp = pd.Series([1, 3, 2, 1, np.nan, 2, 2, 2, 3, 3, 4, 1, 3])
- res = pd.concat([s6, s5, s4, s3, s2, s1], ignore_index=True)
- tm.assert_series_equal(res, exp)
- res = s6.append([s5, s4, s3, s2, s1], ignore_index=True)
- tm.assert_series_equal(res, exp)
- def test_concat_categorical_ordered(self):
- # GH 13524
- s1 = pd.Series(pd.Categorical([1, 2, np.nan], ordered=True))
- s2 = pd.Series(pd.Categorical([2, 1, 2], ordered=True))
- exp = pd.Series(pd.Categorical([1, 2, np.nan, 2, 1, 2], ordered=True))
- tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
- tm.assert_series_equal(s1.append(s2, ignore_index=True), exp)
- exp = pd.Series(pd.Categorical([1, 2, np.nan, 2, 1, 2, 1, 2, np.nan],
- ordered=True))
- tm.assert_series_equal(pd.concat([s1, s2, s1], ignore_index=True), exp)
- tm.assert_series_equal(s1.append([s2, s1], ignore_index=True), exp)
- def test_concat_categorical_coercion_nan(self):
- # GH 13524
- # some edge cases
- # category + not-category => not category
- s1 = pd.Series(np.array([np.nan, np.nan], dtype=np.float64),
- dtype='category')
- s2 = pd.Series([np.nan, 1])
- exp = pd.Series([np.nan, np.nan, np.nan, 1])
- tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
- tm.assert_series_equal(s1.append(s2, ignore_index=True), exp)
- s1 = pd.Series([1, np.nan], dtype='category')
- s2 = pd.Series([np.nan, np.nan])
- exp = pd.Series([1, np.nan, np.nan, np.nan], dtype='object')
- tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
- tm.assert_series_equal(s1.append(s2, ignore_index=True), exp)
- # mixed dtype, all nan-likes => not-category
- s1 = pd.Series([np.nan, np.nan], dtype='category')
- s2 = pd.Series([np.nan, np.nan])
- exp = pd.Series([np.nan, np.nan, np.nan, np.nan])
- tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
- tm.assert_series_equal(s1.append(s2, ignore_index=True), exp)
- tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp)
- tm.assert_series_equal(s2.append(s1, ignore_index=True), exp)
- # all category nan-likes => category
- s1 = pd.Series([np.nan, np.nan], dtype='category')
- s2 = pd.Series([np.nan, np.nan], dtype='category')
- exp = pd.Series([np.nan, np.nan, np.nan, np.nan], dtype='category')
- tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
- tm.assert_series_equal(s1.append(s2, ignore_index=True), exp)
def test_concat_categorical_empty(self):
    """Check concatenation involving empty categorical Series (GH 13524)."""
    empty_cat = pd.Series([], dtype='category')
    filled_cat = pd.Series([1, 2], dtype='category')
    for first, second in ((empty_cat, filled_cat), (filled_cat, empty_cat)):
        tm.assert_series_equal(
            pd.concat([first, second], ignore_index=True), filled_cat)
        tm.assert_series_equal(
            first.append(second, ignore_index=True), filled_cat)

    # two empty categoricals
    s1 = pd.Series([], dtype='category')
    s2 = pd.Series([], dtype='category')
    tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), s2)
    tm.assert_series_equal(s1.append(s2, ignore_index=True), s2)

    # different dtype => not-category
    empty_cat = pd.Series([], dtype='category')
    empty_obj = pd.Series([], dtype='object')
    for first, second in ((empty_cat, empty_obj), (empty_obj, empty_cat)):
        tm.assert_series_equal(
            pd.concat([first, second], ignore_index=True), empty_obj)
        tm.assert_series_equal(
            first.append(second, ignore_index=True), empty_obj)

    # empty Series is ignored
    empty_cat = pd.Series([], dtype='category')
    nans = pd.Series([np.nan, np.nan])
    expected = pd.Series([np.nan, np.nan])
    for first, second in ((empty_cat, nans), (nans, empty_cat)):
        tm.assert_series_equal(
            pd.concat([first, second], ignore_index=True), expected)
        tm.assert_series_equal(
            first.append(second, ignore_index=True), expected)
class TestAppend(ConcatenateBase):
    """Tests for ``DataFrame.append``.

    ``self.frame`` and ``self.mixed_frame`` are not defined in this class;
    presumably they are provided by ``ConcatenateBase`` -- confirm there.
    """

    def test_append(self, sort):
        """Append split halves, mixed frames, empties and a new-column row."""
        head_index = self.frame.index[:5]
        tail_index = self.frame.index[5:]

        head = self.frame.reindex(head_index)
        tail = self.frame.reindex(tail_index)

        appended = head.append(tail)
        tm.assert_almost_equal(appended['A'], self.frame['A'])

        # column 'A' must survive even when only one side has it
        del tail['A']
        partial_appended = head.append(tail, sort=sort)
        assert 'A' in partial_appended

        partial_appended = tail.append(head, sort=sort)
        assert 'A' in partial_appended

        # mixed type handling
        appended = self.mixed_frame[:5].append(self.mixed_frame[5:])
        tm.assert_frame_equal(appended, self.mixed_frame)

        # what to test here
        mixed_appended = self.mixed_frame[:5].append(self.frame[5:],
                                                     sort=sort)
        mixed_appended2 = self.frame[:5].append(self.mixed_frame[5:],
                                                sort=sort)

        # all equal except 'foo' column
        tm.assert_frame_equal(
            mixed_appended.reindex(columns=['A', 'B', 'C', 'D']),
            mixed_appended2.reindex(columns=['A', 'B', 'C', 'D']))

        # append empty
        empty = DataFrame({})

        appended = self.frame.append(empty)
        tm.assert_frame_equal(self.frame, appended)
        assert appended is not self.frame

        appended = empty.append(self.frame)
        tm.assert_frame_equal(self.frame, appended)
        assert appended is not self.frame

        # Overlap
        msg = "Indexes have overlapping values"
        with pytest.raises(ValueError, match=msg):
            self.frame.append(self.frame, verify_integrity=True)

        # see gh-6129: new columns
        df = DataFrame({'a': {'x': 1, 'y': 2}, 'b': {'x': 3, 'y': 4}})
        row = Series([5, 6, 7], index=['a', 'b', 'c'], name='z')
        expected = DataFrame({'a': {'x': 1, 'y': 2, 'z': 5},
                              'b': {'x': 3, 'y': 4, 'z': 6},
                              'c': {'z': 7}})
        result = df.append(row)
        tm.assert_frame_equal(result, expected)

    def test_append_length0_frame(self, sort):
        """Append a frame with rows to a zero-length frame."""
        no_rows = DataFrame(columns=['A', 'B', 'C'])
        two_rows = DataFrame(index=[0, 1], columns=['A', 'B'])
        result = no_rows.append(two_rows, sort=sort)

        expected = DataFrame(index=[0, 1], columns=['A', 'B', 'C'])
        assert_frame_equal(result, expected)

    def test_append_records(self):
        """Append frames that were built from structured numpy arrays."""
        arr1 = np.zeros((2,), dtype=('i4,f4,a10'))
        arr1[:] = [(1, 2., 'Hello'), (2, 3., "World")]

        arr2 = np.zeros((3,), dtype=('i4,f4,a10'))
        arr2[:] = [(3, 4., 'foo'),
                   (5, 6., "bar"),
                   (7., 8., 'baz')]

        df1 = DataFrame(arr1)
        df2 = DataFrame(arr2)

        result = df1.append(df2, ignore_index=True)
        expected = DataFrame(np.concatenate((arr1, arr2)))
        assert_frame_equal(result, expected)

    # rewrite sort fixture, since we also want to test default of None
    def test_append_sorts(self, sort_with_none):
        """Check column ordering and warnings for sort=None/True/False."""
        df1 = pd.DataFrame({"a": [1, 2], "b": [1, 2]}, columns=['b', 'a'])
        df2 = pd.DataFrame({"a": [1, 2], 'c': [3, 4]}, index=[2, 3])

        if sort_with_none is None:
            # only warn if not explicitly specified
            # don't check stacklevel since its set for concat, and append
            # has an extra stack.
            ctx = tm.assert_produces_warning(FutureWarning,
                                             check_stacklevel=False)
        else:
            ctx = tm.assert_produces_warning(None)

        with ctx:
            result = df1.append(df2, sort=sort_with_none)

        # for None / True
        expected = pd.DataFrame({"b": [1, 2, None, None],
                                 "a": [1, 2, 1, 2],
                                 "c": [None, None, 3, 4]},
                                columns=['a', 'b', 'c'])
        if sort_with_none is False:
            expected = expected[['b', 'a', 'c']]
        tm.assert_frame_equal(result, expected)

    def test_append_different_columns(self, sort):
        """Columns missing on one side are filled with NA after append."""
        df = DataFrame({'bools': np.random.randn(10) > 0,
                        'ints': np.random.randint(0, 10, 10),
                        'floats': np.random.randn(10),
                        'strings': ['foo', 'bar'] * 5})

        top = df[:5].loc[:, ['bools', 'ints', 'floats']]
        bottom = df[5:].loc[:, ['strings', 'ints', 'floats']]

        appended = top.append(bottom, sort=sort)
        assert isna(appended['strings'][0:4]).all()
        assert isna(appended['bools'][5:]).all()

    def test_append_many(self, sort):
        """Append a list of frames in a single call."""
        chunks = [self.frame[:5], self.frame[5:10],
                  self.frame[10:15], self.frame[15:]]

        result = chunks[0].append(chunks[1:])
        tm.assert_frame_equal(result, self.frame)

        # give only the last chunk an extra column
        chunks[-1] = chunks[-1].copy()
        chunks[-1]['foo'] = 'bar'
        result = chunks[0].append(chunks[1:], sort=sort)
        tm.assert_frame_equal(result.loc[:, self.frame.columns], self.frame)
        assert (result['foo'][15:] == 'bar').all()
        assert result['foo'][:15].isna().all()

    def test_append_preserve_index_name(self):
        """The index name survives appending to an empty frame (#980)."""
        df1 = DataFrame(data=None, columns=['A', 'B', 'C'])
        df1 = df1.set_index(['A'])
        df2 = DataFrame(data=[[1, 4, 7], [2, 5, 8], [3, 6, 9]],
                        columns=['A', 'B', 'C'])
        df2 = df2.set_index(['A'])

        result = df1.append(df2)
        assert result.index.name == 'A'

    # Index flavours used as parametrize inputs by the tests below.
    indexes_can_append = [
        pd.RangeIndex(3),
        pd.Index([4, 5, 6]),
        pd.Index([4.5, 5.5, 6.5]),
        pd.Index(list('abc')),
        pd.CategoricalIndex('A B C'.split()),
        pd.CategoricalIndex('D E F'.split(), ordered=True),
        pd.DatetimeIndex([dt.datetime(2013, 1, 3, 0, 0),
                          dt.datetime(2013, 1, 3, 6, 10),
                          dt.datetime(2013, 1, 3, 7, 12)]),
    ]

    indexes_cannot_append_with_other = [
        pd.IntervalIndex.from_breaks([0, 1, 2, 3]),
        pd.MultiIndex.from_arrays(['A B C'.split(), 'D E F'.split()]),
    ]

    all_indexes = indexes_can_append + indexes_cannot_append_with_other

    @pytest.mark.parametrize("index",
                             all_indexes,
                             ids=lambda x: x.__class__.__name__)
    def test_append_same_columns_type(self, index):
        """Append a Series whose index has the same type as df.columns."""
        # GH18359

        # df wider than ser
        df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=index)
        narrow = index[:2]
        ser = pd.Series([7, 8], index=narrow, name=2)
        result = df.append(ser)
        expected = pd.DataFrame([[1., 2., 3.], [4, 5, 6], [7, 8, np.nan]],
                                index=[0, 1, 2],
                                columns=index)
        assert_frame_equal(result, expected)

        # ser wider than df
        wide = index
        narrow = index[:2]
        df = pd.DataFrame([[1, 2], [4, 5]], columns=narrow)
        ser = pd.Series([7, 8, 9], index=wide, name=2)
        result = df.append(ser)
        expected = pd.DataFrame([[1, 2, np.nan], [4, 5, np.nan], [7, 8, 9]],
                                index=[0, 1, 2],
                                columns=wide)
        assert_frame_equal(result, expected)

    @pytest.mark.parametrize("df_columns, series_index",
                             combinations(indexes_can_append, r=2),
                             ids=lambda x: x.__class__.__name__)
    def test_append_different_columns_types(self, df_columns, series_index):
        """Append a Series whose index type differs from df.columns."""
        # GH18359
        # See also test 'test_append_different_columns_types_raises' below
        # for errors raised when appending

        df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=df_columns)
        ser = pd.Series([7, 8, 9], index=series_index, name=2)

        result = df.append(ser)
        new_labels = ser.index.difference(df_columns)
        combined_columns = Index(df_columns.tolist()).append(new_labels)
        expected = pd.DataFrame([[1., 2., 3., np.nan, np.nan, np.nan],
                                 [4, 5, 6, np.nan, np.nan, np.nan],
                                 [np.nan, np.nan, np.nan, 7, 8, 9]],
                                index=[0, 1, 2],
                                columns=combined_columns)
        assert_frame_equal(result, expected)

    @pytest.mark.parametrize('index_can_append', indexes_can_append,
                             ids=lambda x: x.__class__.__name__)
    @pytest.mark.parametrize('index_cannot_append_with_other',
                             indexes_cannot_append_with_other,
                             ids=lambda x: x.__class__.__name__)
    def test_append_different_columns_types_raises(
            self, index_can_append, index_cannot_append_with_other):
        """Appending across incompatible index types raises TypeError."""
        # GH18359
        # Dataframe.append will raise if IntervalIndex/MultiIndex appends
        # or is appended to a different index type
        #
        # See also test 'test_append_different_columns_types' above for
        # appending without raising.

        df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=index_can_append)
        ser = pd.Series([7, 8, 9], index=index_cannot_append_with_other,
                        name=2)
        msg = ("the other index needs to be an IntervalIndex too, but was"
               r" type {}|"
               r"object of type '(int|long|float|Timestamp)' has no len\(\)|"
               "Expected tuple, got str")
        with pytest.raises(TypeError, match=msg.format(
                index_can_append.__class__.__name__)):
            df.append(ser)

        # same pairing, roles of frame columns and Series index swapped
        df = pd.DataFrame([[1, 2, 3], [4, 5, 6]],
                          columns=index_cannot_append_with_other)
        ser = pd.Series([7, 8, 9], index=index_can_append, name=2)
        msg = (r"unorderable types: (Interval|int)\(\) > "
               r"(int|long|float|str)\(\)|"
               r"Expected tuple, got (int|long|float|str)|"
               r"Cannot compare type 'Timestamp' with type '(int|long)'|"
               r"'>' not supported between instances of 'int' and 'str'")
        with pytest.raises(TypeError, match=msg):
            df.append(ser)

    def test_append_dtype_coerce(self, sort):
        """Datetime columns keep their values when appended (GH 4993)."""
        # appending with datetime will incorrectly convert datetime64

        df1 = DataFrame(index=[1, 2], data=[dt.datetime(2013, 1, 1, 0, 0),
                                            dt.datetime(2013, 1, 2, 0, 0)],
                        columns=['start_time'])
        df2 = DataFrame(index=[4, 5], data=[[dt.datetime(2013, 1, 3, 0, 0),
                                             dt.datetime(2013, 1, 3, 6, 10)],
                                            [dt.datetime(2013, 1, 4, 0, 0),
                                             dt.datetime(2013, 1, 4, 7, 10)]],
                        columns=['start_time', 'end_time'])

        end_time = Series([pd.NaT,
                           pd.NaT,
                           dt.datetime(2013, 1, 3, 6, 10),
                           dt.datetime(2013, 1, 4, 7, 10)],
                          name='end_time')
        start_time = Series([dt.datetime(2013, 1, 1, 0, 0),
                             dt.datetime(2013, 1, 2, 0, 0),
                             dt.datetime(2013, 1, 3, 0, 0),
                             dt.datetime(2013, 1, 4, 0, 0)],
                            name='start_time')
        expected = concat([end_time, start_time], axis=1, sort=sort)
        result = df1.append(df2, ignore_index=True, sort=sort)
        if sort:
            column_order = ['end_time', 'start_time']
        else:
            column_order = ['start_time', 'end_time']
        expected = expected[column_order]
        assert_frame_equal(result, expected)

    def test_append_missing_column_proper_upcast(self, sort):
        """Columns absent on one side are upcast to hold missing values."""
        df1 = DataFrame({'A': np.array([1, 2, 3, 4], dtype='i8')})
        df2 = DataFrame({'B': np.array([True, False, True, False],
                                       dtype=bool)})

        appended = df1.append(df2, ignore_index=True, sort=sort)
        assert appended['A'].dtype == 'f8'
        assert appended['B'].dtype == 'O'

    def test_append_empty_frame_to_series_with_dateutil_tz(self):
        """Append a Series holding a dateutil-tz Timestamp (GH 23682)."""
        date = Timestamp('2018-10-24 07:30:00', tz=dateutil.tz.tzutc())
        s = Series({'date': date, 'a': 1.0, 'b': 2.0})
        df = DataFrame(columns=['c', 'd'])
        result = df.append(s, ignore_index=True)
        # n.b. it's not clear to me that expected is correct here.
        # It's possible that the `date` column should have
        # datetime64[ns, tz] dtype for both result and expected.
        # that would be more consistent with new columns having
        # their own dtype (float for a and b, datetime64ns, tz for date).
        expected = DataFrame([[np.nan, np.nan, 1., 2., date]],
                             columns=['c', 'd', 'a', 'b', 'date'],
                             dtype=object)
        # These columns get cast to object after append
        expected['a'] = expected['a'].astype(float)
        expected['b'] = expected['b'].astype(float)
        assert_frame_equal(result, expected)
- class TestConcatenate(ConcatenateBase):
def test_concat_copy(self):
    """Check which result blocks share memory for copy=True / copy=False."""
    float_df = DataFrame(np.random.randn(4, 3))
    int_df = DataFrame(np.random.randint(0, 10, size=4).reshape(4, 1))
    obj_df = DataFrame({5: 'foo'}, index=range(4))

    # These are actual copies.
    result = concat([float_df, int_df, obj_df], axis=1, copy=True)

    for blk in result._data.blocks:
        assert blk.values.base is None

    # These are the same.
    result = concat([float_df, int_df, obj_df], axis=1, copy=False)

    for blk in result._data.blocks:
        if blk.is_float:
            assert (blk.values.base is
                    float_df._data.blocks[0].values.base)
        elif blk.is_integer:
            assert blk.values.base is int_df._data.blocks[0].values.base
        elif blk.is_object:
            assert blk.values.base is not None

    # Float block was consolidated.
    extra_float_df = DataFrame(np.random.randn(4, 1))
    result = concat([float_df, int_df, obj_df, extra_float_df],
                    axis=1, copy=False)
    for blk in result._data.blocks:
        if blk.is_float:
            assert blk.values.base is None
        elif blk.is_integer:
            assert blk.values.base is int_df._data.blocks[0].values.base
        elif blk.is_object:
            assert blk.values.base is not None
def test_concat_with_group_keys(self):
    """Check that ``keys`` builds the expected MultiIndex on either axis.

    Each axis is tested twice: once with two frames of different length
    along that axis, and once with the same frame repeated.
    """
    # axis=0
    df = DataFrame(np.random.randn(3, 4))
    df2 = DataFrame(np.random.randn(4, 4))

    result = concat([df, df2], keys=[0, 1])
    exp_index = MultiIndex.from_arrays([[0, 0, 0, 1, 1, 1, 1],
                                        [0, 1, 2, 0, 1, 2, 3]])
    expected = DataFrame(np.r_[df.values, df2.values],
                         index=exp_index)
    tm.assert_frame_equal(result, expected)

    result = concat([df, df], keys=[0, 1])
    exp_index2 = MultiIndex.from_arrays([[0, 0, 0, 1, 1, 1],
                                         [0, 1, 2, 0, 1, 2]])
    expected = DataFrame(np.r_[df.values, df.values],
                         index=exp_index2)
    tm.assert_frame_equal(result, expected)

    # axis=1
    # shapes are transposed relative to the axis=0 case, so the two
    # expected indexes built above can be reused as column indexes
    df = DataFrame(np.random.randn(4, 3))
    df2 = DataFrame(np.random.randn(4, 4))

    result = concat([df, df2], keys=[0, 1], axis=1)
    expected = DataFrame(np.c_[df.values, df2.values],
                         columns=exp_index)
    tm.assert_frame_equal(result, expected)

    result = concat([df, df], keys=[0, 1], axis=1)
    expected = DataFrame(np.c_[df.values, df.values],
                         columns=exp_index2)
    tm.assert_frame_equal(result, expected)
def test_concat_keys_specific_levels(self):
    """Explicit ``levels`` are kept as-is, including unused entries."""
    df = DataFrame(np.random.randn(10, 4))
    pieces = [df.iloc[:, [0, 1]], df.iloc[:, [2]], df.iloc[:, [3]]]
    # 'zero' is never used as a key but must still appear in the level
    level = ['three', 'two', 'one', 'zero']
    result = concat(pieces, axis=1, keys=['one', 'two', 'three'],
                    levels=[level],
                    names=['group_key'])

    expected_level = Index(level, name='group_key')
    tm.assert_index_equal(result.columns.levels[0], expected_level)

    assert result.columns.names[0] == 'group_key'
def test_concat_dataframe_keys_bug(self, sort):
    """Keyed axis=1 concat of frames that share a named index."""
    left_values = Series([1, 2, 3], index=Index(['a', 'b', 'c'], name='id'))
    right_values = Series([7, 8], index=Index(['a', 'b'], name='id'))
    t1 = DataFrame({'value': left_values})
    t2 = DataFrame({'value': right_values})

    # it works
    result = concat([t1, t2], axis=1, keys=['t1', 't2'], sort=sort)
    assert list(result.columns) == [('t1', 'value'), ('t2', 'value')]
def test_concat_series_partial_columns_names(self):
    """Column labels when only some of the Series are named (GH10698)."""
    foo = Series([1, 2], name='foo')
    bar = Series([1, 2])
    baz = Series([4, 5])
    series = [foo, bar, baz]

    # unnamed Series get positional labels counted among themselves
    result = concat(series, axis=1)
    expected = DataFrame({'foo': [1, 2], 0: [1, 2], 1: [4, 5]},
                         columns=['foo', 0, 1])
    tm.assert_frame_equal(result, expected)

    # explicit keys override the Series names
    result = concat(series, axis=1, keys=['red', 'blue', 'yellow'])
    expected = DataFrame({'red': [1, 2], 'blue': [1, 2], 'yellow': [4, 5]},
                         columns=['red', 'blue', 'yellow'])
    tm.assert_frame_equal(result, expected)

    result = concat(series, axis=1, ignore_index=True)
    expected = DataFrame({0: [1, 2], 1: [1, 2], 2: [4, 5]})
    tm.assert_frame_equal(result, expected)
def test_concat_dict(self):
    """Concatenating a dict uses sorted keys unless ``keys`` is given."""
    frames = {name: DataFrame(np.random.randn(4, 3))
              for name in ('foo', 'bar', 'baz', 'qux')}

    sorted_keys = sorted(frames)
    ordered_frames = [frames[k] for k in sorted_keys]

    result = concat(frames)
    expected = concat(ordered_frames, keys=sorted_keys)
    tm.assert_frame_equal(result, expected)

    result = concat(frames, axis=1)
    expected = concat(ordered_frames, keys=sorted_keys, axis=1)
    tm.assert_frame_equal(result, expected)

    # an explicit key list selects and orders a subset of the dict
    keys = ['baz', 'foo', 'bar']
    result = concat(frames, keys=keys)
    expected = concat([frames[k] for k in keys], keys=keys)
    tm.assert_frame_equal(result, expected)
def test_concat_ignore_index(self, sort):
    """axis=1 concat with ignore_index=True relabels the columns only."""
    frame1 = DataFrame({"test1": ["a", "b", "c"],
                        "test2": [1, 2, 3],
                        "test3": [4.5, 3.2, 1.2]})
    frame2 = DataFrame({"test3": [5.2, 2.2, 4.3]})
    frame1.index = Index(["x", "y", "z"])
    frame2.index = Index(["x", "y", "q"])

    v1 = concat([frame1, frame2], axis=1,
                ignore_index=True, sort=sort)

    # rows listed in sorted label order; reordered below when not sorting
    expected = DataFrame([[np.nan, np.nan, np.nan, 4.3],
                          ['a', 1, 4.5, 5.2],
                          ['b', 2, 3.2, 2.2],
                          ['c', 3, 1.2, np.nan]],
                         index=Index(["q", "x", "y", "z"]))
    if not sort:
        expected = expected.loc[['x', 'y', 'z', 'q']]

    tm.assert_frame_equal(v1, expected)
def test_concat_multiindex_with_keys(self):
    """``keys`` prepend a new outer level to an existing MultiIndex."""
    levels = [['foo', 'bar', 'baz', 'qux'],
              ['one', 'two', 'three']]
    codes = [[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
             [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]]
    index = MultiIndex(levels=levels, codes=codes,
                       names=['first', 'second'])
    frame = DataFrame(np.random.randn(10, 3), index=index,
                      columns=Index(['A', 'B', 'C'], name='exp'))
    result = concat([frame, frame], keys=[0, 1], names=['iteration'])

    assert result.index.names == ('iteration',) + index.names
    for key in (0, 1):
        tm.assert_frame_equal(result.loc[key], frame)
    assert result.index.nlevels == 3
def test_concat_multiindex_with_tz(self):
    """A tz-aware MultiIndex level survives concatenation (GH 6606)."""
    df = DataFrame({'dt': [datetime(2014, 1, 1),
                           datetime(2014, 1, 2),
                           datetime(2014, 1, 3)],
                    'b': ['A', 'B', 'C'],
                    'c': [1, 2, 3], 'd': [4, 5, 6]})
    df['dt'] = df['dt'].apply(lambda d: Timestamp(d, tz='US/Pacific'))
    df = df.set_index(['dt', 'b'])

    dates = ['2014-01-01', '2014-01-02', '2014-01-03']
    exp_idx1 = DatetimeIndex(dates * 2, tz='US/Pacific', name='dt')
    exp_idx2 = Index(['A', 'B', 'C'] * 2, name='b')
    exp_idx = MultiIndex.from_arrays([exp_idx1, exp_idx2])
    expected = DataFrame({'c': [1, 2, 3] * 2, 'd': [4, 5, 6] * 2},
                         index=exp_idx, columns=['c', 'd'])

    result = concat([df, df])
    tm.assert_frame_equal(result, expected)
def test_concat_multiindex_with_none_in_index_names(self):
    """Keyed concat works when an index level name is None (GH 15787)."""
    index = pd.MultiIndex.from_product([[1], range(5)],
                                       names=['level1', None])
    df = pd.DataFrame({'col': range(5)}, index=index, dtype=np.int32)
    out_names = ['level2', 'level1', None]

    # two pieces of equal length
    result = concat([df, df], keys=[1, 2], names=['level2'])
    index = pd.MultiIndex.from_product([[1, 2], [1], range(5)],
                                       names=out_names)
    expected = pd.DataFrame({'col': list(range(5)) * 2},
                            index=index, dtype=np.int32)
    assert_frame_equal(result, expected)

    # pieces of different length
    result = concat([df, df[:2]], keys=[1, 2], names=['level2'])
    level2 = [1] * 5 + [2] * 2
    level1 = [1] * 7
    no_name = list(range(5)) + list(range(2))
    tuples = list(zip(level2, level1, no_name))
    index = pd.MultiIndex.from_tuples(tuples, names=out_names)
    expected = pd.DataFrame({'col': no_name}, index=index,
                            dtype=np.int32)
    assert_frame_equal(result, expected)
def test_concat_keys_and_levels(self):
    """Tuple keys combined with explicit ``levels`` and/or ``names``."""
    df = DataFrame(np.random.randn(1, 3))
    df2 = DataFrame(np.random.randn(1, 4))
    objs = [df, df2, df, df2]
    keys = [('foo', 'one'), ('foo', 'two'),
            ('baz', 'one'), ('baz', 'two')]

    levels = [['foo', 'baz'], ['one', 'two']]
    names = ['first', 'second']
    result = concat(objs, keys=keys, levels=levels, names=names)
    expected = concat(objs)
    exp_index = MultiIndex(levels=levels + [[0]],
                           codes=[[0, 0, 1, 1], [0, 1, 0, 1],
                                  [0, 0, 0, 0]],
                           names=names + [None])
    expected.index = exp_index

    tm.assert_frame_equal(result, expected)

    # no names
    result = concat(objs, keys=keys, levels=levels)
    assert result.index.names == (None,) * 3

    # no levels
    result = concat(objs, keys=keys, names=['first', 'second'])
    assert result.index.names == ('first', 'second') + (None,)
    tm.assert_index_equal(result.index.levels[0],
                          Index(['baz', 'foo'], name='first'))
def test_concat_keys_levels_no_overlap(self):
    """Keys missing from the passed ``levels`` raise ValueError (GH #1406)."""
    df = DataFrame(np.random.randn(1, 3), index=['a'])
    df2 = DataFrame(np.random.randn(1, 4), index=['b'])
    keys = ['one', 'two']
    levels = [['foo', 'bar', 'baz']]

    msg = "Values not found in passed level"
    with pytest.raises(ValueError, match=msg):
        concat([df, df], keys=keys, levels=levels)

    msg = "Key one not in level"
    with pytest.raises(ValueError, match=msg):
        concat([df, df2], keys=keys, levels=levels)
def test_concat_rename_index(self):
    """``names`` can rename the inner index level as well as the key level."""
    a = DataFrame(np.random.rand(3, 3),
                  columns=list('ABC'),
                  index=Index(list('abc'), name='index_a'))
    b = DataFrame(np.random.rand(3, 3),
                  columns=list('ABC'),
                  index=Index(list('abc'), name='index_b'))
    keys = ['key0', 'key1']

    result = concat([a, b], keys=keys, names=['lvl0', 'lvl1'])

    # build the expectation by naming only the key level, then
    # renaming the second level by hand
    exp = concat([a, b], keys=keys, names=['lvl0'])
    level_names = list(exp.index.names)
    level_names[1] = 'lvl1'
    exp.index.set_names(level_names, inplace=True)

    tm.assert_frame_equal(result, exp)
    assert result.index.names == exp.index.names
def test_crossed_dtypes_weird_corner(self):
    """Append frames whose per-column float/int dtypes are swapped."""
    columns = ['A', 'B', 'C', 'D']
    values = [1, 2, 3, 4]
    df1 = DataFrame({'A': np.array(values, dtype='f8'),
                     'B': np.array(values, dtype='i8'),
                     'C': np.array(values, dtype='f8'),
                     'D': np.array(values, dtype='i8')},
                    columns=columns)

    df2 = DataFrame({'A': np.array(values, dtype='i8'),
                     'B': np.array(values, dtype='f8'),
                     'C': np.array(values, dtype='i8'),
                     'D': np.array(values, dtype='f8')},
                    columns=columns)

    appended = df1.append(df2, ignore_index=True)
    stacked = np.concatenate([df1.values, df2.values], axis=0)
    expected = DataFrame(stacked, columns=columns)
    tm.assert_frame_equal(appended, expected)

    # keyed concat of single-row frames gets both requested level names
    df = DataFrame(np.random.randn(1, 3), index=['a'])
    df2 = DataFrame(np.random.randn(1, 4), index=['b'])
    result = concat(
        [df, df2], keys=['one', 'two'], names=['first', 'second'])
    assert result.index.names == ('first', 'second')
def test_dups_index(self):
    """concat/append with duplicated column labels (GH 4771)."""
    # single dtypes
    data = np.random.randint(0, 10, size=40).reshape(10, 4)
    df = DataFrame(data, columns=['A', 'A', 'C', 'C'])

    result = concat([df, df], axis=1)
    assert_frame_equal(result.iloc[:, :4], df)
    assert_frame_equal(result.iloc[:, 4:], df)

    result = concat([df, df], axis=0)
    assert_frame_equal(result.iloc[:10], df)
    assert_frame_equal(result.iloc[10:], df)

    # multi dtypes
    float_part = DataFrame(np.random.randn(10, 4),
                           columns=['A', 'A', 'B', 'B'])
    int_part = DataFrame(np.random.randint(0, 10, size=20).reshape(10, 2),
                         columns=['A', 'C'])
    df = concat([float_part, int_part], axis=1)

    result = concat([df, df], axis=1)
    assert_frame_equal(result.iloc[:, :6], df)
    assert_frame_equal(result.iloc[:, 6:], df)

    result = concat([df, df], axis=0)
    assert_frame_equal(result.iloc[:10], df)
    assert_frame_equal(result.iloc[10:], df)

    # append
    result = df.iloc[0:8, :].append(df.iloc[8:])
    assert_frame_equal(result, df)

    result = df.iloc[0:8, :].append(df.iloc[8:9]).append(df.iloc[9:10])
    assert_frame_equal(result, df)

    expected = concat([df, df], axis=0)
    result = df.append(df)
    assert_frame_equal(result, expected)
def test_with_mixed_tuples(self, sort):
    """Smoke test: column labels mixing strings and tuples (10697)."""
    # columns have mixed tuples, so handle properly
    str_and_tuple = DataFrame({u'A': 'foo', (u'B', 1): 'bar'},
                              index=range(2))
    other = DataFrame({u'B': 'foo', (u'B', 1): 'bar'}, index=range(2))

    # it works
    concat([str_and_tuple, other], sort=sort)
def test_handle_empty_objects(self, sort):
    """Empty frames among the inputs do not change the result."""
    df = DataFrame(np.random.randn(10, 4), columns=list('abcd'))

    baz = df[:5].copy()
    baz['foo'] = 'bar'
    empty = df[5:5]

    concatted = concat([baz, empty, empty, df[5:]], axis=0, sort=sort)

    expected = df.reindex(columns=['a', 'b', 'c', 'd', 'foo'])
    expected['foo'] = expected['foo'].astype('O')
    expected.loc[0:4, 'foo'] = 'bar'

    tm.assert_frame_equal(concatted, expected)

    # empty as first element with time series
    # GH3259
    df = DataFrame(dict(A=range(10000)), index=date_range(
        '20130101', periods=10000, freq='s'))
    empty = DataFrame()

    # either position, along columns
    result = concat([df, empty], axis=1)
    assert_frame_equal(result, df)
    result = concat([empty, df], axis=1)
    assert_frame_equal(result, df)

    # either position, along rows
    result = concat([df, empty])
    assert_frame_equal(result, df)
    result = concat([empty, df])
    assert_frame_equal(result, df)
def test_concat_mixed_objs(self):
    """Concatenate mixtures of Series and DataFrames (GH2385)."""
    # axis 1
    index = date_range('01-Jan-2013', periods=10, freq='H')
    arr = np.arange(10, dtype='int64')
    s1 = Series(arr, index=index)
    s2 = Series(arr, index=index)
    df = DataFrame(arr.reshape(-1, 1), index=index)

    def repeated(n_cols, columns):
        # frame with ``arr`` repeated side by side in ``n_cols`` columns
        return DataFrame(np.repeat(arr, n_cols).reshape(-1, n_cols),
                         index=index, columns=columns)

    expected = repeated(2, [0, 0])
    result = concat([df, df], axis=1)
    assert_frame_equal(result, expected)

    expected = repeated(2, [0, 1])
    result = concat([s1, s2], axis=1)
    assert_frame_equal(result, expected)

    expected = repeated(3, [0, 1, 2])
    result = concat([s1, s2, s1], axis=1)
    assert_frame_equal(result, expected)

    expected = repeated(5, [0, 0, 1, 2, 3])
    result = concat([s1, df, s2, s2, s1], axis=1)
    assert_frame_equal(result, expected)

    # with names
    s1.name = 'foo'
    expected = repeated(3, ['foo', 0, 0])
    result = concat([s1, df, s2], axis=1)
    assert_frame_equal(result, expected)

    s2.name = 'bar'
    expected = repeated(3, ['foo', 0, 'bar'])
    result = concat([s1, df, s2], axis=1)
    assert_frame_equal(result, expected)

    # ignore index
    expected = repeated(3, [0, 1, 2])
    result = concat([s1, df, s2], axis=1, ignore_index=True)
    assert_frame_equal(result, expected)

    # axis 0
    expected = DataFrame(np.tile(arr, 3).reshape(-1, 1),
                         index=index.tolist() * 3, columns=[0])
    result = concat([s1, df, s2])
    assert_frame_equal(result, expected)

    expected = DataFrame(np.tile(arr, 3).reshape(-1, 1), columns=[0])
    result = concat([s1, df, s2], ignore_index=True)
    assert_frame_equal(result, expected)

    # invalid concatenation of mixed dims
    with catch_warnings(record=True):
        simplefilter("ignore", FutureWarning)
        panel = tm.makePanel()
        msg = ("cannot concatenate unaligned mixed dimensional NDFrame"
               " objects")
        with pytest.raises(ValueError, match=msg):
            concat([panel, s1], axis=1)
def test_empty_dtype_coerce(self):
    """All-None columns keep their dtype through concat (10571)."""
    # xref to #12411
    # xref to #12045
    # xref to #11594
    # see below

    rows_top = [[1, None], [2, None]]
    rows_bottom = [[3, None], [4, None]]
    df1 = DataFrame(data=rows_top, columns=['a', 'b'])
    df2 = DataFrame(data=rows_bottom, columns=['a', 'b'])
    result = concat([df1, df2])
    tm.assert_series_equal(result.dtypes, df1.dtypes)
def test_dtype_coerceion(self):
    """Concatenating single-row slices must preserve the column dtype.

    Each case splits a one-column frame into its first two rows,
    concatenates them again and compares ``dtypes`` with the original.
    """
    # 12411
    df = DataFrame({'date': [pd.Timestamp('20130101').tz_localize('UTC'),
                             pd.NaT]})

    result = concat([df.iloc[[0]], df.iloc[[1]]])
    tm.assert_series_equal(result.dtypes, df.dtypes)

    # 12045
    # Uses the module-level ``dt`` alias rather than a function-local
    # ``import datetime``, which shadowed the ``datetime`` class that
    # other tests in this file use.
    # NOTE(review): the year 1012 looks deliberate (outside the
    # datetime64[ns] range) -- confirm against the issue.
    df = DataFrame({'date': [dt.datetime(2012, 1, 1),
                             dt.datetime(1012, 1, 2)]})
    result = concat([df.iloc[[0]], df.iloc[[1]]])
    tm.assert_series_equal(result.dtypes, df.dtypes)

    # 11594
    df = DataFrame({'text': ['some words'] + [None] * 9})
    result = concat([df.iloc[[0]], df.iloc[[1]]])
    tm.assert_series_equal(result.dtypes, df.dtypes)
@pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning")
def test_panel_concat_other_axes(self):
    """Split a Panel along axis 1 / axis 2 and concatenate it back."""
    panel = tm.makePanel()

    front = panel.iloc[:, :5, :]
    back = panel.iloc[:, 5:, :]
    result = concat([front, back], axis=1)
    tm.assert_panel_equal(result, panel)

    front = panel.iloc[:, :, :2]
    back = panel.iloc[:, :, 2:]
    result = concat([front, back], axis=2)
    tm.assert_panel_equal(result, panel)

    # if things are a bit misbehaved
    front = panel.iloc[:2, :, :2]
    back = panel.iloc[:, :, 2:]
    front['ItemC'] = 'baz'
    result = concat([front, back], axis=2)

    expected = panel.copy()
    expected['ItemC'] = expected['ItemC'].astype('O')
    expected.loc['ItemC', :, :2] = 'baz'
    tm.assert_panel_equal(result, expected)
@pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning")
# Panel.rename warning we don't care about
@pytest.mark.filterwarnings("ignore:Using:FutureWarning")
def test_panel_concat_buglet(self, sort):
    """Smoke test: concat Panels whose axes were renamed (#2257)."""
    n_rows = 5
    n_cols = 3

    def make_frame():
        return DataFrame(np.random.randn(n_rows, n_cols),
                         index=["I%s" % i for i in range(n_rows)],
                         columns=["C%s" % i for i in range(n_cols)])

    def make_panel():
        return Panel({"Item%s" % x: make_frame() for x in ['A', 'B', 'C']})

    panel1 = make_panel()
    panel2 = make_panel()

    panel2 = panel2.rename(major_axis={x: "%s_1" % x
                                       for x in panel2.major_axis})

    panel3 = panel2.rename(major_axis=lambda x: '%s_1' % x)
    panel3 = panel3.rename(minor_axis=lambda x: '%s_1' % x)

    # it works!
    concat([panel1, panel3], axis=1, verify_integrity=True, sort=sort)
def test_concat_series(self):
    """Concatenate slices of one time series, with and without keys."""
    ts = tm.makeTimeSeries()
    ts.name = 'foo'

    pieces = [ts[:5], ts[5:15], ts[15:]]

    result = concat(pieces)
    tm.assert_series_equal(result, ts)
    assert result.name == ts.name

    result = concat(pieces, keys=[0, 1, 2])
    expected = ts.copy()

    # rebuild the index from its raw datetime64 values before using it
    # as the inner level of the expected MultiIndex
    ts.index = DatetimeIndex(np.array(ts.index.values, dtype='M8[ns]'))

    piece_lengths = [len(x) for x in pieces]
    exp_codes = [np.repeat([0, 1, 2], piece_lengths),
                 np.arange(len(ts))]
    expected.index = MultiIndex(levels=[[0, 1, 2], ts.index],
                                codes=exp_codes)
    tm.assert_series_equal(result, expected)
def test_concat_series_axis1(self, sort):
    """Concatenate Series along axis=1.

    ``sort`` is requested as a pytest fixture, like the other tests in
    this file. The previous ``sort=sort`` default bound the fixture
    *function object* at class-definition time, so pytest never
    injected the fixture and ``concat`` always received a truthy value:
    the ``sort=False`` path was never exercised.
    """
    ts = tm.makeTimeSeries()

    pieces = [ts[:-2], ts[2:], ts[2:-2]]

    result = concat(pieces, axis=1)
    expected = DataFrame(pieces).T
    assert_frame_equal(result, expected)

    result = concat(pieces, keys=['A', 'B', 'C'], axis=1)
    expected = DataFrame(pieces, index=['A', 'B', 'C']).T
    assert_frame_equal(result, expected)

    # preserve series names, #2489
    s = Series(randn(5), name='A')
    s2 = Series(randn(5), name='B')

    result = concat([s, s2], axis=1)
    expected = DataFrame({'A': s, 'B': s2})
    assert_frame_equal(result, expected)

    s2.name = None
    result = concat([s, s2], axis=1)
    tm.assert_index_equal(result.columns,
                          Index(['A', 0], dtype='object'))

    # must reindex, #2603
    s = Series(randn(3), index=['c', 'a', 'b'], name='A')
    s2 = Series(randn(4), index=['d', 'a', 'b', 'c'], name='B')
    result = concat([s, s2], axis=1, sort=sort)
    # Unsorted, the row labels are expected in order of first
    # appearance across the inputs; sorted, in lexical order.
    expected = DataFrame({'A': s, 'B': s2}, index=['c', 'a', 'b', 'd'])
    if sort:
        expected = expected.sort_index()
    assert_frame_equal(result, expected)
def test_concat_series_axis1_names_applied(self):
    """``names`` must be applied to the columns on axis=1 (#23490)."""
    # ensure names argument is not ignored on axis=1
    s = Series([1, 2, 3])
    s2 = Series([4, 5, 6])
    data = [[1, 4], [2, 5], [3, 6]]

    # flat keys -> named Index
    result = concat([s, s2], axis=1, keys=['a', 'b'], names=['A'])
    expected = DataFrame(data, columns=pd.Index(['a', 'b'], name='A'))
    assert_frame_equal(result, expected)

    # tuple keys -> named MultiIndex
    result = concat([s, s2], axis=1, keys=[('a', 1), ('b', 2)],
                    names=['A', 'B'])
    columns = MultiIndex.from_tuples([('a', 1), ('b', 2)],
                                     names=['A', 'B'])
    expected = DataFrame(data, columns=columns)
    assert_frame_equal(result, expected)
def test_concat_single_with_key(self):
    """A single keyed frame matches the first part of a two-frame concat."""
    df = DataFrame(np.random.randn(10, 4))

    single = concat([df], keys=['foo'])
    double = concat([df, df], keys=['foo', 'bar'])
    tm.assert_frame_equal(single, double[:10])
def test_concat_exclude_none(self):
    """None entries are skipped; an all-None input raises ValueError."""
    df = DataFrame(np.random.randn(10, 4))

    result = concat([df[:5], None, None, df[5:]])
    tm.assert_frame_equal(result, df)

    with pytest.raises(ValueError, match="All objects passed were None"):
        concat([None, None])
def test_concat_datetime64_block(self):
    """Datetime column values are preserved in both halves of the result."""
    from pandas.core.indexes.datetimes import date_range
    rng = date_range('1/1/2000', periods=10)

    df = DataFrame({'time': rng})

    result = concat([df, df])
    first_half = result.iloc[:10]['time']
    second_half = result.iloc[10:]['time']
    assert (first_half == rng).all()
    assert (second_half == rng).all()
def test_concat_timedelta64_block(self):
    """Timedelta column values are preserved in both halves of the result."""
    from pandas import to_timedelta

    rng = to_timedelta(np.arange(10), unit='s')

    df = DataFrame({'time': rng})

    result = concat([df, df])
    first_half = result.iloc[:10]['time']
    second_half = result.iloc[10:]['time']
    assert (first_half == rng).all()
    assert (second_half == rng).all()
- def test_concat_keys_with_none(self):
- # #1649
- df0 = DataFrame([[10, 20, 30], [10, 20, 30], [10, 20, 30]])
- result = concat(dict(a=None, b=df0, c=df0[:2], d=df0[:1], e=df0))
- expected = concat(dict(b=df0, c=df0[:2], d=df0[:1], e=df0))
- tm.assert_frame_equal(result, expected)
- result = concat([None, df0, df0[:2], df0[:1], df0],
- keys=['a', 'b', 'c', 'd', 'e'])
- expected = concat([df0, df0[:2], df0[:1], df0],
- keys=['b', 'c', 'd', 'e'])
- tm.assert_frame_equal(result, expected)
- def test_concat_bug_1719(self):
- ts1 = tm.makeTimeSeries()
- ts2 = tm.makeTimeSeries()[::2]
- # to join with union
- # these two are of different length!
- left = concat([ts1, ts2], join='outer', axis=1)
- right = concat([ts2, ts1], join='outer', axis=1)
- assert len(left) == len(right)
- def test_concat_bug_2972(self):
- ts0 = Series(np.zeros(5))
- ts1 = Series(np.ones(5))
- ts0.name = ts1.name = 'same name'
- result = concat([ts0, ts1], axis=1)
- expected = DataFrame({0: ts0, 1: ts1})
- expected.columns = ['same name', 'same name']
- assert_frame_equal(result, expected)
- def test_concat_bug_3602(self):
- # GH 3602, duplicate columns
- df1 = DataFrame({'firmNo': [0, 0, 0, 0], 'prc': [6, 6, 6, 6],
- 'stringvar': ['rrr', 'rrr', 'rrr', 'rrr']})
- df2 = DataFrame({'C': [9, 10, 11, 12], 'misc': [1, 2, 3, 4],
- 'prc': [6, 6, 6, 6]})
- expected = DataFrame([[0, 6, 'rrr', 9, 1, 6],
- [0, 6, 'rrr', 10, 2, 6],
- [0, 6, 'rrr', 11, 3, 6],
- [0, 6, 'rrr', 12, 4, 6]])
- expected.columns = ['firmNo', 'prc', 'stringvar', 'C', 'misc', 'prc']
- result = concat([df1, df2], axis=1)
- assert_frame_equal(result, expected)
- def test_concat_inner_join_empty(self):
- # GH 15328
- df_empty = pd.DataFrame()
- df_a = pd.DataFrame({'a': [1, 2]}, index=[0, 1], dtype='int64')
- df_expected = pd.DataFrame({'a': []}, index=[], dtype='int64')
- for how, expected in [('inner', df_expected), ('outer', df_a)]:
- result = pd.concat([df_a, df_empty], axis=1, join=how)
- assert_frame_equal(result, expected)
- def test_concat_series_axis1_same_names_ignore_index(self):
- dates = date_range('01-Jan-2013', '01-Jan-2014', freq='MS')[0:-1]
- s1 = Series(randn(len(dates)), index=dates, name='value')
- s2 = Series(randn(len(dates)), index=dates, name='value')
- result = concat([s1, s2], axis=1, ignore_index=True)
- expected = Index([0, 1])
- tm.assert_index_equal(result.columns, expected)
- def test_concat_iterables(self):
- # GH8645 check concat works with tuples, list, generators, and weird
- # stuff like deque and custom iterables
- df1 = DataFrame([1, 2, 3])
- df2 = DataFrame([4, 5, 6])
- expected = DataFrame([1, 2, 3, 4, 5, 6])
- assert_frame_equal(concat((df1, df2), ignore_index=True), expected)
- assert_frame_equal(concat([df1, df2], ignore_index=True), expected)
- assert_frame_equal(concat((df for df in (df1, df2)),
- ignore_index=True), expected)
- assert_frame_equal(
- concat(deque((df1, df2)), ignore_index=True), expected)
- class CustomIterator1(object):
- def __len__(self):
- return 2
- def __getitem__(self, index):
- try:
- return {0: df1, 1: df2}[index]
- except KeyError:
- raise IndexError
- assert_frame_equal(pd.concat(CustomIterator1(),
- ignore_index=True), expected)
- class CustomIterator2(Iterable):
- def __iter__(self):
- yield df1
- yield df2
- assert_frame_equal(pd.concat(CustomIterator2(),
- ignore_index=True), expected)
def test_concat_invalid(self):
    """Mixing a frame with a non-pandas object raises ``TypeError``."""
    # trying to concat a ndframe with a non-ndframe
    frame = mkdf(10, 2)
    template = ('cannot concatenate object of type "{}";'
                ' only pd.Series, pd.DataFrame, and pd.Panel'
                r' \(deprecated\) objs are valid')

    non_ndframes = [1, dict(), [1, 2], (1, 2)]
    for bad in non_ndframes:
        expected_msg = template.format(type(bad))
        with pytest.raises(TypeError, match=expected_msg):
            concat([frame, bad])
def test_concat_invalid_first_argument(self):
    """A bare DataFrame as first argument raises; lazy iterables work."""
    frame_a = mkdf(10, 2)
    frame_b = mkdf(10, 2)
    expected_msg = ('first argument must be an iterable of pandas '
                    'objects, you passed an object of type "DataFrame"')
    with pytest.raises(TypeError, match=expected_msg):
        concat(frame_a, frame_b)

    # generator ok though
    concat(DataFrame(np.random.rand(5, 5)) for _ in range(3))

    # text reader ok
    # GH6583
    data = """index,A,B,C,D
foo,2,3,4,5
bar,7,8,9,10
baz,12,13,14,15
qux,12,13,14,15
foo2,12,13,14,15
bar2,12,13,14,15
"""

    # reading in one-row chunks must match reading the text in one go
    chunked = read_csv(StringIO(data), chunksize=1)
    from_chunks = concat(chunked, ignore_index=True)
    in_one_go = read_csv(StringIO(data))
    assert_frame_equal(from_chunks, in_one_go)
- def test_concat_NaT_series(self):
- # GH 11693
- # test for merging NaT series with datetime series.
- x = Series(date_range('20151124 08:00', '20151124 09:00',
- freq='1h', tz='US/Eastern'))
- y = Series(pd.NaT, index=[0, 1], dtype='datetime64[ns, US/Eastern]')
- expected = Series([x[0], x[1], pd.NaT, pd.NaT])
- result = concat([x, y], ignore_index=True)
- tm.assert_series_equal(result, expected)
- # all NaT with tz
- expected = Series(pd.NaT, index=range(4),
- dtype='datetime64[ns, US/Eastern]')
- result = pd.concat([y, y], ignore_index=True)
- tm.assert_series_equal(result, expected)
- # without tz
- x = pd.Series(pd.date_range('20151124 08:00',
- '20151124 09:00', freq='1h'))
- y = pd.Series(pd.date_range('20151124 10:00',
- '20151124 11:00', freq='1h'))
- y[:] = pd.NaT
- expected = pd.Series([x[0], x[1], pd.NaT, pd.NaT])
- result = pd.concat([x, y], ignore_index=True)
- tm.assert_series_equal(result, expected)
- # all NaT without tz
- x[:] = pd.NaT
- expected = pd.Series(pd.NaT, index=range(4),
- dtype='datetime64[ns]')
- result = pd.concat([x, y], ignore_index=True)
- tm.assert_series_equal(result, expected)
- def test_concat_tz_frame(self):
- df2 = DataFrame(dict(A=pd.Timestamp('20130102', tz='US/Eastern'),
- B=pd.Timestamp('20130603', tz='CET')),
- index=range(5))
- # concat
- df3 = pd.concat([df2.A.to_frame(), df2.B.to_frame()], axis=1)
- assert_frame_equal(df2, df3)
- def test_concat_tz_series(self):
- # gh-11755: tz and no tz
- x = Series(date_range('20151124 08:00',
- '20151124 09:00',
- freq='1h', tz='UTC'))
- y = Series(date_range('2012-01-01', '2012-01-02'))
- expected = Series([x[0], x[1], y[0], y[1]],
- dtype='object')
- result = concat([x, y], ignore_index=True)
- tm.assert_series_equal(result, expected)
- # gh-11887: concat tz and object
- x = Series(date_range('20151124 08:00',
- '20151124 09:00',
- freq='1h', tz='UTC'))
- y = Series(['a', 'b'])
- expected = Series([x[0], x[1], y[0], y[1]],
- dtype='object')
- result = concat([x, y], ignore_index=True)
- tm.assert_series_equal(result, expected)
- # see gh-12217 and gh-12306
- # Concatenating two UTC times
- first = pd.DataFrame([[datetime(2016, 1, 1)]])
- first[0] = first[0].dt.tz_localize('UTC')
- second = pd.DataFrame([[datetime(2016, 1, 2)]])
- second[0] = second[0].dt.tz_localize('UTC')
- result = pd.concat([first, second])
- assert result[0].dtype == 'datetime64[ns, UTC]'
- # Concatenating two London times
- first = pd.DataFrame([[datetime(2016, 1, 1)]])
- first[0] = first[0].dt.tz_localize('Europe/London')
- second = pd.DataFrame([[datetime(2016, 1, 2)]])
- second[0] = second[0].dt.tz_localize('Europe/London')
- result = pd.concat([first, second])
- assert result[0].dtype == 'datetime64[ns, Europe/London]'
- # Concatenating 2+1 London times
- first = pd.DataFrame([[datetime(2016, 1, 1)], [datetime(2016, 1, 2)]])
- first[0] = first[0].dt.tz_localize('Europe/London')
- second = pd.DataFrame([[datetime(2016, 1, 3)]])
- second[0] = second[0].dt.tz_localize('Europe/London')
- result = pd.concat([first, second])
- assert result[0].dtype == 'datetime64[ns, Europe/London]'
- # Concat'ing 1+2 London times
- first = pd.DataFrame([[datetime(2016, 1, 1)]])
- first[0] = first[0].dt.tz_localize('Europe/London')
- second = pd.DataFrame([[datetime(2016, 1, 2)], [datetime(2016, 1, 3)]])
- second[0] = second[0].dt.tz_localize('Europe/London')
- result = pd.concat([first, second])
- assert result[0].dtype == 'datetime64[ns, Europe/London]'
- def test_concat_tz_series_with_datetimelike(self):
- # see gh-12620: tz and timedelta
- x = [pd.Timestamp('2011-01-01', tz='US/Eastern'),
- pd.Timestamp('2011-02-01', tz='US/Eastern')]
- y = [pd.Timedelta('1 day'), pd.Timedelta('2 day')]
- result = concat([pd.Series(x), pd.Series(y)], ignore_index=True)
- tm.assert_series_equal(result, pd.Series(x + y, dtype='object'))
- # tz and period
- y = [pd.Period('2011-03', freq='M'), pd.Period('2011-04', freq='M')]
- result = concat([pd.Series(x), pd.Series(y)], ignore_index=True)
- tm.assert_series_equal(result, pd.Series(x + y, dtype='object'))
def test_concat_tz_series_tzlocal(self):
    """Series localized with dateutil's ``tzlocal`` keep that tz."""
    # see gh-13583

    def local_stamp(text):
        # a fresh tzlocal() per timestamp, as separate calls would give
        return pd.Timestamp(text, tz=dateutil.tz.tzlocal())

    early = [local_stamp('2011-01-01'), local_stamp('2011-02-01')]
    late = [local_stamp('2012-01-01'), local_stamp('2012-02-01')]

    result = concat([pd.Series(early), pd.Series(late)], ignore_index=True)

    tm.assert_series_equal(result, pd.Series(early + late))
    assert result.dtype == 'datetime64[ns, tzlocal()]'
@pytest.mark.parametrize('tz1', [None, 'UTC'])
@pytest.mark.parametrize('tz2', [None, 'UTC'])
@pytest.mark.parametrize('s', [pd.NaT, pd.Timestamp('20150101')])
def test_concat_NaT_dataframes_all_NaT_axis_0(self, tz1, tz2, s):
    """Row-wise concat of an all-NaT frame with a one-value frame."""
    # GH 12396

    def localize_to(tz):
        # column-wise callable for DataFrame.apply
        return lambda column: column.dt.tz_localize(tz)

    first = pd.DataFrame([[pd.NaT], [pd.NaT]]).apply(localize_to(tz1))
    second = pd.DataFrame([s]).apply(localize_to(tz2))

    result = pd.concat([first, second], axis=0)

    expected = pd.DataFrame(pd.Series([pd.NaT, pd.NaT, s],
                                      index=[0, 1, 0]))
    expected = expected.apply(localize_to(tz2))
    if tz1 != tz2:
        # differing timezones are expected to fall back to object dtype
        expected = expected.astype(object)
    assert_frame_equal(result, expected)
@pytest.mark.parametrize('tz1', [None, 'UTC'])
@pytest.mark.parametrize('tz2', [None, 'UTC'])
def test_concat_NaT_dataframes_all_NaT_axis_1(self, tz1, tz2):
    """Column-wise concat of all-NaT frames keeps each column's tz."""
    # GH 12396

    def nat_pair(tz):
        # two NaT values localized to ``tz``
        return pd.Series([pd.NaT, pd.NaT]).dt.tz_localize(tz)

    first = pd.DataFrame(nat_pair(tz1))
    single_nat = pd.Series([pd.NaT]).dt.tz_localize(tz2)
    second = pd.DataFrame(single_nat, columns=[1])

    expected = pd.DataFrame({0: nat_pair(tz1), 1: nat_pair(tz2)})
    result = pd.concat([first, second], axis=1)
    assert_frame_equal(result, expected)
@pytest.mark.parametrize('tz1', [None, 'UTC'])
@pytest.mark.parametrize('tz2', [None, 'UTC'])
def test_concat_NaT_series_dataframe_all_NaT(self, tz1, tz2):
    """Concat an all-NaT Series with a frame of timestamps."""
    # GH 12396
    stamps = [pd.Timestamp('2015/01/01', tz=tz2),
              pd.Timestamp('2016/01/01', tz=tz2)]

    first = pd.Series([pd.NaT, pd.NaT]).dt.tz_localize(tz1)
    second = pd.DataFrame([[stamp] for stamp in stamps], index=[2, 3])

    expected = pd.DataFrame([pd.NaT, pd.NaT] + stamps)
    if tz1 != tz2:
        # differing timezones are expected to fall back to object dtype
        expected = expected.astype(object)

    result = pd.concat([first, second])
    assert_frame_equal(result, expected)
@pytest.mark.parametrize('tz', [None, 'UTC'])
def test_concat_NaT_dataframes(self, tz):
    """Row-wise concat of an all-NaT frame and a timestamp frame."""
    # GH 12396
    stamps = [pd.Timestamp('2015/01/01', tz=tz),
              pd.Timestamp('2016/01/01', tz=tz)]

    first = pd.DataFrame([[pd.NaT], [pd.NaT]])
    first = first.apply(lambda column: column.dt.tz_localize(tz))
    second = pd.DataFrame([[stamp] for stamp in stamps], index=[2, 3])

    expected = pd.DataFrame([pd.NaT, pd.NaT] + stamps)
    result = pd.concat([first, second], axis=0)
    assert_frame_equal(result, expected)
- def test_concat_period_series(self):
- x = Series(pd.PeriodIndex(['2015-11-01', '2015-12-01'], freq='D'))
- y = Series(pd.PeriodIndex(['2015-10-01', '2016-01-01'], freq='D'))
- expected = Series([x[0], x[1], y[0], y[1]], dtype='Period[D]')
- result = concat([x, y], ignore_index=True)
- tm.assert_series_equal(result, expected)
- def test_concat_period_multiple_freq_series(self):
- x = Series(pd.PeriodIndex(['2015-11-01', '2015-12-01'], freq='D'))
- y = Series(pd.PeriodIndex(['2015-10-01', '2016-01-01'], freq='M'))
- expected = Series([x[0], x[1], y[0], y[1]], dtype='object')
- result = concat([x, y], ignore_index=True)
- tm.assert_series_equal(result, expected)
- assert result.dtype == 'object'
- def test_concat_period_other_series(self):
- x = Series(pd.PeriodIndex(['2015-11-01', '2015-12-01'], freq='D'))
- y = Series(pd.PeriodIndex(['2015-11-01', '2015-12-01'], freq='M'))
- expected = Series([x[0], x[1], y[0], y[1]], dtype='object')
- result = concat([x, y], ignore_index=True)
- tm.assert_series_equal(result, expected)
- assert result.dtype == 'object'
- # non-period
- x = Series(pd.PeriodIndex(['2015-11-01', '2015-12-01'], freq='D'))
- y = Series(pd.DatetimeIndex(['2015-11-01', '2015-12-01']))
- expected = Series([x[0], x[1], y[0], y[1]], dtype='object')
- result = concat([x, y], ignore_index=True)
- tm.assert_series_equal(result, expected)
- assert result.dtype == 'object'
- x = Series(pd.PeriodIndex(['2015-11-01', '2015-12-01'], freq='D'))
- y = Series(['A', 'B'])
- expected = Series([x[0], x[1], y[0], y[1]], dtype='object')
- result = concat([x, y], ignore_index=True)
- tm.assert_series_equal(result, expected)
- assert result.dtype == 'object'
- def test_concat_empty_series(self):
- # GH 11082
- s1 = pd.Series([1, 2, 3], name='x')
- s2 = pd.Series(name='y')
- res = pd.concat([s1, s2], axis=1)
- exp = pd.DataFrame({'x': [1, 2, 3], 'y': [np.nan, np.nan, np.nan]})
- tm.assert_frame_equal(res, exp)
- s1 = pd.Series([1, 2, 3], name='x')
- s2 = pd.Series(name='y')
- res = pd.concat([s1, s2], axis=0)
- # name will be reset
- exp = pd.Series([1, 2, 3])
- tm.assert_series_equal(res, exp)
- # empty Series with no name
- s1 = pd.Series([1, 2, 3], name='x')
- s2 = pd.Series(name=None)
- res = pd.concat([s1, s2], axis=1)
- exp = pd.DataFrame({'x': [1, 2, 3], 0: [np.nan, np.nan, np.nan]},
- columns=['x', 0])
- tm.assert_frame_equal(res, exp)
@pytest.mark.parametrize('tz', [None, 'UTC'])
@pytest.mark.parametrize('values', [[], [1, 2, 3]])
def test_concat_empty_series_timelike(self, tz, values):
    """An empty datetime Series joined on axis=1 yields a NaT column."""
    # GH 18447
    empty_times = Series([], dtype='M8[ns]').dt.tz_localize(tz)
    payload = Series(values)

    # one NaT per payload row, carrying the same timezone
    nat_column = pd.Series([pd.NaT] * len(values), dtype='M8[ns]')
    expected = DataFrame({0: nat_column.dt.tz_localize(tz), 1: values})

    result = concat([empty_times, payload], axis=1)
    assert_frame_equal(result, expected)
- def test_default_index(self):
- # is_series and ignore_index
- s1 = pd.Series([1, 2, 3], name='x')
- s2 = pd.Series([4, 5, 6], name='y')
- res = pd.concat([s1, s2], axis=1, ignore_index=True)
- assert isinstance(res.columns, pd.RangeIndex)
- exp = pd.DataFrame([[1, 4], [2, 5], [3, 6]])
- # use check_index_type=True to check the result have
- # RangeIndex (default index)
- tm.assert_frame_equal(res, exp, check_index_type=True,
- check_column_type=True)
- # is_series and all inputs have no names
- s1 = pd.Series([1, 2, 3])
- s2 = pd.Series([4, 5, 6])
- res = pd.concat([s1, s2], axis=1, ignore_index=False)
- assert isinstance(res.columns, pd.RangeIndex)
- exp = pd.DataFrame([[1, 4], [2, 5], [3, 6]])
- exp.columns = pd.RangeIndex(2)
- tm.assert_frame_equal(res, exp, check_index_type=True,
- check_column_type=True)
- # is_dataframe and ignore_index
- df1 = pd.DataFrame({'A': [1, 2], 'B': [5, 6]})
- df2 = pd.DataFrame({'A': [3, 4], 'B': [7, 8]})
- res = pd.concat([df1, df2], axis=0, ignore_index=True)
- exp = pd.DataFrame([[1, 5], [2, 6], [3, 7], [4, 8]],
- columns=['A', 'B'])
- tm.assert_frame_equal(res, exp, check_index_type=True,
- check_column_type=True)
- res = pd.concat([df1, df2], axis=1, ignore_index=True)
- exp = pd.DataFrame([[1, 5, 3, 7], [2, 6, 4, 8]])
- tm.assert_frame_equal(res, exp, check_index_type=True,
- check_column_type=True)
- def test_concat_multiindex_rangeindex(self):
- # GH13542
- # when multi-index levels are RangeIndex objects
- # there is a bug in concat with objects of len 1
- df = DataFrame(np.random.randn(9, 2))
- df.index = MultiIndex(levels=[pd.RangeIndex(3), pd.RangeIndex(3)],
- codes=[np.repeat(np.arange(3), 3),
- np.tile(np.arange(3), 3)])
- res = concat([df.iloc[[2, 3, 4], :], df.iloc[[5], :]])
- exp = df.iloc[[2, 3, 4, 5], :]
- tm.assert_frame_equal(res, exp)
- def test_concat_multiindex_dfs_with_deepcopy(self):
- # GH 9967
- from copy import deepcopy
- example_multiindex1 = pd.MultiIndex.from_product([['a'], ['b']])
- example_dataframe1 = pd.DataFrame([0], index=example_multiindex1)
- example_multiindex2 = pd.MultiIndex.from_product([['a'], ['c']])
- example_dataframe2 = pd.DataFrame([1], index=example_multiindex2)
- example_dict = {'s1': example_dataframe1, 's2': example_dataframe2}
- expected_index = pd.MultiIndex(levels=[['s1', 's2'],
- ['a'],
- ['b', 'c']],
- codes=[[0, 1], [0, 0], [0, 1]],
- names=['testname', None, None])
- expected = pd.DataFrame([[0], [1]], index=expected_index)
- result_copy = pd.concat(deepcopy(example_dict), names=['testname'])
- tm.assert_frame_equal(result_copy, expected)
- result_no_copy = pd.concat(example_dict, names=['testname'])
- tm.assert_frame_equal(result_no_copy, expected)
- def test_categorical_concat_append(self):
- cat = Categorical(["a", "b"], categories=["a", "b"])
- vals = [1, 2]
- df = DataFrame({"cats": cat, "vals": vals})
- cat2 = Categorical(["a", "b", "a", "b"], categories=["a", "b"])
- vals2 = [1, 2, 1, 2]
- exp = DataFrame({"cats": cat2, "vals": vals2},
- index=Index([0, 1, 0, 1]))
- tm.assert_frame_equal(pd.concat([df, df]), exp)
- tm.assert_frame_equal(df.append(df), exp)
- # GH 13524 can concat different categories
- cat3 = Categorical(["a", "b"], categories=["a", "b", "c"])
- vals3 = [1, 2]
- df_different_categories = DataFrame({"cats": cat3, "vals": vals3})
- res = pd.concat([df, df_different_categories], ignore_index=True)
- exp = DataFrame({"cats": list('abab'), "vals": [1, 2, 1, 2]})
- tm.assert_frame_equal(res, exp)
- res = df.append(df_different_categories, ignore_index=True)
- tm.assert_frame_equal(res, exp)
- def test_categorical_concat_dtypes(self):
- # GH8143
- index = ['cat', 'obj', 'num']
- cat = Categorical(['a', 'b', 'c'])
- obj = Series(['a', 'b', 'c'])
- num = Series([1, 2, 3])
- df = pd.concat([Series(cat), obj, num], axis=1, keys=index)
- result = df.dtypes == 'object'
- expected = Series([False, True, False], index=index)
- tm.assert_series_equal(result, expected)
- result = df.dtypes == 'int64'
- expected = Series([False, False, True], index=index)
- tm.assert_series_equal(result, expected)
- result = df.dtypes == 'category'
- expected = Series([True, False, False], index=index)
- tm.assert_series_equal(result, expected)
- def test_categorical_concat(self, sort):
- # See GH 10177
- df1 = DataFrame(np.arange(18, dtype='int64').reshape(6, 3),
- columns=["a", "b", "c"])
- df2 = DataFrame(np.arange(14, dtype='int64').reshape(7, 2),
- columns=["a", "c"])
- cat_values = ["one", "one", "two", "one", "two", "two", "one"]
- df2['h'] = Series(Categorical(cat_values))
- res = pd.concat((df1, df2), axis=0, ignore_index=True, sort=sort)
- exp = DataFrame({'a': [0, 3, 6, 9, 12, 15, 0, 2, 4, 6, 8, 10, 12],
- 'b': [1, 4, 7, 10, 13, 16, np.nan, np.nan, np.nan,
- np.nan, np.nan, np.nan, np.nan],
- 'c': [2, 5, 8, 11, 14, 17, 1, 3, 5, 7, 9, 11, 13],
- 'h': [None] * 6 + cat_values})
- tm.assert_frame_equal(res, exp)
- def test_categorical_concat_gh7864(self):
- # GH 7864
- # make sure ordering is preserverd
- df = DataFrame({"id": [1, 2, 3, 4, 5, 6], "raw_grade": list('abbaae')})
- df["grade"] = Categorical(df["raw_grade"])
- df['grade'].cat.set_categories(['e', 'a', 'b'])
- df1 = df[0:3]
- df2 = df[3:]
- tm.assert_index_equal(df['grade'].cat.categories,
- df1['grade'].cat.categories)
- tm.assert_index_equal(df['grade'].cat.categories,
- df2['grade'].cat.categories)
- dfx = pd.concat([df1, df2])
- tm.assert_index_equal(df['grade'].cat.categories,
- dfx['grade'].cat.categories)
- dfa = df1.append(df2)
- tm.assert_index_equal(df['grade'].cat.categories,
- dfa['grade'].cat.categories)
- def test_categorical_concat_preserve(self):
- # GH 8641 series concat not preserving category dtype
- # GH 13524 can concat different categories
- s = Series(list('abc'), dtype='category')
- s2 = Series(list('abd'), dtype='category')
- exp = Series(list('abcabd'))
- res = pd.concat([s, s2], ignore_index=True)
- tm.assert_series_equal(res, exp)
- exp = Series(list('abcabc'), dtype='category')
- res = pd.concat([s, s], ignore_index=True)
- tm.assert_series_equal(res, exp)
- exp = Series(list('abcabc'), index=[0, 1, 2, 0, 1, 2],
- dtype='category')
- res = pd.concat([s, s])
- tm.assert_series_equal(res, exp)
- a = Series(np.arange(6, dtype='int64'))
- b = Series(list('aabbca'))
- df2 = DataFrame({'A': a,
- 'B': b.astype(CategoricalDtype(list('cab')))})
- res = pd.concat([df2, df2])
- exp = DataFrame(
- {'A': pd.concat([a, a]),
- 'B': pd.concat([b, b]).astype(CategoricalDtype(list('cab')))})
- tm.assert_frame_equal(res, exp)
def test_categorical_index_preserver(self):
    """A categorical index is kept; mismatched categories raise."""
    ints = Series(np.arange(6, dtype='int64'))
    letters = Series(list('aabbca'))
    cab_dtype = CategoricalDtype(list('cab'))

    indexed = DataFrame({'A': ints,
                         'B': letters.astype(cab_dtype)}).set_index('B')
    result = pd.concat([indexed, indexed])

    expected = DataFrame({
        'A': pd.concat([ints, ints]),
        'B': pd.concat([letters, letters]).astype(cab_dtype),
    }).set_index('B')
    tm.assert_frame_equal(result, expected)

    # wrong categories
    other_categories = Categorical(letters, categories=list('abe'))
    mismatched = DataFrame({'A': ints,
                            'B': other_categories}).set_index('B')
    msg = "categories must match existing categories when appending"
    with pytest.raises(TypeError, match=msg):
        pd.concat([indexed, mismatched])
- def test_concat_categoricalindex(self):
- # GH 16111, categories that aren't lexsorted
- categories = [9, 0, 1, 2, 3]
- a = pd.Series(1, index=pd.CategoricalIndex([9, 0],
- categories=categories))
- b = pd.Series(2, index=pd.CategoricalIndex([0, 1],
- categories=categories))
- c = pd.Series(3, index=pd.CategoricalIndex([1, 2],
- categories=categories))
- result = pd.concat([a, b, c], axis=1)
- exp_idx = pd.CategoricalIndex([9, 0, 1, 2], categories=categories)
- exp = pd.DataFrame({0: [1, 1, np.nan, np.nan],
- 1: [np.nan, 2, 2, np.nan],
- 2: [np.nan, np.nan, 3, 3]},
- columns=[0, 1, 2],
- index=exp_idx)
- tm.assert_frame_equal(result, exp)
def test_concat_order(self):
    """Column order of a sorted concat over mixed-type labels."""
    # GH 17344
    frames = [pd.DataFrame(index=range(3), columns=['a', 1, None])]
    frames.extend(pd.DataFrame(index=range(3), columns=[None, 1, 'a'])
                  for _ in range(100))

    result = pd.concat(frames, sort=True).columns

    # Different sort order between incomparable objects between
    # python 2 and python3 via Index.union.
    expected = frames[1].columns if PY2 else frames[0].columns
    tm.assert_index_equal(result, expected)
- def test_concat_datetime_timezone(self):
- # GH 18523
- idx1 = pd.date_range('2011-01-01', periods=3, freq='H',
- tz='Europe/Paris')
- idx2 = pd.date_range(start=idx1[0], end=idx1[-1], freq='H')
- df1 = pd.DataFrame({'a': [1, 2, 3]}, index=idx1)
- df2 = pd.DataFrame({'b': [1, 2, 3]}, index=idx2)
- result = pd.concat([df1, df2], axis=1)
- exp_idx = DatetimeIndex(['2011-01-01 00:00:00+01:00',
- '2011-01-01 01:00:00+01:00',
- '2011-01-01 02:00:00+01:00'],
- freq='H'
- ).tz_convert('UTC').tz_convert('Europe/Paris')
- expected = pd.DataFrame([[1, 1], [2, 2], [3, 3]],
- index=exp_idx, columns=['a', 'b'])
- tm.assert_frame_equal(result, expected)
- idx3 = pd.date_range('2011-01-01', periods=3,
- freq='H', tz='Asia/Tokyo')
- df3 = pd.DataFrame({'b': [1, 2, 3]}, index=idx3)
- result = pd.concat([df1, df3], axis=1)
- exp_idx = DatetimeIndex(['2010-12-31 15:00:00+00:00',
- '2010-12-31 16:00:00+00:00',
- '2010-12-31 17:00:00+00:00',
- '2010-12-31 23:00:00+00:00',
- '2011-01-01 00:00:00+00:00',
- '2011-01-01 01:00:00+00:00']
- )
- expected = pd.DataFrame([[np.nan, 1], [np.nan, 2], [np.nan, 3],
- [1, np.nan], [2, np.nan], [3, np.nan]],
- index=exp_idx, columns=['a', 'b'])
- tm.assert_frame_equal(result, expected)
- # GH 13783: Concat after resample
- result = pd.concat([df1.resample('H').mean(),
- df2.resample('H').mean()], sort=True)
- expected = pd.DataFrame({'a': [1, 2, 3] + [np.nan] * 3,
- 'b': [np.nan] * 3 + [1, 2, 3]},
- index=idx1.append(idx1))
- tm.assert_frame_equal(result, expected)
@pytest.mark.skipif(PY2, reason="Unhashable Decimal dtype")
def test_concat_different_extension_dtypes_upcasts(self):
    """Two different extension dtypes concatenate to object dtype."""
    ints = pd.Series(pd.core.arrays.integer_array([1, 2]))
    decimals = pd.Series(to_decimal([1, 2]))

    result = pd.concat([ints, decimals], ignore_index=True)

    expected_values = [1, 2, Decimal(1), Decimal(2)]
    expected = pd.Series(expected_values, dtype=object)
    tm.assert_series_equal(result, expected)
@pytest.mark.parametrize('pdt', [pd.Series, pd.DataFrame, pd.Panel])
@pytest.mark.parametrize('dt', np.sctypes['float'])
@pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning")
def test_concat_no_unnecessary_upcast(dt, pdt):
    """Float inputs of one dtype keep that dtype, even with a NaN piece."""
    # GH 13247
    ndim = pdt().ndim
    pieces = [pdt(np.array([value], dtype=dt, ndmin=ndim))
              for value in (1, np.nan, 5)]

    combined = pd.concat(pieces)

    assert combined.values.dtype == dt
@pytest.mark.parametrize('pdt', [pd.Series, pd.DataFrame, pd.Panel])
@pytest.mark.parametrize('dt', np.sctypes['int'])
@pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning")
def test_concat_will_upcast(dt, pdt):
    """Integer inputs mixed with a NaN piece come out as float64."""
    with catch_warnings(record=True):
        ndim = pdt().ndim
        first_int = pdt(np.array([1], dtype=dt, ndmin=ndim))
        nan_piece = pdt(np.array([np.nan], ndmin=ndim))
        last_int = pdt(np.array([5], dtype=dt, ndmin=ndim))

        combined = pd.concat([first_int, nan_piece, last_int])

        assert combined.values.dtype == 'float64'
def test_concat_empty_and_non_empty_frame_regression():
    """Concat a one-row frame with an empty frame of the same column."""
    # GH 18178 regression test
    populated = pd.DataFrame({'foo': [1]})
    empty = pd.DataFrame({'foo': []})

    result = pd.concat([populated, empty])

    # the expected value is float, not the original integer
    assert_frame_equal(result, pd.DataFrame({'foo': [1.0]}))
def test_concat_empty_and_non_empty_series_regression():
    """Concat with an empty Series returns the populated Series."""
    # GH 18187 regression test
    populated = pd.Series([1])
    empty = pd.Series([])

    result = pd.concat([populated, empty])

    tm.assert_series_equal(result, populated)
- def test_concat_sorts_columns(sort_with_none):
- # GH-4588
- df1 = pd.DataFrame({"a": [1, 2], "b": [1, 2]}, columns=['b', 'a'])
- df2 = pd.DataFrame({"a": [3, 4], "c": [5, 6]})
- # for sort=True/None
- expected = pd.DataFrame({"a": [1, 2, 3, 4],
- "b": [1, 2, None, None],
- "c": [None, None, 5, 6]},
- columns=['a', 'b', 'c'])
- if sort_with_none is False:
- expected = expected[['b', 'a', 'c']]
- if sort_with_none is None:
- # only warn if not explicitly specified
- ctx = tm.assert_produces_warning(FutureWarning)
- else:
- ctx = tm.assert_produces_warning(None)
- # default
- with ctx:
- result = pd.concat([df1, df2], ignore_index=True, sort=sort_with_none)
- tm.assert_frame_equal(result, expected)
- def test_concat_sorts_index(sort_with_none):
- df1 = pd.DataFrame({"a": [1, 2, 3]}, index=['c', 'a', 'b'])
- df2 = pd.DataFrame({"b": [1, 2]}, index=['a', 'b'])
- # For True/None
- expected = pd.DataFrame({"a": [2, 3, 1], "b": [1, 2, None]},
- index=['a', 'b', 'c'],
- columns=['a', 'b'])
- if sort_with_none is False:
- expected = expected.loc[['c', 'a', 'b']]
- if sort_with_none is None:
- # only warn if not explicitly specified
- ctx = tm.assert_produces_warning(FutureWarning)
- else:
- ctx = tm.assert_produces_warning(None)
- # Warn and sort by default
- with ctx:
- result = pd.concat([df1, df2], axis=1, sort=sort_with_none)
- tm.assert_frame_equal(result, expected)
- def test_concat_inner_sort(sort_with_none):
- # https://github.com/pandas-dev/pandas/pull/20613
- df1 = pd.DataFrame({"a": [1, 2], "b": [1, 2], "c": [1, 2]},
- columns=['b', 'a', 'c'])
- df2 = pd.DataFrame({"a": [1, 2], 'b': [3, 4]}, index=[3, 4])
- with tm.assert_produces_warning(None):
- # unset sort should *not* warn for inner join
- # since that never sorted
- result = pd.concat([df1, df2], sort=sort_with_none,
- join='inner',
- ignore_index=True)
- expected = pd.DataFrame({"b": [1, 2, 3, 4], "a": [1, 2, 1, 2]},
- columns=['b', 'a'])
- if sort_with_none is True:
- expected = expected[['a', 'b']]
- tm.assert_frame_equal(result, expected)
def test_concat_aligned_sort():
    """``sort=True`` sorts columns even when the inputs already align."""
    # GH-4588
    frame = pd.DataFrame({"c": [1, 2], "b": [3, 4], 'a': [5, 6]},
                         columns=['c', 'b', 'a'])
    sorted_expected = pd.DataFrame({'a': [5, 6, 5, 6],
                                    'b': [3, 4, 3, 4],
                                    'c': [1, 2, 1, 2]},
                                   columns=['a', 'b', 'c'])

    # outer join of identical column sets
    result = pd.concat([frame, frame], sort=True, ignore_index=True)
    tm.assert_frame_equal(result, sorted_expected)

    # inner join against a two-column subset
    result = pd.concat([frame, frame[['c', 'b']]], join='inner',
                       sort=True, ignore_index=True)
    tm.assert_frame_equal(result, sorted_expected[['b', 'c']])
def test_concat_aligned_sort_does_not_raise():
    """``sort=True`` with unorderable column labels must not raise."""
    # GH-4588
    # We catch TypeErrors from sorting internally and do not re-raise.
    frame = pd.DataFrame({1: [1, 2], "a": [3, 4]}, columns=[1, 'a'])

    result = pd.concat([frame, frame], ignore_index=True, sort=True)

    expected = pd.DataFrame({1: [1, 2, 1, 2], 'a': [3, 4, 3, 4]},
                            columns=[1, 'a'])
    tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("s1name,s2name", [
    (np.int64(190), (43, 0)), (190, (43, 0))])
def test_concat_series_name_npscalar_tuple(s1name, s2name):
    """Series named with a scalar and a tuple can be concatenated."""
    # GH21015
    first = pd.Series({'a': 1, 'b': 2}, name=s1name)
    second = pd.Series({'c': 5, 'd': 6}, name=s2name)

    result = pd.concat([first, second])

    # the expected Series carries no name
    expected = pd.Series({'a': 1, 'b': 2, 'c': 5, 'd': 6})
    tm.assert_series_equal(result, expected)
def test_concat_categorical_tz():
    """tz-aware datetimes concatenated with a categorical Series."""
    # GH-23816
    stamps = pd.Series(pd.date_range('2017-01-01', periods=2,
                                     tz='US/Pacific'))
    letters = pd.Series(['a', 'b'], dtype='category')

    result = pd.concat([stamps, letters], ignore_index=True)

    expected_values = [
        pd.Timestamp('2017-01-01', tz="US/Pacific"),
        pd.Timestamp('2017-01-02', tz="US/Pacific"),
        'a', 'b',
    ]
    tm.assert_series_equal(result, pd.Series(expected_values))
|