# test_concat.py -- pandas concat/append test suite (chunk).
# (A run of concatenated page/line numbers from document extraction was
# removed here; it carried no code or prose content.)
  1. from collections import deque
  2. import datetime as dt
  3. from datetime import datetime
  4. from decimal import Decimal
  5. from itertools import combinations
  6. from warnings import catch_warnings, simplefilter
  7. import dateutil
  8. import numpy as np
  9. from numpy.random import randn
  10. import pytest
  11. from pandas.compat import PY2, Iterable, StringIO, iteritems
  12. from pandas.core.dtypes.dtypes import CategoricalDtype
  13. import pandas as pd
  14. from pandas import (
  15. Categorical, DataFrame, DatetimeIndex, Index, MultiIndex, Panel, Series,
  16. Timestamp, concat, date_range, isna, read_csv)
  17. from pandas.tests.extension.decimal import to_decimal
  18. from pandas.util import testing as tm
  19. from pandas.util.testing import assert_frame_equal, makeCustomDataframe as mkdf
  20. @pytest.fixture(params=[True, False])
  21. def sort(request):
  22. """Boolean sort keyword for concat and DataFrame.append."""
  23. return request.param
  24. @pytest.fixture(params=[True, False, None])
  25. def sort_with_none(request):
  26. """Boolean sort keyword for concat and DataFrame.append.
  27. Includes the default of None
  28. """
  29. # TODO: Replace with sort once keyword changes.
  30. return request.param
  31. class ConcatenateBase(object):
  32. def setup_method(self, method):
  33. self.frame = DataFrame(tm.getSeriesData())
  34. self.mixed_frame = self.frame.copy()
  35. self.mixed_frame['foo'] = 'bar'
  36. class TestConcatAppendCommon(ConcatenateBase):
  37. """
  38. Test common dtype coercion rules between concat and append.
  39. """
    def setup_method(self, method):
        """Build the per-dtype sample data used by the coercion tests.

        ``self.data`` maps a dtype label to three values of that dtype;
        the labels double as the expected dtype strings asserted by
        ``_check_expected_dtype``.
        """
        dt_data = [pd.Timestamp('2011-01-01'),
                   pd.Timestamp('2011-01-02'),
                   pd.Timestamp('2011-01-03')]
        tz_data = [pd.Timestamp('2011-01-01', tz='US/Eastern'),
                   pd.Timestamp('2011-01-02', tz='US/Eastern'),
                   pd.Timestamp('2011-01-03', tz='US/Eastern')]
        td_data = [pd.Timedelta('1 days'),
                   pd.Timedelta('2 days'),
                   pd.Timedelta('3 days')]
        period_data = [pd.Period('2011-01', freq='M'),
                       pd.Period('2011-02', freq='M'),
                       pd.Period('2011-03', freq='M')]

        self.data = {'bool': [True, False, True],
                     'int64': [1, 2, 3],
                     'float64': [1.1, np.nan, 3.3],
                     'category': pd.Categorical(['X', 'Y', 'Z']),
                     'object': ['a', 'b', 'c'],
                     'datetime64[ns]': dt_data,
                     'datetime64[ns, US/Eastern]': tz_data,
                     'timedelta64[ns]': td_data,
                     'period[M]': period_data}
  62. def _check_expected_dtype(self, obj, label):
  63. """
  64. Check whether obj has expected dtype depending on label
  65. considering not-supported dtypes
  66. """
  67. if isinstance(obj, pd.Index):
  68. if label == 'bool':
  69. assert obj.dtype == 'object'
  70. else:
  71. assert obj.dtype == label
  72. elif isinstance(obj, pd.Series):
  73. if label.startswith('period'):
  74. assert obj.dtype == 'Period[M]'
  75. else:
  76. assert obj.dtype == label
  77. else:
  78. raise ValueError
  79. def test_dtypes(self):
  80. # to confirm test case covers intended dtypes
  81. for typ, vals in iteritems(self.data):
  82. self._check_expected_dtype(pd.Index(vals), typ)
  83. self._check_expected_dtype(pd.Series(vals), typ)
    def test_concatlike_same_dtypes(self):
        """Concat/append of identically-typed operands keeps the dtype.

        GH 13660.  For every dtype in ``self.data``, appends/concatenates
        the same values 2 and 3 times via Index.append, Series.append and
        pd.concat, checking dtype preservation, name propagation, and the
        TypeError raised for non-Index/non-Series operands.
        """
        for typ1, vals1 in iteritems(self.data):
            vals2 = vals1
            vals3 = vals1

            if typ1 == 'category':
                # categorical expected values must be rebuilt as
                # Categorical; list concatenation alone loses the dtype
                exp_data = pd.Categorical(list(vals1) + list(vals2))
                exp_data3 = pd.Categorical(list(vals1) + list(vals2) +
                                           list(vals3))
            else:
                exp_data = vals1 + vals2
                exp_data3 = vals1 + vals2 + vals3

            # ----- Index ----- #

            # index.append
            res = pd.Index(vals1).append(pd.Index(vals2))
            exp = pd.Index(exp_data)
            tm.assert_index_equal(res, exp)

            # 3 elements
            res = pd.Index(vals1).append([pd.Index(vals2), pd.Index(vals3)])
            exp = pd.Index(exp_data3)
            tm.assert_index_equal(res, exp)

            # index.append name mismatch: result name is dropped
            i1 = pd.Index(vals1, name='x')
            i2 = pd.Index(vals2, name='y')
            res = i1.append(i2)
            exp = pd.Index(exp_data)
            tm.assert_index_equal(res, exp)

            # index.append name match: name is preserved
            i1 = pd.Index(vals1, name='x')
            i2 = pd.Index(vals2, name='x')
            res = i1.append(i2)
            exp = pd.Index(exp_data, name='x')
            tm.assert_index_equal(res, exp)

            # cannot append non-index
            with pytest.raises(TypeError, match='all inputs must be Index'):
                pd.Index(vals1).append(vals2)

            with pytest.raises(TypeError, match='all inputs must be Index'):
                pd.Index(vals1).append([pd.Index(vals2), vals3])

            # ----- Series ----- #

            # series.append
            res = pd.Series(vals1).append(pd.Series(vals2),
                                          ignore_index=True)
            exp = pd.Series(exp_data)
            tm.assert_series_equal(res, exp, check_index_type=True)

            # concat
            res = pd.concat([pd.Series(vals1), pd.Series(vals2)],
                            ignore_index=True)
            tm.assert_series_equal(res, exp, check_index_type=True)

            # 3 elements
            res = pd.Series(vals1).append([pd.Series(vals2), pd.Series(vals3)],
                                          ignore_index=True)
            exp = pd.Series(exp_data3)
            tm.assert_series_equal(res, exp)

            res = pd.concat([pd.Series(vals1), pd.Series(vals2),
                             pd.Series(vals3)], ignore_index=True)
            tm.assert_series_equal(res, exp)

            # name mismatch: result name is dropped
            s1 = pd.Series(vals1, name='x')
            s2 = pd.Series(vals2, name='y')
            res = s1.append(s2, ignore_index=True)
            exp = pd.Series(exp_data)
            tm.assert_series_equal(res, exp, check_index_type=True)

            res = pd.concat([s1, s2], ignore_index=True)
            tm.assert_series_equal(res, exp, check_index_type=True)

            # name match: name is preserved
            s1 = pd.Series(vals1, name='x')
            s2 = pd.Series(vals2, name='x')
            res = s1.append(s2, ignore_index=True)
            exp = pd.Series(exp_data, name='x')
            tm.assert_series_equal(res, exp, check_index_type=True)

            res = pd.concat([s1, s2], ignore_index=True)
            tm.assert_series_equal(res, exp, check_index_type=True)

            # cannot append/concat a raw (non-Series) operand
            msg = (r'cannot concatenate object of type \"(.+?)\";'
                   ' only pd.Series, pd.DataFrame, and pd.Panel'
                   r' \(deprecated\) objs are valid')
            with pytest.raises(TypeError, match=msg):
                pd.Series(vals1).append(vals2)

            with pytest.raises(TypeError, match=msg):
                pd.Series(vals1).append([pd.Series(vals2), vals3])

            with pytest.raises(TypeError, match=msg):
                pd.concat([pd.Series(vals1), vals2])

            with pytest.raises(TypeError, match=msg):
                pd.concat([pd.Series(vals1), pd.Series(vals2), vals3])
    def test_concatlike_dtypes_coercion(self):
        """Expected result dtypes when concatenating two distinct dtypes.

        GH 13660.  For every ordered pair of distinct dtype labels in
        ``self.data``, append/concat the values and check the result dtype
        of Index and Series, which differ in their coercion rules.
        """
        for typ1, vals1 in iteritems(self.data):
            for typ2, vals2 in iteritems(self.data):
                vals3 = vals2

                # None means "let the constructor infer the dtype"
                exp_index_dtype = None
                exp_series_dtype = None

                if typ1 == typ2:
                    # same dtype is tested in test_concatlike_same_dtypes
                    continue
                elif typ1 == 'category' or typ2 == 'category':
                    # ToDo: suspicious
                    continue

                # specify expected dtype
                if typ1 == 'bool' and typ2 in ('int64', 'float64'):
                    # series coerces to numeric based on numpy rule
                    # index doesn't because bool is object dtype
                    exp_series_dtype = typ2
                elif typ2 == 'bool' and typ1 in ('int64', 'float64'):
                    exp_series_dtype = typ1
                elif (typ1 == 'datetime64[ns, US/Eastern]' or
                      typ2 == 'datetime64[ns, US/Eastern]' or
                      typ1 == 'timedelta64[ns]' or
                      typ2 == 'timedelta64[ns]'):
                    # tz-aware/timedelta mixed with anything else -> object
                    exp_index_dtype = object
                    exp_series_dtype = object

                exp_data = vals1 + vals2
                exp_data3 = vals1 + vals2 + vals3

                # ----- Index ----- #

                # index.append
                res = pd.Index(vals1).append(pd.Index(vals2))
                exp = pd.Index(exp_data, dtype=exp_index_dtype)
                tm.assert_index_equal(res, exp)

                # 3 elements
                res = pd.Index(vals1).append([pd.Index(vals2),
                                              pd.Index(vals3)])
                exp = pd.Index(exp_data3, dtype=exp_index_dtype)
                tm.assert_index_equal(res, exp)

                # ----- Series ----- #

                # series.append
                res = pd.Series(vals1).append(pd.Series(vals2),
                                              ignore_index=True)
                exp = pd.Series(exp_data, dtype=exp_series_dtype)
                tm.assert_series_equal(res, exp, check_index_type=True)

                # concat
                res = pd.concat([pd.Series(vals1), pd.Series(vals2)],
                                ignore_index=True)
                tm.assert_series_equal(res, exp, check_index_type=True)

                # 3 elements
                res = pd.Series(vals1).append([pd.Series(vals2),
                                               pd.Series(vals3)],
                                              ignore_index=True)
                exp = pd.Series(exp_data3, dtype=exp_series_dtype)
                tm.assert_series_equal(res, exp)

                res = pd.concat([pd.Series(vals1), pd.Series(vals2),
                                 pd.Series(vals3)], ignore_index=True)
                tm.assert_series_equal(res, exp)
    def test_concatlike_common_coerce_to_pandas_object(self):
        """Datetime + timedelta concat yields an object Index of pandas scalars.

        GH 13626: the elements of the coerced result must be
        Timestamp/Timedelta, not stdlib datetime.datetime/timedelta.
        """
        dti = pd.DatetimeIndex(['2011-01-01', '2011-01-02'])
        tdi = pd.TimedeltaIndex(['1 days', '2 days'])

        exp = pd.Index([pd.Timestamp('2011-01-01'),
                        pd.Timestamp('2011-01-02'),
                        pd.Timedelta('1 days'),
                        pd.Timedelta('2 days')])

        res = dti.append(tdi)
        tm.assert_index_equal(res, exp)
        # element types, not just equality, are what GH 13626 is about
        assert isinstance(res[0], pd.Timestamp)
        assert isinstance(res[-1], pd.Timedelta)

        dts = pd.Series(dti)
        tds = pd.Series(tdi)
        res = dts.append(tds)
        tm.assert_series_equal(res, pd.Series(exp, index=[0, 1, 0, 1]))
        assert isinstance(res.iloc[0], pd.Timestamp)
        assert isinstance(res.iloc[-1], pd.Timedelta)

        res = pd.concat([dts, tds])
        tm.assert_series_equal(res, pd.Series(exp, index=[0, 1, 0, 1]))
        assert isinstance(res.iloc[0], pd.Timestamp)
        assert isinstance(res.iloc[-1], pd.Timedelta)
  249. def test_concatlike_datetimetz(self, tz_aware_fixture):
  250. tz = tz_aware_fixture
  251. # GH 7795
  252. dti1 = pd.DatetimeIndex(['2011-01-01', '2011-01-02'], tz=tz)
  253. dti2 = pd.DatetimeIndex(['2012-01-01', '2012-01-02'], tz=tz)
  254. exp = pd.DatetimeIndex(['2011-01-01', '2011-01-02',
  255. '2012-01-01', '2012-01-02'], tz=tz)
  256. res = dti1.append(dti2)
  257. tm.assert_index_equal(res, exp)
  258. dts1 = pd.Series(dti1)
  259. dts2 = pd.Series(dti2)
  260. res = dts1.append(dts2)
  261. tm.assert_series_equal(res, pd.Series(exp, index=[0, 1, 0, 1]))
  262. res = pd.concat([dts1, dts2])
  263. tm.assert_series_equal(res, pd.Series(exp, index=[0, 1, 0, 1]))
    @pytest.mark.parametrize('tz',
                             ['UTC', 'US/Eastern', 'Asia/Tokyo', 'EST5EDT'])
    def test_concatlike_datetimetz_short(self, tz):
        """Appending tz-aware frames keeps the tz on the result index.

        GH#7795.
        """
        ix1 = pd.date_range(start='2014-07-15', end='2014-07-17',
                            freq='D', tz=tz)
        ix2 = pd.DatetimeIndex(['2014-07-11', '2014-07-21'], tz=tz)
        df1 = pd.DataFrame(0, index=ix1, columns=['A', 'B'])
        df2 = pd.DataFrame(0, index=ix2, columns=['A', 'B'])

        # df1's rows first, then df2's, all still tz-aware
        exp_idx = pd.DatetimeIndex(['2014-07-15', '2014-07-16',
                                    '2014-07-17', '2014-07-11',
                                    '2014-07-21'], tz=tz)
        exp = pd.DataFrame(0, index=exp_idx, columns=['A', 'B'])

        tm.assert_frame_equal(df1.append(df2), exp)
        tm.assert_frame_equal(pd.concat([df1, df2]), exp)
    def test_concatlike_datetimetz_to_object(self, tz_aware_fixture):
        """tz-aware mixed with naive or different-tz data coerces to object.

        GH 13660: mixing tz-aware values with tz-naive values, or with
        values in a different timezone, cannot keep a single datetime64
        dtype; the result is object dtype holding Timestamps.
        """
        tz = tz_aware_fixture
        # different tz coerces to object
        dti1 = pd.DatetimeIndex(['2011-01-01', '2011-01-02'], tz=tz)
        dti2 = pd.DatetimeIndex(['2012-01-01', '2012-01-02'])

        exp = pd.Index([pd.Timestamp('2011-01-01', tz=tz),
                        pd.Timestamp('2011-01-02', tz=tz),
                        pd.Timestamp('2012-01-01'),
                        pd.Timestamp('2012-01-02')], dtype=object)

        res = dti1.append(dti2)
        tm.assert_index_equal(res, exp)

        dts1 = pd.Series(dti1)
        dts2 = pd.Series(dti2)
        res = dts1.append(dts2)
        tm.assert_series_equal(res, pd.Series(exp, index=[0, 1, 0, 1]))

        res = pd.concat([dts1, dts2])
        tm.assert_series_equal(res, pd.Series(exp, index=[0, 1, 0, 1]))

        # different tz
        dti3 = pd.DatetimeIndex(['2012-01-01', '2012-01-02'],
                                tz='US/Pacific')

        exp = pd.Index([pd.Timestamp('2011-01-01', tz=tz),
                        pd.Timestamp('2011-01-02', tz=tz),
                        pd.Timestamp('2012-01-01', tz='US/Pacific'),
                        pd.Timestamp('2012-01-02', tz='US/Pacific')],
                       dtype=object)

        res = dti1.append(dti3)
        # NOTE(review): this assertion is disabled in the original, so `res`
        # is computed but never verified.  Confirm Index.append behavior for
        # mismatched timezones before re-enabling it.
        # tm.assert_index_equal(res, exp)

        dts1 = pd.Series(dti1)
        dts3 = pd.Series(dti3)
        res = dts1.append(dts3)
        tm.assert_series_equal(res, pd.Series(exp, index=[0, 1, 0, 1]))

        res = pd.concat([dts1, dts3])
        tm.assert_series_equal(res, pd.Series(exp, index=[0, 1, 0, 1]))
  313. def test_concatlike_common_period(self):
  314. # GH 13660
  315. pi1 = pd.PeriodIndex(['2011-01', '2011-02'], freq='M')
  316. pi2 = pd.PeriodIndex(['2012-01', '2012-02'], freq='M')
  317. exp = pd.PeriodIndex(['2011-01', '2011-02', '2012-01',
  318. '2012-02'], freq='M')
  319. res = pi1.append(pi2)
  320. tm.assert_index_equal(res, exp)
  321. ps1 = pd.Series(pi1)
  322. ps2 = pd.Series(pi2)
  323. res = ps1.append(ps2)
  324. tm.assert_series_equal(res, pd.Series(exp, index=[0, 1, 0, 1]))
  325. res = pd.concat([ps1, ps2])
  326. tm.assert_series_equal(res, pd.Series(exp, index=[0, 1, 0, 1]))
    def test_concatlike_common_period_diff_freq_to_object(self):
        """Periods with different freqs coerce to object dtype (GH 13221)."""
        pi1 = pd.PeriodIndex(['2011-01', '2011-02'], freq='M')
        pi2 = pd.PeriodIndex(['2012-01-01', '2012-02-01'], freq='D')

        # the Period scalars keep their own freqs inside the object Index
        exp = pd.Index([pd.Period('2011-01', freq='M'),
                        pd.Period('2011-02', freq='M'),
                        pd.Period('2012-01-01', freq='D'),
                        pd.Period('2012-02-01', freq='D')], dtype=object)

        res = pi1.append(pi2)
        tm.assert_index_equal(res, exp)

        ps1 = pd.Series(pi1)
        ps2 = pd.Series(pi2)
        res = ps1.append(ps2)
        tm.assert_series_equal(res, pd.Series(exp, index=[0, 1, 0, 1]))

        res = pd.concat([ps1, ps2])
        tm.assert_series_equal(res, pd.Series(exp, index=[0, 1, 0, 1]))
    def test_concatlike_common_period_mixed_dt_to_object(self):
        """Mixing period with timedelta coerces to object dtype (GH 13221)."""
        # different datetimelike
        pi1 = pd.PeriodIndex(['2011-01', '2011-02'], freq='M')
        tdi = pd.TimedeltaIndex(['1 days', '2 days'])

        exp = pd.Index([pd.Period('2011-01', freq='M'),
                        pd.Period('2011-02', freq='M'),
                        pd.Timedelta('1 days'),
                        pd.Timedelta('2 days')], dtype=object)

        res = pi1.append(tdi)
        tm.assert_index_equal(res, exp)

        ps1 = pd.Series(pi1)
        tds = pd.Series(tdi)
        res = ps1.append(tds)
        tm.assert_series_equal(res, pd.Series(exp, index=[0, 1, 0, 1]))

        res = pd.concat([ps1, tds])
        tm.assert_series_equal(res, pd.Series(exp, index=[0, 1, 0, 1]))

        # inverse operand order (timedelta first)
        exp = pd.Index([pd.Timedelta('1 days'),
                        pd.Timedelta('2 days'),
                        pd.Period('2011-01', freq='M'),
                        pd.Period('2011-02', freq='M')], dtype=object)

        res = tdi.append(pi1)
        tm.assert_index_equal(res, exp)

        ps1 = pd.Series(pi1)
        tds = pd.Series(tdi)
        res = tds.append(ps1)
        tm.assert_series_equal(res, pd.Series(exp, index=[0, 1, 0, 1]))

        res = pd.concat([tds, ps1])
        tm.assert_series_equal(res, pd.Series(exp, index=[0, 1, 0, 1]))
    def test_concat_categorical(self):
        """Concat/append of two categorical Series (GH 13524).

        Identical categories keep category dtype; partially or completely
        different categories fall back to a non-categorical result.
        """
        # same categories -> category
        s1 = pd.Series([1, 2, np.nan], dtype='category')
        s2 = pd.Series([2, 1, 2], dtype='category')

        exp = pd.Series([1, 2, np.nan, 2, 1, 2], dtype='category')
        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
        tm.assert_series_equal(s1.append(s2, ignore_index=True), exp)

        # partially different categories => not-category
        s1 = pd.Series([3, 2], dtype='category')
        s2 = pd.Series([2, 1], dtype='category')

        exp = pd.Series([3, 2, 2, 1])
        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
        tm.assert_series_equal(s1.append(s2, ignore_index=True), exp)

        # completely different categories (same dtype) => not-category
        s1 = pd.Series([10, 11, np.nan], dtype='category')
        s2 = pd.Series([np.nan, 1, 3, 2], dtype='category')

        exp = pd.Series([10, 11, np.nan, np.nan, 1, 3, 2], dtype='object')
        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
        tm.assert_series_equal(s1.append(s2, ignore_index=True), exp)
  393. def test_union_categorical_same_categories_different_order(self):
  394. # https://github.com/pandas-dev/pandas/issues/19096
  395. a = pd.Series(Categorical(['a', 'b', 'c'], categories=['a', 'b', 'c']))
  396. b = pd.Series(Categorical(['a', 'b', 'c'], categories=['b', 'a', 'c']))
  397. result = pd.concat([a, b], ignore_index=True)
  398. expected = pd.Series(Categorical(['a', 'b', 'c', 'a', 'b', 'c'],
  399. categories=['a', 'b', 'c']))
  400. tm.assert_series_equal(result, expected)
    def test_concat_categorical_coercion(self):
        """Categorical mixed with a plain Series never stays category.

        GH 13524.  Each scenario is asserted for both operand orders and
        via both pd.concat and Series.append.
        """
        # category + not-category => not-category
        s1 = pd.Series([1, 2, np.nan], dtype='category')
        s2 = pd.Series([2, 1, 2])

        exp = pd.Series([1, 2, np.nan, 2, 1, 2], dtype='object')
        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
        tm.assert_series_equal(s1.append(s2, ignore_index=True), exp)

        # result shouldn't be affected by 1st elem dtype
        exp = pd.Series([2, 1, 2, 1, 2, np.nan], dtype='object')
        tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp)
        tm.assert_series_equal(s2.append(s1, ignore_index=True), exp)

        # all values are not in category => not-category
        s1 = pd.Series([3, 2], dtype='category')
        s2 = pd.Series([2, 1])

        exp = pd.Series([3, 2, 2, 1])
        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
        tm.assert_series_equal(s1.append(s2, ignore_index=True), exp)

        exp = pd.Series([2, 1, 3, 2])
        tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp)
        tm.assert_series_equal(s2.append(s1, ignore_index=True), exp)

        # completely different categories => not-category
        s1 = pd.Series([10, 11, np.nan], dtype='category')
        s2 = pd.Series([1, 3, 2])

        exp = pd.Series([10, 11, np.nan, 1, 3, 2], dtype='object')
        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
        tm.assert_series_equal(s1.append(s2, ignore_index=True), exp)

        exp = pd.Series([1, 3, 2, 10, 11, np.nan], dtype='object')
        tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp)
        tm.assert_series_equal(s2.append(s1, ignore_index=True), exp)

        # different dtype => not-category
        s1 = pd.Series([10, 11, np.nan], dtype='category')
        s2 = pd.Series(['a', 'b', 'c'])

        exp = pd.Series([10, 11, np.nan, 'a', 'b', 'c'])
        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
        tm.assert_series_equal(s1.append(s2, ignore_index=True), exp)

        exp = pd.Series(['a', 'b', 'c', 10, 11, np.nan])
        tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp)
        tm.assert_series_equal(s2.append(s1, ignore_index=True), exp)

        # if normal series only contains NaN-likes => not-category
        s1 = pd.Series([10, 11], dtype='category')
        s2 = pd.Series([np.nan, np.nan, np.nan])

        exp = pd.Series([10, 11, np.nan, np.nan, np.nan])
        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
        tm.assert_series_equal(s1.append(s2, ignore_index=True), exp)

        exp = pd.Series([np.nan, np.nan, np.nan, 10, 11])
        tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp)
        tm.assert_series_equal(s2.append(s1, ignore_index=True), exp)
    def test_concat_categorical_3elem_coercion(self):
        """Three-operand concat with mixed categorical/plain Series.

        GH 13524: any mix of categorical and non-categorical operands
        produces a non-categorical result.
        """
        # mixed dtypes => not-category
        s1 = pd.Series([1, 2, np.nan], dtype='category')
        s2 = pd.Series([2, 1, 2], dtype='category')
        s3 = pd.Series([1, 2, 1, 2, np.nan])

        exp = pd.Series([1, 2, np.nan, 2, 1, 2, 1, 2, 1, 2, np.nan],
                        dtype='object')
        tm.assert_series_equal(pd.concat([s1, s2, s3], ignore_index=True), exp)
        tm.assert_series_equal(s1.append([s2, s3], ignore_index=True), exp)

        exp = pd.Series([1, 2, 1, 2, np.nan, 1, 2, np.nan, 2, 1, 2],
                        dtype='object')
        tm.assert_series_equal(pd.concat([s3, s1, s2], ignore_index=True), exp)
        tm.assert_series_equal(s3.append([s1, s2], ignore_index=True), exp)

        # values are all in either category => not-category
        s1 = pd.Series([4, 5, 6], dtype='category')
        s2 = pd.Series([1, 2, 3], dtype='category')
        s3 = pd.Series([1, 3, 4])

        exp = pd.Series([4, 5, 6, 1, 2, 3, 1, 3, 4])
        tm.assert_series_equal(pd.concat([s1, s2, s3], ignore_index=True), exp)
        tm.assert_series_equal(s1.append([s2, s3], ignore_index=True), exp)

        exp = pd.Series([1, 3, 4, 4, 5, 6, 1, 2, 3])
        tm.assert_series_equal(pd.concat([s3, s1, s2], ignore_index=True), exp)
        tm.assert_series_equal(s3.append([s1, s2], ignore_index=True), exp)

        # values are all in either category => not-category
        s1 = pd.Series([4, 5, 6], dtype='category')
        s2 = pd.Series([1, 2, 3], dtype='category')
        s3 = pd.Series([10, 11, 12])

        exp = pd.Series([4, 5, 6, 1, 2, 3, 10, 11, 12])
        tm.assert_series_equal(pd.concat([s1, s2, s3], ignore_index=True), exp)
        tm.assert_series_equal(s1.append([s2, s3], ignore_index=True), exp)

        exp = pd.Series([10, 11, 12, 4, 5, 6, 1, 2, 3])
        tm.assert_series_equal(pd.concat([s3, s1, s2], ignore_index=True), exp)
        tm.assert_series_equal(s3.append([s1, s2], ignore_index=True), exp)
    def test_concat_categorical_multi_coercion(self):
        """Many mixed categorical/plain operands -> not-category (GH 13524)."""
        s1 = pd.Series([1, 3], dtype='category')
        s2 = pd.Series([3, 4], dtype='category')
        s3 = pd.Series([2, 3])
        s4 = pd.Series([2, 2], dtype='category')
        s5 = pd.Series([1, np.nan])
        s6 = pd.Series([1, 3, 2], dtype='category')

        # mixed dtype, values are all in categories => not-category
        exp = pd.Series([1, 3, 3, 4, 2, 3, 2, 2, 1, np.nan, 1, 3, 2])
        res = pd.concat([s1, s2, s3, s4, s5, s6], ignore_index=True)
        tm.assert_series_equal(res, exp)
        res = s1.append([s2, s3, s4, s5, s6], ignore_index=True)
        tm.assert_series_equal(res, exp)

        # reversed operand order reverses the concatenated values
        exp = pd.Series([1, 3, 2, 1, np.nan, 2, 2, 2, 3, 3, 4, 1, 3])
        res = pd.concat([s6, s5, s4, s3, s2, s1], ignore_index=True)
        tm.assert_series_equal(res, exp)
        res = s6.append([s5, s4, s3, s2, s1], ignore_index=True)
        tm.assert_series_equal(res, exp)
  502. def test_concat_categorical_ordered(self):
  503. # GH 13524
  504. s1 = pd.Series(pd.Categorical([1, 2, np.nan], ordered=True))
  505. s2 = pd.Series(pd.Categorical([2, 1, 2], ordered=True))
  506. exp = pd.Series(pd.Categorical([1, 2, np.nan, 2, 1, 2], ordered=True))
  507. tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
  508. tm.assert_series_equal(s1.append(s2, ignore_index=True), exp)
  509. exp = pd.Series(pd.Categorical([1, 2, np.nan, 2, 1, 2, 1, 2, np.nan],
  510. ordered=True))
  511. tm.assert_series_equal(pd.concat([s1, s2, s1], ignore_index=True), exp)
  512. tm.assert_series_equal(s1.append([s2, s1], ignore_index=True), exp)
  513. def test_concat_categorical_coercion_nan(self):
  514. # GH 13524
  515. # some edge cases
  516. # category + not-category => not category
  517. s1 = pd.Series(np.array([np.nan, np.nan], dtype=np.float64),
  518. dtype='category')
  519. s2 = pd.Series([np.nan, 1])
  520. exp = pd.Series([np.nan, np.nan, np.nan, 1])
  521. tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
  522. tm.assert_series_equal(s1.append(s2, ignore_index=True), exp)
  523. s1 = pd.Series([1, np.nan], dtype='category')
  524. s2 = pd.Series([np.nan, np.nan])
  525. exp = pd.Series([1, np.nan, np.nan, np.nan], dtype='object')
  526. tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
  527. tm.assert_series_equal(s1.append(s2, ignore_index=True), exp)
  528. # mixed dtype, all nan-likes => not-category
  529. s1 = pd.Series([np.nan, np.nan], dtype='category')
  530. s2 = pd.Series([np.nan, np.nan])
  531. exp = pd.Series([np.nan, np.nan, np.nan, np.nan])
  532. tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
  533. tm.assert_series_equal(s1.append(s2, ignore_index=True), exp)
  534. tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp)
  535. tm.assert_series_equal(s2.append(s1, ignore_index=True), exp)
  536. # all category nan-likes => category
  537. s1 = pd.Series([np.nan, np.nan], dtype='category')
  538. s2 = pd.Series([np.nan, np.nan], dtype='category')
  539. exp = pd.Series([np.nan, np.nan, np.nan, np.nan], dtype='category')
  540. tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
  541. tm.assert_series_equal(s1.append(s2, ignore_index=True), exp)
  542. def test_concat_categorical_empty(self):
  543. # GH 13524
  544. s1 = pd.Series([], dtype='category')
  545. s2 = pd.Series([1, 2], dtype='category')
  546. tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), s2)
  547. tm.assert_series_equal(s1.append(s2, ignore_index=True), s2)
  548. tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), s2)
  549. tm.assert_series_equal(s2.append(s1, ignore_index=True), s2)
  550. s1 = pd.Series([], dtype='category')
  551. s2 = pd.Series([], dtype='category')
  552. tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), s2)
  553. tm.assert_series_equal(s1.append(s2, ignore_index=True), s2)
  554. s1 = pd.Series([], dtype='category')
  555. s2 = pd.Series([], dtype='object')
  556. # different dtype => not-category
  557. tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), s2)
  558. tm.assert_series_equal(s1.append(s2, ignore_index=True), s2)
  559. tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), s2)
  560. tm.assert_series_equal(s2.append(s1, ignore_index=True), s2)
  561. s1 = pd.Series([], dtype='category')
  562. s2 = pd.Series([np.nan, np.nan])
  563. # empty Series is ignored
  564. exp = pd.Series([np.nan, np.nan])
  565. tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
  566. tm.assert_series_equal(s1.append(s2, ignore_index=True), exp)
  567. tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp)
  568. tm.assert_series_equal(s2.append(s1, ignore_index=True), exp)
  569. class TestAppend(ConcatenateBase):
  570. def test_append(self, sort):
  571. begin_index = self.frame.index[:5]
  572. end_index = self.frame.index[5:]
  573. begin_frame = self.frame.reindex(begin_index)
  574. end_frame = self.frame.reindex(end_index)
  575. appended = begin_frame.append(end_frame)
  576. tm.assert_almost_equal(appended['A'], self.frame['A'])
  577. del end_frame['A']
  578. partial_appended = begin_frame.append(end_frame, sort=sort)
  579. assert 'A' in partial_appended
  580. partial_appended = end_frame.append(begin_frame, sort=sort)
  581. assert 'A' in partial_appended
  582. # mixed type handling
  583. appended = self.mixed_frame[:5].append(self.mixed_frame[5:])
  584. tm.assert_frame_equal(appended, self.mixed_frame)
  585. # what to test here
  586. mixed_appended = self.mixed_frame[:5].append(self.frame[5:], sort=sort)
  587. mixed_appended2 = self.frame[:5].append(self.mixed_frame[5:],
  588. sort=sort)
  589. # all equal except 'foo' column
  590. tm.assert_frame_equal(
  591. mixed_appended.reindex(columns=['A', 'B', 'C', 'D']),
  592. mixed_appended2.reindex(columns=['A', 'B', 'C', 'D']))
  593. # append empty
  594. empty = DataFrame({})
  595. appended = self.frame.append(empty)
  596. tm.assert_frame_equal(self.frame, appended)
  597. assert appended is not self.frame
  598. appended = empty.append(self.frame)
  599. tm.assert_frame_equal(self.frame, appended)
  600. assert appended is not self.frame
  601. # Overlap
  602. msg = "Indexes have overlapping values"
  603. with pytest.raises(ValueError, match=msg):
  604. self.frame.append(self.frame, verify_integrity=True)
  605. # see gh-6129: new columns
  606. df = DataFrame({'a': {'x': 1, 'y': 2}, 'b': {'x': 3, 'y': 4}})
  607. row = Series([5, 6, 7], index=['a', 'b', 'c'], name='z')
  608. expected = DataFrame({'a': {'x': 1, 'y': 2, 'z': 5}, 'b': {
  609. 'x': 3, 'y': 4, 'z': 6}, 'c': {'z': 7}})
  610. result = df.append(row)
  611. tm.assert_frame_equal(result, expected)
  612. def test_append_length0_frame(self, sort):
  613. df = DataFrame(columns=['A', 'B', 'C'])
  614. df3 = DataFrame(index=[0, 1], columns=['A', 'B'])
  615. df5 = df.append(df3, sort=sort)
  616. expected = DataFrame(index=[0, 1], columns=['A', 'B', 'C'])
  617. assert_frame_equal(df5, expected)
  618. def test_append_records(self):
  619. arr1 = np.zeros((2,), dtype=('i4,f4,a10'))
  620. arr1[:] = [(1, 2., 'Hello'), (2, 3., "World")]
  621. arr2 = np.zeros((3,), dtype=('i4,f4,a10'))
  622. arr2[:] = [(3, 4., 'foo'),
  623. (5, 6., "bar"),
  624. (7., 8., 'baz')]
  625. df1 = DataFrame(arr1)
  626. df2 = DataFrame(arr2)
  627. result = df1.append(df2, ignore_index=True)
  628. expected = DataFrame(np.concatenate((arr1, arr2)))
  629. assert_frame_equal(result, expected)
  630. # rewrite sort fixture, since we also want to test default of None
  631. def test_append_sorts(self, sort_with_none):
  632. df1 = pd.DataFrame({"a": [1, 2], "b": [1, 2]}, columns=['b', 'a'])
  633. df2 = pd.DataFrame({"a": [1, 2], 'c': [3, 4]}, index=[2, 3])
  634. if sort_with_none is None:
  635. # only warn if not explicitly specified
  636. # don't check stacklevel since its set for concat, and append
  637. # has an extra stack.
  638. ctx = tm.assert_produces_warning(FutureWarning,
  639. check_stacklevel=False)
  640. else:
  641. ctx = tm.assert_produces_warning(None)
  642. with ctx:
  643. result = df1.append(df2, sort=sort_with_none)
  644. # for None / True
  645. expected = pd.DataFrame({"b": [1, 2, None, None],
  646. "a": [1, 2, 1, 2],
  647. "c": [None, None, 3, 4]},
  648. columns=['a', 'b', 'c'])
  649. if sort_with_none is False:
  650. expected = expected[['b', 'a', 'c']]
  651. tm.assert_frame_equal(result, expected)
  652. def test_append_different_columns(self, sort):
  653. df = DataFrame({'bools': np.random.randn(10) > 0,
  654. 'ints': np.random.randint(0, 10, 10),
  655. 'floats': np.random.randn(10),
  656. 'strings': ['foo', 'bar'] * 5})
  657. a = df[:5].loc[:, ['bools', 'ints', 'floats']]
  658. b = df[5:].loc[:, ['strings', 'ints', 'floats']]
  659. appended = a.append(b, sort=sort)
  660. assert isna(appended['strings'][0:4]).all()
  661. assert isna(appended['bools'][5:]).all()
  662. def test_append_many(self, sort):
  663. chunks = [self.frame[:5], self.frame[5:10],
  664. self.frame[10:15], self.frame[15:]]
  665. result = chunks[0].append(chunks[1:])
  666. tm.assert_frame_equal(result, self.frame)
  667. chunks[-1] = chunks[-1].copy()
  668. chunks[-1]['foo'] = 'bar'
  669. result = chunks[0].append(chunks[1:], sort=sort)
  670. tm.assert_frame_equal(result.loc[:, self.frame.columns], self.frame)
  671. assert (result['foo'][15:] == 'bar').all()
  672. assert result['foo'][:15].isna().all()
  673. def test_append_preserve_index_name(self):
  674. # #980
  675. df1 = DataFrame(data=None, columns=['A', 'B', 'C'])
  676. df1 = df1.set_index(['A'])
  677. df2 = DataFrame(data=[[1, 4, 7], [2, 5, 8], [3, 6, 9]],
  678. columns=['A', 'B', 'C'])
  679. df2 = df2.set_index(['A'])
  680. result = df1.append(df2)
  681. assert result.index.name == 'A'
  682. indexes_can_append = [
  683. pd.RangeIndex(3),
  684. pd.Index([4, 5, 6]),
  685. pd.Index([4.5, 5.5, 6.5]),
  686. pd.Index(list('abc')),
  687. pd.CategoricalIndex('A B C'.split()),
  688. pd.CategoricalIndex('D E F'.split(), ordered=True),
  689. pd.DatetimeIndex([dt.datetime(2013, 1, 3, 0, 0),
  690. dt.datetime(2013, 1, 3, 6, 10),
  691. dt.datetime(2013, 1, 3, 7, 12)]),
  692. ]
  693. indexes_cannot_append_with_other = [
  694. pd.IntervalIndex.from_breaks([0, 1, 2, 3]),
  695. pd.MultiIndex.from_arrays(['A B C'.split(), 'D E F'.split()]),
  696. ]
  697. all_indexes = indexes_can_append + indexes_cannot_append_with_other
  698. @pytest.mark.parametrize("index",
  699. all_indexes,
  700. ids=lambda x: x.__class__.__name__)
  701. def test_append_same_columns_type(self, index):
  702. # GH18359
  703. # df wider than ser
  704. df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=index)
  705. ser_index = index[:2]
  706. ser = pd.Series([7, 8], index=ser_index, name=2)
  707. result = df.append(ser)
  708. expected = pd.DataFrame([[1., 2., 3.], [4, 5, 6], [7, 8, np.nan]],
  709. index=[0, 1, 2],
  710. columns=index)
  711. assert_frame_equal(result, expected)
  712. # ser wider than df
  713. ser_index = index
  714. index = index[:2]
  715. df = pd.DataFrame([[1, 2], [4, 5]], columns=index)
  716. ser = pd.Series([7, 8, 9], index=ser_index, name=2)
  717. result = df.append(ser)
  718. expected = pd.DataFrame([[1, 2, np.nan], [4, 5, np.nan], [7, 8, 9]],
  719. index=[0, 1, 2],
  720. columns=ser_index)
  721. assert_frame_equal(result, expected)
  722. @pytest.mark.parametrize("df_columns, series_index",
  723. combinations(indexes_can_append, r=2),
  724. ids=lambda x: x.__class__.__name__)
  725. def test_append_different_columns_types(self, df_columns, series_index):
  726. # GH18359
  727. # See also test 'test_append_different_columns_types_raises' below
  728. # for errors raised when appending
  729. df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=df_columns)
  730. ser = pd.Series([7, 8, 9], index=series_index, name=2)
  731. result = df.append(ser)
  732. idx_diff = ser.index.difference(df_columns)
  733. combined_columns = Index(df_columns.tolist()).append(idx_diff)
  734. expected = pd.DataFrame([[1., 2., 3., np.nan, np.nan, np.nan],
  735. [4, 5, 6, np.nan, np.nan, np.nan],
  736. [np.nan, np.nan, np.nan, 7, 8, 9]],
  737. index=[0, 1, 2],
  738. columns=combined_columns)
  739. assert_frame_equal(result, expected)
  740. @pytest.mark.parametrize('index_can_append', indexes_can_append,
  741. ids=lambda x: x.__class__.__name__)
  742. @pytest.mark.parametrize('index_cannot_append_with_other',
  743. indexes_cannot_append_with_other,
  744. ids=lambda x: x.__class__.__name__)
  745. def test_append_different_columns_types_raises(
  746. self, index_can_append, index_cannot_append_with_other):
  747. # GH18359
  748. # Dataframe.append will raise if IntervalIndex/MultiIndex appends
  749. # or is appended to a different index type
  750. #
  751. # See also test 'test_append_different_columns_types' above for
  752. # appending without raising.
  753. df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=index_can_append)
  754. ser = pd.Series([7, 8, 9], index=index_cannot_append_with_other,
  755. name=2)
  756. msg = ("the other index needs to be an IntervalIndex too, but was"
  757. r" type {}|"
  758. r"object of type '(int|long|float|Timestamp)' has no len\(\)|"
  759. "Expected tuple, got str")
  760. with pytest.raises(TypeError, match=msg.format(
  761. index_can_append.__class__.__name__)):
  762. df.append(ser)
  763. df = pd.DataFrame([[1, 2, 3], [4, 5, 6]],
  764. columns=index_cannot_append_with_other)
  765. ser = pd.Series([7, 8, 9], index=index_can_append, name=2)
  766. msg = (r"unorderable types: (Interval|int)\(\) > "
  767. r"(int|long|float|str)\(\)|"
  768. r"Expected tuple, got (int|long|float|str)|"
  769. r"Cannot compare type 'Timestamp' with type '(int|long)'|"
  770. r"'>' not supported between instances of 'int' and 'str'")
  771. with pytest.raises(TypeError, match=msg):
  772. df.append(ser)
  773. def test_append_dtype_coerce(self, sort):
  774. # GH 4993
  775. # appending with datetime will incorrectly convert datetime64
  776. df1 = DataFrame(index=[1, 2], data=[dt.datetime(2013, 1, 1, 0, 0),
  777. dt.datetime(2013, 1, 2, 0, 0)],
  778. columns=['start_time'])
  779. df2 = DataFrame(index=[4, 5], data=[[dt.datetime(2013, 1, 3, 0, 0),
  780. dt.datetime(2013, 1, 3, 6, 10)],
  781. [dt.datetime(2013, 1, 4, 0, 0),
  782. dt.datetime(2013, 1, 4, 7, 10)]],
  783. columns=['start_time', 'end_time'])
  784. expected = concat([Series([pd.NaT,
  785. pd.NaT,
  786. dt.datetime(2013, 1, 3, 6, 10),
  787. dt.datetime(2013, 1, 4, 7, 10)],
  788. name='end_time'),
  789. Series([dt.datetime(2013, 1, 1, 0, 0),
  790. dt.datetime(2013, 1, 2, 0, 0),
  791. dt.datetime(2013, 1, 3, 0, 0),
  792. dt.datetime(2013, 1, 4, 0, 0)],
  793. name='start_time')],
  794. axis=1, sort=sort)
  795. result = df1.append(df2, ignore_index=True, sort=sort)
  796. if sort:
  797. expected = expected[['end_time', 'start_time']]
  798. else:
  799. expected = expected[['start_time', 'end_time']]
  800. assert_frame_equal(result, expected)
  801. def test_append_missing_column_proper_upcast(self, sort):
  802. df1 = DataFrame({'A': np.array([1, 2, 3, 4], dtype='i8')})
  803. df2 = DataFrame({'B': np.array([True, False, True, False],
  804. dtype=bool)})
  805. appended = df1.append(df2, ignore_index=True, sort=sort)
  806. assert appended['A'].dtype == 'f8'
  807. assert appended['B'].dtype == 'O'
  808. def test_append_empty_frame_to_series_with_dateutil_tz(self):
  809. # GH 23682
  810. date = Timestamp('2018-10-24 07:30:00', tz=dateutil.tz.tzutc())
  811. s = Series({'date': date, 'a': 1.0, 'b': 2.0})
  812. df = DataFrame(columns=['c', 'd'])
  813. result = df.append(s, ignore_index=True)
  814. # n.b. it's not clear to me that expected is correct here.
  815. # It's possible that the `date` column should have
  816. # datetime64[ns, tz] dtype for both result and expected.
  817. # that would be more consistent with new columns having
  818. # their own dtype (float for a and b, datetime64ns, tz for date).
  819. expected = DataFrame([[np.nan, np.nan, 1., 2., date]],
  820. columns=['c', 'd', 'a', 'b', 'date'],
  821. dtype=object)
  822. # These columns get cast to object after append
  823. expected['a'] = expected['a'].astype(float)
  824. expected['b'] = expected['b'].astype(float)
  825. assert_frame_equal(result, expected)
  826. class TestConcatenate(ConcatenateBase):
  827. def test_concat_copy(self):
  828. df = DataFrame(np.random.randn(4, 3))
  829. df2 = DataFrame(np.random.randint(0, 10, size=4).reshape(4, 1))
  830. df3 = DataFrame({5: 'foo'}, index=range(4))
  831. # These are actual copies.
  832. result = concat([df, df2, df3], axis=1, copy=True)
  833. for b in result._data.blocks:
  834. assert b.values.base is None
  835. # These are the same.
  836. result = concat([df, df2, df3], axis=1, copy=False)
  837. for b in result._data.blocks:
  838. if b.is_float:
  839. assert b.values.base is df._data.blocks[0].values.base
  840. elif b.is_integer:
  841. assert b.values.base is df2._data.blocks[0].values.base
  842. elif b.is_object:
  843. assert b.values.base is not None
  844. # Float block was consolidated.
  845. df4 = DataFrame(np.random.randn(4, 1))
  846. result = concat([df, df2, df3, df4], axis=1, copy=False)
  847. for b in result._data.blocks:
  848. if b.is_float:
  849. assert b.values.base is None
  850. elif b.is_integer:
  851. assert b.values.base is df2._data.blocks[0].values.base
  852. elif b.is_object:
  853. assert b.values.base is not None
  854. def test_concat_with_group_keys(self):
  855. df = DataFrame(np.random.randn(4, 3))
  856. df2 = DataFrame(np.random.randn(4, 4))
  857. # axis=0
  858. df = DataFrame(np.random.randn(3, 4))
  859. df2 = DataFrame(np.random.randn(4, 4))
  860. result = concat([df, df2], keys=[0, 1])
  861. exp_index = MultiIndex.from_arrays([[0, 0, 0, 1, 1, 1, 1],
  862. [0, 1, 2, 0, 1, 2, 3]])
  863. expected = DataFrame(np.r_[df.values, df2.values],
  864. index=exp_index)
  865. tm.assert_frame_equal(result, expected)
  866. result = concat([df, df], keys=[0, 1])
  867. exp_index2 = MultiIndex.from_arrays([[0, 0, 0, 1, 1, 1],
  868. [0, 1, 2, 0, 1, 2]])
  869. expected = DataFrame(np.r_[df.values, df.values],
  870. index=exp_index2)
  871. tm.assert_frame_equal(result, expected)
  872. # axis=1
  873. df = DataFrame(np.random.randn(4, 3))
  874. df2 = DataFrame(np.random.randn(4, 4))
  875. result = concat([df, df2], keys=[0, 1], axis=1)
  876. expected = DataFrame(np.c_[df.values, df2.values],
  877. columns=exp_index)
  878. tm.assert_frame_equal(result, expected)
  879. result = concat([df, df], keys=[0, 1], axis=1)
  880. expected = DataFrame(np.c_[df.values, df.values],
  881. columns=exp_index2)
  882. tm.assert_frame_equal(result, expected)
  883. def test_concat_keys_specific_levels(self):
  884. df = DataFrame(np.random.randn(10, 4))
  885. pieces = [df.iloc[:, [0, 1]], df.iloc[:, [2]], df.iloc[:, [3]]]
  886. level = ['three', 'two', 'one', 'zero']
  887. result = concat(pieces, axis=1, keys=['one', 'two', 'three'],
  888. levels=[level],
  889. names=['group_key'])
  890. tm.assert_index_equal(result.columns.levels[0],
  891. Index(level, name='group_key'))
  892. assert result.columns.names[0] == 'group_key'
  893. def test_concat_dataframe_keys_bug(self, sort):
  894. t1 = DataFrame({
  895. 'value': Series([1, 2, 3], index=Index(['a', 'b', 'c'],
  896. name='id'))})
  897. t2 = DataFrame({
  898. 'value': Series([7, 8], index=Index(['a', 'b'], name='id'))})
  899. # it works
  900. result = concat([t1, t2], axis=1, keys=['t1', 't2'], sort=sort)
  901. assert list(result.columns) == [('t1', 'value'), ('t2', 'value')]
  902. def test_concat_series_partial_columns_names(self):
  903. # GH10698
  904. foo = Series([1, 2], name='foo')
  905. bar = Series([1, 2])
  906. baz = Series([4, 5])
  907. result = concat([foo, bar, baz], axis=1)
  908. expected = DataFrame({'foo': [1, 2], 0: [1, 2], 1: [
  909. 4, 5]}, columns=['foo', 0, 1])
  910. tm.assert_frame_equal(result, expected)
  911. result = concat([foo, bar, baz], axis=1, keys=[
  912. 'red', 'blue', 'yellow'])
  913. expected = DataFrame({'red': [1, 2], 'blue': [1, 2], 'yellow': [
  914. 4, 5]}, columns=['red', 'blue', 'yellow'])
  915. tm.assert_frame_equal(result, expected)
  916. result = concat([foo, bar, baz], axis=1, ignore_index=True)
  917. expected = DataFrame({0: [1, 2], 1: [1, 2], 2: [4, 5]})
  918. tm.assert_frame_equal(result, expected)
  919. def test_concat_dict(self):
  920. frames = {'foo': DataFrame(np.random.randn(4, 3)),
  921. 'bar': DataFrame(np.random.randn(4, 3)),
  922. 'baz': DataFrame(np.random.randn(4, 3)),
  923. 'qux': DataFrame(np.random.randn(4, 3))}
  924. sorted_keys = sorted(frames)
  925. result = concat(frames)
  926. expected = concat([frames[k] for k in sorted_keys], keys=sorted_keys)
  927. tm.assert_frame_equal(result, expected)
  928. result = concat(frames, axis=1)
  929. expected = concat([frames[k] for k in sorted_keys], keys=sorted_keys,
  930. axis=1)
  931. tm.assert_frame_equal(result, expected)
  932. keys = ['baz', 'foo', 'bar']
  933. result = concat(frames, keys=keys)
  934. expected = concat([frames[k] for k in keys], keys=keys)
  935. tm.assert_frame_equal(result, expected)
  936. def test_concat_ignore_index(self, sort):
  937. frame1 = DataFrame({"test1": ["a", "b", "c"],
  938. "test2": [1, 2, 3],
  939. "test3": [4.5, 3.2, 1.2]})
  940. frame2 = DataFrame({"test3": [5.2, 2.2, 4.3]})
  941. frame1.index = Index(["x", "y", "z"])
  942. frame2.index = Index(["x", "y", "q"])
  943. v1 = concat([frame1, frame2], axis=1,
  944. ignore_index=True, sort=sort)
  945. nan = np.nan
  946. expected = DataFrame([[nan, nan, nan, 4.3],
  947. ['a', 1, 4.5, 5.2],
  948. ['b', 2, 3.2, 2.2],
  949. ['c', 3, 1.2, nan]],
  950. index=Index(["q", "x", "y", "z"]))
  951. if not sort:
  952. expected = expected.loc[['x', 'y', 'z', 'q']]
  953. tm.assert_frame_equal(v1, expected)
  954. def test_concat_multiindex_with_keys(self):
  955. index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'],
  956. ['one', 'two', 'three']],
  957. codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
  958. [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
  959. names=['first', 'second'])
  960. frame = DataFrame(np.random.randn(10, 3), index=index,
  961. columns=Index(['A', 'B', 'C'], name='exp'))
  962. result = concat([frame, frame], keys=[0, 1], names=['iteration'])
  963. assert result.index.names == ('iteration',) + index.names
  964. tm.assert_frame_equal(result.loc[0], frame)
  965. tm.assert_frame_equal(result.loc[1], frame)
  966. assert result.index.nlevels == 3
  967. def test_concat_multiindex_with_tz(self):
  968. # GH 6606
  969. df = DataFrame({'dt': [datetime(2014, 1, 1),
  970. datetime(2014, 1, 2),
  971. datetime(2014, 1, 3)],
  972. 'b': ['A', 'B', 'C'],
  973. 'c': [1, 2, 3], 'd': [4, 5, 6]})
  974. df['dt'] = df['dt'].apply(lambda d: Timestamp(d, tz='US/Pacific'))
  975. df = df.set_index(['dt', 'b'])
  976. exp_idx1 = DatetimeIndex(['2014-01-01', '2014-01-02',
  977. '2014-01-03'] * 2,
  978. tz='US/Pacific', name='dt')
  979. exp_idx2 = Index(['A', 'B', 'C'] * 2, name='b')
  980. exp_idx = MultiIndex.from_arrays([exp_idx1, exp_idx2])
  981. expected = DataFrame({'c': [1, 2, 3] * 2, 'd': [4, 5, 6] * 2},
  982. index=exp_idx, columns=['c', 'd'])
  983. result = concat([df, df])
  984. tm.assert_frame_equal(result, expected)
  985. def test_concat_multiindex_with_none_in_index_names(self):
  986. # GH 15787
  987. index = pd.MultiIndex.from_product([[1], range(5)],
  988. names=['level1', None])
  989. df = pd.DataFrame({'col': range(5)}, index=index, dtype=np.int32)
  990. result = concat([df, df], keys=[1, 2], names=['level2'])
  991. index = pd.MultiIndex.from_product([[1, 2], [1], range(5)],
  992. names=['level2', 'level1', None])
  993. expected = pd.DataFrame({'col': list(range(5)) * 2},
  994. index=index, dtype=np.int32)
  995. assert_frame_equal(result, expected)
  996. result = concat([df, df[:2]], keys=[1, 2], names=['level2'])
  997. level2 = [1] * 5 + [2] * 2
  998. level1 = [1] * 7
  999. no_name = list(range(5)) + list(range(2))
  1000. tuples = list(zip(level2, level1, no_name))
  1001. index = pd.MultiIndex.from_tuples(tuples,
  1002. names=['level2', 'level1', None])
  1003. expected = pd.DataFrame({'col': no_name}, index=index,
  1004. dtype=np.int32)
  1005. assert_frame_equal(result, expected)
  1006. def test_concat_keys_and_levels(self):
  1007. df = DataFrame(np.random.randn(1, 3))
  1008. df2 = DataFrame(np.random.randn(1, 4))
  1009. levels = [['foo', 'baz'], ['one', 'two']]
  1010. names = ['first', 'second']
  1011. result = concat([df, df2, df, df2],
  1012. keys=[('foo', 'one'), ('foo', 'two'),
  1013. ('baz', 'one'), ('baz', 'two')],
  1014. levels=levels,
  1015. names=names)
  1016. expected = concat([df, df2, df, df2])
  1017. exp_index = MultiIndex(levels=levels + [[0]],
  1018. codes=[[0, 0, 1, 1], [0, 1, 0, 1],
  1019. [0, 0, 0, 0]],
  1020. names=names + [None])
  1021. expected.index = exp_index
  1022. tm.assert_frame_equal(result, expected)
  1023. # no names
  1024. result = concat([df, df2, df, df2],
  1025. keys=[('foo', 'one'), ('foo', 'two'),
  1026. ('baz', 'one'), ('baz', 'two')],
  1027. levels=levels)
  1028. assert result.index.names == (None,) * 3
  1029. # no levels
  1030. result = concat([df, df2, df, df2],
  1031. keys=[('foo', 'one'), ('foo', 'two'),
  1032. ('baz', 'one'), ('baz', 'two')],
  1033. names=['first', 'second'])
  1034. assert result.index.names == ('first', 'second') + (None,)
  1035. tm.assert_index_equal(result.index.levels[0],
  1036. Index(['baz', 'foo'], name='first'))
  1037. def test_concat_keys_levels_no_overlap(self):
  1038. # GH #1406
  1039. df = DataFrame(np.random.randn(1, 3), index=['a'])
  1040. df2 = DataFrame(np.random.randn(1, 4), index=['b'])
  1041. msg = "Values not found in passed level"
  1042. with pytest.raises(ValueError, match=msg):
  1043. concat([df, df],
  1044. keys=['one', 'two'], levels=[['foo', 'bar', 'baz']])
  1045. msg = "Key one not in level"
  1046. with pytest.raises(ValueError, match=msg):
  1047. concat([df, df2],
  1048. keys=['one', 'two'], levels=[['foo', 'bar', 'baz']])
  1049. def test_concat_rename_index(self):
  1050. a = DataFrame(np.random.rand(3, 3),
  1051. columns=list('ABC'),
  1052. index=Index(list('abc'), name='index_a'))
  1053. b = DataFrame(np.random.rand(3, 3),
  1054. columns=list('ABC'),
  1055. index=Index(list('abc'), name='index_b'))
  1056. result = concat([a, b], keys=['key0', 'key1'],
  1057. names=['lvl0', 'lvl1'])
  1058. exp = concat([a, b], keys=['key0', 'key1'], names=['lvl0'])
  1059. names = list(exp.index.names)
  1060. names[1] = 'lvl1'
  1061. exp.index.set_names(names, inplace=True)
  1062. tm.assert_frame_equal(result, exp)
  1063. assert result.index.names == exp.index.names
  1064. def test_crossed_dtypes_weird_corner(self):
  1065. columns = ['A', 'B', 'C', 'D']
  1066. df1 = DataFrame({'A': np.array([1, 2, 3, 4], dtype='f8'),
  1067. 'B': np.array([1, 2, 3, 4], dtype='i8'),
  1068. 'C': np.array([1, 2, 3, 4], dtype='f8'),
  1069. 'D': np.array([1, 2, 3, 4], dtype='i8')},
  1070. columns=columns)
  1071. df2 = DataFrame({'A': np.array([1, 2, 3, 4], dtype='i8'),
  1072. 'B': np.array([1, 2, 3, 4], dtype='f8'),
  1073. 'C': np.array([1, 2, 3, 4], dtype='i8'),
  1074. 'D': np.array([1, 2, 3, 4], dtype='f8')},
  1075. columns=columns)
  1076. appended = df1.append(df2, ignore_index=True)
  1077. expected = DataFrame(np.concatenate([df1.values, df2.values], axis=0),
  1078. columns=columns)
  1079. tm.assert_frame_equal(appended, expected)
  1080. df = DataFrame(np.random.randn(1, 3), index=['a'])
  1081. df2 = DataFrame(np.random.randn(1, 4), index=['b'])
  1082. result = concat(
  1083. [df, df2], keys=['one', 'two'], names=['first', 'second'])
  1084. assert result.index.names == ('first', 'second')
  1085. def test_dups_index(self):
  1086. # GH 4771
  1087. # single dtypes
  1088. df = DataFrame(np.random.randint(0, 10, size=40).reshape(
  1089. 10, 4), columns=['A', 'A', 'C', 'C'])
  1090. result = concat([df, df], axis=1)
  1091. assert_frame_equal(result.iloc[:, :4], df)
  1092. assert_frame_equal(result.iloc[:, 4:], df)
  1093. result = concat([df, df], axis=0)
  1094. assert_frame_equal(result.iloc[:10], df)
  1095. assert_frame_equal(result.iloc[10:], df)
  1096. # multi dtypes
  1097. df = concat([DataFrame(np.random.randn(10, 4),
  1098. columns=['A', 'A', 'B', 'B']),
  1099. DataFrame(np.random.randint(0, 10, size=20)
  1100. .reshape(10, 2),
  1101. columns=['A', 'C'])],
  1102. axis=1)
  1103. result = concat([df, df], axis=1)
  1104. assert_frame_equal(result.iloc[:, :6], df)
  1105. assert_frame_equal(result.iloc[:, 6:], df)
  1106. result = concat([df, df], axis=0)
  1107. assert_frame_equal(result.iloc[:10], df)
  1108. assert_frame_equal(result.iloc[10:], df)
  1109. # append
  1110. result = df.iloc[0:8, :].append(df.iloc[8:])
  1111. assert_frame_equal(result, df)
  1112. result = df.iloc[0:8, :].append(df.iloc[8:9]).append(df.iloc[9:10])
  1113. assert_frame_equal(result, df)
  1114. expected = concat([df, df], axis=0)
  1115. result = df.append(df)
  1116. assert_frame_equal(result, expected)
  1117. def test_with_mixed_tuples(self, sort):
  1118. # 10697
  1119. # columns have mixed tuples, so handle properly
  1120. df1 = DataFrame({u'A': 'foo', (u'B', 1): 'bar'}, index=range(2))
  1121. df2 = DataFrame({u'B': 'foo', (u'B', 1): 'bar'}, index=range(2))
  1122. # it works
  1123. concat([df1, df2], sort=sort)
  1124. def test_handle_empty_objects(self, sort):
  1125. df = DataFrame(np.random.randn(10, 4), columns=list('abcd'))
  1126. baz = df[:5].copy()
  1127. baz['foo'] = 'bar'
  1128. empty = df[5:5]
  1129. frames = [baz, empty, empty, df[5:]]
  1130. concatted = concat(frames, axis=0, sort=sort)
  1131. expected = df.reindex(columns=['a', 'b', 'c', 'd', 'foo'])
  1132. expected['foo'] = expected['foo'].astype('O')
  1133. expected.loc[0:4, 'foo'] = 'bar'
  1134. tm.assert_frame_equal(concatted, expected)
  1135. # empty as first element with time series
  1136. # GH3259
  1137. df = DataFrame(dict(A=range(10000)), index=date_range(
  1138. '20130101', periods=10000, freq='s'))
  1139. empty = DataFrame()
  1140. result = concat([df, empty], axis=1)
  1141. assert_frame_equal(result, df)
  1142. result = concat([empty, df], axis=1)
  1143. assert_frame_equal(result, df)
  1144. result = concat([df, empty])
  1145. assert_frame_equal(result, df)
  1146. result = concat([empty, df])
  1147. assert_frame_equal(result, df)
def test_concat_mixed_objs(self):
    """Concatenate a mix of Series and DataFrames (GH 2385).

    Covers axis=1 column labelling (positional labels vs. Series.name),
    ignore_index, axis=0 stacking, and the ValueError raised when
    concatenating unaligned mixed-dimensional objects (Panel + Series).
    """
    # concat mixed series/frames
    # G2385
    # axis 1
    index = date_range('01-Jan-2013', periods=10, freq='H')
    arr = np.arange(10, dtype='int64')
    s1 = Series(arr, index=index)
    s2 = Series(arr, index=index)
    df = DataFrame(arr.reshape(-1, 1), index=index)

    # two copies of the same one-column frame -> duplicate column label 0
    expected = DataFrame(np.repeat(arr, 2).reshape(-1, 2),
                         index=index, columns=[0, 0])
    result = concat([df, df], axis=1)
    assert_frame_equal(result, expected)

    # unnamed Series get consecutive integer column labels
    expected = DataFrame(np.repeat(arr, 2).reshape(-1, 2),
                         index=index, columns=[0, 1])
    result = concat([s1, s2], axis=1)
    assert_frame_equal(result, expected)

    expected = DataFrame(np.repeat(arr, 3).reshape(-1, 3),
                         index=index, columns=[0, 1, 2])
    result = concat([s1, s2, s1], axis=1)
    assert_frame_equal(result, expected)

    # the frame keeps its own column label (0); the Series are renumbered
    expected = DataFrame(np.repeat(arr, 5).reshape(-1, 5),
                         index=index, columns=[0, 0, 1, 2, 3])
    result = concat([s1, df, s2, s2, s1], axis=1)
    assert_frame_equal(result, expected)

    # with names
    s1.name = 'foo'
    expected = DataFrame(np.repeat(arr, 3).reshape(-1, 3),
                         index=index, columns=['foo', 0, 0])
    result = concat([s1, df, s2], axis=1)
    assert_frame_equal(result, expected)

    s2.name = 'bar'
    expected = DataFrame(np.repeat(arr, 3).reshape(-1, 3),
                         index=index, columns=['foo', 0, 'bar'])
    result = concat([s1, df, s2], axis=1)
    assert_frame_equal(result, expected)

    # ignore index: names and positional labels are both discarded
    expected = DataFrame(np.repeat(arr, 3).reshape(-1, 3),
                         index=index, columns=[0, 1, 2])
    result = concat([s1, df, s2], axis=1, ignore_index=True)
    assert_frame_equal(result, expected)

    # axis 0
    expected = DataFrame(np.tile(arr, 3).reshape(-1, 1),
                         index=index.tolist() * 3, columns=[0])
    result = concat([s1, df, s2])
    assert_frame_equal(result, expected)

    expected = DataFrame(np.tile(arr, 3).reshape(-1, 1), columns=[0])
    result = concat([s1, df, s2], ignore_index=True)
    assert_frame_equal(result, expected)

    # invalid concatenation of mixed dims
    with catch_warnings(record=True):
        simplefilter("ignore", FutureWarning)  # Panel is deprecated
        panel = tm.makePanel()
        msg = ("cannot concatenate unaligned mixed dimensional NDFrame"
               " objects")
        with pytest.raises(ValueError, match=msg):
            concat([panel, s1], axis=1)
  1205. def test_empty_dtype_coerce(self):
  1206. # xref to #12411
  1207. # xref to #12045
  1208. # xref to #11594
  1209. # see below
  1210. # 10571
  1211. df1 = DataFrame(data=[[1, None], [2, None]], columns=['a', 'b'])
  1212. df2 = DataFrame(data=[[3, None], [4, None]], columns=['a', 'b'])
  1213. result = concat([df1, df2])
  1214. expected = df1.dtypes
  1215. tm.assert_series_equal(result.dtypes, expected)
def test_dtype_coerceion(self):
    """Column dtypes must survive concat of single-row slices."""
    # GH 12411: tz-aware timestamp together with NaT
    df = DataFrame({'date': [pd.Timestamp('20130101').tz_localize('UTC'),
                             pd.NaT]})

    result = concat([df.iloc[[0]], df.iloc[[1]]])
    tm.assert_series_equal(result.dtypes, df.dtypes)

    # GH 12045
    # NOTE(review): year 1012 is far outside the datetime64[ns] range;
    # presumably deliberate to force non-ns handling, but confirm it is
    # not a typo for 2012.
    import datetime
    df = DataFrame({'date': [datetime.datetime(2012, 1, 1),
                             datetime.datetime(1012, 1, 2)]})
    result = concat([df.iloc[[0]], df.iloc[[1]]])
    tm.assert_series_equal(result.dtypes, df.dtypes)

    # GH 11594: object column that is mostly None
    df = DataFrame({'text': ['some words'] + [None] * 9})
    result = concat([df.iloc[[0]], df.iloc[[1]]])
    tm.assert_series_equal(result.dtypes, df.dtypes)
@pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning")
def test_panel_concat_other_axes(self):
    """Concat of Panel pieces along axis 1 / axis 2 reassembles the
    original; mismatched pieces force an object upcast (Panel is
    deprecated, hence the warning filter)."""
    panel = tm.makePanel()

    # split / rejoin along the major axis
    p1 = panel.iloc[:, :5, :]
    p2 = panel.iloc[:, 5:, :]

    result = concat([p1, p2], axis=1)
    tm.assert_panel_equal(result, panel)

    # split / rejoin along the minor axis
    p1 = panel.iloc[:, :, :2]
    p2 = panel.iloc[:, :, 2:]

    result = concat([p1, p2], axis=2)
    tm.assert_panel_equal(result, panel)

    # if things are a bit misbehaved
    p1 = panel.iloc[:2, :, :2]
    p2 = panel.iloc[:, :, 2:]
    p1['ItemC'] = 'baz'

    result = concat([p1, p2], axis=2)

    expected = panel.copy()
    # string assignment upcasts ItemC to object
    expected['ItemC'] = expected['ItemC'].astype('O')
    expected.loc['ItemC', :, :2] = 'baz'
    tm.assert_panel_equal(result, expected)
@pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning")
# Panel.rename warning we don't care about
@pytest.mark.filterwarnings("ignore:Using:FutureWarning")
def test_panel_concat_buglet(self, sort):
    """GH 2257: concat of Panels with disjoint axis labels must not
    crash (smoke test -- no result assertion)."""
    def make_panel():
        index = 5
        cols = 3

        def df():
            return DataFrame(np.random.randn(index, cols),
                             index=["I%s" % i for i in range(index)],
                             columns=["C%s" % i for i in range(cols)])
        return Panel({"Item%s" % x: df() for x in ['A', 'B', 'C']})

    panel1 = make_panel()
    panel2 = make_panel()

    # rename so panel3 shares no major/minor labels with panel1
    panel2 = panel2.rename(major_axis={x: "%s_1" % x
                                       for x in panel2.major_axis})

    panel3 = panel2.rename(major_axis=lambda x: '%s_1' % x)
    panel3 = panel3.rename(minor_axis=lambda x: '%s_1' % x)

    # it works!
    concat([panel1, panel3], axis=1, verify_integrity=True, sort=sort)
def test_concat_series(self):
    """Concat of Series pieces restores the original (and its name);
    keys= builds a MultiIndex on the result."""
    ts = tm.makeTimeSeries()
    ts.name = 'foo'

    pieces = [ts[:5], ts[5:15], ts[15:]]

    result = concat(pieces)
    tm.assert_series_equal(result, ts)
    assert result.name == ts.name

    result = concat(pieces, keys=[0, 1, 2])
    expected = ts.copy()

    # normalize the index dtype before building the expected MultiIndex
    ts.index = DatetimeIndex(np.array(ts.index.values, dtype='M8[ns]'))

    # outer level repeats each key over its piece; inner level is 0..n-1
    exp_codes = [np.repeat([0, 1, 2], [len(x) for x in pieces]),
                 np.arange(len(ts))]
    exp_index = MultiIndex(levels=[[0, 1, 2], ts.index],
                           codes=exp_codes)
    expected.index = exp_index
    tm.assert_series_equal(result, expected)
def test_concat_series_axis1(self, sort=sort):
    """Concat Series along axis=1: frame layout, keys= as column
    labels, name preservation (GH 2489) and index union (GH 2603)."""
    # NOTE(review): ``sort=sort`` evaluates a module-level name at
    # class-definition time and the default then suppresses pytest's
    # fixture injection -- sibling tests here take a plain ``sort``
    # parameter; confirm this default is intentional.
    ts = tm.makeTimeSeries()

    pieces = [ts[:-2], ts[2:], ts[2:-2]]

    result = concat(pieces, axis=1)
    expected = DataFrame(pieces).T
    assert_frame_equal(result, expected)

    result = concat(pieces, keys=['A', 'B', 'C'], axis=1)
    expected = DataFrame(pieces, index=['A', 'B', 'C']).T
    assert_frame_equal(result, expected)

    # preserve series names, #2489
    s = Series(randn(5), name='A')
    s2 = Series(randn(5), name='B')

    result = concat([s, s2], axis=1)
    expected = DataFrame({'A': s, 'B': s2})
    assert_frame_equal(result, expected)

    # an unnamed Series falls back to a positional column label
    s2.name = None
    result = concat([s, s2], axis=1)
    tm.assert_index_equal(result.columns,
                          Index(['A', 0], dtype='object'))

    # must reindex, #2603
    s = Series(randn(3), index=['c', 'a', 'b'], name='A')
    s2 = Series(randn(4), index=['d', 'a', 'b', 'c'], name='B')

    result = concat([s, s2], axis=1, sort=sort)
    expected = DataFrame({'A': s, 'B': s2})
    assert_frame_equal(result, expected)
  1314. def test_concat_series_axis1_names_applied(self):
  1315. # ensure names argument is not ignored on axis=1, #23490
  1316. s = Series([1, 2, 3])
  1317. s2 = Series([4, 5, 6])
  1318. result = concat([s, s2], axis=1, keys=['a', 'b'], names=['A'])
  1319. expected = DataFrame([[1, 4], [2, 5], [3, 6]],
  1320. columns=pd.Index(['a', 'b'], name='A'))
  1321. assert_frame_equal(result, expected)
  1322. result = concat([s, s2], axis=1, keys=[('a', 1), ('b', 2)],
  1323. names=['A', 'B'])
  1324. expected = DataFrame([[1, 4], [2, 5], [3, 6]],
  1325. columns=MultiIndex.from_tuples([('a', 1),
  1326. ('b', 2)],
  1327. names=['A', 'B']))
  1328. assert_frame_equal(result, expected)
  1329. def test_concat_single_with_key(self):
  1330. df = DataFrame(np.random.randn(10, 4))
  1331. result = concat([df], keys=['foo'])
  1332. expected = concat([df, df], keys=['foo', 'bar'])
  1333. tm.assert_frame_equal(result, expected[:10])
  1334. def test_concat_exclude_none(self):
  1335. df = DataFrame(np.random.randn(10, 4))
  1336. pieces = [df[:5], None, None, df[5:]]
  1337. result = concat(pieces)
  1338. tm.assert_frame_equal(result, df)
  1339. with pytest.raises(ValueError, match="All objects passed were None"):
  1340. concat([None, None])
  1341. def test_concat_datetime64_block(self):
  1342. from pandas.core.indexes.datetimes import date_range
  1343. rng = date_range('1/1/2000', periods=10)
  1344. df = DataFrame({'time': rng})
  1345. result = concat([df, df])
  1346. assert (result.iloc[:10]['time'] == rng).all()
  1347. assert (result.iloc[10:]['time'] == rng).all()
  1348. def test_concat_timedelta64_block(self):
  1349. from pandas import to_timedelta
  1350. rng = to_timedelta(np.arange(10), unit='s')
  1351. df = DataFrame({'time': rng})
  1352. result = concat([df, df])
  1353. assert (result.iloc[:10]['time'] == rng).all()
  1354. assert (result.iloc[10:]['time'] == rng).all()
  1355. def test_concat_keys_with_none(self):
  1356. # #1649
  1357. df0 = DataFrame([[10, 20, 30], [10, 20, 30], [10, 20, 30]])
  1358. result = concat(dict(a=None, b=df0, c=df0[:2], d=df0[:1], e=df0))
  1359. expected = concat(dict(b=df0, c=df0[:2], d=df0[:1], e=df0))
  1360. tm.assert_frame_equal(result, expected)
  1361. result = concat([None, df0, df0[:2], df0[:1], df0],
  1362. keys=['a', 'b', 'c', 'd', 'e'])
  1363. expected = concat([df0, df0[:2], df0[:1], df0],
  1364. keys=['b', 'c', 'd', 'e'])
  1365. tm.assert_frame_equal(result, expected)
  1366. def test_concat_bug_1719(self):
  1367. ts1 = tm.makeTimeSeries()
  1368. ts2 = tm.makeTimeSeries()[::2]
  1369. # to join with union
  1370. # these two are of different length!
  1371. left = concat([ts1, ts2], join='outer', axis=1)
  1372. right = concat([ts2, ts1], join='outer', axis=1)
  1373. assert len(left) == len(right)
  1374. def test_concat_bug_2972(self):
  1375. ts0 = Series(np.zeros(5))
  1376. ts1 = Series(np.ones(5))
  1377. ts0.name = ts1.name = 'same name'
  1378. result = concat([ts0, ts1], axis=1)
  1379. expected = DataFrame({0: ts0, 1: ts1})
  1380. expected.columns = ['same name', 'same name']
  1381. assert_frame_equal(result, expected)
  1382. def test_concat_bug_3602(self):
  1383. # GH 3602, duplicate columns
  1384. df1 = DataFrame({'firmNo': [0, 0, 0, 0], 'prc': [6, 6, 6, 6],
  1385. 'stringvar': ['rrr', 'rrr', 'rrr', 'rrr']})
  1386. df2 = DataFrame({'C': [9, 10, 11, 12], 'misc': [1, 2, 3, 4],
  1387. 'prc': [6, 6, 6, 6]})
  1388. expected = DataFrame([[0, 6, 'rrr', 9, 1, 6],
  1389. [0, 6, 'rrr', 10, 2, 6],
  1390. [0, 6, 'rrr', 11, 3, 6],
  1391. [0, 6, 'rrr', 12, 4, 6]])
  1392. expected.columns = ['firmNo', 'prc', 'stringvar', 'C', 'misc', 'prc']
  1393. result = concat([df1, df2], axis=1)
  1394. assert_frame_equal(result, expected)
  1395. def test_concat_inner_join_empty(self):
  1396. # GH 15328
  1397. df_empty = pd.DataFrame()
  1398. df_a = pd.DataFrame({'a': [1, 2]}, index=[0, 1], dtype='int64')
  1399. df_expected = pd.DataFrame({'a': []}, index=[], dtype='int64')
  1400. for how, expected in [('inner', df_expected), ('outer', df_a)]:
  1401. result = pd.concat([df_a, df_empty], axis=1, join=how)
  1402. assert_frame_equal(result, expected)
  1403. def test_concat_series_axis1_same_names_ignore_index(self):
  1404. dates = date_range('01-Jan-2013', '01-Jan-2014', freq='MS')[0:-1]
  1405. s1 = Series(randn(len(dates)), index=dates, name='value')
  1406. s2 = Series(randn(len(dates)), index=dates, name='value')
  1407. result = concat([s1, s2], axis=1, ignore_index=True)
  1408. expected = Index([0, 1])
  1409. tm.assert_index_equal(result.columns, expected)
  1410. def test_concat_iterables(self):
  1411. # GH8645 check concat works with tuples, list, generators, and weird
  1412. # stuff like deque and custom iterables
  1413. df1 = DataFrame([1, 2, 3])
  1414. df2 = DataFrame([4, 5, 6])
  1415. expected = DataFrame([1, 2, 3, 4, 5, 6])
  1416. assert_frame_equal(concat((df1, df2), ignore_index=True), expected)
  1417. assert_frame_equal(concat([df1, df2], ignore_index=True), expected)
  1418. assert_frame_equal(concat((df for df in (df1, df2)),
  1419. ignore_index=True), expected)
  1420. assert_frame_equal(
  1421. concat(deque((df1, df2)), ignore_index=True), expected)
  1422. class CustomIterator1(object):
  1423. def __len__(self):
  1424. return 2
  1425. def __getitem__(self, index):
  1426. try:
  1427. return {0: df1, 1: df2}[index]
  1428. except KeyError:
  1429. raise IndexError
  1430. assert_frame_equal(pd.concat(CustomIterator1(),
  1431. ignore_index=True), expected)
  1432. class CustomIterator2(Iterable):
  1433. def __iter__(self):
  1434. yield df1
  1435. yield df2
  1436. assert_frame_equal(pd.concat(CustomIterator2(),
  1437. ignore_index=True), expected)
  1438. def test_concat_invalid(self):
  1439. # trying to concat a ndframe with a non-ndframe
  1440. df1 = mkdf(10, 2)
  1441. msg = ('cannot concatenate object of type "{}";'
  1442. ' only pd.Series, pd.DataFrame, and pd.Panel'
  1443. r' \(deprecated\) objs are valid')
  1444. for obj in [1, dict(), [1, 2], (1, 2)]:
  1445. with pytest.raises(TypeError, match=msg.format(type(obj))):
  1446. concat([df1, obj])
def test_concat_invalid_first_argument(self):
    """The first argument must be an iterable of pandas objects: a bare
    DataFrame raises TypeError, while generators and chunked
    TextFileReader objects (GH 6583) are accepted."""
    df1 = mkdf(10, 2)
    df2 = mkdf(10, 2)
    msg = ('first argument must be an iterable of pandas '
           'objects, you passed an object of type "DataFrame"')
    with pytest.raises(TypeError, match=msg):
        concat(df1, df2)

    # generator ok though
    concat(DataFrame(np.random.rand(5, 5)) for _ in range(3))

    # text reader ok
    # GH6583
    data = """index,A,B,C,D
foo,2,3,4,5
bar,7,8,9,10
baz,12,13,14,15
qux,12,13,14,15
foo2,12,13,14,15
bar2,12,13,14,15
"""

    # concat of one-row chunks must equal reading the file in one go
    reader = read_csv(StringIO(data), chunksize=1)
    result = concat(reader, ignore_index=True)
    expected = read_csv(StringIO(data))
    assert_frame_equal(result, expected)
  1470. def test_concat_NaT_series(self):
  1471. # GH 11693
  1472. # test for merging NaT series with datetime series.
  1473. x = Series(date_range('20151124 08:00', '20151124 09:00',
  1474. freq='1h', tz='US/Eastern'))
  1475. y = Series(pd.NaT, index=[0, 1], dtype='datetime64[ns, US/Eastern]')
  1476. expected = Series([x[0], x[1], pd.NaT, pd.NaT])
  1477. result = concat([x, y], ignore_index=True)
  1478. tm.assert_series_equal(result, expected)
  1479. # all NaT with tz
  1480. expected = Series(pd.NaT, index=range(4),
  1481. dtype='datetime64[ns, US/Eastern]')
  1482. result = pd.concat([y, y], ignore_index=True)
  1483. tm.assert_series_equal(result, expected)
  1484. # without tz
  1485. x = pd.Series(pd.date_range('20151124 08:00',
  1486. '20151124 09:00', freq='1h'))
  1487. y = pd.Series(pd.date_range('20151124 10:00',
  1488. '20151124 11:00', freq='1h'))
  1489. y[:] = pd.NaT
  1490. expected = pd.Series([x[0], x[1], pd.NaT, pd.NaT])
  1491. result = pd.concat([x, y], ignore_index=True)
  1492. tm.assert_series_equal(result, expected)
  1493. # all NaT without tz
  1494. x[:] = pd.NaT
  1495. expected = pd.Series(pd.NaT, index=range(4),
  1496. dtype='datetime64[ns]')
  1497. result = pd.concat([x, y], ignore_index=True)
  1498. tm.assert_series_equal(result, expected)
  1499. def test_concat_tz_frame(self):
  1500. df2 = DataFrame(dict(A=pd.Timestamp('20130102', tz='US/Eastern'),
  1501. B=pd.Timestamp('20130603', tz='CET')),
  1502. index=range(5))
  1503. # concat
  1504. df3 = pd.concat([df2.A.to_frame(), df2.B.to_frame()], axis=1)
  1505. assert_frame_equal(df2, df3)
  1506. def test_concat_tz_series(self):
  1507. # gh-11755: tz and no tz
  1508. x = Series(date_range('20151124 08:00',
  1509. '20151124 09:00',
  1510. freq='1h', tz='UTC'))
  1511. y = Series(date_range('2012-01-01', '2012-01-02'))
  1512. expected = Series([x[0], x[1], y[0], y[1]],
  1513. dtype='object')
  1514. result = concat([x, y], ignore_index=True)
  1515. tm.assert_series_equal(result, expected)
  1516. # gh-11887: concat tz and object
  1517. x = Series(date_range('20151124 08:00',
  1518. '20151124 09:00',
  1519. freq='1h', tz='UTC'))
  1520. y = Series(['a', 'b'])
  1521. expected = Series([x[0], x[1], y[0], y[1]],
  1522. dtype='object')
  1523. result = concat([x, y], ignore_index=True)
  1524. tm.assert_series_equal(result, expected)
  1525. # see gh-12217 and gh-12306
  1526. # Concatenating two UTC times
  1527. first = pd.DataFrame([[datetime(2016, 1, 1)]])
  1528. first[0] = first[0].dt.tz_localize('UTC')
  1529. second = pd.DataFrame([[datetime(2016, 1, 2)]])
  1530. second[0] = second[0].dt.tz_localize('UTC')
  1531. result = pd.concat([first, second])
  1532. assert result[0].dtype == 'datetime64[ns, UTC]'
  1533. # Concatenating two London times
  1534. first = pd.DataFrame([[datetime(2016, 1, 1)]])
  1535. first[0] = first[0].dt.tz_localize('Europe/London')
  1536. second = pd.DataFrame([[datetime(2016, 1, 2)]])
  1537. second[0] = second[0].dt.tz_localize('Europe/London')
  1538. result = pd.concat([first, second])
  1539. assert result[0].dtype == 'datetime64[ns, Europe/London]'
  1540. # Concatenating 2+1 London times
  1541. first = pd.DataFrame([[datetime(2016, 1, 1)], [datetime(2016, 1, 2)]])
  1542. first[0] = first[0].dt.tz_localize('Europe/London')
  1543. second = pd.DataFrame([[datetime(2016, 1, 3)]])
  1544. second[0] = second[0].dt.tz_localize('Europe/London')
  1545. result = pd.concat([first, second])
  1546. assert result[0].dtype == 'datetime64[ns, Europe/London]'
  1547. # Concat'ing 1+2 London times
  1548. first = pd.DataFrame([[datetime(2016, 1, 1)]])
  1549. first[0] = first[0].dt.tz_localize('Europe/London')
  1550. second = pd.DataFrame([[datetime(2016, 1, 2)], [datetime(2016, 1, 3)]])
  1551. second[0] = second[0].dt.tz_localize('Europe/London')
  1552. result = pd.concat([first, second])
  1553. assert result[0].dtype == 'datetime64[ns, Europe/London]'
  1554. def test_concat_tz_series_with_datetimelike(self):
  1555. # see gh-12620: tz and timedelta
  1556. x = [pd.Timestamp('2011-01-01', tz='US/Eastern'),
  1557. pd.Timestamp('2011-02-01', tz='US/Eastern')]
  1558. y = [pd.Timedelta('1 day'), pd.Timedelta('2 day')]
  1559. result = concat([pd.Series(x), pd.Series(y)], ignore_index=True)
  1560. tm.assert_series_equal(result, pd.Series(x + y, dtype='object'))
  1561. # tz and period
  1562. y = [pd.Period('2011-03', freq='M'), pd.Period('2011-04', freq='M')]
  1563. result = concat([pd.Series(x), pd.Series(y)], ignore_index=True)
  1564. tm.assert_series_equal(result, pd.Series(x + y, dtype='object'))
  1565. def test_concat_tz_series_tzlocal(self):
  1566. # see gh-13583
  1567. x = [pd.Timestamp('2011-01-01', tz=dateutil.tz.tzlocal()),
  1568. pd.Timestamp('2011-02-01', tz=dateutil.tz.tzlocal())]
  1569. y = [pd.Timestamp('2012-01-01', tz=dateutil.tz.tzlocal()),
  1570. pd.Timestamp('2012-02-01', tz=dateutil.tz.tzlocal())]
  1571. result = concat([pd.Series(x), pd.Series(y)], ignore_index=True)
  1572. tm.assert_series_equal(result, pd.Series(x + y))
  1573. assert result.dtype == 'datetime64[ns, tzlocal()]'
@pytest.mark.parametrize('tz1', [None, 'UTC'])
@pytest.mark.parametrize('tz2', [None, 'UTC'])
@pytest.mark.parametrize('s', [pd.NaT, pd.Timestamp('20150101')])
def test_concat_NaT_dataframes_all_NaT_axis_0(self, tz1, tz2, s):
    """GH 12396: axis=0 concat of an all-NaT frame with a one-row frame
    across tz combinations; mismatched tzs degrade to object dtype."""
    # tz-naive
    first = pd.DataFrame([[pd.NaT], [pd.NaT]]).apply(
        lambda x: x.dt.tz_localize(tz1))
    second = pd.DataFrame([s]).apply(lambda x: x.dt.tz_localize(tz2))

    result = pd.concat([first, second], axis=0)
    # second keeps its original row label 0, hence index [0, 1, 0]
    expected = pd.DataFrame(pd.Series(
        [pd.NaT, pd.NaT, s], index=[0, 1, 0]))
    expected = expected.apply(lambda x: x.dt.tz_localize(tz2))
    if tz1 != tz2:
        # differing timezones cannot share one datetime64 dtype
        expected = expected.astype(object)

    assert_frame_equal(result, expected)
@pytest.mark.parametrize('tz1', [None, 'UTC'])
@pytest.mark.parametrize('tz2', [None, 'UTC'])
def test_concat_NaT_dataframes_all_NaT_axis_1(self, tz1, tz2):
    """GH 12396: axis=1 concat of all-NaT frames keeps one column per
    input, each localized to its own tz."""
    first = pd.DataFrame(pd.Series([pd.NaT, pd.NaT]).dt.tz_localize(tz1))
    # the one-row frame is padded with NaT to match the longer index
    second = pd.DataFrame(pd.Series(
        [pd.NaT]).dt.tz_localize(tz2), columns=[1])
    expected = pd.DataFrame(
        {0: pd.Series([pd.NaT, pd.NaT]).dt.tz_localize(tz1),
         1: pd.Series([pd.NaT, pd.NaT]).dt.tz_localize(tz2)}
    )
    result = pd.concat([first, second], axis=1)
    assert_frame_equal(result, expected)
@pytest.mark.parametrize('tz1', [None, 'UTC'])
@pytest.mark.parametrize('tz2', [None, 'UTC'])
def test_concat_NaT_series_dataframe_all_NaT(self, tz1, tz2):
    """GH 12396: all-NaT Series + timestamp DataFrame; mismatched tzs
    degrade to object dtype."""
    # tz-naive
    first = pd.Series([pd.NaT, pd.NaT]).dt.tz_localize(tz1)
    second = pd.DataFrame([[pd.Timestamp('2015/01/01', tz=tz2)],
                           [pd.Timestamp('2016/01/01', tz=tz2)]],
                          index=[2, 3])

    expected = pd.DataFrame([pd.NaT, pd.NaT,
                             pd.Timestamp('2015/01/01', tz=tz2),
                             pd.Timestamp('2016/01/01', tz=tz2)])

    if tz1 != tz2:
        # differing timezones cannot share one datetime64 dtype
        expected = expected.astype(object)

    result = pd.concat([first, second])
    assert_frame_equal(result, expected)
@pytest.mark.parametrize('tz', [None, 'UTC'])
def test_concat_NaT_dataframes(self, tz):
    """GH 12396: all-NaT frame + timestamp frame of the same tz keeps
    the datetime64 dtype."""
    first = pd.DataFrame([[pd.NaT], [pd.NaT]])
    first = first.apply(lambda x: x.dt.tz_localize(tz))
    second = pd.DataFrame([[pd.Timestamp('2015/01/01', tz=tz)],
                           [pd.Timestamp('2016/01/01', tz=tz)]],
                          index=[2, 3])
    expected = pd.DataFrame([pd.NaT, pd.NaT,
                             pd.Timestamp('2015/01/01', tz=tz),
                             pd.Timestamp('2016/01/01', tz=tz)])

    result = pd.concat([first, second], axis=0)
    assert_frame_equal(result, expected)
  1632. def test_concat_period_series(self):
  1633. x = Series(pd.PeriodIndex(['2015-11-01', '2015-12-01'], freq='D'))
  1634. y = Series(pd.PeriodIndex(['2015-10-01', '2016-01-01'], freq='D'))
  1635. expected = Series([x[0], x[1], y[0], y[1]], dtype='Period[D]')
  1636. result = concat([x, y], ignore_index=True)
  1637. tm.assert_series_equal(result, expected)
  1638. def test_concat_period_multiple_freq_series(self):
  1639. x = Series(pd.PeriodIndex(['2015-11-01', '2015-12-01'], freq='D'))
  1640. y = Series(pd.PeriodIndex(['2015-10-01', '2016-01-01'], freq='M'))
  1641. expected = Series([x[0], x[1], y[0], y[1]], dtype='object')
  1642. result = concat([x, y], ignore_index=True)
  1643. tm.assert_series_equal(result, expected)
  1644. assert result.dtype == 'object'
  1645. def test_concat_period_other_series(self):
  1646. x = Series(pd.PeriodIndex(['2015-11-01', '2015-12-01'], freq='D'))
  1647. y = Series(pd.PeriodIndex(['2015-11-01', '2015-12-01'], freq='M'))
  1648. expected = Series([x[0], x[1], y[0], y[1]], dtype='object')
  1649. result = concat([x, y], ignore_index=True)
  1650. tm.assert_series_equal(result, expected)
  1651. assert result.dtype == 'object'
  1652. # non-period
  1653. x = Series(pd.PeriodIndex(['2015-11-01', '2015-12-01'], freq='D'))
  1654. y = Series(pd.DatetimeIndex(['2015-11-01', '2015-12-01']))
  1655. expected = Series([x[0], x[1], y[0], y[1]], dtype='object')
  1656. result = concat([x, y], ignore_index=True)
  1657. tm.assert_series_equal(result, expected)
  1658. assert result.dtype == 'object'
  1659. x = Series(pd.PeriodIndex(['2015-11-01', '2015-12-01'], freq='D'))
  1660. y = Series(['A', 'B'])
  1661. expected = Series([x[0], x[1], y[0], y[1]], dtype='object')
  1662. result = concat([x, y], ignore_index=True)
  1663. tm.assert_series_equal(result, expected)
  1664. assert result.dtype == 'object'
def test_concat_empty_series(self):
    """GH 11082: concat with an empty Series.

    axis=1 adds an all-NaN column named after the empty series (or a
    positional label when unnamed); axis=0 resets conflicting names.
    """
    s1 = pd.Series([1, 2, 3], name='x')
    s2 = pd.Series(name='y')
    res = pd.concat([s1, s2], axis=1)
    exp = pd.DataFrame({'x': [1, 2, 3], 'y': [np.nan, np.nan, np.nan]})
    tm.assert_frame_equal(res, exp)

    s1 = pd.Series([1, 2, 3], name='x')
    s2 = pd.Series(name='y')
    res = pd.concat([s1, s2], axis=0)
    # name will be reset
    exp = pd.Series([1, 2, 3])
    tm.assert_series_equal(res, exp)

    # empty Series with no name
    s1 = pd.Series([1, 2, 3], name='x')
    s2 = pd.Series(name=None)
    res = pd.concat([s1, s2], axis=1)
    # the unnamed empty series gets positional column label 0
    exp = pd.DataFrame({'x': [1, 2, 3], 0: [np.nan, np.nan, np.nan]},
                       columns=['x', 0])
    tm.assert_frame_equal(res, exp)
@pytest.mark.parametrize('tz', [None, 'UTC'])
@pytest.mark.parametrize('values', [[], [1, 2, 3]])
def test_concat_empty_series_timelike(self, tz, values):
    """GH 18447: an empty datetime64 (optionally tz-aware) series
    concatenated with a data series becomes an all-NaT column."""
    first = Series([], dtype='M8[ns]').dt.tz_localize(tz)
    second = Series(values)

    # the empty side is padded with NaT to the length of `values`
    expected = DataFrame(
        {0: pd.Series([pd.NaT] * len(values),
                      dtype='M8[ns]'
                      ).dt.tz_localize(tz),
         1: values})
    result = concat([first, second], axis=1)
    assert_frame_equal(result, expected)
  1698. def test_default_index(self):
  1699. # is_series and ignore_index
  1700. s1 = pd.Series([1, 2, 3], name='x')
  1701. s2 = pd.Series([4, 5, 6], name='y')
  1702. res = pd.concat([s1, s2], axis=1, ignore_index=True)
  1703. assert isinstance(res.columns, pd.RangeIndex)
  1704. exp = pd.DataFrame([[1, 4], [2, 5], [3, 6]])
  1705. # use check_index_type=True to check the result have
  1706. # RangeIndex (default index)
  1707. tm.assert_frame_equal(res, exp, check_index_type=True,
  1708. check_column_type=True)
  1709. # is_series and all inputs have no names
  1710. s1 = pd.Series([1, 2, 3])
  1711. s2 = pd.Series([4, 5, 6])
  1712. res = pd.concat([s1, s2], axis=1, ignore_index=False)
  1713. assert isinstance(res.columns, pd.RangeIndex)
  1714. exp = pd.DataFrame([[1, 4], [2, 5], [3, 6]])
  1715. exp.columns = pd.RangeIndex(2)
  1716. tm.assert_frame_equal(res, exp, check_index_type=True,
  1717. check_column_type=True)
  1718. # is_dataframe and ignore_index
  1719. df1 = pd.DataFrame({'A': [1, 2], 'B': [5, 6]})
  1720. df2 = pd.DataFrame({'A': [3, 4], 'B': [7, 8]})
  1721. res = pd.concat([df1, df2], axis=0, ignore_index=True)
  1722. exp = pd.DataFrame([[1, 5], [2, 6], [3, 7], [4, 8]],
  1723. columns=['A', 'B'])
  1724. tm.assert_frame_equal(res, exp, check_index_type=True,
  1725. check_column_type=True)
  1726. res = pd.concat([df1, df2], axis=1, ignore_index=True)
  1727. exp = pd.DataFrame([[1, 5, 3, 7], [2, 6, 4, 8]])
  1728. tm.assert_frame_equal(res, exp, check_index_type=True,
  1729. check_column_type=True)
  1730. def test_concat_multiindex_rangeindex(self):
  1731. # GH13542
  1732. # when multi-index levels are RangeIndex objects
  1733. # there is a bug in concat with objects of len 1
  1734. df = DataFrame(np.random.randn(9, 2))
  1735. df.index = MultiIndex(levels=[pd.RangeIndex(3), pd.RangeIndex(3)],
  1736. codes=[np.repeat(np.arange(3), 3),
  1737. np.tile(np.arange(3), 3)])
  1738. res = concat([df.iloc[[2, 3, 4], :], df.iloc[[5], :]])
  1739. exp = df.iloc[[2, 3, 4, 5], :]
  1740. tm.assert_frame_equal(res, exp)
  1741. def test_concat_multiindex_dfs_with_deepcopy(self):
  1742. # GH 9967
  1743. from copy import deepcopy
  1744. example_multiindex1 = pd.MultiIndex.from_product([['a'], ['b']])
  1745. example_dataframe1 = pd.DataFrame([0], index=example_multiindex1)
  1746. example_multiindex2 = pd.MultiIndex.from_product([['a'], ['c']])
  1747. example_dataframe2 = pd.DataFrame([1], index=example_multiindex2)
  1748. example_dict = {'s1': example_dataframe1, 's2': example_dataframe2}
  1749. expected_index = pd.MultiIndex(levels=[['s1', 's2'],
  1750. ['a'],
  1751. ['b', 'c']],
  1752. codes=[[0, 1], [0, 0], [0, 1]],
  1753. names=['testname', None, None])
  1754. expected = pd.DataFrame([[0], [1]], index=expected_index)
  1755. result_copy = pd.concat(deepcopy(example_dict), names=['testname'])
  1756. tm.assert_frame_equal(result_copy, expected)
  1757. result_no_copy = pd.concat(example_dict, names=['testname'])
  1758. tm.assert_frame_equal(result_no_copy, expected)
def test_categorical_concat_append(self):
    """concat/append of categorical frames keeps the category dtype for
    identical categories; GH 13524 allows differing categories, in
    which case the result falls back to object."""
    cat = Categorical(["a", "b"], categories=["a", "b"])
    vals = [1, 2]
    df = DataFrame({"cats": cat, "vals": vals})

    cat2 = Categorical(["a", "b", "a", "b"], categories=["a", "b"])
    vals2 = [1, 2, 1, 2]
    # original row labels are preserved, hence the duplicated index
    exp = DataFrame({"cats": cat2, "vals": vals2},
                    index=Index([0, 1, 0, 1]))

    tm.assert_frame_equal(pd.concat([df, df]), exp)
    tm.assert_frame_equal(df.append(df), exp)

    # GH 13524 can concat different categories
    cat3 = Categorical(["a", "b"], categories=["a", "b", "c"])
    vals3 = [1, 2]
    df_different_categories = DataFrame({"cats": cat3, "vals": vals3})

    # differing categories -> plain object column in the result
    res = pd.concat([df, df_different_categories], ignore_index=True)
    exp = DataFrame({"cats": list('abab'), "vals": [1, 2, 1, 2]})
    tm.assert_frame_equal(res, exp)

    res = df.append(df_different_categories, ignore_index=True)
    tm.assert_frame_equal(res, exp)
  1778. def test_categorical_concat_dtypes(self):
  1779. # GH8143
  1780. index = ['cat', 'obj', 'num']
  1781. cat = Categorical(['a', 'b', 'c'])
  1782. obj = Series(['a', 'b', 'c'])
  1783. num = Series([1, 2, 3])
  1784. df = pd.concat([Series(cat), obj, num], axis=1, keys=index)
  1785. result = df.dtypes == 'object'
  1786. expected = Series([False, True, False], index=index)
  1787. tm.assert_series_equal(result, expected)
  1788. result = df.dtypes == 'int64'
  1789. expected = Series([False, False, True], index=index)
  1790. tm.assert_series_equal(result, expected)
  1791. result = df.dtypes == 'category'
  1792. expected = Series([True, False, False], index=index)
  1793. tm.assert_series_equal(result, expected)
  1794. def test_categorical_concat(self, sort):
  1795. # See GH 10177
  1796. df1 = DataFrame(np.arange(18, dtype='int64').reshape(6, 3),
  1797. columns=["a", "b", "c"])
  1798. df2 = DataFrame(np.arange(14, dtype='int64').reshape(7, 2),
  1799. columns=["a", "c"])
  1800. cat_values = ["one", "one", "two", "one", "two", "two", "one"]
  1801. df2['h'] = Series(Categorical(cat_values))
  1802. res = pd.concat((df1, df2), axis=0, ignore_index=True, sort=sort)
  1803. exp = DataFrame({'a': [0, 3, 6, 9, 12, 15, 0, 2, 4, 6, 8, 10, 12],
  1804. 'b': [1, 4, 7, 10, 13, 16, np.nan, np.nan, np.nan,
  1805. np.nan, np.nan, np.nan, np.nan],
  1806. 'c': [2, 5, 8, 11, 14, 17, 1, 3, 5, 7, 9, 11, 13],
  1807. 'h': [None] * 6 + cat_values})
  1808. tm.assert_frame_equal(res, exp)
  1809. def test_categorical_concat_gh7864(self):
  1810. # GH 7864
  1811. # make sure ordering is preserverd
  1812. df = DataFrame({"id": [1, 2, 3, 4, 5, 6], "raw_grade": list('abbaae')})
  1813. df["grade"] = Categorical(df["raw_grade"])
  1814. df['grade'].cat.set_categories(['e', 'a', 'b'])
  1815. df1 = df[0:3]
  1816. df2 = df[3:]
  1817. tm.assert_index_equal(df['grade'].cat.categories,
  1818. df1['grade'].cat.categories)
  1819. tm.assert_index_equal(df['grade'].cat.categories,
  1820. df2['grade'].cat.categories)
  1821. dfx = pd.concat([df1, df2])
  1822. tm.assert_index_equal(df['grade'].cat.categories,
  1823. dfx['grade'].cat.categories)
  1824. dfa = df1.append(df2)
  1825. tm.assert_index_equal(df['grade'].cat.categories,
  1826. dfa['grade'].cat.categories)
  1827. def test_categorical_concat_preserve(self):
  1828. # GH 8641 series concat not preserving category dtype
  1829. # GH 13524 can concat different categories
  1830. s = Series(list('abc'), dtype='category')
  1831. s2 = Series(list('abd'), dtype='category')
  1832. exp = Series(list('abcabd'))
  1833. res = pd.concat([s, s2], ignore_index=True)
  1834. tm.assert_series_equal(res, exp)
  1835. exp = Series(list('abcabc'), dtype='category')
  1836. res = pd.concat([s, s], ignore_index=True)
  1837. tm.assert_series_equal(res, exp)
  1838. exp = Series(list('abcabc'), index=[0, 1, 2, 0, 1, 2],
  1839. dtype='category')
  1840. res = pd.concat([s, s])
  1841. tm.assert_series_equal(res, exp)
  1842. a = Series(np.arange(6, dtype='int64'))
  1843. b = Series(list('aabbca'))
  1844. df2 = DataFrame({'A': a,
  1845. 'B': b.astype(CategoricalDtype(list('cab')))})
  1846. res = pd.concat([df2, df2])
  1847. exp = DataFrame(
  1848. {'A': pd.concat([a, a]),
  1849. 'B': pd.concat([b, b]).astype(CategoricalDtype(list('cab')))})
  1850. tm.assert_frame_equal(res, exp)
def test_categorical_index_preserver(self):
    """concat preserves a CategoricalIndex created via set_index;
    concatenating an index with mismatched categories raises."""
    a = Series(np.arange(6, dtype='int64'))
    b = Series(list('aabbca'))

    df2 = DataFrame({'A': a,
                     'B': b.astype(CategoricalDtype(list('cab')))
                     }).set_index('B')
    result = pd.concat([df2, df2])
    expected = DataFrame(
        {'A': pd.concat([a, a]),
         'B': pd.concat([b, b]).astype(CategoricalDtype(list('cab')))
         }).set_index('B')
    tm.assert_frame_equal(result, expected)

    # wrong categories
    df3 = DataFrame({'A': a, 'B': Categorical(b, categories=list('abe'))
                     }).set_index('B')
    msg = "categories must match existing categories when appending"
    with pytest.raises(TypeError, match=msg):
        pd.concat([df2, df3])
  1869. def test_concat_categoricalindex(self):
  1870. # GH 16111, categories that aren't lexsorted
  1871. categories = [9, 0, 1, 2, 3]
  1872. a = pd.Series(1, index=pd.CategoricalIndex([9, 0],
  1873. categories=categories))
  1874. b = pd.Series(2, index=pd.CategoricalIndex([0, 1],
  1875. categories=categories))
  1876. c = pd.Series(3, index=pd.CategoricalIndex([1, 2],
  1877. categories=categories))
  1878. result = pd.concat([a, b, c], axis=1)
  1879. exp_idx = pd.CategoricalIndex([9, 0, 1, 2], categories=categories)
  1880. exp = pd.DataFrame({0: [1, 1, np.nan, np.nan],
  1881. 1: [np.nan, 2, 2, np.nan],
  1882. 2: [np.nan, np.nan, 3, 3]},
  1883. columns=[0, 1, 2],
  1884. index=exp_idx)
  1885. tm.assert_frame_equal(result, exp)
  1886. def test_concat_order(self):
  1887. # GH 17344
  1888. dfs = [pd.DataFrame(index=range(3), columns=['a', 1, None])]
  1889. dfs += [pd.DataFrame(index=range(3), columns=[None, 1, 'a'])
  1890. for i in range(100)]
  1891. result = pd.concat(dfs, sort=True).columns
  1892. if PY2:
  1893. # Different sort order between incomparable objects between
  1894. # python 2 and python3 via Index.union.
  1895. expected = dfs[1].columns
  1896. else:
  1897. expected = dfs[0].columns
  1898. tm.assert_index_equal(result, expected)
  1899. def test_concat_datetime_timezone(self):
  1900. # GH 18523
  1901. idx1 = pd.date_range('2011-01-01', periods=3, freq='H',
  1902. tz='Europe/Paris')
  1903. idx2 = pd.date_range(start=idx1[0], end=idx1[-1], freq='H')
  1904. df1 = pd.DataFrame({'a': [1, 2, 3]}, index=idx1)
  1905. df2 = pd.DataFrame({'b': [1, 2, 3]}, index=idx2)
  1906. result = pd.concat([df1, df2], axis=1)
  1907. exp_idx = DatetimeIndex(['2011-01-01 00:00:00+01:00',
  1908. '2011-01-01 01:00:00+01:00',
  1909. '2011-01-01 02:00:00+01:00'],
  1910. freq='H'
  1911. ).tz_convert('UTC').tz_convert('Europe/Paris')
  1912. expected = pd.DataFrame([[1, 1], [2, 2], [3, 3]],
  1913. index=exp_idx, columns=['a', 'b'])
  1914. tm.assert_frame_equal(result, expected)
  1915. idx3 = pd.date_range('2011-01-01', periods=3,
  1916. freq='H', tz='Asia/Tokyo')
  1917. df3 = pd.DataFrame({'b': [1, 2, 3]}, index=idx3)
  1918. result = pd.concat([df1, df3], axis=1)
  1919. exp_idx = DatetimeIndex(['2010-12-31 15:00:00+00:00',
  1920. '2010-12-31 16:00:00+00:00',
  1921. '2010-12-31 17:00:00+00:00',
  1922. '2010-12-31 23:00:00+00:00',
  1923. '2011-01-01 00:00:00+00:00',
  1924. '2011-01-01 01:00:00+00:00']
  1925. )
  1926. expected = pd.DataFrame([[np.nan, 1], [np.nan, 2], [np.nan, 3],
  1927. [1, np.nan], [2, np.nan], [3, np.nan]],
  1928. index=exp_idx, columns=['a', 'b'])
  1929. tm.assert_frame_equal(result, expected)
  1930. # GH 13783: Concat after resample
  1931. result = pd.concat([df1.resample('H').mean(),
  1932. df2.resample('H').mean()], sort=True)
  1933. expected = pd.DataFrame({'a': [1, 2, 3] + [np.nan] * 3,
  1934. 'b': [np.nan] * 3 + [1, 2, 3]},
  1935. index=idx1.append(idx1))
  1936. tm.assert_frame_equal(result, expected)
  1937. @pytest.mark.skipif(PY2, reason="Unhashable Decimal dtype")
  1938. def test_concat_different_extension_dtypes_upcasts(self):
  1939. a = pd.Series(pd.core.arrays.integer_array([1, 2]))
  1940. b = pd.Series(to_decimal([1, 2]))
  1941. result = pd.concat([a, b], ignore_index=True)
  1942. expected = pd.Series([
  1943. 1, 2,
  1944. Decimal(1), Decimal(2)
  1945. ], dtype=object)
  1946. tm.assert_series_equal(result, expected)
  1947. @pytest.mark.parametrize('pdt', [pd.Series, pd.DataFrame, pd.Panel])
  1948. @pytest.mark.parametrize('dt', np.sctypes['float'])
  1949. @pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning")
  1950. def test_concat_no_unnecessary_upcast(dt, pdt):
  1951. # GH 13247
  1952. dims = pdt().ndim
  1953. dfs = [pdt(np.array([1], dtype=dt, ndmin=dims)),
  1954. pdt(np.array([np.nan], dtype=dt, ndmin=dims)),
  1955. pdt(np.array([5], dtype=dt, ndmin=dims))]
  1956. x = pd.concat(dfs)
  1957. assert x.values.dtype == dt
  1958. @pytest.mark.parametrize('pdt', [pd.Series, pd.DataFrame, pd.Panel])
  1959. @pytest.mark.parametrize('dt', np.sctypes['int'])
  1960. @pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning")
  1961. def test_concat_will_upcast(dt, pdt):
  1962. with catch_warnings(record=True):
  1963. dims = pdt().ndim
  1964. dfs = [pdt(np.array([1], dtype=dt, ndmin=dims)),
  1965. pdt(np.array([np.nan], ndmin=dims)),
  1966. pdt(np.array([5], dtype=dt, ndmin=dims))]
  1967. x = pd.concat(dfs)
  1968. assert x.values.dtype == 'float64'
  1969. def test_concat_empty_and_non_empty_frame_regression():
  1970. # GH 18178 regression test
  1971. df1 = pd.DataFrame({'foo': [1]})
  1972. df2 = pd.DataFrame({'foo': []})
  1973. expected = pd.DataFrame({'foo': [1.0]})
  1974. result = pd.concat([df1, df2])
  1975. assert_frame_equal(result, expected)
  1976. def test_concat_empty_and_non_empty_series_regression():
  1977. # GH 18187 regression test
  1978. s1 = pd.Series([1])
  1979. s2 = pd.Series([])
  1980. expected = s1
  1981. result = pd.concat([s1, s2])
  1982. tm.assert_series_equal(result, expected)
  1983. def test_concat_sorts_columns(sort_with_none):
  1984. # GH-4588
  1985. df1 = pd.DataFrame({"a": [1, 2], "b": [1, 2]}, columns=['b', 'a'])
  1986. df2 = pd.DataFrame({"a": [3, 4], "c": [5, 6]})
  1987. # for sort=True/None
  1988. expected = pd.DataFrame({"a": [1, 2, 3, 4],
  1989. "b": [1, 2, None, None],
  1990. "c": [None, None, 5, 6]},
  1991. columns=['a', 'b', 'c'])
  1992. if sort_with_none is False:
  1993. expected = expected[['b', 'a', 'c']]
  1994. if sort_with_none is None:
  1995. # only warn if not explicitly specified
  1996. ctx = tm.assert_produces_warning(FutureWarning)
  1997. else:
  1998. ctx = tm.assert_produces_warning(None)
  1999. # default
  2000. with ctx:
  2001. result = pd.concat([df1, df2], ignore_index=True, sort=sort_with_none)
  2002. tm.assert_frame_equal(result, expected)
  2003. def test_concat_sorts_index(sort_with_none):
  2004. df1 = pd.DataFrame({"a": [1, 2, 3]}, index=['c', 'a', 'b'])
  2005. df2 = pd.DataFrame({"b": [1, 2]}, index=['a', 'b'])
  2006. # For True/None
  2007. expected = pd.DataFrame({"a": [2, 3, 1], "b": [1, 2, None]},
  2008. index=['a', 'b', 'c'],
  2009. columns=['a', 'b'])
  2010. if sort_with_none is False:
  2011. expected = expected.loc[['c', 'a', 'b']]
  2012. if sort_with_none is None:
  2013. # only warn if not explicitly specified
  2014. ctx = tm.assert_produces_warning(FutureWarning)
  2015. else:
  2016. ctx = tm.assert_produces_warning(None)
  2017. # Warn and sort by default
  2018. with ctx:
  2019. result = pd.concat([df1, df2], axis=1, sort=sort_with_none)
  2020. tm.assert_frame_equal(result, expected)
  2021. def test_concat_inner_sort(sort_with_none):
  2022. # https://github.com/pandas-dev/pandas/pull/20613
  2023. df1 = pd.DataFrame({"a": [1, 2], "b": [1, 2], "c": [1, 2]},
  2024. columns=['b', 'a', 'c'])
  2025. df2 = pd.DataFrame({"a": [1, 2], 'b': [3, 4]}, index=[3, 4])
  2026. with tm.assert_produces_warning(None):
  2027. # unset sort should *not* warn for inner join
  2028. # since that never sorted
  2029. result = pd.concat([df1, df2], sort=sort_with_none,
  2030. join='inner',
  2031. ignore_index=True)
  2032. expected = pd.DataFrame({"b": [1, 2, 3, 4], "a": [1, 2, 1, 2]},
  2033. columns=['b', 'a'])
  2034. if sort_with_none is True:
  2035. expected = expected[['a', 'b']]
  2036. tm.assert_frame_equal(result, expected)
  2037. def test_concat_aligned_sort():
  2038. # GH-4588
  2039. df = pd.DataFrame({"c": [1, 2], "b": [3, 4], 'a': [5, 6]},
  2040. columns=['c', 'b', 'a'])
  2041. result = pd.concat([df, df], sort=True, ignore_index=True)
  2042. expected = pd.DataFrame({'a': [5, 6, 5, 6], 'b': [3, 4, 3, 4],
  2043. 'c': [1, 2, 1, 2]},
  2044. columns=['a', 'b', 'c'])
  2045. tm.assert_frame_equal(result, expected)
  2046. result = pd.concat([df, df[['c', 'b']]], join='inner', sort=True,
  2047. ignore_index=True)
  2048. expected = expected[['b', 'c']]
  2049. tm.assert_frame_equal(result, expected)
  2050. def test_concat_aligned_sort_does_not_raise():
  2051. # GH-4588
  2052. # We catch TypeErrors from sorting internally and do not re-raise.
  2053. df = pd.DataFrame({1: [1, 2], "a": [3, 4]}, columns=[1, 'a'])
  2054. expected = pd.DataFrame({1: [1, 2, 1, 2], 'a': [3, 4, 3, 4]},
  2055. columns=[1, 'a'])
  2056. result = pd.concat([df, df], ignore_index=True, sort=True)
  2057. tm.assert_frame_equal(result, expected)
  2058. @pytest.mark.parametrize("s1name,s2name", [
  2059. (np.int64(190), (43, 0)), (190, (43, 0))])
  2060. def test_concat_series_name_npscalar_tuple(s1name, s2name):
  2061. # GH21015
  2062. s1 = pd.Series({'a': 1, 'b': 2}, name=s1name)
  2063. s2 = pd.Series({'c': 5, 'd': 6}, name=s2name)
  2064. result = pd.concat([s1, s2])
  2065. expected = pd.Series({'a': 1, 'b': 2, 'c': 5, 'd': 6})
  2066. tm.assert_series_equal(result, expected)
  2067. def test_concat_categorical_tz():
  2068. # GH-23816
  2069. a = pd.Series(pd.date_range('2017-01-01', periods=2, tz='US/Pacific'))
  2070. b = pd.Series(['a', 'b'], dtype='category')
  2071. result = pd.concat([a, b], ignore_index=True)
  2072. expected = pd.Series([
  2073. pd.Timestamp('2017-01-01', tz="US/Pacific"),
  2074. pd.Timestamp('2017-01-02', tz="US/Pacific"),
  2075. 'a', 'b'
  2076. ])
  2077. tm.assert_series_equal(result, expected)