test_alter_axes.py 57 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444
  1. # -*- coding: utf-8 -*-
  2. from __future__ import print_function
  3. from datetime import datetime, timedelta
  4. import inspect
  5. import numpy as np
  6. import pytest
  7. from pandas.compat import PY2, lrange
  8. from pandas.core.dtypes.common import (
  9. is_categorical_dtype, is_interval_dtype, is_object_dtype)
  10. from pandas import (
  11. Categorical, DataFrame, DatetimeIndex, Index, IntervalIndex, MultiIndex,
  12. RangeIndex, Series, Timestamp, cut, date_range, to_datetime)
  13. import pandas.util.testing as tm
  14. class TestDataFrameAlterAxes():
  def test_set_index_directly(self, float_string_frame):
      """Assign a new index through the ``index`` attribute.

      A same-length index must be adopted as-is; a strided slice of it
      is expected to be rejected with a length-mismatch ``ValueError``.
      """
      frame = float_string_frame
      reversed_idx = Index(np.arange(len(frame))[::-1])

      frame.index = reversed_idx
      tm.assert_index_equal(frame.index, reversed_idx)

      # every second label only -> no longer lines up with the rows
      with pytest.raises(ValueError, match='Length mismatch'):
          frame.index = reversed_idx[::2]
  def test_set_index(self, float_string_frame):
      """Pass an ``Index`` object to ``set_index``.

      A same-length index becomes the frame's index; a strided slice of
      it is expected to raise a length-mismatch ``ValueError``.
      """
      reversed_idx = Index(np.arange(len(float_string_frame))[::-1])

      reindexed = float_string_frame.set_index(reversed_idx)
      tm.assert_index_equal(reindexed.index, reversed_idx)

      # the failing call is made on the frame returned by set_index
      with pytest.raises(ValueError, match='Length mismatch'):
          reindexed.set_index(reversed_idx[::2])
  def test_set_index_cast(self):
      """Re-set the frame's own index after casting it to ``int32``."""
      # issue casting an index then set_index
      original = DataFrame(
          {'A': [1.1, 2.2, 3.3], 'B': [5.0, 6.1, 7.2]},
          index=[2010, 2011, 2012])

      int32_index = original.index.astype(np.int32)
      recast = original.set_index(int32_index)

      tm.assert_frame_equal(original, recast)
  # A has duplicate values, C does not
  @pytest.mark.parametrize('keys', ['A', 'C', ['A', 'B'],
                                    ('tuple', 'as', 'label')])
  @pytest.mark.parametrize('inplace', [True, False])
  @pytest.mark.parametrize('drop', [True, False])
  def test_set_index_drop_inplace(self, frame_of_index_cols,
                                  drop, inplace, keys):
      """Check ``set_index`` by column key(s) for every drop/inplace combo.

      ``keys`` is a single label, a list of labels, or a tuple that is
      itself one column label.  The expected frame is built by hand:
      the key column(s) are optionally dropped and the index is assigned
      directly.
      """
      df = frame_of_index_cols

      if isinstance(keys, list):
          idx = MultiIndex.from_arrays([df[x] for x in keys], names=keys)
      else:
          idx = Index(df[keys], name=keys)

      # Copy when nothing is dropped: otherwise ``expected`` would alias
      # the fixture frame and assigning its index below would also change
      # the input that ``set_index`` is exercised on.
      expected = df.drop(keys, axis=1) if drop else df.copy()
      expected.index = idx

      if inplace:
          # work on a copy so the in-place call cannot touch the fixture
          result = df.copy()
          result.set_index(keys, drop=drop, inplace=True)
      else:
          result = df.set_index(keys, drop=drop)

      tm.assert_frame_equal(result, expected)
  # A has duplicate values, C does not
  @pytest.mark.parametrize('keys', ['A', 'C', ['A', 'B'],
                                    ('tuple', 'as', 'label')])
  @pytest.mark.parametrize('drop', [True, False])
  def test_set_index_append(self, frame_of_index_cols, drop, keys):
      """Append column key(s) to the existing index with ``append=True``.

      The expected index stacks the original (unnamed) index in front of
      the key column(s).
      """
      frame = frame_of_index_cols
      if not isinstance(keys, list):
          # normalise a single label so it can be iterated/concatenated
          keys = [keys]

      levels = [frame.index] + [frame[label] for label in keys]
      stacked = MultiIndex.from_arrays(levels, names=[None] + keys)

      if drop:
          expected = frame.drop(keys, axis=1)
      else:
          expected = frame.copy()
      expected.index = stacked

      result = frame.set_index(keys, drop=drop, append=True)

      tm.assert_frame_equal(result, expected)
  # A has duplicate values, C does not
  @pytest.mark.parametrize('keys', ['A', 'C', ['A', 'B'],
                                    ('tuple', 'as', 'label')])
  @pytest.mark.parametrize('drop', [True, False])
  def test_set_index_append_to_multiindex(self, frame_of_index_cols,
                                          drop, keys):
      """Appending in two steps must equal appending everything at once."""
      key_list = keys if isinstance(keys, list) else [keys]

      # append to existing multiindex
      with_d = frame_of_index_cols.set_index(['D'], drop=drop, append=True)

      # reference: 'D' and the keys appended in a single call
      expected = frame_of_index_cols.set_index(['D'] + key_list,
                                               drop=drop, append=True)

      result = with_d.set_index(key_list, drop=drop, append=True)

      tm.assert_frame_equal(result, expected)
  81. def test_set_index_after_mutation(self):
  82. # GH1590
  83. df = DataFrame({'val': [0, 1, 2], 'key': ['a', 'b', 'c']})
  84. expected = DataFrame({'val': [1, 2]},
  85. Index(['b', 'c'], name='key'))
  86. df2 = df.loc[df.index.map(lambda indx: indx >= 1)]
  87. result = df2.set_index('key')
  88. tm.assert_frame_equal(result, expected)
  # MultiIndex constructor does not work directly on Series -> lambda
  # Add list-of-list constructor because list is ambiguous -> lambda
  # also test index name if append=True (name is duplicate here for B)
  @pytest.mark.parametrize('box', [Series, Index, np.array,
                                   list, lambda x: [list(x)],
                                   lambda x: MultiIndex.from_arrays([x])])
  @pytest.mark.parametrize('append, index_name', [(True, None),
                           (True, 'B'), (True, 'test'), (False, None)])
  @pytest.mark.parametrize('drop', [True, False])
  def test_set_index_pass_single_array(self, frame_of_index_cols,
                                       drop, append, index_name, box):
      """Pass column ``B`` to ``set_index`` wrapped in various containers.

      A plain list of the values is expected to be read as a list of
      column keys and raise ``KeyError``; every other container is
      compared against setting the index from column ``'B'`` by label.
      """
      frame = frame_of_index_cols
      frame.index.name = index_name

      key = box(frame['B'])

      if box == list:
          # list of strings gets interpreted as list of keys
          # NOTE(review): the bracketed text is used as a regex pattern
          # by ``match`` -- confirm the loose match is intended
          msg = "['one', 'two', 'three', 'one', 'two']"
          with pytest.raises(KeyError, match=msg):
              frame.set_index(key, drop=drop, append=append)
          return

      # np.array/list-of-list "forget" the name of B
      level_names = getattr(key, 'names', None)
      if level_names is None:
          level_names = [getattr(key, 'name', None)]

      result = frame.set_index(key, drop=drop, append=append)

      # only valid column keys are dropped
      # since B is always passed as array above, nothing is dropped
      expected = frame.set_index(['B'], drop=False, append=append)
      if append:
          expected.index.names = [index_name] + level_names
      else:
          expected.index.names = level_names

      tm.assert_frame_equal(result, expected)
  # MultiIndex constructor does not work directly on Series -> lambda
  # also test index name if append=True (name is duplicate here for A & B)
  @pytest.mark.parametrize('box', [Series, Index, np.array, list,
                                   lambda x: MultiIndex.from_arrays([x])])
  @pytest.mark.parametrize('append, index_name',
                           [(True, None), (True, 'A'), (True, 'B'),
                            (True, 'test'), (False, None)])
  @pytest.mark.parametrize('drop', [True, False])
  def test_set_index_pass_arrays(self, frame_of_index_cols,
                                 drop, append, index_name, box):
      """Mix a column label (``'A'``) with a boxed array (column ``B``)."""
      frame = frame_of_index_cols
      frame.index.name = index_name

      keys = ['A', box(frame['B'])]

      # np.array/list "forget" the name of B
      nameless_boxes = [np.array, list, tuple, iter]
      second_name = None if box in nameless_boxes else 'B'
      names = ['A', second_name]

      result = frame.set_index(keys, drop=drop, append=append)

      # only valid column keys are dropped
      # since B is always passed as array above, only A is dropped, if at all
      expected = frame.set_index(['A', 'B'], drop=False, append=append)
      if drop:
          expected = expected.drop('A', axis=1)
      if append:
          expected.index.names = [index_name] + names
      else:
          expected.index.names = names

      tm.assert_frame_equal(result, expected)
  # MultiIndex constructor does not work directly on Series -> lambda
  # We also emulate a "constructor" for the label -> lambda
  # also test index name if append=True (name is duplicate here for A)
  @pytest.mark.parametrize('box2', [Series, Index, np.array, list,
                                    lambda x: MultiIndex.from_arrays([x]),
                                    lambda x: x.name])
  @pytest.mark.parametrize('box1', [Series, Index, np.array, list,
                                    lambda x: MultiIndex.from_arrays([x]),
                                    lambda x: x.name])
  @pytest.mark.parametrize('append, index_name', [(True, None),
                           (True, 'A'), (True, 'test'), (False, None)])
  @pytest.mark.parametrize('drop', [True, False])
  def test_set_index_pass_arrays_duplicate(self, frame_of_index_cols, drop,
                                           append, index_name, box1, box2):
      """Pass column ``A`` twice, each time boxed independently.

      The result of one ``set_index`` call with both keys is compared
      against adding the two keys one after the other.
      """
      df = frame_of_index_cols
      df.index.name = index_name

      keys = [box1(df['A']), box2(df['A'])]
      result = df.set_index(keys, drop=drop, append=append)

      # need to adapt first drop for case that both keys are 'A' --
      # cannot drop the same column twice;
      # check the type first: a bare == on the array-like keys would give
      # an ambiguous Boolean error, and identity comparison against a
      # string literal depends on interning
      both_are_label = all(isinstance(key, str) and key == 'A'
                           for key in keys)
      first_drop = False if both_are_label else drop

      # to test against already-tested behaviour, we add sequentially,
      # hence second append always True; must wrap keys in list, otherwise
      # box = list would be interpreted as keys
      expected = df.set_index([keys[0]], drop=first_drop, append=append)
      expected = expected.set_index([keys[1]], drop=drop, append=True)

      tm.assert_frame_equal(result, expected)
  @pytest.mark.parametrize('append', [True, False])
  @pytest.mark.parametrize('drop', [True, False])
  def test_set_index_pass_multiindex(self, frame_of_index_cols,
                                     drop, append):
      """Pass a ready-made ``MultiIndex`` built from columns A and B."""
      frame = frame_of_index_cols
      levels = [frame['A'], frame['B']]
      mi_key = MultiIndex.from_arrays(levels, names=['A', 'B'])

      result = frame.set_index(mi_key, drop=drop, append=append)

      # setting with a MultiIndex will never drop columns
      expected = frame.set_index(['A', 'B'], drop=False, append=append)

      tm.assert_frame_equal(result, expected)
  def test_set_index_verify_integrity(self, frame_of_index_cols):
      """``verify_integrity=True`` is expected to reject duplicate keys."""
      frame = frame_of_index_cols
      dup_msg = 'Index has duplicate keys'

      # single column key
      with pytest.raises(ValueError, match=dup_msg):
          frame.set_index('A', verify_integrity=True)

      # with MultiIndex
      with pytest.raises(ValueError, match=dup_msg):
          frame.set_index([frame['A'], frame['A']], verify_integrity=True)
  @pytest.mark.parametrize('append', [True, False])
  @pytest.mark.parametrize('drop', [True, False])
  def test_set_index_raise_keys(self, frame_of_index_cols, drop, append):
      """Unknown column keys and tuples are expected to raise ``KeyError``.

      NOTE(review): the bracketed ``match`` strings below are regular
      expressions, so they match loosely -- confirm that is intended.
      """
      frame = frame_of_index_cols
      kwargs = {'drop': drop, 'append': append}

      with pytest.raises(KeyError, match="['foo', 'bar', 'baz']"):
          # column names are A-E, as well as one tuple
          frame.set_index(['foo', 'bar', 'baz'], **kwargs)

      # non-existent key in list with arrays
      with pytest.raises(KeyError, match='X'):
          frame.set_index([frame['A'], frame['B'], 'X'], **kwargs)

      msg = "[('foo', 'foo', 'foo', 'bar', 'bar')]"

      # tuples always raise KeyError
      with pytest.raises(KeyError, match=msg):
          frame.set_index(tuple(frame['A']), **kwargs)

      # also within a list
      with pytest.raises(KeyError, match=msg):
          frame.set_index(['A', frame['A'], tuple(frame['A'])], **kwargs)
  @pytest.mark.xfail(reason='broken due to revert, see GH 25085')
  @pytest.mark.parametrize('append', [True, False])
  @pytest.mark.parametrize('drop', [True, False])
  @pytest.mark.parametrize('box', [set, iter, lambda x: (y for y in x)],
                           ids=['set', 'iter', 'generator'])
  def test_set_index_raise_on_type(self, frame_of_index_cols, box,
                                   drop, append):
      """Sets, iterators and generators as keys should raise ``TypeError``.

      Currently marked ``xfail`` (see the decorator's reason).
      """
      frame = frame_of_index_cols
      kwargs = {'drop': drop, 'append': append}

      msg = 'The parameter "keys" may be a column key, .*'

      # forbidden type, e.g. set/iter/generator
      with pytest.raises(TypeError, match=msg):
          frame.set_index(box(frame['A']), **kwargs)

      # forbidden type in list, e.g. set/iter/generator
      with pytest.raises(TypeError, match=msg):
          frame.set_index(['A', frame['A'], box(frame['A'])], **kwargs)
  def test_set_index_custom_label_type(self):
      """Use instances of a user-defined class as column labels/keys."""
      # GH 24969

      class Thing(object):
          def __init__(self, name, color):
              self.name = name
              self.color = color

          def __str__(self):
              return "<Thing %r>" % (self.name,)

          # necessary for pretty KeyError
          __repr__ = __str__

      label_one = Thing('One', 'red')
      label_two = Thing('Two', 'blue')
      frame = DataFrame({label_one: [0, 1], label_two: [2, 3]})
      expected = DataFrame({label_one: [0, 1]},
                           index=Index([2, 3], name=label_two))

      # use custom label directly
      tm.assert_frame_equal(frame.set_index(label_two), expected)

      # custom label wrapped in list
      tm.assert_frame_equal(frame.set_index([label_two]), expected)

      # missing key
      absent = Thing('Three', 'pink')
      msg = "<Thing 'Three'>"

      with pytest.raises(KeyError, match=msg):
          # missing label directly
          frame.set_index(absent)

      with pytest.raises(KeyError, match=msg):
          # missing label in list
          frame.set_index([absent])
  def test_set_index_custom_label_hashable_iterable(self):
      """Use labels that are both hashable and iterable as keys."""
      # GH 24969

      # actual example discussed in GH 24984 was e.g. for shapely.geometry
      # objects (e.g. a collection of Points) that can be both hashable and
      # iterable; using frozenset as a stand-in for testing here

      class Thing(frozenset):
          # need to stabilize repr for KeyError (due to random order in sets)
          def __repr__(self):
              members = sorted(self)
              # double curly brace prints one brace in format string
              return "frozenset({{{}}})".format(', '.join(map(repr, members)))

      label_one = Thing(['One', 'red'])
      label_two = Thing(['Two', 'blue'])
      frame = DataFrame({label_one: [0, 1], label_two: [2, 3]})
      expected = DataFrame({label_one: [0, 1]},
                           index=Index([2, 3], name=label_two))

      # use custom label directly
      tm.assert_frame_equal(frame.set_index(label_two), expected)

      # custom label wrapped in list
      tm.assert_frame_equal(frame.set_index([label_two]), expected)

      # missing key
      absent = Thing(['Three', 'pink'])
      msg = '.*'  # due to revert, see GH 25085

      with pytest.raises(KeyError, match=msg):
          # missing label directly
          frame.set_index(absent)

      with pytest.raises(KeyError, match=msg):
          # missing label in list
          frame.set_index([absent])
  def test_set_index_custom_label_type_raises(self):
      """Unhashable custom labels are expected to raise ``TypeError``."""
      # GH 24969

      # purposefully inherit from something unhashable
      class Thing(set):
          def __init__(self, name, color):
              self.name = name
              self.color = color

          def __str__(self):
              return "<Thing %r>" % (self.name,)

      label_one = Thing('One', 'red')
      label_two = Thing('Two', 'blue')
      frame = DataFrame([[0, 2], [1, 3]], columns=[label_one, label_two])

      msg = 'unhashable type.*'

      with pytest.raises(TypeError, match=msg):
          # use custom label directly
          frame.set_index(label_two)

      with pytest.raises(TypeError, match=msg):
          # custom label wrapped in list
          frame.set_index([label_two])
  def test_construction_with_categorical_index(self):
      """A categorical column set as index must equal the source index."""
      cat_index = tm.makeCategoricalIndex(10)
      cat_index.name = 'B'

      # with Categorical
      frame = DataFrame({'A': np.random.randn(10),
                         'B': cat_index.values})
      indexed = frame.set_index('B')
      tm.assert_index_equal(indexed.index, cat_index)

      # from a CategoricalIndex
      frame = DataFrame({'A': np.random.randn(10),
                         'B': cat_index})
      indexed = frame.set_index('B')
      tm.assert_index_equal(indexed.index, cat_index)

      # round-trip
      round_tripped = indexed.reset_index().set_index('B')
      tm.assert_index_equal(round_tripped.index, cat_index)
  315. def test_set_index_cast_datetimeindex(self):
  316. df = DataFrame({'A': [datetime(2000, 1, 1) + timedelta(i)
  317. for i in range(1000)],
  318. 'B': np.random.randn(1000)})
  319. idf = df.set_index('A')
  320. assert isinstance(idf.index, DatetimeIndex)
  def test_convert_dti_to_series(self):
      """Convert a tz-aware ``DatetimeIndex`` to a Series in several ways.

      Covers the ``Series`` constructor, column assignment, ``to_series``
      with and without ``keep_tz`` (the latter paths are expected to emit
      a ``FutureWarning``), a list of tz-aware datetimes, and setting a
      tz-aware column as the index by hand.
      """
      # don't cast a DatetimeIndex WITH a tz, leave as object
      # GH 6032
      naive = to_datetime(['2013-1-1 13:00',
                           '2013-1-2 14:00'])
      idx = DatetimeIndex(naive, name='B').tz_localize('US/Pacific')
      df = DataFrame(np.random.randn(2, 1), columns=['A'])

      stamps = [Timestamp('2013-01-01 13:00:00-0800', tz='US/Pacific'),
                Timestamp('2013-01-02 14:00:00-0800', tz='US/Pacific')]
      expected = Series(np.array(stamps, dtype="object"), name='B')

      # convert index to series
      as_series = Series(idx)
      tm.assert_series_equal(as_series, expected)

      # assign to frame
      df['B'] = idx
      column = df['B']
      tm.assert_series_equal(column, expected)

      # convert to series while keeping the timezone
      kept_tz = idx.to_series(keep_tz=True, index=[0, 1])
      tm.assert_series_equal(kept_tz, expected)

      # convert to utc
      with tm.assert_produces_warning(FutureWarning):
          df['B'] = idx.to_series(keep_tz=False, index=[0, 1])
      column = df['B']
      comp = Series(DatetimeIndex(expected.values).tz_localize(None),
                    name='B')
      tm.assert_series_equal(column, comp)

      # default keep_tz
      with tm.assert_produces_warning(FutureWarning):
          converted = idx.to_series(index=[0, 1])
      tm.assert_series_equal(converted, expected.dt.tz_convert(None))

      # explicit keep_tz=False
      with tm.assert_produces_warning(FutureWarning):
          converted = idx.to_series(keep_tz=False, index=[0, 1])
      tm.assert_series_equal(converted, expected.dt.tz_convert(None))

      # list of datetimes with a tz
      df['B'] = idx.to_pydatetime()
      column = df['B']
      tm.assert_series_equal(column, expected)

      # GH 6785
      # set the index manually
      import pytz
      df = DataFrame(
          [{'ts': datetime(2014, 4, 1, tzinfo=pytz.utc), 'foo': 1}])
      expected = df.set_index('ts')
      df.index = df['ts']
      df.pop('ts')
      tm.assert_frame_equal(df, expected)
  def test_reset_index_tz(self, tz_aware_fixture):
      """``reset_index`` turns a tz-aware index into a tz-aware column."""
      # GH 3950
      # reset_index with single level
      tz = tz_aware_fixture
      letters = ['A', 'B', 'C', 'D', 'E']

      idx = date_range('1/1/2011', periods=5,
                       freq='D', tz=tz, name='idx')
      df = DataFrame({'a': range(5), 'b': letters},
                     index=idx)

      # naive datetimes for 2011-01-01 .. 2011-01-05, localized below
      naive_days = [datetime(2011, 1, day) for day in range(1, 6)]
      expected = DataFrame({'idx': naive_days,
                            'a': range(5),
                            'b': ['A', 'B', 'C', 'D', 'E']},
                           columns=['idx', 'a', 'b'])
      expected['idx'] = expected['idx'].apply(lambda d: Timestamp(d, tz=tz))

      tm.assert_frame_equal(df.reset_index(), expected)
  387. def test_set_index_timezone(self):
  388. # GH 12358
  389. # tz-aware Series should retain the tz
  390. idx = to_datetime(["2014-01-01 10:10:10"],
  391. utc=True).tz_convert('Europe/Rome')
  392. df = DataFrame({'A': idx})
  393. assert df.set_index(idx).index[0].hour == 11
  394. assert DatetimeIndex(Series(df.A))[0].hour == 11
  395. assert df.set_index(df.A).index[0].hour == 11
  def test_set_index_dst(self):
      """Round-trip an hourly US/Pacific index through ``reset_index``."""
      hourly = date_range('2006-10-29 00:00:00', periods=3,
                          freq='H', tz='US/Pacific')

      frame = DataFrame(data={'a': [0, 1, 2], 'b': [3, 4, 5]},
                        index=hourly).reset_index()

      # single level
      single = frame.set_index('index')
      single_expected = DataFrame(data={'a': [0, 1, 2], 'b': [3, 4, 5]},
                                  index=Index(hourly, name='index'))
      tm.assert_frame_equal(single, single_expected)

      # GH 12920
      multi = frame.set_index(['index', 'a'])
      multi_index = MultiIndex.from_arrays([hourly, [0, 1, 2]],
                                           names=['index', 'a'])
      multi_expected = DataFrame({'b': [3, 4, 5]}, index=multi_index)
      tm.assert_frame_equal(multi, multi_expected)
  412. def test_reset_index_with_intervals(self):
  413. idx = IntervalIndex.from_breaks(np.arange(11), name='x')
  414. original = DataFrame({'x': idx, 'y': np.arange(10)})[['x', 'y']]
  415. result = original.set_index('x')
  416. expected = DataFrame({'y': np.arange(10)}, index=idx)
  417. tm.assert_frame_equal(result, expected)
  418. result2 = result.reset_index()
  419. tm.assert_frame_equal(result2, original)
  420. def test_set_index_multiindexcolumns(self):
  421. columns = MultiIndex.from_tuples([('foo', 1), ('foo', 2), ('bar', 1)])
  422. df = DataFrame(np.random.randn(3, 3), columns=columns)
  423. result = df.set_index(df.columns[0])
  424. expected = df.iloc[:, 1:]
  425. expected.index = df.iloc[:, 0].values
  426. expected.index.names = [df.columns[0]]
  427. tm.assert_frame_equal(result, expected)
  428. def test_set_index_empty_column(self):
  429. # GH 1971
  430. df = DataFrame([
  431. {'a': 1, 'p': 0},
  432. {'a': 2, 'm': 10},
  433. {'a': 3, 'm': 11, 'p': 20},
  434. {'a': 4, 'm': 12, 'p': 21}
  435. ], columns=('a', 'm', 'p', 'x'))
  436. result = df.set_index(['a', 'x'])
  437. expected = df[['m', 'p']]
  438. expected.index = MultiIndex.from_arrays([df['a'], df['x']],
  439. names=['a', 'x'])
  440. tm.assert_frame_equal(result, expected)
  def test_set_columns(self, float_string_frame):
      """Assign new column labels through the ``columns`` attribute.

      A strided slice of the labels is expected to raise a
      length-mismatch ``ValueError``.
      """
      frame = float_string_frame
      new_cols = Index(np.arange(len(frame.columns)))

      frame.columns = new_cols

      with pytest.raises(ValueError, match='Length mismatch'):
          frame.columns = new_cols[::2]
  def test_dti_set_index_reindex(self):
      """``set_index``/``reindex`` with tz-aware date ranges."""
      # GH 6631
      frame = DataFrame(np.random.random(6))
      eastern = date_range('2011/01/01', periods=6, freq='M',
                           tz='US/Eastern')
      tokyo = date_range('2013', periods=6, freq='A', tz='Asia/Tokyo')

      frame = frame.set_index(eastern)
      tm.assert_index_equal(frame.index, eastern)

      frame = frame.reindex(tokyo)
      tm.assert_index_equal(frame.index, tokyo)

      # GH 11314
      # with tz
      first_day = date_range(datetime(2015, 10, 1),
                             datetime(2015, 10, 1, 23),
                             freq='H', tz='US/Eastern')
      hourly = DataFrame(np.random.randn(24, 1), columns=['a'],
                         index=first_day)
      second_day = date_range(datetime(2015, 10, 2),
                              datetime(2015, 10, 2, 23),
                              freq='H', tz='US/Eastern')

      result = hourly.set_index(second_day)

      # the frequency of the new index must match the old one
      assert result.index.freq == first_day.freq
  # Renaming
  def test_rename(self, float_frame):
      """Exercise ``DataFrame.rename`` on columns and on the index.

      Covers dict and callable mappers, partial mappings, renaming on the
      transposed frame, the no-argument error, and preservation of the
      index name.
      """
      mapping = {
          'A': 'a',
          'B': 'b',
          'C': 'c',
          'D': 'd'
      }

      # a dict mapper and the equivalent callable must agree
      renamed = float_frame.rename(columns=mapping)
      renamed2 = float_frame.rename(columns=str.lower)

      tm.assert_frame_equal(renamed, renamed2)
      tm.assert_frame_equal(renamed2.rename(columns=str.upper),
                            float_frame, check_names=False)

      # index
      data = {
          'A': {'foo': 0, 'bar': 1}
      }

      # gets sorted alphabetical
      df = DataFrame(data)
      renamed = df.rename(index={'foo': 'bar', 'bar': 'foo'})
      tm.assert_index_equal(renamed.index, Index(['foo', 'bar']))

      renamed = df.rename(index=str.upper)
      tm.assert_index_equal(renamed.index, Index(['BAR', 'FOO']))

      # have to pass something
      # (context-manager form, matching the other raises-checks here)
      with pytest.raises(TypeError):
          float_frame.rename()

      # partial columns
      renamed = float_frame.rename(columns={'C': 'foo', 'D': 'bar'})
      tm.assert_index_equal(renamed.columns, Index(['A', 'B', 'foo', 'bar']))

      # other axis
      renamed = float_frame.T.rename(index={'C': 'foo', 'D': 'bar'})
      tm.assert_index_equal(renamed.index, Index(['A', 'B', 'foo', 'bar']))

      # index with name
      index = Index(['foo', 'bar'], name='name')
      renamer = DataFrame(data, index=index)
      renamed = renamer.rename(index={'foo': 'bar', 'bar': 'foo'})
      tm.assert_index_equal(renamed.index,
                            Index(['bar', 'foo'], name='name'))
      assert renamed.index.name == renamer.index.name
  def test_rename_axis_inplace(self, float_frame):
      """``rename_axis(..., inplace=True)`` returns None and mutates."""
      # GH 15704
      cases = [('foo', {}), ('bar', {'axis': 1})]

      for axis_name, extra in cases:
          expected = float_frame.rename_axis(axis_name, **extra)

          # the in-place call runs on a copy so the fixture stays intact
          result = float_frame.copy()
          no_return = result.rename_axis(axis_name, inplace=True, **extra)

          assert no_return is None
          tm.assert_frame_equal(result, expected)
  def test_rename_axis_warns(self):
      """Callable or dict mappers to ``rename_axis`` should warn.

      Each call is expected to emit a ``FutureWarning`` whose message
      mentions 'rename'.
      """
      # https://github.com/pandas-dev/pandas/issues/17833
      df = DataFrame({"A": [1, 2], "B": [1, 2]})

      calls = [
          lambda: df.rename_axis(id, axis=0),
          lambda: df.rename_axis({0: 10, 1: 20}, axis=0),
          lambda: df.rename_axis(id, axis=1),
          lambda: df['A'].rename_axis(id),
      ]

      for call in calls:
          with tm.assert_produces_warning(FutureWarning) as w:
              call()
          assert 'rename' in str(w[0].message)
  def test_rename_axis_mapper(self):
      """Rename axis names via scalars, dicts, functions and lists."""
      # GH 19978
      mi = MultiIndex.from_product([['a', 'b', 'c'], [1, 2]],
                                   names=['ll', 'nn'])
      n_rows = len(mi)
      df = DataFrame({'x': list(range(n_rows)),
                      'y': [i * 10 for i in range(n_rows)]},
                     index=mi)

      # Test for rename of the Index object of columns
      named_cols = df.rename_axis('cols', axis=1)
      tm.assert_index_equal(named_cols.columns,
                            Index(['x', 'y'], name='cols'))

      # Test for rename of the Index object of columns using dict
      renamed_cols = named_cols.rename_axis(columns={'cols': 'new'}, axis=1)
      tm.assert_index_equal(renamed_cols.columns,
                            Index(['x', 'y'], name='new'))

      # Test for renaming index using dict
      by_dict = df.rename_axis(index={'ll': 'foo'})
      assert by_dict.index.names == ['foo', 'nn']

      # Test for renaming index using a function
      by_func = df.rename_axis(index=str.upper, axis=0)
      assert by_func.index.names == ['LL', 'NN']

      # Test for renaming index providing complete list
      by_list = df.rename_axis(index=['foo', 'goo'])
      assert by_list.index.names == ['foo', 'goo']

      # Test for changing index and columns at same time
      sdf = df.reset_index().set_index('nn').drop(columns=['ll', 'y'])
      both = sdf.rename_axis(index='foo', columns='meh')
      assert both.index.name == 'foo'
      assert both.columns.name == 'meh'

      # Test different error cases
      with pytest.raises(TypeError, match='Must pass'):
          df.rename_axis(index='wrong')

      with pytest.raises(ValueError, match='Length of names'):
          df.rename_axis(index=['wrong'])

      with pytest.raises(TypeError, match='bogus'):
          df.rename_axis(bogus=None)
  @pytest.mark.parametrize('kwargs, rename_index, rename_columns', [
      ({'mapper': None, 'axis': 0}, True, False),
      ({'mapper': None, 'axis': 1}, False, True),
      ({'index': None}, True, False),
      ({'columns': None}, False, True),
      ({'index': None, 'columns': None}, True, True),
      ({}, False, False)])
  def test_rename_axis_none(self, kwargs, rename_index, rename_columns):
      """Passing ``None`` to ``rename_axis`` should clear the axis name.

      ``rename_index`` / ``rename_columns`` state which axis names are
      expected to be reset for the given ``kwargs``.
      """
      # GH 25034
      index = Index(list('abc'), name='foo')
      columns = Index(['col1', 'col2'], name='bar')
      data = np.arange(6).reshape(3, 2)
      df = DataFrame(data, index, columns)

      result = df.rename_axis(**kwargs)

      if rename_index:
          expected_index = index.rename(None)
      else:
          expected_index = index
      if rename_columns:
          expected_columns = columns.rename(None)
      else:
          expected_columns = columns
      expected = DataFrame(data, expected_index, expected_columns)

      tm.assert_frame_equal(result, expected)
  585. def test_rename_multiindex(self):
  586. tuples_index = [('foo1', 'bar1'), ('foo2', 'bar2')]
  587. tuples_columns = [('fizz1', 'buzz1'), ('fizz2', 'buzz2')]
  588. index = MultiIndex.from_tuples(tuples_index, names=['foo', 'bar'])
  589. columns = MultiIndex.from_tuples(
  590. tuples_columns, names=['fizz', 'buzz'])
  591. df = DataFrame([(0, 0), (1, 1)], index=index, columns=columns)
  592. #
  593. # without specifying level -> across all levels
  594. renamed = df.rename(index={'foo1': 'foo3', 'bar2': 'bar3'},
  595. columns={'fizz1': 'fizz3', 'buzz2': 'buzz3'})
  596. new_index = MultiIndex.from_tuples([('foo3', 'bar1'),
  597. ('foo2', 'bar3')],
  598. names=['foo', 'bar'])
  599. new_columns = MultiIndex.from_tuples([('fizz3', 'buzz1'),
  600. ('fizz2', 'buzz3')],
  601. names=['fizz', 'buzz'])
  602. tm.assert_index_equal(renamed.index, new_index)
  603. tm.assert_index_equal(renamed.columns, new_columns)
  604. assert renamed.index.names == df.index.names
  605. assert renamed.columns.names == df.columns.names
  606. #
  607. # with specifying a level (GH13766)
  608. # dict
  609. new_columns = MultiIndex.from_tuples([('fizz3', 'buzz1'),
  610. ('fizz2', 'buzz2')],
  611. names=['fizz', 'buzz'])
  612. renamed = df.rename(columns={'fizz1': 'fizz3', 'buzz2': 'buzz3'},
  613. level=0)
  614. tm.assert_index_equal(renamed.columns, new_columns)
  615. renamed = df.rename(columns={'fizz1': 'fizz3', 'buzz2': 'buzz3'},
  616. level='fizz')
  617. tm.assert_index_equal(renamed.columns, new_columns)
  618. new_columns = MultiIndex.from_tuples([('fizz1', 'buzz1'),
  619. ('fizz2', 'buzz3')],
  620. names=['fizz', 'buzz'])
  621. renamed = df.rename(columns={'fizz1': 'fizz3', 'buzz2': 'buzz3'},
  622. level=1)
  623. tm.assert_index_equal(renamed.columns, new_columns)
  624. renamed = df.rename(columns={'fizz1': 'fizz3', 'buzz2': 'buzz3'},
  625. level='buzz')
  626. tm.assert_index_equal(renamed.columns, new_columns)
  627. # function
  628. func = str.upper
  629. new_columns = MultiIndex.from_tuples([('FIZZ1', 'buzz1'),
  630. ('FIZZ2', 'buzz2')],
  631. names=['fizz', 'buzz'])
  632. renamed = df.rename(columns=func, level=0)
  633. tm.assert_index_equal(renamed.columns, new_columns)
  634. renamed = df.rename(columns=func, level='fizz')
  635. tm.assert_index_equal(renamed.columns, new_columns)
  636. new_columns = MultiIndex.from_tuples([('fizz1', 'BUZZ1'),
  637. ('fizz2', 'BUZZ2')],
  638. names=['fizz', 'buzz'])
  639. renamed = df.rename(columns=func, level=1)
  640. tm.assert_index_equal(renamed.columns, new_columns)
  641. renamed = df.rename(columns=func, level='buzz')
  642. tm.assert_index_equal(renamed.columns, new_columns)
  643. # index
  644. new_index = MultiIndex.from_tuples([('foo3', 'bar1'),
  645. ('foo2', 'bar2')],
  646. names=['foo', 'bar'])
  647. renamed = df.rename(index={'foo1': 'foo3', 'bar2': 'bar3'},
  648. level=0)
  649. tm.assert_index_equal(renamed.index, new_index)
  def test_rename_nocopy(self, float_frame):
      """A write to a copy=False rename must show up in the source frame."""
      aliased = float_frame.rename(columns={'C': 'foo'}, copy=False)
      aliased['foo'] = 1.
      # The original column 'C' is expected to reflect the assignment.
      source_is_one = float_frame['C'] == 1.
      assert source_is_one.all()
  def test_rename_inplace(self, float_frame):
      """rename leaves the caller alone unless inplace=True is passed."""
      mapping = {'C': 'foo'}

      # Default call: the result is discarded and the frame is unchanged.
      float_frame.rename(columns=mapping)
      assert 'C' in float_frame
      assert 'foo' not in float_frame

      # Identity of the column as seen through the original frame.
      c_id = id(float_frame['C'])

      # Work on a copy so the in-place rename does not touch the fixture.
      float_frame = float_frame.copy()
      float_frame.rename(columns=mapping, inplace=True)

      assert 'C' not in float_frame
      assert 'foo' in float_frame
      assert id(float_frame['foo']) != c_id
  664. def test_rename_bug(self):
  665. # GH 5344
  666. # rename set ref_locs, and set_index was not resetting
  667. df = DataFrame({0: ['foo', 'bar'], 1: ['bah', 'bas'], 2: [1, 2]})
  668. df = df.rename(columns={0: 'a'})
  669. df = df.rename(columns={1: 'b'})
  670. df = df.set_index(['a', 'b'])
  671. df.columns = ['2001-01-01']
  672. expected = DataFrame([[1], [2]],
  673. index=MultiIndex.from_tuples(
  674. [('foo', 'bah'), ('bar', 'bas')],
  675. names=['a', 'b']),
  676. columns=['2001-01-01'])
  677. tm.assert_frame_equal(df, expected)
  678. def test_rename_bug2(self):
  679. # GH 19497
  680. # rename was changing Index to MultiIndex if Index contained tuples
  681. df = DataFrame(data=np.arange(3), index=[(0, 0), (1, 1), (2, 2)],
  682. columns=["a"])
  683. df = df.rename({(1, 1): (5, 4)}, axis="index")
  684. expected = DataFrame(data=np.arange(3), index=[(0, 0), (5, 4), (2, 2)],
  685. columns=["a"])
  686. tm.assert_frame_equal(df, expected)
  687. def test_reorder_levels(self):
  688. index = MultiIndex(levels=[['bar'], ['one', 'two', 'three'], [0, 1]],
  689. codes=[[0, 0, 0, 0, 0, 0],
  690. [0, 1, 2, 0, 1, 2],
  691. [0, 1, 0, 1, 0, 1]],
  692. names=['L0', 'L1', 'L2'])
  693. df = DataFrame({'A': np.arange(6), 'B': np.arange(6)}, index=index)
  694. # no change, position
  695. result = df.reorder_levels([0, 1, 2])
  696. tm.assert_frame_equal(df, result)
  697. # no change, labels
  698. result = df.reorder_levels(['L0', 'L1', 'L2'])
  699. tm.assert_frame_equal(df, result)
  700. # rotate, position
  701. result = df.reorder_levels([1, 2, 0])
  702. e_idx = MultiIndex(levels=[['one', 'two', 'three'], [0, 1], ['bar']],
  703. codes=[[0, 1, 2, 0, 1, 2],
  704. [0, 1, 0, 1, 0, 1],
  705. [0, 0, 0, 0, 0, 0]],
  706. names=['L1', 'L2', 'L0'])
  707. expected = DataFrame({'A': np.arange(6), 'B': np.arange(6)},
  708. index=e_idx)
  709. tm.assert_frame_equal(result, expected)
  710. result = df.reorder_levels([0, 0, 0])
  711. e_idx = MultiIndex(levels=[['bar'], ['bar'], ['bar']],
  712. codes=[[0, 0, 0, 0, 0, 0],
  713. [0, 0, 0, 0, 0, 0],
  714. [0, 0, 0, 0, 0, 0]],
  715. names=['L0', 'L0', 'L0'])
  716. expected = DataFrame({'A': np.arange(6), 'B': np.arange(6)},
  717. index=e_idx)
  718. tm.assert_frame_equal(result, expected)
  719. result = df.reorder_levels(['L0', 'L0', 'L0'])
  720. tm.assert_frame_equal(result, expected)
  721. def test_reset_index(self, float_frame):
  722. stacked = float_frame.stack()[::2]
  723. stacked = DataFrame({'foo': stacked, 'bar': stacked})
  724. names = ['first', 'second']
  725. stacked.index.names = names
  726. deleveled = stacked.reset_index()
  727. for i, (lev, level_codes) in enumerate(zip(stacked.index.levels,
  728. stacked.index.codes)):
  729. values = lev.take(level_codes)
  730. name = names[i]
  731. tm.assert_index_equal(values, Index(deleveled[name]))
  732. stacked.index.names = [None, None]
  733. deleveled2 = stacked.reset_index()
  734. tm.assert_series_equal(deleveled['first'], deleveled2['level_0'],
  735. check_names=False)
  736. tm.assert_series_equal(deleveled['second'], deleveled2['level_1'],
  737. check_names=False)
  738. # default name assigned
  739. rdf = float_frame.reset_index()
  740. exp = Series(float_frame.index.values, name='index')
  741. tm.assert_series_equal(rdf['index'], exp)
  742. # default name assigned, corner case
  743. df = float_frame.copy()
  744. df['index'] = 'foo'
  745. rdf = df.reset_index()
  746. exp = Series(float_frame.index.values, name='level_0')
  747. tm.assert_series_equal(rdf['level_0'], exp)
  748. # but this is ok
  749. float_frame.index.name = 'index'
  750. deleveled = float_frame.reset_index()
  751. tm.assert_series_equal(deleveled['index'], Series(float_frame.index))
  752. tm.assert_index_equal(deleveled.index,
  753. Index(np.arange(len(deleveled))))
  754. # preserve column names
  755. float_frame.columns.name = 'columns'
  756. resetted = float_frame.reset_index()
  757. assert resetted.columns.name == 'columns'
  758. # only remove certain columns
  759. df = float_frame.reset_index().set_index(['index', 'A', 'B'])
  760. rs = df.reset_index(['A', 'B'])
  761. # TODO should reset_index check_names ?
  762. tm.assert_frame_equal(rs, float_frame, check_names=False)
  763. rs = df.reset_index(['index', 'A', 'B'])
  764. tm.assert_frame_equal(rs, float_frame.reset_index(), check_names=False)
  765. rs = df.reset_index(['index', 'A', 'B'])
  766. tm.assert_frame_equal(rs, float_frame.reset_index(), check_names=False)
  767. rs = df.reset_index('A')
  768. xp = float_frame.reset_index().set_index(['index', 'B'])
  769. tm.assert_frame_equal(rs, xp, check_names=False)
  770. # test resetting in place
  771. df = float_frame.copy()
  772. resetted = float_frame.reset_index()
  773. df.reset_index(inplace=True)
  774. tm.assert_frame_equal(df, resetted, check_names=False)
  775. df = float_frame.reset_index().set_index(['index', 'A', 'B'])
  776. rs = df.reset_index('A', drop=True)
  777. xp = float_frame.copy()
  778. del xp['A']
  779. xp = xp.set_index(['B'], append=True)
  780. tm.assert_frame_equal(rs, xp, check_names=False)
  781. def test_reset_index_name(self):
  782. df = DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]],
  783. columns=['A', 'B', 'C', 'D'],
  784. index=Index(range(2), name='x'))
  785. assert df.reset_index().index.name is None
  786. assert df.reset_index(drop=True).index.name is None
  787. df.reset_index(inplace=True)
  788. assert df.index.name is None
  def test_reset_index_level(self):
      """reset_index(level=...) accepts level names and level positions."""
      df = DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]],
                     columns=['A', 'B', 'C', 'D'])

      for levels in ['A', 'B'], [0, 1]:
          first = levels[0]
          first_as_list = levels[:1]

          # With MultiIndex
          result = df.set_index(['A', 'B']).reset_index(level=first)
          tm.assert_frame_equal(result, df.set_index('B'))

          result = df.set_index(['A', 'B']).reset_index(level=first_as_list)
          tm.assert_frame_equal(result, df.set_index('B'))

          result = df.set_index(['A', 'B']).reset_index(level=levels)
          tm.assert_frame_equal(result, df)

          result = df.set_index(['A', 'B']).reset_index(level=levels,
                                                        drop=True)
          tm.assert_frame_equal(result, df[['C', 'D']])

          # With single-level Index (GH 16263)
          result = df.set_index('A').reset_index(level=first)
          tm.assert_frame_equal(result, df)

          result = df.set_index('A').reset_index(level=first_as_list)
          tm.assert_frame_equal(result, df)

          result = df.set_index(['A']).reset_index(level=first, drop=True)
          tm.assert_frame_equal(result, df[['B', 'C', 'D']])

      # Missing levels - for both MultiIndex and single-level Index:
      for idx_lev in ['A', 'B'], ['A']:
          with pytest.raises(KeyError, match='Level E '):
              df.set_index(idx_lev).reset_index(level=['A', 'E'])
          with pytest.raises(IndexError, match='Too many levels'):
              df.set_index(idx_lev).reset_index(level=[0, 1, 2])
  817. def test_reset_index_right_dtype(self):
  818. time = np.arange(0.0, 10, np.sqrt(2) / 2)
  819. s1 = Series((9.81 * time ** 2) / 2,
  820. index=Index(time, name='time'),
  821. name='speed')
  822. df = DataFrame(s1)
  823. resetted = s1.reset_index()
  824. assert resetted['time'].dtype == np.float64
  825. resetted = df.reset_index()
  826. assert resetted['time'].dtype == np.float64
  def test_reset_index_multiindex_col(self):
      """reset_index into two-level columns honours col_level / col_fill."""
      vals = np.random.randn(3, 3).astype(object)
      labels = ['x', 'y', 'z']
      label_column = [[label] for label in labels]
      # Expected values: the index labels followed by the original data.
      full = np.hstack((label_column, vals))

      value_cols = [['b', 'b', 'c'], ['mean', 'median', 'mean']]
      top = ['a', 'b', 'b', 'c']
      top_filled = ['blah', 'b', 'b', 'c']
      bottom_blank = ['', 'mean', 'median', 'mean']
      bottom_named = ['a', 'mean', 'median', 'mean']

      # Flat, named index.
      df = DataFrame(vals, Index(labels, name='a'), columns=value_cols)

      rs = df.reset_index()
      xp = DataFrame(full, columns=[top, bottom_blank])
      tm.assert_frame_equal(rs, xp)

      rs = df.reset_index(col_fill=None)
      xp = DataFrame(full, columns=[top, bottom_named])
      tm.assert_frame_equal(rs, xp)

      rs = df.reset_index(col_level=1, col_fill='blah')
      xp = DataFrame(full, columns=[top_filled, bottom_named])
      tm.assert_frame_equal(rs, xp)

      # Two-level index of which only level 'a' is reset.
      two_level = MultiIndex.from_arrays([[0, 1, 2], ['x', 'y', 'z']],
                                         names=['d', 'a'])
      df = DataFrame(vals, two_level, columns=value_cols)

      rs = df.reset_index('a')
      xp = DataFrame(full, Index([0, 1, 2], name='d'),
                     columns=[top, bottom_blank])
      tm.assert_frame_equal(rs, xp)

      rs = df.reset_index('a', col_fill=None)
      xp = DataFrame(full, Index(lrange(3), name='d'),
                     columns=[top, bottom_named])
      tm.assert_frame_equal(rs, xp)

      rs = df.reset_index('a', col_fill='blah', col_level=1)
      xp = DataFrame(full, Index(lrange(3), name='d'),
                     columns=[top_filled, bottom_named])
      tm.assert_frame_equal(rs, xp)
  864. def test_reset_index_multiindex_nan(self):
  865. # GH6322, testing reset_index on MultiIndexes
  866. # when we have a nan or all nan
  867. df = DataFrame({'A': ['a', 'b', 'c'],
  868. 'B': [0, 1, np.nan],
  869. 'C': np.random.rand(3)})
  870. rs = df.set_index(['A', 'B']).reset_index()
  871. tm.assert_frame_equal(rs, df)
  872. df = DataFrame({'A': [np.nan, 'b', 'c'],
  873. 'B': [0, 1, 2],
  874. 'C': np.random.rand(3)})
  875. rs = df.set_index(['A', 'B']).reset_index()
  876. tm.assert_frame_equal(rs, df)
  877. df = DataFrame({'A': ['a', 'b', 'c'],
  878. 'B': [0, 1, 2],
  879. 'C': [np.nan, 1.1, 2.2]})
  880. rs = df.set_index(['A', 'B']).reset_index()
  881. tm.assert_frame_equal(rs, df)
  882. df = DataFrame({'A': ['a', 'b', 'c'],
  883. 'B': [np.nan, np.nan, np.nan],
  884. 'C': np.random.rand(3)})
  885. rs = df.set_index(['A', 'B']).reset_index()
  886. tm.assert_frame_equal(rs, df)
  887. def test_reset_index_with_datetimeindex_cols(self):
  888. # GH5818
  889. #
  890. df = DataFrame([[1, 2], [3, 4]],
  891. columns=date_range('1/1/2013', '1/2/2013'),
  892. index=['A', 'B'])
  893. result = df.reset_index()
  894. expected = DataFrame([['A', 1, 2], ['B', 3, 4]],
  895. columns=['index', datetime(2013, 1, 1),
  896. datetime(2013, 1, 2)])
  897. tm.assert_frame_equal(result, expected)
  898. def test_reset_index_range(self):
  899. # GH 12071
  900. df = DataFrame([[0, 0], [1, 1]], columns=['A', 'B'],
  901. index=RangeIndex(stop=2))
  902. result = df.reset_index()
  903. assert isinstance(result.index, RangeIndex)
  904. expected = DataFrame([[0, 0, 0], [1, 1, 1]],
  905. columns=['index', 'A', 'B'],
  906. index=RangeIndex(stop=2))
  907. tm.assert_frame_equal(result, expected)
  def test_set_index_names(self):
      """set_index given Index / MultiIndex objects keeps their names."""
      df = tm.makeDataFrame()
      df.index.name = 'name'

      assert df.set_index(df.index).index.names == ['name']

      # Reference indexes, built while 'A' and 'B' are still columns.
      two_level = MultiIndex.from_arrays(df[['A', 'B']].T.values,
                                         names=['A', 'B'])
      four_level = MultiIndex.from_arrays(
          df[['A', 'B', 'A', 'B']].T.values,
          names=['A', 'B', 'C', 'D'])

      df = df.set_index(['A', 'B'])

      assert df.set_index(df.index).index.names == ['A', 'B']

      # Check that set_index isn't converting a MultiIndex into an Index
      assert isinstance(df.set_index(df.index).index, MultiIndex)

      # Check actual equality
      tm.assert_index_equal(df.set_index(df.index).index, two_level)

      renamed_levels = df.index.rename(['C', 'D'])

      # Check that [MultiIndex, MultiIndex] yields a MultiIndex rather
      # than a pair of tuples
      assert isinstance(df.set_index([df.index, renamed_levels]).index,
                        MultiIndex)

      # Check equality
      tm.assert_index_equal(df.set_index([df.index, renamed_levels]).index,
                            four_level)
  927. def test_rename_objects(self, float_string_frame):
  928. renamed = float_string_frame.rename(columns=str.upper)
  929. assert 'FOO' in renamed
  930. assert 'foo' not in renamed
  931. def test_rename_axis_style(self):
  932. # https://github.com/pandas-dev/pandas/issues/12392
  933. df = DataFrame({"A": [1, 2], "B": [1, 2]}, index=['X', 'Y'])
  934. expected = DataFrame({"a": [1, 2], "b": [1, 2]}, index=['X', 'Y'])
  935. result = df.rename(str.lower, axis=1)
  936. tm.assert_frame_equal(result, expected)
  937. result = df.rename(str.lower, axis='columns')
  938. tm.assert_frame_equal(result, expected)
  939. result = df.rename({"A": 'a', 'B': 'b'}, axis=1)
  940. tm.assert_frame_equal(result, expected)
  941. result = df.rename({"A": 'a', 'B': 'b'}, axis='columns')
  942. tm.assert_frame_equal(result, expected)
  943. # Index
  944. expected = DataFrame({"A": [1, 2], "B": [1, 2]}, index=['x', 'y'])
  945. result = df.rename(str.lower, axis=0)
  946. tm.assert_frame_equal(result, expected)
  947. result = df.rename(str.lower, axis='index')
  948. tm.assert_frame_equal(result, expected)
  949. result = df.rename({'X': 'x', 'Y': 'y'}, axis=0)
  950. tm.assert_frame_equal(result, expected)
  951. result = df.rename({'X': 'x', 'Y': 'y'}, axis='index')
  952. tm.assert_frame_equal(result, expected)
  953. result = df.rename(mapper=str.lower, axis='index')
  954. tm.assert_frame_equal(result, expected)
  955. def test_rename_mapper_multi(self):
  956. df = DataFrame({"A": ['a', 'b'], "B": ['c', 'd'],
  957. 'C': [1, 2]}).set_index(["A", "B"])
  958. result = df.rename(str.upper)
  959. expected = df.rename(index=str.upper)
  960. tm.assert_frame_equal(result, expected)
  961. def test_rename_positional_named(self):
  962. # https://github.com/pandas-dev/pandas/issues/12392
  963. df = DataFrame({"a": [1, 2], "b": [1, 2]}, index=['X', 'Y'])
  964. result = df.rename(str.lower, columns=str.upper)
  965. expected = DataFrame({"A": [1, 2], "B": [1, 2]}, index=['x', 'y'])
  966. tm.assert_frame_equal(result, expected)
  def test_rename_axis_style_raises(self):
      """Conflicting or duplicated rename arguments raise TypeError.

      see gh-12392
      """
      df = DataFrame({"A": [1, 2], "B": [1, 2]}, index=["0", "1"])

      # Named target and axis
      over_spec_msg = ("Cannot specify both 'axis' and "
                       "any of 'index' or 'columns'")
      named_and_axis = [
          {'index': str.lower, 'axis': 1},
          {'index': str.lower, 'axis': "columns"},
          {'columns': str.lower, 'axis': "columns"},
          {'index': str.lower, 'axis': 0},
      ]
      for kwargs in named_and_axis:
          with pytest.raises(TypeError, match=over_spec_msg):
              df.rename(**kwargs)

      # Multiple targets and axis
      with pytest.raises(TypeError, match=over_spec_msg):
          df.rename(str.lower, str.lower, axis="columns")

      # Too many targets
      over_spec_msg = "Cannot specify all of 'mapper', 'index', 'columns'."
      with pytest.raises(TypeError, match=over_spec_msg):
          df.rename(str.lower, str.lower, str.lower)

      # Duplicates
      with pytest.raises(TypeError, match="multiple values"):
          df.rename(id, mapper=id)
  991. def test_reindex_api_equivalence(self):
  992. # equivalence of the labels/axis and index/columns API's
  993. df = DataFrame([[1, 2, 3], [3, 4, 5], [5, 6, 7]],
  994. index=['a', 'b', 'c'],
  995. columns=['d', 'e', 'f'])
  996. res1 = df.reindex(['b', 'a'])
  997. res2 = df.reindex(index=['b', 'a'])
  998. res3 = df.reindex(labels=['b', 'a'])
  999. res4 = df.reindex(labels=['b', 'a'], axis=0)
  1000. res5 = df.reindex(['b', 'a'], axis=0)
  1001. for res in [res2, res3, res4, res5]:
  1002. tm.assert_frame_equal(res1, res)
  1003. res1 = df.reindex(columns=['e', 'd'])
  1004. res2 = df.reindex(['e', 'd'], axis=1)
  1005. res3 = df.reindex(labels=['e', 'd'], axis=1)
  1006. for res in [res2, res3]:
  1007. tm.assert_frame_equal(res1, res)
  1008. res1 = df.reindex(index=['b', 'a'], columns=['e', 'd'])
  1009. res2 = df.reindex(columns=['e', 'd'], index=['b', 'a'])
  1010. res3 = df.reindex(labels=['b', 'a'], axis=0).reindex(labels=['e', 'd'],
  1011. axis=1)
  1012. for res in [res2, res3]:
  1013. tm.assert_frame_equal(res1, res)
  def test_rename_positional(self):
      """Two positional rename arguments work but emit one FutureWarning."""
      df = DataFrame(columns=['A', 'B'])
      expected = DataFrame(columns=['a', 'b'])

      with tm.assert_produces_warning(FutureWarning) as caught:
          result = df.rename(None, str.lower)
      tm.assert_frame_equal(result, expected)

      # Exactly one warning, and its text points the user at the fix.
      assert len(caught) == 1
      text = str(caught[0].message)
      assert 'rename' in text
      assert 'Use named arguments' in text
  1024. def test_assign_columns(self, float_frame):
  1025. float_frame['hi'] = 'there'
  1026. df = float_frame.copy()
  1027. df.columns = ['foo', 'bar', 'baz', 'quux', 'foo2']
  1028. tm.assert_series_equal(float_frame['C'], df['baz'], check_names=False)
  1029. tm.assert_series_equal(float_frame['hi'], df['foo2'],
  1030. check_names=False)
  1031. def test_set_index_preserve_categorical_dtype(self):
  1032. # GH13743, GH13854
  1033. df = DataFrame({'A': [1, 2, 1, 1, 2],
  1034. 'B': [10, 16, 22, 28, 34],
  1035. 'C1': Categorical(list("abaab"),
  1036. categories=list("bac"),
  1037. ordered=False),
  1038. 'C2': Categorical(list("abaab"),
  1039. categories=list("bac"),
  1040. ordered=True)})
  1041. for cols in ['C1', 'C2', ['A', 'C1'], ['A', 'C2'], ['C1', 'C2']]:
  1042. result = df.set_index(cols).reset_index()
  1043. result = result.reindex(columns=df.columns)
  1044. tm.assert_frame_equal(result, df)
  def test_ambiguous_warns(self):
      """Two positional mappers passed to rename emit a FutureWarning."""
      df = DataFrame({"A": [1, 2]})

      # Once with callables and once with dicts.
      mapper_pairs = [(id, id), ({0: 10}, {"A": "B"})]
      for first, second in mapper_pairs:
          with tm.assert_produces_warning(FutureWarning):
              df.rename(first, second)
  @pytest.mark.skipif(PY2, reason="inspect.signature")
  def test_rename_signature(self):
      """Pin the parameter names exposed by DataFrame.rename."""
      expected = {"self", "mapper", "index", "columns", "axis",
                  "inplace", "copy", "level"}
      actual = set(inspect.signature(DataFrame.rename).parameters)
      assert actual == expected
  @pytest.mark.skipif(PY2, reason="inspect.signature")
  def test_reindex_signature(self):
      """Pin the parameter names exposed by DataFrame.reindex."""
      expected = {"self", "labels", "index", "columns", "axis",
                  "limit", "copy", "level", "method",
                  "fill_value", "tolerance"}
      actual = set(inspect.signature(DataFrame.reindex).parameters)
      assert actual == expected
  1064. def test_droplevel(self):
  1065. # GH20342
  1066. df = DataFrame([
  1067. [1, 2, 3, 4],
  1068. [5, 6, 7, 8],
  1069. [9, 10, 11, 12]
  1070. ])
  1071. df = df.set_index([0, 1]).rename_axis(['a', 'b'])
  1072. df.columns = MultiIndex.from_tuples([('c', 'e'), ('d', 'f')],
  1073. names=['level_1', 'level_2'])
  1074. # test that dropping of a level in index works
  1075. expected = df.reset_index('a', drop=True)
  1076. result = df.droplevel('a', axis='index')
  1077. tm.assert_frame_equal(result, expected)
  1078. # test that dropping of a level in columns works
  1079. expected = df.copy()
  1080. expected.columns = Index(['c', 'd'], name='level_1')
  1081. result = df.droplevel('level_2', axis='columns')
  1082. tm.assert_frame_equal(result, expected)
  1083. class TestIntervalIndex(object):
  1084. def test_setitem(self):
  1085. df = DataFrame({'A': range(10)})
  1086. s = cut(df.A, 5)
  1087. assert isinstance(s.cat.categories, IntervalIndex)
  1088. # B & D end up as Categoricals
  1089. # the remainer are converted to in-line objects
  1090. # contining an IntervalIndex.values
  1091. df['B'] = s
  1092. df['C'] = np.array(s)
  1093. df['D'] = s.values
  1094. df['E'] = np.array(s.values)
  1095. assert is_categorical_dtype(df['B'])
  1096. assert is_interval_dtype(df['B'].cat.categories)
  1097. assert is_categorical_dtype(df['D'])
  1098. assert is_interval_dtype(df['D'].cat.categories)
  1099. assert is_object_dtype(df['C'])
  1100. assert is_object_dtype(df['E'])
  1101. # they compare equal as Index
  1102. # when converted to numpy objects
  1103. c = lambda x: Index(np.array(x))
  1104. tm.assert_index_equal(c(df.B), c(df.B), check_names=False)
  1105. tm.assert_index_equal(c(df.B), c(df.C), check_names=False)
  1106. tm.assert_index_equal(c(df.B), c(df.D), check_names=False)
  1107. tm.assert_index_equal(c(df.B), c(df.D), check_names=False)
  1108. # B & D are the same Series
  1109. tm.assert_series_equal(df['B'], df['B'], check_names=False)
  1110. tm.assert_series_equal(df['B'], df['D'], check_names=False)
  1111. # C & E are the same Series
  1112. tm.assert_series_equal(df['C'], df['C'], check_names=False)
  1113. tm.assert_series_equal(df['C'], df['E'], check_names=False)
  1114. def test_set_reset_index(self):
  1115. df = DataFrame({'A': range(10)})
  1116. s = cut(df.A, 5)
  1117. df['B'] = s
  1118. df = df.set_index('B')
  1119. df = df.reset_index()
  1120. def test_set_axis_inplace(self):
  1121. # GH14636
  1122. df = DataFrame({'A': [1.1, 2.2, 3.3],
  1123. 'B': [5.0, 6.1, 7.2],
  1124. 'C': [4.4, 5.5, 6.6]},
  1125. index=[2010, 2011, 2012])
  1126. expected = {0: df.copy(),
  1127. 1: df.copy()}
  1128. expected[0].index = list('abc')
  1129. expected[1].columns = list('abc')
  1130. expected['index'] = expected[0]
  1131. expected['columns'] = expected[1]
  1132. for axis in expected:
  1133. # inplace=True
  1134. # The FutureWarning comes from the fact that we would like to have
  1135. # inplace default to False some day
  1136. for inplace, warn in (None, FutureWarning), (True, None):
  1137. kwargs = {'inplace': inplace}
  1138. result = df.copy()
  1139. with tm.assert_produces_warning(warn):
  1140. result.set_axis(list('abc'), axis=axis, **kwargs)
  1141. tm.assert_frame_equal(result, expected[axis])
  1142. # inplace=False
  1143. result = df.set_axis(list('abc'), axis=axis, inplace=False)
  1144. tm.assert_frame_equal(expected[axis], result)
  1145. # omitting the "axis" parameter
  1146. with tm.assert_produces_warning(None):
  1147. result = df.set_axis(list('abc'), inplace=False)
  1148. tm.assert_frame_equal(result, expected[0])
  1149. # wrong values for the "axis" parameter
  1150. for axis in 3, 'foo':
  1151. with pytest.raises(ValueError, match='No axis named'):
  1152. df.set_axis(list('abc'), axis=axis, inplace=False)
  1153. def test_set_axis_prior_to_deprecation_signature(self):
  1154. df = DataFrame({'A': [1.1, 2.2, 3.3],
  1155. 'B': [5.0, 6.1, 7.2],
  1156. 'C': [4.4, 5.5, 6.6]},
  1157. index=[2010, 2011, 2012])
  1158. expected = {0: df.copy(),
  1159. 1: df.copy()}
  1160. expected[0].index = list('abc')
  1161. expected[1].columns = list('abc')
  1162. expected['index'] = expected[0]
  1163. expected['columns'] = expected[1]
  1164. # old signature
  1165. for axis in expected:
  1166. with tm.assert_produces_warning(FutureWarning):
  1167. result = df.set_axis(axis, list('abc'), inplace=False)
  1168. tm.assert_frame_equal(result, expected[axis])