test_apply.py 44 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154
  1. # -*- coding: utf-8 -*-
  2. from __future__ import print_function
  3. from collections import OrderedDict
  4. from datetime import datetime
  5. from itertools import chain
  6. import operator
  7. import warnings
  8. import numpy as np
  9. import pytest
  10. from pandas.core.dtypes.dtypes import CategoricalDtype
  11. import pandas as pd
  12. from pandas import (
  13. DataFrame, MultiIndex, Series, Timestamp, compat, date_range, notna)
  14. from pandas.conftest import _get_cython_table_params
  15. from pandas.core.apply import frame_apply
  16. import pandas.util.testing as tm
  17. from pandas.util.testing import assert_frame_equal, assert_series_equal
  @pytest.fixture
  def int_frame_const_col():
      """
      Fixture for DataFrame of ints which are constant per column

      Builds a 6-row frame whose columns are ['A', 'B', 'C'], holding the
      int64 values 1, 2 and 3 respectively in every row.
      """
      # One row of [0, 1, 2], repeated six times and shifted to start at 1.
      base_row = np.arange(3, dtype='int64')
      values = np.tile(base_row, 6).reshape(6, -1) + 1
      return DataFrame(values, columns=['A', 'B', 'C'])
  class TestDataFrameApply(object):
      """Tests for ``DataFrame.apply``, ``DataFrame.applymap`` and related paths."""

      def test_apply(self, float_frame):
          """Apply a ufunc and an aggregator, reject a bad axis, keep categoricals."""
          with np.errstate(all='ignore'):
              # ufunc
              applied = float_frame.apply(np.sqrt)
              tm.assert_series_equal(np.sqrt(float_frame['A']), applied['A'])

              # aggregator
              applied = float_frame.apply(np.mean)
              assert applied['A'] == np.mean(float_frame['A'])

              d = float_frame.index[0]
              applied = float_frame.apply(np.mean, axis=1)
              assert applied[d] == np.mean(float_frame.xs(d))
              assert applied.index is float_frame.index  # want this

          # invalid axis
          df = DataFrame(
              [[1, 2, 3], [4, 5, 6], [7, 8, 9]], index=['a', 'a', 'c'])
          with pytest.raises(ValueError):
              df.apply(lambda x: x, 2)

          # GH 9573
          df = DataFrame({'c0': ['A', 'A', 'B', 'B'],
                          'c1': ['C', 'C', 'D', 'D']})
          df = df.apply(lambda ts: ts.astype('category'))

          assert df.shape == (4, 2)
          assert isinstance(df['c0'].dtype, CategoricalDtype)
          assert isinstance(df['c1'].dtype, CategoricalDtype)

      def test_apply_mixed_datetimelike(self):
          """Row-wise identity apply on datetime + timedelta columns (GH 7778)."""
          # mixed datetimelike
          df = DataFrame({'A': date_range('20130101', periods=3),
                          'B': pd.to_timedelta(np.arange(3), unit='s')})
          result = df.apply(lambda x: x, axis=1)
          assert_frame_equal(result, df)

      def test_apply_empty(self, float_frame, empty_frame):
          """Apply on an empty frame, on zero rows, and on zero columns."""
          # empty
          applied = empty_frame.apply(np.sqrt)
          assert applied.empty

          applied = empty_frame.apply(np.mean)
          assert applied.empty

          no_rows = float_frame[:0]
          result = no_rows.apply(lambda x: x.mean())
          expected = Series(np.nan, index=float_frame.columns)
          assert_series_equal(result, expected)

          no_cols = float_frame.loc[:, []]
          result = no_cols.apply(lambda x: x.mean(), axis=1)
          expected = Series(np.nan, index=float_frame.index)
          assert_series_equal(result, expected)

          # GH 2476
          expected = DataFrame(index=['a'])
          result = expected.apply(lambda x: x['a'], axis=1)
          assert_frame_equal(expected, result)

      def test_apply_with_reduce_empty(self, empty_frame):
          """``result_type`` 'expand'/'reduce' on empty frames never calls the func."""
          # reduce with an empty DataFrame
          x = []
          result = empty_frame.apply(x.append, axis=1, result_type='expand')
          assert_frame_equal(result, empty_frame)
          result = empty_frame.apply(x.append, axis=1, result_type='reduce')
          assert_series_equal(result, Series(
              [], index=pd.Index([], dtype=object)))

          empty_with_cols = DataFrame(columns=['a', 'b', 'c'])
          result = empty_with_cols.apply(x.append, axis=1, result_type='expand')
          assert_frame_equal(result, empty_with_cols)
          result = empty_with_cols.apply(x.append, axis=1, result_type='reduce')
          assert_series_equal(result, Series(
              [], index=pd.Index([], dtype=object)))

          # Ensure that x.append hasn't been called
          assert x == []

      def test_apply_deprecate_reduce(self, empty_frame):
          """Passing ``reduce=`` must emit a FutureWarning."""
          x = []
          with tm.assert_produces_warning(FutureWarning):
              empty_frame.apply(x.append, axis=1, reduce=True)

      def test_apply_standard_nonunique(self):
          """Apply along either axis when the index holds duplicate labels."""
          df = DataFrame(
              [[1, 2, 3], [4, 5, 6], [7, 8, 9]], index=['a', 'a', 'c'])

          result = df.apply(lambda s: s[0], axis=1)
          expected = Series([1, 4, 7], ['a', 'a', 'c'])
          assert_series_equal(result, expected)

          result = df.T.apply(lambda s: s[0], axis=0)
          assert_series_equal(result, expected)

      @pytest.mark.parametrize('func', ['sum', 'mean', 'min', 'max', 'std'])
      @pytest.mark.parametrize('args,kwds', [
          pytest.param([], {}, id='no_args_or_kwds'),
          pytest.param([1], {}, id='axis_from_args'),
          pytest.param([], {'axis': 1}, id='axis_from_kwds'),
          pytest.param([], {'numeric_only': True}, id='optional_kwds'),
          pytest.param([1, None], {'numeric_only': True}, id='args_and_kwds')
      ])
      def test_apply_with_string_funcs(self, float_frame, func, args, kwds):
          """A string func name must match calling the same-named frame method."""
          result = float_frame.apply(func, *args, **kwds)
          expected = getattr(float_frame, func)(*args, **kwds)
          tm.assert_series_equal(result, expected)

      def test_apply_broadcast_deprecated(self, float_frame):
          """Passing ``broadcast=`` must emit a FutureWarning."""
          with tm.assert_produces_warning(FutureWarning):
              float_frame.apply(np.mean, broadcast=True)

      def test_apply_broadcast(self, float_frame, int_frame_const_col):
          """``result_type='broadcast'`` with scalar, list and Series returns."""
          # scalars
          result = float_frame.apply(np.mean, result_type='broadcast')
          expected = DataFrame([float_frame.mean()], index=float_frame.index)
          tm.assert_frame_equal(result, expected)

          result = float_frame.apply(np.mean, axis=1, result_type='broadcast')
          m = float_frame.mean(axis=1)
          expected = DataFrame({c: m for c in float_frame.columns})
          tm.assert_frame_equal(result, expected)

          # lists
          result = float_frame.apply(
              lambda x: list(range(len(float_frame.columns))),
              axis=1,
              result_type='broadcast')
          m = list(range(len(float_frame.columns)))
          expected = DataFrame([m] * len(float_frame.index),
                               dtype='float64',
                               index=float_frame.index,
                               columns=float_frame.columns)
          tm.assert_frame_equal(result, expected)

          result = float_frame.apply(lambda x:
                                     list(range(len(float_frame.index))),
                                     result_type='broadcast')
          m = list(range(len(float_frame.index)))
          expected = DataFrame({c: m for c in float_frame.columns},
                               dtype='float64',
                               index=float_frame.index)
          tm.assert_frame_equal(result, expected)

          # preserve columns
          df = int_frame_const_col
          result = df.apply(lambda x: [1, 2, 3], axis=1, result_type='broadcast')
          tm.assert_frame_equal(result, df)

          df = int_frame_const_col
          result = df.apply(lambda x: Series([1, 2, 3], index=list('abc')),
                            axis=1, result_type='broadcast')
          expected = df.copy()
          tm.assert_frame_equal(result, expected)

      def test_apply_broadcast_error(self, int_frame_const_col):
          """Broadcasting must raise ValueError for unusable return shapes."""
          df = int_frame_const_col

          # > 1 ndim
          with pytest.raises(ValueError):
              df.apply(lambda x: np.array([1, 2]).reshape(-1, 2),
                       axis=1, result_type='broadcast')

          # cannot broadcast
          with pytest.raises(ValueError):
              df.apply(lambda x: [1, 2], axis=1, result_type='broadcast')

          with pytest.raises(ValueError):
              df.apply(lambda x: Series([1, 2]), axis=1, result_type='broadcast')

      def test_apply_raw(self, float_frame):
          """``raw=True`` must agree with operating on ``.values`` explicitly."""
          result0 = float_frame.apply(np.mean, raw=True)
          result1 = float_frame.apply(np.mean, axis=1, raw=True)

          expected0 = float_frame.apply(lambda x: x.values.mean())
          expected1 = float_frame.apply(lambda x: x.values.mean(), axis=1)

          assert_series_equal(result0, expected0)
          assert_series_equal(result1, expected1)

          # no reduction
          result = float_frame.apply(lambda x: x * 2, raw=True)
          expected = float_frame * 2
          assert_frame_equal(result, expected)

      def test_apply_axis1(self, float_frame):
          """Row-wise mean for the first index label matches ``np.mean`` on ``xs``."""
          d = float_frame.index[0]
          tapplied = float_frame.apply(np.mean, axis=1)
          assert tapplied[d] == np.mean(float_frame.xs(d))

      def test_apply_ignore_failures(self, float_string_frame):
          """``ignore_failures=True`` matches applying to the numeric data only."""
          result = frame_apply(float_string_frame, np.mean, 0,
                               ignore_failures=True).apply_standard()
          expected = float_string_frame._get_numeric_data().apply(np.mean)
          assert_series_equal(result, expected)

      def test_apply_mixed_dtype_corner(self):
          """Row-wise apply on a mixed object/float frame, empty and single-row."""
          df = DataFrame({'A': ['foo'],
                          'B': [1.]})
          result = df[:0].apply(np.mean, axis=1)
          # the result here is actually kind of ambiguous, should it be a Series
          # or a DataFrame?
          expected = Series(np.nan, index=pd.Index([], dtype='int64'))
          assert_series_equal(result, expected)

          df = DataFrame({'A': ['foo'],
                          'B': [1.]})
          result = df.apply(lambda x: x['A'], axis=1)
          expected = Series(['foo'], index=[0])
          assert_series_equal(result, expected)

          result = df.apply(lambda x: x['B'], axis=1)
          expected = Series([1.], index=[0])
          assert_series_equal(result, expected)

      def test_apply_empty_infer_type(self):
          """Result container type on frames with no columns or no index."""
          no_cols = DataFrame(index=['a', 'b', 'c'])
          no_index = DataFrame(columns=['a', 'b', 'c'])

          def _check(df, f):
              # Probe ``f`` on an empty float array to learn whether it
              # reduces (non-ndarray result) or transforms (ndarray result).
              with warnings.catch_warnings(record=True):
                  warnings.simplefilter("ignore", RuntimeWarning)
                  test_res = f(np.array([], dtype='f8'))
              is_reduction = not isinstance(test_res, np.ndarray)

              def _checkit(axis=0, raw=False):
                  result = df.apply(f, axis=axis, raw=raw)
                  if is_reduction:
                      agg_axis = df._get_agg_axis(axis)
                      assert isinstance(result, Series)
                      assert result.index is agg_axis
                  else:
                      assert isinstance(result, DataFrame)

              _checkit()
              _checkit(axis=1)
              _checkit(raw=True)
              # NOTE(review): same arguments as the previous call; possibly
              # meant to be axis=1 -- left unchanged, confirm intent.
              _checkit(axis=0, raw=True)

          with np.errstate(all='ignore'):
              _check(no_cols, lambda x: x)
              _check(no_cols, lambda x: x.mean())
              _check(no_index, lambda x: x)
              _check(no_index, lambda x: x.mean())

          result = no_cols.apply(lambda x: x.mean(), result_type='broadcast')
          assert isinstance(result, DataFrame)

      def test_apply_with_args_kwds(self, float_frame):
          """Extra positional (``args=``) and keyword arguments reach the func."""
          def add_some(x, howmuch=0):
              return x + howmuch

          def agg_and_add(x, howmuch=0):
              return x.mean() + howmuch

          def subtract_and_divide(x, sub, divide=1):
              return (x - sub) / divide

          result = float_frame.apply(add_some, howmuch=2)
          expected = float_frame.apply(lambda x: x + 2)
          assert_frame_equal(result, expected)

          result = float_frame.apply(agg_and_add, howmuch=2)
          expected = float_frame.apply(lambda x: x.mean() + 2)
          assert_series_equal(result, expected)

          result = float_frame.apply(subtract_and_divide, args=(2,), divide=2)
          expected = float_frame.apply(lambda x: (x - 2.) / 2.)
          assert_frame_equal(result, expected)

      def test_apply_yield_list(self, float_frame):
          """Applying ``list`` column-wise reproduces the original frame."""
          result = float_frame.apply(list)
          assert_frame_equal(result, float_frame)

      def test_apply_reduce_Series(self, float_frame):
          """Row-wise ``np.mean`` matches ``mean(1)`` when NaNs are present."""
          float_frame.loc[::2, 'A'] = np.nan
          expected = float_frame.mean(1)
          result = float_frame.apply(np.mean, axis=1)
          assert_series_equal(result, expected)

      def test_apply_reduce_rows_to_dict(self):
          """Applying ``dict`` yields a Series with one dict per column (GH 25196)."""
          data = pd.DataFrame([[1, 2], [3, 4]])
          expected = pd.Series([{0: 1, 1: 3}, {0: 2, 1: 4}])
          result = data.apply(dict)
          assert_series_equal(result, expected)

      def test_apply_differently_indexed(self):
          """``Series.describe`` along each axis matches a manual construction."""
          df = DataFrame(np.random.randn(20, 10))

          result0 = df.apply(Series.describe, axis=0)
          expected0 = DataFrame({i: v.describe()
                                 for i, v in compat.iteritems(df)},
                                columns=df.columns)
          assert_frame_equal(result0, expected0)

          result1 = df.apply(Series.describe, axis=1)
          expected1 = DataFrame({i: v.describe()
                                 for i, v in compat.iteritems(df.T)},
                                columns=df.index).T
          assert_frame_equal(result1, expected1)

      def test_apply_modify_traceback(self):
          """A failing row-wise func must raise with the offending index attached.

          Row 4 has NaN in column 'C', so ``row['C'].startswith`` fails on a
          float.  The raised AttributeError is expected to carry the original
          message plus an ``'occurred at index 4'`` entry in ``args``.
          """
          data = DataFrame({'A': ['foo', 'foo', 'foo', 'foo',
                                  'bar', 'bar', 'bar', 'bar',
                                  'foo', 'foo', 'foo'],
                            'B': ['one', 'one', 'one', 'two',
                                  'one', 'one', 'one', 'two',
                                  'two', 'two', 'one'],
                            'C': ['dull', 'dull', 'shiny', 'dull',
                                  'dull', 'shiny', 'shiny', 'dull',
                                  'shiny', 'shiny', 'shiny'],
                            'D': np.random.randn(11),
                            'E': np.random.randn(11),
                            'F': np.random.randn(11)})

          data.loc[4, 'C'] = np.nan

          def transform(row):
              if row['C'].startswith('shin') and row['A'] == 'foo':
                  row['D'] = 7
              return row

          # pytest.raises makes the test fail if no exception is raised at all;
          # a bare try/except would silently pass in that case.
          with pytest.raises(AttributeError) as excinfo:
              data.apply(transform, axis=1)

          err_args = excinfo.value.args
          assert len(err_args) == 2
          assert err_args[1] == 'occurred at index 4'
          assert err_args[0] == "'float' object has no attribute 'startswith'"

      def test_apply_bug(self):
          """Row-wise selection is unaffected by a datetime first column (GH 6125)."""
          positions = pd.DataFrame([[1, 'ABC0', 50], [1, 'YUM0', 20],
                                    [1, 'DEF0', 20], [2, 'ABC1', 50],
                                    [2, 'YUM1', 20], [2, 'DEF1', 20]],
                                   columns=['a', 'market', 'position'])

          def f(r):
              return r['market']

          expected = positions.apply(f, axis=1)

          positions = DataFrame([[datetime(2013, 1, 1), 'ABC0', 50],
                                 [datetime(2013, 1, 2), 'YUM0', 20],
                                 [datetime(2013, 1, 3), 'DEF0', 20],
                                 [datetime(2013, 1, 4), 'ABC1', 50],
                                 [datetime(2013, 1, 5), 'YUM1', 20],
                                 [datetime(2013, 1, 6), 'DEF1', 20]],
                                columns=['a', 'market', 'position'])
          result = positions.apply(f, axis=1)
          assert_series_equal(result, expected)

      def test_apply_convert_objects(self):
          """Row-wise identity on a mixed frame round-trips after ``_convert``."""
          data = DataFrame({'A': ['foo', 'foo', 'foo', 'foo',
                                  'bar', 'bar', 'bar', 'bar',
                                  'foo', 'foo', 'foo'],
                            'B': ['one', 'one', 'one', 'two',
                                  'one', 'one', 'one', 'two',
                                  'two', 'two', 'one'],
                            'C': ['dull', 'dull', 'shiny', 'dull',
                                  'dull', 'shiny', 'shiny', 'dull',
                                  'shiny', 'shiny', 'shiny'],
                            'D': np.random.randn(11),
                            'E': np.random.randn(11),
                            'F': np.random.randn(11)})

          result = data.apply(lambda x: x, axis=1)
          assert_frame_equal(result._convert(datetime=True), data)

      def test_apply_attach_name(self, float_frame):
          """The Series handed to the func carries the column/row label as name."""
          result = float_frame.apply(lambda x: x.name)
          expected = Series(float_frame.columns, index=float_frame.columns)
          assert_series_equal(result, expected)

          result = float_frame.apply(lambda x: x.name, axis=1)
          expected = Series(float_frame.index, index=float_frame.index)
          assert_series_equal(result, expected)

          # non-reductions
          result = float_frame.apply(lambda x: np.repeat(x.name, len(x)))
          expected = DataFrame(np.tile(float_frame.columns,
                                       (len(float_frame.index), 1)),
                               index=float_frame.index,
                               columns=float_frame.columns)
          assert_frame_equal(result, expected)

          result = float_frame.apply(lambda x: np.repeat(x.name, len(x)),
                                     axis=1)
          expected = Series(np.repeat(t[0], len(float_frame.columns))
                            for t in float_frame.itertuples())
          expected.index = float_frame.index
          assert_series_equal(result, expected)

      def test_apply_multi_index(self, float_frame):
          """Row-wise func returning a Series on a MultiIndex-ed frame."""
          index = MultiIndex.from_arrays([['a', 'a', 'b'], ['c', 'd', 'd']])
          s = DataFrame([[1, 2], [3, 4], [5, 6]],
                        index=index,
                        columns=['col1', 'col2'])
          result = s.apply(
              lambda x: Series({'min': min(x), 'max': max(x)}), 1)
          expected = DataFrame([[1, 2], [3, 4], [5, 6]],
                               index=index,
                               columns=['min', 'max'])
          assert_frame_equal(result, expected, check_like=True)

      def test_apply_dict(self):
          """Funcs returning dicts under 'reduce', 'expand' and default (GH 8735)."""
          A = DataFrame([['foo', 'bar'], ['spam', 'eggs']])
          A_dicts = Series([dict([(0, 'foo'), (1, 'spam')]),
                            dict([(0, 'bar'), (1, 'eggs')])])
          B = DataFrame([[0, 1], [2, 3]])
          B_dicts = Series([dict([(0, 0), (1, 2)]), dict([(0, 1), (1, 3)])])
          fn = lambda x: x.to_dict()

          for df, dicts in [(A, A_dicts), (B, B_dicts)]:
              reduce_true = df.apply(fn, result_type='reduce')
              reduce_false = df.apply(fn, result_type='expand')
              reduce_none = df.apply(fn)

              assert_series_equal(reduce_true, dicts)
              assert_frame_equal(reduce_false, df)
              assert_series_equal(reduce_none, dicts)

      def test_applymap(self, float_frame):
          """Element-wise ``applymap`` across several regression scenarios."""
          applied = float_frame.applymap(lambda x: x * 2)
          tm.assert_frame_equal(applied, float_frame * 2)
          float_frame.applymap(type)

          # GH 465: function returning tuples
          result = float_frame.applymap(lambda x: (x, x))
          assert isinstance(result['A'][0], tuple)

          # GH 2909: object conversion to float in constructor?
          df = DataFrame(data=[1, 'a'])
          result = df.applymap(lambda x: x)
          assert result.dtypes[0] == object

          df = DataFrame(data=[1., 'a'])
          result = df.applymap(lambda x: x)
          assert result.dtypes[0] == object

          # GH 2786
          df = DataFrame(np.random.random((3, 4)))
          df2 = df.copy()
          cols = ['a', 'a', 'a', 'a']
          df.columns = cols

          expected = df2.applymap(str)
          expected.columns = cols
          result = df.applymap(str)
          tm.assert_frame_equal(result, expected)

          # datetime/timedelta
          df['datetime'] = Timestamp('20130101')
          df['timedelta'] = pd.Timedelta('1 min')
          result = df.applymap(str)
          for f in ['datetime', 'timedelta']:
              assert result.loc[0, f] == str(df.loc[0, f])

          # GH 8222
          empty_frames = [pd.DataFrame(),
                          pd.DataFrame(columns=list('ABC')),
                          pd.DataFrame(index=list('ABC')),
                          pd.DataFrame({'A': [], 'B': [], 'C': []})]
          for frame in empty_frames:
              for func in [round, lambda x: x]:
                  result = frame.applymap(func)
                  tm.assert_frame_equal(result, frame)

      def test_applymap_box_timestamps(self):
          """Smoke test: elements expose ``hour``/``day``/``month`` (GH 2689, 2627)."""
          ser = pd.Series(date_range('1/1/2000', periods=10))

          def func(x):
              return (x.hour, x.day, x.month)

          # it works!
          pd.DataFrame(ser).applymap(func)

      def test_applymap_box(self):
          """Elements reach the func as Timestamp/Timedelta/Period objects."""
          # ufunc will not be boxed. Same test cases as the test_map_box
          df = pd.DataFrame({'a': [pd.Timestamp('2011-01-01'),
                                   pd.Timestamp('2011-01-02')],
                             'b': [pd.Timestamp('2011-01-01', tz='US/Eastern'),
                                   pd.Timestamp('2011-01-02', tz='US/Eastern')],
                             'c': [pd.Timedelta('1 days'),
                                   pd.Timedelta('2 days')],
                             'd': [pd.Period('2011-01-01', freq='M'),
                                   pd.Period('2011-01-02', freq='M')]})

          result = df.applymap(lambda x: '{0}'.format(x.__class__.__name__))
          expected = pd.DataFrame({'a': ['Timestamp', 'Timestamp'],
                                   'b': ['Timestamp', 'Timestamp'],
                                   'c': ['Timedelta', 'Timedelta'],
                                   'd': ['Period', 'Period']})
          tm.assert_frame_equal(result, expected)

      def test_frame_apply_dont_convert_datetime64(self):
          """Adding ``BDay`` via ``applymap`` twice keeps the 'M8[ns]' dtype."""
          from pandas.tseries.offsets import BDay
          df = DataFrame({'x1': [datetime(1996, 1, 1)]})

          df = df.applymap(lambda x: x + BDay())
          df = df.applymap(lambda x: x + BDay())

          assert df.x1.dtype == 'M8[ns]'

      def test_apply_non_numpy_dtype(self):
          """Apply on tz-aware datetime and categorical columns (GH 12244)."""
          df = DataFrame({'dt': pd.date_range(
              "2015-01-01", periods=3, tz='Europe/Brussels')})
          result = df.apply(lambda x: x)
          assert_frame_equal(result, df)

          result = df.apply(lambda x: x + pd.Timedelta('1day'))
          expected = DataFrame({'dt': pd.date_range(
              "2015-01-02", periods=3, tz='Europe/Brussels')})
          assert_frame_equal(result, expected)

          df = DataFrame({'dt': ['a', 'b', 'c', 'a']}, dtype='category')
          result = df.apply(lambda x: x)
          assert_frame_equal(result, df)

      def test_apply_dup_names_multi_agg(self):
          """``agg`` with a list of funcs on duplicate column names (GH 21063)."""
          df = pd.DataFrame([[0, 1], [2, 3]], columns=['a', 'a'])
          expected = pd.DataFrame([[0, 1]], columns=['a', 'a'], index=['min'])
          result = df.agg(['min'])

          tm.assert_frame_equal(result, expected)
  class TestInferOutputShape(object):
      """
      Output-shape inference for ``DataFrame.apply``.

      The user has supplied an opaque UDF that transforms the input, so the
      shape of the output has to be inferred from what the UDF returns.
      """

      def test_infer_row_shape(self):
          """Column-wise funcs that change the row count are honoured (GH 17437)."""
          # if row shape is changing, infer it
          frame = pd.DataFrame(np.random.rand(10, 2))

          full = frame.apply(np.fft.fft, axis=0)
          assert full.shape == (10, 2)

          half = frame.apply(np.fft.rfft, axis=0)
          assert half.shape == (6, 2)

      def test_with_dictlike_columns(self):
          """Row-wise funcs returning dicts give a Series of dicts (GH 17602)."""
          def summed(row):
              return {'s': row['a'] + row['b']}

          df = DataFrame([[1, 2], [1, 2]], columns=['a', 'b'])
          result = df.apply(summed, axis=1)
          expected = Series([{'s': 3} for _ in df.itertuples()])
          assert_series_equal(result, expected)

          # same expectation once a Timestamp column is present
          df['tm'] = [pd.Timestamp('2017-05-01 00:00:00'),
                      pd.Timestamp('2017-05-02 00:00:00')]
          result = df.apply(summed, axis=1)
          assert_series_equal(result, expected)

          # compose a series
          totals = df['a'] + df['b']
          result = totals.apply(lambda x: {'s': x})
          expected = Series([{'s': 3}, {'s': 3}])
          assert_series_equal(result, expected)

          # GH 18775
          df = DataFrame()
          df["author"] = ["X", "Y", "Z"]
          df["publisher"] = ["BBC", "NBC", "N24"]
          df["date"] = pd.to_datetime(['17-10-2010 07:15:30',
                                       '13-05-2011 08:20:35',
                                       '15-01-2013 09:09:09'])
          result = df.apply(lambda x: {}, axis=1)
          expected = Series([{}, {}, {}])
          assert_series_equal(result, expected)

      def test_with_dictlike_columns_with_infer(self):
          """Dict returns with ``result_type='expand'`` become columns (GH 17602)."""
          def summed(row):
              return {'s': row['a'] + row['b']}

          df = DataFrame([[1, 2], [1, 2]], columns=['a', 'b'])
          expected = DataFrame({'s': [3, 3]})

          result = df.apply(summed, axis=1, result_type='expand')
          assert_frame_equal(result, expected)

          df['tm'] = [pd.Timestamp('2017-05-01 00:00:00'),
                      pd.Timestamp('2017-05-02 00:00:00')]
          result = df.apply(summed, axis=1, result_type='expand')
          assert_frame_equal(result, expected)

      def test_with_listlike_columns(self):
          """Row-wise funcs returning tuples or lists give a Series of them."""
          # GH 17348
          df = DataFrame({'a': Series(np.random.randn(4)),
                          'b': ['a', 'list', 'of', 'words'],
                          'ts': date_range('2016-10-01', periods=4, freq='H')})

          subset = df[['a', 'b']]
          result = subset.apply(tuple, axis=1)
          # drop the leading index entry of every itertuples() record
          expected = Series([rec[1:] for rec in subset.itertuples()])
          assert_series_equal(result, expected)

          subset = df[['a', 'ts']]
          result = subset.apply(tuple, axis=1)
          expected = Series([rec[1:] for rec in subset.itertuples()])
          assert_series_equal(result, expected)

          # GH 18919
          df = DataFrame({'x': Series([['a', 'b'], ['q']]),
                          'y': Series([['z'], ['q', 't']])})
          df.index = MultiIndex.from_tuples([('i0', 'j0'), ('i1', 'j1')])

          def common(row):
              return [el for el in row['x'] if el in row['y']]

          result = df.apply(common, axis=1)
          expected = Series([[], ['q']], index=df.index)
          assert_series_equal(result, expected)

      def test_infer_output_shape_columns(self):
          """Tuple returns from a mixed-dtype frame stay a Series (GH 18573)."""
          df = DataFrame({'number': [1., 2.],
                          'string': ['foo', 'bar'],
                          'datetime': [pd.Timestamp('2017-11-29 03:30:00'),
                                       pd.Timestamp('2017-11-29 03:45:00')]})
          result = df.apply(lambda row: (row.number, row.string), axis=1)
          expected = Series([(rec.number, rec.string)
                             for rec in df.itertuples()])
          assert_series_equal(result, expected)

      def test_infer_output_shape_listlike_columns(self):
          """List, ndarray and tuple returns are kept as Series elements."""
          # GH 16353
          df = DataFrame(np.random.randn(6, 3), columns=['A', 'B', 'C'])

          result = df.apply(lambda x: [1, 2, 3], axis=1)
          expected = Series([[1, 2, 3] for _ in df.itertuples()])
          assert_series_equal(result, expected)

          result = df.apply(lambda x: [1, 2], axis=1)
          expected = Series([[1, 2] for _ in df.itertuples()])
          assert_series_equal(result, expected)

          # GH 17970
          df = DataFrame({"a": [1, 2, 3]}, index=list('abc'))

          result = df.apply(lambda row: np.ones(1), axis=1)
          expected = Series([np.ones(1) for _ in df.itertuples()],
                            index=df.index)
          assert_series_equal(result, expected)

          result = df.apply(lambda row: np.ones(2), axis=1)
          expected = Series([np.ones(2) for _ in df.itertuples()],
                            index=df.index)
          assert_series_equal(result, expected)

          # GH 17892
          df = pd.DataFrame({'a': [pd.Timestamp('2010-02-01'),
                                   pd.Timestamp('2010-02-04'),
                                   pd.Timestamp('2010-02-05'),
                                   pd.Timestamp('2010-02-06')],
                             'b': [9, 5, 4, 3],
                             'c': [5, 3, 4, 2],
                             'd': [1, 2, 3, 4]})

          def fun(x):
              return (1, 2)

          result = df.apply(fun, axis=1)
          expected = Series([(1, 2) for _ in df.itertuples()])
          assert_series_equal(result, expected)

      def test_consistent_coerce_for_shapes(self):
          """List returns are not expanded even when their length fits the frame."""
          # we want column names to NOT be propagated
          # just because the shape matches the input shape
          df = DataFrame(np.random.randn(4, 3), columns=['A', 'B', 'C'])

          result = df.apply(lambda x: [1, 2, 3], axis=1)
          expected = Series([[1, 2, 3] for _ in df.itertuples()])
          assert_series_equal(result, expected)

          result = df.apply(lambda x: [1, 2], axis=1)
          expected = Series([[1, 2] for _ in df.itertuples()])
          assert_series_equal(result, expected)

      def test_consistent_names(self, int_frame_const_col):
          """A returned Series supplies the column names of the result."""
          # if a Series is returned, we should use the resulting index names
          df = int_frame_const_col

          three_names = ['test', 'other', 'cols']
          result = df.apply(lambda x: Series([1, 2, 3], index=three_names),
                            axis=1)
          expected = int_frame_const_col.rename(columns={'A': 'test',
                                                         'B': 'other',
                                                         'C': 'cols'})
          assert_frame_equal(result, expected)

          two_names = ['test', 'other']
          result = df.apply(lambda x: Series([1, 2], index=two_names),
                            axis=1)
          expected = expected[['test', 'other']]
          assert_frame_equal(result, expected)

      def test_result_type(self, int_frame_const_col):
          """Each ``result_type`` yields the same shape/labels on every code path."""
          # result_type should be consistent no matter which
          # path we take in the code
          df = int_frame_const_col

          # expand: positional column labels
          result = df.apply(lambda x: [1, 2, 3], axis=1, result_type='expand')
          expected = df.copy()
          expected.columns = [0, 1, 2]
          assert_frame_equal(result, expected)

          result = df.apply(lambda x: [1, 2], axis=1, result_type='expand')
          expected = df[['A', 'B']].copy()
          expected.columns = [0, 1]
          assert_frame_equal(result, expected)

          # broadcast result
          result = df.apply(lambda x: [1, 2, 3], axis=1, result_type='broadcast')
          expected = df.copy()
          assert_frame_equal(result, expected)

          columns = ['other', 'col', 'names']
          result = df.apply(lambda x: Series([1, 2, 3], index=columns),
                            axis=1, result_type='broadcast')
          expected = df.copy()
          assert_frame_equal(result, expected)

          # series result
          result = df.apply(lambda x: Series([1, 2, 3], index=x.index), axis=1)
          expected = df.copy()
          assert_frame_equal(result, expected)

          # series result with other index
          columns = ['other', 'col', 'names']
          result = df.apply(lambda x: Series([1, 2, 3], index=columns), axis=1)
          expected = df.copy()
          expected.columns = columns
          assert_frame_equal(result, expected)

      @pytest.mark.parametrize("result_type", ['foo', 1])
      def test_result_type_error(self, result_type, int_frame_const_col):
          """Unsupported ``result_type`` values raise ValueError."""
          # allowed result_type
          df = int_frame_const_col

          with pytest.raises(ValueError):
              df.apply(lambda x: [1, 2, 3], axis=1, result_type=result_type)

      @pytest.mark.parametrize(
          "box",
          [lambda x: list(x),
           lambda x: tuple(x),
           lambda x: np.array(x, dtype='int64')],
          ids=['list', 'tuple', 'array'])
      def test_consistency_for_boxed(self, box, int_frame_const_col):
          """The container type returned by the func does not change the shape."""
          # passing an array or list should not affect the output shape
          df = int_frame_const_col

          def boxed_pair(row):
              return box([1, 2])

          result = df.apply(boxed_pair, axis=1)
          expected = Series([box([1, 2]) for _ in df.itertuples()])
          assert_series_equal(result, expected)

          result = df.apply(boxed_pair, axis=1, result_type='expand')
          expected = int_frame_const_col[['A', 'B']].rename(columns={'A': 0,
                                                                     'B': 1})
          assert_frame_equal(result, expected)
  655. def zip_frames(frames, axis=1):
  656. """
  657. take a list of frames, zip them together under the
  658. assumption that these all have the first frames' index/columns.
  659. Returns
  660. -------
  661. new_frame : DataFrame
  662. """
  663. if axis == 1:
  664. columns = frames[0].columns
  665. zipped = [f.loc[:, c] for c in columns for f in frames]
  666. return pd.concat(zipped, axis=1)
  667. else:
  668. index = frames[0].index
  669. zipped = [f.loc[i, :] for i in index for f in frames]
  670. return pd.DataFrame(zipped)
  671. class TestDataFrameAggregate():
  672. def test_agg_transform(self, axis, float_frame):
  673. other_axis = 1 if axis in {0, 'index'} else 0
  674. with np.errstate(all='ignore'):
  675. f_abs = np.abs(float_frame)
  676. f_sqrt = np.sqrt(float_frame)
  677. # ufunc
  678. result = float_frame.transform(np.sqrt, axis=axis)
  679. expected = f_sqrt.copy()
  680. assert_frame_equal(result, expected)
  681. result = float_frame.apply(np.sqrt, axis=axis)
  682. assert_frame_equal(result, expected)
  683. result = float_frame.transform(np.sqrt, axis=axis)
  684. assert_frame_equal(result, expected)
  685. # list-like
  686. result = float_frame.apply([np.sqrt], axis=axis)
  687. expected = f_sqrt.copy()
  688. if axis in {0, 'index'}:
  689. expected.columns = pd.MultiIndex.from_product(
  690. [float_frame.columns, ['sqrt']])
  691. else:
  692. expected.index = pd.MultiIndex.from_product(
  693. [float_frame.index, ['sqrt']])
  694. assert_frame_equal(result, expected)
  695. result = float_frame.transform([np.sqrt], axis=axis)
  696. assert_frame_equal(result, expected)
  697. # multiple items in list
  698. # these are in the order as if we are applying both
  699. # functions per series and then concatting
  700. result = float_frame.apply([np.abs, np.sqrt], axis=axis)
  701. expected = zip_frames([f_abs, f_sqrt], axis=other_axis)
  702. if axis in {0, 'index'}:
  703. expected.columns = pd.MultiIndex.from_product(
  704. [float_frame.columns, ['absolute', 'sqrt']])
  705. else:
  706. expected.index = pd.MultiIndex.from_product(
  707. [float_frame.index, ['absolute', 'sqrt']])
  708. assert_frame_equal(result, expected)
  709. result = float_frame.transform([np.abs, 'sqrt'], axis=axis)
  710. assert_frame_equal(result, expected)
  711. def test_transform_and_agg_err(self, axis, float_frame):
  712. # cannot both transform and agg
  713. with pytest.raises(ValueError):
  714. float_frame.transform(['max', 'min'], axis=axis)
  715. with pytest.raises(ValueError):
  716. with np.errstate(all='ignore'):
  717. float_frame.agg(['max', 'sqrt'], axis=axis)
  718. with pytest.raises(ValueError):
  719. with np.errstate(all='ignore'):
  720. float_frame.transform(['max', 'sqrt'], axis=axis)
  721. df = pd.DataFrame({'A': range(5), 'B': 5})
  722. def f():
  723. with np.errstate(all='ignore'):
  724. df.agg({'A': ['abs', 'sum'], 'B': ['mean', 'max']}, axis=axis)
  725. @pytest.mark.parametrize('method', [
  726. 'abs', 'shift', 'pct_change', 'cumsum', 'rank',
  727. ])
  728. def test_transform_method_name(self, method):
  729. # GH 19760
  730. df = pd.DataFrame({"A": [-1, 2]})
  731. result = df.transform(method)
  732. expected = operator.methodcaller(method)(df)
  733. tm.assert_frame_equal(result, expected)
  734. def test_demo(self):
  735. # demonstration tests
  736. df = pd.DataFrame({'A': range(5), 'B': 5})
  737. result = df.agg(['min', 'max'])
  738. expected = DataFrame({'A': [0, 4], 'B': [5, 5]},
  739. columns=['A', 'B'],
  740. index=['min', 'max'])
  741. tm.assert_frame_equal(result, expected)
  742. result = df.agg({'A': ['min', 'max'], 'B': ['sum', 'max']})
  743. expected = DataFrame({'A': [4.0, 0.0, np.nan],
  744. 'B': [5.0, np.nan, 25.0]},
  745. columns=['A', 'B'],
  746. index=['max', 'min', 'sum'])
  747. tm.assert_frame_equal(result.reindex_like(expected), expected)
  748. def test_agg_multiple_mixed_no_warning(self):
  749. # GH 20909
  750. mdf = pd.DataFrame({'A': [1, 2, 3],
  751. 'B': [1., 2., 3.],
  752. 'C': ['foo', 'bar', 'baz'],
  753. 'D': pd.date_range('20130101', periods=3)})
  754. expected = pd.DataFrame({"A": [1, 6], 'B': [1.0, 6.0],
  755. "C": ['bar', 'foobarbaz'],
  756. "D": [pd.Timestamp('2013-01-01'), pd.NaT]},
  757. index=['min', 'sum'])
  758. # sorted index
  759. with tm.assert_produces_warning(None):
  760. result = mdf.agg(['min', 'sum'])
  761. tm.assert_frame_equal(result, expected)
  762. with tm.assert_produces_warning(None):
  763. result = mdf[['D', 'C', 'B', 'A']].agg(['sum', 'min'])
  764. # For backwards compatibility, the result's index is
  765. # still sorted by function name, so it's ['min', 'sum']
  766. # not ['sum', 'min'].
  767. expected = expected[['D', 'C', 'B', 'A']]
  768. tm.assert_frame_equal(result, expected)
  769. def test_agg_dict_nested_renaming_depr(self):
  770. df = pd.DataFrame({'A': range(5), 'B': 5})
  771. # nested renaming
  772. with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
  773. df.agg({'A': {'foo': 'min'},
  774. 'B': {'bar': 'max'}})
  775. def test_agg_reduce(self, axis, float_frame):
  776. other_axis = 1 if axis in {0, 'index'} else 0
  777. name1, name2 = float_frame.axes[other_axis].unique()[:2].sort_values()
  778. # all reducers
  779. expected = pd.concat([float_frame.mean(axis=axis),
  780. float_frame.max(axis=axis),
  781. float_frame.sum(axis=axis),
  782. ], axis=1)
  783. expected.columns = ['mean', 'max', 'sum']
  784. expected = expected.T if axis in {0, 'index'} else expected
  785. result = float_frame.agg(['mean', 'max', 'sum'], axis=axis)
  786. assert_frame_equal(result, expected)
  787. # dict input with scalars
  788. func = OrderedDict([(name1, 'mean'), (name2, 'sum')])
  789. result = float_frame.agg(func, axis=axis)
  790. expected = Series([float_frame.loc(other_axis)[name1].mean(),
  791. float_frame.loc(other_axis)[name2].sum()],
  792. index=[name1, name2])
  793. assert_series_equal(result, expected)
  794. # dict input with lists
  795. func = OrderedDict([(name1, ['mean']), (name2, ['sum'])])
  796. result = float_frame.agg(func, axis=axis)
  797. expected = DataFrame({
  798. name1: Series([float_frame.loc(other_axis)[name1].mean()],
  799. index=['mean']),
  800. name2: Series([float_frame.loc(other_axis)[name2].sum()],
  801. index=['sum'])})
  802. expected = expected.T if axis in {1, 'columns'} else expected
  803. assert_frame_equal(result, expected)
  804. # dict input with lists with multiple
  805. func = OrderedDict([(name1, ['mean', 'sum']), (name2, ['sum', 'max'])])
  806. result = float_frame.agg(func, axis=axis)
  807. expected = DataFrame(OrderedDict([
  808. (name1, Series([float_frame.loc(other_axis)[name1].mean(),
  809. float_frame.loc(other_axis)[name1].sum()],
  810. index=['mean', 'sum'])),
  811. (name2, Series([float_frame.loc(other_axis)[name2].sum(),
  812. float_frame.loc(other_axis)[name2].max()],
  813. index=['sum', 'max'])),
  814. ]))
  815. expected = expected.T if axis in {1, 'columns'} else expected
  816. assert_frame_equal(result, expected)
  817. def test_nuiscance_columns(self):
  818. # GH 15015
  819. df = DataFrame({'A': [1, 2, 3],
  820. 'B': [1., 2., 3.],
  821. 'C': ['foo', 'bar', 'baz'],
  822. 'D': pd.date_range('20130101', periods=3)})
  823. result = df.agg('min')
  824. expected = Series([1, 1., 'bar', pd.Timestamp('20130101')],
  825. index=df.columns)
  826. assert_series_equal(result, expected)
  827. result = df.agg(['min'])
  828. expected = DataFrame([[1, 1., 'bar', pd.Timestamp('20130101')]],
  829. index=['min'], columns=df.columns)
  830. assert_frame_equal(result, expected)
  831. result = df.agg('sum')
  832. expected = Series([6, 6., 'foobarbaz'],
  833. index=['A', 'B', 'C'])
  834. assert_series_equal(result, expected)
  835. result = df.agg(['sum'])
  836. expected = DataFrame([[6, 6., 'foobarbaz']],
  837. index=['sum'], columns=['A', 'B', 'C'])
  838. assert_frame_equal(result, expected)
  839. def test_non_callable_aggregates(self):
  840. # GH 16405
  841. # 'size' is a property of frame/series
  842. # validate that this is working
  843. df = DataFrame({'A': [None, 2, 3],
  844. 'B': [1.0, np.nan, 3.0],
  845. 'C': ['foo', None, 'bar']})
  846. # Function aggregate
  847. result = df.agg({'A': 'count'})
  848. expected = Series({'A': 2})
  849. assert_series_equal(result, expected)
  850. # Non-function aggregate
  851. result = df.agg({'A': 'size'})
  852. expected = Series({'A': 3})
  853. assert_series_equal(result, expected)
  854. # Mix function and non-function aggs
  855. result1 = df.agg(['count', 'size'])
  856. result2 = df.agg({'A': ['count', 'size'],
  857. 'B': ['count', 'size'],
  858. 'C': ['count', 'size']})
  859. expected = pd.DataFrame({'A': {'count': 2, 'size': 3},
  860. 'B': {'count': 2, 'size': 3},
  861. 'C': {'count': 2, 'size': 3}})
  862. assert_frame_equal(result1, result2, check_like=True)
  863. assert_frame_equal(result2, expected, check_like=True)
  864. # Just functional string arg is same as calling df.arg()
  865. result = df.agg('count')
  866. expected = df.count()
  867. assert_series_equal(result, expected)
  868. # Just a string attribute arg same as calling df.arg
  869. result = df.agg('size')
  870. expected = df.size
  871. assert result == expected
  872. @pytest.mark.parametrize("df, func, expected", chain(
  873. _get_cython_table_params(
  874. DataFrame(), [
  875. ('sum', Series()),
  876. ('max', Series()),
  877. ('min', Series()),
  878. ('all', Series(dtype=bool)),
  879. ('any', Series(dtype=bool)),
  880. ('mean', Series()),
  881. ('prod', Series()),
  882. ('std', Series()),
  883. ('var', Series()),
  884. ('median', Series()),
  885. ]),
  886. _get_cython_table_params(
  887. DataFrame([[np.nan, 1], [1, 2]]), [
  888. ('sum', Series([1., 3])),
  889. ('max', Series([1., 2])),
  890. ('min', Series([1., 1])),
  891. ('all', Series([True, True])),
  892. ('any', Series([True, True])),
  893. ('mean', Series([1, 1.5])),
  894. ('prod', Series([1., 2])),
  895. ('std', Series([np.nan, 0.707107])),
  896. ('var', Series([np.nan, 0.5])),
  897. ('median', Series([1, 1.5])),
  898. ]),
  899. ))
  900. def test_agg_cython_table(self, df, func, expected, axis):
  901. # GH 21224
  902. # test reducing functions in
  903. # pandas.core.base.SelectionMixin._cython_table
  904. result = df.agg(func, axis=axis)
  905. tm.assert_series_equal(result, expected)
  906. @pytest.mark.parametrize("df, func, expected", chain(
  907. _get_cython_table_params(
  908. DataFrame(), [
  909. ('cumprod', DataFrame()),
  910. ('cumsum', DataFrame()),
  911. ]),
  912. _get_cython_table_params(
  913. DataFrame([[np.nan, 1], [1, 2]]), [
  914. ('cumprod', DataFrame([[np.nan, 1], [1., 2.]])),
  915. ('cumsum', DataFrame([[np.nan, 1], [1., 3.]])),
  916. ]),
  917. ))
  918. def test_agg_cython_table_transform(self, df, func, expected, axis):
  919. # GH 21224
  920. # test transforming functions in
  921. # pandas.core.base.SelectionMixin._cython_table (cumprod, cumsum)
  922. result = df.agg(func, axis=axis)
  923. tm.assert_frame_equal(result, expected)
  924. @pytest.mark.parametrize("df, func, expected", _get_cython_table_params(
  925. DataFrame([['a', 'b'], ['b', 'a']]), [
  926. ['cumprod', TypeError],
  927. ]),
  928. )
  929. def test_agg_cython_table_raises(self, df, func, expected, axis):
  930. # GH 21224
  931. with pytest.raises(expected):
  932. df.agg(func, axis=axis)
  933. @pytest.mark.parametrize("num_cols", [2, 3, 5])
  934. def test_frequency_is_original(self, num_cols):
  935. # GH 22150
  936. index = pd.DatetimeIndex(["1950-06-30", "1952-10-24", "1953-05-29"])
  937. original = index.copy()
  938. df = DataFrame(1, index=index, columns=range(num_cols))
  939. df.apply(lambda x: x)
  940. assert index.freq == original.freq