# test_apply.py (24 KB)
  1. # coding=utf-8
  2. # pylint: disable-msg=E1101,W0612
  3. from collections import Counter, OrderedDict, defaultdict
  4. from itertools import chain
  5. import numpy as np
  6. import pytest
  7. import pandas.compat as compat
  8. from pandas.compat import lrange
  9. import pandas as pd
  10. from pandas import DataFrame, Index, Series, isna
  11. from pandas.conftest import _get_cython_table_params
  12. import pandas.util.testing as tm
  13. from pandas.util.testing import assert_frame_equal, assert_series_equal
  14. class TestSeriesApply():
  15. def test_apply(self, datetime_series):
  16. with np.errstate(all='ignore'):
  17. tm.assert_series_equal(datetime_series.apply(np.sqrt),
  18. np.sqrt(datetime_series))
  19. # element-wise apply
  20. import math
  21. tm.assert_series_equal(datetime_series.apply(math.exp),
  22. np.exp(datetime_series))
  23. # empty series
  24. s = Series(dtype=object, name='foo', index=pd.Index([], name='bar'))
  25. rs = s.apply(lambda x: x)
  26. tm.assert_series_equal(s, rs)
  27. # check all metadata (GH 9322)
  28. assert s is not rs
  29. assert s.index is rs.index
  30. assert s.dtype == rs.dtype
  31. assert s.name == rs.name
  32. # index but no data
  33. s = Series(index=[1, 2, 3])
  34. rs = s.apply(lambda x: x)
  35. tm.assert_series_equal(s, rs)
  36. def test_apply_same_length_inference_bug(self):
  37. s = Series([1, 2])
  38. f = lambda x: (x, x + 1)
  39. result = s.apply(f)
  40. expected = s.map(f)
  41. assert_series_equal(result, expected)
  42. s = Series([1, 2, 3])
  43. result = s.apply(f)
  44. expected = s.map(f)
  45. assert_series_equal(result, expected)
  46. def test_apply_dont_convert_dtype(self):
  47. s = Series(np.random.randn(10))
  48. f = lambda x: x if x > 0 else np.nan
  49. result = s.apply(f, convert_dtype=False)
  50. assert result.dtype == object
  51. def test_with_string_args(self, datetime_series):
  52. for arg in ['sum', 'mean', 'min', 'max', 'std']:
  53. result = datetime_series.apply(arg)
  54. expected = getattr(datetime_series, arg)()
  55. assert result == expected
  56. def test_apply_args(self):
  57. s = Series(['foo,bar'])
  58. result = s.apply(str.split, args=(',', ))
  59. assert result[0] == ['foo', 'bar']
  60. assert isinstance(result[0], list)
  61. def test_series_map_box_timestamps(self):
  62. # GH#2689, GH#2627
  63. ser = Series(pd.date_range('1/1/2000', periods=10))
  64. def func(x):
  65. return (x.hour, x.day, x.month)
  66. # it works!
  67. ser.map(func)
  68. ser.apply(func)
  69. def test_apply_box(self):
  70. # ufunc will not be boxed. Same test cases as the test_map_box
  71. vals = [pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-02')]
  72. s = pd.Series(vals)
  73. assert s.dtype == 'datetime64[ns]'
  74. # boxed value must be Timestamp instance
  75. res = s.apply(lambda x: '{0}_{1}_{2}'.format(x.__class__.__name__,
  76. x.day, x.tz))
  77. exp = pd.Series(['Timestamp_1_None', 'Timestamp_2_None'])
  78. tm.assert_series_equal(res, exp)
  79. vals = [pd.Timestamp('2011-01-01', tz='US/Eastern'),
  80. pd.Timestamp('2011-01-02', tz='US/Eastern')]
  81. s = pd.Series(vals)
  82. assert s.dtype == 'datetime64[ns, US/Eastern]'
  83. res = s.apply(lambda x: '{0}_{1}_{2}'.format(x.__class__.__name__,
  84. x.day, x.tz))
  85. exp = pd.Series(['Timestamp_1_US/Eastern', 'Timestamp_2_US/Eastern'])
  86. tm.assert_series_equal(res, exp)
  87. # timedelta
  88. vals = [pd.Timedelta('1 days'), pd.Timedelta('2 days')]
  89. s = pd.Series(vals)
  90. assert s.dtype == 'timedelta64[ns]'
  91. res = s.apply(lambda x: '{0}_{1}'.format(x.__class__.__name__, x.days))
  92. exp = pd.Series(['Timedelta_1', 'Timedelta_2'])
  93. tm.assert_series_equal(res, exp)
  94. # period
  95. vals = [pd.Period('2011-01-01', freq='M'),
  96. pd.Period('2011-01-02', freq='M')]
  97. s = pd.Series(vals)
  98. assert s.dtype == 'Period[M]'
  99. res = s.apply(lambda x: '{0}_{1}'.format(x.__class__.__name__,
  100. x.freqstr))
  101. exp = pd.Series(['Period_M', 'Period_M'])
  102. tm.assert_series_equal(res, exp)
  103. def test_apply_datetimetz(self):
  104. values = pd.date_range('2011-01-01', '2011-01-02',
  105. freq='H').tz_localize('Asia/Tokyo')
  106. s = pd.Series(values, name='XX')
  107. result = s.apply(lambda x: x + pd.offsets.Day())
  108. exp_values = pd.date_range('2011-01-02', '2011-01-03',
  109. freq='H').tz_localize('Asia/Tokyo')
  110. exp = pd.Series(exp_values, name='XX')
  111. tm.assert_series_equal(result, exp)
  112. # change dtype
  113. # GH 14506 : Returned dtype changed from int32 to int64
  114. result = s.apply(lambda x: x.hour)
  115. exp = pd.Series(list(range(24)) + [0], name='XX', dtype=np.int64)
  116. tm.assert_series_equal(result, exp)
  117. # not vectorized
  118. def f(x):
  119. if not isinstance(x, pd.Timestamp):
  120. raise ValueError
  121. return str(x.tz)
  122. result = s.map(f)
  123. exp = pd.Series(['Asia/Tokyo'] * 25, name='XX')
  124. tm.assert_series_equal(result, exp)
  125. def test_apply_dict_depr(self):
  126. tsdf = pd.DataFrame(np.random.randn(10, 3),
  127. columns=['A', 'B', 'C'],
  128. index=pd.date_range('1/1/2000', periods=10))
  129. with tm.assert_produces_warning(FutureWarning):
  130. tsdf.A.agg({'foo': ['sum', 'mean']})
  131. class TestSeriesAggregate():
  132. def test_transform(self, string_series):
  133. # transforming functions
  134. with np.errstate(all='ignore'):
  135. f_sqrt = np.sqrt(string_series)
  136. f_abs = np.abs(string_series)
  137. # ufunc
  138. result = string_series.transform(np.sqrt)
  139. expected = f_sqrt.copy()
  140. assert_series_equal(result, expected)
  141. result = string_series.apply(np.sqrt)
  142. assert_series_equal(result, expected)
  143. # list-like
  144. result = string_series.transform([np.sqrt])
  145. expected = f_sqrt.to_frame().copy()
  146. expected.columns = ['sqrt']
  147. assert_frame_equal(result, expected)
  148. result = string_series.transform([np.sqrt])
  149. assert_frame_equal(result, expected)
  150. result = string_series.transform(['sqrt'])
  151. assert_frame_equal(result, expected)
  152. # multiple items in list
  153. # these are in the order as if we are applying both functions per
  154. # series and then concatting
  155. expected = pd.concat([f_sqrt, f_abs], axis=1)
  156. expected.columns = ['sqrt', 'absolute']
  157. result = string_series.apply([np.sqrt, np.abs])
  158. assert_frame_equal(result, expected)
  159. result = string_series.transform(['sqrt', 'abs'])
  160. expected.columns = ['sqrt', 'abs']
  161. assert_frame_equal(result, expected)
  162. # dict, provide renaming
  163. expected = pd.concat([f_sqrt, f_abs], axis=1)
  164. expected.columns = ['foo', 'bar']
  165. expected = expected.unstack().rename('series')
  166. result = string_series.apply({'foo': np.sqrt, 'bar': np.abs})
  167. assert_series_equal(result.reindex_like(expected), expected)
  168. def test_transform_and_agg_error(self, string_series):
  169. # we are trying to transform with an aggregator
  170. with pytest.raises(ValueError):
  171. string_series.transform(['min', 'max'])
  172. with pytest.raises(ValueError):
  173. with np.errstate(all='ignore'):
  174. string_series.agg(['sqrt', 'max'])
  175. with pytest.raises(ValueError):
  176. with np.errstate(all='ignore'):
  177. string_series.transform(['sqrt', 'max'])
  178. with pytest.raises(ValueError):
  179. with np.errstate(all='ignore'):
  180. string_series.agg({'foo': np.sqrt, 'bar': 'sum'})
  181. def test_demo(self):
  182. # demonstration tests
  183. s = Series(range(6), dtype='int64', name='series')
  184. result = s.agg(['min', 'max'])
  185. expected = Series([0, 5], index=['min', 'max'], name='series')
  186. tm.assert_series_equal(result, expected)
  187. result = s.agg({'foo': 'min'})
  188. expected = Series([0], index=['foo'], name='series')
  189. tm.assert_series_equal(result, expected)
  190. # nested renaming
  191. with tm.assert_produces_warning(FutureWarning):
  192. result = s.agg({'foo': ['min', 'max']})
  193. expected = DataFrame(
  194. {'foo': [0, 5]},
  195. index=['min', 'max']).unstack().rename('series')
  196. tm.assert_series_equal(result, expected)
  197. def test_multiple_aggregators_with_dict_api(self):
  198. s = Series(range(6), dtype='int64', name='series')
  199. # nested renaming
  200. with tm.assert_produces_warning(FutureWarning):
  201. result = s.agg({'foo': ['min', 'max'], 'bar': ['sum', 'mean']})
  202. expected = DataFrame(
  203. {'foo': [5.0, np.nan, 0.0, np.nan],
  204. 'bar': [np.nan, 2.5, np.nan, 15.0]},
  205. columns=['foo', 'bar'],
  206. index=['max', 'mean',
  207. 'min', 'sum']).unstack().rename('series')
  208. tm.assert_series_equal(result.reindex_like(expected), expected)
  209. def test_agg_apply_evaluate_lambdas_the_same(self, string_series):
  210. # test that we are evaluating row-by-row first
  211. # before vectorized evaluation
  212. result = string_series.apply(lambda x: str(x))
  213. expected = string_series.agg(lambda x: str(x))
  214. tm.assert_series_equal(result, expected)
  215. result = string_series.apply(str)
  216. expected = string_series.agg(str)
  217. tm.assert_series_equal(result, expected)
  218. def test_with_nested_series(self, datetime_series):
  219. # GH 2316
  220. # .agg with a reducer and a transform, what to do
  221. result = datetime_series.apply(lambda x: Series(
  222. [x, x ** 2], index=['x', 'x^2']))
  223. expected = DataFrame({'x': datetime_series,
  224. 'x^2': datetime_series ** 2})
  225. tm.assert_frame_equal(result, expected)
  226. result = datetime_series.agg(lambda x: Series(
  227. [x, x ** 2], index=['x', 'x^2']))
  228. tm.assert_frame_equal(result, expected)
  229. def test_replicate_describe(self, string_series):
  230. # this also tests a result set that is all scalars
  231. expected = string_series.describe()
  232. result = string_series.apply(OrderedDict(
  233. [('count', 'count'),
  234. ('mean', 'mean'),
  235. ('std', 'std'),
  236. ('min', 'min'),
  237. ('25%', lambda x: x.quantile(0.25)),
  238. ('50%', 'median'),
  239. ('75%', lambda x: x.quantile(0.75)),
  240. ('max', 'max')]))
  241. assert_series_equal(result, expected)
  242. def test_reduce(self, string_series):
  243. # reductions with named functions
  244. result = string_series.agg(['sum', 'mean'])
  245. expected = Series([string_series.sum(),
  246. string_series.mean()],
  247. ['sum', 'mean'],
  248. name=string_series.name)
  249. assert_series_equal(result, expected)
  250. def test_non_callable_aggregates(self):
  251. # test agg using non-callable series attributes
  252. s = Series([1, 2, None])
  253. # Calling agg w/ just a string arg same as calling s.arg
  254. result = s.agg('size')
  255. expected = s.size
  256. assert result == expected
  257. # test when mixed w/ callable reducers
  258. result = s.agg(['size', 'count', 'mean'])
  259. expected = Series(OrderedDict([('size', 3.0),
  260. ('count', 2.0),
  261. ('mean', 1.5)]))
  262. assert_series_equal(result[expected.index], expected)
  263. @pytest.mark.parametrize("series, func, expected", chain(
  264. _get_cython_table_params(Series(), [
  265. ('sum', 0),
  266. ('max', np.nan),
  267. ('min', np.nan),
  268. ('all', True),
  269. ('any', False),
  270. ('mean', np.nan),
  271. ('prod', 1),
  272. ('std', np.nan),
  273. ('var', np.nan),
  274. ('median', np.nan),
  275. ]),
  276. _get_cython_table_params(Series([np.nan, 1, 2, 3]), [
  277. ('sum', 6),
  278. ('max', 3),
  279. ('min', 1),
  280. ('all', True),
  281. ('any', True),
  282. ('mean', 2),
  283. ('prod', 6),
  284. ('std', 1),
  285. ('var', 1),
  286. ('median', 2),
  287. ]),
  288. _get_cython_table_params(Series('a b c'.split()), [
  289. ('sum', 'abc'),
  290. ('max', 'c'),
  291. ('min', 'a'),
  292. ('all', 'c'), # see GH12863
  293. ('any', 'a'),
  294. ]),
  295. ))
  296. def test_agg_cython_table(self, series, func, expected):
  297. # GH21224
  298. # test reducing functions in
  299. # pandas.core.base.SelectionMixin._cython_table
  300. result = series.agg(func)
  301. if tm.is_number(expected):
  302. assert np.isclose(result, expected, equal_nan=True)
  303. else:
  304. assert result == expected
  305. @pytest.mark.parametrize("series, func, expected", chain(
  306. _get_cython_table_params(Series(), [
  307. ('cumprod', Series([], Index([]))),
  308. ('cumsum', Series([], Index([]))),
  309. ]),
  310. _get_cython_table_params(Series([np.nan, 1, 2, 3]), [
  311. ('cumprod', Series([np.nan, 1, 2, 6])),
  312. ('cumsum', Series([np.nan, 1, 3, 6])),
  313. ]),
  314. _get_cython_table_params(Series('a b c'.split()), [
  315. ('cumsum', Series(['a', 'ab', 'abc'])),
  316. ]),
  317. ))
  318. def test_agg_cython_table_transform(self, series, func, expected):
  319. # GH21224
  320. # test transforming functions in
  321. # pandas.core.base.SelectionMixin._cython_table (cumprod, cumsum)
  322. result = series.agg(func)
  323. tm.assert_series_equal(result, expected)
  324. @pytest.mark.parametrize("series, func, expected", chain(
  325. _get_cython_table_params(Series('a b c'.split()), [
  326. ('mean', TypeError), # mean raises TypeError
  327. ('prod', TypeError),
  328. ('std', TypeError),
  329. ('var', TypeError),
  330. ('median', TypeError),
  331. ('cumprod', TypeError),
  332. ])
  333. ))
  334. def test_agg_cython_table_raises(self, series, func, expected):
  335. # GH21224
  336. with pytest.raises(expected):
  337. # e.g. Series('a b'.split()).cumprod() will raise
  338. series.agg(func)
  339. class TestSeriesMap():
  340. def test_map(self, datetime_series):
  341. index, data = tm.getMixedTypeDict()
  342. source = Series(data['B'], index=data['C'])
  343. target = Series(data['C'][:4], index=data['D'][:4])
  344. merged = target.map(source)
  345. for k, v in compat.iteritems(merged):
  346. assert v == source[target[k]]
  347. # input could be a dict
  348. merged = target.map(source.to_dict())
  349. for k, v in compat.iteritems(merged):
  350. assert v == source[target[k]]
  351. # function
  352. result = datetime_series.map(lambda x: x * 2)
  353. tm.assert_series_equal(result, datetime_series * 2)
  354. # GH 10324
  355. a = Series([1, 2, 3, 4])
  356. b = Series(["even", "odd", "even", "odd"], dtype="category")
  357. c = Series(["even", "odd", "even", "odd"])
  358. exp = Series(["odd", "even", "odd", np.nan], dtype="category")
  359. tm.assert_series_equal(a.map(b), exp)
  360. exp = Series(["odd", "even", "odd", np.nan])
  361. tm.assert_series_equal(a.map(c), exp)
  362. a = Series(['a', 'b', 'c', 'd'])
  363. b = Series([1, 2, 3, 4],
  364. index=pd.CategoricalIndex(['b', 'c', 'd', 'e']))
  365. c = Series([1, 2, 3, 4], index=Index(['b', 'c', 'd', 'e']))
  366. exp = Series([np.nan, 1, 2, 3])
  367. tm.assert_series_equal(a.map(b), exp)
  368. exp = Series([np.nan, 1, 2, 3])
  369. tm.assert_series_equal(a.map(c), exp)
  370. a = Series(['a', 'b', 'c', 'd'])
  371. b = Series(['B', 'C', 'D', 'E'], dtype='category',
  372. index=pd.CategoricalIndex(['b', 'c', 'd', 'e']))
  373. c = Series(['B', 'C', 'D', 'E'], index=Index(['b', 'c', 'd', 'e']))
  374. exp = Series(pd.Categorical([np.nan, 'B', 'C', 'D'],
  375. categories=['B', 'C', 'D', 'E']))
  376. tm.assert_series_equal(a.map(b), exp)
  377. exp = Series([np.nan, 'B', 'C', 'D'])
  378. tm.assert_series_equal(a.map(c), exp)
  379. @pytest.mark.parametrize("index", tm.all_index_generator(10))
  380. def test_map_empty(self, index):
  381. s = Series(index)
  382. result = s.map({})
  383. expected = pd.Series(np.nan, index=s.index)
  384. tm.assert_series_equal(result, expected)
  385. def test_map_compat(self):
  386. # related GH 8024
  387. s = Series([True, True, False], index=[1, 2, 3])
  388. result = s.map({True: 'foo', False: 'bar'})
  389. expected = Series(['foo', 'foo', 'bar'], index=[1, 2, 3])
  390. assert_series_equal(result, expected)
  391. def test_map_int(self):
  392. left = Series({'a': 1., 'b': 2., 'c': 3., 'd': 4})
  393. right = Series({1: 11, 2: 22, 3: 33})
  394. assert left.dtype == np.float_
  395. assert issubclass(right.dtype.type, np.integer)
  396. merged = left.map(right)
  397. assert merged.dtype == np.float_
  398. assert isna(merged['d'])
  399. assert not isna(merged['c'])
  400. def test_map_type_inference(self):
  401. s = Series(lrange(3))
  402. s2 = s.map(lambda x: np.where(x == 0, 0, 1))
  403. assert issubclass(s2.dtype.type, np.integer)
  404. def test_map_decimal(self, string_series):
  405. from decimal import Decimal
  406. result = string_series.map(lambda x: Decimal(str(x)))
  407. assert result.dtype == np.object_
  408. assert isinstance(result[0], Decimal)
  409. def test_map_na_exclusion(self):
  410. s = Series([1.5, np.nan, 3, np.nan, 5])
  411. result = s.map(lambda x: x * 2, na_action='ignore')
  412. exp = s * 2
  413. assert_series_equal(result, exp)
  414. def test_map_dict_with_tuple_keys(self):
  415. """
  416. Due to new MultiIndex-ing behaviour in v0.14.0,
  417. dicts with tuple keys passed to map were being
  418. converted to a multi-index, preventing tuple values
  419. from being mapped properly.
  420. """
  421. # GH 18496
  422. df = pd.DataFrame({'a': [(1, ), (2, ), (3, 4), (5, 6)]})
  423. label_mappings = {(1, ): 'A', (2, ): 'B', (3, 4): 'A', (5, 6): 'B'}
  424. df['labels'] = df['a'].map(label_mappings)
  425. df['expected_labels'] = pd.Series(['A', 'B', 'A', 'B'], index=df.index)
  426. # All labels should be filled now
  427. tm.assert_series_equal(df['labels'], df['expected_labels'],
  428. check_names=False)
  429. def test_map_counter(self):
  430. s = Series(['a', 'b', 'c'], index=[1, 2, 3])
  431. counter = Counter()
  432. counter['b'] = 5
  433. counter['c'] += 1
  434. result = s.map(counter)
  435. expected = Series([0, 5, 1], index=[1, 2, 3])
  436. assert_series_equal(result, expected)
  437. def test_map_defaultdict(self):
  438. s = Series([1, 2, 3], index=['a', 'b', 'c'])
  439. default_dict = defaultdict(lambda: 'blank')
  440. default_dict[1] = 'stuff'
  441. result = s.map(default_dict)
  442. expected = Series(['stuff', 'blank', 'blank'], index=['a', 'b', 'c'])
  443. assert_series_equal(result, expected)
  444. def test_map_dict_subclass_with_missing(self):
  445. """
  446. Test Series.map with a dictionary subclass that defines __missing__,
  447. i.e. sets a default value (GH #15999).
  448. """
  449. class DictWithMissing(dict):
  450. def __missing__(self, key):
  451. return 'missing'
  452. s = Series([1, 2, 3])
  453. dictionary = DictWithMissing({3: 'three'})
  454. result = s.map(dictionary)
  455. expected = Series(['missing', 'missing', 'three'])
  456. assert_series_equal(result, expected)
  457. def test_map_dict_subclass_without_missing(self):
  458. class DictWithoutMissing(dict):
  459. pass
  460. s = Series([1, 2, 3])
  461. dictionary = DictWithoutMissing({3: 'three'})
  462. result = s.map(dictionary)
  463. expected = Series([np.nan, np.nan, 'three'])
  464. assert_series_equal(result, expected)
  465. def test_map_box(self):
  466. vals = [pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-02')]
  467. s = pd.Series(vals)
  468. assert s.dtype == 'datetime64[ns]'
  469. # boxed value must be Timestamp instance
  470. res = s.map(lambda x: '{0}_{1}_{2}'.format(x.__class__.__name__,
  471. x.day, x.tz))
  472. exp = pd.Series(['Timestamp_1_None', 'Timestamp_2_None'])
  473. tm.assert_series_equal(res, exp)
  474. vals = [pd.Timestamp('2011-01-01', tz='US/Eastern'),
  475. pd.Timestamp('2011-01-02', tz='US/Eastern')]
  476. s = pd.Series(vals)
  477. assert s.dtype == 'datetime64[ns, US/Eastern]'
  478. res = s.map(lambda x: '{0}_{1}_{2}'.format(x.__class__.__name__,
  479. x.day, x.tz))
  480. exp = pd.Series(['Timestamp_1_US/Eastern', 'Timestamp_2_US/Eastern'])
  481. tm.assert_series_equal(res, exp)
  482. # timedelta
  483. vals = [pd.Timedelta('1 days'), pd.Timedelta('2 days')]
  484. s = pd.Series(vals)
  485. assert s.dtype == 'timedelta64[ns]'
  486. res = s.map(lambda x: '{0}_{1}'.format(x.__class__.__name__, x.days))
  487. exp = pd.Series(['Timedelta_1', 'Timedelta_2'])
  488. tm.assert_series_equal(res, exp)
  489. # period
  490. vals = [pd.Period('2011-01-01', freq='M'),
  491. pd.Period('2011-01-02', freq='M')]
  492. s = pd.Series(vals)
  493. assert s.dtype == 'Period[M]'
  494. res = s.map(lambda x: '{0}_{1}'.format(x.__class__.__name__,
  495. x.freqstr))
  496. exp = pd.Series(['Period_M', 'Period_M'])
  497. tm.assert_series_equal(res, exp)
  498. def test_map_categorical(self):
  499. values = pd.Categorical(list('ABBABCD'), categories=list('DCBA'),
  500. ordered=True)
  501. s = pd.Series(values, name='XX', index=list('abcdefg'))
  502. result = s.map(lambda x: x.lower())
  503. exp_values = pd.Categorical(list('abbabcd'), categories=list('dcba'),
  504. ordered=True)
  505. exp = pd.Series(exp_values, name='XX', index=list('abcdefg'))
  506. tm.assert_series_equal(result, exp)
  507. tm.assert_categorical_equal(result.values, exp_values)
  508. result = s.map(lambda x: 'A')
  509. exp = pd.Series(['A'] * 7, name='XX', index=list('abcdefg'))
  510. tm.assert_series_equal(result, exp)
  511. assert result.dtype == np.object
  512. with pytest.raises(NotImplementedError):
  513. s.map(lambda x: x, na_action='ignore')
  514. def test_map_datetimetz(self):
  515. values = pd.date_range('2011-01-01', '2011-01-02',
  516. freq='H').tz_localize('Asia/Tokyo')
  517. s = pd.Series(values, name='XX')
  518. # keep tz
  519. result = s.map(lambda x: x + pd.offsets.Day())
  520. exp_values = pd.date_range('2011-01-02', '2011-01-03',
  521. freq='H').tz_localize('Asia/Tokyo')
  522. exp = pd.Series(exp_values, name='XX')
  523. tm.assert_series_equal(result, exp)
  524. # change dtype
  525. # GH 14506 : Returned dtype changed from int32 to int64
  526. result = s.map(lambda x: x.hour)
  527. exp = pd.Series(list(range(24)) + [0], name='XX', dtype=np.int64)
  528. tm.assert_series_equal(result, exp)
  529. with pytest.raises(NotImplementedError):
  530. s.map(lambda x: x, na_action='ignore')
  531. # not vectorized
  532. def f(x):
  533. if not isinstance(x, pd.Timestamp):
  534. raise ValueError
  535. return str(x.tz)
  536. result = s.map(f)
  537. exp = pd.Series(['Asia/Tokyo'] * 25, name='XX')
  538. tm.assert_series_equal(result, exp)
  539. @pytest.mark.parametrize("vals,mapping,exp", [
  540. (list('abc'), {np.nan: 'not NaN'}, [np.nan] * 3 + ['not NaN']),
  541. (list('abc'), {'a': 'a letter'}, ['a letter'] + [np.nan] * 3),
  542. (list(range(3)), {0: 42}, [42] + [np.nan] * 3)])
  543. def test_map_missing_mixed(self, vals, mapping, exp):
  544. # GH20495
  545. s = pd.Series(vals + [np.nan])
  546. result = s.map(mapping)
  547. tm.assert_series_equal(result, pd.Series(exp))