test_api.py 26 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719
  1. # coding=utf-8
  2. # pylint: disable-msg=E1101,W0612
  3. from collections import OrderedDict
  4. import pydoc
  5. import warnings
  6. import numpy as np
  7. import pytest
  8. import pandas.compat as compat
  9. from pandas.compat import isidentifier, lzip, range, string_types
  10. import pandas as pd
  11. from pandas import (
  12. Categorical, DataFrame, DatetimeIndex, Index, Series, TimedeltaIndex,
  13. date_range, period_range, timedelta_range)
  14. from pandas.core.arrays import PeriodArray
  15. from pandas.core.indexes.datetimes import Timestamp
  16. import pandas.util.testing as tm
  17. from pandas.util.testing import assert_series_equal, ensure_clean
  18. import pandas.io.formats.printing as printing
  19. from .common import TestData
  20. class SharedWithSparse(object):
  21. """
  22. A collection of tests Series and SparseSeries can share.
  23. In generic tests on this class, use ``self._assert_series_equal()``
  24. which is implemented in sub-classes.
  25. """
  26. def _assert_series_equal(self, left, right):
  27. """Dispatch to series class dependent assertion"""
  28. raise NotImplementedError
  29. def test_scalarop_preserve_name(self):
  30. result = self.ts * 2
  31. assert result.name == self.ts.name
  32. def test_copy_name(self):
  33. result = self.ts.copy()
  34. assert result.name == self.ts.name
  35. def test_copy_index_name_checking(self):
  36. # don't want to be able to modify the index stored elsewhere after
  37. # making a copy
  38. self.ts.index.name = None
  39. assert self.ts.index.name is None
  40. assert self.ts is self.ts
  41. cp = self.ts.copy()
  42. cp.index.name = 'foo'
  43. printing.pprint_thing(self.ts.index.name)
  44. assert self.ts.index.name is None
  45. def test_append_preserve_name(self):
  46. result = self.ts[:5].append(self.ts[5:])
  47. assert result.name == self.ts.name
  48. def test_binop_maybe_preserve_name(self):
  49. # names match, preserve
  50. result = self.ts * self.ts
  51. assert result.name == self.ts.name
  52. result = self.ts.mul(self.ts)
  53. assert result.name == self.ts.name
  54. result = self.ts * self.ts[:-2]
  55. assert result.name == self.ts.name
  56. # names don't match, don't preserve
  57. cp = self.ts.copy()
  58. cp.name = 'something else'
  59. result = self.ts + cp
  60. assert result.name is None
  61. result = self.ts.add(cp)
  62. assert result.name is None
  63. ops = ['add', 'sub', 'mul', 'div', 'truediv', 'floordiv', 'mod', 'pow']
  64. ops = ops + ['r' + op for op in ops]
  65. for op in ops:
  66. # names match, preserve
  67. s = self.ts.copy()
  68. result = getattr(s, op)(s)
  69. assert result.name == self.ts.name
  70. # names don't match, don't preserve
  71. cp = self.ts.copy()
  72. cp.name = 'changed'
  73. result = getattr(s, op)(cp)
  74. assert result.name is None
  75. def test_combine_first_name(self):
  76. result = self.ts.combine_first(self.ts[:5])
  77. assert result.name == self.ts.name
  78. def test_getitem_preserve_name(self):
  79. result = self.ts[self.ts > 0]
  80. assert result.name == self.ts.name
  81. result = self.ts[[0, 2, 4]]
  82. assert result.name == self.ts.name
  83. result = self.ts[5:10]
  84. assert result.name == self.ts.name
  85. def test_pickle(self):
  86. unp_series = self._pickle_roundtrip(self.series)
  87. unp_ts = self._pickle_roundtrip(self.ts)
  88. assert_series_equal(unp_series, self.series)
  89. assert_series_equal(unp_ts, self.ts)
  90. def _pickle_roundtrip(self, obj):
  91. with ensure_clean() as path:
  92. obj.to_pickle(path)
  93. unpickled = pd.read_pickle(path)
  94. return unpickled
  95. def test_argsort_preserve_name(self):
  96. result = self.ts.argsort()
  97. assert result.name == self.ts.name
  98. def test_sort_index_name(self):
  99. result = self.ts.sort_index(ascending=False)
  100. assert result.name == self.ts.name
  101. def test_to_sparse_pass_name(self):
  102. result = self.ts.to_sparse()
  103. assert result.name == self.ts.name
  104. def test_constructor_dict(self):
  105. d = {'a': 0., 'b': 1., 'c': 2.}
  106. result = self.series_klass(d)
  107. expected = self.series_klass(d, index=sorted(d.keys()))
  108. self._assert_series_equal(result, expected)
  109. result = self.series_klass(d, index=['b', 'c', 'd', 'a'])
  110. expected = self.series_klass([1, 2, np.nan, 0],
  111. index=['b', 'c', 'd', 'a'])
  112. self._assert_series_equal(result, expected)
  113. def test_constructor_subclass_dict(self):
  114. data = tm.TestSubDict((x, 10.0 * x) for x in range(10))
  115. series = self.series_klass(data)
  116. expected = self.series_klass(dict(compat.iteritems(data)))
  117. self._assert_series_equal(series, expected)
  118. def test_constructor_ordereddict(self):
  119. # GH3283
  120. data = OrderedDict(
  121. ('col%s' % i, np.random.random()) for i in range(12))
  122. series = self.series_klass(data)
  123. expected = self.series_klass(list(data.values()), list(data.keys()))
  124. self._assert_series_equal(series, expected)
  125. # Test with subclass
  126. class A(OrderedDict):
  127. pass
  128. series = self.series_klass(A(data))
  129. self._assert_series_equal(series, expected)
  130. def test_constructor_dict_multiindex(self):
  131. d = {('a', 'a'): 0., ('b', 'a'): 1., ('b', 'c'): 2.}
  132. _d = sorted(d.items())
  133. result = self.series_klass(d)
  134. expected = self.series_klass(
  135. [x[1] for x in _d],
  136. index=pd.MultiIndex.from_tuples([x[0] for x in _d]))
  137. self._assert_series_equal(result, expected)
  138. d['z'] = 111.
  139. _d.insert(0, ('z', d['z']))
  140. result = self.series_klass(d)
  141. expected = self.series_klass([x[1] for x in _d],
  142. index=pd.Index([x[0] for x in _d],
  143. tupleize_cols=False))
  144. result = result.reindex(index=expected.index)
  145. self._assert_series_equal(result, expected)
  146. def test_constructor_dict_timedelta_index(self):
  147. # GH #12169 : Resample category data with timedelta index
  148. # construct Series from dict as data and TimedeltaIndex as index
  149. # will result NaN in result Series data
  150. expected = self.series_klass(
  151. data=['A', 'B', 'C'],
  152. index=pd.to_timedelta([0, 10, 20], unit='s')
  153. )
  154. result = self.series_klass(
  155. data={pd.to_timedelta(0, unit='s'): 'A',
  156. pd.to_timedelta(10, unit='s'): 'B',
  157. pd.to_timedelta(20, unit='s'): 'C'},
  158. index=pd.to_timedelta([0, 10, 20], unit='s')
  159. )
  160. self._assert_series_equal(result, expected)
  161. def test_from_array_deprecated(self):
  162. with tm.assert_produces_warning(FutureWarning):
  163. self.series_klass.from_array([1, 2, 3])
  164. def test_sparse_accessor_updates_on_inplace(self):
  165. s = pd.Series([1, 1, 2, 3], dtype="Sparse[int]")
  166. s.drop([0, 1], inplace=True)
  167. assert s.sparse.density == 1.0
  168. class TestSeriesMisc(TestData, SharedWithSparse):
  169. series_klass = Series
  170. # SharedWithSparse tests use generic, series_klass-agnostic assertion
  171. _assert_series_equal = staticmethod(tm.assert_series_equal)
  172. def test_tab_completion(self):
  173. # GH 9910
  174. s = Series(list('abcd'))
  175. # Series of str values should have .str but not .dt/.cat in __dir__
  176. assert 'str' in dir(s)
  177. assert 'dt' not in dir(s)
  178. assert 'cat' not in dir(s)
  179. # similarly for .dt
  180. s = Series(date_range('1/1/2015', periods=5))
  181. assert 'dt' in dir(s)
  182. assert 'str' not in dir(s)
  183. assert 'cat' not in dir(s)
  184. # Similarly for .cat, but with the twist that str and dt should be
  185. # there if the categories are of that type first cat and str.
  186. s = Series(list('abbcd'), dtype="category")
  187. assert 'cat' in dir(s)
  188. assert 'str' in dir(s) # as it is a string categorical
  189. assert 'dt' not in dir(s)
  190. # similar to cat and str
  191. s = Series(date_range('1/1/2015', periods=5)).astype("category")
  192. assert 'cat' in dir(s)
  193. assert 'str' not in dir(s)
  194. assert 'dt' in dir(s) # as it is a datetime categorical
  195. def test_tab_completion_with_categorical(self):
  196. # test the tab completion display
  197. ok_for_cat = ['name', 'index', 'categorical', 'categories', 'codes',
  198. 'ordered', 'set_categories', 'add_categories',
  199. 'remove_categories', 'rename_categories',
  200. 'reorder_categories', 'remove_unused_categories',
  201. 'as_ordered', 'as_unordered']
  202. def get_dir(s):
  203. results = [r for r in s.cat.__dir__() if not r.startswith('_')]
  204. return list(sorted(set(results)))
  205. s = Series(list('aabbcde')).astype('category')
  206. results = get_dir(s)
  207. tm.assert_almost_equal(results, list(sorted(set(ok_for_cat))))
  208. @pytest.mark.parametrize("index", [
  209. tm.makeUnicodeIndex(10),
  210. tm.makeStringIndex(10),
  211. tm.makeCategoricalIndex(10),
  212. Index(['foo', 'bar', 'baz'] * 2),
  213. tm.makeDateIndex(10),
  214. tm.makePeriodIndex(10),
  215. tm.makeTimedeltaIndex(10),
  216. tm.makeIntIndex(10),
  217. tm.makeUIntIndex(10),
  218. tm.makeIntIndex(10),
  219. tm.makeFloatIndex(10),
  220. Index([True, False]),
  221. Index(['a{}'.format(i) for i in range(101)]),
  222. pd.MultiIndex.from_tuples(lzip('ABCD', 'EFGH')),
  223. pd.MultiIndex.from_tuples(lzip([0, 1, 2, 3], 'EFGH')), ])
  224. def test_index_tab_completion(self, index):
  225. # dir contains string-like values of the Index.
  226. s = pd.Series(index=index)
  227. dir_s = dir(s)
  228. for i, x in enumerate(s.index.unique(level=0)):
  229. if i < 100:
  230. assert (not isinstance(x, string_types) or
  231. not isidentifier(x) or x in dir_s)
  232. else:
  233. assert x not in dir_s
  234. def test_not_hashable(self):
  235. s_empty = Series()
  236. s = Series([1])
  237. msg = "'Series' objects are mutable, thus they cannot be hashed"
  238. with pytest.raises(TypeError, match=msg):
  239. hash(s_empty)
  240. with pytest.raises(TypeError, match=msg):
  241. hash(s)
  242. def test_contains(self):
  243. tm.assert_contains_all(self.ts.index, self.ts)
  244. def test_iter(self):
  245. for i, val in enumerate(self.series):
  246. assert val == self.series[i]
  247. for i, val in enumerate(self.ts):
  248. assert val == self.ts[i]
  249. def test_keys(self):
  250. # HACK: By doing this in two stages, we avoid 2to3 wrapping the call
  251. # to .keys() in a list()
  252. getkeys = self.ts.keys
  253. assert getkeys() is self.ts.index
  254. def test_values(self):
  255. tm.assert_almost_equal(self.ts.values, self.ts, check_dtype=False)
  256. def test_iteritems(self):
  257. for idx, val in compat.iteritems(self.series):
  258. assert val == self.series[idx]
  259. for idx, val in compat.iteritems(self.ts):
  260. assert val == self.ts[idx]
  261. # assert is lazy (genrators don't define reverse, lists do)
  262. assert not hasattr(self.series.iteritems(), 'reverse')
  263. def test_items(self):
  264. for idx, val in self.series.items():
  265. assert val == self.series[idx]
  266. for idx, val in self.ts.items():
  267. assert val == self.ts[idx]
  268. # assert is lazy (genrators don't define reverse, lists do)
  269. assert not hasattr(self.series.items(), 'reverse')
  270. def test_raise_on_info(self):
  271. s = Series(np.random.randn(10))
  272. msg = "'Series' object has no attribute 'info'"
  273. with pytest.raises(AttributeError, match=msg):
  274. s.info()
  275. def test_copy(self):
  276. for deep in [None, False, True]:
  277. s = Series(np.arange(10), dtype='float64')
  278. # default deep is True
  279. if deep is None:
  280. s2 = s.copy()
  281. else:
  282. s2 = s.copy(deep=deep)
  283. s2[::2] = np.NaN
  284. if deep is None or deep is True:
  285. # Did not modify original Series
  286. assert np.isnan(s2[0])
  287. assert not np.isnan(s[0])
  288. else:
  289. # we DID modify the original Series
  290. assert np.isnan(s2[0])
  291. assert np.isnan(s[0])
  292. # GH 11794
  293. # copy of tz-aware
  294. expected = Series([Timestamp('2012/01/01', tz='UTC')])
  295. expected2 = Series([Timestamp('1999/01/01', tz='UTC')])
  296. for deep in [None, False, True]:
  297. s = Series([Timestamp('2012/01/01', tz='UTC')])
  298. if deep is None:
  299. s2 = s.copy()
  300. else:
  301. s2 = s.copy(deep=deep)
  302. s2[0] = pd.Timestamp('1999/01/01', tz='UTC')
  303. # default deep is True
  304. if deep is None or deep is True:
  305. # Did not modify original Series
  306. assert_series_equal(s2, expected2)
  307. assert_series_equal(s, expected)
  308. else:
  309. # we DID modify the original Series
  310. assert_series_equal(s2, expected2)
  311. assert_series_equal(s, expected2)
  312. def test_axis_alias(self):
  313. s = Series([1, 2, np.nan])
  314. assert_series_equal(s.dropna(axis='rows'), s.dropna(axis='index'))
  315. assert s.dropna().sum('rows') == 3
  316. assert s._get_axis_number('rows') == 0
  317. assert s._get_axis_name('rows') == 'index'
  318. def test_class_axis(self):
  319. # https://github.com/pandas-dev/pandas/issues/18147
  320. # no exception and no empty docstring
  321. assert pydoc.getdoc(Series.index)
  322. def test_numpy_unique(self):
  323. # it works!
  324. np.unique(self.ts)
  325. def test_ndarray_compat(self):
  326. # test numpy compat with Series as sub-class of NDFrame
  327. tsdf = DataFrame(np.random.randn(1000, 3), columns=['A', 'B', 'C'],
  328. index=date_range('1/1/2000', periods=1000))
  329. def f(x):
  330. return x[x.idxmax()]
  331. result = tsdf.apply(f)
  332. expected = tsdf.max()
  333. tm.assert_series_equal(result, expected)
  334. # .item()
  335. s = Series([1])
  336. result = s.item()
  337. assert result == 1
  338. assert s.item() == s.iloc[0]
  339. # using an ndarray like function
  340. s = Series(np.random.randn(10))
  341. result = Series(np.ones_like(s))
  342. expected = Series(1, index=range(10), dtype='float64')
  343. tm.assert_series_equal(result, expected)
  344. # ravel
  345. s = Series(np.random.randn(10))
  346. tm.assert_almost_equal(s.ravel(order='F'), s.values.ravel(order='F'))
  347. # compress
  348. # GH 6658
  349. s = Series([0, 1., -1], index=list('abc'))
  350. with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
  351. result = np.compress(s > 0, s)
  352. tm.assert_series_equal(result, Series([1.], index=['b']))
  353. with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
  354. result = np.compress(s < -1, s)
  355. # result empty Index(dtype=object) as the same as original
  356. exp = Series([], dtype='float64', index=Index([], dtype='object'))
  357. tm.assert_series_equal(result, exp)
  358. s = Series([0, 1., -1], index=[.1, .2, .3])
  359. with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
  360. result = np.compress(s > 0, s)
  361. tm.assert_series_equal(result, Series([1.], index=[.2]))
  362. with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
  363. result = np.compress(s < -1, s)
  364. # result empty Float64Index as the same as original
  365. exp = Series([], dtype='float64', index=Index([], dtype='float64'))
  366. tm.assert_series_equal(result, exp)
  367. def test_str_accessor_updates_on_inplace(self):
  368. s = pd.Series(list('abc'))
  369. s.drop([0], inplace=True)
  370. assert len(s.str.lower()) == 2
  371. def test_str_attribute(self):
  372. # GH9068
  373. methods = ['strip', 'rstrip', 'lstrip']
  374. s = Series([' jack', 'jill ', ' jesse ', 'frank'])
  375. for method in methods:
  376. expected = Series([getattr(str, method)(x) for x in s.values])
  377. assert_series_equal(getattr(Series.str, method)(s.str), expected)
  378. # str accessor only valid with string values
  379. s = Series(range(5))
  380. with pytest.raises(AttributeError, match='only use .str accessor'):
  381. s.str.repeat(2)
  382. def test_empty_method(self):
  383. s_empty = pd.Series()
  384. assert s_empty.empty
  385. for full_series in [pd.Series([1]), pd.Series(index=[1])]:
  386. assert not full_series.empty
  387. def test_tab_complete_warning(self, ip):
  388. # https://github.com/pandas-dev/pandas/issues/16409
  389. pytest.importorskip('IPython', minversion="6.0.0")
  390. from IPython.core.completer import provisionalcompleter
  391. code = "import pandas as pd; s = pd.Series()"
  392. ip.run_code(code)
  393. with tm.assert_produces_warning(None):
  394. with provisionalcompleter('ignore'):
  395. list(ip.Completer.completions('s.', 1))
  396. def test_integer_series_size(self):
  397. # GH 25580
  398. s = Series(range(9))
  399. assert s.size == 9
  400. s = Series(range(9), dtype="Int64")
  401. assert s.size == 9
  402. class TestCategoricalSeries(object):
  403. @pytest.mark.parametrize(
  404. "method",
  405. [
  406. lambda x: x.cat.set_categories([1, 2, 3]),
  407. lambda x: x.cat.reorder_categories([2, 3, 1], ordered=True),
  408. lambda x: x.cat.rename_categories([1, 2, 3]),
  409. lambda x: x.cat.remove_unused_categories(),
  410. lambda x: x.cat.remove_categories([2]),
  411. lambda x: x.cat.add_categories([4]),
  412. lambda x: x.cat.as_ordered(),
  413. lambda x: x.cat.as_unordered(),
  414. ])
  415. def test_getname_categorical_accessor(self, method):
  416. # GH 17509
  417. s = Series([1, 2, 3], name='A').astype('category')
  418. expected = 'A'
  419. result = method(s).name
  420. assert result == expected
  421. def test_cat_accessor(self):
  422. s = Series(Categorical(["a", "b", np.nan, "a"]))
  423. tm.assert_index_equal(s.cat.categories, Index(["a", "b"]))
  424. assert not s.cat.ordered, False
  425. exp = Categorical(["a", "b", np.nan, "a"], categories=["b", "a"])
  426. s.cat.set_categories(["b", "a"], inplace=True)
  427. tm.assert_categorical_equal(s.values, exp)
  428. res = s.cat.set_categories(["b", "a"])
  429. tm.assert_categorical_equal(res.values, exp)
  430. s[:] = "a"
  431. s = s.cat.remove_unused_categories()
  432. tm.assert_index_equal(s.cat.categories, Index(["a"]))
  433. def test_cat_accessor_api(self):
  434. # GH 9322
  435. from pandas.core.arrays.categorical import CategoricalAccessor
  436. assert Series.cat is CategoricalAccessor
  437. s = Series(list('aabbcde')).astype('category')
  438. assert isinstance(s.cat, CategoricalAccessor)
  439. invalid = Series([1])
  440. with pytest.raises(AttributeError, match="only use .cat accessor"):
  441. invalid.cat
  442. assert not hasattr(invalid, 'cat')
  443. def test_cat_accessor_no_new_attributes(self):
  444. # https://github.com/pandas-dev/pandas/issues/10673
  445. c = Series(list('aabbcde')).astype('category')
  446. with pytest.raises(AttributeError,
  447. match="You cannot add any new attribute"):
  448. c.cat.xlabel = "a"
  449. def test_cat_accessor_updates_on_inplace(self):
  450. s = Series(list('abc')).astype('category')
  451. s.drop(0, inplace=True)
  452. s.cat.remove_unused_categories(inplace=True)
  453. assert len(s.cat.categories) == 2
  454. def test_categorical_delegations(self):
  455. # invalid accessor
  456. msg = r"Can only use \.cat accessor with a 'category' dtype"
  457. with pytest.raises(AttributeError, match=msg):
  458. Series([1, 2, 3]).cat
  459. with pytest.raises(AttributeError, match=msg):
  460. Series([1, 2, 3]).cat()
  461. with pytest.raises(AttributeError, match=msg):
  462. Series(['a', 'b', 'c']).cat
  463. with pytest.raises(AttributeError, match=msg):
  464. Series(np.arange(5.)).cat
  465. with pytest.raises(AttributeError, match=msg):
  466. Series([Timestamp('20130101')]).cat
  467. # Series should delegate calls to '.categories', '.codes', '.ordered'
  468. # and the methods '.set_categories()' 'drop_unused_categories()' to the
  469. # categorical# -*- coding: utf-8 -*-
  470. s = Series(Categorical(["a", "b", "c", "a"], ordered=True))
  471. exp_categories = Index(["a", "b", "c"])
  472. tm.assert_index_equal(s.cat.categories, exp_categories)
  473. s.cat.categories = [1, 2, 3]
  474. exp_categories = Index([1, 2, 3])
  475. tm.assert_index_equal(s.cat.categories, exp_categories)
  476. exp_codes = Series([0, 1, 2, 0], dtype='int8')
  477. tm.assert_series_equal(s.cat.codes, exp_codes)
  478. assert s.cat.ordered
  479. s = s.cat.as_unordered()
  480. assert not s.cat.ordered
  481. s.cat.as_ordered(inplace=True)
  482. assert s.cat.ordered
  483. # reorder
  484. s = Series(Categorical(["a", "b", "c", "a"], ordered=True))
  485. exp_categories = Index(["c", "b", "a"])
  486. exp_values = np.array(["a", "b", "c", "a"], dtype=np.object_)
  487. s = s.cat.set_categories(["c", "b", "a"])
  488. tm.assert_index_equal(s.cat.categories, exp_categories)
  489. tm.assert_numpy_array_equal(s.values.__array__(), exp_values)
  490. tm.assert_numpy_array_equal(s.__array__(), exp_values)
  491. # remove unused categories
  492. s = Series(Categorical(["a", "b", "b", "a"], categories=["a", "b", "c"
  493. ]))
  494. exp_categories = Index(["a", "b"])
  495. exp_values = np.array(["a", "b", "b", "a"], dtype=np.object_)
  496. s = s.cat.remove_unused_categories()
  497. tm.assert_index_equal(s.cat.categories, exp_categories)
  498. tm.assert_numpy_array_equal(s.values.__array__(), exp_values)
  499. tm.assert_numpy_array_equal(s.__array__(), exp_values)
  500. # This method is likely to be confused, so test that it raises an error
  501. # on wrong inputs:
  502. msg = "'Series' object has no attribute 'set_categories'"
  503. with pytest.raises(AttributeError, match=msg):
  504. s.set_categories([4, 3, 2, 1])
  505. # right: s.cat.set_categories([4,3,2,1])
  506. # GH18862 (let Series.cat.rename_categories take callables)
  507. s = Series(Categorical(["a", "b", "c", "a"], ordered=True))
  508. result = s.cat.rename_categories(lambda x: x.upper())
  509. expected = Series(Categorical(["A", "B", "C", "A"],
  510. categories=["A", "B", "C"],
  511. ordered=True))
  512. tm.assert_series_equal(result, expected)
  513. def test_dt_accessor_api_for_categorical(self):
  514. # https://github.com/pandas-dev/pandas/issues/10661
  515. from pandas.core.indexes.accessors import Properties
  516. s_dr = Series(date_range('1/1/2015', periods=5, tz="MET"))
  517. c_dr = s_dr.astype("category")
  518. s_pr = Series(period_range('1/1/2015', freq='D', periods=5))
  519. c_pr = s_pr.astype("category")
  520. s_tdr = Series(timedelta_range('1 days', '10 days'))
  521. c_tdr = s_tdr.astype("category")
  522. # only testing field (like .day)
  523. # and bool (is_month_start)
  524. get_ops = lambda x: x._datetimelike_ops
  525. test_data = [
  526. ("Datetime", get_ops(DatetimeIndex), s_dr, c_dr),
  527. ("Period", get_ops(PeriodArray), s_pr, c_pr),
  528. ("Timedelta", get_ops(TimedeltaIndex), s_tdr, c_tdr)]
  529. assert isinstance(c_dr.dt, Properties)
  530. special_func_defs = [
  531. ('strftime', ("%Y-%m-%d",), {}),
  532. ('tz_convert', ("EST",), {}),
  533. ('round', ("D",), {}),
  534. ('floor', ("D",), {}),
  535. ('ceil', ("D",), {}),
  536. ('asfreq', ("D",), {}),
  537. # ('tz_localize', ("UTC",), {}),
  538. ]
  539. _special_func_names = [f[0] for f in special_func_defs]
  540. # the series is already localized
  541. _ignore_names = ['tz_localize', 'components']
  542. for name, attr_names, s, c in test_data:
  543. func_names = [f
  544. for f in dir(s.dt)
  545. if not (f.startswith("_") or f in attr_names or f in
  546. _special_func_names or f in _ignore_names)]
  547. func_defs = [(f, (), {}) for f in func_names]
  548. for f_def in special_func_defs:
  549. if f_def[0] in dir(s.dt):
  550. func_defs.append(f_def)
  551. for func, args, kwargs in func_defs:
  552. with warnings.catch_warnings():
  553. if func == 'to_period':
  554. # dropping TZ
  555. warnings.simplefilter("ignore", UserWarning)
  556. res = getattr(c.dt, func)(*args, **kwargs)
  557. exp = getattr(s.dt, func)(*args, **kwargs)
  558. if isinstance(res, DataFrame):
  559. tm.assert_frame_equal(res, exp)
  560. elif isinstance(res, Series):
  561. tm.assert_series_equal(res, exp)
  562. else:
  563. tm.assert_almost_equal(res, exp)
  564. for attr in attr_names:
  565. try:
  566. res = getattr(c.dt, attr)
  567. exp = getattr(s.dt, attr)
  568. except Exception as e:
  569. print(name, attr)
  570. raise e
  571. if isinstance(res, DataFrame):
  572. tm.assert_frame_equal(res, exp)
  573. elif isinstance(res, Series):
  574. tm.assert_series_equal(res, exp)
  575. else:
  576. tm.assert_almost_equal(res, exp)
  577. invalid = Series([1, 2, 3]).astype('category')
  578. msg = "Can only use .dt accessor with datetimelike"
  579. with pytest.raises(AttributeError, match=msg):
  580. invalid.dt
  581. assert not hasattr(invalid, 'str')