test_api.py 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534
  1. # -*- coding: utf-8 -*-
  2. from __future__ import print_function
  3. # pylint: disable-msg=W0612,E1101
  4. from copy import deepcopy
  5. import pydoc
  6. import numpy as np
  7. import pytest
  8. from pandas.compat import long, lrange, range
  9. import pandas as pd
  10. from pandas import (
  11. Categorical, DataFrame, Series, SparseDataFrame, compat, date_range,
  12. timedelta_range)
  13. import pandas.util.testing as tm
  14. from pandas.util.testing import (
  15. assert_almost_equal, assert_frame_equal, assert_series_equal)
  16. class SharedWithSparse(object):
  17. """
  18. A collection of tests DataFrame and SparseDataFrame can share.
  19. In generic tests on this class, use ``self._assert_frame_equal()`` and
  20. ``self._assert_series_equal()`` which are implemented in sub-classes
  21. and dispatch correctly.
  22. """
  23. def _assert_frame_equal(self, left, right):
  24. """Dispatch to frame class dependent assertion"""
  25. raise NotImplementedError
  26. def _assert_series_equal(self, left, right):
  27. """Dispatch to series class dependent assertion"""
  28. raise NotImplementedError
  29. def test_copy_index_name_checking(self, float_frame):
  30. # don't want to be able to modify the index stored elsewhere after
  31. # making a copy
  32. for attr in ('index', 'columns'):
  33. ind = getattr(float_frame, attr)
  34. ind.name = None
  35. cp = float_frame.copy()
  36. getattr(cp, attr).name = 'foo'
  37. assert getattr(float_frame, attr).name is None
  38. def test_getitem_pop_assign_name(self, float_frame):
  39. s = float_frame['A']
  40. assert s.name == 'A'
  41. s = float_frame.pop('A')
  42. assert s.name == 'A'
  43. s = float_frame.loc[:, 'B']
  44. assert s.name == 'B'
  45. s2 = s.loc[:]
  46. assert s2.name == 'B'
  47. def test_get_value(self, float_frame):
  48. for idx in float_frame.index:
  49. for col in float_frame.columns:
  50. with tm.assert_produces_warning(FutureWarning,
  51. check_stacklevel=False):
  52. result = float_frame.get_value(idx, col)
  53. expected = float_frame[col][idx]
  54. tm.assert_almost_equal(result, expected)
  55. def test_add_prefix_suffix(self, float_frame):
  56. with_prefix = float_frame.add_prefix('foo#')
  57. expected = pd.Index(['foo#%s' % c for c in float_frame.columns])
  58. tm.assert_index_equal(with_prefix.columns, expected)
  59. with_suffix = float_frame.add_suffix('#foo')
  60. expected = pd.Index(['%s#foo' % c for c in float_frame.columns])
  61. tm.assert_index_equal(with_suffix.columns, expected)
  62. with_pct_prefix = float_frame.add_prefix('%')
  63. expected = pd.Index(['%{}'.format(c) for c in float_frame.columns])
  64. tm.assert_index_equal(with_pct_prefix.columns, expected)
  65. with_pct_suffix = float_frame.add_suffix('%')
  66. expected = pd.Index(['{}%'.format(c) for c in float_frame.columns])
  67. tm.assert_index_equal(with_pct_suffix.columns, expected)
  68. def test_get_axis(self, float_frame):
  69. f = float_frame
  70. assert f._get_axis_number(0) == 0
  71. assert f._get_axis_number(1) == 1
  72. assert f._get_axis_number('index') == 0
  73. assert f._get_axis_number('rows') == 0
  74. assert f._get_axis_number('columns') == 1
  75. assert f._get_axis_name(0) == 'index'
  76. assert f._get_axis_name(1) == 'columns'
  77. assert f._get_axis_name('index') == 'index'
  78. assert f._get_axis_name('rows') == 'index'
  79. assert f._get_axis_name('columns') == 'columns'
  80. assert f._get_axis(0) is f.index
  81. assert f._get_axis(1) is f.columns
  82. with pytest.raises(ValueError, match='No axis named'):
  83. f._get_axis_number(2)
  84. with pytest.raises(ValueError, match='No axis.*foo'):
  85. f._get_axis_name('foo')
  86. with pytest.raises(ValueError, match='No axis.*None'):
  87. f._get_axis_name(None)
  88. with pytest.raises(ValueError, match='No axis named'):
  89. f._get_axis_number(None)
  90. def test_keys(self, float_frame):
  91. getkeys = float_frame.keys
  92. assert getkeys() is float_frame.columns
  93. def test_column_contains_typeerror(self, float_frame):
  94. try:
  95. float_frame.columns in float_frame
  96. except TypeError:
  97. pass
  98. def test_tab_completion(self):
  99. # DataFrame whose columns are identifiers shall have them in __dir__.
  100. df = pd.DataFrame([list('abcd'), list('efgh')], columns=list('ABCD'))
  101. for key in list('ABCD'):
  102. assert key in dir(df)
  103. assert isinstance(df.__getitem__('A'), pd.Series)
  104. # DataFrame whose first-level columns are identifiers shall have
  105. # them in __dir__.
  106. df = pd.DataFrame(
  107. [list('abcd'), list('efgh')],
  108. columns=pd.MultiIndex.from_tuples(list(zip('ABCD', 'EFGH'))))
  109. for key in list('ABCD'):
  110. assert key in dir(df)
  111. for key in list('EFGH'):
  112. assert key not in dir(df)
  113. assert isinstance(df.__getitem__('A'), pd.DataFrame)
  114. def test_not_hashable(self, empty_frame):
  115. df = self.klass([1])
  116. pytest.raises(TypeError, hash, df)
  117. pytest.raises(TypeError, hash, empty_frame)
  118. def test_new_empty_index(self):
  119. df1 = self.klass(np.random.randn(0, 3))
  120. df2 = self.klass(np.random.randn(0, 3))
  121. df1.index.name = 'foo'
  122. assert df2.index.name is None
  123. def test_array_interface(self, float_frame):
  124. with np.errstate(all='ignore'):
  125. result = np.sqrt(float_frame)
  126. assert isinstance(result, type(float_frame))
  127. assert result.index is float_frame.index
  128. assert result.columns is float_frame.columns
  129. self._assert_frame_equal(result, float_frame.apply(np.sqrt))
  130. def test_get_agg_axis(self, float_frame):
  131. cols = float_frame._get_agg_axis(0)
  132. assert cols is float_frame.columns
  133. idx = float_frame._get_agg_axis(1)
  134. assert idx is float_frame.index
  135. pytest.raises(ValueError, float_frame._get_agg_axis, 2)
  136. def test_nonzero(self, float_frame, float_string_frame, empty_frame):
  137. assert empty_frame.empty
  138. assert not float_frame.empty
  139. assert not float_string_frame.empty
  140. # corner case
  141. df = DataFrame({'A': [1., 2., 3.],
  142. 'B': ['a', 'b', 'c']},
  143. index=np.arange(3))
  144. del df['A']
  145. assert not df.empty
  146. def test_iteritems(self):
  147. df = self.klass([[1, 2, 3], [4, 5, 6]], columns=['a', 'a', 'b'])
  148. for k, v in compat.iteritems(df):
  149. assert isinstance(v, self.klass._constructor_sliced)
  150. def test_items(self):
  151. # GH 17213, GH 13918
  152. cols = ['a', 'b', 'c']
  153. df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=cols)
  154. for c, (k, v) in zip(cols, df.items()):
  155. assert c == k
  156. assert isinstance(v, Series)
  157. assert (df[k] == v).all()
  158. def test_iter(self, float_frame):
  159. assert tm.equalContents(list(float_frame), float_frame.columns)
  160. def test_iterrows(self, float_frame, float_string_frame):
  161. for k, v in float_frame.iterrows():
  162. exp = float_frame.loc[k]
  163. self._assert_series_equal(v, exp)
  164. for k, v in float_string_frame.iterrows():
  165. exp = float_string_frame.loc[k]
  166. self._assert_series_equal(v, exp)
  167. def test_iterrows_iso8601(self):
  168. # GH 19671
  169. if self.klass == SparseDataFrame:
  170. pytest.xfail(reason='SparseBlock datetime type not implemented.')
  171. s = self.klass(
  172. {'non_iso8601': ['M1701', 'M1802', 'M1903', 'M2004'],
  173. 'iso8601': date_range('2000-01-01', periods=4, freq='M')})
  174. for k, v in s.iterrows():
  175. exp = s.loc[k]
  176. self._assert_series_equal(v, exp)
  177. def test_itertuples(self, float_frame):
  178. for i, tup in enumerate(float_frame.itertuples()):
  179. s = self.klass._constructor_sliced(tup[1:])
  180. s.name = tup[0]
  181. expected = float_frame.iloc[i, :].reset_index(drop=True)
  182. self._assert_series_equal(s, expected)
  183. df = self.klass({'floats': np.random.randn(5),
  184. 'ints': lrange(5)}, columns=['floats', 'ints'])
  185. for tup in df.itertuples(index=False):
  186. assert isinstance(tup[1], (int, long))
  187. df = self.klass(data={"a": [1, 2, 3], "b": [4, 5, 6]})
  188. dfaa = df[['a', 'a']]
  189. assert (list(dfaa.itertuples()) ==
  190. [(0, 1, 1), (1, 2, 2), (2, 3, 3)])
  191. # repr with be int/long on 32-bit/windows
  192. if not (compat.is_platform_windows() or compat.is_platform_32bit()):
  193. assert (repr(list(df.itertuples(name=None))) ==
  194. '[(0, 1, 4), (1, 2, 5), (2, 3, 6)]')
  195. tup = next(df.itertuples(name='TestName'))
  196. assert tup._fields == ('Index', 'a', 'b')
  197. assert (tup.Index, tup.a, tup.b) == tup
  198. assert type(tup).__name__ == 'TestName'
  199. df.columns = ['def', 'return']
  200. tup2 = next(df.itertuples(name='TestName'))
  201. assert tup2 == (0, 1, 4)
  202. assert tup2._fields == ('Index', '_1', '_2')
  203. df3 = DataFrame({'f' + str(i): [i] for i in range(1024)})
  204. # will raise SyntaxError if trying to create namedtuple
  205. tup3 = next(df3.itertuples())
  206. assert not hasattr(tup3, '_fields')
  207. assert isinstance(tup3, tuple)
  208. def test_sequence_like_with_categorical(self):
  209. # GH 7839
  210. # make sure can iterate
  211. df = DataFrame({"id": [1, 2, 3, 4, 5, 6],
  212. "raw_grade": ['a', 'b', 'b', 'a', 'a', 'e']})
  213. df['grade'] = Categorical(df['raw_grade'])
  214. # basic sequencing testing
  215. result = list(df.grade.values)
  216. expected = np.array(df.grade.values).tolist()
  217. tm.assert_almost_equal(result, expected)
  218. # iteration
  219. for t in df.itertuples(index=False):
  220. str(t)
  221. for row, s in df.iterrows():
  222. str(s)
  223. for c, col in df.iteritems():
  224. str(s)
  225. def test_len(self, float_frame):
  226. assert len(float_frame) == len(float_frame.index)
  227. def test_values(self, float_frame, float_string_frame):
  228. frame = float_frame
  229. arr = frame.values
  230. frame_cols = frame.columns
  231. for i, row in enumerate(arr):
  232. for j, value in enumerate(row):
  233. col = frame_cols[j]
  234. if np.isnan(value):
  235. assert np.isnan(frame[col][i])
  236. else:
  237. assert value == frame[col][i]
  238. # mixed type
  239. arr = float_string_frame[['foo', 'A']].values
  240. assert arr[0, 0] == 'bar'
  241. df = self.klass({'complex': [1j, 2j, 3j], 'real': [1, 2, 3]})
  242. arr = df.values
  243. assert arr[0, 0] == 1j
  244. # single block corner case
  245. arr = float_frame[['A', 'B']].values
  246. expected = float_frame.reindex(columns=['A', 'B']).values
  247. assert_almost_equal(arr, expected)
  248. def test_to_numpy(self):
  249. df = pd.DataFrame({"A": [1, 2], "B": [3, 4.5]})
  250. expected = np.array([[1, 3], [2, 4.5]])
  251. result = df.to_numpy()
  252. tm.assert_numpy_array_equal(result, expected)
  253. def test_to_numpy_dtype(self):
  254. df = pd.DataFrame({"A": [1, 2], "B": [3, 4.5]})
  255. expected = np.array([[1, 3], [2, 4]], dtype="int64")
  256. result = df.to_numpy(dtype="int64")
  257. tm.assert_numpy_array_equal(result, expected)
  258. def test_to_numpy_copy(self):
  259. arr = np.random.randn(4, 3)
  260. df = pd.DataFrame(arr)
  261. assert df.values.base is arr
  262. assert df.to_numpy(copy=False).base is arr
  263. assert df.to_numpy(copy=True).base is None
  264. def test_transpose(self, float_frame):
  265. frame = float_frame
  266. dft = frame.T
  267. for idx, series in compat.iteritems(dft):
  268. for col, value in compat.iteritems(series):
  269. if np.isnan(value):
  270. assert np.isnan(frame[col][idx])
  271. else:
  272. assert value == frame[col][idx]
  273. # mixed type
  274. index, data = tm.getMixedTypeDict()
  275. mixed = self.klass(data, index=index)
  276. mixed_T = mixed.T
  277. for col, s in compat.iteritems(mixed_T):
  278. assert s.dtype == np.object_
  279. def test_swapaxes(self):
  280. df = self.klass(np.random.randn(10, 5))
  281. self._assert_frame_equal(df.T, df.swapaxes(0, 1))
  282. self._assert_frame_equal(df.T, df.swapaxes(1, 0))
  283. self._assert_frame_equal(df, df.swapaxes(0, 0))
  284. pytest.raises(ValueError, df.swapaxes, 2, 5)
  285. def test_axis_aliases(self, float_frame):
  286. f = float_frame
  287. # reg name
  288. expected = f.sum(axis=0)
  289. result = f.sum(axis='index')
  290. assert_series_equal(result, expected)
  291. expected = f.sum(axis=1)
  292. result = f.sum(axis='columns')
  293. assert_series_equal(result, expected)
  294. def test_class_axis(self):
  295. # GH 18147
  296. # no exception and no empty docstring
  297. assert pydoc.getdoc(DataFrame.index)
  298. assert pydoc.getdoc(DataFrame.columns)
  299. def test_more_values(self, float_string_frame):
  300. values = float_string_frame.values
  301. assert values.shape[1] == len(float_string_frame.columns)
  302. def test_repr_with_mi_nat(self, float_string_frame):
  303. df = self.klass({'X': [1, 2]},
  304. index=[[pd.NaT, pd.Timestamp('20130101')], ['a', 'b']])
  305. result = repr(df)
  306. expected = ' X\nNaT a 1\n2013-01-01 b 2'
  307. assert result == expected
  308. def test_iteritems_names(self, float_string_frame):
  309. for k, v in compat.iteritems(float_string_frame):
  310. assert v.name == k
  311. def test_series_put_names(self, float_string_frame):
  312. series = float_string_frame._series
  313. for k, v in compat.iteritems(series):
  314. assert v.name == k
  315. def test_empty_nonzero(self):
  316. df = self.klass([1, 2, 3])
  317. assert not df.empty
  318. df = self.klass(index=[1], columns=[1])
  319. assert not df.empty
  320. df = self.klass(index=['a', 'b'], columns=['c', 'd']).dropna()
  321. assert df.empty
  322. assert df.T.empty
  323. empty_frames = [self.klass(),
  324. self.klass(index=[1]),
  325. self.klass(columns=[1]),
  326. self.klass({1: []})]
  327. for df in empty_frames:
  328. assert df.empty
  329. assert df.T.empty
  330. def test_with_datetimelikes(self):
  331. df = self.klass({'A': date_range('20130101', periods=10),
  332. 'B': timedelta_range('1 day', periods=10)})
  333. t = df.T
  334. result = t.get_dtype_counts()
  335. if self.klass is DataFrame:
  336. expected = Series({'object': 10})
  337. else:
  338. expected = Series({'Sparse[object, nan]': 10})
  339. tm.assert_series_equal(result, expected)
  340. class TestDataFrameMisc(SharedWithSparse):
  341. klass = DataFrame
  342. # SharedWithSparse tests use generic, klass-agnostic assertion
  343. _assert_frame_equal = staticmethod(assert_frame_equal)
  344. _assert_series_equal = staticmethod(assert_series_equal)
  345. def test_values(self, float_frame):
  346. float_frame.values[:, 0] = 5.
  347. assert (float_frame.values[:, 0] == 5).all()
  348. def test_as_matrix_deprecated(self, float_frame):
  349. # GH 18458
  350. with tm.assert_produces_warning(FutureWarning):
  351. cols = float_frame.columns.tolist()
  352. result = float_frame.as_matrix(columns=cols)
  353. expected = float_frame.values
  354. tm.assert_numpy_array_equal(result, expected)
  355. def test_deepcopy(self, float_frame):
  356. cp = deepcopy(float_frame)
  357. series = cp['A']
  358. series[:] = 10
  359. for idx, value in compat.iteritems(series):
  360. assert float_frame['A'][idx] != value
  361. def test_transpose_get_view(self, float_frame):
  362. dft = float_frame.T
  363. dft.values[:, 5:10] = 5
  364. assert (float_frame.values[5:10] == 5).all()
  365. def test_inplace_return_self(self):
  366. # GH 1893
  367. data = DataFrame({'a': ['foo', 'bar', 'baz', 'qux'],
  368. 'b': [0, 0, 1, 1],
  369. 'c': [1, 2, 3, 4]})
  370. def _check_f(base, f):
  371. result = f(base)
  372. assert result is None
  373. # -----DataFrame-----
  374. # set_index
  375. f = lambda x: x.set_index('a', inplace=True)
  376. _check_f(data.copy(), f)
  377. # reset_index
  378. f = lambda x: x.reset_index(inplace=True)
  379. _check_f(data.set_index('a'), f)
  380. # drop_duplicates
  381. f = lambda x: x.drop_duplicates(inplace=True)
  382. _check_f(data.copy(), f)
  383. # sort
  384. f = lambda x: x.sort_values('b', inplace=True)
  385. _check_f(data.copy(), f)
  386. # sort_index
  387. f = lambda x: x.sort_index(inplace=True)
  388. _check_f(data.copy(), f)
  389. # fillna
  390. f = lambda x: x.fillna(0, inplace=True)
  391. _check_f(data.copy(), f)
  392. # replace
  393. f = lambda x: x.replace(1, 0, inplace=True)
  394. _check_f(data.copy(), f)
  395. # rename
  396. f = lambda x: x.rename({1: 'foo'}, inplace=True)
  397. _check_f(data.copy(), f)
  398. # -----Series-----
  399. d = data.copy()['c']
  400. # reset_index
  401. f = lambda x: x.reset_index(inplace=True, drop=True)
  402. _check_f(data.set_index('a')['c'], f)
  403. # fillna
  404. f = lambda x: x.fillna(0, inplace=True)
  405. _check_f(d.copy(), f)
  406. # replace
  407. f = lambda x: x.replace(1, 0, inplace=True)
  408. _check_f(d.copy(), f)
  409. # rename
  410. f = lambda x: x.rename({1: 'foo'}, inplace=True)
  411. _check_f(d.copy(), f)
  412. def test_tab_complete_warning(self, ip):
  413. # GH 16409
  414. pytest.importorskip('IPython', minversion="6.0.0")
  415. from IPython.core.completer import provisionalcompleter
  416. code = "import pandas as pd; df = pd.DataFrame()"
  417. ip.run_code(code)
  418. with tm.assert_produces_warning(None):
  419. with provisionalcompleter('ignore'):
  420. list(ip.Completer.completions('df.', 1))