test_generic.py 35 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029
  1. # -*- coding: utf-8 -*-
  2. # pylint: disable-msg=E1101,W0612
  3. from copy import copy, deepcopy
  4. from warnings import catch_warnings, simplefilter
  5. import numpy as np
  6. import pytest
  7. from pandas.compat import PY3, range, zip
  8. from pandas.core.dtypes.common import is_scalar
  9. import pandas as pd
  10. from pandas import DataFrame, MultiIndex, Panel, Series, date_range
  11. import pandas.util.testing as tm
  12. from pandas.util.testing import (
  13. assert_frame_equal, assert_panel_equal, assert_series_equal)
  14. import pandas.io.formats.printing as printing
  15. # ----------------------------------------------------------------------
  16. # Generic types test cases
  17. class Generic(object):
  18. @property
  19. def _ndim(self):
  20. return self._typ._AXIS_LEN
  21. def _axes(self):
  22. """ return the axes for my object typ """
  23. return self._typ._AXIS_ORDERS
  24. def _construct(self, shape, value=None, dtype=None, **kwargs):
  25. """ construct an object for the given shape
  26. if value is specified use that if its a scalar
  27. if value is an array, repeat it as needed """
  28. if isinstance(shape, int):
  29. shape = tuple([shape] * self._ndim)
  30. if value is not None:
  31. if is_scalar(value):
  32. if value == 'empty':
  33. arr = None
  34. # remove the info axis
  35. kwargs.pop(self._typ._info_axis_name, None)
  36. else:
  37. arr = np.empty(shape, dtype=dtype)
  38. arr.fill(value)
  39. else:
  40. fshape = np.prod(shape)
  41. arr = value.ravel()
  42. new_shape = fshape / arr.shape[0]
  43. if fshape % arr.shape[0] != 0:
  44. raise Exception("invalid value passed in _construct")
  45. arr = np.repeat(arr, new_shape).reshape(shape)
  46. else:
  47. arr = np.random.randn(*shape)
  48. return self._typ(arr, dtype=dtype, **kwargs)
  49. def _compare(self, result, expected):
  50. self._comparator(result, expected)
  51. def test_rename(self):
  52. # single axis
  53. idx = list('ABCD')
  54. # relabeling values passed into self.rename
  55. args = [
  56. str.lower,
  57. {x: x.lower() for x in idx},
  58. Series({x: x.lower() for x in idx}),
  59. ]
  60. for axis in self._axes():
  61. kwargs = {axis: idx}
  62. obj = self._construct(4, **kwargs)
  63. for arg in args:
  64. # rename a single axis
  65. result = obj.rename(**{axis: arg})
  66. expected = obj.copy()
  67. setattr(expected, axis, list('abcd'))
  68. self._compare(result, expected)
  69. # multiple axes at once
  70. def test_get_numeric_data(self):
  71. n = 4
  72. kwargs = {self._typ._AXIS_NAMES[i]: list(range(n))
  73. for i in range(self._ndim)}
  74. # get the numeric data
  75. o = self._construct(n, **kwargs)
  76. result = o._get_numeric_data()
  77. self._compare(result, o)
  78. # non-inclusion
  79. result = o._get_bool_data()
  80. expected = self._construct(n, value='empty', **kwargs)
  81. self._compare(result, expected)
  82. # get the bool data
  83. arr = np.array([True, True, False, True])
  84. o = self._construct(n, value=arr, **kwargs)
  85. result = o._get_numeric_data()
  86. self._compare(result, o)
  87. # _get_numeric_data is includes _get_bool_data, so can't test for
  88. # non-inclusion
  89. def test_get_default(self):
  90. # GH 7725
  91. d0 = "a", "b", "c", "d"
  92. d1 = np.arange(4, dtype='int64')
  93. others = "e", 10
  94. for data, index in ((d0, d1), (d1, d0)):
  95. s = Series(data, index=index)
  96. for i, d in zip(index, data):
  97. assert s.get(i) == d
  98. assert s.get(i, d) == d
  99. assert s.get(i, "z") == d
  100. for other in others:
  101. assert s.get(other, "z") == "z"
  102. assert s.get(other, other) == other
  103. def test_nonzero(self):
  104. # GH 4633
  105. # look at the boolean/nonzero behavior for objects
  106. obj = self._construct(shape=4)
  107. pytest.raises(ValueError, lambda: bool(obj == 0))
  108. pytest.raises(ValueError, lambda: bool(obj == 1))
  109. pytest.raises(ValueError, lambda: bool(obj))
  110. obj = self._construct(shape=4, value=1)
  111. pytest.raises(ValueError, lambda: bool(obj == 0))
  112. pytest.raises(ValueError, lambda: bool(obj == 1))
  113. pytest.raises(ValueError, lambda: bool(obj))
  114. obj = self._construct(shape=4, value=np.nan)
  115. pytest.raises(ValueError, lambda: bool(obj == 0))
  116. pytest.raises(ValueError, lambda: bool(obj == 1))
  117. pytest.raises(ValueError, lambda: bool(obj))
  118. # empty
  119. obj = self._construct(shape=0)
  120. pytest.raises(ValueError, lambda: bool(obj))
  121. # invalid behaviors
  122. obj1 = self._construct(shape=4, value=1)
  123. obj2 = self._construct(shape=4, value=1)
  124. def f():
  125. if obj1:
  126. printing.pprint_thing("this works and shouldn't")
  127. pytest.raises(ValueError, f)
  128. pytest.raises(ValueError, lambda: obj1 and obj2)
  129. pytest.raises(ValueError, lambda: obj1 or obj2)
  130. pytest.raises(ValueError, lambda: not obj1)
  131. def test_downcast(self):
  132. # test close downcasting
  133. o = self._construct(shape=4, value=9, dtype=np.int64)
  134. result = o.copy()
  135. result._data = o._data.downcast(dtypes='infer')
  136. self._compare(result, o)
  137. o = self._construct(shape=4, value=9.)
  138. expected = o.astype(np.int64)
  139. result = o.copy()
  140. result._data = o._data.downcast(dtypes='infer')
  141. self._compare(result, expected)
  142. o = self._construct(shape=4, value=9.5)
  143. result = o.copy()
  144. result._data = o._data.downcast(dtypes='infer')
  145. self._compare(result, o)
  146. # are close
  147. o = self._construct(shape=4, value=9.000000000005)
  148. result = o.copy()
  149. result._data = o._data.downcast(dtypes='infer')
  150. expected = o.astype(np.int64)
  151. self._compare(result, expected)
  152. def test_constructor_compound_dtypes(self):
  153. # see gh-5191
  154. # Compound dtypes should raise NotImplementedError.
  155. def f(dtype):
  156. return self._construct(shape=3, value=1, dtype=dtype)
  157. pytest.raises(NotImplementedError, f, [("A", "datetime64[h]"),
  158. ("B", "str"),
  159. ("C", "int32")])
  160. # these work (though results may be unexpected)
  161. f('int64')
  162. f('float64')
  163. f('M8[ns]')
  164. def check_metadata(self, x, y=None):
  165. for m in x._metadata:
  166. v = getattr(x, m, None)
  167. if y is None:
  168. assert v is None
  169. else:
  170. assert v == getattr(y, m, None)
  171. def test_metadata_propagation(self):
  172. # check that the metadata matches up on the resulting ops
  173. o = self._construct(shape=3)
  174. o.name = 'foo'
  175. o2 = self._construct(shape=3)
  176. o2.name = 'bar'
  177. # TODO
  178. # Once panel can do non-trivial combine operations
  179. # (currently there is an a raise in the Panel arith_ops to prevent
  180. # this, though it actually does work)
  181. # can remove all of these try: except: blocks on the actual operations
  182. # ----------
  183. # preserving
  184. # ----------
  185. # simple ops with scalars
  186. for op in ['__add__', '__sub__', '__truediv__', '__mul__']:
  187. result = getattr(o, op)(1)
  188. self.check_metadata(o, result)
  189. # ops with like
  190. for op in ['__add__', '__sub__', '__truediv__', '__mul__']:
  191. try:
  192. result = getattr(o, op)(o)
  193. self.check_metadata(o, result)
  194. except (ValueError, AttributeError):
  195. pass
  196. # simple boolean
  197. for op in ['__eq__', '__le__', '__ge__']:
  198. v1 = getattr(o, op)(o)
  199. self.check_metadata(o, v1)
  200. try:
  201. self.check_metadata(o, v1 & v1)
  202. except (ValueError):
  203. pass
  204. try:
  205. self.check_metadata(o, v1 | v1)
  206. except (ValueError):
  207. pass
  208. # combine_first
  209. try:
  210. result = o.combine_first(o2)
  211. self.check_metadata(o, result)
  212. except (AttributeError):
  213. pass
  214. # ---------------------------
  215. # non-preserving (by default)
  216. # ---------------------------
  217. # add non-like
  218. try:
  219. result = o + o2
  220. self.check_metadata(result)
  221. except (ValueError, AttributeError):
  222. pass
  223. # simple boolean
  224. for op in ['__eq__', '__le__', '__ge__']:
  225. # this is a name matching op
  226. v1 = getattr(o, op)(o)
  227. v2 = getattr(o, op)(o2)
  228. self.check_metadata(v2)
  229. try:
  230. self.check_metadata(v1 & v2)
  231. except (ValueError):
  232. pass
  233. try:
  234. self.check_metadata(v1 | v2)
  235. except (ValueError):
  236. pass
  237. def test_head_tail(self):
  238. # GH5370
  239. o = self._construct(shape=10)
  240. # check all index types
  241. for index in [tm.makeFloatIndex, tm.makeIntIndex, tm.makeStringIndex,
  242. tm.makeUnicodeIndex, tm.makeDateIndex,
  243. tm.makePeriodIndex]:
  244. axis = o._get_axis_name(0)
  245. setattr(o, axis, index(len(getattr(o, axis))))
  246. # Panel + dims
  247. try:
  248. o.head()
  249. except (NotImplementedError):
  250. pytest.skip('not implemented on {0}'.format(
  251. o.__class__.__name__))
  252. self._compare(o.head(), o.iloc[:5])
  253. self._compare(o.tail(), o.iloc[-5:])
  254. # 0-len
  255. self._compare(o.head(0), o.iloc[0:0])
  256. self._compare(o.tail(0), o.iloc[0:0])
  257. # bounded
  258. self._compare(o.head(len(o) + 1), o)
  259. self._compare(o.tail(len(o) + 1), o)
  260. # neg index
  261. self._compare(o.head(-3), o.head(7))
  262. self._compare(o.tail(-3), o.tail(7))
  263. def test_sample(self):
  264. # Fixes issue: 2419
  265. o = self._construct(shape=10)
  266. ###
  267. # Check behavior of random_state argument
  268. ###
  269. # Check for stability when receives seed or random state -- run 10
  270. # times.
  271. for test in range(10):
  272. seed = np.random.randint(0, 100)
  273. self._compare(
  274. o.sample(n=4, random_state=seed), o.sample(n=4,
  275. random_state=seed))
  276. self._compare(
  277. o.sample(frac=0.7, random_state=seed), o.sample(
  278. frac=0.7, random_state=seed))
  279. self._compare(
  280. o.sample(n=4, random_state=np.random.RandomState(test)),
  281. o.sample(n=4, random_state=np.random.RandomState(test)))
  282. self._compare(
  283. o.sample(frac=0.7, random_state=np.random.RandomState(test)),
  284. o.sample(frac=0.7, random_state=np.random.RandomState(test)))
  285. os1, os2 = [], []
  286. for _ in range(2):
  287. np.random.seed(test)
  288. os1.append(o.sample(n=4))
  289. os2.append(o.sample(frac=0.7))
  290. self._compare(*os1)
  291. self._compare(*os2)
  292. # Check for error when random_state argument invalid.
  293. with pytest.raises(ValueError):
  294. o.sample(random_state='astring!')
  295. ###
  296. # Check behavior of `frac` and `N`
  297. ###
  298. # Giving both frac and N throws error
  299. with pytest.raises(ValueError):
  300. o.sample(n=3, frac=0.3)
  301. # Check that raises right error for negative lengths
  302. with pytest.raises(ValueError):
  303. o.sample(n=-3)
  304. with pytest.raises(ValueError):
  305. o.sample(frac=-0.3)
  306. # Make sure float values of `n` give error
  307. with pytest.raises(ValueError):
  308. o.sample(n=3.2)
  309. # Check lengths are right
  310. assert len(o.sample(n=4) == 4)
  311. assert len(o.sample(frac=0.34) == 3)
  312. assert len(o.sample(frac=0.36) == 4)
  313. ###
  314. # Check weights
  315. ###
  316. # Weight length must be right
  317. with pytest.raises(ValueError):
  318. o.sample(n=3, weights=[0, 1])
  319. with pytest.raises(ValueError):
  320. bad_weights = [0.5] * 11
  321. o.sample(n=3, weights=bad_weights)
  322. with pytest.raises(ValueError):
  323. bad_weight_series = Series([0, 0, 0.2])
  324. o.sample(n=4, weights=bad_weight_series)
  325. # Check won't accept negative weights
  326. with pytest.raises(ValueError):
  327. bad_weights = [-0.1] * 10
  328. o.sample(n=3, weights=bad_weights)
  329. # Check inf and -inf throw errors:
  330. with pytest.raises(ValueError):
  331. weights_with_inf = [0.1] * 10
  332. weights_with_inf[0] = np.inf
  333. o.sample(n=3, weights=weights_with_inf)
  334. with pytest.raises(ValueError):
  335. weights_with_ninf = [0.1] * 10
  336. weights_with_ninf[0] = -np.inf
  337. o.sample(n=3, weights=weights_with_ninf)
  338. # All zeros raises errors
  339. zero_weights = [0] * 10
  340. with pytest.raises(ValueError):
  341. o.sample(n=3, weights=zero_weights)
  342. # All missing weights
  343. nan_weights = [np.nan] * 10
  344. with pytest.raises(ValueError):
  345. o.sample(n=3, weights=nan_weights)
  346. # Check np.nan are replaced by zeros.
  347. weights_with_nan = [np.nan] * 10
  348. weights_with_nan[5] = 0.5
  349. self._compare(
  350. o.sample(n=1, axis=0, weights=weights_with_nan), o.iloc[5:6])
  351. # Check None are also replaced by zeros.
  352. weights_with_None = [None] * 10
  353. weights_with_None[5] = 0.5
  354. self._compare(
  355. o.sample(n=1, axis=0, weights=weights_with_None), o.iloc[5:6])
  356. def test_size_compat(self):
  357. # GH8846
  358. # size property should be defined
  359. o = self._construct(shape=10)
  360. assert o.size == np.prod(o.shape)
  361. assert o.size == 10 ** len(o.axes)
  362. def test_split_compat(self):
  363. # xref GH8846
  364. o = self._construct(shape=10)
  365. assert len(np.array_split(o, 5)) == 5
  366. assert len(np.array_split(o, 2)) == 2
  367. def test_unexpected_keyword(self): # GH8597
  368. df = DataFrame(np.random.randn(5, 2), columns=['jim', 'joe'])
  369. ca = pd.Categorical([0, 0, 2, 2, 3, np.nan])
  370. ts = df['joe'].copy()
  371. ts[2] = np.nan
  372. with pytest.raises(TypeError, match='unexpected keyword'):
  373. df.drop('joe', axis=1, in_place=True)
  374. with pytest.raises(TypeError, match='unexpected keyword'):
  375. df.reindex([1, 0], inplace=True)
  376. with pytest.raises(TypeError, match='unexpected keyword'):
  377. ca.fillna(0, inplace=True)
  378. with pytest.raises(TypeError, match='unexpected keyword'):
  379. ts.fillna(0, in_place=True)
  380. # See gh-12301
  381. def test_stat_unexpected_keyword(self):
  382. obj = self._construct(5)
  383. starwars = 'Star Wars'
  384. errmsg = 'unexpected keyword'
  385. with pytest.raises(TypeError, match=errmsg):
  386. obj.max(epic=starwars) # stat_function
  387. with pytest.raises(TypeError, match=errmsg):
  388. obj.var(epic=starwars) # stat_function_ddof
  389. with pytest.raises(TypeError, match=errmsg):
  390. obj.sum(epic=starwars) # cum_function
  391. with pytest.raises(TypeError, match=errmsg):
  392. obj.any(epic=starwars) # logical_function
  393. def test_api_compat(self):
  394. # GH 12021
  395. # compat for __name__, __qualname__
  396. obj = self._construct(5)
  397. for func in ['sum', 'cumsum', 'any', 'var']:
  398. f = getattr(obj, func)
  399. assert f.__name__ == func
  400. if PY3:
  401. assert f.__qualname__.endswith(func)
  402. def test_stat_non_defaults_args(self):
  403. obj = self._construct(5)
  404. out = np.array([0])
  405. errmsg = "the 'out' parameter is not supported"
  406. with pytest.raises(ValueError, match=errmsg):
  407. obj.max(out=out) # stat_function
  408. with pytest.raises(ValueError, match=errmsg):
  409. obj.var(out=out) # stat_function_ddof
  410. with pytest.raises(ValueError, match=errmsg):
  411. obj.sum(out=out) # cum_function
  412. with pytest.raises(ValueError, match=errmsg):
  413. obj.any(out=out) # logical_function
  414. def test_truncate_out_of_bounds(self):
  415. # GH11382
  416. # small
  417. shape = [int(2e3)] + ([1] * (self._ndim - 1))
  418. small = self._construct(shape, dtype='int8', value=1)
  419. self._compare(small.truncate(), small)
  420. self._compare(small.truncate(before=0, after=3e3), small)
  421. self._compare(small.truncate(before=-1, after=2e3), small)
  422. # big
  423. shape = [int(2e6)] + ([1] * (self._ndim - 1))
  424. big = self._construct(shape, dtype='int8', value=1)
  425. self._compare(big.truncate(), big)
  426. self._compare(big.truncate(before=0, after=3e6), big)
  427. self._compare(big.truncate(before=-1, after=2e6), big)
  428. def test_validate_bool_args(self):
  429. df = DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]})
  430. invalid_values = [1, "True", [1, 2, 3], 5.0]
  431. for value in invalid_values:
  432. with pytest.raises(ValueError):
  433. super(DataFrame, df).rename_axis(mapper={'a': 'x', 'b': 'y'},
  434. axis=1, inplace=value)
  435. with pytest.raises(ValueError):
  436. super(DataFrame, df).drop('a', axis=1, inplace=value)
  437. with pytest.raises(ValueError):
  438. super(DataFrame, df).sort_index(inplace=value)
  439. with pytest.raises(ValueError):
  440. super(DataFrame, df)._consolidate(inplace=value)
  441. with pytest.raises(ValueError):
  442. super(DataFrame, df).fillna(value=0, inplace=value)
  443. with pytest.raises(ValueError):
  444. super(DataFrame, df).replace(to_replace=1, value=7,
  445. inplace=value)
  446. with pytest.raises(ValueError):
  447. super(DataFrame, df).interpolate(inplace=value)
  448. with pytest.raises(ValueError):
  449. super(DataFrame, df)._where(cond=df.a > 2, inplace=value)
  450. with pytest.raises(ValueError):
  451. super(DataFrame, df).mask(cond=df.a > 2, inplace=value)
  452. def test_copy_and_deepcopy(self):
  453. # GH 15444
  454. for shape in [0, 1, 2]:
  455. obj = self._construct(shape)
  456. for func in [copy,
  457. deepcopy,
  458. lambda x: x.copy(deep=False),
  459. lambda x: x.copy(deep=True)]:
  460. obj_copy = func(obj)
  461. assert obj_copy is not obj
  462. self._compare(obj_copy, obj)
  463. @pytest.mark.parametrize("periods,fill_method,limit,exp", [
  464. (1, "ffill", None, [np.nan, np.nan, np.nan, 1, 1, 1.5, 0, 0]),
  465. (1, "ffill", 1, [np.nan, np.nan, np.nan, 1, 1, 1.5, 0, np.nan]),
  466. (1, "bfill", None, [np.nan, 0, 0, 1, 1, 1.5, np.nan, np.nan]),
  467. (1, "bfill", 1, [np.nan, np.nan, 0, 1, 1, 1.5, np.nan, np.nan]),
  468. (-1, "ffill", None, [np.nan, np.nan, -.5, -.5, -.6, 0, 0, np.nan]),
  469. (-1, "ffill", 1, [np.nan, np.nan, -.5, -.5, -.6, 0, np.nan, np.nan]),
  470. (-1, "bfill", None, [0, 0, -.5, -.5, -.6, np.nan, np.nan, np.nan]),
  471. (-1, "bfill", 1, [np.nan, 0, -.5, -.5, -.6, np.nan, np.nan, np.nan])
  472. ])
  473. def test_pct_change(self, periods, fill_method, limit, exp):
  474. vals = [np.nan, np.nan, 1, 2, 4, 10, np.nan, np.nan]
  475. obj = self._typ(vals)
  476. func = getattr(obj, 'pct_change')
  477. res = func(periods=periods, fill_method=fill_method, limit=limit)
  478. if type(obj) is DataFrame:
  479. tm.assert_frame_equal(res, DataFrame(exp))
  480. else:
  481. tm.assert_series_equal(res, Series(exp))
  482. class TestNDFrame(object):
  483. # tests that don't fit elsewhere
  484. def test_sample(sel):
  485. # Fixes issue: 2419
  486. # additional specific object based tests
  487. # A few dataframe test with degenerate weights.
  488. easy_weight_list = [0] * 10
  489. easy_weight_list[5] = 1
  490. df = pd.DataFrame({'col1': range(10, 20),
  491. 'col2': range(20, 30),
  492. 'colString': ['a'] * 10,
  493. 'easyweights': easy_weight_list})
  494. sample1 = df.sample(n=1, weights='easyweights')
  495. assert_frame_equal(sample1, df.iloc[5:6])
  496. # Ensure proper error if string given as weight for Series, panel, or
  497. # DataFrame with axis = 1.
  498. s = Series(range(10))
  499. with pytest.raises(ValueError):
  500. s.sample(n=3, weights='weight_column')
  501. with catch_warnings(record=True):
  502. simplefilter("ignore", FutureWarning)
  503. panel = Panel(items=[0, 1, 2], major_axis=[2, 3, 4],
  504. minor_axis=[3, 4, 5])
  505. with pytest.raises(ValueError):
  506. panel.sample(n=1, weights='weight_column')
  507. with pytest.raises(ValueError):
  508. df.sample(n=1, weights='weight_column', axis=1)
  509. # Check weighting key error
  510. with pytest.raises(KeyError):
  511. df.sample(n=3, weights='not_a_real_column_name')
  512. # Check that re-normalizes weights that don't sum to one.
  513. weights_less_than_1 = [0] * 10
  514. weights_less_than_1[0] = 0.5
  515. tm.assert_frame_equal(
  516. df.sample(n=1, weights=weights_less_than_1), df.iloc[:1])
  517. ###
  518. # Test axis argument
  519. ###
  520. # Test axis argument
  521. df = pd.DataFrame({'col1': range(10), 'col2': ['a'] * 10})
  522. second_column_weight = [0, 1]
  523. assert_frame_equal(
  524. df.sample(n=1, axis=1, weights=second_column_weight), df[['col2']])
  525. # Different axis arg types
  526. assert_frame_equal(df.sample(n=1, axis='columns',
  527. weights=second_column_weight),
  528. df[['col2']])
  529. weight = [0] * 10
  530. weight[5] = 0.5
  531. assert_frame_equal(df.sample(n=1, axis='rows', weights=weight),
  532. df.iloc[5:6])
  533. assert_frame_equal(df.sample(n=1, axis='index', weights=weight),
  534. df.iloc[5:6])
  535. # Check out of range axis values
  536. with pytest.raises(ValueError):
  537. df.sample(n=1, axis=2)
  538. with pytest.raises(ValueError):
  539. df.sample(n=1, axis='not_a_name')
  540. with pytest.raises(ValueError):
  541. s = pd.Series(range(10))
  542. s.sample(n=1, axis=1)
  543. # Test weight length compared to correct axis
  544. with pytest.raises(ValueError):
  545. df.sample(n=1, axis=1, weights=[0.5] * 10)
  546. # Check weights with axis = 1
  547. easy_weight_list = [0] * 3
  548. easy_weight_list[2] = 1
  549. df = pd.DataFrame({'col1': range(10, 20),
  550. 'col2': range(20, 30),
  551. 'colString': ['a'] * 10})
  552. sample1 = df.sample(n=1, axis=1, weights=easy_weight_list)
  553. assert_frame_equal(sample1, df[['colString']])
  554. # Test default axes
  555. with catch_warnings(record=True):
  556. simplefilter("ignore", FutureWarning)
  557. p = Panel(items=['a', 'b', 'c'], major_axis=[2, 4, 6],
  558. minor_axis=[1, 3, 5])
  559. assert_panel_equal(
  560. p.sample(n=3, random_state=42), p.sample(n=3, axis=1,
  561. random_state=42))
  562. assert_frame_equal(
  563. df.sample(n=3, random_state=42), df.sample(n=3, axis=0,
  564. random_state=42))
  565. # Test that function aligns weights with frame
  566. df = DataFrame(
  567. {'col1': [5, 6, 7],
  568. 'col2': ['a', 'b', 'c'], }, index=[9, 5, 3])
  569. s = Series([1, 0, 0], index=[3, 5, 9])
  570. assert_frame_equal(df.loc[[3]], df.sample(1, weights=s))
  571. # Weights have index values to be dropped because not in
  572. # sampled DataFrame
  573. s2 = Series([0.001, 0, 10000], index=[3, 5, 10])
  574. assert_frame_equal(df.loc[[3]], df.sample(1, weights=s2))
  575. # Weights have empty values to be filed with zeros
  576. s3 = Series([0.01, 0], index=[3, 5])
  577. assert_frame_equal(df.loc[[3]], df.sample(1, weights=s3))
  578. # No overlap in weight and sampled DataFrame indices
  579. s4 = Series([1, 0], index=[1, 2])
  580. with pytest.raises(ValueError):
  581. df.sample(1, weights=s4)
  582. def test_squeeze(self):
  583. # noop
  584. for s in [tm.makeFloatSeries(), tm.makeStringSeries(),
  585. tm.makeObjectSeries()]:
  586. tm.assert_series_equal(s.squeeze(), s)
  587. for df in [tm.makeTimeDataFrame()]:
  588. tm.assert_frame_equal(df.squeeze(), df)
  589. with catch_warnings(record=True):
  590. simplefilter("ignore", FutureWarning)
  591. for p in [tm.makePanel()]:
  592. tm.assert_panel_equal(p.squeeze(), p)
  593. # squeezing
  594. df = tm.makeTimeDataFrame().reindex(columns=['A'])
  595. tm.assert_series_equal(df.squeeze(), df['A'])
  596. with catch_warnings(record=True):
  597. simplefilter("ignore", FutureWarning)
  598. p = tm.makePanel().reindex(items=['ItemA'])
  599. tm.assert_frame_equal(p.squeeze(), p['ItemA'])
  600. p = tm.makePanel().reindex(items=['ItemA'], minor_axis=['A'])
  601. tm.assert_series_equal(p.squeeze(), p.loc['ItemA', :, 'A'])
  602. # don't fail with 0 length dimensions GH11229 & GH8999
  603. empty_series = Series([], name='five')
  604. empty_frame = DataFrame([empty_series])
  605. with catch_warnings(record=True):
  606. simplefilter("ignore", FutureWarning)
  607. empty_panel = Panel({'six': empty_frame})
  608. [tm.assert_series_equal(empty_series, higher_dim.squeeze())
  609. for higher_dim in [empty_series, empty_frame, empty_panel]]
  610. # axis argument
  611. df = tm.makeTimeDataFrame(nper=1).iloc[:, :1]
  612. assert df.shape == (1, 1)
  613. tm.assert_series_equal(df.squeeze(axis=0), df.iloc[0])
  614. tm.assert_series_equal(df.squeeze(axis='index'), df.iloc[0])
  615. tm.assert_series_equal(df.squeeze(axis=1), df.iloc[:, 0])
  616. tm.assert_series_equal(df.squeeze(axis='columns'), df.iloc[:, 0])
  617. assert df.squeeze() == df.iloc[0, 0]
  618. pytest.raises(ValueError, df.squeeze, axis=2)
  619. pytest.raises(ValueError, df.squeeze, axis='x')
  620. df = tm.makeTimeDataFrame(3)
  621. tm.assert_frame_equal(df.squeeze(axis=0), df)
  622. def test_numpy_squeeze(self):
  623. s = tm.makeFloatSeries()
  624. tm.assert_series_equal(np.squeeze(s), s)
  625. df = tm.makeTimeDataFrame().reindex(columns=['A'])
  626. tm.assert_series_equal(np.squeeze(df), df['A'])
  627. def test_transpose(self):
  628. msg = (r"transpose\(\) got multiple values for "
  629. r"keyword argument 'axes'")
  630. for s in [tm.makeFloatSeries(), tm.makeStringSeries(),
  631. tm.makeObjectSeries()]:
  632. # calls implementation in pandas/core/base.py
  633. tm.assert_series_equal(s.transpose(), s)
  634. for df in [tm.makeTimeDataFrame()]:
  635. tm.assert_frame_equal(df.transpose().transpose(), df)
  636. with catch_warnings(record=True):
  637. simplefilter("ignore", FutureWarning)
  638. for p in [tm.makePanel()]:
  639. tm.assert_panel_equal(p.transpose(2, 0, 1)
  640. .transpose(1, 2, 0), p)
  641. with pytest.raises(TypeError, match=msg):
  642. p.transpose(2, 0, 1, axes=(2, 0, 1))
  643. def test_numpy_transpose(self):
  644. msg = "the 'axes' parameter is not supported"
  645. s = tm.makeFloatSeries()
  646. tm.assert_series_equal(np.transpose(s), s)
  647. with pytest.raises(ValueError, match=msg):
  648. np.transpose(s, axes=1)
  649. df = tm.makeTimeDataFrame()
  650. tm.assert_frame_equal(np.transpose(np.transpose(df)), df)
  651. with pytest.raises(ValueError, match=msg):
  652. np.transpose(df, axes=1)
  653. with catch_warnings(record=True):
  654. simplefilter("ignore", FutureWarning)
  655. p = tm.makePanel()
  656. tm.assert_panel_equal(np.transpose(
  657. np.transpose(p, axes=(2, 0, 1)),
  658. axes=(1, 2, 0)), p)
  659. def test_take(self):
  660. indices = [1, 5, -2, 6, 3, -1]
  661. for s in [tm.makeFloatSeries(), tm.makeStringSeries(),
  662. tm.makeObjectSeries()]:
  663. out = s.take(indices)
  664. expected = Series(data=s.values.take(indices),
  665. index=s.index.take(indices), dtype=s.dtype)
  666. tm.assert_series_equal(out, expected)
  667. for df in [tm.makeTimeDataFrame()]:
  668. out = df.take(indices)
  669. expected = DataFrame(data=df.values.take(indices, axis=0),
  670. index=df.index.take(indices),
  671. columns=df.columns)
  672. tm.assert_frame_equal(out, expected)
  673. indices = [-3, 2, 0, 1]
  674. with catch_warnings(record=True):
  675. simplefilter("ignore", FutureWarning)
  676. for p in [tm.makePanel()]:
  677. out = p.take(indices)
  678. expected = Panel(data=p.values.take(indices, axis=0),
  679. items=p.items.take(indices),
  680. major_axis=p.major_axis,
  681. minor_axis=p.minor_axis)
  682. tm.assert_panel_equal(out, expected)
  683. def test_take_invalid_kwargs(self):
  684. indices = [-3, 2, 0, 1]
  685. s = tm.makeFloatSeries()
  686. df = tm.makeTimeDataFrame()
  687. with catch_warnings(record=True):
  688. simplefilter("ignore", FutureWarning)
  689. p = tm.makePanel()
  690. for obj in (s, df, p):
  691. msg = r"take\(\) got an unexpected keyword argument 'foo'"
  692. with pytest.raises(TypeError, match=msg):
  693. obj.take(indices, foo=2)
  694. msg = "the 'out' parameter is not supported"
  695. with pytest.raises(ValueError, match=msg):
  696. obj.take(indices, out=indices)
  697. msg = "the 'mode' parameter is not supported"
  698. with pytest.raises(ValueError, match=msg):
  699. obj.take(indices, mode='clip')
    def test_equals(self):
        """Exercise ``equals`` on Series and DataFrame.

        Statement order matters: several objects are mutated in place
        between assertions.
        """
        # a copy is equal until one element changes
        s1 = pd.Series([1, 2, 3], index=[0, 2, 1])
        s2 = s1.copy()
        assert s1.equals(s2)
        s1[1] = 99
        assert not s1.equals(s2)
        # NaNs compare as equal
        s1 = pd.Series([1, np.nan, 3, np.nan], index=[0, 2, 1, 3])
        s2 = s1.copy()
        assert s1.equals(s2)
        s2[0] = 9.9
        assert not s1.equals(s2)
        # MultiIndex-ed Series containing a NaN
        idx = MultiIndex.from_tuples([(0, 'a'), (1, 'b'), (2, 'c')])
        s1 = Series([1, 2, np.nan], index=idx)
        s2 = s1.copy()
        assert s1.equals(s2)
        # Add object dtype column with nans
        index = np.random.random(10)
        df1 = DataFrame(
            np.random.random(10, ), index=index, columns=['floats'])
        # the sentence splits into exactly ten words, one per row
        df1['text'] = 'the sky is so blue. we could use more chocolate.'.split(
        )
        df1['start'] = date_range('2000-1-1', periods=10, freq='T')
        df1['end'] = date_range('2000-1-1', periods=10, freq='D')
        df1['diff'] = df1['end'] - df1['start']
        df1['bool'] = (np.arange(10) % 3 == 0)
        # blank out every other row across all columns
        df1.loc[::2] = np.nan
        df2 = df1.copy()
        assert df1['text'].equals(df2['text'])
        assert df1['start'].equals(df2['start'])
        assert df1['end'].equals(df2['end'])
        assert df1['diff'].equals(df2['diff'])
        assert df1['bool'].equals(df2['bool'])
        assert df1.equals(df2)
        # comparing against a non-pandas object is simply False
        assert not df1.equals(object)
        # different dtype
        different = df1.copy()
        different['floats'] = different['floats'].astype('float32')
        assert not df1.equals(different)
        # different index
        different_index = -index
        different = df2.set_index(different_index)
        assert not df1.equals(different)
        # different columns
        different = df2.copy()
        different.columns = df2.columns[::-1]
        assert not df1.equals(different)
        # DatetimeIndex
        index = pd.date_range('2000-1-1', periods=10, freq='T')
        df1 = df1.set_index(index)
        df2 = df1.copy()
        assert df1.equals(df2)
        # MultiIndex
        df3 = df1.set_index(['text'], append=True)
        df2 = df1.set_index(['text'], append=True)
        assert df3.equals(df2)
        df2 = df1.set_index(['floats'], append=True)
        assert not df3.equals(df2)
        # NaN in index
        df3 = df1.set_index(['floats'], append=True)
        df2 = df1.set_index(['floats'], append=True)
        assert df3.equals(df2)
        # GH 8437
        a = pd.Series([False, np.nan])
        b = pd.Series([False, np.nan])
        c = pd.Series(index=range(2))
        d = pd.Series(index=range(2))
        e = pd.Series(index=range(2))
        f = pd.Series(index=range(2))
        # set the first element of c..f to False, via slice and scalar
        # assignment
        c[:-1] = d[:-1] = e[0] = f[0] = False
        assert a.equals(a)
        assert a.equals(b)
        assert a.equals(c)
        assert a.equals(d)
        assert a.equals(e)
        assert e.equals(f)
  776. def test_describe_raises(self):
  777. with catch_warnings(record=True):
  778. simplefilter("ignore", FutureWarning)
  779. with pytest.raises(NotImplementedError):
  780. tm.makePanel().describe()
  781. def test_pipe(self):
  782. df = DataFrame({'A': [1, 2, 3]})
  783. f = lambda x, y: x ** y
  784. result = df.pipe(f, 2)
  785. expected = DataFrame({'A': [1, 4, 9]})
  786. assert_frame_equal(result, expected)
  787. result = df.A.pipe(f, 2)
  788. assert_series_equal(result, expected.A)
  789. def test_pipe_tuple(self):
  790. df = DataFrame({'A': [1, 2, 3]})
  791. f = lambda x, y: y
  792. result = df.pipe((f, 'y'), 0)
  793. assert_frame_equal(result, df)
  794. result = df.A.pipe((f, 'y'), 0)
  795. assert_series_equal(result, df.A)
  796. def test_pipe_tuple_error(self):
  797. df = DataFrame({"A": [1, 2, 3]})
  798. f = lambda x, y: y
  799. with pytest.raises(ValueError):
  800. df.pipe((f, 'y'), x=1, y=0)
  801. with pytest.raises(ValueError):
  802. df.A.pipe((f, 'y'), x=1, y=0)
  803. def test_pipe_panel(self):
  804. with catch_warnings(record=True):
  805. simplefilter("ignore", FutureWarning)
  806. wp = Panel({'r1': DataFrame({"A": [1, 2, 3]})})
  807. f = lambda x, y: x + y
  808. result = wp.pipe(f, 2)
  809. expected = wp + 2
  810. assert_panel_equal(result, expected)
  811. result = wp.pipe((f, 'y'), x=1)
  812. expected = wp + 1
  813. assert_panel_equal(result, expected)
  814. with pytest.raises(ValueError):
  815. wp.pipe((f, 'y'), x=1, y=1)
  816. @pytest.mark.parametrize('box', [pd.Series, pd.DataFrame])
  817. def test_axis_classmethods(self, box):
  818. obj = box()
  819. values = (list(box._AXIS_NAMES.keys()) +
  820. list(box._AXIS_NUMBERS.keys()) +
  821. list(box._AXIS_ALIASES.keys()))
  822. for v in values:
  823. assert obj._get_axis_number(v) == box._get_axis_number(v)
  824. assert obj._get_axis_name(v) == box._get_axis_name(v)
  825. assert obj._get_block_manager_axis(v) == \
  826. box._get_block_manager_axis(v)