test_nanops.py 42 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059
  1. # -*- coding: utf-8 -*-
  2. from __future__ import division, print_function
  3. from functools import partial
  4. import warnings
  5. import numpy as np
  6. import pytest
  7. from pandas.compat.numpy import _np_version_under1p13
  8. import pandas.util._test_decorators as td
  9. from pandas.core.dtypes.common import is_integer_dtype
  10. import pandas as pd
  11. from pandas import Series, isna
  12. from pandas.core.arrays import DatetimeArray
  13. import pandas.core.nanops as nanops
  14. import pandas.util.testing as tm
  15. use_bn = nanops._USE_BOTTLENECK
  16. class TestnanopsDataFrame(object):
  def setup_method(self, method):
      """Build the array fixtures shared by the tests of this class.

      Seeds numpy's global RNG, switches ``nanops._USE_BOTTLENECK`` off and
      stores float, complex, int, bool, string, datetime64 and timedelta64
      arrays of shape ``(11, 7, 5)`` together with NaN/inf-stacked,
      object-dtype, 2-D and 1-D variants as instance attributes.
      """
      np.random.seed(11235)
      nanops._USE_BOTTLENECK = False

      shape = (11, 7, 5)
      stack = np.vstack

      # The order of the random draws determines the fixture values.
      self.arr_float = np.random.randn(*shape)
      self.arr_float1 = np.random.randn(*shape)
      self.arr_complex = self.arr_float + self.arr_float1 * 1j
      self.arr_int = np.random.randint(-10, 10, shape)
      self.arr_bool = np.random.randint(0, 2, shape) == 0
      magnitudes = np.abs(self.arr_float)
      self.arr_str = magnitudes.astype('S')
      self.arr_utf = magnitudes.astype('U')
      self.arr_date = np.random.randint(0, 20000, shape).astype('M8[ns]')
      self.arr_tdelta = np.random.randint(0, 20000, shape).astype('m8[ns]')

      # NaN-padded variants (stacked along the first axis).
      self.arr_nan = np.tile(np.nan, shape)
      self.arr_float_nan = stack([self.arr_float, self.arr_nan])
      self.arr_float1_nan = stack([self.arr_float1, self.arr_nan])
      self.arr_nan_float1 = stack([self.arr_nan, self.arr_float1])
      self.arr_nan_nan = stack([self.arr_nan, self.arr_nan])

      # Infinity variants.
      self.arr_inf = self.arr_float * np.inf
      self.arr_float_inf = stack([self.arr_float, self.arr_inf])
      self.arr_nan_inf = stack([self.arr_nan, self.arr_inf])
      self.arr_float_nan_inf = stack([self.arr_float, self.arr_nan,
                                      self.arr_inf])
      self.arr_nan_nan_inf = stack([self.arr_nan, self.arr_nan,
                                    self.arr_inf])

      # One object-dtype array holding every kind of value.
      typed_arrays = (self.arr_float, self.arr_int, self.arr_bool,
                      self.arr_complex, self.arr_str, self.arr_utf,
                      self.arr_date, self.arr_tdelta)
      self.arr_obj = stack([arr.astype('O') for arr in typed_arrays])

      # Multiplying nan/inf by 1j triggers "invalid value" warnings.
      with np.errstate(invalid='ignore'):
          self.arr_nan_nanj = self.arr_nan + self.arr_nan * 1j
          self.arr_complex_nan = stack([self.arr_complex,
                                        self.arr_nan_nanj])
          self.arr_nan_infj = self.arr_inf * 1j
          self.arr_complex_nan_infj = stack([self.arr_complex,
                                             self.arr_nan_infj])

      # 2-D and 1-D slices of the float/NaN fixtures.
      for stem in ('arr_float', 'arr_float1', 'arr_nan', 'arr_float_nan',
                   'arr_float1_nan', 'arr_nan_float1'):
          cube = getattr(self, stem)
          setattr(self, stem + '_2d', cube[:, :, 0])
          setattr(self, stem + '_1d', cube[:, 0, 0])
  def teardown_method(self, method):
      """Restore ``nanops._USE_BOTTLENECK`` to the value saved in ``use_bn``."""
      saved_flag = use_bn
      nanops._USE_BOTTLENECK = saved_flag
  def check_results(self, targ, res, axis, check_dtype=True):
      """Assert that ``res`` matches the reference value ``targ``.

      ``res`` is first unwrapped through its ``asm8`` / ``values``
      attributes when present.  If the plain comparison fails, timedelta
      references are coerced and compared again; complex and object
      results are compared on their real and imaginary parts.  Any other
      mismatch re-raises the original ``AssertionError``.
      """
      for attr in ('asm8', 'values'):
          res = getattr(res, attr, res)

      def _is_timedelta(obj):
          return hasattr(obj, 'dtype') and obj.dtype == 'm8[ns]'

      # timedeltas are a beast here
      def _coerce_tds(expected, actual):
          if not _is_timedelta(expected):
              return expected, actual
          if len(expected) == 1:
              return expected[0].item(), actual.item()
          return expected.view('i8'), actual

      def _complex_or_float(arr):
          try:
              return arr.astype('c16')
          except RuntimeError:
              return arr.astype('f8')

      try:
          shapes_differ = (axis != 0 and hasattr(targ, 'shape') and
                           targ.ndim and targ.shape != res.shape)
          if shapes_differ:
              res = np.split(res, [targ.shape[0]], axis=0)[0]
      except (ValueError, IndexError):
          targ, res = _coerce_tds(targ, res)

      try:
          tm.assert_almost_equal(targ, res, check_dtype=check_dtype)
      except AssertionError:
          # handle timedelta dtypes
          if _is_timedelta(targ):
              targ, res = _coerce_tds(targ, res)
              tm.assert_almost_equal(targ, res, check_dtype=check_dtype)
              return

          # There are sometimes rounding errors with complex and object
          # dtypes.  If it isn't one of those, re-raise the error.
          if not hasattr(res, 'dtype') or res.dtype.kind not in ['c', 'O']:
              raise

          if res.dtype.kind == 'O':
              # convert object dtypes to something that can be split into
              # real and imaginary parts
              if targ.dtype.kind != 'O':
                  res = res.astype(targ.dtype)
              else:
                  res = _complex_or_float(res)
                  targ = _complex_or_float(targ)
          elif targ.dtype.kind == 'O':
              # there should never be a case where numpy returns an object
              # but nanops doesn't, so make that an exception
              raise

          for part in ('real', 'imag'):
              tm.assert_almost_equal(getattr(targ, part), getattr(res, part),
                                     check_dtype=check_dtype)
  def check_fun_data(self, testfunc, targfunc, testarval, targarval,
                     targarnanval, check_dtype=True, empty_targfunc=None,
                     **kwargs):
      """Compare ``testfunc`` with a reference over every axis/skipna mode.

      For each axis of ``targarval`` (plus ``axis=None``) and both
      ``skipna`` values the reference is computed with ``targfunc`` -- or
      with ``empty_targfunc`` when NaNs are skipped and the reference input
      is all-NaN -- and compared with ``testfunc`` through
      ``check_results``.  Calls relying on ``testfunc``'s default ``axis``
      and/or ``skipna`` are checked too.  Afterwards index 0 is taken along
      the last axis of every input and the method recurses until the test
      array is at most 1-D.
      """
      axes = list(range(targarval.ndim))
      axes.append(None)

      for axis in axes:
          for skipna in (False, True):
              if skipna:
                  reference_input = targarval
              else:
                  reference_input = targarnanval

              use_empty = (skipna and empty_targfunc and
                           isna(reference_input).all())
              reference = empty_targfunc if use_empty else targfunc
              targ = reference(reference_input, axis=axis, **kwargs)

              try:
                  res = testfunc(testarval, axis=axis, skipna=skipna,
                                 **kwargs)
                  self.check_results(targ, res, axis,
                                     check_dtype=check_dtype)
                  if skipna:
                      # skipna left at its default
                      res = testfunc(testarval, axis=axis, **kwargs)
                      self.check_results(targ, res, axis,
                                         check_dtype=check_dtype)
                  if axis is None:
                      # axis left at its default
                      res = testfunc(testarval, skipna=skipna, **kwargs)
                      self.check_results(targ, res, axis,
                                         check_dtype=check_dtype)
                  if skipna and axis is None:
                      # both left at their defaults
                      res = testfunc(testarval, **kwargs)
                      self.check_results(targ, res, axis,
                                         check_dtype=check_dtype)
              except BaseException as exc:
                  # annotate the failure with the failing combination
                  exc.args += ('axis: %s of %s' % (axis, testarval.ndim - 1),
                               'skipna: %s' % skipna, 'kwargs: %s' % kwargs)
                  raise

      if testarval.ndim <= 1:
          return

      try:
          reduced = [np.take(arr, 0, axis=-1)
                     for arr in (testarval, targarval, targarnanval)]
      except ValueError:
          return
      testarval2, targarval2, targarnanval2 = reduced
      self.check_fun_data(testfunc, targfunc, testarval2, targarval2,
                          targarnanval2, check_dtype=check_dtype,
                          empty_targfunc=empty_targfunc, **kwargs)
  def check_fun(self, testfunc, targfunc, testar, targar=None,
                targarnan=None, empty_targfunc=None, **kwargs):
      """Run ``check_fun_data`` on fixtures looked up by attribute name.

      ``testar`` names the fixture passed to ``testfunc``; ``targar`` and
      ``targarnan`` name the fixtures given to ``targfunc`` and both default
      to ``testar``.  Failures are annotated with the fixture names before
      being re-raised.
      """
      targar = testar if targar is None else targar
      targarnan = testar if targarnan is None else targarnan

      testarval, targarval, targarnanval = [
          getattr(self, name) for name in (testar, targar, targarnan)]

      try:
          self.check_fun_data(testfunc, targfunc, testarval, targarval,
                              targarnanval, empty_targfunc=empty_targfunc,
                              **kwargs)
      except BaseException as exc:
          exc.args += ('testar: %s' % testar, 'targar: %s' % targar,
                       'targarnan: %s' % targarnan)
          raise
  def check_funs(self, testfunc, targfunc, allow_complex=True,
                 allow_all_nan=True, allow_str=True, allow_date=True,
                 allow_tdelta=True, allow_obj=True, **kwargs):
      """Run ``check_fun`` over every fixture family the flags allow.

      Float, NaN-padded float, int and bool fixtures are always checked.
      The ``allow_*`` flags enable the all-NaN, complex, string,
      datetime64, timedelta64 and object-dtype fixtures.  Date and
      timedelta fixtures are skipped when ``targfunc`` raises ``TypeError``
      on them.  With ``allow_obj='convert'`` the reference function is
      wrapped in ``_badobj_wrap`` before the object fixture is checked.
      """
      def run(*fixture_names):
          self.check_fun(testfunc, targfunc, *fixture_names, **kwargs)

      run('arr_float')
      run('arr_float_nan', 'arr_float')
      run('arr_int')
      run('arr_bool')
      objs = [arr.astype('O')
              for arr in (self.arr_float, self.arr_int, self.arr_bool)]

      if allow_all_nan:
          run('arr_nan')

      if allow_complex:
          run('arr_complex')
          run('arr_complex_nan', 'arr_complex')
          if allow_all_nan:
              run('arr_nan_nanj')
          objs += [self.arr_complex.astype('O')]

      if allow_str:
          run('arr_str')
          run('arr_utf')
          objs += [self.arr_str.astype('O'), self.arr_utf.astype('O')]

      for allowed, fixture, name in ((allow_date, self.arr_date, 'arr_date'),
                                     (allow_tdelta, self.arr_tdelta,
                                      'arr_tdelta')):
          if not allowed:
              continue
          try:
              # probe whether the reference supports this dtype at all
              targfunc(fixture)
          except TypeError:
              continue
          run(name)
          objs += [fixture.astype('O')]

      if allow_obj:
          self.arr_obj = np.vstack(objs)
          # some nanops handle object dtypes better than their numpy
          # counterparts, so the numpy functions need to be given something
          # else
          if allow_obj == 'convert':
              targfunc = partial(self._badobj_wrap, func=targfunc,
                                 allow_complex=allow_complex)
          self.check_fun(testfunc, targfunc, 'arr_obj', **kwargs)
  235. def _badobj_wrap(self, value, func, allow_complex=True, **kwargs):
  236. if value.dtype.kind == 'O':
  237. if allow_complex:
  238. value = value.astype('c16')
  239. else:
  240. value = value.astype('f8')
  241. return func(value, **kwargs)
  def test_nanany(self):
      """Check ``nanops.nanany`` against ``np.any``."""
      options = dict(allow_all_nan=False, allow_str=False,
                     allow_date=False, allow_tdelta=False)
      self.check_funs(nanops.nanany, np.any, **options)
  def test_nanall(self):
      """Check ``nanops.nanall`` against ``np.all``."""
      options = dict(allow_all_nan=False, allow_str=False,
                     allow_date=False, allow_tdelta=False)
      self.check_funs(nanops.nanall, np.all, **options)
  def test_nansum(self):
      """Check ``nanops.nansum`` against ``np.sum``.

      ``np.nansum`` supplies the reference for all-NaN input when NaNs are
      skipped; dtypes are not compared.
      """
      options = dict(allow_str=False, allow_date=False, allow_tdelta=True,
                     check_dtype=False, empty_targfunc=np.nansum)
      self.check_funs(nanops.nansum, np.sum, **options)
  def test_nanmean(self):
      """Check ``nanops.nanmean`` against ``np.mean``."""
      options = dict(allow_complex=False, allow_obj=False, allow_str=False,
                     allow_date=False, allow_tdelta=True)
      self.check_funs(nanops.nanmean, np.mean, **options)
  256. def test_nanmean_overflow(self):
  257. # GH 10155
  258. # In the previous implementation mean can overflow for int dtypes, it
  259. # is now consistent with numpy
  260. for a in [2 ** 55, -2 ** 55, 20150515061816532]:
  261. s = Series(a, index=range(500), dtype=np.int64)
  262. result = s.mean()
  263. np_result = s.values.mean()
  264. assert result == a
  265. assert result == np_result
  266. assert result.dtype == np.float64
  267. def test_returned_dtype(self):
  268. dtypes = [np.int16, np.int32, np.int64, np.float32, np.float64]
  269. if hasattr(np, 'float128'):
  270. dtypes.append(np.float128)
  271. for dtype in dtypes:
  272. s = Series(range(10), dtype=dtype)
  273. group_a = ['mean', 'std', 'var', 'skew', 'kurt']
  274. group_b = ['min', 'max']
  275. for method in group_a + group_b:
  276. result = getattr(s, method)()
  277. if is_integer_dtype(dtype) and method in group_a:
  278. assert result.dtype == np.float64
  279. else:
  280. assert result.dtype == dtype
  def test_nanmedian(self):
      """Check ``nanops.nanmedian`` against ``np.median``.

      ``RuntimeWarning`` is silenced while the comparison runs.
      """
      options = dict(allow_complex=False, allow_str=False, allow_date=False,
                     allow_tdelta=True, allow_obj='convert')
      with warnings.catch_warnings(record=True):
          warnings.simplefilter("ignore", RuntimeWarning)
          self.check_funs(nanops.nanmedian, np.median, **options)
  @pytest.mark.parametrize('ddof', range(3))
  def test_nanvar(self, ddof):
      """Check ``nanops.nanvar`` against ``np.var`` for ``ddof`` 0, 1, 2."""
      options = dict(allow_complex=False, allow_str=False, allow_date=False,
                     allow_tdelta=True, allow_obj='convert')
      self.check_funs(nanops.nanvar, np.var, ddof=ddof, **options)
  @pytest.mark.parametrize('ddof', range(3))
  def test_nanstd(self, ddof):
      """Check ``nanops.nanstd`` against ``np.std`` for ``ddof`` 0, 1, 2."""
      options = dict(allow_complex=False, allow_str=False, allow_date=False,
                     allow_tdelta=True, allow_obj='convert')
      self.check_funs(nanops.nanstd, np.std, ddof=ddof, **options)
  @td.skip_if_no('scipy', min_version='0.17.0')
  @pytest.mark.parametrize('ddof', range(3))
  def test_nansem(self, ddof):
      """Check ``nanops.nansem`` against ``scipy.stats.sem``.

      numpy "invalid value" floating-point errors are ignored during the
      comparison.
      """
      from scipy.stats import sem

      options = dict(allow_complex=False, allow_str=False, allow_date=False,
                     allow_tdelta=False, allow_obj='convert')
      with np.errstate(invalid='ignore'):
          self.check_funs(nanops.nansem, sem, ddof=ddof, **options)
  305. def _minmax_wrap(self, value, axis=None, func=None):
  306. # numpy warns if all nan
  307. res = func(value, axis)
  308. if res.dtype.kind == 'm':
  309. res = np.atleast_1d(res)
  310. return res
  def test_nanmin(self):
      """Check ``nanops.nanmin`` against ``np.min`` (via ``_minmax_wrap``)."""
      reference = partial(self._minmax_wrap, func=np.min)
      with warnings.catch_warnings(record=True):
          warnings.simplefilter("ignore", RuntimeWarning)
          self.check_funs(nanops.nanmin, reference,
                          allow_str=False, allow_obj=False)
  def test_nanmax(self):
      """Check ``nanops.nanmax`` against ``np.max`` (via ``_minmax_wrap``)."""
      reference = partial(self._minmax_wrap, func=np.max)
      with warnings.catch_warnings():
          warnings.simplefilter("ignore", RuntimeWarning)
          self.check_funs(nanops.nanmax, reference,
                          allow_str=False, allow_obj=False)
  323. def _argminmax_wrap(self, value, axis=None, func=None):
  324. res = func(value, axis)
  325. nans = np.min(value, axis)
  326. nullnan = isna(nans)
  327. if res.ndim:
  328. res[nullnan] = -1
  329. elif (hasattr(nullnan, 'all') and nullnan.all() or
  330. not hasattr(nullnan, 'all') and nullnan):
  331. res = -1
  332. return res
  def test_nanargmax(self):
      """Check ``nanops.nanargmax`` against ``np.argmax`` with NaN handling."""
      reference = partial(self._argminmax_wrap, func=np.argmax)
      options = dict(allow_str=False, allow_obj=False,
                     allow_date=True, allow_tdelta=True)
      with warnings.catch_warnings(record=True):
          warnings.simplefilter("ignore", RuntimeWarning)
          self.check_funs(nanops.nanargmax, reference, **options)
  def test_nanargmin(self):
      """Check ``nanops.nanargmin`` against ``np.argmin`` with NaN handling."""
      reference = partial(self._argminmax_wrap, func=np.argmin)
      with warnings.catch_warnings(record=True):
          warnings.simplefilter("ignore", RuntimeWarning)
          self.check_funs(nanops.nanargmin, reference,
                          allow_str=False, allow_obj=False)
  346. def _skew_kurt_wrap(self, values, axis=None, func=None):
  347. if not isinstance(values.dtype.type, np.floating):
  348. values = values.astype('f8')
  349. result = func(values, axis=axis, bias=False)
  350. # fix for handling cases where all elements in an axis are the same
  351. if isinstance(result, np.ndarray):
  352. result[np.max(values, axis=axis) == np.min(values, axis=axis)] = 0
  353. return result
  354. elif np.max(values) == np.min(values):
  355. return 0.
  356. return result
  @td.skip_if_no('scipy', min_version='0.17.0')
  def test_nanskew(self):
      """Check ``nanops.nanskew`` against ``scipy.stats.skew``."""
      from scipy.stats import skew

      reference = partial(self._skew_kurt_wrap, func=skew)
      options = dict(allow_complex=False, allow_str=False,
                     allow_date=False, allow_tdelta=False)
      with np.errstate(invalid='ignore'):
          self.check_funs(nanops.nanskew, reference, **options)
  @td.skip_if_no('scipy', min_version='0.17.0')
  def test_nankurt(self):
      """Check ``nanops.nankurt`` against Fisher ``scipy.stats.kurtosis``."""
      from scipy.stats import kurtosis

      fisher_kurtosis = partial(kurtosis, fisher=True)
      reference = partial(self._skew_kurt_wrap, func=fisher_kurtosis)
      options = dict(allow_complex=False, allow_str=False,
                     allow_date=False, allow_tdelta=False)
      with np.errstate(invalid='ignore'):
          self.check_funs(nanops.nankurt, reference, **options)
  def test_nanprod(self):
      """Check ``nanops.nanprod`` against ``np.prod``.

      ``np.nanprod`` supplies the reference for all-NaN input when NaNs are
      skipped.
      """
      options = dict(allow_str=False, allow_date=False, allow_tdelta=False,
                     empty_targfunc=np.nanprod)
      self.check_funs(nanops.nanprod, np.prod, **options)
  def check_nancorr_nancov_2d(self, checkfun, targ0, targ1, **kwargs):
      """Check a pairwise statistic on the 2-D float fixtures.

      ``targ0`` is expected for the plain fixtures and ``targ1`` for the
      NaN-padded ones, with and without ``min_periods`` set to one less
      than the fixture length.  Combinations involving all-NaN input,
      non-overlapping valid data, or ``min_periods`` above the fixture
      length are expected to give NaN.
      """
      x, y = self.arr_float_2d, self.arr_float1_2d
      x_nan, y_nan = self.arr_float_nan_2d, self.arr_float1_nan_2d
      all_nan, nan_then_y = self.arr_nan_2d, self.arr_nan_float1_2d
      n_obs = len(x)

      plain = [
          checkfun(x, y, **kwargs),
          checkfun(x, y, min_periods=n_obs - 1, **kwargs),
      ]
      for res in plain:
          tm.assert_almost_equal(targ0, res)

      padded = [
          checkfun(x_nan, y_nan, **kwargs),
          checkfun(x_nan, y_nan, min_periods=n_obs - 1, **kwargs),
      ]
      for res in padded:
          tm.assert_almost_equal(targ1, res)

      targ2 = np.nan
      degenerate = [
          checkfun(all_nan, y, **kwargs),
          checkfun(x, all_nan, **kwargs),
          checkfun(all_nan, all_nan, **kwargs),
          checkfun(x_nan, nan_then_y, **kwargs),
          checkfun(x_nan, nan_then_y, min_periods=n_obs - 1, **kwargs),
          checkfun(x, y, min_periods=n_obs + 1, **kwargs),
      ]
      for res in degenerate:
          tm.assert_almost_equal(targ2, res)
  def check_nancorr_nancov_1d(self, checkfun, targ0, targ1, **kwargs):
      """Check a pairwise statistic on the 1-D float fixtures.

      ``targ0`` is expected for the plain fixtures and ``targ1`` for the
      NaN-padded ones, with and without ``min_periods`` set to one less
      than the fixture length.  Combinations involving all-NaN input,
      non-overlapping valid data, or ``min_periods`` above the fixture
      length are expected to give NaN.
      """
      x, y = self.arr_float_1d, self.arr_float1_1d
      x_nan, y_nan = self.arr_float_nan_1d, self.arr_float1_nan_1d
      all_nan, nan_then_y = self.arr_nan_1d, self.arr_nan_float1_1d
      n_obs = len(x)

      plain = [
          checkfun(x, y, **kwargs),
          checkfun(x, y, min_periods=n_obs - 1, **kwargs),
      ]
      for res in plain:
          tm.assert_almost_equal(targ0, res)

      padded = [
          checkfun(x_nan, y_nan, **kwargs),
          checkfun(x_nan, y_nan, min_periods=n_obs - 1, **kwargs),
      ]
      for res in padded:
          tm.assert_almost_equal(targ1, res)

      targ2 = np.nan
      degenerate = [
          checkfun(all_nan, y, **kwargs),
          checkfun(x, all_nan, **kwargs),
          checkfun(all_nan, all_nan, **kwargs),
          checkfun(x_nan, nan_then_y, **kwargs),
          checkfun(x_nan, nan_then_y, min_periods=n_obs - 1, **kwargs),
          checkfun(x, y, min_periods=n_obs + 1, **kwargs),
      ]
      for res in degenerate:
          tm.assert_almost_equal(targ2, res)
  def test_nancorr(self):
      """Check ``nanops.nancorr`` with its default method against numpy.

      Both the 2-D and the 1-D fixtures are run without a ``method``
      argument.  The 1-D case previously passed ``method='pearson'``, which
      only duplicated ``test_nancorr_pearson`` and left the default-method
      path untested for 1-D input.
      """
      targ0 = np.corrcoef(self.arr_float_2d, self.arr_float1_2d)[0, 1]
      targ1 = np.corrcoef(self.arr_float_2d.flat,
                          self.arr_float1_2d.flat)[0, 1]
      self.check_nancorr_nancov_2d(nanops.nancorr, targ0, targ1)

      targ0 = np.corrcoef(self.arr_float_1d, self.arr_float1_1d)[0, 1]
      targ1 = np.corrcoef(self.arr_float_1d.flat,
                          self.arr_float1_1d.flat)[0, 1]
      self.check_nancorr_nancov_1d(nanops.nancorr, targ0, targ1)
  def test_nancorr_pearson(self):
      """Check ``nanops.nancorr(method='pearson')`` against ``np.corrcoef``."""
      cases = ((self.arr_float_2d, self.arr_float1_2d,
                self.check_nancorr_nancov_2d),
               (self.arr_float_1d, self.arr_float1_1d,
                self.check_nancorr_nancov_1d))
      for left, right, checker in cases:
          targ0 = np.corrcoef(left, right)[0, 1]
          targ1 = np.corrcoef(left.flat, right.flat)[0, 1]
          checker(nanops.nancorr, targ0, targ1, method='pearson')
  @td.skip_if_no_scipy
  def test_nancorr_kendall(self):
      """Check ``nanops.nancorr(method='kendall')`` against ``kendalltau``."""
      from scipy.stats import kendalltau

      cases = ((self.arr_float_2d, self.arr_float1_2d,
                self.check_nancorr_nancov_2d),
               (self.arr_float_1d, self.arr_float1_1d,
                self.check_nancorr_nancov_1d))
      for left, right, checker in cases:
          targ0 = kendalltau(left, right)[0]
          targ1 = kendalltau(left.flat, right.flat)[0]
          checker(nanops.nancorr, targ0, targ1, method='kendall')
  @td.skip_if_no_scipy
  def test_nancorr_spearman(self):
      """Check ``nanops.nancorr(method='spearman')`` against ``spearmanr``."""
      from scipy.stats import spearmanr

      cases = ((self.arr_float_2d, self.arr_float1_2d,
                self.check_nancorr_nancov_2d),
               (self.arr_float_1d, self.arr_float1_1d,
                self.check_nancorr_nancov_1d))
      for left, right, checker in cases:
          targ0 = spearmanr(left, right)[0]
          targ1 = spearmanr(left.flat, right.flat)[0]
          checker(nanops.nancorr, targ0, targ1, method='spearman')
  def test_nancov(self):
      """Check ``nanops.nancov`` against ``np.cov`` on 2-D and 1-D data."""
      cases = ((self.arr_float_2d, self.arr_float1_2d,
                self.check_nancorr_nancov_2d),
               (self.arr_float_1d, self.arr_float1_1d,
                self.check_nancorr_nancov_1d))
      for left, right, checker in cases:
          targ0 = np.cov(left, right)[0, 1]
          targ1 = np.cov(left.flat, right.flat)[0, 1]
          checker(nanops.nancov, targ0, targ1)
  def check_nancomp(self, checkfun, targ0):
      """Check a NaN-aware comparison function at every dimensionality.

      ``targ0`` is the expected result for the plain float fixtures.  For
      the NaN-padded fixtures the padded part must come back as NaN, and
      comparing fixtures whose valid parts do not overlap must give all
      NaN.  After each pass index 0 is taken along the last axis of every
      array and the checks repeat until ``targ0`` is 0-d.
      """
      arrays = [self.arr_float, self.arr_float1, self.arr_nan,
                self.arr_nan_nan, self.arr_float_nan, self.arr_float1_nan,
                self.arr_nan_float1]

      while targ0.ndim:
          (arr_float, arr_float1, arr_nan, arr_nan_nan, arr_float_nan,
           arr_float1_nan, arr_nan_float1) = arrays
          try:
              res0 = checkfun(arr_float, arr_float1)
              tm.assert_almost_equal(targ0, res0)

              combine = np.vstack if targ0.ndim > 1 else np.hstack
              targ1 = combine([targ0, arr_nan])
              res1 = checkfun(arr_float_nan, arr_float1_nan)
              tm.assert_numpy_array_equal(targ1, res1, check_dtype=False)

              targ2 = arr_nan_nan
              res2 = checkfun(arr_float_nan, arr_nan_float1)
              tm.assert_numpy_array_equal(targ2, res2, check_dtype=False)
          except Exception as exc:
              exc.args += ('ndim: %s' % arr_float.ndim, )
              raise

          try:
              # drop the last axis of every array for the next pass
              arrays = [np.take(arr, 0, axis=-1) for arr in arrays]
              targ0 = np.take(targ0, 0, axis=-1)
          except ValueError:
              break
  def test_nangt(self):
      """``nanops.nangt`` must agree with ``>`` on the float fixtures."""
      expected = self.arr_float > self.arr_float1
      self.check_nancomp(nanops.nangt, expected)
  def test_nange(self):
      """``nanops.nange`` must agree with ``>=`` on the float fixtures."""
      expected = self.arr_float >= self.arr_float1
      self.check_nancomp(nanops.nange, expected)
  def test_nanlt(self):
      """``nanops.nanlt`` must agree with ``<`` on the float fixtures."""
      expected = self.arr_float < self.arr_float1
      self.check_nancomp(nanops.nanlt, expected)
  def test_nanle(self):
      """``nanops.nanle`` must agree with ``<=`` on the float fixtures."""
      expected = self.arr_float <= self.arr_float1
      self.check_nancomp(nanops.nanle, expected)
  def test_naneq(self):
      """``nanops.naneq`` must agree with ``==`` on the float fixtures."""
      expected = self.arr_float == self.arr_float1
      self.check_nancomp(nanops.naneq, expected)
  def test_nanne(self):
      """``nanops.nanne`` must agree with ``!=`` on the float fixtures."""
      expected = self.arr_float != self.arr_float1
      self.check_nancomp(nanops.nanne, expected)
  def check_bool(self, func, value, correct, *args, **kwargs):
      """Assert the truthiness of ``func(value)`` at every dimensionality.

      ``func(value, *args, **kwargs)`` must be truthy when ``correct`` is
      true and falsy otherwise.  The check is repeated after taking index 0
      along the last axis until ``value`` is 0-d; a value without ``ndim``
      is checked once.  Failures are annotated with the current ``ndim``.
      """
      while getattr(value, 'ndim', True):
          try:
              outcome = func(value, *args, **kwargs)
              assert outcome if correct else not outcome
          except BaseException as exc:
              exc.args += ('dim: %s' % getattr(value, 'ndim', value), )
              raise

          if not hasattr(value, 'ndim'):
              # plain scalar: nothing left to reduce
              break
          try:
              value = np.take(value, 0, axis=-1)
          except ValueError:
              break
  def test__has_infs(self):
      """Check ``nanops._has_infs`` on every fixture family.

      Each fixture name is paired with the expected truthiness of
      ``nanops._has_infs``.  The float fixtures are additionally checked
      after casting to ``'f4'`` and ``'f2'``.  Failures are annotated with
      the fixture name.  (A duplicated ``('arr_complex', False)`` entry was
      dropped and the two copy-pasted loops were merged.)
      """
      pairs = [('arr_complex', False), ('arr_int', False),
               ('arr_bool', False), ('arr_str', False), ('arr_utf', False),
               ('arr_complex_nan', False), ('arr_nan_nanj', False),
               ('arr_nan_infj', True), ('arr_complex_nan_infj', True)]
      pairs_float = [('arr_float', False), ('arr_nan', False),
                     ('arr_float_nan', False), ('arr_nan_nan', False),
                     ('arr_float_inf', True), ('arr_inf', True),
                     ('arr_nan_inf', True), ('arr_float_nan_inf', True),
                     ('arr_nan_nan_inf', True)]

      # (cases, narrower float dtypes to re-check after casting)
      plans = ((pairs, ()), (pairs_float, ('f4', 'f2')))
      for cases, narrow_dtypes in plans:
          for arr, correct in cases:
              val = getattr(self, arr)
              try:
                  self.check_bool(nanops._has_infs, val, correct)
                  for dtype in narrow_dtypes:
                      self.check_bool(nanops._has_infs, val.astype(dtype),
                                      correct)
              except BaseException as exc:
                  exc.args += (arr, )
                  raise
  def test__isfinite(self):
      """Check ``nanops._isfinite`` on every fixture family.

      Each fixture name is paired with whether any element of
      ``nanops._isfinite(arr)`` is expected to be true.  The float fixtures
      are additionally checked after casting to ``'f4'`` and ``'f2'``.
      Failures are annotated with the fixture name.

      NOTE(review): going by the expectations below, ``_isfinite`` appears
      to flag the NaN/inf entries rather than the finite ones -- confirm
      against ``nanops``.

      A duplicated ``('arr_complex', False)`` entry and a commented-out,
      unused helper were removed, and the two copy-pasted loops merged.
      """
      pairs = [('arr_complex', False), ('arr_int', False),
               ('arr_bool', False), ('arr_str', False), ('arr_utf', False),
               ('arr_complex_nan', True), ('arr_nan_nanj', True),
               ('arr_nan_infj', True), ('arr_complex_nan_infj', True)]
      pairs_float = [('arr_float', False), ('arr_nan', True),
                     ('arr_float_nan', True), ('arr_nan_nan', True),
                     ('arr_float_inf', True), ('arr_inf', True),
                     ('arr_nan_inf', True), ('arr_float_nan_inf', True),
                     ('arr_nan_nan_inf', True)]

      def any_flagged(x):
          return np.any(nanops._isfinite(x).ravel())

      # (cases, narrower float dtypes to re-check after casting)
      plans = ((pairs, ()), (pairs_float, ('f4', 'f2')))
      for cases, narrow_dtypes in plans:
          for arr, correct in cases:
              val = getattr(self, arr)
              try:
                  self.check_bool(any_flagged, val, correct)
                  for dtype in narrow_dtypes:
                      self.check_bool(any_flagged, val.astype(dtype),
                                      correct)
              except BaseException as exc:
                  exc.args += (arr, )
                  raise
  def test__bn_ok_dtype(self):
      """Check which fixture dtypes ``nanops._bn_ok_dtype`` accepts.

      Float, complex, int, bool and both string dtypes must be accepted;
      datetime64, timedelta64 and object dtypes must be rejected.
      """
      accepted = (self.arr_float, self.arr_complex, self.arr_int,
                  self.arr_bool, self.arr_str, self.arr_utf)
      rejected = (self.arr_date, self.arr_tdelta, self.arr_obj)

      for arr in accepted:
          assert nanops._bn_ok_dtype(arr.dtype, 'test')
      for arr in rejected:
          assert not nanops._bn_ok_dtype(arr.dtype, 'test')
  class TestEnsureNumeric(object):
      """Tests for ``nanops._ensure_numeric``."""

      def test_numeric_values(self):
          """Integer, float and complex scalars compare equal afterwards."""
          for scalar in (1, 1.1, 1 + 2j):
              assert nanops._ensure_numeric(scalar) == scalar

      def test_ndarray(self):
          """Numeric, object and numeric-string arrays give the same values."""
          values = np.array([1, 2, 3])
          convertible = [
              values,                                      # numeric ndarray
              values.astype(object),                       # object ndarray
              np.array(['1', '2', '3'], dtype=object),     # numeric strings
          ]
          for candidate in convertible:
              assert np.allclose(nanops._ensure_numeric(candidate), values)

          # Test non-convertible string ndarray
          s_values = np.array(['foo', 'bar', 'baz'], dtype=object)
          with pytest.raises(ValueError):
              nanops._ensure_numeric(s_values)

      def test_convertable_values(self):
          """Strings holding numbers convert to the matching number."""
          for text, expected in (('1', 1.0), ('1.1', 1.1),
                                 ('1+1j', 1 + 1j)):
              assert np.allclose(nanops._ensure_numeric(text), expected)

      def test_non_convertable_values(self):
          """Non-numeric strings and containers raise ``TypeError``."""
          for bad in ('foo', {}, []):
              with pytest.raises(TypeError):
                  nanops._ensure_numeric(bad)
  650. class TestNanvarFixedValues(object):
  651. # xref GH10242
  def setup_method(self, method):
      """Draw the normal samples used by the tests of this class.

      Stores the target variance (3.0) and 100000 samples drawn through
      ``self.prng`` with the matching standard deviation.  ``prng`` is
      defined outside the visible part of the class.
      """
      # Samples from a normal distribution.
      variance = 3.0
      self.variance = variance
      self.samples = self.prng.normal(scale=variance ** 0.5, size=100000)
  656. def test_nanvar_all_finite(self):
  657. samples = self.samples
  658. actual_variance = nanops.nanvar(samples)
  659. tm.assert_almost_equal(actual_variance, self.variance,
  660. check_less_precise=2)
  661. def test_nanvar_nans(self):
  662. samples = np.nan * np.ones(2 * self.samples.shape[0])
  663. samples[::2] = self.samples
  664. actual_variance = nanops.nanvar(samples, skipna=True)
  665. tm.assert_almost_equal(actual_variance, self.variance,
  666. check_less_precise=2)
  667. actual_variance = nanops.nanvar(samples, skipna=False)
  668. tm.assert_almost_equal(actual_variance, np.nan, check_less_precise=2)
  669. def test_nanstd_nans(self):
  670. samples = np.nan * np.ones(2 * self.samples.shape[0])
  671. samples[::2] = self.samples
  672. actual_std = nanops.nanstd(samples, skipna=True)
  673. tm.assert_almost_equal(actual_std, self.variance ** 0.5,
  674. check_less_precise=2)
  675. actual_std = nanops.nanvar(samples, skipna=False)
  676. tm.assert_almost_equal(actual_std, np.nan,
  677. check_less_precise=2)
  678. def test_nanvar_axis(self):
  679. # Generate some sample data.
  680. samples_norm = self.samples
  681. samples_unif = self.prng.uniform(size=samples_norm.shape[0])
  682. samples = np.vstack([samples_norm, samples_unif])
  683. actual_variance = nanops.nanvar(samples, axis=1)
  684. tm.assert_almost_equal(actual_variance, np.array(
  685. [self.variance, 1.0 / 12]), check_less_precise=2)
  686. def test_nanvar_ddof(self):
  687. n = 5
  688. samples = self.prng.uniform(size=(10000, n + 1))
  689. samples[:, -1] = np.nan # Force use of our own algorithm.
  690. variance_0 = nanops.nanvar(samples, axis=1, skipna=True, ddof=0).mean()
  691. variance_1 = nanops.nanvar(samples, axis=1, skipna=True, ddof=1).mean()
  692. variance_2 = nanops.nanvar(samples, axis=1, skipna=True, ddof=2).mean()
  693. # The unbiased estimate.
  694. var = 1.0 / 12
  695. tm.assert_almost_equal(variance_1, var,
  696. check_less_precise=2)
  697. # The underestimated variance.
  698. tm.assert_almost_equal(variance_0, (n - 1.0) / n * var,
  699. check_less_precise=2)
  700. # The overestimated variance.
  701. tm.assert_almost_equal(variance_2, (n - 1.0) / (n - 2.0) * var,
  702. check_less_precise=2)
  703. def test_ground_truth(self):
  704. # Test against values that were precomputed with Numpy.
  705. samples = np.empty((4, 4))
  706. samples[:3, :3] = np.array([[0.97303362, 0.21869576, 0.55560287
  707. ], [0.72980153, 0.03109364, 0.99155171],
  708. [0.09317602, 0.60078248, 0.15871292]])
  709. samples[3] = samples[:, 3] = np.nan
  710. # Actual variances along axis=0, 1 for ddof=0, 1, 2
  711. variance = np.array([[[0.13762259, 0.05619224, 0.11568816
  712. ], [0.20643388, 0.08428837, 0.17353224],
  713. [0.41286776, 0.16857673, 0.34706449]],
  714. [[0.09519783, 0.16435395, 0.05082054
  715. ], [0.14279674, 0.24653093, 0.07623082],
  716. [0.28559348, 0.49306186, 0.15246163]]])
  717. # Test nanvar.
  718. for axis in range(2):
  719. for ddof in range(3):
  720. var = nanops.nanvar(samples, skipna=True, axis=axis, ddof=ddof)
  721. tm.assert_almost_equal(var[:3], variance[axis, ddof])
  722. assert np.isnan(var[3])
  723. # Test nanstd.
  724. for axis in range(2):
  725. for ddof in range(3):
  726. std = nanops.nanstd(samples, skipna=True, axis=axis, ddof=ddof)
  727. tm.assert_almost_equal(std[:3], variance[axis, ddof] ** 0.5)
  728. assert np.isnan(std[3])
  729. def test_nanstd_roundoff(self):
  730. # Regression test for GH 10242 (test data taken from GH 10489). Ensure
  731. # that variance is stable.
  732. data = Series(766897346 * np.ones(10))
  733. for ddof in range(3):
  734. result = data.std(ddof=ddof)
  735. assert result == 0.0
  736. @property
  737. def prng(self):
  738. return np.random.RandomState(1234)
  class TestNanskewFixedValues(object):
      """Fixed-value checks for ``nanops.nanskew`` (xref GH 11974)."""

      def setup_method(self, method):
          # Test data; the reference skewness was computed with
          # scipy.stats.skew.
          self.actual_skew = -0.1875895205961754
          self.samples = np.sin(np.linspace(0, 1, 200))

      def test_constant_series(self):
          # xref GH 11974
          for constant in (3075.2, 3075.3, 3075.5):
              result = nanops.nanskew(constant * np.ones(300))
              assert result == 0.0

      def test_all_finite(self):
          """Sign of the skewness for two beta-distributed samples."""
          left_tailed = self.prng.beta(0.3, 0.1, size=100)
          assert nanops.nanskew(left_tailed) < 0

          right_tailed = self.prng.beta(0.1, 0.3, size=100)
          assert nanops.nanskew(right_tailed) > 0

      def test_ground_truth(self):
          """Skewness of the sample data matches the stored reference."""
          tm.assert_almost_equal(nanops.nanskew(self.samples),
                                 self.actual_skew)

      def test_axis(self):
          """Row-wise skewness; the all-NaN row is expected to give NaN."""
          all_nan_row = np.full(len(self.samples), np.nan)
          stacked = np.vstack([self.samples, all_nan_row])
          expected = np.array([self.actual_skew, np.nan])
          tm.assert_almost_equal(nanops.nanskew(stacked, axis=1), expected)

      def test_nans(self):
          """With skipna=False a trailing NaN makes the result NaN."""
          with_nan = np.hstack([self.samples, np.nan])
          assert np.isnan(nanops.nanskew(with_nan, skipna=False))

      def test_nans_skipna(self):
          """With skipna=True a trailing NaN leaves the result unchanged."""
          with_nan = np.hstack([self.samples, np.nan])
          tm.assert_almost_equal(nanops.nanskew(with_nan, skipna=True),
                                 self.actual_skew)

      @property
      def prng(self):
          """Return a new ``RandomState`` seeded with 1234 on every access."""
          return np.random.RandomState(1234)
  class TestNankurtFixedValues(object):
      """Fixed-value checks for ``nanops.nankurt`` (xref GH 11974)."""

      def setup_method(self, method):
          # Test data; the reference kurtosis was computed with
          # scipy.stats.kurtosis.
          self.actual_kurt = -1.2058303433799713
          self.samples = np.sin(np.linspace(0, 1, 200))

      def test_constant_series(self):
          # xref GH 11974
          for constant in (3075.2, 3075.3, 3075.5):
              result = nanops.nankurt(constant * np.ones(300))
              assert result == 0.0

      def test_all_finite(self):
          """Sign of the kurtosis for two beta-distributed samples."""
          left_tailed = self.prng.beta(0.3, 0.1, size=100)
          assert nanops.nankurt(left_tailed) < 0

          right_tailed = self.prng.beta(0.1, 0.3, size=100)
          assert nanops.nankurt(right_tailed) > 0

      def test_ground_truth(self):
          """Kurtosis of the sample data matches the stored reference."""
          tm.assert_almost_equal(nanops.nankurt(self.samples),
                                 self.actual_kurt)

      def test_axis(self):
          """Row-wise kurtosis; the all-NaN row is expected to give NaN."""
          all_nan_row = np.full(len(self.samples), np.nan)
          stacked = np.vstack([self.samples, all_nan_row])
          expected = np.array([self.actual_kurt, np.nan])
          tm.assert_almost_equal(nanops.nankurt(stacked, axis=1), expected)

      def test_nans(self):
          """With skipna=False a trailing NaN makes the result NaN."""
          with_nan = np.hstack([self.samples, np.nan])
          assert np.isnan(nanops.nankurt(with_nan, skipna=False))

      def test_nans_skipna(self):
          """With skipna=True a trailing NaN leaves the result unchanged."""
          with_nan = np.hstack([self.samples, np.nan])
          tm.assert_almost_equal(nanops.nankurt(with_nan, skipna=True),
                                 self.actual_kurt)

      @property
      def prng(self):
          """Return a new ``RandomState`` seeded with 1234 on every access."""
          return np.random.RandomState(1234)
  class TestDatetime64NaNOps(object):
      """``nanops.nanmean`` on datetime data, with and without a timezone."""

      @pytest.mark.parametrize('tz', [None, 'UTC'])
      @pytest.mark.xfail(reason="disabled")
      # Enabling mean changes the behavior of DataFrame.mean
      # See https://github.com/pandas-dev/pandas/issues/24752
      def test_nanmean(self, tz):
          index = pd.date_range('2016-01-01', periods=3, tz=tz)
          # The middle of the three dates is the expected mean.
          middle = index[1]

          containers = [index, DatetimeArray(index), Series(index)]
          for values in containers:
              assert nanops.nanmean(values) == middle

          # Same dates with a NaT inserted; the expected mean is unchanged.
          with_nat = index.insert(1, pd.NaT)
          containers = [with_nat, DatetimeArray(with_nat), Series(with_nat)]
          for values in containers:
              assert nanops.nanmean(values) == middle
  def test_use_bottleneck():
      """Toggle the ``use_bottleneck`` option on and off and read it back.

      Nothing is checked unless ``nanops._BOTTLENECK_INSTALLED`` is truthy.
      """
      if not nanops._BOTTLENECK_INSTALLED:
          return

      try:
          pd.set_option('use_bottleneck', True)
          assert pd.get_option('use_bottleneck')

          pd.set_option('use_bottleneck', False)
          assert not pd.get_option('use_bottleneck')
      finally:
          # Reset the option to ``use_bn`` (defined outside this function)
          # even when an assertion above fails, so the changed global option
          # does not leak into tests that run afterwards.
          pd.set_option('use_bottleneck', use_bn)
  @pytest.mark.parametrize("numpy_op, expected", [
      (np.sum, 10),
      (np.nansum, 10),
      (np.mean, 2.5),
      (np.nanmean, 2.5),
      (np.median, 2.5),
      (np.nanmedian, 2.5),
      (np.min, 1),
      (np.max, 4),
  ])
  def test_numpy_ops(numpy_op, expected):
      """Each numpy reduction applied to a Series equals the expected value."""
      # GH8383
      series = pd.Series([1, 2, 3, 4])
      assert numpy_op(series) == expected
  @pytest.mark.parametrize("numpy_op, expected", [
      (np.nanmin, 1),
      (np.nanmax, 4),
  ])
  def test_numpy_ops_np_version_under1p13(numpy_op, expected):
      """``np.nanmin`` / ``np.nanmax`` on a Series, split on the numpy flag."""
      # GH8383
      outcome = numpy_op(pd.Series([1, 2, 3, 4]))

      if not _np_version_under1p13:
          assert outcome == expected
          return

      # bug for numpy < 1.13, where result is a series, should be a scalar
      with pytest.raises(ValueError):
          assert outcome == expected
  @pytest.mark.parametrize("operation", [
      nanops.nanany,
      nanops.nanall,
      nanops.nansum,
      nanops.nanmean,
      nanops.nanmedian,
      nanops.nanstd,
      nanops.nanvar,
      nanops.nansem,
      nanops.nanargmax,
      nanops.nanargmin,
      nanops.nanmax,
      nanops.nanmin,
      nanops.nanskew,
      nanops.nankurt,
      nanops.nanprod,
  ])
  def test_nanops_independent_of_mask_param(operation):
      """Passing an explicit ``mask`` gives the same result as omitting it."""
      # GH22764
      series = pd.Series([1, 2, np.nan, 3, np.nan, 4])
      na_mask = series.isna()

      without_mask = operation(series)
      with_mask = operation(series, mask=na_mask)
      assert without_mask == with_mask