test_reductions.py 39 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159
  1. # -*- coding: utf-8 -*-
  2. from datetime import datetime, timedelta
  3. import numpy as np
  4. import pytest
  5. import pandas as pd
  6. from pandas import (
  7. Categorical, DataFrame, DatetimeIndex, Index, NaT, Period, PeriodIndex,
  8. RangeIndex, Series, Timedelta, TimedeltaIndex, Timestamp, compat, isna,
  9. timedelta_range, to_timedelta)
  10. from pandas.core import nanops
  11. import pandas.util.testing as tm
  12. def get_objs():
  13. indexes = [
  14. tm.makeBoolIndex(10, name='a'),
  15. tm.makeIntIndex(10, name='a'),
  16. tm.makeFloatIndex(10, name='a'),
  17. tm.makeDateIndex(10, name='a'),
  18. tm.makeDateIndex(10, name='a').tz_localize(tz='US/Eastern'),
  19. tm.makePeriodIndex(10, name='a'),
  20. tm.makeStringIndex(10, name='a'),
  21. tm.makeUnicodeIndex(10, name='a')
  22. ]
  23. arr = np.random.randn(10)
  24. series = [Series(arr, index=idx, name='a') for idx in indexes]
  25. objs = indexes + series
  26. return objs
# Built once at import time; used to parametrize ``test_ops`` below.
objs = get_objs()
  28. class TestReductions(object):
  29. @pytest.mark.parametrize('opname', ['max', 'min'])
  30. @pytest.mark.parametrize('obj', objs)
  31. def test_ops(self, opname, obj):
  32. result = getattr(obj, opname)()
  33. if not isinstance(obj, PeriodIndex):
  34. expected = getattr(obj.values, opname)()
  35. else:
  36. expected = pd.Period(
  37. ordinal=getattr(obj._ndarray_values, opname)(),
  38. freq=obj.freq)
  39. try:
  40. assert result == expected
  41. except TypeError:
  42. # comparing tz-aware series with np.array results in
  43. # TypeError
  44. expected = expected.astype('M8[ns]').astype('int64')
  45. assert result.value == expected
  46. def test_nanops(self):
  47. # GH#7261
  48. for opname in ['max', 'min']:
  49. for klass in [Index, Series]:
  50. arg_op = 'arg' + opname if klass is Index else 'idx' + opname
  51. obj = klass([np.nan, 2.0])
  52. assert getattr(obj, opname)() == 2.0
  53. obj = klass([np.nan])
  54. assert pd.isna(getattr(obj, opname)())
  55. assert pd.isna(getattr(obj, opname)(skipna=False))
  56. obj = klass([])
  57. assert pd.isna(getattr(obj, opname)())
  58. assert pd.isna(getattr(obj, opname)(skipna=False))
  59. obj = klass([pd.NaT, datetime(2011, 11, 1)])
  60. # check DatetimeIndex monotonic path
  61. assert getattr(obj, opname)() == datetime(2011, 11, 1)
  62. assert getattr(obj, opname)(skipna=False) is pd.NaT
  63. assert getattr(obj, arg_op)() == 1
  64. result = getattr(obj, arg_op)(skipna=False)
  65. if klass is Series:
  66. assert np.isnan(result)
  67. else:
  68. assert result == -1
  69. obj = klass([pd.NaT, datetime(2011, 11, 1), pd.NaT])
  70. # check DatetimeIndex non-monotonic path
  71. assert getattr(obj, opname)(), datetime(2011, 11, 1)
  72. assert getattr(obj, opname)(skipna=False) is pd.NaT
  73. assert getattr(obj, arg_op)() == 1
  74. result = getattr(obj, arg_op)(skipna=False)
  75. if klass is Series:
  76. assert np.isnan(result)
  77. else:
  78. assert result == -1
  79. for dtype in ["M8[ns]", "datetime64[ns, UTC]"]:
  80. # cases with empty Series/DatetimeIndex
  81. obj = klass([], dtype=dtype)
  82. assert getattr(obj, opname)() is pd.NaT
  83. assert getattr(obj, opname)(skipna=False) is pd.NaT
  84. with pytest.raises(ValueError, match="empty sequence"):
  85. getattr(obj, arg_op)()
  86. with pytest.raises(ValueError, match="empty sequence"):
  87. getattr(obj, arg_op)(skipna=False)
  88. # argmin/max
  89. obj = Index(np.arange(5, dtype='int64'))
  90. assert obj.argmin() == 0
  91. assert obj.argmax() == 4
  92. obj = Index([np.nan, 1, np.nan, 2])
  93. assert obj.argmin() == 1
  94. assert obj.argmax() == 3
  95. assert obj.argmin(skipna=False) == -1
  96. assert obj.argmax(skipna=False) == -1
  97. obj = Index([np.nan])
  98. assert obj.argmin() == -1
  99. assert obj.argmax() == -1
  100. assert obj.argmin(skipna=False) == -1
  101. assert obj.argmax(skipna=False) == -1
  102. obj = Index([pd.NaT, datetime(2011, 11, 1), datetime(2011, 11, 2),
  103. pd.NaT])
  104. assert obj.argmin() == 1
  105. assert obj.argmax() == 2
  106. assert obj.argmin(skipna=False) == -1
  107. assert obj.argmax(skipna=False) == -1
  108. obj = Index([pd.NaT])
  109. assert obj.argmin() == -1
  110. assert obj.argmax() == -1
  111. assert obj.argmin(skipna=False) == -1
  112. assert obj.argmax(skipna=False) == -1
  113. @pytest.mark.parametrize('op, expected_col', [
  114. ['max', 'a'], ['min', 'b']
  115. ])
  116. def test_same_tz_min_max_axis_1(self, op, expected_col):
  117. # GH 10390
  118. df = DataFrame(pd.date_range('2016-01-01 00:00:00', periods=3,
  119. tz='UTC'),
  120. columns=['a'])
  121. df['b'] = df.a.subtract(pd.Timedelta(seconds=3600))
  122. result = getattr(df, op)(axis=1)
  123. expected = df[expected_col]
  124. tm.assert_series_equal(result, expected)
  125. class TestIndexReductions(object):
    # Note: the name TestIndexReductions indicates these tests
    # were moved from an Index-specific test file, _not_ that these tests are
    # intended long-term to be Index-specific
  129. @pytest.mark.parametrize('start,stop,step',
  130. [(0, 400, 3), (500, 0, -6), (-10**6, 10**6, 4),
  131. (10**6, -10**6, -4), (0, 10, 20)])
  132. def test_max_min_range(self, start, stop, step):
  133. # GH#17607
  134. idx = RangeIndex(start, stop, step)
  135. expected = idx._int64index.max()
  136. result = idx.max()
  137. assert result == expected
  138. # skipna should be irrelevant since RangeIndex should never have NAs
  139. result2 = idx.max(skipna=False)
  140. assert result2 == expected
  141. expected = idx._int64index.min()
  142. result = idx.min()
  143. assert result == expected
  144. # skipna should be irrelevant since RangeIndex should never have NAs
  145. result2 = idx.min(skipna=False)
  146. assert result2 == expected
  147. # empty
  148. idx = RangeIndex(start, stop, -step)
  149. assert isna(idx.max())
  150. assert isna(idx.min())
  151. def test_minmax_timedelta64(self):
  152. # monotonic
  153. idx1 = TimedeltaIndex(['1 days', '2 days', '3 days'])
  154. assert idx1.is_monotonic
  155. # non-monotonic
  156. idx2 = TimedeltaIndex(['1 days', np.nan, '3 days', 'NaT'])
  157. assert not idx2.is_monotonic
  158. for idx in [idx1, idx2]:
  159. assert idx.min() == Timedelta('1 days')
  160. assert idx.max() == Timedelta('3 days')
  161. assert idx.argmin() == 0
  162. assert idx.argmax() == 2
  163. for op in ['min', 'max']:
  164. # Return NaT
  165. obj = TimedeltaIndex([])
  166. assert pd.isna(getattr(obj, op)())
  167. obj = TimedeltaIndex([pd.NaT])
  168. assert pd.isna(getattr(obj, op)())
  169. obj = TimedeltaIndex([pd.NaT, pd.NaT, pd.NaT])
  170. assert pd.isna(getattr(obj, op)())
  171. def test_numpy_minmax_timedelta64(self):
  172. td = timedelta_range('16815 days', '16820 days', freq='D')
  173. assert np.min(td) == Timedelta('16815 days')
  174. assert np.max(td) == Timedelta('16820 days')
  175. errmsg = "the 'out' parameter is not supported"
  176. with pytest.raises(ValueError, match=errmsg):
  177. np.min(td, out=0)
  178. with pytest.raises(ValueError, match=errmsg):
  179. np.max(td, out=0)
  180. assert np.argmin(td) == 0
  181. assert np.argmax(td) == 5
  182. errmsg = "the 'out' parameter is not supported"
  183. with pytest.raises(ValueError, match=errmsg):
  184. np.argmin(td, out=0)
  185. with pytest.raises(ValueError, match=errmsg):
  186. np.argmax(td, out=0)
  187. def test_timedelta_ops(self):
  188. # GH#4984
  189. # make sure ops return Timedelta
  190. s = Series([Timestamp('20130101') + timedelta(seconds=i * i)
  191. for i in range(10)])
  192. td = s.diff()
  193. result = td.mean()
  194. expected = to_timedelta(timedelta(seconds=9))
  195. assert result == expected
  196. result = td.to_frame().mean()
  197. assert result[0] == expected
  198. result = td.quantile(.1)
  199. expected = Timedelta(np.timedelta64(2600, 'ms'))
  200. assert result == expected
  201. result = td.median()
  202. expected = to_timedelta('00:00:09')
  203. assert result == expected
  204. result = td.to_frame().median()
  205. assert result[0] == expected
  206. # GH#6462
  207. # consistency in returned values for sum
  208. result = td.sum()
  209. expected = to_timedelta('00:01:21')
  210. assert result == expected
  211. result = td.to_frame().sum()
  212. assert result[0] == expected
  213. # std
  214. result = td.std()
  215. expected = to_timedelta(Series(td.dropna().values).std())
  216. assert result == expected
  217. result = td.to_frame().std()
  218. assert result[0] == expected
  219. # invalid ops
  220. for op in ['skew', 'kurt', 'sem', 'prod']:
  221. pytest.raises(TypeError, getattr(td, op))
  222. # GH#10040
  223. # make sure NaT is properly handled by median()
  224. s = Series([Timestamp('2015-02-03'), Timestamp('2015-02-07')])
  225. assert s.diff().median() == timedelta(days=4)
  226. s = Series([Timestamp('2015-02-03'), Timestamp('2015-02-07'),
  227. Timestamp('2015-02-15')])
  228. assert s.diff().median() == timedelta(days=6)
  229. def test_minmax_tz(self, tz_naive_fixture):
  230. tz = tz_naive_fixture
  231. # monotonic
  232. idx1 = pd.DatetimeIndex(['2011-01-01', '2011-01-02',
  233. '2011-01-03'], tz=tz)
  234. assert idx1.is_monotonic
  235. # non-monotonic
  236. idx2 = pd.DatetimeIndex(['2011-01-01', pd.NaT, '2011-01-03',
  237. '2011-01-02', pd.NaT], tz=tz)
  238. assert not idx2.is_monotonic
  239. for idx in [idx1, idx2]:
  240. assert idx.min() == Timestamp('2011-01-01', tz=tz)
  241. assert idx.max() == Timestamp('2011-01-03', tz=tz)
  242. assert idx.argmin() == 0
  243. assert idx.argmax() == 2
  244. @pytest.mark.parametrize('op', ['min', 'max'])
  245. def test_minmax_nat_datetime64(self, op):
  246. # Return NaT
  247. obj = DatetimeIndex([])
  248. assert pd.isna(getattr(obj, op)())
  249. obj = DatetimeIndex([pd.NaT])
  250. assert pd.isna(getattr(obj, op)())
  251. obj = DatetimeIndex([pd.NaT, pd.NaT, pd.NaT])
  252. assert pd.isna(getattr(obj, op)())
  253. def test_numpy_minmax_datetime64(self):
  254. dr = pd.date_range(start='2016-01-15', end='2016-01-20')
  255. assert np.min(dr) == Timestamp('2016-01-15 00:00:00', freq='D')
  256. assert np.max(dr) == Timestamp('2016-01-20 00:00:00', freq='D')
  257. errmsg = "the 'out' parameter is not supported"
  258. with pytest.raises(ValueError, match=errmsg):
  259. np.min(dr, out=0)
  260. with pytest.raises(ValueError, match=errmsg):
  261. np.max(dr, out=0)
  262. assert np.argmin(dr) == 0
  263. assert np.argmax(dr) == 5
  264. errmsg = "the 'out' parameter is not supported"
  265. with pytest.raises(ValueError, match=errmsg):
  266. np.argmin(dr, out=0)
  267. with pytest.raises(ValueError, match=errmsg):
  268. np.argmax(dr, out=0)
  269. def test_minmax_period(self):
  270. # monotonic
  271. idx1 = pd.PeriodIndex([NaT, '2011-01-01', '2011-01-02',
  272. '2011-01-03'], freq='D')
  273. assert idx1.is_monotonic
  274. # non-monotonic
  275. idx2 = pd.PeriodIndex(['2011-01-01', NaT, '2011-01-03',
  276. '2011-01-02', NaT], freq='D')
  277. assert not idx2.is_monotonic
  278. for idx in [idx1, idx2]:
  279. assert idx.min() == pd.Period('2011-01-01', freq='D')
  280. assert idx.max() == pd.Period('2011-01-03', freq='D')
  281. assert idx1.argmin() == 1
  282. assert idx2.argmin() == 0
  283. assert idx1.argmax() == 3
  284. assert idx2.argmax() == 2
  285. for op in ['min', 'max']:
  286. # Return NaT
  287. obj = PeriodIndex([], freq='M')
  288. result = getattr(obj, op)()
  289. assert result is NaT
  290. obj = PeriodIndex([NaT], freq='M')
  291. result = getattr(obj, op)()
  292. assert result is NaT
  293. obj = PeriodIndex([NaT, NaT, NaT], freq='M')
  294. result = getattr(obj, op)()
  295. assert result is NaT
  296. def test_numpy_minmax_period(self):
  297. pr = pd.period_range(start='2016-01-15', end='2016-01-20')
  298. assert np.min(pr) == Period('2016-01-15', freq='D')
  299. assert np.max(pr) == Period('2016-01-20', freq='D')
  300. errmsg = "the 'out' parameter is not supported"
  301. with pytest.raises(ValueError, match=errmsg):
  302. np.min(pr, out=0)
  303. with pytest.raises(ValueError, match=errmsg):
  304. np.max(pr, out=0)
  305. assert np.argmin(pr) == 0
  306. assert np.argmax(pr) == 5
  307. errmsg = "the 'out' parameter is not supported"
  308. with pytest.raises(ValueError, match=errmsg):
  309. np.argmin(pr, out=0)
  310. with pytest.raises(ValueError, match=errmsg):
  311. np.argmax(pr, out=0)
  312. def test_min_max_categorical(self):
  313. ci = pd.CategoricalIndex(list('aabbca'),
  314. categories=list('cab'),
  315. ordered=False)
  316. with pytest.raises(TypeError):
  317. ci.min()
  318. with pytest.raises(TypeError):
  319. ci.max()
  320. ci = pd.CategoricalIndex(list('aabbca'),
  321. categories=list('cab'),
  322. ordered=True)
  323. assert ci.min() == 'c'
  324. assert ci.max() == 'b'
  325. class TestSeriesReductions(object):
  326. # Note: the name TestSeriesReductions indicates these tests
  327. # were moved from a series-specific test file, _not_ that these tests are
  328. # intended long-term to be series-specific
  329. def test_sum_inf(self):
  330. s = Series(np.random.randn(10))
  331. s2 = s.copy()
  332. s[5:8] = np.inf
  333. s2[5:8] = np.nan
  334. assert np.isinf(s.sum())
  335. arr = np.random.randn(100, 100).astype('f4')
  336. arr[:, 2] = np.inf
  337. with pd.option_context("mode.use_inf_as_na", True):
  338. tm.assert_almost_equal(s.sum(), s2.sum())
  339. res = nanops.nansum(arr, axis=1)
  340. assert np.isinf(res).all()
  341. @pytest.mark.parametrize("use_bottleneck", [True, False])
  342. @pytest.mark.parametrize("method, unit", [
  343. ("sum", 0.0),
  344. ("prod", 1.0)
  345. ])
  346. def test_empty(self, method, unit, use_bottleneck):
  347. with pd.option_context("use_bottleneck", use_bottleneck):
  348. # GH#9422 / GH#18921
  349. # Entirely empty
  350. s = Series([])
  351. # NA by default
  352. result = getattr(s, method)()
  353. assert result == unit
  354. # Explicit
  355. result = getattr(s, method)(min_count=0)
  356. assert result == unit
  357. result = getattr(s, method)(min_count=1)
  358. assert pd.isna(result)
  359. # Skipna, default
  360. result = getattr(s, method)(skipna=True)
  361. result == unit
  362. # Skipna, explicit
  363. result = getattr(s, method)(skipna=True, min_count=0)
  364. assert result == unit
  365. result = getattr(s, method)(skipna=True, min_count=1)
  366. assert pd.isna(result)
  367. # All-NA
  368. s = Series([np.nan])
  369. # NA by default
  370. result = getattr(s, method)()
  371. assert result == unit
  372. # Explicit
  373. result = getattr(s, method)(min_count=0)
  374. assert result == unit
  375. result = getattr(s, method)(min_count=1)
  376. assert pd.isna(result)
  377. # Skipna, default
  378. result = getattr(s, method)(skipna=True)
  379. result == unit
  380. # skipna, explicit
  381. result = getattr(s, method)(skipna=True, min_count=0)
  382. assert result == unit
  383. result = getattr(s, method)(skipna=True, min_count=1)
  384. assert pd.isna(result)
  385. # Mix of valid, empty
  386. s = Series([np.nan, 1])
  387. # Default
  388. result = getattr(s, method)()
  389. assert result == 1.0
  390. # Explicit
  391. result = getattr(s, method)(min_count=0)
  392. assert result == 1.0
  393. result = getattr(s, method)(min_count=1)
  394. assert result == 1.0
  395. # Skipna
  396. result = getattr(s, method)(skipna=True)
  397. assert result == 1.0
  398. result = getattr(s, method)(skipna=True, min_count=0)
  399. assert result == 1.0
  400. result = getattr(s, method)(skipna=True, min_count=1)
  401. assert result == 1.0
  402. # GH#844 (changed in GH#9422)
  403. df = DataFrame(np.empty((10, 0)))
  404. assert (getattr(df, method)(1) == unit).all()
  405. s = pd.Series([1])
  406. result = getattr(s, method)(min_count=2)
  407. assert pd.isna(result)
  408. s = pd.Series([np.nan])
  409. result = getattr(s, method)(min_count=2)
  410. assert pd.isna(result)
  411. s = pd.Series([np.nan, 1])
  412. result = getattr(s, method)(min_count=2)
  413. assert pd.isna(result)
  414. @pytest.mark.parametrize('method, unit', [
  415. ('sum', 0.0),
  416. ('prod', 1.0),
  417. ])
  418. def test_empty_multi(self, method, unit):
  419. s = pd.Series([1, np.nan, np.nan, np.nan],
  420. index=pd.MultiIndex.from_product([('a', 'b'), (0, 1)]))
  421. # 1 / 0 by default
  422. result = getattr(s, method)(level=0)
  423. expected = pd.Series([1, unit], index=['a', 'b'])
  424. tm.assert_series_equal(result, expected)
  425. # min_count=0
  426. result = getattr(s, method)(level=0, min_count=0)
  427. expected = pd.Series([1, unit], index=['a', 'b'])
  428. tm.assert_series_equal(result, expected)
  429. # min_count=1
  430. result = getattr(s, method)(level=0, min_count=1)
  431. expected = pd.Series([1, np.nan], index=['a', 'b'])
  432. tm.assert_series_equal(result, expected)
  433. @pytest.mark.parametrize(
  434. "method", ['mean', 'median', 'std', 'var'])
  435. def test_ops_consistency_on_empty(self, method):
  436. # GH#7869
  437. # consistency on empty
  438. # float
  439. result = getattr(Series(dtype=float), method)()
  440. assert pd.isna(result)
  441. # timedelta64[ns]
  442. result = getattr(Series(dtype='m8[ns]'), method)()
  443. assert result is pd.NaT
  444. def test_nansum_buglet(self):
  445. ser = Series([1.0, np.nan], index=[0, 1])
  446. result = np.nansum(ser)
  447. tm.assert_almost_equal(result, 1)
  448. @pytest.mark.parametrize("use_bottleneck", [True, False])
  449. def test_sum_overflow(self, use_bottleneck):
  450. with pd.option_context('use_bottleneck', use_bottleneck):
  451. # GH#6915
  452. # overflowing on the smaller int dtypes
  453. for dtype in ['int32', 'int64']:
  454. v = np.arange(5000000, dtype=dtype)
  455. s = Series(v)
  456. result = s.sum(skipna=False)
  457. assert int(result) == v.sum(dtype='int64')
  458. result = s.min(skipna=False)
  459. assert int(result) == 0
  460. result = s.max(skipna=False)
  461. assert int(result) == v[-1]
  462. for dtype in ['float32', 'float64']:
  463. v = np.arange(5000000, dtype=dtype)
  464. s = Series(v)
  465. result = s.sum(skipna=False)
  466. assert result == v.sum(dtype=dtype)
  467. result = s.min(skipna=False)
  468. assert np.allclose(float(result), 0.0)
  469. result = s.max(skipna=False)
  470. assert np.allclose(float(result), v[-1])
  471. def test_empty_timeseries_reductions_return_nat(self):
  472. # covers GH#11245
  473. for dtype in ('m8[ns]', 'm8[ns]', 'M8[ns]', 'M8[ns, UTC]'):
  474. assert Series([], dtype=dtype).min() is pd.NaT
  475. assert Series([], dtype=dtype).max() is pd.NaT
  476. assert Series([], dtype=dtype).min(skipna=False) is pd.NaT
  477. assert Series([], dtype=dtype).max(skipna=False) is pd.NaT
  478. def test_numpy_argmin_deprecated(self):
  479. # See GH#16830
  480. data = np.arange(1, 11)
  481. s = Series(data, index=data)
  482. with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
  483. # The deprecation of Series.argmin also causes a deprecation
  484. # warning when calling np.argmin. This behavior is temporary
  485. # until the implementation of Series.argmin is corrected.
  486. result = np.argmin(s)
  487. assert result == 1
  488. with tm.assert_produces_warning(FutureWarning):
  489. # argmin is aliased to idxmin
  490. result = s.argmin()
  491. assert result == 1
  492. with tm.assert_produces_warning(FutureWarning,
  493. check_stacklevel=False):
  494. msg = "the 'out' parameter is not supported"
  495. with pytest.raises(ValueError, match=msg):
  496. np.argmin(s, out=data)
  497. def test_numpy_argmax_deprecated(self):
  498. # See GH#16830
  499. data = np.arange(1, 11)
  500. s = Series(data, index=data)
  501. with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
  502. # The deprecation of Series.argmax also causes a deprecation
  503. # warning when calling np.argmax. This behavior is temporary
  504. # until the implementation of Series.argmax is corrected.
  505. result = np.argmax(s)
  506. assert result == 10
  507. with tm.assert_produces_warning(FutureWarning):
  508. # argmax is aliased to idxmax
  509. result = s.argmax()
  510. assert result == 10
  511. with tm.assert_produces_warning(FutureWarning,
  512. check_stacklevel=False):
  513. msg = "the 'out' parameter is not supported"
  514. with pytest.raises(ValueError, match=msg):
  515. np.argmax(s, out=data)
  516. def test_idxmin(self):
  517. # test idxmin
  518. # _check_stat_op approach can not be used here because of isna check.
  519. string_series = tm.makeStringSeries().rename('series')
  520. # add some NaNs
  521. string_series[5:15] = np.NaN
  522. # skipna or no
  523. assert string_series[string_series.idxmin()] == string_series.min()
  524. assert pd.isna(string_series.idxmin(skipna=False))
  525. # no NaNs
  526. nona = string_series.dropna()
  527. assert nona[nona.idxmin()] == nona.min()
  528. assert (nona.index.values.tolist().index(nona.idxmin()) ==
  529. nona.values.argmin())
  530. # all NaNs
  531. allna = string_series * np.nan
  532. assert pd.isna(allna.idxmin())
  533. # datetime64[ns]
  534. s = Series(pd.date_range('20130102', periods=6))
  535. result = s.idxmin()
  536. assert result == 0
  537. s[0] = np.nan
  538. result = s.idxmin()
  539. assert result == 1
  540. def test_idxmax(self):
  541. # test idxmax
  542. # _check_stat_op approach can not be used here because of isna check.
  543. string_series = tm.makeStringSeries().rename('series')
  544. # add some NaNs
  545. string_series[5:15] = np.NaN
  546. # skipna or no
  547. assert string_series[string_series.idxmax()] == string_series.max()
  548. assert pd.isna(string_series.idxmax(skipna=False))
  549. # no NaNs
  550. nona = string_series.dropna()
  551. assert nona[nona.idxmax()] == nona.max()
  552. assert (nona.index.values.tolist().index(nona.idxmax()) ==
  553. nona.values.argmax())
  554. # all NaNs
  555. allna = string_series * np.nan
  556. assert pd.isna(allna.idxmax())
  557. from pandas import date_range
  558. s = Series(date_range('20130102', periods=6))
  559. result = s.idxmax()
  560. assert result == 5
  561. s[5] = np.nan
  562. result = s.idxmax()
  563. assert result == 4
  564. # Float64Index
  565. # GH#5914
  566. s = pd.Series([1, 2, 3], [1.1, 2.1, 3.1])
  567. result = s.idxmax()
  568. assert result == 3.1
  569. result = s.idxmin()
  570. assert result == 1.1
  571. s = pd.Series(s.index, s.index)
  572. result = s.idxmax()
  573. assert result == 3.1
  574. result = s.idxmin()
  575. assert result == 1.1
  576. def test_all_any(self):
  577. ts = tm.makeTimeSeries()
  578. bool_series = ts > 0
  579. assert not bool_series.all()
  580. assert bool_series.any()
  581. # Alternative types, with implicit 'object' dtype.
  582. s = Series(['abc', True])
  583. assert 'abc' == s.any() # 'abc' || True => 'abc'
  584. def test_all_any_params(self):
  585. # Check skipna, with implicit 'object' dtype.
  586. s1 = Series([np.nan, True])
  587. s2 = Series([np.nan, False])
  588. assert s1.all(skipna=False) # nan && True => True
  589. assert s1.all(skipna=True)
  590. assert np.isnan(s2.any(skipna=False)) # nan || False => nan
  591. assert not s2.any(skipna=True)
  592. # Check level.
  593. s = pd.Series([False, False, True, True, False, True],
  594. index=[0, 0, 1, 1, 2, 2])
  595. tm.assert_series_equal(s.all(level=0), Series([False, True, False]))
  596. tm.assert_series_equal(s.any(level=0), Series([False, True, True]))
  597. # bool_only is not implemented with level option.
  598. with pytest.raises(NotImplementedError):
  599. s.any(bool_only=True, level=0)
  600. with pytest.raises(NotImplementedError):
  601. s.all(bool_only=True, level=0)
  602. # bool_only is not implemented alone.
  603. with pytest.raises(NotImplementedError):
  604. s.any(bool_only=True,)
  605. with pytest.raises(NotImplementedError):
  606. s.all(bool_only=True)
  607. def test_timedelta64_analytics(self):
  608. # index min/max
  609. dti = pd.date_range('2012-1-1', periods=3, freq='D')
  610. td = Series(dti) - pd.Timestamp('20120101')
  611. result = td.idxmin()
  612. assert result == 0
  613. result = td.idxmax()
  614. assert result == 2
  615. # GH#2982
  616. # with NaT
  617. td[0] = np.nan
  618. result = td.idxmin()
  619. assert result == 1
  620. result = td.idxmax()
  621. assert result == 2
  622. # abs
  623. s1 = Series(pd.date_range('20120101', periods=3))
  624. s2 = Series(pd.date_range('20120102', periods=3))
  625. expected = Series(s2 - s1)
  626. # FIXME: don't leave commented-out code
  627. # this fails as numpy returns timedelta64[us]
  628. # result = np.abs(s1-s2)
  629. # assert_frame_equal(result,expected)
  630. result = (s1 - s2).abs()
  631. tm.assert_series_equal(result, expected)
  632. # max/min
  633. result = td.max()
  634. expected = pd.Timedelta('2 days')
  635. assert result == expected
  636. result = td.min()
  637. expected = pd.Timedelta('1 days')
  638. assert result == expected
  639. @pytest.mark.parametrize(
  640. "test_input,error_type",
  641. [
  642. (pd.Series([]), ValueError),
  643. # For strings, or any Series with dtype 'O'
  644. (pd.Series(['foo', 'bar', 'baz']), TypeError),
  645. (pd.Series([(1,), (2,)]), TypeError),
  646. # For mixed data types
  647. (
  648. pd.Series(['foo', 'foo', 'bar', 'bar', None, np.nan, 'baz']),
  649. TypeError
  650. ),
  651. ]
  652. )
  653. def test_assert_idxminmax_raises(self, test_input, error_type):
  654. """
  655. Cases where ``Series.argmax`` and related should raise an exception
  656. """
  657. with pytest.raises(error_type):
  658. test_input.idxmin()
  659. with pytest.raises(error_type):
  660. test_input.idxmin(skipna=False)
  661. with pytest.raises(error_type):
  662. test_input.idxmax()
  663. with pytest.raises(error_type):
  664. test_input.idxmax(skipna=False)
  665. def test_idxminmax_with_inf(self):
  666. # For numeric data with NA and Inf (GH #13595)
  667. s = pd.Series([0, -np.inf, np.inf, np.nan])
  668. assert s.idxmin() == 1
  669. assert np.isnan(s.idxmin(skipna=False))
  670. assert s.idxmax() == 2
  671. assert np.isnan(s.idxmax(skipna=False))
  672. # Using old-style behavior that treats floating point nan, -inf, and
  673. # +inf as missing
  674. with pd.option_context('mode.use_inf_as_na', True):
  675. assert s.idxmin() == 0
  676. assert np.isnan(s.idxmin(skipna=False))
  677. assert s.idxmax() == 0
  678. np.isnan(s.idxmax(skipna=False))
  679. class TestDatetime64SeriesReductions(object):
  680. # Note: the name TestDatetime64SeriesReductions indicates these tests
  681. # were moved from a series-specific test file, _not_ that these tests are
  682. # intended long-term to be series-specific
  683. @pytest.mark.parametrize('nat_ser', [
  684. Series([pd.NaT, pd.NaT]),
  685. Series([pd.NaT, pd.Timedelta('nat')]),
  686. Series([pd.Timedelta('nat'), pd.Timedelta('nat')])])
  687. def test_minmax_nat_series(self, nat_ser):
  688. # GH#23282
  689. assert nat_ser.min() is pd.NaT
  690. assert nat_ser.max() is pd.NaT
  691. assert nat_ser.min(skipna=False) is pd.NaT
  692. assert nat_ser.max(skipna=False) is pd.NaT
  693. @pytest.mark.parametrize('nat_df', [
  694. pd.DataFrame([pd.NaT, pd.NaT]),
  695. pd.DataFrame([pd.NaT, pd.Timedelta('nat')]),
  696. pd.DataFrame([pd.Timedelta('nat'), pd.Timedelta('nat')])])
  697. def test_minmax_nat_dataframe(self, nat_df):
  698. # GH#23282
  699. assert nat_df.min()[0] is pd.NaT
  700. assert nat_df.max()[0] is pd.NaT
  701. assert nat_df.min(skipna=False)[0] is pd.NaT
  702. assert nat_df.max(skipna=False)[0] is pd.NaT
  703. def test_min_max(self):
  704. rng = pd.date_range('1/1/2000', '12/31/2000')
  705. rng2 = rng.take(np.random.permutation(len(rng)))
  706. the_min = rng2.min()
  707. the_max = rng2.max()
  708. assert isinstance(the_min, pd.Timestamp)
  709. assert isinstance(the_max, pd.Timestamp)
  710. assert the_min == rng[0]
  711. assert the_max == rng[-1]
  712. assert rng.min() == rng[0]
  713. assert rng.max() == rng[-1]
  714. def test_min_max_series(self):
  715. rng = pd.date_range('1/1/2000', periods=10, freq='4h')
  716. lvls = ['A', 'A', 'A', 'B', 'B', 'B', 'C', 'C', 'C', 'C']
  717. df = DataFrame({'TS': rng, 'V': np.random.randn(len(rng)), 'L': lvls})
  718. result = df.TS.max()
  719. exp = pd.Timestamp(df.TS.iat[-1])
  720. assert isinstance(result, pd.Timestamp)
  721. assert result == exp
  722. result = df.TS.min()
  723. exp = pd.Timestamp(df.TS.iat[0])
  724. assert isinstance(result, pd.Timestamp)
  725. assert result == exp
  726. class TestCategoricalSeriesReductions(object):
  727. # Note: the name TestCategoricalSeriesReductions indicates these tests
  728. # were moved from a series-specific test file, _not_ that these tests are
  729. # intended long-term to be series-specific
  730. def test_min_max(self):
  731. # unordered cats have no min/max
  732. cat = Series(Categorical(["a", "b", "c", "d"], ordered=False))
  733. with pytest.raises(TypeError):
  734. cat.min()
  735. with pytest.raises(TypeError):
  736. cat.max()
  737. cat = Series(Categorical(["a", "b", "c", "d"], ordered=True))
  738. _min = cat.min()
  739. _max = cat.max()
  740. assert _min == "a"
  741. assert _max == "d"
  742. cat = Series(Categorical(["a", "b", "c", "d"], categories=[
  743. 'd', 'c', 'b', 'a'], ordered=True))
  744. _min = cat.min()
  745. _max = cat.max()
  746. assert _min == "d"
  747. assert _max == "a"
  748. cat = Series(Categorical(
  749. [np.nan, "b", "c", np.nan], categories=['d', 'c', 'b', 'a'
  750. ], ordered=True))
  751. _min = cat.min()
  752. _max = cat.max()
  753. assert np.isnan(_min)
  754. assert _max == "b"
  755. cat = Series(Categorical(
  756. [np.nan, 1, 2, np.nan], categories=[5, 4, 3, 2, 1], ordered=True))
  757. _min = cat.min()
  758. _max = cat.max()
  759. assert np.isnan(_min)
  760. assert _max == 1
  761. def test_min_max_numeric_only(self):
  762. # TODO deprecate numeric_only argument for Categorical and use
  763. # skipna as well, see GH25303
  764. cat = Series(Categorical(
  765. ["a", "b", np.nan, "a"], categories=['b', 'a'], ordered=True))
  766. _min = cat.min()
  767. _max = cat.max()
  768. assert np.isnan(_min)
  769. assert _max == "a"
  770. _min = cat.min(numeric_only=True)
  771. _max = cat.max(numeric_only=True)
  772. assert _min == "b"
  773. assert _max == "a"
  774. _min = cat.min(numeric_only=False)
  775. _max = cat.max(numeric_only=False)
  776. assert np.isnan(_min)
  777. assert _max == "a"
  778. class TestSeriesMode(object):
  779. # Note: the name TestSeriesMode indicates these tests
  780. # were moved from a series-specific test file, _not_ that these tests are
  781. # intended long-term to be series-specific
  782. @pytest.mark.parametrize('dropna, expected', [
  783. (True, Series([], dtype=np.float64)),
  784. (False, Series([], dtype=np.float64))
  785. ])
  786. def test_mode_empty(self, dropna, expected):
  787. s = Series([], dtype=np.float64)
  788. result = s.mode(dropna)
  789. tm.assert_series_equal(result, expected)
  790. @pytest.mark.parametrize('dropna, data, expected', [
  791. (True, [1, 1, 1, 2], [1]),
  792. (True, [1, 1, 1, 2, 3, 3, 3], [1, 3]),
  793. (False, [1, 1, 1, 2], [1]),
  794. (False, [1, 1, 1, 2, 3, 3, 3], [1, 3]),
  795. ])
  796. @pytest.mark.parametrize(
  797. 'dt',
  798. list(np.typecodes['AllInteger'] + np.typecodes['Float'])
  799. )
  800. def test_mode_numerical(self, dropna, data, expected, dt):
  801. s = Series(data, dtype=dt)
  802. result = s.mode(dropna)
  803. expected = Series(expected, dtype=dt)
  804. tm.assert_series_equal(result, expected)
  805. @pytest.mark.parametrize('dropna, expected', [
  806. (True, [1.0]),
  807. (False, [1, np.nan]),
  808. ])
  809. def test_mode_numerical_nan(self, dropna, expected):
  810. s = Series([1, 1, 2, np.nan, np.nan])
  811. result = s.mode(dropna)
  812. expected = Series(expected)
  813. tm.assert_series_equal(result, expected)
  814. @pytest.mark.parametrize('dropna, expected1, expected2, expected3', [
  815. (True, ['b'], ['bar'], ['nan']),
  816. (False, ['b'], [np.nan], ['nan'])
  817. ])
  818. def test_mode_str_obj(self, dropna, expected1, expected2, expected3):
  819. # Test string and object types.
  820. data = ['a'] * 2 + ['b'] * 3
  821. s = Series(data, dtype='c')
  822. result = s.mode(dropna)
  823. expected1 = Series(expected1, dtype='c')
  824. tm.assert_series_equal(result, expected1)
  825. data = ['foo', 'bar', 'bar', np.nan, np.nan, np.nan]
  826. s = Series(data, dtype=object)
  827. result = s.mode(dropna)
  828. expected2 = Series(expected2, dtype=object)
  829. tm.assert_series_equal(result, expected2)
  830. data = ['foo', 'bar', 'bar', np.nan, np.nan, np.nan]
  831. s = Series(data, dtype=object).astype(str)
  832. result = s.mode(dropna)
  833. expected3 = Series(expected3, dtype=str)
  834. tm.assert_series_equal(result, expected3)
  835. @pytest.mark.parametrize('dropna, expected1, expected2', [
  836. (True, ['foo'], ['foo']),
  837. (False, ['foo'], [np.nan])
  838. ])
  839. def test_mode_mixeddtype(self, dropna, expected1, expected2):
  840. s = Series([1, 'foo', 'foo'])
  841. result = s.mode(dropna)
  842. expected = Series(expected1)
  843. tm.assert_series_equal(result, expected)
  844. s = Series([1, 'foo', 'foo', np.nan, np.nan, np.nan])
  845. result = s.mode(dropna)
  846. expected = Series(expected2, dtype=object)
  847. tm.assert_series_equal(result, expected)
  848. @pytest.mark.parametrize('dropna, expected1, expected2', [
  849. (True, ['1900-05-03', '2011-01-03', '2013-01-02'],
  850. ['2011-01-03', '2013-01-02']),
  851. (False, [np.nan], [np.nan, '2011-01-03', '2013-01-02']),
  852. ])
  853. def test_mode_datetime(self, dropna, expected1, expected2):
  854. s = Series(['2011-01-03', '2013-01-02',
  855. '1900-05-03', 'nan', 'nan'], dtype='M8[ns]')
  856. result = s.mode(dropna)
  857. expected1 = Series(expected1, dtype='M8[ns]')
  858. tm.assert_series_equal(result, expected1)
  859. s = Series(['2011-01-03', '2013-01-02', '1900-05-03',
  860. '2011-01-03', '2013-01-02', 'nan', 'nan'],
  861. dtype='M8[ns]')
  862. result = s.mode(dropna)
  863. expected2 = Series(expected2, dtype='M8[ns]')
  864. tm.assert_series_equal(result, expected2)
  865. @pytest.mark.parametrize('dropna, expected1, expected2', [
  866. (True, ['-1 days', '0 days', '1 days'], ['2 min', '1 day']),
  867. (False, [np.nan], [np.nan, '2 min', '1 day']),
  868. ])
  869. def test_mode_timedelta(self, dropna, expected1, expected2):
  870. # gh-5986: Test timedelta types.
  871. s = Series(['1 days', '-1 days', '0 days', 'nan', 'nan'],
  872. dtype='timedelta64[ns]')
  873. result = s.mode(dropna)
  874. expected1 = Series(expected1, dtype='timedelta64[ns]')
  875. tm.assert_series_equal(result, expected1)
  876. s = Series(['1 day', '1 day', '-1 day', '-1 day 2 min',
  877. '2 min', '2 min', 'nan', 'nan'],
  878. dtype='timedelta64[ns]')
  879. result = s.mode(dropna)
  880. expected2 = Series(expected2, dtype='timedelta64[ns]')
  881. tm.assert_series_equal(result, expected2)
  882. @pytest.mark.parametrize('dropna, expected1, expected2, expected3', [
  883. (True, Categorical([1, 2], categories=[1, 2]),
  884. Categorical(['a'], categories=[1, 'a']),
  885. Categorical([3, 1], categories=[3, 2, 1], ordered=True)),
  886. (False, Categorical([np.nan], categories=[1, 2]),
  887. Categorical([np.nan, 'a'], categories=[1, 'a']),
  888. Categorical([np.nan, 3, 1], categories=[3, 2, 1], ordered=True)),
  889. ])
  890. def test_mode_category(self, dropna, expected1, expected2, expected3):
  891. s = Series(Categorical([1, 2, np.nan, np.nan]))
  892. result = s.mode(dropna)
  893. expected1 = Series(expected1, dtype='category')
  894. tm.assert_series_equal(result, expected1)
  895. s = Series(Categorical([1, 'a', 'a', np.nan, np.nan]))
  896. result = s.mode(dropna)
  897. expected2 = Series(expected2, dtype='category')
  898. tm.assert_series_equal(result, expected2)
  899. s = Series(Categorical([1, 1, 2, 3, 3, np.nan, np.nan],
  900. categories=[3, 2, 1], ordered=True))
  901. result = s.mode(dropna)
  902. expected3 = Series(expected3, dtype='category')
  903. tm.assert_series_equal(result, expected3)
  904. @pytest.mark.parametrize('dropna, expected1, expected2', [
  905. (True, [2**63], [1, 2**63]),
  906. (False, [2**63], [1, 2**63])
  907. ])
  908. def test_mode_intoverflow(self, dropna, expected1, expected2):
  909. # Test for uint64 overflow.
  910. s = Series([1, 2**63, 2**63], dtype=np.uint64)
  911. result = s.mode(dropna)
  912. expected1 = Series(expected1, dtype=np.uint64)
  913. tm.assert_series_equal(result, expected1)
  914. s = Series([1, 2**63], dtype=np.uint64)
  915. result = s.mode(dropna)
  916. expected2 = Series(expected2, dtype=np.uint64)
  917. tm.assert_series_equal(result, expected2)
  918. @pytest.mark.skipif(not compat.PY3, reason="only PY3")
  919. def test_mode_sortwarning(self):
  920. # Check for the warning that is raised when the mode
  921. # results cannot be sorted
  922. expected = Series(['foo', np.nan])
  923. s = Series([1, 'foo', 'foo', np.nan, np.nan])
  924. with tm.assert_produces_warning(UserWarning, check_stacklevel=False):
  925. result = s.mode(dropna=False)
  926. result = result.sort_values().reset_index(drop=True)
  927. tm.assert_series_equal(result, expected)