test_quantile.py 6.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195
  1. # coding=utf-8
  2. # pylint: disable-msg=E1101,W0612
  3. import numpy as np
  4. import pytest
  5. from pandas.core.dtypes.common import is_integer
  6. import pandas as pd
  7. from pandas import Index, Series
  8. from pandas.core.indexes.datetimes import Timestamp
  9. import pandas.util.testing as tm
  10. from .common import TestData
  11. class TestSeriesQuantile(TestData):
  def test_quantile(self):
      """Check scalar ``Series.quantile`` on several dtypes and bad input.

      Float and object data are compared against ``np.percentile`` computed
      on the NaN-dropped values; datetime64[ns] and timedelta64[ns] data are
      compared against fixed expected values. Quantiles outside [0, 1],
      scalar or inside a list, must raise ``ValueError``.
      """
      valid = self.ts.dropna()

      # float data: every fraction must agree with the matching percentile
      for fraction, percent in ((0.1, 10), (0.9, 90)):
          assert self.ts.quantile(fraction) == np.percentile(valid, percent)

      # object dtype
      as_object = Series(self.ts, dtype=object)
      assert as_object.quantile(0.9) == np.percentile(valid, 90)

      # datetime64[ns] dtype
      stamps = self.ts.index.to_series()
      assert stamps.quantile(.2) == Timestamp('2000-01-10 19:12:00')

      # timedelta64[ns] dtype
      gaps = stamps.diff()
      assert gaps.quantile(.25) == pd.to_timedelta('24:00:00')

      # GH7661
      nat_total = Series([np.timedelta64('NaT')]).sum()
      assert nat_total == pd.Timedelta(0)

      msg = 'percentiles should all be in the interval \\[0, 1\\]'
      bad_quantiles = [-1, 2, [0.5, -1], [0.5, 2]]
      for invalid in bad_quantiles:
          with pytest.raises(ValueError, match=msg):
              self.ts.quantile(invalid)
  35. def test_quantile_multi(self):
  36. qs = [.1, .9]
  37. result = self.ts.quantile(qs)
  38. expected = pd.Series([np.percentile(self.ts.dropna(), 10),
  39. np.percentile(self.ts.dropna(), 90)],
  40. index=qs, name=self.ts.name)
  41. tm.assert_series_equal(result, expected)
  42. dts = self.ts.index.to_series()
  43. dts.name = 'xxx'
  44. result = dts.quantile((.2, .2))
  45. expected = Series([Timestamp('2000-01-10 19:12:00'),
  46. Timestamp('2000-01-10 19:12:00')],
  47. index=[.2, .2], name='xxx')
  48. tm.assert_series_equal(result, expected)
  49. result = self.ts.quantile([])
  50. expected = pd.Series([], name=self.ts.name, index=Index(
  51. [], dtype=float))
  52. tm.assert_series_equal(result, expected)
  53. def test_quantile_interpolation(self):
  54. # see gh-10174
  55. # interpolation = linear (default case)
  56. q = self.ts.quantile(0.1, interpolation='linear')
  57. assert q == np.percentile(self.ts.dropna(), 10)
  58. q1 = self.ts.quantile(0.1)
  59. assert q1 == np.percentile(self.ts.dropna(), 10)
  60. # test with and without interpolation keyword
  61. assert q == q1
  62. def test_quantile_interpolation_dtype(self):
  63. # GH #10174
  64. # interpolation = linear (default case)
  65. q = pd.Series([1, 3, 4]).quantile(0.5, interpolation='lower')
  66. assert q == np.percentile(np.array([1, 3, 4]), 50)
  67. assert is_integer(q)
  68. q = pd.Series([1, 3, 4]).quantile(0.5, interpolation='higher')
  69. assert q == np.percentile(np.array([1, 3, 4]), 50)
  70. assert is_integer(q)
  71. def test_quantile_nan(self):
  72. # GH 13098
  73. s = pd.Series([1, 2, 3, 4, np.nan])
  74. result = s.quantile(0.5)
  75. expected = 2.5
  76. assert result == expected
  77. # all nan/empty
  78. cases = [Series([]), Series([np.nan, np.nan])]
  79. for s in cases:
  80. res = s.quantile(0.5)
  81. assert np.isnan(res)
  82. res = s.quantile([0.5])
  83. tm.assert_series_equal(res, pd.Series([np.nan], index=[0.5]))
  84. res = s.quantile([0.2, 0.3])
  85. tm.assert_series_equal(res, pd.Series([np.nan, np.nan],
  86. index=[0.2, 0.3]))
  @pytest.mark.parametrize(
      'case',
      [
          # naive timestamps
          [pd.Timestamp('2011-01-01'),
           pd.Timestamp('2011-01-02'),
           pd.Timestamp('2011-01-03')],
          # tz-aware timestamps
          [pd.Timestamp('2011-01-01', tz='US/Eastern'),
           pd.Timestamp('2011-01-02', tz='US/Eastern'),
           pd.Timestamp('2011-01-03', tz='US/Eastern')],
          # timedeltas
          [pd.Timedelta('1 days'),
           pd.Timedelta('2 days'),
           pd.Timedelta('3 days')],
          # NaT
          [pd.Timestamp('2011-01-01'),
           pd.Timestamp('2011-01-02'),
           pd.Timestamp('2011-01-03'),
           pd.NaT],
          [pd.Timestamp('2011-01-01', tz='US/Eastern'),
           pd.Timestamp('2011-01-02', tz='US/Eastern'),
           pd.Timestamp('2011-01-03', tz='US/Eastern'),
           pd.NaT],
          [pd.Timedelta('1 days'),
           pd.Timedelta('2 days'),
           pd.Timedelta('3 days'),
           pd.NaT],
      ])
  def test_quantile_box(self, case):
      """Check the median of datetime-like data is the middle element.

      ``case[1]`` is the expected 0.5 quantile for every parametrized list,
      including the lists that end with ``NaT``. Both the scalar and the
      list form of ``q`` are checked.
      """
      middle = case[1]
      boxed = pd.Series(case, name='XXX')

      # scalar q returns the element itself
      assert boxed.quantile(0.5) == middle

      # list-like q returns a Series that keeps the name
      expected = pd.Series([middle], index=[0.5], name='XXX')
      tm.assert_series_equal(boxed.quantile([0.5]), expected)
  110. def test_datetime_timedelta_quantiles(self):
  111. # covers #9694
  112. assert pd.isna(Series([], dtype='M8[ns]').quantile(.5))
  113. assert pd.isna(Series([], dtype='m8[ns]').quantile(.5))
  114. def test_quantile_nat(self):
  115. res = Series([pd.NaT, pd.NaT]).quantile(0.5)
  116. assert res is pd.NaT
  117. res = Series([pd.NaT, pd.NaT]).quantile([0.5])
  118. tm.assert_series_equal(res, pd.Series([pd.NaT], index=[0.5]))
  @pytest.mark.parametrize(
      'values, dtype',
      [
          ([0, 0, 0, 1, 2, 3], 'Sparse[int]'),
          ([0., None, 1., 2.], 'Sparse[float]'),
      ])
  def test_quantile_sparse(self, values, dtype):
      """Check a sparse Series gives the same quantile as its dense copy.

      The expected value is computed from ``np.asarray`` of the sparse
      Series, so the test compares the two code paths on identical data.
      """
      sparse = pd.Series(values, dtype=dtype)
      dense = pd.Series(np.asarray(sparse))

      expected = dense.quantile([0.5])
      result = sparse.quantile([0.5])
      tm.assert_series_equal(result, expected)
  128. def test_quantile_empty(self):
  129. # floats
  130. s = Series([], dtype='float64')
  131. res = s.quantile(0.5)
  132. assert np.isnan(res)
  133. res = s.quantile([0.5])
  134. exp = Series([np.nan], index=[0.5])
  135. tm.assert_series_equal(res, exp)
  136. # int
  137. s = Series([], dtype='int64')
  138. res = s.quantile(0.5)
  139. assert np.isnan(res)
  140. res = s.quantile([0.5])
  141. exp = Series([np.nan], index=[0.5])
  142. tm.assert_series_equal(res, exp)
  143. # datetime
  144. s = Series([], dtype='datetime64[ns]')
  145. res = s.quantile(0.5)
  146. assert res is pd.NaT
  147. res = s.quantile([0.5])
  148. exp = Series([pd.NaT], index=[0.5])
  149. tm.assert_series_equal(res, exp)