
# Author: Travis Oliphant, 2002
#
# Further enhancements and tests added by numerous SciPy developers.
#
from __future__ import division, print_function, absolute_import

import warnings

import numpy as np
from numpy.random import RandomState
from numpy.testing import (assert_array_equal, assert_almost_equal,
                           assert_array_less, assert_array_almost_equal,
                           assert_, assert_allclose, assert_equal,
                           assert_warns)
import pytest
from pytest import raises as assert_raises

from scipy._lib._numpy_compat import suppress_warnings
from scipy import stats
from .common_tests import check_named_results

# Matplotlib is not a scipy dependency but is optionally used in probplot, so
# check if it's available
try:
    import matplotlib.pyplot as plt
    have_matplotlib = True
except Exception:
    have_matplotlib = False


# test data gear.dat from NIST for Levene and Bartlett test
# https://www.itl.nist.gov/div898/handbook/eda/section3/eda3581.htm
g1 = [1.006, 0.996, 0.998, 1.000, 0.992, 0.993, 1.002, 0.999, 0.994, 1.000]
g2 = [0.998, 1.006, 1.000, 1.002, 0.997, 0.998, 0.996, 1.000, 1.006, 0.988]
g3 = [0.991, 0.987, 0.997, 0.999, 0.995, 0.994, 1.000, 0.999, 0.996, 0.996]
g4 = [1.005, 1.002, 0.994, 1.000, 0.995, 0.994, 0.998, 0.996, 1.002, 0.996]
g5 = [0.998, 0.998, 0.982, 0.990, 1.002, 0.984, 0.996, 0.993, 0.980, 0.996]
g6 = [1.009, 1.013, 1.009, 0.997, 0.988, 1.002, 0.995, 0.998, 0.981, 0.996]
g7 = [0.990, 1.004, 0.996, 1.001, 0.998, 1.000, 1.018, 1.010, 0.996, 1.002]
g8 = [0.998, 1.000, 1.006, 1.000, 1.002, 0.996, 0.998, 0.996, 1.002, 1.006]
g9 = [1.002, 0.998, 0.996, 0.995, 0.996, 1.004, 1.004, 0.998, 0.999, 0.991]
g10 = [0.991, 0.995, 0.984, 0.994, 0.997, 0.997, 0.991, 0.998, 1.004, 0.997]


class TestBayes_mvs(object):
    def test_basic(self):
        # Expected values in this test simply taken from the function.  For
        # some checks regarding correctness of implementation, see review in
        # gh-674
        data = [6, 9, 12, 7, 8, 8, 13]
        mean, var, std = stats.bayes_mvs(data)
        assert_almost_equal(mean.statistic, 9.0)
        assert_allclose(mean.minmax, (7.1036502226125329, 10.896349777387467),
                        rtol=1e-14)

        assert_almost_equal(var.statistic, 10.0)
        assert_allclose(var.minmax, (3.1767242068607087, 24.45910381334018),
                        rtol=1e-09)

        assert_almost_equal(std.statistic, 2.9724954732045084, decimal=14)
        assert_allclose(std.minmax, (1.7823367265645145, 4.9456146050146312),
                        rtol=1e-14)

    def test_empty_input(self):
        assert_raises(ValueError, stats.bayes_mvs, [])

    def test_result_attributes(self):
        x = np.arange(15)
        attributes = ('statistic', 'minmax')
        res = stats.bayes_mvs(x)

        for i in res:
            check_named_results(i, attributes)


class TestMvsdist(object):
    def test_basic(self):
        data = [6, 9, 12, 7, 8, 8, 13]
        mean, var, std = stats.mvsdist(data)
        assert_almost_equal(mean.mean(), 9.0)
        assert_allclose(mean.interval(0.9), (7.1036502226125329,
                                             10.896349777387467), rtol=1e-14)

        assert_almost_equal(var.mean(), 10.0)
        assert_allclose(var.interval(0.9), (3.1767242068607087,
                                            24.45910381334018), rtol=1e-09)

        assert_almost_equal(std.mean(), 2.9724954732045084, decimal=14)
        assert_allclose(std.interval(0.9), (1.7823367265645145,
                                            4.9456146050146312), rtol=1e-14)

    def test_empty_input(self):
        assert_raises(ValueError, stats.mvsdist, [])

    def test_bad_arg(self):
        # Raise ValueError if fewer than two data points are given.
        data = [1]
        assert_raises(ValueError, stats.mvsdist, data)

    def test_warns(self):
        # regression test for gh-5270
        # make sure there are no spurious divide-by-zero warnings
        with warnings.catch_warnings():
            warnings.simplefilter('error', RuntimeWarning)
            [x.mean() for x in stats.mvsdist([1, 2, 3])]
            [x.mean() for x in stats.mvsdist([1, 2, 3, 4, 5])]
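
# Note: bayes_mvs is documented to be computed from the frozen distributions
# that mvsdist returns, which is why the intervals checked in TestBayes_mvs
# and TestMvsdist.test_basic agree.  A minimal cross-check sketch (assuming
# the default alpha=0.9):
#
#     data = [6, 9, 12, 7, 8, 8, 13]
#     mean_dist = stats.mvsdist(data)[0]
#     assert_allclose(stats.bayes_mvs(data)[0].minmax,
#                     mean_dist.interval(0.9))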


class TestShapiro(object):
    def test_basic(self):
        x1 = [0.11, 7.87, 4.61, 10.14, 7.95, 3.14, 0.46,
              4.43, 0.21, 4.75, 0.71, 1.52, 3.24,
              0.93, 0.42, 4.97, 9.53, 4.55, 0.47, 6.66]
        w, pw = stats.shapiro(x1)
        assert_almost_equal(w, 0.90047299861907959, 6)
        assert_almost_equal(pw, 0.042089745402336121, 6)

        x2 = [1.36, 1.14, 2.92, 2.55, 1.46, 1.06, 5.27, -1.11,
              3.48, 1.10, 0.88, -0.51, 1.46, 0.52, 6.20, 1.69,
              0.08, 3.67, 2.81, 3.49]
        w, pw = stats.shapiro(x2)
        assert_almost_equal(w, 0.9590270, 6)
        assert_almost_equal(pw, 0.52460, 3)

        # Verified against R
        np.random.seed(12345678)
        x3 = stats.norm.rvs(loc=5, scale=3, size=100)
        w, pw = stats.shapiro(x3)
        assert_almost_equal(w, 0.9772805571556091, decimal=6)
        assert_almost_equal(pw, 0.08144091814756393, decimal=3)

        # Extracted from original paper
        x4 = [0.139, 0.157, 0.175, 0.256, 0.344, 0.413, 0.503, 0.577, 0.614,
              0.655, 0.954, 1.392, 1.557, 1.648, 1.690, 1.994, 2.174, 2.206,
              3.245, 3.510, 3.571, 4.354, 4.980, 6.084, 8.351]
        W_expected = 0.83467
        p_expected = 0.000914
        w, pw = stats.shapiro(x4)
        assert_almost_equal(w, W_expected, decimal=4)
        assert_almost_equal(pw, p_expected, decimal=5)

    def test_2d(self):
        x1 = [[0.11, 7.87, 4.61, 10.14, 7.95, 3.14, 0.46,
               4.43, 0.21, 4.75], [0.71, 1.52, 3.24,
               0.93, 0.42, 4.97, 9.53, 4.55, 0.47, 6.66]]
        w, pw = stats.shapiro(x1)
        assert_almost_equal(w, 0.90047299861907959, 6)
        assert_almost_equal(pw, 0.042089745402336121, 6)

        x2 = [[1.36, 1.14, 2.92, 2.55, 1.46, 1.06, 5.27, -1.11,
               3.48, 1.10], [0.88, -0.51, 1.46, 0.52, 6.20, 1.69,
               0.08, 3.67, 2.81, 3.49]]
        w, pw = stats.shapiro(x2)
        assert_almost_equal(w, 0.9590270, 6)
        assert_almost_equal(pw, 0.52460, 3)

    def test_empty_input(self):
        assert_raises(ValueError, stats.shapiro, [])
        assert_raises(ValueError, stats.shapiro, [[], [], []])

    def test_not_enough_values(self):
        assert_raises(ValueError, stats.shapiro, [1, 2])
        assert_raises(ValueError, stats.shapiro, [[], [2]])

    def test_bad_arg(self):
        # Length of x is less than 3.
        x = [1]
        assert_raises(ValueError, stats.shapiro, x)

    def test_nan_input(self):
        x = np.arange(10.)
        x[9] = np.nan

        w, pw = stats.shapiro(x)
        assert_equal(w, np.nan)
        assert_almost_equal(pw, 1.0)


class TestAnderson(object):
    def test_normal(self):
        rs = RandomState(1234567890)
        x1 = rs.standard_exponential(size=50)
        x2 = rs.standard_normal(size=50)

        A, crit, sig = stats.anderson(x1)
        assert_array_less(crit[:-1], A)

        A, crit, sig = stats.anderson(x2)
        assert_array_less(A, crit[-2:])

        v = np.ones(10)
        v[0] = 0
        A, crit, sig = stats.anderson(v)
        # The expected statistic 3.208057 was computed independently of scipy.
        # For example, in R:
        #   > library(nortest)
        #   > v <- rep(1, 10)
        #   > v[1] <- 0
        #   > result <- ad.test(v)
        #   > result$statistic
        #          A
        #   3.208057
        assert_allclose(A, 3.208057)

    def test_expon(self):
        rs = RandomState(1234567890)
        x1 = rs.standard_exponential(size=50)
        x2 = rs.standard_normal(size=50)

        A, crit, sig = stats.anderson(x1, 'expon')
        assert_array_less(A, crit[-2:])

        olderr = np.seterr(all='ignore')
        try:
            A, crit, sig = stats.anderson(x2, 'expon')
        finally:
            np.seterr(**olderr)
        assert_(A > crit[-1])

    def test_gumbel(self):
        # Regression test for gh-6306.  Before that issue was fixed,
        # this case would return a2=inf.
        v = np.ones(100)
        v[0] = 0.0
        a2, crit, sig = stats.anderson(v, 'gumbel')

        # A brief reimplementation of the calculation of the statistic.
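        # For a fully specified distribution with CDF F and sorted sample
        # x_(1) <= ... <= x_(n), the Anderson-Darling statistic is
        #     A^2 = -n - (1/n) * sum_{i=1}^{n} (2i - 1) *
        #           [log F(x_(i)) + log(1 - F(x_(n+1-i)))],
        # which the vectorized expression below evaluates via logcdf and
        # logsf (logsf[::-1] supplies the log(1 - F(x_(n+1-i))) term).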
        n = len(v)
        xbar, s = stats.gumbel_l.fit(v)
        logcdf = stats.gumbel_l.logcdf(v, xbar, s)
        logsf = stats.gumbel_l.logsf(v, xbar, s)
        i = np.arange(1, n + 1)
        expected_a2 = -n - np.mean((2*i - 1) * (logcdf + logsf[::-1]))

        assert_allclose(a2, expected_a2)

    def test_bad_arg(self):
        assert_raises(ValueError, stats.anderson, [1], dist='plate_of_shrimp')

    def test_result_attributes(self):
        rs = RandomState(1234567890)
        x = rs.standard_exponential(size=50)
        res = stats.anderson(x)
        attributes = ('statistic', 'critical_values', 'significance_level')
        check_named_results(res, attributes)

    def test_gumbel_l(self):
        # gh-2592, gh-6337
        # Adds support for 'gumbel_r' and 'gumbel_l' as valid inputs for dist.
        rs = RandomState(1234567890)
        x = rs.gumbel(size=100)
        A1, crit1, sig1 = stats.anderson(x, 'gumbel')
        A2, crit2, sig2 = stats.anderson(x, 'gumbel_l')

        assert_allclose(A2, A1)

    def test_gumbel_r(self):
        # gh-2592, gh-6337
        # Adds support for 'gumbel_r' and 'gumbel_l' as valid inputs for dist.
        rs = RandomState(1234567890)
        x1 = rs.gumbel(size=100)
        x2 = np.ones(100)
        A1, crit1, sig1 = stats.anderson(x1, 'gumbel_r')
        A2, crit2, sig2 = stats.anderson(x2, 'gumbel_r')

        assert_array_less(A1, crit1[-2:])
        assert_(A2 > crit2[-1])


class TestAndersonKSamp(object):
    def test_example1a(self):
        # Example data from Scholz & Stephens (1987), originally
        # published in Lehmann (1995, Nonparametrics, Statistical
        # Methods Based on Ranks, p. 309)
        # Pass a mixture of lists and arrays
        t1 = [38.7, 41.5, 43.8, 44.5, 45.5, 46.0, 47.7, 58.0]
        t2 = np.array([39.2, 39.3, 39.7, 41.4, 41.8, 42.9, 43.3, 45.8])
        t3 = np.array([34.0, 35.0, 39.0, 40.0, 43.0, 43.0, 44.0, 45.0])
        t4 = np.array([34.0, 34.8, 34.8, 35.4, 37.2, 37.8, 41.2, 42.8])

        Tk, tm, p = stats.anderson_ksamp((t1, t2, t3, t4), midrank=False)

        assert_almost_equal(Tk, 4.449, 3)
        assert_array_almost_equal([0.4985, 1.3237, 1.9158, 2.4930, 3.2459],
                                  tm[0:5], 4)
        assert_allclose(p, 0.0021, atol=0.00025)

    def test_example1b(self):
        # Example data from Scholz & Stephens (1987), originally
        # published in Lehmann (1995, Nonparametrics, Statistical
        # Methods Based on Ranks, p. 309)
        # Pass arrays
        t1 = np.array([38.7, 41.5, 43.8, 44.5, 45.5, 46.0, 47.7, 58.0])
        t2 = np.array([39.2, 39.3, 39.7, 41.4, 41.8, 42.9, 43.3, 45.8])
        t3 = np.array([34.0, 35.0, 39.0, 40.0, 43.0, 43.0, 44.0, 45.0])
        t4 = np.array([34.0, 34.8, 34.8, 35.4, 37.2, 37.8, 41.2, 42.8])
        Tk, tm, p = stats.anderson_ksamp((t1, t2, t3, t4), midrank=True)

        assert_almost_equal(Tk, 4.480, 3)
        assert_array_almost_equal([0.4985, 1.3237, 1.9158, 2.4930, 3.2459],
                                  tm[0:5], 4)
        assert_allclose(p, 0.0020, atol=0.00025)

    def test_example2a(self):
        # Example data taken from an earlier technical report of
        # Scholz and Stephens
        # Pass lists instead of arrays
        t1 = [194, 15, 41, 29, 33, 181]
        t2 = [413, 14, 58, 37, 100, 65, 9, 169, 447, 184, 36, 201, 118]
        t3 = [34, 31, 18, 18, 67, 57, 62, 7, 22, 34]
        t4 = [90, 10, 60, 186, 61, 49, 14, 24, 56, 20, 79, 84, 44, 59, 29,
              118, 25, 156, 310, 76, 26, 44, 23, 62]
        t5 = [130, 208, 70, 101, 208]
        t6 = [74, 57, 48, 29, 502, 12, 70, 21, 29, 386, 59, 27]
        t7 = [55, 320, 56, 104, 220, 239, 47, 246, 176, 182, 33]
        t8 = [23, 261, 87, 7, 120, 14, 62, 47, 225, 71, 246, 21, 42, 20, 5,
              12, 120, 11, 3, 14, 71, 11, 14, 11, 16, 90, 1, 16, 52, 95]
        t9 = [97, 51, 11, 4, 141, 18, 142, 68, 77, 80, 1, 16, 106, 206, 82,
              54, 31, 216, 46, 111, 39, 63, 18, 191, 18, 163, 24]
        t10 = [50, 44, 102, 72, 22, 39, 3, 15, 197, 188, 79, 88, 46, 5, 5, 36,
               22, 139, 210, 97, 30, 23, 13, 14]
        t11 = [359, 9, 12, 270, 603, 3, 104, 2, 438]
        t12 = [50, 254, 5, 283, 35, 12]
        t13 = [487, 18, 100, 7, 98, 5, 85, 91, 43, 230, 3, 130]
        t14 = [102, 209, 14, 57, 54, 32, 67, 59, 134, 152, 27, 14, 230, 66,
               61, 34]

        Tk, tm, p = stats.anderson_ksamp((t1, t2, t3, t4, t5, t6, t7, t8,
                                          t9, t10, t11, t12, t13, t14),
                                         midrank=False)
        assert_almost_equal(Tk, 3.288, 3)
        assert_array_almost_equal([0.5990, 1.3269, 1.8052, 2.2486, 2.8009],
                                  tm[0:5], 4)
        assert_allclose(p, 0.0041, atol=0.00025)

    def test_example2b(self):
        # Example data taken from an earlier technical report of
        # Scholz and Stephens
        t1 = [194, 15, 41, 29, 33, 181]
        t2 = [413, 14, 58, 37, 100, 65, 9, 169, 447, 184, 36, 201, 118]
        t3 = [34, 31, 18, 18, 67, 57, 62, 7, 22, 34]
        t4 = [90, 10, 60, 186, 61, 49, 14, 24, 56, 20, 79, 84, 44, 59, 29,
              118, 25, 156, 310, 76, 26, 44, 23, 62]
        t5 = [130, 208, 70, 101, 208]
        t6 = [74, 57, 48, 29, 502, 12, 70, 21, 29, 386, 59, 27]
        t7 = [55, 320, 56, 104, 220, 239, 47, 246, 176, 182, 33]
        t8 = [23, 261, 87, 7, 120, 14, 62, 47, 225, 71, 246, 21, 42, 20, 5,
              12, 120, 11, 3, 14, 71, 11, 14, 11, 16, 90, 1, 16, 52, 95]
        t9 = [97, 51, 11, 4, 141, 18, 142, 68, 77, 80, 1, 16, 106, 206, 82,
              54, 31, 216, 46, 111, 39, 63, 18, 191, 18, 163, 24]
        t10 = [50, 44, 102, 72, 22, 39, 3, 15, 197, 188, 79, 88, 46, 5, 5, 36,
               22, 139, 210, 97, 30, 23, 13, 14]
        t11 = [359, 9, 12, 270, 603, 3, 104, 2, 438]
        t12 = [50, 254, 5, 283, 35, 12]
        t13 = [487, 18, 100, 7, 98, 5, 85, 91, 43, 230, 3, 130]
        t14 = [102, 209, 14, 57, 54, 32, 67, 59, 134, 152, 27, 14, 230, 66,
               61, 34]

        Tk, tm, p = stats.anderson_ksamp((t1, t2, t3, t4, t5, t6, t7, t8,
                                          t9, t10, t11, t12, t13, t14),
                                         midrank=True)

        assert_almost_equal(Tk, 3.294, 3)
        assert_array_almost_equal([0.5990, 1.3269, 1.8052, 2.2486, 2.8009],
                                  tm[0:5], 4)
        assert_allclose(p, 0.0041, atol=0.00025)

    def test_R_kSamples(self):
        # test values generated with R package kSamples
        # package version 1.2-6 (2017-06-14)
        # r1 = 1:100
        # continuous case (no ties) --> version 1
        # res <- kSamples::ad.test(r1, r1 + 40.5)
        # res$ad[1, "T.AD"]  # 41.105
        # res$ad[1, " asympt. P-value"]  # 5.8399e-18
        #
        # discrete case (ties allowed) --> version 2 (here: midrank=True)
        # res$ad[2, "T.AD"]  # 41.235
        #
        # res <- kSamples::ad.test(r1, r1 + .5)
        # res$ad[1, "T.AD"]  # -1.2824
        # res$ad[1, " asympt. P-value"]  # 1
        # res$ad[2, "T.AD"]  # -1.2944
        #
        # res <- kSamples::ad.test(r1, r1 + 7.5)
        # res$ad[1, "T.AD"]  # 1.4923
        # res$ad[1, " asympt. P-value"]  # 0.077501
        #
        # res <- kSamples::ad.test(r1, r1 + 6)
        # res$ad[2, "T.AD"]  # 0.63892
        # res$ad[2, " asympt. P-value"]  # 0.17981
        #
        # res <- kSamples::ad.test(r1, r1 + 11.5)
        # res$ad[1, "T.AD"]  # 4.5042
        # res$ad[1, " asympt. P-value"]  # 0.00545
        #
        # res <- kSamples::ad.test(r1, r1 + 13.5)
        # res$ad[1, "T.AD"]  # 6.2982
        # res$ad[1, " asympt. P-value"]  # 0.00118
        x1 = np.linspace(1, 100, 100)

        # test case: different distributions; p-value floored at 0.001
        # test case for issue #5493 / #8536
        with suppress_warnings() as sup:
            sup.filter(UserWarning, message='p-value floored')
            s, _, p = stats.anderson_ksamp([x1, x1 + 40.5], midrank=False)
        assert_almost_equal(s, 41.105, 3)
        assert_equal(p, 0.001)

        with suppress_warnings() as sup:
            sup.filter(UserWarning, message='p-value floored')
            s, _, p = stats.anderson_ksamp([x1, x1 + 40.5])
        assert_almost_equal(s, 41.235, 3)
        assert_equal(p, 0.001)

        # test case: similar distributions --> p-value capped at 0.25
        with suppress_warnings() as sup:
            sup.filter(UserWarning, message='p-value capped')
            s, _, p = stats.anderson_ksamp([x1, x1 + .5], midrank=False)
        assert_almost_equal(s, -1.2824, 4)
        assert_equal(p, 0.25)

        with suppress_warnings() as sup:
            sup.filter(UserWarning, message='p-value capped')
            s, _, p = stats.anderson_ksamp([x1, x1 + .5])
        assert_almost_equal(s, -1.2944, 4)
        assert_equal(p, 0.25)

        # test case: check interpolated p-value in [0.01, 0.25] (no ties)
        s, _, p = stats.anderson_ksamp([x1, x1 + 7.5], midrank=False)
        assert_almost_equal(s, 1.4923, 4)
        assert_allclose(p, 0.0775, atol=0.005, rtol=0)

        # test case: check interpolated p-value in [0.01, 0.25] (w/ ties)
        s, _, p = stats.anderson_ksamp([x1, x1 + 6])
        assert_almost_equal(s, 0.6389, 4)
        assert_allclose(p, 0.1798, atol=0.005, rtol=0)

        # test extended critical values for p=0.001 and p=0.005
        s, _, p = stats.anderson_ksamp([x1, x1 + 11.5], midrank=False)
        assert_almost_equal(s, 4.5042, 4)
        assert_allclose(p, 0.00545, atol=0.0005, rtol=0)

        s, _, p = stats.anderson_ksamp([x1, x1 + 13.5], midrank=False)
        assert_almost_equal(s, 6.2982, 4)
        assert_allclose(p, 0.00118, atol=0.0001, rtol=0)

    def test_not_enough_samples(self):
        assert_raises(ValueError, stats.anderson_ksamp, np.ones(5))

    def test_no_distinct_observations(self):
        assert_raises(ValueError, stats.anderson_ksamp,
                      (np.ones(5), np.ones(5)))

    def test_empty_sample(self):
        assert_raises(ValueError, stats.anderson_ksamp, (np.ones(5), []))

    def test_result_attributes(self):
        # Pass a mixture of lists and arrays
        t1 = [38.7, 41.5, 43.8, 44.5, 45.5, 46.0, 47.7, 58.0]
        t2 = np.array([39.2, 39.3, 39.7, 41.4, 41.8, 42.9, 43.3, 45.8])
        res = stats.anderson_ksamp((t1, t2), midrank=False)
        attributes = ('statistic', 'critical_values', 'significance_level')
        check_named_results(res, attributes)


class TestAnsari(object):
    def test_small(self):
        x = [1, 2, 3, 3, 4]
        y = [3, 2, 6, 1, 6, 1, 4, 1]
        with suppress_warnings() as sup:
            sup.filter(UserWarning, "Ties preclude use of exact statistic.")
            W, pval = stats.ansari(x, y)
        assert_almost_equal(W, 23.5, 11)
        assert_almost_equal(pval, 0.13499256881897437, 11)

    def test_approx(self):
        ramsay = np.array((111, 107, 100, 99, 102, 106, 109, 108, 104, 99,
                           101, 96, 97, 102, 107, 113, 116, 113, 110, 98))
        parekh = np.array((107, 108, 106, 98, 105, 103, 110, 105, 104,
                           100, 96, 108, 103, 104, 114, 114, 113, 108,
                           106, 99))

        with suppress_warnings() as sup:
            sup.filter(UserWarning, "Ties preclude use of exact statistic.")
            W, pval = stats.ansari(ramsay, parekh)

        assert_almost_equal(W, 185.5, 11)
        assert_almost_equal(pval, 0.18145819972867083, 11)

    def test_exact(self):
        W, pval = stats.ansari([1, 2, 3, 4], [15, 5, 20, 8, 10, 12])
        assert_almost_equal(W, 10.0, 11)
        assert_almost_equal(pval, 0.533333333333333333, 7)

    def test_bad_arg(self):
        assert_raises(ValueError, stats.ansari, [], [1])
        assert_raises(ValueError, stats.ansari, [1], [])

    def test_result_attributes(self):
        x = [1, 2, 3, 3, 4]
        y = [3, 2, 6, 1, 6, 1, 4, 1]
        with suppress_warnings() as sup:
            sup.filter(UserWarning, "Ties preclude use of exact statistic.")
            res = stats.ansari(x, y)
        attributes = ('statistic', 'pvalue')
        check_named_results(res, attributes)


class TestBartlett(object):
    def test_data(self):
        # https://www.itl.nist.gov/div898/handbook/eda/section3/eda357.htm
        args = [g1, g2, g3, g4, g5, g6, g7, g8, g9, g10]
        T, pval = stats.bartlett(*args)
        assert_almost_equal(T, 20.78587342806484, 7)
        assert_almost_equal(pval, 0.0136358632781, 7)

    def test_bad_arg(self):
        # Too few args raises ValueError.
        assert_raises(ValueError, stats.bartlett, [1])

    def test_result_attributes(self):
        args = [g1, g2, g3, g4, g5, g6, g7, g8, g9, g10]
        res = stats.bartlett(*args)
        attributes = ('statistic', 'pvalue')
        check_named_results(res, attributes)

    def test_empty_arg(self):
        args = (g1, g2, g3, g4, g5, g6, g7, g8, g9, g10, [])
        assert_equal((np.nan, np.nan), stats.bartlett(*args))

    # temporary fix for issue #9252: only accept 1d input
    def test_1d_input(self):
        x = np.array([[1, 2], [3, 4]])
        assert_raises(ValueError, stats.bartlett, g1, x)


class TestLevene(object):
    def test_data(self):
        # https://www.itl.nist.gov/div898/handbook/eda/section3/eda35a.htm
        args = [g1, g2, g3, g4, g5, g6, g7, g8, g9, g10]
        W, pval = stats.levene(*args)
        assert_almost_equal(W, 1.7059176930008939, 7)
        assert_almost_equal(pval, 0.0990829755522, 7)

    def test_trimmed1(self):
        # Test that center='trimmed' gives the same result as center='mean'
        # when proportiontocut=0.
        W1, pval1 = stats.levene(g1, g2, g3, center='mean')
        W2, pval2 = stats.levene(g1, g2, g3, center='trimmed',
                                 proportiontocut=0.0)
        assert_almost_equal(W1, W2)
        assert_almost_equal(pval1, pval2)

    def test_trimmed2(self):
        x = [1.2, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 100.0]
        y = [0.0, 3.0, 3.5, 4.0, 4.5, 5.0, 5.5, 200.0]
        np.random.seed(1234)
        x2 = np.random.permutation(x)

        # Use center='trimmed'
        W0, pval0 = stats.levene(x, y, center='trimmed',
                                 proportiontocut=0.125)
        W1, pval1 = stats.levene(x2, y, center='trimmed',
                                 proportiontocut=0.125)
        # Trim the data here, and use center='mean'
        W2, pval2 = stats.levene(x[1:-1], y[1:-1], center='mean')
        # Result should be the same.
        assert_almost_equal(W0, W2)
        assert_almost_equal(W1, W2)
        assert_almost_equal(pval1, pval2)

    def test_equal_mean_median(self):
        x = np.linspace(-1, 1, 21)
        np.random.seed(1234)
        x2 = np.random.permutation(x)
        y = x**3
        W1, pval1 = stats.levene(x, y, center='mean')
        W2, pval2 = stats.levene(x2, y, center='median')
        assert_almost_equal(W1, W2)
        assert_almost_equal(pval1, pval2)

    def test_bad_keyword(self):
        x = np.linspace(-1, 1, 21)
        assert_raises(TypeError, stats.levene, x, x, portiontocut=0.1)

    def test_bad_center_value(self):
        x = np.linspace(-1, 1, 21)
        assert_raises(ValueError, stats.levene, x, x, center='trim')

    def test_too_few_args(self):
        assert_raises(ValueError, stats.levene, [1])

    def test_result_attributes(self):
        args = [g1, g2, g3, g4, g5, g6, g7, g8, g9, g10]
        res = stats.levene(*args)
        attributes = ('statistic', 'pvalue')
        check_named_results(res, attributes)

    # temporary fix for issue #9252: only accept 1d input
    def test_1d_input(self):
        x = np.array([[1, 2], [3, 4]])
        assert_raises(ValueError, stats.levene, g1, x)


class TestBinomP(object):
    def test_data(self):
        pval = stats.binom_test(100, 250)
        assert_almost_equal(pval, 0.0018833009350757682, 11)
        pval = stats.binom_test(201, 405)
        assert_almost_equal(pval, 0.92085205962670713, 11)
        pval = stats.binom_test([682, 243], p=3.0/4)
        assert_almost_equal(pval, 0.38249155957481695, 11)

    def test_bad_len_x(self):
        # Length of x must be 1 or 2.
        assert_raises(ValueError, stats.binom_test, [1, 2, 3])

    def test_bad_n(self):
        # len(x) is 1, but n is invalid.
        # Missing n
        assert_raises(ValueError, stats.binom_test, [100])
        # n less than x[0]
        assert_raises(ValueError, stats.binom_test, [100], n=50)

    def test_bad_p(self):
        assert_raises(ValueError, stats.binom_test, [50, 50], p=2.0)

    def test_alternatives(self):
        res = stats.binom_test(51, 235, p=1./6, alternative='less')
        assert_almost_equal(res, 0.982022657605858)

        res = stats.binom_test(51, 235, p=1./6, alternative='greater')
        assert_almost_equal(res, 0.02654424571169085)

        res = stats.binom_test(51, 235, p=1./6, alternative='two-sided')
        assert_almost_equal(res, 0.0437479701823997)


class TestFligner(object):
    def test_data(self):
        # numbers from R: fligner.test in package stats
        x1 = np.arange(5)
        assert_array_almost_equal(stats.fligner(x1, x1**2),
                                  (3.2282229927203536, 0.072379187848207877),
                                  11)

    def test_trimmed1(self):
        # Perturb input to break ties in the transformed data
        # See https://github.com/scipy/scipy/pull/8042 for more details
        rs = np.random.RandomState(123)
        _perturb = lambda g: (np.asarray(g) + 1e-10*rs.randn(len(g))).tolist()
        g1_ = _perturb(g1)
        g2_ = _perturb(g2)
        g3_ = _perturb(g3)

        # Test that center='trimmed' gives the same result as center='mean'
        # when proportiontocut=0.
        Xsq1, pval1 = stats.fligner(g1_, g2_, g3_, center='mean')
        Xsq2, pval2 = stats.fligner(g1_, g2_, g3_, center='trimmed',
                                    proportiontocut=0.0)
        assert_almost_equal(Xsq1, Xsq2)
        assert_almost_equal(pval1, pval2)

    def test_trimmed2(self):
        x = [1.2, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 100.0]
        y = [0.0, 3.0, 3.5, 4.0, 4.5, 5.0, 5.5, 200.0]
        # Use center='trimmed'
        Xsq1, pval1 = stats.fligner(x, y, center='trimmed',
                                    proportiontocut=0.125)
        # Trim the data here, and use center='mean'
        Xsq2, pval2 = stats.fligner(x[1:-1], y[1:-1], center='mean')
        # Result should be the same.
        assert_almost_equal(Xsq1, Xsq2)
        assert_almost_equal(pval1, pval2)

    # The following test looks reasonable at first, but fligner() uses the
    # function stats.rankdata(), and in one of the cases in this test,
    # there are ties, while in the other (because of normal rounding
    # errors) there are not.  This difference leads to differences in the
    # third significant digit of W.
    #
    #def test_equal_mean_median(self):
    #    x = np.linspace(-1, 1, 21)
    #    y = x**3
    #    W1, pval1 = stats.fligner(x, y, center='mean')
    #    W2, pval2 = stats.fligner(x, y, center='median')
    #    assert_almost_equal(W1, W2)
    #    assert_almost_equal(pval1, pval2)

    def test_bad_keyword(self):
        x = np.linspace(-1, 1, 21)
        assert_raises(TypeError, stats.fligner, x, x, portiontocut=0.1)

    def test_bad_center_value(self):
        x = np.linspace(-1, 1, 21)
        assert_raises(ValueError, stats.fligner, x, x, center='trim')

    def test_bad_num_args(self):
        # Too few args raises ValueError.
        assert_raises(ValueError, stats.fligner, [1])

    def test_empty_arg(self):
        x = np.arange(5)
        assert_equal((np.nan, np.nan), stats.fligner(x, x**2, []))


class TestMood(object):
    def test_mood(self):
        # numbers from R: mood.test in package stats
        x1 = np.arange(5)
        assert_array_almost_equal(stats.mood(x1, x1**2),
                                  (-1.3830857299399906, 0.16663858066771478),
                                  11)
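
    # For reference: Mood's two-sample test for scale ranks the pooled
    # sample of size N and computes T = sum over the first sample of
    # (r_i - (N + 1)/2)**2.  stats.mood reports the standardized score
    # z = (T - E[T]) / sqrt(Var(T)) and its two-sided normal p-value,
    # which is what the R reference values above correspond to.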

    def test_mood_order_of_args(self):
        # z should change sign when the order of arguments changes, pvalue
        # should not change
        np.random.seed(1234)
        x1 = np.random.randn(10, 1)
        x2 = np.random.randn(15, 1)
        z1, p1 = stats.mood(x1, x2)
        z2, p2 = stats.mood(x2, x1)
        assert_array_almost_equal([z1, p1], [-z2, p2])

    def test_mood_with_axis_none(self):
        # Test with axis = None, compare with results from R
        x1 = [-0.626453810742332, 0.183643324222082, -0.835628612410047,
              1.59528080213779, 0.329507771815361, -0.820468384118015,
              0.487429052428485, 0.738324705129217, 0.575781351653492,
              -0.305388387156356, 1.51178116845085, 0.389843236411431,
              -0.621240580541804, -2.2146998871775, 1.12493091814311,
              -0.0449336090152309, -0.0161902630989461, 0.943836210685299,
              0.821221195098089, 0.593901321217509]

        x2 = [-0.896914546624981, 0.184849184646742, 1.58784533120882,
              -1.13037567424629, -0.0802517565509893, 0.132420284381094,
              0.707954729271733, -0.23969802417184, 1.98447393665293,
              -0.138787012119665, 0.417650750792556, 0.981752777463662,
              -0.392695355503813, -1.03966897694891, 1.78222896030858,
              -2.31106908460517, 0.878604580921265, 0.035806718015226,
              1.01282869212708, 0.432265154539617, 2.09081920524915,
              -1.19992581964387, 1.58963820029007, 1.95465164222325,
              0.00493777682814261, -2.45170638784613, 0.477237302613617,
              -0.596558168631403, 0.792203270299649, 0.289636710177348]

        x1 = np.array(x1)
        x2 = np.array(x2)
        x1.shape = (10, 2)
        x2.shape = (15, 2)
        assert_array_almost_equal(stats.mood(x1, x2, axis=None),
                                  [-1.31716607555, 0.18778296257])

    def test_mood_2d(self):
        # Test if the results of mood test in 2-D case are consistent with the
        # R result for the same inputs.  Numbers from R mood.test().
        ny = 5
        np.random.seed(1234)
        x1 = np.random.randn(10, ny)
        x2 = np.random.randn(15, ny)
        z_vectest, pval_vectest = stats.mood(x1, x2)

        for j in range(ny):
            assert_array_almost_equal([z_vectest[j], pval_vectest[j]],
                                      stats.mood(x1[:, j], x2[:, j]))

        # inverse order of dimensions
        x1 = x1.transpose()
        x2 = x2.transpose()
        z_vectest, pval_vectest = stats.mood(x1, x2, axis=1)

        for i in range(ny):
            # check axis handling is self consistent
            assert_array_almost_equal([z_vectest[i], pval_vectest[i]],
                                      stats.mood(x1[i, :], x2[i, :]))

    def test_mood_3d(self):
        shape = (10, 5, 6)
        np.random.seed(1234)
        x1 = np.random.randn(*shape)
        x2 = np.random.randn(*shape)

        for axis in range(3):
            z_vectest, pval_vectest = stats.mood(x1, x2, axis=axis)
            # Tests that result for 3-D arrays is equal to that for the
            # same calculation on a set of 1-D arrays taken from the
            # 3-D array
            axes_idx = ([1, 2], [0, 2], [0, 1])  # the two axes != axis
            for i in range(shape[axes_idx[axis][0]]):
                for j in range(shape[axes_idx[axis][1]]):
                    if axis == 0:
                        slice1 = x1[:, i, j]
                        slice2 = x2[:, i, j]
                    elif axis == 1:
                        slice1 = x1[i, :, j]
                        slice2 = x2[i, :, j]
                    else:
                        slice1 = x1[i, j, :]
                        slice2 = x2[i, j, :]

                    assert_array_almost_equal([z_vectest[i, j],
                                               pval_vectest[i, j]],
                                              stats.mood(slice1, slice2))

    def test_mood_bad_arg(self):
        # Raise ValueError when the sum of the lengths of the args is
        # less than 3
        assert_raises(ValueError, stats.mood, [1], [])


class TestProbplot(object):
    def test_basic(self):
        np.random.seed(12345)
        x = stats.norm.rvs(size=20)
        osm, osr = stats.probplot(x, fit=False)
        osm_expected = [-1.8241636, -1.38768012, -1.11829229, -0.91222575,
                        -0.73908135, -0.5857176, -0.44506467, -0.31273668,
                        -0.18568928, -0.06158146, 0.06158146, 0.18568928,
                        0.31273668, 0.44506467, 0.5857176, 0.73908135,
                        0.91222575, 1.11829229, 1.38768012, 1.8241636]
        assert_allclose(osr, np.sort(x))
        assert_allclose(osm, osm_expected)

        res, res_fit = stats.probplot(x, fit=True)
        res_fit_expected = [1.05361841, 0.31297795, 0.98741609]
        assert_allclose(res_fit, res_fit_expected)
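
    # For context: the theoretical quantiles `osm` above are the normal ppf
    # evaluated at Filliben's order-statistic medians, i.e. m_n = 0.5**(1/n),
    # m_1 = 1 - m_n, and m_i = (i - 0.3175) / (n + 0.365) otherwise.  The
    # `fit=True` triple is (slope, intercept, r) of the least-squares line
    # through (osm, osr).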

    def test_sparams_keyword(self):
        np.random.seed(123456)
        x = stats.norm.rvs(size=100)
        # Check that None, () and 0 (loc=0, for normal distribution) all work
        # and give the same results
        osm1, osr1 = stats.probplot(x, sparams=None, fit=False)
        osm2, osr2 = stats.probplot(x, sparams=0, fit=False)
        osm3, osr3 = stats.probplot(x, sparams=(), fit=False)
        assert_allclose(osm1, osm2)
        assert_allclose(osm1, osm3)
        assert_allclose(osr1, osr2)
        assert_allclose(osr1, osr3)

        # Check giving (loc, scale) params for normal distribution
        osm, osr = stats.probplot(x, sparams=(0, 1), fit=False)

    def test_dist_keyword(self):
        np.random.seed(12345)
        x = stats.norm.rvs(size=20)
        osm1, osr1 = stats.probplot(x, fit=False, dist='t', sparams=(3,))
        osm2, osr2 = stats.probplot(x, fit=False, dist=stats.t, sparams=(3,))
        assert_allclose(osm1, osm2)
        assert_allclose(osr1, osr2)

        assert_raises(ValueError, stats.probplot, x, dist='wrong-dist-name')
        assert_raises(AttributeError, stats.probplot, x, dist=[])

        class custom_dist(object):
            """Some class that looks just enough like a distribution."""
            def ppf(self, q):
                return stats.norm.ppf(q, loc=2)

        osm1, osr1 = stats.probplot(x, sparams=(2,), fit=False)
        osm2, osr2 = stats.probplot(x, dist=custom_dist(), fit=False)
        assert_allclose(osm1, osm2)
        assert_allclose(osr1, osr2)

    @pytest.mark.skipif(not have_matplotlib, reason="no matplotlib")
    def test_plot_kwarg(self):
        np.random.seed(7654321)
        fig = plt.figure()
        fig.add_subplot(111)
        x = stats.t.rvs(3, size=100)
        res1, fitres1 = stats.probplot(x, plot=plt)
        plt.close()
        res2, fitres2 = stats.probplot(x, plot=None)
        res3 = stats.probplot(x, fit=False, plot=plt)
        plt.close()
        res4 = stats.probplot(x, fit=False, plot=None)
        # Check that results are consistent between combinations of `fit` and
        # `plot` keywords.
        assert_(len(res1) == len(res2) == len(res3) == len(res4) == 2)
        assert_allclose(res1, res2)
        assert_allclose(res1, res3)
        assert_allclose(res1, res4)
        assert_allclose(fitres1, fitres2)

        # Check that a Matplotlib Axes object is accepted
        fig = plt.figure()
        ax = fig.add_subplot(111)
        stats.probplot(x, fit=False, plot=ax)
        plt.close()

    def test_probplot_bad_args(self):
        # Raise ValueError when given an invalid distribution.
        assert_raises(ValueError, stats.probplot, [1], dist="plate_of_shrimp")

    def test_empty(self):
        assert_equal(stats.probplot([], fit=False),
                     (np.array([]), np.array([])))
        assert_equal(stats.probplot([], fit=True),
                     ((np.array([]), np.array([])),
                      (np.nan, np.nan, 0.0)))

    def test_array_of_size_one(self):
        with np.errstate(invalid='ignore'):
            assert_equal(stats.probplot([1], fit=True),
                         ((np.array([0.]), np.array([1])),
                          (np.nan, np.nan, 0.0)))


def test_wilcoxon_bad_arg():
    # Raise ValueError when two args of different lengths are given or
    # zero_method is unknown.
    assert_raises(ValueError, stats.wilcoxon, [1], [1, 2])
    assert_raises(ValueError, stats.wilcoxon, [1, 2], [1, 2], "dummy")


def test_wilcoxon_arg_type():
    # Should be able to accept list as arguments.
    # Address issue 6070.
    arr = [1, 2, 3, 0, -1, 3, 1, 2, 1, 1, 2]

    _ = stats.wilcoxon(arr, zero_method="pratt")
    _ = stats.wilcoxon(arr, zero_method="zsplit")
    _ = stats.wilcoxon(arr, zero_method="wilcox")


class TestKstat(object):
    def test_moments_normal_distribution(self):
        np.random.seed(32149)
        data = np.random.randn(12345)
        moments = []
        for n in [1, 2, 3, 4]:
            moments.append(stats.kstat(data, n))

        expected = [0.011315, 1.017931, 0.05811052, 0.0754134]
        assert_allclose(moments, expected, rtol=1e-4)

        # test equivalence with `stats.moment`
        m1 = stats.moment(data, moment=1)
        m2 = stats.moment(data, moment=2)
        m3 = stats.moment(data, moment=3)
        assert_allclose((m1, m2, m3), expected[:-1], atol=0.02, rtol=1e-2)
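
    # For context: k-statistics are unbiased estimators of the cumulants,
    # and the first three cumulants equal the mean and the second and third
    # central moments.  The loose atol=0.02 above is needed because
    # stats.moment computes *central* moments, so m1 is identically zero
    # while the first k-statistic is the sample mean (~0.0113 here).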

    def test_empty_input(self):
        assert_raises(ValueError, stats.kstat, [])

    def test_nan_input(self):
        data = np.arange(10.)
        data[6] = np.nan

        assert_equal(stats.kstat(data), np.nan)

    def test_kstat_bad_arg(self):
        # Raise ValueError if n > 4 or n < 1.
        data = np.arange(10)
        for n in [0, 4.001]:
            assert_raises(ValueError, stats.kstat, data, n=n)


class TestKstatVar(object):
    def test_empty_input(self):
        assert_raises(ValueError, stats.kstatvar, [])

    def test_nan_input(self):
        data = np.arange(10.)
        data[6] = np.nan

        assert_equal(stats.kstatvar(data), np.nan)

    def test_bad_arg(self):
        # Raise ValueError if n is not 1 or 2.
        data = [1]
        n = 10
        assert_raises(ValueError, stats.kstatvar, data, n=n)


class TestPpccPlot(object):
    def setup_method(self):
        np.random.seed(7654321)
        self.x = stats.loggamma.rvs(5, size=500) + 5

    def test_basic(self):
        N = 5
        svals, ppcc = stats.ppcc_plot(self.x, -10, 10, N=N)
        ppcc_expected = [0.21139644, 0.21384059, 0.98766719, 0.97980182,
                         0.93519298]
        assert_allclose(svals, np.linspace(-10, 10, num=N))
        assert_allclose(ppcc, ppcc_expected)

    def test_dist(self):
        # Test that we can specify distributions both by name and as objects.
        svals1, ppcc1 = stats.ppcc_plot(self.x, -10, 10, dist='tukeylambda')
        svals2, ppcc2 = stats.ppcc_plot(self.x, -10, 10,
                                        dist=stats.tukeylambda)
        assert_allclose(svals1, svals2, rtol=1e-20)
        assert_allclose(ppcc1, ppcc2, rtol=1e-20)

        # Test that 'tukeylambda' is the default dist
        svals3, ppcc3 = stats.ppcc_plot(self.x, -10, 10)
        assert_allclose(svals1, svals3, rtol=1e-20)
        assert_allclose(ppcc1, ppcc3, rtol=1e-20)

    @pytest.mark.skipif(not have_matplotlib, reason="no matplotlib")
    def test_plot_kwarg(self):
        # Check with the matplotlib.pyplot module
        fig = plt.figure()
        ax = fig.add_subplot(111)
        stats.ppcc_plot(self.x, -20, 20, plot=plt)
        fig.delaxes(ax)

        # Check that a Matplotlib Axes object is accepted
        ax = fig.add_subplot(111)
        stats.ppcc_plot(self.x, -20, 20, plot=ax)
        plt.close()

    def test_invalid_inputs(self):
        # `b` has to be larger than `a`
        assert_raises(ValueError, stats.ppcc_plot, self.x, 1, 0)

        # Raise ValueError when given an invalid distribution.
        assert_raises(ValueError, stats.ppcc_plot, [1, 2, 3], 0, 1,
                      dist="plate_of_shrimp")

    def test_empty(self):
        # For consistency with probplot return for one empty array,
        # ppcc contains all zeros and svals is the same as for normal array
        # input.
        svals, ppcc = stats.ppcc_plot([], 0, 1)
        assert_allclose(svals, np.linspace(0, 1, num=80))
        assert_allclose(ppcc, np.zeros(80, dtype=float))


class TestPpccMax(object):
    def test_ppcc_max_bad_arg(self):
        # Raise ValueError when given an invalid distribution.
        data = [1]
        assert_raises(ValueError, stats.ppcc_max, data, dist="plate_of_shrimp")

    def test_ppcc_max_basic(self):
        np.random.seed(1234567)
        x = stats.tukeylambda.rvs(-0.7, loc=2, scale=0.5, size=10000) + 1e4
        # On Python 2.6 the result is accurate to 5 decimals. On Python >= 2.7
        # it is accurate up to 16 decimals
        assert_almost_equal(stats.ppcc_max(x), -0.71215366521264145, decimal=5)

    def test_dist(self):
        np.random.seed(1234567)
        x = stats.tukeylambda.rvs(-0.7, loc=2, scale=0.5, size=10000) + 1e4

        # Test that we can specify distributions both by name and as objects.
        max1 = stats.ppcc_max(x, dist='tukeylambda')
        max2 = stats.ppcc_max(x, dist=stats.tukeylambda)
        assert_almost_equal(max1, -0.71215366521264145, decimal=5)
        assert_almost_equal(max2, -0.71215366521264145, decimal=5)

        # Test that 'tukeylambda' is the default dist
        max3 = stats.ppcc_max(x)
        assert_almost_equal(max3, -0.71215366521264145, decimal=5)

    def test_brack(self):
        np.random.seed(1234567)
        x = stats.tukeylambda.rvs(-0.7, loc=2, scale=0.5, size=10000) + 1e4
        assert_raises(ValueError, stats.ppcc_max, x, brack=(0.0, 1.0, 0.5))

        # On Python 2.6 the result is accurate to 5 decimals. On Python >= 2.7
        # it is accurate up to 16 decimals
        assert_almost_equal(stats.ppcc_max(x, brack=(0, 1)),
                            -0.71215366521264145, decimal=5)
        # On Python 2.6 the result is accurate to 5 decimals. On Python >= 2.7
        # it is accurate up to 16 decimals
        assert_almost_equal(stats.ppcc_max(x, brack=(-2, 2)),
                            -0.71215366521264145, decimal=5)


class TestBoxcox_llf(object):
    def test_basic(self):
        np.random.seed(54321)
        x = stats.norm.rvs(size=10000, loc=10)
        lmbda = 1
        llf = stats.boxcox_llf(lmbda, x)
        llf_expected = -x.size / 2. * np.log(np.sum(x.std()**2))
        assert_allclose(llf, llf_expected)
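
    # The expected value above follows from the Box-Cox log-likelihood
    #     llf(lmbda) = (lmbda - 1) * sum(log(x)) - N/2 * log(var(y(lmbda))),
    # where y(lmbda) is the transformed data: at lmbda=1 the first term
    # vanishes and y is just a shift of x, so llf = -N/2 * log(var(x)).
    # (The np.sum around the scalar x.std()**2 is a no-op.)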

    def test_array_like(self):
        np.random.seed(54321)
        x = stats.norm.rvs(size=100, loc=10)
        lmbda = 1
        llf = stats.boxcox_llf(lmbda, x)
        llf2 = stats.boxcox_llf(lmbda, list(x))
        assert_allclose(llf, llf2, rtol=1e-12)

    def test_2d_input(self):
        # Note: boxcox_llf() was already working with 2-D input (sort of), so
        # keep it like that.  boxcox() doesn't work with 2-D input though, due
        # to brent() returning a scalar.
        np.random.seed(54321)
        x = stats.norm.rvs(size=100, loc=10)
        lmbda = 1
        llf = stats.boxcox_llf(lmbda, x)
        llf2 = stats.boxcox_llf(lmbda, np.vstack([x, x]).T)
        assert_allclose([llf, llf], llf2, rtol=1e-12)

    def test_empty(self):
        assert_(np.isnan(stats.boxcox_llf(1, [])))


class TestBoxcox(object):
    def test_fixed_lmbda(self):
        np.random.seed(12345)
        x = stats.loggamma.rvs(5, size=50) + 5
        xt = stats.boxcox(x, lmbda=1)
        assert_allclose(xt, x - 1)
        xt = stats.boxcox(x, lmbda=-1)
        assert_allclose(xt, 1 - 1/x)

        xt = stats.boxcox(x, lmbda=0)
        assert_allclose(xt, np.log(x))

        # Also test that array_like input works
        xt = stats.boxcox(list(x), lmbda=0)
        assert_allclose(xt, np.log(x))
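
    # These closed forms all follow from the definition of the transform,
    #     y = (x**lmbda - 1) / lmbda  for lmbda != 0,  y = log(x)  otherwise:
    # lmbda=1 gives x - 1, and lmbda=-1 gives (1/x - 1)/(-1) = 1 - 1/x.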

    def test_lmbda_None(self):
        np.random.seed(1234567)

        # Start from normal rv's, do inverse transform to check that
        # optimization function gets close to the right answer.
        np.random.seed(1245)
        lmbda = 2.5
        x = stats.norm.rvs(loc=10, size=50000)
        x_inv = (x * lmbda + 1)**(-lmbda)
        xt, maxlog = stats.boxcox(x_inv)

        assert_almost_equal(maxlog, -1 / lmbda, decimal=2)

    def test_alpha(self):
        np.random.seed(1234)
        x = stats.loggamma.rvs(5, size=50) + 5

        # Some regular values for alpha, on a small sample size
        _, _, interval = stats.boxcox(x, alpha=0.75)
        assert_allclose(interval, [4.004485780226041, 5.138756355035744])
        _, _, interval = stats.boxcox(x, alpha=0.05)
        assert_allclose(interval, [1.2138178554857557, 8.209033272375663])

        # Try some extreme values, see we don't hit the N=500 limit
        x = stats.loggamma.rvs(7, size=500) + 15
        _, _, interval = stats.boxcox(x, alpha=0.001)
        assert_allclose(interval, [0.3988867, 11.40553131])
        _, _, interval = stats.boxcox(x, alpha=0.999)
        assert_allclose(interval, [5.83316246, 5.83735292])

    def test_boxcox_bad_arg(self):
        # Raise ValueError if any data value is negative.
        x = np.array([-1])
        assert_raises(ValueError, stats.boxcox, x)

    def test_empty(self):
        assert_(stats.boxcox([]).shape == (0,))


class TestBoxcoxNormmax(object):
    def setup_method(self):
        np.random.seed(12345)
        self.x = stats.loggamma.rvs(5, size=50) + 5

    def test_pearsonr(self):
        maxlog = stats.boxcox_normmax(self.x)
        assert_allclose(maxlog, 1.804465, rtol=1e-6)

    def test_mle(self):
        maxlog = stats.boxcox_normmax(self.x, method='mle')
        assert_allclose(maxlog, 1.758101, rtol=1e-6)

        # Check that boxcox() uses 'mle'
        _, maxlog_boxcox = stats.boxcox(self.x)
        assert_allclose(maxlog_boxcox, maxlog)

    def test_all(self):
        maxlog_all = stats.boxcox_normmax(self.x, method='all')
        assert_allclose(maxlog_all, [1.804465, 1.758101], rtol=1e-6)


class TestBoxcoxNormplot(object):
    def setup_method(self):
        np.random.seed(7654321)
        self.x = stats.loggamma.rvs(5, size=500) + 5

    def test_basic(self):
        N = 5
        lmbdas, ppcc = stats.boxcox_normplot(self.x, -10, 10, N=N)
        ppcc_expected = [0.57783375, 0.83610988, 0.97524311, 0.99756057,
                         0.95843297]
        assert_allclose(lmbdas, np.linspace(-10, 10, num=N))
        assert_allclose(ppcc, ppcc_expected)

    @pytest.mark.skipif(not have_matplotlib, reason="no matplotlib")
    def test_plot_kwarg(self):
        # Check with the matplotlib.pyplot module
        fig = plt.figure()
        ax = fig.add_subplot(111)
        stats.boxcox_normplot(self.x, -20, 20, plot=plt)
        fig.delaxes(ax)

        # Check that a Matplotlib Axes object is accepted
        ax = fig.add_subplot(111)
        stats.boxcox_normplot(self.x, -20, 20, plot=ax)
        plt.close()

    def test_invalid_inputs(self):
        # `lb` has to be larger than `la`
        assert_raises(ValueError, stats.boxcox_normplot, self.x, 1, 0)
        # `x` can not contain negative values
        assert_raises(ValueError, stats.boxcox_normplot, [-1, 1], 0, 1)

    def test_empty(self):
        assert_(stats.boxcox_normplot([], 0, 1).size == 0)


class TestYeojohnson_llf(object):
    def test_array_like(self):
        np.random.seed(54321)
        x = stats.norm.rvs(size=100, loc=0)
        lmbda = 1
        llf = stats.yeojohnson_llf(lmbda, x)
        llf2 = stats.yeojohnson_llf(lmbda, list(x))
        assert_allclose(llf, llf2, rtol=1e-12)

    def test_2d_input(self):
        np.random.seed(54321)
        x = stats.norm.rvs(size=100, loc=10)
        lmbda = 1
        llf = stats.yeojohnson_llf(lmbda, x)
        llf2 = stats.yeojohnson_llf(lmbda, np.vstack([x, x]).T)
        assert_allclose([llf, llf], llf2, rtol=1e-12)

    def test_empty(self):
        assert_(np.isnan(stats.yeojohnson_llf(1, [])))


class TestYeojohnson(object):
    def test_fixed_lmbda(self):
        np.random.seed(12345)

        # Test positive input
        x = stats.loggamma.rvs(5, size=50) + 5
        assert np.all(x > 0)
        xt = stats.yeojohnson(x, lmbda=1)
        assert_allclose(xt, x)
        xt = stats.yeojohnson(x, lmbda=-1)
        assert_allclose(xt, 1 - 1 / (x + 1))
        xt = stats.yeojohnson(x, lmbda=0)
        assert_allclose(xt, np.log(x + 1))
        xt = stats.yeojohnson(x, lmbda=1)
        assert_allclose(xt, x)

        # Test negative input
        x = stats.loggamma.rvs(5, size=50) - 5
        assert np.all(x < 0)
        xt = stats.yeojohnson(x, lmbda=2)
        assert_allclose(xt, -np.log(-x + 1))
        xt = stats.yeojohnson(x, lmbda=1)
        assert_allclose(xt, x)
        xt = stats.yeojohnson(x, lmbda=3)
        assert_allclose(xt, 1 / (-x + 1) - 1)

        # test both positive and negative input
        x = stats.loggamma.rvs(5, size=50) - 2
        assert not np.all(x < 0)
        assert not np.all(x >= 0)
        pos = x >= 0
        xt = stats.yeojohnson(x, lmbda=1)
        assert_allclose(xt[pos], x[pos])
        xt = stats.yeojohnson(x, lmbda=-1)
        assert_allclose(xt[pos], 1 - 1 / (x[pos] + 1))
        xt = stats.yeojohnson(x, lmbda=0)
        assert_allclose(xt[pos], np.log(x[pos] + 1))
        xt = stats.yeojohnson(x, lmbda=1)
        assert_allclose(xt[pos], x[pos])

        neg = ~pos
        xt = stats.yeojohnson(x, lmbda=2)
        assert_allclose(xt[neg], -np.log(-x[neg] + 1))
        xt = stats.yeojohnson(x, lmbda=1)
        assert_allclose(xt[neg], x[neg])
        xt = stats.yeojohnson(x, lmbda=3)
        assert_allclose(xt[neg], 1 / (-x[neg] + 1) - 1)

    @pytest.mark.parametrize('lmbda', [0, .1, .5, 2])
    def test_lmbda_None(self, lmbda):
        # Start from normal rv's, do inverse transform to check that
        # optimization function gets close to the right answer.
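        # (Recall the forward Yeo-Johnson transform being inverted here:
        #      y = ((x + 1)**lmbda - 1) / lmbda                 x >= 0, lmbda != 0
        #      y = log(x + 1)                                   x >= 0, lmbda == 0
        #      y = -((-x + 1)**(2 - lmbda) - 1) / (2 - lmbda)   x < 0,  lmbda != 2
        #      y = -log(-x + 1)                                 x < 0,  lmbda == 2)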
        def _inverse_transform(x, lmbda):
            x_inv = np.zeros(x.shape, dtype=x.dtype)
            pos = x >= 0

            # when x >= 0
            if abs(lmbda) < np.spacing(1.):
                x_inv[pos] = np.exp(x[pos]) - 1
            else:  # lmbda != 0
                x_inv[pos] = np.power(x[pos] * lmbda + 1, 1 / lmbda) - 1

            # when x < 0
            if abs(lmbda - 2) > np.spacing(1.):
                x_inv[~pos] = 1 - np.power(-(2 - lmbda) * x[~pos] + 1,
                                           1 / (2 - lmbda))
            else:  # lmbda == 2
                x_inv[~pos] = 1 - np.exp(-x[~pos])

            return x_inv

        np.random.seed(1234567)
        n_samples = 20000
        x = np.random.normal(loc=0, scale=1, size=(n_samples))

        x_inv = _inverse_transform(x, lmbda)
        xt, maxlog = stats.yeojohnson(x_inv)

        assert_allclose(maxlog, lmbda, atol=1e-2)

        assert_almost_equal(0, np.linalg.norm(x - xt) / n_samples, decimal=2)
        assert_almost_equal(0, xt.mean(), decimal=1)
        assert_almost_equal(1, xt.std(), decimal=1)

    def test_empty(self):
        assert_(stats.yeojohnson([]).shape == (0,))

    def test_array_like(self):
        np.random.seed(54321)
        x = stats.norm.rvs(size=100, loc=0)
        lmbda = 1.5
        xt1, _ = stats.yeojohnson(x)
        xt2, _ = stats.yeojohnson(list(x))
        assert_allclose(xt1, xt2, rtol=1e-12)


class TestYeojohnsonNormmax(object):
    def setup_method(self):
        np.random.seed(12345)
        self.x = stats.loggamma.rvs(5, size=50) + 5

    def test_mle(self):
        maxlog = stats.yeojohnson_normmax(self.x)
        assert_allclose(maxlog, 1.876393, rtol=1e-6)

    def test_darwin_example(self):
        # test from original paper "A new family of power transformations to
        # improve normality or symmetry" by Yeo and Johnson.
        x = [6.1, -8.4, 1.0, 2.0, 0.7, 2.9, 3.5, 5.1, 1.8, 3.6, 7.0, 3.0, 9.3,
             7.5, -6.0]
        lmbda = stats.yeojohnson_normmax(x)
        assert np.allclose(lmbda, 1.305, atol=1e-3)


class TestCircFuncs(object):
    def test_circfuncs(self):
        x = np.array([355, 5, 2, 359, 10, 350])
        M = stats.circmean(x, high=360)
        Mval = 0.167690146
        assert_allclose(M, Mval, rtol=1e-7)

        V = stats.circvar(x, high=360)
        Vval = 42.51955609
        assert_allclose(V, Vval, rtol=1e-7)

        S = stats.circstd(x, high=360)
        Sval = 6.520702116
        assert_allclose(S, Sval, rtol=1e-7)
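
    # The reference mean can be reproduced from the definition of the
    # circular mean (average the unit phasors, take the angle of the
    # resultant, map back to degrees); a minimal sketch:
    #
    #     ang = np.deg2rad(np.array([355, 5, 2, 359, 10, 350]))
    #     m = np.angle(np.mean(np.exp(1j * ang)))
    #     np.rad2deg(m) % 360   # ~0.16769, matching Mval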
  1129. def test_circfuncs_small(self):
  1130. x = np.array([20, 21, 22, 18, 19, 20.5, 19.2])
  1131. M1 = x.mean()
  1132. M2 = stats.circmean(x, high=360)
  1133. assert_allclose(M2, M1, rtol=1e-5)
  1134. V1 = x.var()
  1135. V2 = stats.circvar(x, high=360)
  1136. assert_allclose(V2, V1, rtol=1e-4)
  1137. S1 = x.std()
  1138. S2 = stats.circstd(x, high=360)
  1139. assert_allclose(S2, S1, rtol=1e-4)

    def test_circmean_axis(self):
        x = np.array([[355, 5, 2, 359, 10, 350],
                      [351, 7, 4, 352, 9, 349],
                      [357, 9, 8, 358, 4, 356]])
        M1 = stats.circmean(x, high=360)
        M2 = stats.circmean(x.ravel(), high=360)
        assert_allclose(M1, M2, rtol=1e-14)

        M1 = stats.circmean(x, high=360, axis=1)
        M2 = [stats.circmean(x[i], high=360) for i in range(x.shape[0])]
        assert_allclose(M1, M2, rtol=1e-14)

        M1 = stats.circmean(x, high=360, axis=0)
        M2 = [stats.circmean(x[:, i], high=360) for i in range(x.shape[1])]
        assert_allclose(M1, M2, rtol=1e-14)

    def test_circvar_axis(self):
        x = np.array([[355, 5, 2, 359, 10, 350],
                      [351, 7, 4, 352, 9, 349],
                      [357, 9, 8, 358, 4, 356]])
        V1 = stats.circvar(x, high=360)
        V2 = stats.circvar(x.ravel(), high=360)
        assert_allclose(V1, V2, rtol=1e-11)

        V1 = stats.circvar(x, high=360, axis=1)
        V2 = [stats.circvar(x[i], high=360) for i in range(x.shape[0])]
        assert_allclose(V1, V2, rtol=1e-11)

        V1 = stats.circvar(x, high=360, axis=0)
        V2 = [stats.circvar(x[:, i], high=360) for i in range(x.shape[1])]
        assert_allclose(V1, V2, rtol=1e-11)

    def test_circstd_axis(self):
        x = np.array([[355, 5, 2, 359, 10, 350],
                      [351, 7, 4, 352, 9, 349],
                      [357, 9, 8, 358, 4, 356]])
        S1 = stats.circstd(x, high=360)
        S2 = stats.circstd(x.ravel(), high=360)
        assert_allclose(S1, S2, rtol=1e-11)

        S1 = stats.circstd(x, high=360, axis=1)
        S2 = [stats.circstd(x[i], high=360) for i in range(x.shape[0])]
        assert_allclose(S1, S2, rtol=1e-11)

        S1 = stats.circstd(x, high=360, axis=0)
        S2 = [stats.circstd(x[:, i], high=360) for i in range(x.shape[1])]
        assert_allclose(S1, S2, rtol=1e-11)

    def test_circfuncs_array_like(self):
        x = [355, 5, 2, 359, 10, 350]
        assert_allclose(stats.circmean(x, high=360), 0.167690146, rtol=1e-7)
        assert_allclose(stats.circvar(x, high=360), 42.51955609, rtol=1e-7)
        assert_allclose(stats.circstd(x, high=360), 6.520702116, rtol=1e-7)

    def test_empty(self):
        assert_(np.isnan(stats.circmean([])))
        assert_(np.isnan(stats.circstd([])))
        assert_(np.isnan(stats.circvar([])))

    def test_circmean_scalar(self):
        x = 1.
        M1 = x
        M2 = stats.circmean(x)
        assert_allclose(M2, M1, rtol=1e-5)

    def test_circmean_range(self):
        # Regression test for gh-6420: circmean(..., high, low) must lie
        # between `low` and `high`.
        m = stats.circmean(np.arange(0, 2, 0.1), np.pi, -np.pi)
        assert_(m < np.pi)
        assert_(m > -np.pi)

    def test_circfuncs_uint8(self):
        # Regression test for gh-7255: overflow when working with
        # the numpy uint8 data type.
        x = np.array([150, 10], dtype='uint8')
        assert_equal(stats.circmean(x, high=180), 170.0)
        assert_allclose(stats.circvar(x, high=180), 437.45871686, rtol=1e-7)
        assert_allclose(stats.circstd(x, high=180), 20.91551378, rtol=1e-7)
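
        # The original failure came from intermediate arithmetic wrapping
        # around in uint8; the fix presumably promotes the samples to
        # floating point before the angle computation.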


def test_accuracy_wilcoxon():
    freq = [1, 4, 16, 15, 8, 4, 5, 1, 2]
    nums = range(-4, 5)
    x = np.concatenate([[u] * v for u, v in zip(nums, freq)])
    y = np.zeros(x.size)
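
    # The three zero_method options differ in how zero differences
    # (x == y) are handled: "wilcox" drops them before ranking, "pratt"
    # ranks them but drops their ranks from the statistic, and "zsplit"
    # splits their ranks evenly between the positive and negative sums.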

    T, p = stats.wilcoxon(x, y, "pratt")
    assert_allclose(T, 423)
    assert_allclose(p, 0.00197547303533107)

    T, p = stats.wilcoxon(x, y, "zsplit")
    assert_allclose(T, 441)
    assert_allclose(p, 0.0032145343172473055)

    T, p = stats.wilcoxon(x, y, "wilcox")
    assert_allclose(T, 327)
    assert_allclose(p, 0.00641346115861)

    # Test the 'correction' option, using values computed in R with:
    # > wilcox.test(x, y, paired=TRUE, exact=FALSE, correct={FALSE,TRUE})
    x = np.array([120, 114, 181, 188, 180, 146, 121, 191, 132, 113, 127, 112])
    y = np.array([133, 143, 119, 189, 112, 199, 198, 113, 115, 121, 142, 187])
    T, p = stats.wilcoxon(x, y, correction=False)
    assert_equal(T, 34)
    assert_allclose(p, 0.6948866, rtol=1e-6)
    T, p = stats.wilcoxon(x, y, correction=True)
    assert_equal(T, 34)
    assert_allclose(p, 0.7240817, rtol=1e-6)


def test_wilcoxon_result_attributes():
    x = np.array([120, 114, 181, 188, 180, 146, 121, 191, 132, 113, 127, 112])
    y = np.array([133, 143, 119, 189, 112, 199, 198, 113, 115, 121, 142, 187])
    res = stats.wilcoxon(x, y, correction=False)
    attributes = ('statistic', 'pvalue')
    check_named_results(res, attributes)


def test_wilcoxon_tie():
    # Regression test for gh-2391.
    # Corresponding R code is:
    # > result = wilcox.test(rep(0.1, 10), exact=FALSE, correct=FALSE)
    # > result$p.value
    # [1] 0.001565402
    # > result = wilcox.test(rep(0.1, 10), exact=FALSE, correct=TRUE)
    # > result$p.value
    # [1] 0.001904195
    stat, p = stats.wilcoxon([0.1] * 10)
    expected_p = 0.001565402
    assert_equal(stat, 0)
    assert_allclose(p, expected_p, rtol=1e-6)

    stat, p = stats.wilcoxon([0.1] * 10, correction=True)
    expected_p = 0.001904195
    assert_equal(stat, 0)
    assert_allclose(p, expected_p, rtol=1e-6)


class TestMedianTest(object):
    def test_bad_n_samples(self):
        # median_test requires at least two samples.
        assert_raises(ValueError, stats.median_test, [1, 2, 3])

    def test_empty_sample(self):
        # Each sample must contain at least one value.
        assert_raises(ValueError, stats.median_test, [], [1, 2, 3])

    def test_empty_when_ties_ignored(self):
        # The grand median is 1, and all values in the first argument are
        # equal to the grand median. With ties="ignore", those values are
        # ignored, which results in the first sample being (in effect) empty.
        # This should raise a ValueError.
        assert_raises(ValueError, stats.median_test,
                      [1, 1, 1, 1], [2, 0, 1], [2, 0], ties="ignore")

    def test_empty_contingency_row(self):
        # The grand median is 1, and with the default ties="below", all the
        # values in the samples are counted as being below the grand median.
        # This would result in a row of zeros in the contingency table,
        # which is an error.
        assert_raises(ValueError, stats.median_test, [1, 1, 1], [1, 1, 1])

        # With ties="above", all the values are counted as above the
        # grand median.
        assert_raises(ValueError, stats.median_test, [1, 1, 1], [1, 1, 1],
                      ties="above")

    def test_bad_ties(self):
        assert_raises(ValueError, stats.median_test, [1, 2, 3], [4, 5],
                      ties="foo")

    def test_bad_nan_policy(self):
        assert_raises(ValueError, stats.median_test, [1, 2, 3], [4, 5],
                      nan_policy='foobar')

    def test_bad_keyword(self):
        assert_raises(TypeError, stats.median_test, [1, 2, 3], [4, 5],
                      foo="foo")

    def test_simple(self):
        x = [1, 2, 3]
        y = [1, 2, 3]
        stat, p, med, tbl = stats.median_test(x, y)

        # The median is floating point, but this equality test should be safe.
        assert_equal(med, 2.0)

        assert_array_equal(tbl, [[1, 1], [2, 2]])

        # The expected values of the contingency table equal the contingency
        # table, so the statistic should be 0 and the p-value should be 1.
        assert_equal(stat, 0)
        assert_equal(p, 1)
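
    # The contingency table has one column per sample: row 0 counts the
    # values above the grand median and row 1 counts the rest. With the
    # default ties="below", values equal to the grand median land in row 1.
    # In test_simple, the grand median of [1, 2, 3, 1, 2, 3] is 2.0; each
    # sample has one value above it (the 3) and two at or below (1 and 2),
    # hence [[1, 1], [2, 2]].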

    def test_ties_options(self):
        # Test the contingency table calculation.
        x = [1, 2, 3, 4]
        y = [5, 6]
        z = [7, 8, 9]
        # The grand median is 5.

        # The default 'ties' option is "below".
        stat, p, m, tbl = stats.median_test(x, y, z)
        assert_equal(m, 5)
        assert_equal(tbl, [[0, 1, 3], [4, 1, 0]])

        stat, p, m, tbl = stats.median_test(x, y, z, ties="ignore")
        assert_equal(m, 5)
        assert_equal(tbl, [[0, 1, 3], [4, 0, 0]])

        stat, p, m, tbl = stats.median_test(x, y, z, ties="above")
        assert_equal(m, 5)
        assert_equal(tbl, [[0, 2, 3], [4, 0, 0]])
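
    # nan_policy follows the usual scipy convention: 'propagate' yields
    # nan outputs, 'omit' drops nans before computing, and 'raise' turns
    # nan input into a ValueError.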

    def test_nan_policy_options(self):
        x = [1, 2, np.nan]
        y = [4, 5, 6]
        mt1 = stats.median_test(x, y, nan_policy='propagate')
        s, p, m, t = stats.median_test(x, y, nan_policy='omit')

        assert_equal(mt1, (np.nan, np.nan, np.nan, None))
        assert_allclose(s, 0.31250000000000006)
        assert_allclose(p, 0.57615012203057869)
        assert_equal(m, 4.0)
        assert_equal(t, np.array([[0, 2], [2, 1]]))
        assert_raises(ValueError, stats.median_test, x, y, nan_policy='raise')

    def test_basic(self):
        # median_test calls chi2_contingency to compute the test statistic
        # and p-value. Make sure it hasn't screwed up the call...
        x = [1, 2, 3, 4, 5]
        y = [2, 4, 6, 8]

        stat, p, m, tbl = stats.median_test(x, y)
        assert_equal(m, 4)
        assert_equal(tbl, [[1, 2], [4, 2]])

        exp_stat, exp_p, dof, e = stats.chi2_contingency(tbl)
        assert_allclose(stat, exp_stat)
        assert_allclose(p, exp_p)

        stat, p, m, tbl = stats.median_test(x, y, lambda_=0)
        assert_equal(m, 4)
        assert_equal(tbl, [[1, 2], [4, 2]])

        exp_stat, exp_p, dof, e = stats.chi2_contingency(tbl, lambda_=0)
        assert_allclose(stat, exp_stat)
        assert_allclose(p, exp_p)

        stat, p, m, tbl = stats.median_test(x, y, correction=False)
        assert_equal(m, 4)
        assert_equal(tbl, [[1, 2], [4, 2]])

        exp_stat, exp_p, dof, e = stats.chi2_contingency(tbl, correction=False)
        assert_allclose(stat, exp_stat)
        assert_allclose(p, exp_p)