  1. """
  2. Tests for the stats.mstats module (support for masked arrays)
  3. """
  4. from __future__ import division, print_function, absolute_import
  5. import warnings
  6. import numpy as np
  7. from numpy import nan
  8. import numpy.ma as ma
  9. from numpy.ma import masked, nomask
  10. import scipy.stats.mstats as mstats
  11. from scipy import stats
  12. from .common_tests import check_named_results
  13. import pytest
  14. from pytest import raises as assert_raises
  15. from numpy.ma.testutils import (assert_equal, assert_almost_equal,
  16. assert_array_almost_equal, assert_array_almost_equal_nulp, assert_,
  17. assert_allclose, assert_array_equal)
  18. from scipy._lib._numpy_compat import suppress_warnings
  19. class TestMquantiles(object):
  20. def test_mquantiles_limit_keyword(self):
  21. # Regression test for Trac ticket #867
  22. data = np.array([[6., 7., 1.],
  23. [47., 15., 2.],
  24. [49., 36., 3.],
  25. [15., 39., 4.],
  26. [42., 40., -999.],
  27. [41., 41., -999.],
  28. [7., -999., -999.],
  29. [39., -999., -999.],
  30. [43., -999., -999.],
  31. [40., -999., -999.],
  32. [36., -999., -999.]])
  33. desired = [[19.2, 14.6, 1.45],
  34. [40.0, 37.5, 2.5],
  35. [42.8, 40.05, 3.55]]
  36. quants = mstats.mquantiles(data, axis=0, limit=(0, 50))
  37. assert_almost_equal(quants, desired)
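
        # Illustrative cross-check (an added sketch, not part of the original
        # regression test): limit=(0, 50) ignores values outside the open
        # interval (0, 50), so masking the -999. sentinels beforehand should
        # give the same quantiles.
        masked_data = ma.masked_outside(data, 0., 50.)
        assert_almost_equal(mstats.mquantiles(masked_data, axis=0), desired)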


class TestGMean(object):
    def test_1D(self):
        a = (1, 2, 3, 4)
        actual = mstats.gmean(a)
        desired = np.power(1*2*3*4, 1./4.)
        assert_almost_equal(actual, desired, decimal=14)

        desired1 = mstats.gmean(a, axis=-1)
        assert_almost_equal(actual, desired1, decimal=14)
        assert_(not isinstance(desired1, ma.MaskedArray))

        a = ma.array((1, 2, 3, 4), mask=(0, 0, 0, 1))
        actual = mstats.gmean(a)
        desired = np.power(1*2*3, 1./3.)
        assert_almost_equal(actual, desired, decimal=14)

        desired1 = mstats.gmean(a, axis=-1)
        assert_almost_equal(actual, desired1, decimal=14)

    @pytest.mark.skipif(not hasattr(np, 'float96'),
                        reason='cannot find float96 so skipping')
    def test_1D_float96(self):
        a = ma.array((1, 2, 3, 4), mask=(0, 0, 0, 1))
        actual_dt = mstats.gmean(a, dtype=np.float96)
        desired_dt = np.power(1*2*3, 1./3.).astype(np.float96)
        assert_almost_equal(actual_dt, desired_dt, decimal=14)
        assert_(actual_dt.dtype == desired_dt.dtype)

    def test_2D(self):
        a = ma.array(((1, 2, 3, 4), (1, 2, 3, 4), (1, 2, 3, 4)),
                     mask=((0, 0, 0, 0), (1, 0, 0, 1), (0, 1, 1, 0)))
        actual = mstats.gmean(a)
        desired = np.array((1, 2, 3, 4))
        assert_array_almost_equal(actual, desired, decimal=14)

        desired1 = mstats.gmean(a, axis=0)
        assert_array_almost_equal(actual, desired1, decimal=14)

        actual = mstats.gmean(a, -1)
        desired = ma.array((np.power(1*2*3*4, 1./4.),
                            np.power(2*3, 1./2.),
                            np.power(1*4, 1./2.)))
        assert_array_almost_equal(actual, desired, decimal=14)


class TestHMean(object):
    def test_1D(self):
        a = (1, 2, 3, 4)
        actual = mstats.hmean(a)
        desired = 4. / (1./1 + 1./2 + 1./3 + 1./4)
        assert_almost_equal(actual, desired, decimal=14)
        desired1 = mstats.hmean(ma.array(a), axis=-1)
        assert_almost_equal(actual, desired1, decimal=14)

        a = ma.array((1, 2, 3, 4), mask=(0, 0, 0, 1))
        actual = mstats.hmean(a)
        desired = 3. / (1./1 + 1./2 + 1./3)
        assert_almost_equal(actual, desired, decimal=14)
        desired1 = mstats.hmean(a, axis=-1)
        assert_almost_equal(actual, desired1, decimal=14)

    @pytest.mark.skipif(not hasattr(np, 'float96'),
                        reason='cannot find float96 so skipping')
    def test_1D_float96(self):
        a = ma.array((1, 2, 3, 4), mask=(0, 0, 0, 1))
        actual_dt = mstats.hmean(a, dtype=np.float96)
        desired_dt = np.asarray(3. / (1./1 + 1./2 + 1./3),
                                dtype=np.float96)
        assert_almost_equal(actual_dt, desired_dt, decimal=14)
        assert_(actual_dt.dtype == desired_dt.dtype)

    def test_2D(self):
        a = ma.array(((1, 2, 3, 4), (1, 2, 3, 4), (1, 2, 3, 4)),
                     mask=((0, 0, 0, 0), (1, 0, 0, 1), (0, 1, 1, 0)))
        actual = mstats.hmean(a)
        desired = ma.array((1, 2, 3, 4))
        assert_array_almost_equal(actual, desired, decimal=14)

        actual1 = mstats.hmean(a, axis=-1)
        desired = (4./(1/1. + 1/2. + 1/3. + 1/4.),
                   2./(1/2. + 1/3.),
                   2./(1/1. + 1/4.))
        assert_array_almost_equal(actual1, desired, decimal=14)


class TestRanking(object):
    def test_ranking(self):
        x = ma.array([0, 1, 1, 1, 2, 3, 4, 5, 5, 6])
        assert_almost_equal(mstats.rankdata(x),
                            [1, 3, 3, 3, 5, 6, 7, 8.5, 8.5, 10])
        x[[3, 4]] = masked
        assert_almost_equal(mstats.rankdata(x),
                            [1, 2.5, 2.5, 0, 0, 4, 5, 6.5, 6.5, 8])
        assert_almost_equal(mstats.rankdata(x, use_missing=True),
                            [1, 2.5, 2.5, 4.5, 4.5, 4, 5, 6.5, 6.5, 8])
        x = ma.array([0, 1, 5, 1, 2, 4, 3, 5, 1, 6])
        assert_almost_equal(mstats.rankdata(x),
                            [1, 3, 8.5, 3, 5, 7, 6, 8.5, 3, 10])
        x = ma.array([[0, 1, 1, 1, 2], [3, 4, 5, 5, 6]])
        assert_almost_equal(mstats.rankdata(x),
                            [[1, 3, 3, 3, 5], [6, 7, 8.5, 8.5, 10]])
        assert_almost_equal(mstats.rankdata(x, axis=1),
                            [[1, 3, 3, 3, 5], [1, 2, 3.5, 3.5, 5]])
        assert_almost_equal(mstats.rankdata(x, axis=0),
                            [[1, 1, 1, 1, 1], [2, 2, 2, 2, 2]])


class TestCorr(object):
    def test_pearsonr(self):
        # Tests some computations of Pearson's r
        x = ma.arange(10)
        with warnings.catch_warnings():
            # The tests in this context are edge cases, with perfect
            # correlation or anticorrelation, or totally masked data.
            # None of these should trigger a RuntimeWarning.
            warnings.simplefilter("error", RuntimeWarning)

            assert_almost_equal(mstats.pearsonr(x, x)[0], 1.0)
            assert_almost_equal(mstats.pearsonr(x, x[::-1])[0], -1.0)

            x = ma.array(x, mask=True)
            pr = mstats.pearsonr(x, x)
            assert_(pr[0] is masked)
            assert_(pr[1] is masked)

        x1 = ma.array([-1.0, 0.0, 1.0])
        y1 = ma.array([0, 0, 3])
        r, p = mstats.pearsonr(x1, y1)
        assert_almost_equal(r, np.sqrt(3)/2)
        assert_almost_equal(p, 1.0/3)

        # (x2, y2) have the same unmasked data as (x1, y1).
        mask = [False, False, False, True]
        x2 = ma.array([-1.0, 0.0, 1.0, 99.0], mask=mask)
        y2 = ma.array([0, 0, 3, -1], mask=mask)
        r, p = mstats.pearsonr(x2, y2)
        assert_almost_equal(r, np.sqrt(3)/2)
        assert_almost_equal(p, 1.0/3)
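
        # Worked sketch (illustrative, not from the original test): Pearson's r
        # is cov(x, y) / (std(x) * std(y)); for x1, y1 above this is sqrt(3)/2.
        x1d = np.array([-1.0, 0.0, 1.0])
        y1d = np.array([0.0, 0.0, 3.0])
        cov = np.mean((x1d - x1d.mean()) * (y1d - y1d.mean()))
        assert_almost_equal(cov / (x1d.std() * y1d.std()), np.sqrt(3)/2)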

    def test_spearmanr(self):
        # Tests some computations of Spearman's rho
        (x, y) = ([5.05, 6.75, 3.21, 2.66], [1.65, 2.64, 2.64, 6.95])
        assert_almost_equal(mstats.spearmanr(x, y)[0], -0.6324555)
        (x, y) = ([5.05, 6.75, 3.21, 2.66, np.nan],
                  [1.65, 2.64, 2.64, 6.95, np.nan])
        (x, y) = (ma.fix_invalid(x), ma.fix_invalid(y))
        assert_almost_equal(mstats.spearmanr(x, y)[0], -0.6324555)

        x = [2.0, 47.4, 42.0, 10.8, 60.1, 1.7, 64.0, 63.1,
             1.0, 1.4, 7.9, 0.3, 3.9, 0.3, 6.7]
        y = [22.6, 8.3, 44.4, 11.9, 24.6, 0.6, 5.7, 41.6,
             0.0, 0.6, 6.7, 3.8, 1.0, 1.2, 1.4]
        assert_almost_equal(mstats.spearmanr(x, y)[0], 0.6887299)
        x = [2.0, 47.4, 42.0, 10.8, 60.1, 1.7, 64.0, 63.1,
             1.0, 1.4, 7.9, 0.3, 3.9, 0.3, 6.7, np.nan]
        y = [22.6, 8.3, 44.4, 11.9, 24.6, 0.6, 5.7, 41.6,
             0.0, 0.6, 6.7, 3.8, 1.0, 1.2, 1.4, np.nan]
        (x, y) = (ma.fix_invalid(x), ma.fix_invalid(y))
        assert_almost_equal(mstats.spearmanr(x, y)[0], 0.6887299)

        # Next test is to make sure calculation uses sufficient precision.
        # The denominator's value is ~n^3 and used to be represented as an
        # int. 2000**3 > 2**32 so these arrays would cause overflow on
        # some machines.
        x = list(range(2000))
        y = list(range(2000))
        y[0], y[9] = y[9], y[0]
        y[10], y[434] = y[434], y[10]
        y[435], y[1509] = y[1509], y[435]
        # rho = 1 - 6 * (2 * (9^2 + 424^2 + 1074^2))/(2000 * (2000^2 - 1))
        #     = 1 - (1 / 500)
        #     = 0.998
        assert_almost_equal(mstats.spearmanr(x, y)[0], 0.998)
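
        # Worked check of the closed-form formula above (an added sketch, not
        # part of the original test): rho = 1 - 6*sum(d_i**2) / (n*(n**2 - 1)),
        # where d_i are the rank differences (equal to the value differences
        # here, since both sequences are permutations of 0..1999).
        d = np.asarray(x) - np.asarray(y)
        n_obs = len(x)
        assert_almost_equal(1 - 6*np.sum(d**2)/(n_obs*(n_obs**2 - 1.0)), 0.998)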

        # test for namedtuple attributes
        res = mstats.spearmanr(x, y)
        attributes = ('correlation', 'pvalue')
        check_named_results(res, attributes, ma=True)

    def test_kendalltau(self):
        # simple case without ties
        x = ma.array(np.arange(10))
        y = ma.array(np.arange(10))
        # Cross-check with exact result from R:
        # cor.test(x,y,method="kendall",exact=1)
        expected = [1.0, 5.511463844797e-07]
        assert_almost_equal(np.asarray(mstats.kendalltau(x, y)), expected)

        # check exception in case of invalid method keyword
        assert_raises(ValueError, mstats.kendalltau, x, y, method='banana')

        # swap a couple of values
        b = y[1]
        y[1] = y[2]
        y[2] = b
        # Cross-check with exact result from R:
        # cor.test(x,y,method="kendall",exact=1)
        expected = [0.9555555555555556, 5.511463844797e-06]
        assert_almost_equal(np.asarray(mstats.kendalltau(x, y)), expected)
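
        # Worked sketch (an added illustration, not from the original test):
        # with no ties, tau = (concordant - discordant) / (n*(n-1)/2).  A single
        # adjacent swap creates exactly one discordant pair out of 45 pairs,
        # so tau = (44 - 1) / 45.
        assert_almost_equal((44 - 1) / 45.0, expected[0])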

        # swap a couple more
        b = y[5]
        y[5] = y[6]
        y[6] = b
        # Cross-check with exact result from R:
        # cor.test(x,y,method="kendall",exact=1)
        expected = [0.9111111111111111, 2.976190476190e-05]
        assert_almost_equal(np.asarray(mstats.kendalltau(x, y)), expected)

        # same in opposite direction
        x = ma.array(np.arange(10))
        y = ma.array(np.arange(10)[::-1])
        # Cross-check with exact result from R:
        # cor.test(x,y,method="kendall",exact=1)
        expected = [-1.0, 5.511463844797e-07]
        assert_almost_equal(np.asarray(mstats.kendalltau(x, y)), expected)

        # swap a couple of values
        b = y[1]
        y[1] = y[2]
        y[2] = b
        # Cross-check with exact result from R:
        # cor.test(x,y,method="kendall",exact=1)
        expected = [-0.9555555555555556, 5.511463844797e-06]
        assert_almost_equal(np.asarray(mstats.kendalltau(x, y)), expected)

        # swap a couple more
        b = y[5]
        y[5] = y[6]
        y[6] = b
        # Cross-check with exact result from R:
        # cor.test(x,y,method="kendall",exact=1)
        expected = [-0.9111111111111111, 2.976190476190e-05]
        assert_almost_equal(np.asarray(mstats.kendalltau(x, y)), expected)

        # Tests some computations of Kendall's tau
        x = ma.fix_invalid([5.05, 6.75, 3.21, 2.66, np.nan])
        y = ma.fix_invalid([1.65, 26.5, -5.93, 7.96, np.nan])
        z = ma.fix_invalid([1.65, 2.64, 2.64, 6.95, np.nan])
        assert_almost_equal(np.asarray(mstats.kendalltau(x, y)),
                            [+0.3333333, 0.75])
        assert_almost_equal(np.asarray(mstats.kendalltau(x, y, method='asymptotic')),
                            [+0.3333333, 0.4969059])
        assert_almost_equal(np.asarray(mstats.kendalltau(x, z)),
                            [-0.5477226, 0.2785987])
        #
        x = ma.fix_invalid([0, 0, 0, 0, 20, 20, 0, 60, 0, 20,
                            10, 10, 0, 40, 0, 20, 0, 0, 0, 0, 0, np.nan])
        y = ma.fix_invalid([0, 80, 80, 80, 10, 33, 60, 0, 67, 27,
                            25, 80, 80, 80, 80, 80, 80, 0, 10, 45, np.nan, 0])
        result = mstats.kendalltau(x, y)
        assert_almost_equal(np.asarray(result), [-0.1585188, 0.4128009])

        # make sure internal variable use correct precision with
        # larger arrays
        x = np.arange(2000, dtype=float)
        x = ma.masked_greater(x, 1995)
        y = np.arange(2000, dtype=float)
        y = np.concatenate((y[1000:], y[:1000]))
        assert_(np.isfinite(mstats.kendalltau(x, y)[1]))

        # test for namedtuple attributes
        res = mstats.kendalltau(x, y)
        attributes = ('correlation', 'pvalue')
        check_named_results(res, attributes, ma=True)

    def test_kendalltau_seasonal(self):
        # Tests the seasonal Kendall tau.
        x = [[nan, nan, 4, 2, 16, 26, 5, 1, 5, 1, 2, 3, 1],
             [4, 3, 5, 3, 2, 7, 3, 1, 1, 2, 3, 5, 3],
             [3, 2, 5, 6, 18, 4, 9, 1, 1, nan, 1, 1, nan],
             [nan, 6, 11, 4, 17, nan, 6, 1, 1, 2, 5, 1, 1]]
        x = ma.fix_invalid(x).T
        output = mstats.kendalltau_seasonal(x)
        assert_almost_equal(output['global p-value (indep)'], 0.008, 3)
        assert_almost_equal(output['seasonal p-value'].round(2),
                            [0.18, 0.53, 0.20, 0.04])

    def test_pointbiserial(self):
        x = [1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0,
             0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, -1]
        y = [14.8, 13.8, 12.4, 10.1, 7.1, 6.1, 5.8, 4.6, 4.3, 3.5, 3.3, 3.2,
             3.0, 2.8, 2.8, 2.5, 2.4, 2.3, 2.1, 1.7, 1.7, 1.5, 1.3, 1.3, 1.2,
             1.2, 1.1, 0.8, 0.7, 0.6, 0.5, 0.2, 0.2, 0.1, np.nan]
        assert_almost_equal(mstats.pointbiserialr(x, y)[0], 0.36149, 5)

        # test for namedtuple attributes
        res = mstats.pointbiserialr(x, y)
        attributes = ('correlation', 'pvalue')
        check_named_results(res, attributes, ma=True)


class TestTrimming(object):
    def test_trim(self):
        a = ma.arange(10)
        assert_equal(mstats.trim(a), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
        a = ma.arange(10)
        assert_equal(mstats.trim(a, (2, 8)),
                     [None, None, 2, 3, 4, 5, 6, 7, 8, None])
        a = ma.arange(10)
        assert_equal(mstats.trim(a, limits=(2, 8), inclusive=(False, False)),
                     [None, None, None, 3, 4, 5, 6, 7, None, None])
        a = ma.arange(10)
        assert_equal(mstats.trim(a, limits=(0.1, 0.2), relative=True),
                     [None, 1, 2, 3, 4, 5, 6, 7, None, None])

        a = ma.arange(12)
        a[[0, -1]] = a[5] = masked
        assert_equal(mstats.trim(a, (2, 8)),
                     [None, None, 2, 3, 4, None, 6, 7, 8, None, None, None])

        x = ma.arange(100).reshape(10, 10)
        expected = [1]*10 + [0]*70 + [1]*20
        trimx = mstats.trim(x, (0.1, 0.2), relative=True, axis=None)
        assert_equal(trimx._mask.ravel(), expected)
        trimx = mstats.trim(x, (0.1, 0.2), relative=True, axis=0)
        assert_equal(trimx._mask.ravel(), expected)
        trimx = mstats.trim(x, (0.1, 0.2), relative=True, axis=-1)
        assert_equal(trimx._mask.T.ravel(), expected)

        # same as above, but with an extra masked row inserted
        x = ma.arange(110).reshape(11, 10)
        x[1] = masked
        expected = [1]*20 + [0]*70 + [1]*20
        trimx = mstats.trim(x, (0.1, 0.2), relative=True, axis=None)
        assert_equal(trimx._mask.ravel(), expected)
        trimx = mstats.trim(x, (0.1, 0.2), relative=True, axis=0)
        assert_equal(trimx._mask.ravel(), expected)
        trimx = mstats.trim(x.T, (0.1, 0.2), relative=True, axis=-1)
        assert_equal(trimx.T._mask.ravel(), expected)

    def test_trim_old(self):
        x = ma.arange(100)
        assert_equal(mstats.trimboth(x).count(), 60)
        assert_equal(mstats.trimtail(x, tail='r').count(), 80)
        x[50:70] = masked
        trimx = mstats.trimboth(x)
        assert_equal(trimx.count(), 48)
        assert_equal(trimx._mask, [1]*16 + [0]*34 + [1]*20 + [0]*14 + [1]*16)
        x._mask = nomask
        x.shape = (10, 10)
        assert_equal(mstats.trimboth(x).count(), 60)
        assert_equal(mstats.trimtail(x).count(), 80)

    def test_trimmedmean(self):
        data = ma.array([77, 87, 88, 114, 151, 210, 219, 246, 253, 262,
                         296, 299, 306, 376, 428, 515, 666, 1310, 2611])
        assert_almost_equal(mstats.trimmed_mean(data, 0.1), 343, 0)
        assert_almost_equal(mstats.trimmed_mean(data, (0.1, 0.1)), 343, 0)
        assert_almost_equal(mstats.trimmed_mean(data, (0.2, 0.2)), 283, 0)

    def test_trimmed_stde(self):
        data = ma.array([77, 87, 88, 114, 151, 210, 219, 246, 253, 262,
                         296, 299, 306, 376, 428, 515, 666, 1310, 2611])
        assert_almost_equal(mstats.trimmed_stde(data, (0.2, 0.2)), 56.13193, 5)
        assert_almost_equal(mstats.trimmed_stde(data, 0.2), 56.13193, 5)

    def test_winsorization(self):
        data = ma.array([77, 87, 88, 114, 151, 210, 219, 246, 253, 262,
                         296, 299, 306, 376, 428, 515, 666, 1310, 2611])
        assert_almost_equal(mstats.winsorize(data, (0.2, 0.2)).var(ddof=1),
                            21551.4, 1)
        assert_almost_equal(
            mstats.winsorize(data, (0.2, 0.2), (False, False)).var(ddof=1),
            11887.3, 1)
        data[5] = masked
        winsorized = mstats.winsorize(data)
        assert_equal(winsorized.mask, data.mask)


class TestMoments(object):
    # Comparison numbers are found using R v.1.5.1
    # note that length(testcase) = 4
    # testmathworks comes from documentation for the
    # Statistics Toolbox for Matlab and can be found at both
    # https://www.mathworks.com/help/stats/kurtosis.html
    # https://www.mathworks.com/help/stats/skewness.html
    # Note that both test cases came from here.
    testcase = [1, 2, 3, 4]
    testmathworks = ma.fix_invalid([1.165, 0.6268, 0.0751, 0.3516, -0.6965,
                                    np.nan])
    testcase_2d = ma.array(
        np.array([[0.05245846, 0.50344235, 0.86589117, 0.36936353, 0.46961149],
                  [0.11574073, 0.31299969, 0.45925772, 0.72618805, 0.75194407],
                  [0.67696689, 0.91878127, 0.09769044, 0.04645137, 0.37615733],
                  [0.05903624, 0.29908861, 0.34088298, 0.66216337, 0.83160998],
                  [0.64619526, 0.94894632, 0.27855892, 0.0706151, 0.39962917]]),
        mask=np.array([[True, False, False, True, False],
                       [True, True, True, False, True],
                       [False, False, False, False, False],
                       [True, True, True, True, True],
                       [False, False, True, False, False]], dtype=bool))

    def test_moment(self):
        y = mstats.moment(self.testcase, 1)
        assert_almost_equal(y, 0.0, 10)
        y = mstats.moment(self.testcase, 2)
        assert_almost_equal(y, 1.25)
        y = mstats.moment(self.testcase, 3)
        assert_almost_equal(y, 0.0)
        y = mstats.moment(self.testcase, 4)
        assert_almost_equal(y, 2.5625)

    def test_variation(self):
        y = mstats.variation(self.testcase)
        assert_almost_equal(y, 0.44721359549996, 10)

    def test_skewness(self):
        y = mstats.skew(self.testmathworks)
        assert_almost_equal(y, -0.29322304336607, 10)
        y = mstats.skew(self.testmathworks, bias=0)
        assert_almost_equal(y, -0.437111105023940, 10)
        y = mstats.skew(self.testcase)
        assert_almost_equal(y, 0.0, 10)

    def test_kurtosis(self):
        # Set flags for axis = 0 and fisher=0 (Pearson's definition of kurtosis
        # for compatibility with Matlab)
        y = mstats.kurtosis(self.testmathworks, 0, fisher=0, bias=1)
        assert_almost_equal(y, 2.1658856802973, 10)
        # Note that MATLAB has confusing docs for the following case:
        # kurtosis(x,0) gives an unbiased estimate of Pearson's kurtosis
        # kurtosis(x)   gives a biased estimate of Fisher's kurtosis (Pearson-3)
        # The MATLAB docs imply that both should give Fisher's
        y = mstats.kurtosis(self.testmathworks, fisher=0, bias=0)
        assert_almost_equal(y, 3.663542721189047, 10)
        y = mstats.kurtosis(self.testcase, 0, 0)
        assert_almost_equal(y, 1.64)
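
        # Worked sketch of Pearson's definition (an added illustration, not
        # part of the original test): kurtosis is the fourth central moment
        # over the squared variance, m4 / m2**2, which is
        # 2.5625 / 1.25**2 = 1.64 for [1, 2, 3, 4].
        vals = np.asarray(self.testcase, dtype=float)
        m2 = np.mean((vals - vals.mean())**2)
        m4 = np.mean((vals - vals.mean())**4)
        assert_almost_equal(m4 / m2**2, 1.64)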

        # test that kurtosis works on multidimensional masked arrays
        correct_2d = ma.array(np.array([-1.5, -3., -1.47247052385, 0.,
                                        -1.26979517952]),
                              mask=np.array([False, False, False, True,
                                             False], dtype=bool))
        assert_array_almost_equal(mstats.kurtosis(self.testcase_2d, 1),
                                  correct_2d)
        for i, row in enumerate(self.testcase_2d):
            assert_almost_equal(mstats.kurtosis(row), correct_2d[i])

        correct_2d_bias_corrected = ma.array(
            np.array([-1.5, -3., -1.88988209538, 0., -0.5234638463918877]),
            mask=np.array([False, False, False, True, False], dtype=bool))
        assert_array_almost_equal(mstats.kurtosis(self.testcase_2d, 1,
                                                  bias=False),
                                  correct_2d_bias_corrected)
        for i, row in enumerate(self.testcase_2d):
            assert_almost_equal(mstats.kurtosis(row, bias=False),
                                correct_2d_bias_corrected[i])

        # Check consistency between stats and mstats implementations
        assert_array_almost_equal_nulp(mstats.kurtosis(self.testcase_2d[2, :]),
                                       stats.kurtosis(self.testcase_2d[2, :]),
                                       nulp=4)

    def test_mode(self):
        a1 = [0, 0, 0, 1, 1, 1, 2, 3, 3, 3, 3, 4, 5, 6, 7]
        a2 = np.reshape(a1, (3, 5))
        a3 = np.array([1, 2, 3, 4, 5, 6])
        a4 = np.reshape(a3, (3, 2))
        ma1 = ma.masked_where(ma.array(a1) > 2, a1)
        ma2 = ma.masked_where(a2 > 2, a2)
        ma3 = ma.masked_where(a3 < 2, a3)
        ma4 = ma.masked_where(ma.array(a4) < 2, a4)
        assert_equal(mstats.mode(a1, axis=None), (3, 4))
        assert_equal(mstats.mode(a1, axis=0), (3, 4))
        assert_equal(mstats.mode(ma1, axis=None), (0, 3))
        assert_equal(mstats.mode(a2, axis=None), (3, 4))
        assert_equal(mstats.mode(ma2, axis=None), (0, 3))
        assert_equal(mstats.mode(a3, axis=None), (1, 1))
        assert_equal(mstats.mode(ma3, axis=None), (2, 1))
        assert_equal(mstats.mode(a2, axis=0),
                     ([[0, 0, 0, 1, 1]], [[1, 1, 1, 1, 1]]))
        assert_equal(mstats.mode(ma2, axis=0),
                     ([[0, 0, 0, 1, 1]], [[1, 1, 1, 1, 1]]))
        assert_equal(mstats.mode(a2, axis=-1),
                     ([[0], [3], [3]], [[3], [3], [1]]))
        assert_equal(mstats.mode(ma2, axis=-1),
                     ([[0], [1], [0]], [[3], [1], [0]]))
        assert_equal(mstats.mode(ma4, axis=0), ([[3, 2]], [[1, 1]]))
        assert_equal(mstats.mode(ma4, axis=-1),
                     ([[2], [3], [5]], [[1], [1], [1]]))

        a1_res = mstats.mode(a1, axis=None)

        # test for namedtuple attributes
        attributes = ('mode', 'count')
        check_named_results(a1_res, attributes, ma=True)

    def test_mode_modifies_input(self):
        # regression test for gh-6428: mode(..., axis=None) may not modify
        # the input array
        im = np.zeros((100, 100))
        im[:50, :] += 1
        im[:, :50] += 1
        cp = im.copy()
        a = mstats.mode(im, None)
        assert_equal(im, cp)


class TestPercentile(object):
    def setup_method(self):
        self.a1 = [3, 4, 5, 10, -3, -5, 6]
        self.a2 = [3, -6, -2, 8, 7, 4, 2, 1]
        self.a3 = [3., 4, 5, 10, -3, -5, -6, 7.0]

    def test_percentile(self):
        x = np.arange(8) * 0.5
        assert_equal(mstats.scoreatpercentile(x, 0), 0.)
        assert_equal(mstats.scoreatpercentile(x, 100), 3.5)
        assert_equal(mstats.scoreatpercentile(x, 50), 1.75)

    def test_2D(self):
        x = ma.array([[1, 1, 1],
                      [1, 1, 1],
                      [4, 4, 3],
                      [1, 1, 1],
                      [1, 1, 1]])
        assert_equal(mstats.scoreatpercentile(x, 50), [1, 1, 1])


class TestVariability(object):
    """ Comparison numbers are found using R v.1.5.1
        note that length(testcase) = 4
    """
    testcase = ma.fix_invalid([1, 2, 3, 4, np.nan])

    def test_sem(self):
        # This is not in R, so used: sqrt(var(testcase)*3/4) / sqrt(3)
        y = mstats.sem(self.testcase)
        assert_almost_equal(y, 0.6454972244)
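
        # Minimal worked check (an added sketch, not from the original test):
        # equivalently, sem = std(x, ddof=1) / sqrt(n) for the unmasked data.
        vals = np.array([1., 2., 3., 4.])
        assert_almost_equal(vals.std(ddof=1) / np.sqrt(vals.size), 0.6454972244)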

        n = self.testcase.count()
        assert_allclose(mstats.sem(self.testcase, ddof=0) * np.sqrt(n/(n-2)),
                        mstats.sem(self.testcase, ddof=2))

    def test_zmap(self):
        # This is not in R, so tested by using:
        # (testcase[i]-mean(testcase,axis=0)) / sqrt(var(testcase)*3/4)
        y = mstats.zmap(self.testcase, self.testcase)
        desired_unmaskedvals = ([-1.3416407864999, -0.44721359549996,
                                 0.44721359549996, 1.3416407864999])
        assert_array_almost_equal(desired_unmaskedvals,
                                  y.data[y.mask == False], decimal=12)

    def test_zscore(self):
        # This is not in R, so tested by using:
        # (testcase[i]-mean(testcase,axis=0)) / sqrt(var(testcase)*3/4)
        y = mstats.zscore(self.testcase)
        desired = ma.fix_invalid([-1.3416407864999, -0.44721359549996,
                                  0.44721359549996, 1.3416407864999, np.nan])
        assert_almost_equal(desired, y, decimal=12)
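
        # Worked sketch of the reference formula above (an added illustration,
        # not part of the original test): z_i = (x_i - mean(x)) / std(x), using
        # the population (ddof=0) standard deviation.
        vals = np.array([1., 2., 3., 4.])
        z_ref = (vals - vals.mean()) / vals.std(ddof=0)
        assert_array_almost_equal(z_ref, [-1.3416407864999, -0.44721359549996,
                                          0.44721359549996, 1.3416407864999],
                                  decimal=12)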


class TestMisc(object):
    def test_obrientransform(self):
        args = [[5]*5+[6]*11+[7]*9+[8]*3+[9]*2+[10]*2,
                [6]+[7]*2+[8]*4+[9]*9+[10]*16]
        result = [5*[3.1828]+11*[0.5591]+9*[0.0344]+3*[1.6086]+2*[5.2817]+2*[11.0538],
                  [10.4352]+2*[4.8599]+4*[1.3836]+9*[0.0061]+16*[0.7277]]
        assert_almost_equal(np.round(mstats.obrientransform(*args).T, 4),
                            result, 4)

    def test_kstwosamp(self):
        x = [[nan, nan, 4, 2, 16, 26, 5, 1, 5, 1, 2, 3, 1],
             [4, 3, 5, 3, 2, 7, 3, 1, 1, 2, 3, 5, 3],
             [3, 2, 5, 6, 18, 4, 9, 1, 1, nan, 1, 1, nan],
             [nan, 6, 11, 4, 17, nan, 6, 1, 1, 2, 5, 1, 1]]
        x = ma.fix_invalid(x).T
        (winter, spring, summer, fall) = x.T

        assert_almost_equal(np.round(mstats.ks_twosamp(winter, spring), 4),
                            (0.1818, 0.9892))
        assert_almost_equal(np.round(mstats.ks_twosamp(winter, spring, 'g'), 4),
                            (0.1469, 0.7734))
        assert_almost_equal(np.round(mstats.ks_twosamp(winter, spring, 'l'), 4),
                            (0.1818, 0.6744))

    def test_friedmanchisq(self):
        # No missing values
        args = ([9.0, 9.5, 5.0, 7.5, 9.5, 7.5, 8.0, 7.0, 8.5, 6.0],
                [7.0, 6.5, 7.0, 7.5, 5.0, 8.0, 6.0, 6.5, 7.0, 7.0],
                [6.0, 8.0, 4.0, 6.0, 7.0, 6.5, 6.0, 4.0, 6.5, 3.0])
        result = mstats.friedmanchisquare(*args)
        assert_almost_equal(result[0], 10.4737, 4)
        assert_almost_equal(result[1], 0.005317, 6)

        # Missing values
        x = [[nan, nan, 4, 2, 16, 26, 5, 1, 5, 1, 2, 3, 1],
             [4, 3, 5, 3, 2, 7, 3, 1, 1, 2, 3, 5, 3],
             [3, 2, 5, 6, 18, 4, 9, 1, 1, nan, 1, 1, nan],
             [nan, 6, 11, 4, 17, nan, 6, 1, 1, 2, 5, 1, 1]]
        x = ma.fix_invalid(x)
        result = mstats.friedmanchisquare(*x)
        assert_almost_equal(result[0], 2.0156, 4)
        assert_almost_equal(result[1], 0.5692, 4)

        # test for namedtuple attributes
        attributes = ('statistic', 'pvalue')
        check_named_results(result, attributes, ma=True)


def test_regress_simple():
    # Regress a line with sinusoidal noise. Test for #1273.
    x = np.linspace(0, 100, 100)
    y = 0.2 * np.linspace(0, 100, 100) + 10
    y += np.sin(np.linspace(0, 20, 100))

    slope, intercept, r_value, p_value, sterr = mstats.linregress(x, y)
    assert_almost_equal(slope, 0.19644990055858422)
    assert_almost_equal(intercept, 10.211269918932341)

    # test for namedtuple attributes
    res = mstats.linregress(x, y)
    attributes = ('slope', 'intercept', 'rvalue', 'pvalue', 'stderr')
    check_named_results(res, attributes, ma=True)


def test_theilslopes():
    # Test for basic slope and intercept.
    slope, intercept, lower, upper = mstats.theilslopes([0, 1, 1])
    assert_almost_equal(slope, 0.5)
    assert_almost_equal(intercept, 0.5)
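
    # Worked sketch (an added illustration, not from the original test): the
    # Theil slope is the median of all pairwise slopes.  For y = [0, 1, 1] at
    # x = [0, 1, 2] those slopes are (1.0, 0.5, 0.0), whose median is 0.5.
    assert_almost_equal(np.median([1.0, 0.5, 0.0]), 0.5)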

    # Test for correct masking.
    y = np.ma.array([0, 1, 100, 1], mask=[False, False, True, False])
    slope, intercept, lower, upper = mstats.theilslopes(y)
    assert_almost_equal(slope, 1./3)
    assert_almost_equal(intercept, 2./3)

    # Test of confidence intervals from example in Sen (1968).
    x = [1, 2, 3, 4, 10, 12, 18]
    y = [9, 15, 19, 20, 45, 55, 78]
    slope, intercept, lower, upper = mstats.theilslopes(y, x, 0.07)
    assert_almost_equal(slope, 4)
    assert_almost_equal(upper, 4.38, decimal=2)
    assert_almost_equal(lower, 3.71, decimal=2)


def test_siegelslopes():
    # method should be exact for straight line
    y = 2 * np.arange(10) + 0.5
    assert_equal(mstats.siegelslopes(y), (2.0, 0.5))
    assert_equal(mstats.siegelslopes(y, method='separate'), (2.0, 0.5))

    x = 2 * np.arange(10)
    y = 5 * x - 3.0
    assert_equal(mstats.siegelslopes(y, x), (5.0, -3.0))
    assert_equal(mstats.siegelslopes(y, x, method='separate'), (5.0, -3.0))

    # method is robust to outliers: breakdown point of 50%
    y[:4] = 1000
    assert_equal(mstats.siegelslopes(y, x), (5.0, -3.0))

    # if there are no outliers, results should be comparable to linregress
    np.random.seed(231)
    x = np.arange(10)
    y = -2.3 + 0.3*x + stats.norm.rvs(size=10)
    slope_ols, intercept_ols, _, _, _ = stats.linregress(x, y)

    slope, intercept = mstats.siegelslopes(y, x)
    assert_allclose(slope, slope_ols, rtol=0.1)
    assert_allclose(intercept, intercept_ols, rtol=0.1)

    slope, intercept = mstats.siegelslopes(y, x, method='separate')
    assert_allclose(slope, slope_ols, rtol=0.1)
    assert_allclose(intercept, intercept_ols, rtol=0.1)


def test_plotting_positions():
    # Regression test for #1256
    pos = mstats.plotting_positions(np.arange(3), 0, 0)
    assert_array_almost_equal(pos.data, np.array([0.25, 0.5, 0.75]))
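

def test_plotting_positions_formula():
    # Worked sketch (an added illustration, not from the original file): with
    # alpha and beta both zero, the plotting position of the k-th order
    # statistic reduces to k / (n + 1), which is what the regression test
    # above relies on.
    n = 3
    expected = np.arange(1, n + 1) / (n + 1.0)
    assert_array_almost_equal(mstats.plotting_positions(np.arange(n), 0, 0),
                              expected)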


class TestNormalitytests():
    def test_vs_nonmasked(self):
        x = np.array((-2, -1, 0, 1, 2, 3)*4)**2
        assert_array_almost_equal(mstats.normaltest(x),
                                  stats.normaltest(x))
        assert_array_almost_equal(mstats.skewtest(x),
                                  stats.skewtest(x))
        assert_array_almost_equal(mstats.kurtosistest(x),
                                  stats.kurtosistest(x))

        funcs = [stats.normaltest, stats.skewtest, stats.kurtosistest]
        mfuncs = [mstats.normaltest, mstats.skewtest, mstats.kurtosistest]
        x = [1, 2, 3, 4]
        for func, mfunc in zip(funcs, mfuncs):
            assert_raises(ValueError, func, x)
            assert_raises(ValueError, mfunc, x)

    def test_axis_None(self):
        # Test axis=None (equal to axis=0 for 1-D input)
        x = np.array((-2, -1, 0, 1, 2, 3)*4)**2
        assert_allclose(mstats.normaltest(x, axis=None), mstats.normaltest(x))
        assert_allclose(mstats.skewtest(x, axis=None), mstats.skewtest(x))
        assert_allclose(mstats.kurtosistest(x, axis=None),
                        mstats.kurtosistest(x))

    def test_maskedarray_input(self):
        # Add some masked values, test result doesn't change
        x = np.array((-2, -1, 0, 1, 2, 3)*4)**2
        xm = np.ma.array(np.r_[np.inf, x, 10],
                         mask=np.r_[True, [False] * x.size, True])
        assert_allclose(mstats.normaltest(xm), stats.normaltest(x))
        assert_allclose(mstats.skewtest(xm), stats.skewtest(x))
        assert_allclose(mstats.kurtosistest(xm), stats.kurtosistest(x))

    def test_nd_input(self):
        x = np.array((-2, -1, 0, 1, 2, 3)*4)**2
        x_2d = np.vstack([x] * 2).T
        for func in [mstats.normaltest, mstats.skewtest, mstats.kurtosistest]:
            res_1d = func(x)
            res_2d = func(x_2d)
            assert_allclose(res_2d[0], [res_1d[0]] * 2)
            assert_allclose(res_2d[1], [res_1d[1]] * 2)

    def test_normaltest_result_attributes(self):
        x = np.array((-2, -1, 0, 1, 2, 3)*4)**2
        res = mstats.normaltest(x)
        attributes = ('statistic', 'pvalue')
        check_named_results(res, attributes, ma=True)

    def test_kurtosistest_result_attributes(self):
        x = np.array((-2, -1, 0, 1, 2, 3)*4)**2
        res = mstats.kurtosistest(x)
        attributes = ('statistic', 'pvalue')
        check_named_results(res, attributes, ma=True)

    def regression_test_9033(self):
        # x is clearly non-normal, but the power of a negative denominator
        # needs to be handled correctly to reject normality
        counts = [128, 0, 58, 7, 0, 41, 16, 0, 0, 167]
        x = np.hstack([np.full(c, i) for i, c in enumerate(counts)])
        assert_equal(mstats.kurtosistest(x)[1] < 0.01, True)


class TestFOneway():
    def test_result_attributes(self):
        a = np.array([655, 788], dtype=np.uint16)
        b = np.array([789, 772], dtype=np.uint16)
        res = mstats.f_oneway(a, b)
        attributes = ('statistic', 'pvalue')
        check_named_results(res, attributes, ma=True)


class TestMannwhitneyu():
    def test_result_attributes(self):
        x = np.array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
                      1., 1., 1., 1., 1., 1., 1., 2., 1., 1., 1., 1., 1., 1.,
                      1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
                      1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
                      1., 1., 1., 1., 1., 1., 1., 2., 1., 1., 1., 1., 1., 1.,
                      1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
                      1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 2.,
                      1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
                      1., 1., 2., 1., 1., 1., 1., 2., 1., 1., 2., 1., 1., 2.,
                      1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
                      1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 2., 1.,
                      1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
                      1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
                      1., 1., 1., 1., 1., 1., 1., 2., 1., 1., 1., 1., 1., 1.,
                      1., 1., 1., 2., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
                      1., 1., 1., 1., 1., 1., 1., 1., 3., 1., 1., 1., 1., 1.,
                      1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
                      1., 1., 1., 1., 1., 1.])
        y = np.array([1., 1., 1., 1., 1., 1., 1., 2., 1., 2., 1., 1., 1., 1.,
                      2., 1., 1., 1., 2., 1., 1., 1., 1., 1., 2., 1., 1., 3.,
                      1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 2., 1., 2., 1.,
                      1., 1., 1., 1., 1., 2., 1., 1., 1., 1., 1., 1., 1., 1.,
                      1., 1., 1., 1., 1., 1., 1., 2., 1., 1., 1., 1., 1., 2.,
                      2., 1., 1., 2., 1., 1., 2., 1., 2., 1., 1., 1., 1., 2.,
                      2., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
                      1., 2., 1., 1., 1., 1., 1., 2., 2., 2., 1., 1., 1., 1.,
                      1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
                      2., 1., 1., 2., 1., 1., 1., 1., 2., 1., 1., 1., 1., 1.,
                      1., 1., 1., 1., 1., 1., 1., 2., 1., 1., 1., 2., 1., 1.,
                      1., 1., 1., 1.])
        res = mstats.mannwhitneyu(x, y)
        attributes = ('statistic', 'pvalue')
        check_named_results(res, attributes, ma=True)


class TestKruskal():
    def test_result_attributes(self):
        x = [1, 3, 5, 7, 9]
        y = [2, 4, 6, 8, 10]
        res = mstats.kruskal(x, y)
        attributes = ('statistic', 'pvalue')
        check_named_results(res, attributes, ma=True)


# TODO: for all ttest functions, add tests with masked array inputs
class TestTtest_rel():
    def test_vs_nonmasked(self):
        np.random.seed(1234567)
        outcome = np.random.randn(20, 4) + [0, 0, 1, 2]

        # 1-D inputs
        res1 = stats.ttest_rel(outcome[:, 0], outcome[:, 1])
        res2 = mstats.ttest_rel(outcome[:, 0], outcome[:, 1])
        assert_allclose(res1, res2)

        # 2-D inputs
        res1 = stats.ttest_rel(outcome[:, 0], outcome[:, 1], axis=None)
        res2 = mstats.ttest_rel(outcome[:, 0], outcome[:, 1], axis=None)
        assert_allclose(res1, res2)
        res1 = stats.ttest_rel(outcome[:, :2], outcome[:, 2:], axis=0)
        res2 = mstats.ttest_rel(outcome[:, :2], outcome[:, 2:], axis=0)
        assert_allclose(res1, res2)

        # Check default is axis=0
        res3 = mstats.ttest_rel(outcome[:, :2], outcome[:, 2:])
        assert_allclose(res2, res3)

    def test_fully_masked(self):
        np.random.seed(1234567)
        outcome = ma.masked_array(np.random.randn(3, 2),
                                  mask=[[1, 1, 1], [0, 0, 0]])
        with suppress_warnings() as sup:
            sup.filter(RuntimeWarning, "invalid value encountered in absolute")
            for pair in [(outcome[:, 0], outcome[:, 1]),
                         ([np.nan, np.nan], [1.0, 2.0])]:
                t, p = mstats.ttest_rel(*pair)
                assert_array_equal(t, (np.nan, np.nan))
                assert_array_equal(p, (np.nan, np.nan))

    def test_result_attributes(self):
        np.random.seed(1234567)
        outcome = np.random.randn(20, 4) + [0, 0, 1, 2]
        res = mstats.ttest_rel(outcome[:, 0], outcome[:, 1])
        attributes = ('statistic', 'pvalue')
        check_named_results(res, attributes, ma=True)

    def test_invalid_input_size(self):
        assert_raises(ValueError, mstats.ttest_rel,
                      np.arange(10), np.arange(11))
        x = np.arange(24)
        assert_raises(ValueError, mstats.ttest_rel,
                      x.reshape(2, 3, 4), x.reshape(2, 4, 3), axis=1)
        assert_raises(ValueError, mstats.ttest_rel,
                      x.reshape(2, 3, 4), x.reshape(2, 4, 3), axis=2)

    def test_empty(self):
        res1 = mstats.ttest_rel([], [])
        assert_(np.all(np.isnan(res1)))

    def test_zero_division(self):
        t, p = mstats.ttest_ind([0, 0, 0], [1, 1, 1])
        assert_equal((np.abs(t), p), (np.inf, 0))

        with suppress_warnings() as sup:
            sup.filter(RuntimeWarning, "invalid value encountered in absolute")
            t, p = mstats.ttest_ind([0, 0, 0], [0, 0, 0])
            assert_array_equal(t, np.array([np.nan, np.nan]))
            assert_array_equal(p, np.array([np.nan, np.nan]))


class TestTtest_ind():
    def test_vs_nonmasked(self):
        np.random.seed(1234567)
        outcome = np.random.randn(20, 4) + [0, 0, 1, 2]

        # 1-D inputs
        res1 = stats.ttest_ind(outcome[:, 0], outcome[:, 1])
        res2 = mstats.ttest_ind(outcome[:, 0], outcome[:, 1])
        assert_allclose(res1, res2)

        # 2-D inputs
        res1 = stats.ttest_ind(outcome[:, 0], outcome[:, 1], axis=None)
        res2 = mstats.ttest_ind(outcome[:, 0], outcome[:, 1], axis=None)
        assert_allclose(res1, res2)
        res1 = stats.ttest_ind(outcome[:, :2], outcome[:, 2:], axis=0)
        res2 = mstats.ttest_ind(outcome[:, :2], outcome[:, 2:], axis=0)
        assert_allclose(res1, res2)

        # Check default is axis=0
        res3 = mstats.ttest_ind(outcome[:, :2], outcome[:, 2:])
        assert_allclose(res2, res3)

        # Check equal_var
        res4 = stats.ttest_ind(outcome[:, 0], outcome[:, 1], equal_var=True)
        res5 = mstats.ttest_ind(outcome[:, 0], outcome[:, 1], equal_var=True)
        assert_allclose(res4, res5)
        res4 = stats.ttest_ind(outcome[:, 0], outcome[:, 1], equal_var=False)
        res5 = mstats.ttest_ind(outcome[:, 0], outcome[:, 1], equal_var=False)
        assert_allclose(res4, res5)

    def test_fully_masked(self):
        np.random.seed(1234567)
        outcome = ma.masked_array(np.random.randn(3, 2),
                                  mask=[[1, 1, 1], [0, 0, 0]])
        with suppress_warnings() as sup:
            sup.filter(RuntimeWarning, "invalid value encountered in absolute")
            for pair in [(outcome[:, 0], outcome[:, 1]),
                         ([np.nan, np.nan], [1.0, 2.0])]:
                t, p = mstats.ttest_ind(*pair)
                assert_array_equal(t, (np.nan, np.nan))
                assert_array_equal(p, (np.nan, np.nan))

    def test_result_attributes(self):
        np.random.seed(1234567)
        outcome = np.random.randn(20, 4) + [0, 0, 1, 2]
        res = mstats.ttest_ind(outcome[:, 0], outcome[:, 1])
        attributes = ('statistic', 'pvalue')
        check_named_results(res, attributes, ma=True)

    def test_empty(self):
        res1 = mstats.ttest_ind([], [])
        assert_(np.all(np.isnan(res1)))

    def test_zero_division(self):
        t, p = mstats.ttest_ind([0, 0, 0], [1, 1, 1])
        assert_equal((np.abs(t), p), (np.inf, 0))

        with suppress_warnings() as sup:
            sup.filter(RuntimeWarning, "invalid value encountered in absolute")
            t, p = mstats.ttest_ind([0, 0, 0], [0, 0, 0])
            assert_array_equal(t, (np.nan, np.nan))
            assert_array_equal(p, (np.nan, np.nan))

        t, p = mstats.ttest_ind([0, 0, 0], [1, 1, 1], equal_var=False)
        assert_equal((np.abs(t), p), (np.inf, 0))
        assert_array_equal(mstats.ttest_ind([0, 0, 0], [0, 0, 0],
                                            equal_var=False), (np.nan, np.nan))


class TestTtest_1samp():
    def test_vs_nonmasked(self):
        np.random.seed(1234567)
        outcome = np.random.randn(20, 4) + [0, 0, 1, 2]

        # 1-D inputs
        res1 = stats.ttest_1samp(outcome[:, 0], 1)
        res2 = mstats.ttest_1samp(outcome[:, 0], 1)
        assert_allclose(res1, res2)

        # 2-D inputs
        res1 = stats.ttest_1samp(outcome[:, 0], outcome[:, 1], axis=None)
        res2 = mstats.ttest_1samp(outcome[:, 0], outcome[:, 1], axis=None)
        assert_allclose(res1, res2)
        res1 = stats.ttest_1samp(outcome[:, :2], outcome[:, 2:], axis=0)
        res2 = mstats.ttest_1samp(outcome[:, :2], outcome[:, 2:], axis=0)
        assert_allclose(res1, res2)

        # Check default is axis=0
        res3 = mstats.ttest_1samp(outcome[:, :2], outcome[:, 2:])
        assert_allclose(res2, res3)

    def test_fully_masked(self):
        np.random.seed(1234567)
        outcome = ma.masked_array(np.random.randn(3), mask=[1, 1, 1])
        expected = (np.nan, np.nan)
        with suppress_warnings() as sup:
            sup.filter(RuntimeWarning, "invalid value encountered in absolute")
            for pair in [((np.nan, np.nan), 0.0), (outcome, 0.0)]:
                t, p = mstats.ttest_1samp(*pair)
                assert_array_equal(p, expected)
                assert_array_equal(t, expected)

    def test_result_attributes(self):
        np.random.seed(1234567)
        outcome = np.random.randn(20, 4) + [0, 0, 1, 2]
        res = mstats.ttest_1samp(outcome[:, 0], 1)
        attributes = ('statistic', 'pvalue')
        check_named_results(res, attributes, ma=True)

    def test_empty(self):
        res1 = mstats.ttest_1samp([], 1)
        assert_(np.all(np.isnan(res1)))

    def test_zero_division(self):
        t, p = mstats.ttest_1samp([0, 0, 0], 1)
        assert_equal((np.abs(t), p), (np.inf, 0))

        with suppress_warnings() as sup:
            sup.filter(RuntimeWarning, "invalid value encountered in absolute")
            t, p = mstats.ttest_1samp([0, 0, 0], 0)
            assert_(np.isnan(t))
            assert_array_equal(p, (np.nan, np.nan))


class TestCompareWithStats(object):
    """
    Class to compare mstats results with stats results.

    It is in general assumed that scipy.stats is at a more mature stage than
    stats.mstats.  If a routine in mstats produces results similar to those
    from scipy.stats, this is also considered a proper validation of the
    mstats routine.

    Different sample sizes are used for testing, as some problems between
    stats and mstats depend on sample size.

    Author: Alexander Loew

    NOTE that some tests fail. This might be caused by
    a) actual differences or bugs between stats and mstats
    b) numerical inaccuracies
    c) different definitions of routine interfaces
    These failures need to be checked. The current workaround is to keep the
    affected tests disabled while issuing reports on scipy-dev.
    """

    def get_n(self):
        """ Returns list of sample sizes to be used for comparison. """
        return [1000, 100, 10, 5]

    def generate_xy_sample(self, n):
        # This routine generates numpy arrays and corresponding masked arrays
        # with the same data, but additional masked values
        np.random.seed(1234567)
        x = np.random.randn(n)
        y = x + np.random.randn(n)
        xm = np.ones(len(x) + 5) * 1e16
        ym = np.ones(len(y) + 5) * 1e16
        xm[0:len(x)] = x
        ym[0:len(y)] = y
        mask = xm > 9e15
        xm = np.ma.array(xm, mask=mask)
        ym = np.ma.array(ym, mask=mask)
        return x, y, xm, ym

    def generate_xy_sample2D(self, n, nx):
        x = np.ones((n, nx)) * np.nan
        y = np.ones((n, nx)) * np.nan
        xm = np.ones((n+5, nx)) * np.nan
        ym = np.ones((n+5, nx)) * np.nan

        for i in range(nx):
            x[:, i], y[:, i], dx, dy = self.generate_xy_sample(n)

        xm[0:n, :] = x[0:n]
        ym[0:n, :] = y[0:n]
        xm = np.ma.array(xm, mask=np.isnan(xm))
        ym = np.ma.array(ym, mask=np.isnan(ym))
        return x, y, xm, ym

    def test_linregress(self):
        for n in self.get_n():
            x, y, xm, ym = self.generate_xy_sample(n)
            res1 = stats.linregress(x, y)
            res2 = stats.mstats.linregress(xm, ym)
            assert_allclose(np.asarray(res1), np.asarray(res2))

    def test_pearsonr(self):
        for n in self.get_n():
            x, y, xm, ym = self.generate_xy_sample(n)
            r, p = stats.pearsonr(x, y)
            rm, pm = stats.mstats.pearsonr(xm, ym)
            assert_almost_equal(r, rm, decimal=14)
            assert_almost_equal(p, pm, decimal=14)

    def test_spearmanr(self):
        for n in self.get_n():
            x, y, xm, ym = self.generate_xy_sample(n)
            r, p = stats.spearmanr(x, y)
            rm, pm = stats.mstats.spearmanr(xm, ym)
            assert_almost_equal(r, rm, 14)
            assert_almost_equal(p, pm, 14)

    def test_spearmanr_backcompat_useties(self):
        # A regression test to ensure we don't break backwards compat
        # more than we have to (see gh-9204).
        x = np.arange(6)
        assert_raises(ValueError, mstats.spearmanr, x, x, False)

    def test_gmean(self):
        for n in self.get_n():
            x, y, xm, ym = self.generate_xy_sample(n)
            r = stats.gmean(abs(x))
            rm = stats.mstats.gmean(abs(xm))
            assert_allclose(r, rm, rtol=1e-13)

            r = stats.gmean(abs(y))
            rm = stats.mstats.gmean(abs(ym))
            assert_allclose(r, rm, rtol=1e-13)

    def test_hmean(self):
        for n in self.get_n():
            x, y, xm, ym = self.generate_xy_sample(n)

            r = stats.hmean(abs(x))
            rm = stats.mstats.hmean(abs(xm))
            assert_almost_equal(r, rm, 10)

            r = stats.hmean(abs(y))
            rm = stats.mstats.hmean(abs(ym))
            assert_almost_equal(r, rm, 10)

    def test_skew(self):
        for n in self.get_n():
            x, y, xm, ym = self.generate_xy_sample(n)

            r = stats.skew(x)
            rm = stats.mstats.skew(xm)
            assert_almost_equal(r, rm, 10)

            r = stats.skew(y)
            rm = stats.mstats.skew(ym)
            assert_almost_equal(r, rm, 10)

    def test_moment(self):
        for n in self.get_n():
            x, y, xm, ym = self.generate_xy_sample(n)

            r = stats.moment(x)
            rm = stats.mstats.moment(xm)
            assert_almost_equal(r, rm, 10)

            r = stats.moment(y)
            rm = stats.mstats.moment(ym)
            assert_almost_equal(r, rm, 10)

    def test_zscore(self):
        for n in self.get_n():
            x, y, xm, ym = self.generate_xy_sample(n)

            # reference solution
            zx = (x - x.mean()) / x.std()
            zy = (y - y.mean()) / y.std()

            # validate stats
            assert_allclose(stats.zscore(x), zx, rtol=1e-10)
            assert_allclose(stats.zscore(y), zy, rtol=1e-10)

            # compare stats and mstats
            assert_allclose(stats.zscore(x), stats.mstats.zscore(xm[0:len(x)]),
                            rtol=1e-10)
            assert_allclose(stats.zscore(y), stats.mstats.zscore(ym[0:len(y)]),
                            rtol=1e-10)

    def test_kurtosis(self):
        for n in self.get_n():
            x, y, xm, ym = self.generate_xy_sample(n)
            r = stats.kurtosis(x)
            rm = stats.mstats.kurtosis(xm)
            assert_almost_equal(r, rm, 10)

            r = stats.kurtosis(y)
            rm = stats.mstats.kurtosis(ym)
            assert_almost_equal(r, rm, 10)

    def test_sem(self):
        # example from stats.sem doc
        a = np.arange(20).reshape(5, 4)
        am = np.ma.array(a)
        r = stats.sem(a, ddof=1)
        rm = stats.mstats.sem(am, ddof=1)

        assert_allclose(r, 2.82842712, atol=1e-5)
        assert_allclose(rm, 2.82842712, atol=1e-5)

        for n in self.get_n():
            x, y, xm, ym = self.generate_xy_sample(n)
            assert_almost_equal(stats.mstats.sem(xm, axis=None, ddof=0),
                                stats.sem(x, axis=None, ddof=0), decimal=13)
            assert_almost_equal(stats.mstats.sem(ym, axis=None, ddof=0),
                                stats.sem(y, axis=None, ddof=0), decimal=13)
            assert_almost_equal(stats.mstats.sem(xm, axis=None, ddof=1),
                                stats.sem(x, axis=None, ddof=1), decimal=13)
            assert_almost_equal(stats.mstats.sem(ym, axis=None, ddof=1),
                                stats.sem(y, axis=None, ddof=1), decimal=13)

    def test_describe(self):
        for n in self.get_n():
            x, y, xm, ym = self.generate_xy_sample(n)
            r = stats.describe(x, ddof=1)
            rm = stats.mstats.describe(xm, ddof=1)
            for ii in range(6):
                assert_almost_equal(np.asarray(r[ii]),
                                    np.asarray(rm[ii]),
                                    decimal=12)

    def test_describe_result_attributes(self):
        actual = mstats.describe(np.arange(5))
        attributes = ('nobs', 'minmax', 'mean', 'variance', 'skewness',
                      'kurtosis')
        check_named_results(actual, attributes, ma=True)

    def test_rankdata(self):
        for n in self.get_n():
            x, y, xm, ym = self.generate_xy_sample(n)
            r = stats.rankdata(x)
            rm = stats.mstats.rankdata(x)
            assert_allclose(r, rm)

    def test_tmean(self):
        for n in self.get_n():
            x, y, xm, ym = self.generate_xy_sample(n)
            assert_almost_equal(stats.tmean(x), stats.mstats.tmean(xm), 14)
            assert_almost_equal(stats.tmean(y), stats.mstats.tmean(ym), 14)

    def test_tmax(self):
        for n in self.get_n():
            x, y, xm, ym = self.generate_xy_sample(n)
            assert_almost_equal(stats.tmax(x, 2.),
                                stats.mstats.tmax(xm, 2.), 10)
            assert_almost_equal(stats.tmax(y, 2.),
                                stats.mstats.tmax(ym, 2.), 10)

            assert_almost_equal(stats.tmax(x, upperlimit=3.),
                                stats.mstats.tmax(xm, upperlimit=3.), 10)
            assert_almost_equal(stats.tmax(y, upperlimit=3.),
                                stats.mstats.tmax(ym, upperlimit=3.), 10)

    def test_tmin(self):
        for n in self.get_n():
            x, y, xm, ym = self.generate_xy_sample(n)
            assert_equal(stats.tmin(x), stats.mstats.tmin(xm))
            assert_equal(stats.tmin(y), stats.mstats.tmin(ym))

            assert_almost_equal(stats.tmin(x, lowerlimit=-1.),
                                stats.mstats.tmin(xm, lowerlimit=-1.), 10)
            assert_almost_equal(stats.tmin(y, lowerlimit=-1.),
                                stats.mstats.tmin(ym, lowerlimit=-1.), 10)

    def test_zmap(self):
        for n in self.get_n():
            x, y, xm, ym = self.generate_xy_sample(n)
            z = stats.zmap(x, y)
            zm = stats.mstats.zmap(xm, ym)
            assert_allclose(z, zm[0:len(z)], atol=1e-10)

    def test_variation(self):
        for n in self.get_n():
            x, y, xm, ym = self.generate_xy_sample(n)
            assert_almost_equal(stats.variation(x), stats.mstats.variation(xm),
                                decimal=12)
            assert_almost_equal(stats.variation(y), stats.mstats.variation(ym),
                                decimal=12)

    def test_tvar(self):
        for n in self.get_n():
            x, y, xm, ym = self.generate_xy_sample(n)
            assert_almost_equal(stats.tvar(x), stats.mstats.tvar(xm),
                                decimal=12)
            assert_almost_equal(stats.tvar(y), stats.mstats.tvar(ym),
                                decimal=12)

    def test_trimboth(self):
        a = np.arange(20)
        b = stats.trimboth(a, 0.1)
        bm = stats.mstats.trimboth(a, 0.1)
        assert_allclose(np.sort(b), bm.data[~bm.mask])

    def test_tsem(self):
        for n in self.get_n():
            x, y, xm, ym = self.generate_xy_sample(n)
            assert_almost_equal(stats.tsem(x), stats.mstats.tsem(xm),
                                decimal=14)
            assert_almost_equal(stats.tsem(y), stats.mstats.tsem(ym),
                                decimal=14)
            assert_almost_equal(stats.tsem(x, limits=(-2., 2.)),
                                stats.mstats.tsem(xm, limits=(-2., 2.)),
                                decimal=14)

    def test_skewtest(self):
        # this test is for 1D data
        for n in self.get_n():
            if n > 8:
                x, y, xm, ym = self.generate_xy_sample(n)
                r = stats.skewtest(x)
                rm = stats.mstats.skewtest(xm)
                assert_allclose(r[0], rm[0], rtol=1e-15)
                # TODO: the p-value check below is not performed because it is
                # a known issue that mstats returns a slightly different
                # p-value.  What is a bit strange is that other tests, such as
                # test_maskedarray_input, don't fail!
                # assert_almost_equal(r[1], rm[1])

    def test_skewtest_result_attributes(self):
        x = np.array((-2, -1, 0, 1, 2, 3)*4)**2
        res = mstats.skewtest(x)
        attributes = ('statistic', 'pvalue')
        check_named_results(res, attributes, ma=True)

    def test_skewtest_2D_notmasked(self):
        # a normal ndarray is passed to the masked function
        x = np.random.random((20, 2)) * 20.
        r = stats.skewtest(x)
        rm = stats.mstats.skewtest(x)
        assert_allclose(np.asarray(r), np.asarray(rm))

    def test_skewtest_2D_WithMask(self):
        nx = 2
        for n in self.get_n():
            if n > 8:
                x, y, xm, ym = self.generate_xy_sample2D(n, nx)
                r = stats.skewtest(x)
                rm = stats.mstats.skewtest(xm)

                assert_equal(r[0][0], rm[0][0])
                assert_equal(r[0][1], rm[0][1])

    def test_normaltest(self):
        np.seterr(over='raise')
        with suppress_warnings() as sup:
            sup.filter(UserWarning, "kurtosistest only valid for n>=20")
            for n in self.get_n():
                if n > 8:
                    x, y, xm, ym = self.generate_xy_sample(n)
                    r = stats.normaltest(x)
                    rm = stats.mstats.normaltest(xm)
                    assert_allclose(np.asarray(r), np.asarray(rm))

    def test_find_repeats(self):
        x = np.asarray([1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4]).astype('float')
        tmp = np.asarray([1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5]).astype('float')
        mask = (tmp == 5.)
        xm = np.ma.array(tmp, mask=mask)
        x_orig, xm_orig = x.copy(), xm.copy()

        r = stats.find_repeats(x)
        rm = stats.mstats.find_repeats(xm)

        assert_equal(r, rm)
        assert_equal(x, x_orig)
        assert_equal(xm, xm_orig)

        # This crazy behavior is expected by count_tied_groups, but is not
        # in the docstring...
        _, counts = stats.mstats.find_repeats([])
        assert_equal(counts, np.array(0, dtype=np.intp))

    def test_kendalltau(self):
        for n in self.get_n():
            x, y, xm, ym = self.generate_xy_sample(n)
            r = stats.kendalltau(x, y)
            rm = stats.mstats.kendalltau(xm, ym)
            assert_almost_equal(r[0], rm[0], decimal=10)
            assert_almost_equal(r[1], rm[1], decimal=7)

    def test_obrientransform(self):
        for n in self.get_n():
            x, y, xm, ym = self.generate_xy_sample(n)
            r = stats.obrientransform(x)
            rm = stats.mstats.obrientransform(xm)
            assert_almost_equal(r.T, rm[0:len(x)])


class TestBrunnerMunzel(object):
    # Data from (Lumley, 1996)
    X = np.ma.masked_invalid([1, 2, 1, 1, 1, np.nan, 1, 1,
                              1, 1, 1, 2, 4, 1, 1, np.nan])
    Y = np.ma.masked_invalid([3, 3, 4, 3, np.nan, 1, 2, 3, 1, 1, 5, 4])
    significant = 14

    def test_brunnermunzel_one_sided(self):
        # Results are compared with R's lawstat package.
        u1, p1 = mstats.brunnermunzel(self.X, self.Y, alternative='less')
        u2, p2 = mstats.brunnermunzel(self.Y, self.X, alternative='greater')
        u3, p3 = mstats.brunnermunzel(self.X, self.Y, alternative='greater')
        u4, p4 = mstats.brunnermunzel(self.Y, self.X, alternative='less')

        assert_almost_equal(p1, p2, decimal=self.significant)
        assert_almost_equal(p3, p4, decimal=self.significant)
        assert_(p1 != p3)
        assert_almost_equal(u1, 3.1374674823029505,
                            decimal=self.significant)
        assert_almost_equal(u2, -3.1374674823029505,
                            decimal=self.significant)
        assert_almost_equal(u3, 3.1374674823029505,
                            decimal=self.significant)
        assert_almost_equal(u4, -3.1374674823029505,
                            decimal=self.significant)
        assert_almost_equal(p1, 0.0028931043330757342,
                            decimal=self.significant)
        assert_almost_equal(p3, 0.99710689566692423,
                            decimal=self.significant)

    def test_brunnermunzel_two_sided(self):
        # Results are compared with R's lawstat package.
        u1, p1 = mstats.brunnermunzel(self.X, self.Y, alternative='two-sided')
        u2, p2 = mstats.brunnermunzel(self.Y, self.X, alternative='two-sided')

        assert_almost_equal(p1, p2, decimal=self.significant)
        assert_almost_equal(u1, 3.1374674823029505,
                            decimal=self.significant)
        assert_almost_equal(u2, -3.1374674823029505,
                            decimal=self.significant)
        assert_almost_equal(p1, 0.0057862086661515377,
                            decimal=self.significant)

    def test_brunnermunzel_default(self):
        # The default value for alternative is two-sided
        u1, p1 = mstats.brunnermunzel(self.X, self.Y)
        u2, p2 = mstats.brunnermunzel(self.Y, self.X)

        assert_almost_equal(p1, p2, decimal=self.significant)
        assert_almost_equal(u1, 3.1374674823029505,
                            decimal=self.significant)
        assert_almost_equal(u2, -3.1374674823029505,
                            decimal=self.significant)
        assert_almost_equal(p1, 0.0057862086661515377,
                            decimal=self.significant)

    def test_brunnermunzel_alternative_error(self):
        alternative = "error"
        distribution = "t"
        assert_(alternative not in ["two-sided", "greater", "less"])
        assert_raises(ValueError,
                      mstats.brunnermunzel,
                      self.X,
                      self.Y,
                      alternative,
                      distribution)

    def test_brunnermunzel_distribution_norm(self):
        u1, p1 = mstats.brunnermunzel(self.X, self.Y, distribution="normal")
        u2, p2 = mstats.brunnermunzel(self.Y, self.X, distribution="normal")
        assert_almost_equal(p1, p2, decimal=self.significant)
        assert_almost_equal(u1, 3.1374674823029505,
                            decimal=self.significant)
        assert_almost_equal(u2, -3.1374674823029505,
                            decimal=self.significant)
        assert_almost_equal(p1, 0.0017041417600383024,
                            decimal=self.significant)

    def test_brunnermunzel_distribution_error(self):
        alternative = "two-sided"
        distribution = "error"
        assert_(distribution not in ["t", "normal"])
        assert_raises(ValueError,
                      mstats.brunnermunzel,
                      self.X,
                      self.Y,
                      alternative,
                      distribution)

    def test_brunnermunzel_empty_input(self):
        u1, p1 = mstats.brunnermunzel(self.X, [])
        u2, p2 = mstats.brunnermunzel([], self.Y)
        u3, p3 = mstats.brunnermunzel([], [])

        assert_(np.isnan(u1))
        assert_(np.isnan(p1))
        assert_(np.isnan(u2))
        assert_(np.isnan(p2))
        assert_(np.isnan(u3))
        assert_(np.isnan(p3))