123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234 |
- from __future__ import division, print_function, absolute_import
- import numpy.testing as npt
- import numpy as np
- from scipy._lib.six import xrange
- import pytest
- from scipy import stats
- from .common_tests import (check_normalization, check_moment, check_mean_expect,
- check_var_expect, check_skew_expect,
- check_kurt_expect, check_entropy,
- check_private_entropy, check_edge_support,
- check_named_args, check_random_state_property,
- check_pickling, check_rvs_broadcast)
- from scipy.stats._distr_params import distdiscrete
- vals = ([1, 2, 3, 4], [0.1, 0.2, 0.3, 0.4])
- distdiscrete += [[stats.rv_discrete(values=vals), ()]]
- def cases_test_discrete_basic():
- seen = set()
- for distname, arg in distdiscrete:
- yield distname, arg, distname not in seen
- seen.add(distname)
- @pytest.mark.parametrize('distname,arg,first_case', cases_test_discrete_basic())
- def test_discrete_basic(distname, arg, first_case):
- try:
- distfn = getattr(stats, distname)
- except TypeError:
- distfn = distname
- distname = 'sample distribution'
- np.random.seed(9765456)
- rvs = distfn.rvs(size=2000, *arg)
- supp = np.unique(rvs)
- m, v = distfn.stats(*arg)
- check_cdf_ppf(distfn, arg, supp, distname + ' cdf_ppf')
- check_pmf_cdf(distfn, arg, distname)
- check_oth(distfn, arg, supp, distname + ' oth')
- check_edge_support(distfn, arg)
- alpha = 0.01
- check_discrete_chisquare(distfn, arg, rvs, alpha,
- distname + ' chisquare')
- if first_case:
- locscale_defaults = (0,)
- meths = [distfn.pmf, distfn.logpmf, distfn.cdf, distfn.logcdf,
- distfn.logsf]
- # make sure arguments are within support
- spec_k = {'randint': 11, 'hypergeom': 4, 'bernoulli': 0, }
- k = spec_k.get(distname, 1)
- check_named_args(distfn, k, arg, locscale_defaults, meths)
- if distname != 'sample distribution':
- check_scale_docstring(distfn)
- check_random_state_property(distfn, arg)
- check_pickling(distfn, arg)
- # Entropy
- check_entropy(distfn, arg, distname)
- if distfn.__class__._entropy != stats.rv_discrete._entropy:
- check_private_entropy(distfn, arg, stats.rv_discrete)
- @pytest.mark.parametrize('distname,arg', distdiscrete)
- def test_moments(distname, arg):
- try:
- distfn = getattr(stats, distname)
- except TypeError:
- distfn = distname
- distname = 'sample distribution'
- m, v, s, k = distfn.stats(*arg, moments='mvsk')
- check_normalization(distfn, arg, distname)
- # compare `stats` and `moment` methods
- check_moment(distfn, arg, m, v, distname)
- check_mean_expect(distfn, arg, m, distname)
- check_var_expect(distfn, arg, m, v, distname)
- check_skew_expect(distfn, arg, m, v, s, distname)
- if distname not in ['zipf', 'yulesimon']:
- check_kurt_expect(distfn, arg, m, v, k, distname)
- # frozen distr moments
- check_moment_frozen(distfn, arg, m, 1)
- check_moment_frozen(distfn, arg, v+m*m, 2)
- @pytest.mark.parametrize('dist,shape_args', distdiscrete)
- def test_rvs_broadcast(dist, shape_args):
- # If shape_only is True, it means the _rvs method of the
- # distribution uses more than one random number to generate a random
- # variate. That means the result of using rvs with broadcasting or
- # with a nontrivial size will not necessarily be the same as using the
- # numpy.vectorize'd version of rvs(), so we can only compare the shapes
- # of the results, not the values.
- # Whether or not a distribution is in the following list is an
- # implementation detail of the distribution, not a requirement. If
- # the implementation the rvs() method of a distribution changes, this
- # test might also have to be changed.
- shape_only = dist in ['skellam', 'yulesimon']
- try:
- distfunc = getattr(stats, dist)
- except TypeError:
- distfunc = dist
- dist = 'rv_discrete(values=(%r, %r))' % (dist.xk, dist.pk)
- loc = np.zeros(2)
- nargs = distfunc.numargs
- allargs = []
- bshape = []
- # Generate shape parameter arguments...
- for k in range(nargs):
- shp = (k + 3,) + (1,)*(k + 1)
- param_val = shape_args[k]
- allargs.append(param_val*np.ones(shp, dtype=np.array(param_val).dtype))
- bshape.insert(0, shp[0])
- allargs.append(loc)
- bshape.append(loc.size)
- # bshape holds the expected shape when loc, scale, and the shape
- # parameters are all broadcast together.
- check_rvs_broadcast(distfunc, dist, allargs, bshape, shape_only, [np.int_])
- def check_cdf_ppf(distfn, arg, supp, msg):
- # cdf is a step function, and ppf(q) = min{k : cdf(k) >= q, k integer}
- npt.assert_array_equal(distfn.ppf(distfn.cdf(supp, *arg), *arg),
- supp, msg + '-roundtrip')
- npt.assert_array_equal(distfn.ppf(distfn.cdf(supp, *arg) - 1e-8, *arg),
- supp, msg + '-roundtrip')
- if not hasattr(distfn, 'xk'):
- supp1 = supp[supp < distfn.b]
- npt.assert_array_equal(distfn.ppf(distfn.cdf(supp1, *arg) + 1e-8, *arg),
- supp1 + distfn.inc, msg + ' ppf-cdf-next')
- # -1e-8 could cause an error if pmf < 1e-8
- def check_pmf_cdf(distfn, arg, distname):
- if hasattr(distfn, 'xk'):
- index = distfn.xk
- else:
- startind = int(distfn.ppf(0.01, *arg) - 1)
- index = list(range(startind, startind + 10))
- cdfs = distfn.cdf(index, *arg)
- pmfs_cum = distfn.pmf(index, *arg).cumsum()
- atol, rtol = 1e-10, 1e-10
- if distname == 'skellam': # ncx2 accuracy
- atol, rtol = 1e-5, 1e-5
- npt.assert_allclose(cdfs - cdfs[0], pmfs_cum - pmfs_cum[0],
- atol=atol, rtol=rtol)
- def check_moment_frozen(distfn, arg, m, k):
- npt.assert_allclose(distfn(*arg).moment(k), m,
- atol=1e-10, rtol=1e-10)
- def check_oth(distfn, arg, supp, msg):
- # checking other methods of distfn
- npt.assert_allclose(distfn.sf(supp, *arg), 1. - distfn.cdf(supp, *arg),
- atol=1e-10, rtol=1e-10)
- q = np.linspace(0.01, 0.99, 20)
- npt.assert_allclose(distfn.isf(q, *arg), distfn.ppf(1. - q, *arg),
- atol=1e-10, rtol=1e-10)
- median_sf = distfn.isf(0.5, *arg)
- npt.assert_(distfn.sf(median_sf - 1, *arg) > 0.5)
- npt.assert_(distfn.cdf(median_sf + 1, *arg) > 0.5)
- def check_discrete_chisquare(distfn, arg, rvs, alpha, msg):
- """Perform chisquare test for random sample of a discrete distribution
- Parameters
- ----------
- distname : string
- name of distribution function
- arg : sequence
- parameters of distribution
- alpha : float
- significance level, threshold for p-value
- Returns
- -------
- result : bool
- 0 if test passes, 1 if test fails
- """
- wsupp = 0.05
- # construct intervals with minimum mass `wsupp`.
- # intervals are left-half-open as in a cdf difference
- lo = int(max(distfn.a, -1000))
- distsupport = xrange(lo, int(min(distfn.b, 1000)) + 1)
- last = 0
- distsupp = [lo]
- distmass = []
- for ii in distsupport:
- current = distfn.cdf(ii, *arg)
- if current - last >= wsupp - 1e-14:
- distsupp.append(ii)
- distmass.append(current - last)
- last = current
- if current > (1 - wsupp):
- break
- if distsupp[-1] < distfn.b:
- distsupp.append(distfn.b)
- distmass.append(1 - last)
- distsupp = np.array(distsupp)
- distmass = np.array(distmass)
- # convert intervals to right-half-open as required by histogram
- histsupp = distsupp + 1e-8
- histsupp[0] = distfn.a
- # find sample frequencies and perform chisquare test
- freq, hsupp = np.histogram(rvs, histsupp)
- chis, pval = stats.chisquare(np.array(freq), len(rvs)*distmass)
- npt.assert_(pval > alpha,
- 'chisquare - test for %s at arg = %s with pval = %s' %
- (msg, str(arg), str(pval)))
- def check_scale_docstring(distfn):
- if distfn.__doc__ is not None:
- # Docstrings can be stripped if interpreter is run with -OO
- npt.assert_('scale' not in distfn.__doc__)
|