_discrete_distns.py 25 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969
  1. #
  2. # Author: Travis Oliphant 2002-2011 with contributions from
  3. # SciPy Developers 2004-2011
  4. #
  5. from __future__ import division, print_function, absolute_import
  6. from scipy import special
  7. from scipy.special import entr, logsumexp, betaln, gammaln as gamln
  8. from scipy._lib._numpy_compat import broadcast_to
  9. from scipy._lib._util import _lazywhere
  10. from numpy import floor, ceil, log, exp, sqrt, log1p, expm1, tanh, cosh, sinh
  11. import numpy as np
  12. from ._distn_infrastructure import (
  13. rv_discrete, _ncx2_pdf, _ncx2_cdf, get_distribution_names)
class binom_gen(rv_discrete):
    r"""A binomial discrete random variable.

    %(before_notes)s

    Notes
    -----
    The probability mass function for `binom` is:

    .. math::

       f(k) = \binom{n}{k} p^k (1-p)^{n-k}

    for ``k`` in ``{0, 1,..., n}``.

    `binom` takes ``n`` and ``p`` as shape parameters.

    %(after_notes)s

    %(example)s

    """
    def _rvs(self, n, p):
        # Sampling is delegated to numpy's binomial generator.
        return self._random_state.binomial(n, p, self._size)

    def _argcheck(self, n, p):
        # NOTE(review): setting self.b here makes the support's upper bound a
        # side effect of argument validation; this mutates shared instance
        # state and is not safe under concurrent use with different n —
        # confirm this matches the infrastructure's expectations.
        self.b = n
        return (n >= 0) & (p >= 0) & (p <= 1)

    def _logpmf(self, x, n, p):
        k = floor(x)
        # log of the binomial coefficient via log-gamma; xlogy/xlog1py give
        # the correct limits when p is exactly 0 or 1.
        combiln = (gamln(n+1) - (gamln(k+1) + gamln(n-k+1)))
        return combiln + special.xlogy(k, p) + special.xlog1py(n-k, -p)

    def _pmf(self, x, n, p):
        # binom.pmf(k) = choose(n, k) * p**k * (1-p)**(n-k)
        return exp(self._logpmf(x, n, p))

    def _cdf(self, x, n, p):
        k = floor(x)
        # cdf via the binomial distribution ufunc (regularized incomplete beta).
        vals = special.bdtr(k, n, p)
        return vals

    def _sf(self, x, n, p):
        k = floor(x)
        return special.bdtrc(k, n, p)

    def _ppf(self, q, n, p):
        # Invert the cdf numerically, then step back one integer where the
        # smaller value already satisfies cdf(vals1) >= q.
        vals = ceil(special.bdtrik(q, n, p))
        vals1 = np.maximum(vals - 1, 0)
        temp = special.bdtr(vals1, n, p)
        return np.where(temp >= q, vals1, vals)

    def _stats(self, n, p, moments='mv'):
        q = 1.0 - p
        mu = n * p
        var = n * p * q
        # Skewness/kurtosis are only computed when explicitly requested.
        g1, g2 = None, None
        if 's' in moments:
            g1 = (q - p) / sqrt(var)
        if 'k' in moments:
            g2 = (1.0 - 6*p*q) / var
        return mu, var, g1, g2

    def _entropy(self, n, p):
        # Exact entropy by summing entr(pmf) over the finite support 0..n.
        k = np.r_[0:n + 1]
        vals = self._pmf(k, n, p)
        return np.sum(entr(vals), axis=0)


binom = binom_gen(name='binom')
class bernoulli_gen(binom_gen):
    r"""A Bernoulli discrete random variable.

    %(before_notes)s

    Notes
    -----
    The probability mass function for `bernoulli` is:

    .. math::

       f(k) = \begin{cases}1-p &\text{if } k = 0\\
              p &\text{if } k = 1\end{cases}

    for :math:`k` in :math:`\{0, 1\}`.

    `bernoulli` takes :math:`p` as shape parameter.

    %(after_notes)s

    %(example)s

    """
    # A Bernoulli variate is a binomial variate with n fixed at 1, so every
    # method below simply forwards to `binom` with n=1.
    def _rvs(self, p):
        return binom_gen._rvs(self, 1, p)

    def _argcheck(self, p):
        # p is a probability.
        return (p >= 0) & (p <= 1)

    def _logpmf(self, x, p):
        return binom._logpmf(x, 1, p)

    def _pmf(self, x, p):
        # bernoulli.pmf(k) = 1-p if k = 0
        #                  = p   if k = 1
        return binom._pmf(x, 1, p)

    def _cdf(self, x, p):
        return binom._cdf(x, 1, p)

    def _sf(self, x, p):
        return binom._sf(x, 1, p)

    def _ppf(self, q, p):
        return binom._ppf(q, 1, p)

    def _stats(self, p):
        return binom._stats(1, p)

    def _entropy(self, p):
        # Closed-form entropy of a two-point distribution.
        return entr(p) + entr(1-p)


bernoulli = bernoulli_gen(b=1, name='bernoulli')
class nbinom_gen(rv_discrete):
    r"""A negative binomial discrete random variable.

    %(before_notes)s

    Notes
    -----
    Negative binomial distribution describes a sequence of i.i.d. Bernoulli
    trials, repeated until a predefined, non-random number of successes occurs.

    The probability mass function of the number of failures for `nbinom` is:

    .. math::

       f(k) = \binom{k+n-1}{n-1} p^n (1-p)^k

    for :math:`k \ge 0`.

    `nbinom` takes :math:`n` and :math:`p` as shape parameters where n is the
    number of successes, whereas p is the probability of a single success.

    %(after_notes)s

    %(example)s

    """
    def _rvs(self, n, p):
        return self._random_state.negative_binomial(n, p, self._size)

    def _argcheck(self, n, p):
        # n may be non-integer (any value > 0); p is a probability.
        return (n > 0) & (p >= 0) & (p <= 1)

    def _pmf(self, x, n, p):
        # nbinom.pmf(k) = choose(k+n-1, n-1) * p**n * (1-p)**k
        return exp(self._logpmf(x, n, p))

    def _logpmf(self, x, n, p):
        # Gamma-function form of the binomial coefficient supports
        # non-integer n; xlog1py keeps log((1-p)**x) accurate for small p.
        coeff = gamln(n+x) - gamln(x+1) - gamln(n)
        return coeff + n*log(p) + special.xlog1py(x, -p)

    def _cdf(self, x, n, p):
        k = floor(x)
        # cdf expressed through the regularized incomplete beta function,
        # which is valid for non-integer n as well.
        return special.betainc(n, k+1, p)

    def _sf_skip(self, x, n, p):
        # skip because special.nbdtrc doesn't work for 0<n<1
        k = floor(x)
        return special.nbdtrc(k, n, p)

    def _ppf(self, q, n, p):
        # Invert the cdf, then step back one integer where the smaller value
        # already satisfies cdf(vals1) >= q.
        vals = ceil(special.nbdtrik(q, n, p))
        vals1 = (vals-1).clip(0.0, np.inf)
        temp = self._cdf(vals1, n, p)
        return np.where(temp >= q, vals1, vals)

    def _stats(self, n, p):
        # Moments in terms of Q = 1/p and P = Q - 1 (standard textbook form).
        Q = 1.0 / p
        P = Q - 1.0
        mu = n*P
        var = n*P*Q
        g1 = (Q+P)/sqrt(n*P*Q)
        g2 = (1.0 + 6*P*Q) / (n*P*Q)
        return mu, var, g1, g2


nbinom = nbinom_gen(name='nbinom')
  148. class geom_gen(rv_discrete):
  149. r"""A geometric discrete random variable.
  150. %(before_notes)s
  151. Notes
  152. -----
  153. The probability mass function for `geom` is:
  154. .. math::
  155. f(k) = (1-p)^{k-1} p
  156. for :math:`k \ge 1`.
  157. `geom` takes :math:`p` as shape parameter.
  158. %(after_notes)s
  159. %(example)s
  160. """
  161. def _rvs(self, p):
  162. return self._random_state.geometric(p, size=self._size)
  163. def _argcheck(self, p):
  164. return (p <= 1) & (p >= 0)
  165. def _pmf(self, k, p):
  166. # geom.pmf(k) = (1-p)**(k-1)*p
  167. return np.power(1-p, k-1) * p
  168. def _logpmf(self, k, p):
  169. return special.xlog1py(k - 1, -p) + log(p)
  170. def _cdf(self, x, p):
  171. k = floor(x)
  172. return -expm1(log1p(-p)*k)
  173. def _sf(self, x, p):
  174. return np.exp(self._logsf(x, p))
  175. def _logsf(self, x, p):
  176. k = floor(x)
  177. return k*log1p(-p)
  178. def _ppf(self, q, p):
  179. vals = ceil(log1p(-q) / log1p(-p))
  180. temp = self._cdf(vals-1, p)
  181. return np.where((temp >= q) & (vals > 0), vals-1, vals)
  182. def _stats(self, p):
  183. mu = 1.0/p
  184. qr = 1.0-p
  185. var = qr / p / p
  186. g1 = (2.0-p) / sqrt(qr)
  187. g2 = np.polyval([1, -6, 6], p)/(1.0-p)
  188. return mu, var, g1, g2
  189. geom = geom_gen(a=1, name='geom', longname="A geometric")
class hypergeom_gen(rv_discrete):
    r"""A hypergeometric discrete random variable.

    The hypergeometric distribution models drawing objects from a bin.
    `M` is the total number of objects, `n` is total number of Type I objects.
    The random variate represents the number of Type I objects in `N` drawn
    without replacement from the total population.

    %(before_notes)s

    Notes
    -----
    The symbols used to denote the shape parameters (`M`, `n`, and `N`) are not
    universally accepted. See the Examples for a clarification of the
    definitions used here.

    The probability mass function is defined as,

    .. math:: p(k, M, n, N) = \frac{\binom{n}{k} \binom{M - n}{N - k}}
                                   {\binom{M}{N}}

    for :math:`k \in [\max(0, N - M + n), \min(n, N)]`, where the binomial
    coefficients are defined as,

    .. math:: \binom{n}{k} \equiv \frac{n!}{k! (n - k)!}.

    %(after_notes)s

    Examples
    --------
    >>> from scipy.stats import hypergeom
    >>> import matplotlib.pyplot as plt

    Suppose we have a collection of 20 animals, of which 7 are dogs.  Then if
    we want to know the probability of finding a given number of dogs if we
    choose at random 12 of the 20 animals, we can initialize a frozen
    distribution and plot the probability mass function:

    >>> [M, n, N] = [20, 7, 12]
    >>> rv = hypergeom(M, n, N)
    >>> x = np.arange(0, n+1)
    >>> pmf_dogs = rv.pmf(x)

    >>> fig = plt.figure()
    >>> ax = fig.add_subplot(111)
    >>> ax.plot(x, pmf_dogs, 'bo')
    >>> ax.vlines(x, 0, pmf_dogs, lw=2)
    >>> ax.set_xlabel('# of dogs in our group of chosen animals')
    >>> ax.set_ylabel('hypergeom PMF')
    >>> plt.show()

    Instead of using a frozen distribution we can also use `hypergeom`
    methods directly.  To for example obtain the cumulative distribution
    function, use:

    >>> prb = hypergeom.cdf(x, M, n, N)

    And to generate random numbers:

    >>> R = hypergeom.rvs(M, n, N, size=10)

    """
    def _rvs(self, M, n, N):
        # numpy parameterizes as (ngood, nbad, nsample).
        return self._random_state.hypergeometric(n, M-n, N, size=self._size)

    def _argcheck(self, M, n, N):
        cond = (M > 0) & (n >= 0) & (N >= 0)
        cond &= (n <= M) & (N <= M)
        # NOTE(review): the support bounds are set here as a side effect of
        # argument validation (shared instance state).
        self.a = np.maximum(N-(M-n), 0)
        self.b = np.minimum(n, N)
        return cond

    def _logpmf(self, k, M, n, N):
        # log-pmf written entirely in terms of betaln to avoid overflow of
        # the raw binomial coefficients.
        tot, good = M, n
        bad = tot - good
        result = (betaln(good+1, 1) + betaln(bad+1, 1) + betaln(tot-N+1, N+1) -
                  betaln(k+1, good-k+1) - betaln(N-k+1, bad-N+k+1) -
                  betaln(tot+1, 1))
        return result

    def _pmf(self, k, M, n, N):
        # same as the following but numerically more precise
        # return comb(good, k) * comb(bad, N-k) / comb(tot, N)
        return exp(self._logpmf(k, M, n, N))

    def _stats(self, M, n, N):
        # tot, good, sample_size = M, n, N
        # "wikipedia".replace('N', 'M').replace('n', 'N').replace('K', 'n')
        M, n, N = 1.*M, 1.*n, 1.*N
        m = M - n
        p = n/M
        mu = N*p

        var = m*n*N*(M - N)*1.0/(M*M*(M-1))
        g1 = (m - n)*(M-2*N) / (M-2.0) * sqrt((M-1.0) / (m*n*N*(M-N)))

        g2 = M*(M+1) - 6.*N*(M-N) - 6.*n*m
        g2 *= (M-1)*M*M
        g2 += 6.*n*N*(M-N)*m*(5.*M-6)
        g2 /= n * N * (M-N) * m * (M-2.) * (M-3.)
        return mu, var, g1, g2

    def _entropy(self, M, n, N):
        # Exact entropy over the finite support.
        k = np.r_[N - (M - n):min(n, N) + 1]
        vals = self.pmf(k, M, n, N)
        return np.sum(entr(vals), axis=0)

    def _sf(self, k, M, n, N):
        """More precise calculation, 1 - cdf doesn't cut it."""
        # This for loop is needed because `k` can be an array. If that's the
        # case, the sf() method makes M, n and N arrays of the same shape. We
        # therefore unpack all inputs args, so we can do the manual
        # integration.
        res = []
        for quant, tot, good, draw in zip(k, M, n, N):
            # Manual integration over probability mass function. More accurate
            # than integrate.quad.
            k2 = np.arange(quant + 1, draw + 1)
            res.append(np.sum(self._pmf(k2, tot, good, draw)))
        return np.asarray(res)

    def _logsf(self, k, M, n, N):
        """
        More precise calculation than log(sf)
        """
        res = []
        for quant, tot, good, draw in zip(k, M, n, N):
            # Integration over probability mass function using logsumexp
            k2 = np.arange(quant + 1, draw + 1)
            res.append(logsumexp(self._logpmf(k2, tot, good, draw)))
        return np.asarray(res)


hypergeom = hypergeom_gen(name='hypergeom')
# FIXME: Fails _cdfvec
class logser_gen(rv_discrete):
    r"""A Logarithmic (Log-Series, Series) discrete random variable.

    %(before_notes)s

    Notes
    -----
    The probability mass function for `logser` is:

    .. math::

       f(k) = - \frac{p^k}{k \log(1-p)}

    for :math:`k \ge 1`.

    `logser` takes :math:`p` as shape parameter.

    %(after_notes)s

    %(example)s

    """
    def _rvs(self, p):
        # looks wrong for p>0.5, too few k=1
        # trying to use generic is worse, no k=1 at all
        return self._random_state.logseries(p, size=self._size)

    def _argcheck(self, p):
        return (p > 0) & (p < 1)

    def _pmf(self, k, p):
        # logser.pmf(k) = - p**k / (k*log(1-p))
        return -np.power(p, k) * 1.0 / k / special.log1p(-p)

    def _stats(self, p):
        # Central moments built from the raw moments mu_k' of the log-series
        # distribution; r = log(1-p) appears in all of them.
        r = special.log1p(-p)
        mu = p / (p - 1.0) / r
        mu2p = -p / r / (p - 1.0)**2
        var = mu2p - mu*mu
        mu3p = -p / r * (1.0+p) / (1.0 - p)**3
        mu3 = mu3p - 3*mu*mu2p + 2*mu**3
        g1 = mu3 / np.power(var, 1.5)

        mu4p = -p / r * (
            1.0 / (p-1)**2 - 6*p / (p - 1)**3 + 6*p*p / (p-1)**4)
        mu4 = mu4p - 4*mu3p*mu + 6*mu2p*mu*mu - 3*mu**4
        g2 = mu4 / var**2 - 3.0
        return mu, var, g1, g2


logser = logser_gen(a=1, name='logser', longname='A logarithmic')
class poisson_gen(rv_discrete):
    r"""A Poisson discrete random variable.

    %(before_notes)s

    Notes
    -----
    The probability mass function for `poisson` is:

    .. math::

       f(k) = \exp(-\mu) \frac{\mu^k}{k!}

    for :math:`k \ge 0`.

    `poisson` takes :math:`\mu` as shape parameter.

    %(after_notes)s

    %(example)s

    """

    # Override rv_discrete._argcheck to allow mu=0.
    def _argcheck(self, mu):
        return mu >= 0

    def _rvs(self, mu):
        return self._random_state.poisson(mu, self._size)

    def _logpmf(self, k, mu):
        # xlogy gives the correct 0*log(0) = 0 limit when k = mu = 0.
        Pk = special.xlogy(k, mu) - gamln(k + 1) - mu
        return Pk

    def _pmf(self, k, mu):
        # poisson.pmf(k) = exp(-mu) * mu**k / k!
        return exp(self._logpmf(k, mu))

    def _cdf(self, x, mu):
        k = floor(x)
        return special.pdtr(k, mu)

    def _sf(self, x, mu):
        k = floor(x)
        return special.pdtrc(k, mu)

    def _ppf(self, q, mu):
        # Invert the cdf, then step back one integer where the smaller value
        # already satisfies cdf(vals1) >= q.
        vals = ceil(special.pdtrik(q, mu))
        vals1 = np.maximum(vals - 1, 0)
        temp = special.pdtr(vals1, mu)
        return np.where(temp >= q, vals1, vals)

    def _stats(self, mu):
        var = mu
        tmp = np.asarray(mu)
        # Skew/kurtosis diverge as mu -> 0; _lazywhere avoids division by
        # zero and returns inf there.
        mu_nonzero = tmp > 0
        g1 = _lazywhere(mu_nonzero, (tmp,), lambda x: sqrt(1.0/x), np.inf)
        g2 = _lazywhere(mu_nonzero, (tmp,), lambda x: 1.0/x, np.inf)
        return mu, var, g1, g2


poisson = poisson_gen(name="poisson", longname='A Poisson')
  376. class planck_gen(rv_discrete):
  377. r"""A Planck discrete exponential random variable.
  378. %(before_notes)s
  379. Notes
  380. -----
  381. The probability mass function for `planck` is:
  382. .. math::
  383. f(k) = (1-\exp(-\lambda)) \exp(-\lambda k)
  384. for :math:`k \ge 0` and :math:`\lambda > 0`.
  385. `planck` takes :math:`\lambda` as shape parameter.
  386. %(after_notes)s
  387. %(example)s
  388. """
  389. def _argcheck(self, lambda_):
  390. return lambda_ > 0
  391. def _pmf(self, k, lambda_):
  392. return (1-exp(-lambda_))*exp(-lambda_*k)
  393. def _cdf(self, x, lambda_):
  394. k = floor(x)
  395. return 1-exp(-lambda_*(k+1))
  396. def _sf(self, x, lambda_):
  397. return np.exp(self._logsf(x, lambda_))
  398. def _logsf(self, x, lambda_):
  399. k = floor(x)
  400. return -lambda_*(k+1)
  401. def _ppf(self, q, lambda_):
  402. vals = ceil(-1.0/lambda_ * log1p(-q)-1)
  403. vals1 = (vals-1).clip(self.a, np.inf)
  404. temp = self._cdf(vals1, lambda_)
  405. return np.where(temp >= q, vals1, vals)
  406. def _stats(self, lambda_):
  407. mu = 1/(exp(lambda_)-1)
  408. var = exp(-lambda_)/(expm1(-lambda_))**2
  409. g1 = 2*cosh(lambda_/2.0)
  410. g2 = 4+2*cosh(lambda_)
  411. return mu, var, g1, g2
  412. def _entropy(self, lambda_):
  413. l = lambda_
  414. C = (1-exp(-l))
  415. return l*exp(-l)/C - log(C)
  416. planck = planck_gen(a=0, name='planck', longname='A discrete exponential ')
class boltzmann_gen(rv_discrete):
    r"""A Boltzmann (Truncated Discrete Exponential) random variable.

    %(before_notes)s

    Notes
    -----
    The probability mass function for `boltzmann` is:

    .. math::

       f(k) = (1-\exp(-\lambda)) \exp(-\lambda k) / (1-\exp(-\lambda N))

    for :math:`k = 0,..., N-1`.

    `boltzmann` takes :math:`\lambda > 0` and :math:`N > 0` as shape parameters.

    %(after_notes)s

    %(example)s

    """
    def _argcheck(self, lambda_, N):
        # NOTE(review): the truncated support [0, N-1] is recorded on the
        # instance as a side effect of argument validation.
        self.a = 0
        self.b = N - 1
        return (lambda_ > 0) & (N > 0)

    def _pmf(self, k, lambda_, N):
        # boltzmann.pmf(k) =
        #               (1-exp(-lambda_)*exp(-lambda_*k)/(1-exp(-lambda_*N))
        fact = (1-exp(-lambda_))/(1-exp(-lambda_*N))
        return fact*exp(-lambda_*k)

    def _cdf(self, x, lambda_, N):
        k = floor(x)
        return (1-exp(-lambda_*(k+1)))/(1-exp(-lambda_*N))

    def _ppf(self, q, lambda_, N):
        # Rescale q to the untruncated geometric scale, invert in closed
        # form, then step back one integer where possible.
        qnew = q*(1-exp(-lambda_*N))
        vals = ceil(-1.0/lambda_ * log(1-qnew)-1)
        vals1 = (vals-1).clip(0.0, np.inf)
        temp = self._cdf(vals1, lambda_, N)
        return np.where(temp >= q, vals1, vals)

    def _stats(self, lambda_, N):
        # Moments in terms of z = exp(-lambda) and zN = z**N.
        z = exp(-lambda_)
        zN = exp(-lambda_*N)
        mu = z/(1.0-z)-N*zN/(1-zN)
        var = z/(1.0-z)**2 - N*N*zN/(1-zN)**2
        trm = (1-zN)/(1-z)

        trm2 = (z*trm**2 - N*N*zN)
        g1 = z*(1+z)*trm**3 - N**3*zN*(1+zN)
        g1 = g1 / trm2**(1.5)

        g2 = z*(1+4*z+z*z)*trm**4 - N**4 * zN*(1+4*zN+zN*zN)
        g2 = g2 / trm2 / trm2
        return mu, var, g1, g2


boltzmann = boltzmann_gen(name='boltzmann',
                          longname='A truncated discrete exponential ')
  462. class randint_gen(rv_discrete):
  463. r"""A uniform discrete random variable.
  464. %(before_notes)s
  465. Notes
  466. -----
  467. The probability mass function for `randint` is:
  468. .. math::
  469. f(k) = \frac{1}{high - low}
  470. for ``k = low, ..., high - 1``.
  471. `randint` takes ``low`` and ``high`` as shape parameters.
  472. %(after_notes)s
  473. %(example)s
  474. """
  475. def _argcheck(self, low, high):
  476. self.a = low
  477. self.b = high - 1
  478. return (high > low)
  479. def _pmf(self, k, low, high):
  480. # randint.pmf(k) = 1./(high - low)
  481. p = np.ones_like(k) / (high - low)
  482. return np.where((k >= low) & (k < high), p, 0.)
  483. def _cdf(self, x, low, high):
  484. k = floor(x)
  485. return (k - low + 1.) / (high - low)
  486. def _ppf(self, q, low, high):
  487. vals = ceil(q * (high - low) + low) - 1
  488. vals1 = (vals - 1).clip(low, high)
  489. temp = self._cdf(vals1, low, high)
  490. return np.where(temp >= q, vals1, vals)
  491. def _stats(self, low, high):
  492. m2, m1 = np.asarray(high), np.asarray(low)
  493. mu = (m2 + m1 - 1.0) / 2
  494. d = m2 - m1
  495. var = (d*d - 1) / 12.0
  496. g1 = 0.0
  497. g2 = -6.0/5.0 * (d*d + 1.0) / (d*d - 1.0)
  498. return mu, var, g1, g2
  499. def _rvs(self, low, high):
  500. """An array of *size* random integers >= ``low`` and < ``high``."""
  501. if self._size is not None:
  502. # Numpy's RandomState.randint() doesn't broadcast its arguments.
  503. # Use `broadcast_to()` to extend the shapes of low and high
  504. # up to self._size. Then we can use the numpy.vectorize'd
  505. # randint without needing to pass it a `size` argument.
  506. low = broadcast_to(low, self._size)
  507. high = broadcast_to(high, self._size)
  508. randint = np.vectorize(self._random_state.randint, otypes=[np.int_])
  509. return randint(low, high)
  510. def _entropy(self, low, high):
  511. return log(high - low)
  512. randint = randint_gen(name='randint', longname='A discrete uniform '
  513. '(random integer)')
# FIXME: problems sampling.
class zipf_gen(rv_discrete):
    r"""A Zipf discrete random variable.

    %(before_notes)s

    Notes
    -----
    The probability mass function for `zipf` is:

    .. math::

       f(k, a) = \frac{1}{\zeta(a) k^a}

    for :math:`k \ge 1`.

    `zipf` takes :math:`a` as shape parameter. :math:`\zeta` is the
    Riemann zeta function (`scipy.special.zeta`)

    %(after_notes)s

    %(example)s

    """
    def _rvs(self, a):
        return self._random_state.zipf(a, size=self._size)

    def _argcheck(self, a):
        # The exponent must exceed 1 for the zeta normalization to converge.
        return a > 1

    def _pmf(self, k, a):
        # zipf.pmf(k, a) = 1/(zeta(a) * k**a)
        Pk = 1.0 / special.zeta(a, 1) / k**a
        return Pk

    def _munp(self, n, a):
        # The n-th raw moment is zeta(a-n)/zeta(a); it only converges when
        # a > n + 1, otherwise the moment is infinite.
        return _lazywhere(
            a > n + 1, (a, n),
            lambda a, n: special.zeta(a - n, 1) / special.zeta(a, 1),
            np.inf)


zipf = zipf_gen(a=1, name='zipf', longname='A Zipf')
class dlaplace_gen(rv_discrete):
    r"""A  Laplacian discrete random variable.

    %(before_notes)s

    Notes
    -----
    The probability mass function for `dlaplace` is:

    .. math::

       f(k) = \tanh(a/2) \exp(-a |k|)

    for integers :math:`k` and :math:`a > 0`.

    `dlaplace` takes :math:`a` as shape parameter.

    %(after_notes)s

    %(example)s

    """
    def _pmf(self, k, a):
        # dlaplace.pmf(k) = tanh(a/2) * exp(-a*abs(k))
        return tanh(a/2.0) * exp(-a * abs(k))

    def _cdf(self, x, a):
        k = floor(x)
        # The cdf has different closed forms on the two sides of zero;
        # _lazywhere evaluates only the applicable branch.
        f = lambda k, a: 1.0 - exp(-a * k) / (exp(a) + 1)
        f2 = lambda k, a: exp(a * (k+1)) / (exp(a) + 1)
        return _lazywhere(k >= 0, (k, a), f=f, f2=f2)

    def _ppf(self, q, a):
        # Invert each branch of the cdf, choosing the branch by comparing q
        # with the mass below zero; then step back one integer if possible.
        const = 1 + exp(a)
        vals = ceil(np.where(q < 1.0 / (1 + exp(-a)),
                             log(q*const) / a - 1,
                             -log((1-q) * const) / a))
        vals1 = vals - 1
        return np.where(self._cdf(vals1, a) >= q, vals1, vals)

    def _stats(self, a):
        # Symmetric distribution: mean and skewness are exactly zero.
        ea = exp(a)
        mu2 = 2.*ea/(ea-1.)**2
        mu4 = 2.*ea*(ea**2+10.*ea+1.) / (ea-1.)**4
        return 0., mu2, 0., mu4/mu2**2 - 3.

    def _entropy(self, a):
        return a / sinh(a) - log(tanh(a/2.0))


dlaplace = dlaplace_gen(a=-np.inf,
                        name='dlaplace', longname='A discrete Laplacian')
class skellam_gen(rv_discrete):
    r"""A  Skellam discrete random variable.

    %(before_notes)s

    Notes
    -----
    Probability distribution of the difference of two correlated or
    uncorrelated Poisson random variables.

    Let :math:`k_1` and :math:`k_2` be two Poisson-distributed r.v. with
    expected values :math:`\lambda_1` and :math:`\lambda_2`. Then,
    :math:`k_1 - k_2` follows a Skellam distribution with parameters
    :math:`\mu_1 = \lambda_1 - \rho \sqrt{\lambda_1 \lambda_2}` and
    :math:`\mu_2 = \lambda_2 - \rho \sqrt{\lambda_1 \lambda_2}`, where
    :math:`\rho` is the correlation coefficient between :math:`k_1` and
    :math:`k_2`. If the two Poisson-distributed r.v. are independent then
    :math:`\rho = 0`.

    Parameters :math:`\mu_1` and :math:`\mu_2` must be strictly positive.

    For details see: https://en.wikipedia.org/wiki/Skellam_distribution

    `skellam` takes :math:`\mu_1` and :math:`\mu_2` as shape parameters.

    %(after_notes)s

    %(example)s

    """
    def _rvs(self, mu1, mu2):
        # Difference of two independent Poisson draws.
        n = self._size
        return (self._random_state.poisson(mu1, n) -
                self._random_state.poisson(mu2, n))

    def _pmf(self, x, mu1, mu2):
        # The pmf is expressed through the noncentral chi-square density,
        # with the branch chosen by the sign of x.
        px = np.where(x < 0,
                      _ncx2_pdf(2*mu2, 2*(1-x), 2*mu1)*2,
                      _ncx2_pdf(2*mu1, 2*(1+x), 2*mu2)*2)
        # ncx2.pdf() returns nan's for extremely low probabilities
        return px

    def _cdf(self, x, mu1, mu2):
        # Analogous noncentral chi-square identity for the cdf.
        x = floor(x)
        px = np.where(x < 0,
                      _ncx2_cdf(2*mu2, -2*x, 2*mu1),
                      1 - _ncx2_cdf(2*mu1, 2*(x+1), 2*mu2))
        return px

    def _stats(self, mu1, mu2):
        mean = mu1 - mu2
        var = mu1 + mu2
        g1 = mean / sqrt((var)**3)
        g2 = 1 / var
        return mean, var, g1, g2


skellam = skellam_gen(a=-np.inf, name="skellam", longname='A Skellam')
class yulesimon_gen(rv_discrete):
    r"""A Yule-Simon discrete random variable.

    %(before_notes)s

    Notes
    -----

    The probability mass function for the `yulesimon` is:

    .. math::

        f(k) =  \alpha B(k, \alpha+1)

    for :math:`k=1,2,3,...`, where :math:`\alpha>0`.
    Here :math:`B` refers to the `scipy.special.beta` function.

    The sampling of random variates is based on pg 553, Section 6.3 of [1]_.
    Our notation maps to the referenced logic via :math:`\alpha=a-1`.

    For details see the wikipedia entry [2]_.

    References
    ----------
    .. [1] Devroye, Luc. "Non-uniform Random Variate Generation",
         (1986) Springer, New York.

    .. [2] https://en.wikipedia.org/wiki/Yule-Simon_distribution

    %(after_notes)s

    %(example)s

    """
    def _rvs(self, alpha):
        # Inversion sampler built from two standard exponentials
        # (Devroye 1986, Section 6.3).
        E1 = self._random_state.standard_exponential(self._size)
        E2 = self._random_state.standard_exponential(self._size)
        ans = ceil(-E1 / log1p(-exp(-E2 / alpha)))
        return ans

    def _pmf(self, x, alpha):
        return alpha * special.beta(x, alpha + 1)

    def _argcheck(self, alpha):
        return (alpha > 0)

    def _logpmf(self, x, alpha):
        return log(alpha) + special.betaln(x, alpha + 1)

    def _cdf(self, x, alpha):
        return 1 - x * special.beta(x, alpha + 1)

    def _sf(self, x, alpha):
        return x * special.beta(x, alpha + 1)

    def _logsf(self, x, alpha):
        return log(x) + special.betaln(x, alpha + 1)

    def _stats(self, alpha):
        # Moments exist only for sufficiently large alpha; stacked np.where
        # masks return inf where the moment diverges and nan where it is
        # undefined.
        mu = np.where(alpha <= 1, np.inf, alpha / (alpha - 1))
        mu2 = np.where(alpha > 2,
                       alpha**2 / ((alpha - 2.0) * (alpha - 1)**2),
                       np.inf)
        mu2 = np.where(alpha <= 1, np.nan, mu2)
        g1 = np.where(alpha > 3,
                      sqrt(alpha - 2) * (alpha + 1)**2 / (alpha * (alpha - 3)),
                      np.inf)
        g1 = np.where(alpha <= 2, np.nan, g1)
        g2 = np.where(alpha > 4,
                      (alpha + 3) + (alpha**3 - 49 * alpha - 22) / (alpha *
                      (alpha - 4) * (alpha - 3)), np.inf)
        g2 = np.where(alpha <= 2, np.nan, g2)
        return mu, mu2, g1, g2


yulesimon = yulesimon_gen(name='yulesimon', a=1)
# Collect names of classes and objects in this module.
pairs = list(globals().items())
# Split into public distribution instances and their generator classes so the
# module exports both (e.g. `binom` and `binom_gen`).
_distn_names, _distn_gen_names = get_distribution_names(pairs, rv_discrete)

__all__ = _distn_names + _distn_gen_names