# test_binned_statistic.py -- tests for scipy.stats.binned_statistic,
# binned_statistic_2d and binned_statistic_dd.
  1. from __future__ import division, print_function, absolute_import
  2. import numpy as np
  3. from numpy.testing import assert_allclose
  4. from scipy.stats import (binned_statistic, binned_statistic_2d,
  5. binned_statistic_dd)
  6. from scipy._lib.six import u
  7. from .common_tests import check_named_results
  8. class TestBinnedStatistic(object):
  9. @classmethod
  10. def setup_class(cls):
  11. np.random.seed(9865)
  12. cls.x = np.random.random(100)
  13. cls.y = np.random.random(100)
  14. cls.v = np.random.random(100)
  15. cls.X = np.random.random((100, 3))
  16. cls.w = np.random.random(100)
  17. def test_1d_count(self):
  18. x = self.x
  19. v = self.v
  20. count1, edges1, bc = binned_statistic(x, v, 'count', bins=10)
  21. count2, edges2 = np.histogram(x, bins=10)
  22. assert_allclose(count1, count2)
  23. assert_allclose(edges1, edges2)
  24. def test_gh5927(self):
  25. # smoke test for gh5927 - binned_statistic was using `is` for string
  26. # comparison
  27. x = self.x
  28. v = self.v
  29. statistics = [u'mean', u'median', u'count', u'sum']
  30. for statistic in statistics:
  31. res = binned_statistic(x, v, statistic, bins=10)
  32. def test_1d_result_attributes(self):
  33. x = self.x
  34. v = self.v
  35. res = binned_statistic(x, v, 'count', bins=10)
  36. attributes = ('statistic', 'bin_edges', 'binnumber')
  37. check_named_results(res, attributes)
  38. def test_1d_sum(self):
  39. x = self.x
  40. v = self.v
  41. sum1, edges1, bc = binned_statistic(x, v, 'sum', bins=10)
  42. sum2, edges2 = np.histogram(x, bins=10, weights=v)
  43. assert_allclose(sum1, sum2)
  44. assert_allclose(edges1, edges2)
  45. def test_1d_mean(self):
  46. x = self.x
  47. v = self.v
  48. stat1, edges1, bc = binned_statistic(x, v, 'mean', bins=10)
  49. stat2, edges2, bc = binned_statistic(x, v, np.mean, bins=10)
  50. assert_allclose(stat1, stat2)
  51. assert_allclose(edges1, edges2)
  52. def test_1d_std(self):
  53. x = self.x
  54. v = self.v
  55. stat1, edges1, bc = binned_statistic(x, v, 'std', bins=10)
  56. stat2, edges2, bc = binned_statistic(x, v, np.std, bins=10)
  57. assert_allclose(stat1, stat2)
  58. assert_allclose(edges1, edges2)
  59. def test_1d_min(self):
  60. x = self.x
  61. v = self.v
  62. stat1, edges1, bc = binned_statistic(x, v, 'min', bins=10)
  63. stat2, edges2, bc = binned_statistic(x, v, np.min, bins=10)
  64. assert_allclose(stat1, stat2)
  65. assert_allclose(edges1, edges2)
  66. def test_1d_max(self):
  67. x = self.x
  68. v = self.v
  69. stat1, edges1, bc = binned_statistic(x, v, 'max', bins=10)
  70. stat2, edges2, bc = binned_statistic(x, v, np.max, bins=10)
  71. assert_allclose(stat1, stat2)
  72. assert_allclose(edges1, edges2)
  73. def test_1d_median(self):
  74. x = self.x
  75. v = self.v
  76. stat1, edges1, bc = binned_statistic(x, v, 'median', bins=10)
  77. stat2, edges2, bc = binned_statistic(x, v, np.median, bins=10)
  78. assert_allclose(stat1, stat2)
  79. assert_allclose(edges1, edges2)
  80. def test_1d_bincode(self):
  81. x = self.x[:20]
  82. v = self.v[:20]
  83. count1, edges1, bc = binned_statistic(x, v, 'count', bins=3)
  84. bc2 = np.array([3, 2, 1, 3, 2, 3, 3, 3, 3, 1, 1, 3, 3, 1, 2, 3, 1,
  85. 1, 2, 1])
  86. bcount = [(bc == i).sum() for i in np.unique(bc)]
  87. assert_allclose(bc, bc2)
  88. assert_allclose(bcount, count1)
  89. def test_1d_range_keyword(self):
  90. # Regression test for gh-3063, range can be (min, max) or [(min, max)]
  91. np.random.seed(9865)
  92. x = np.arange(30)
  93. data = np.random.random(30)
  94. mean, bins, _ = binned_statistic(x[:15], data[:15])
  95. mean_range, bins_range, _ = binned_statistic(x, data, range=[(0, 14)])
  96. mean_range2, bins_range2, _ = binned_statistic(x, data, range=(0, 14))
  97. assert_allclose(mean, mean_range)
  98. assert_allclose(bins, bins_range)
  99. assert_allclose(mean, mean_range2)
  100. assert_allclose(bins, bins_range2)
  101. def test_1d_multi_values(self):
  102. x = self.x
  103. v = self.v
  104. w = self.w
  105. stat1v, edges1v, bc1v = binned_statistic(x, v, 'mean', bins=10)
  106. stat1w, edges1w, bc1w = binned_statistic(x, w, 'mean', bins=10)
  107. stat2, edges2, bc2 = binned_statistic(x, [v, w], 'mean', bins=10)
  108. assert_allclose(stat2[0], stat1v)
  109. assert_allclose(stat2[1], stat1w)
  110. assert_allclose(edges1v, edges2)
  111. assert_allclose(bc1v, bc2)
  112. def test_2d_count(self):
  113. x = self.x
  114. y = self.y
  115. v = self.v
  116. count1, binx1, biny1, bc = binned_statistic_2d(
  117. x, y, v, 'count', bins=5)
  118. count2, binx2, biny2 = np.histogram2d(x, y, bins=5)
  119. assert_allclose(count1, count2)
  120. assert_allclose(binx1, binx2)
  121. assert_allclose(biny1, biny2)
  122. def test_2d_result_attributes(self):
  123. x = self.x
  124. y = self.y
  125. v = self.v
  126. res = binned_statistic_2d(x, y, v, 'count', bins=5)
  127. attributes = ('statistic', 'x_edge', 'y_edge', 'binnumber')
  128. check_named_results(res, attributes)
  129. def test_2d_sum(self):
  130. x = self.x
  131. y = self.y
  132. v = self.v
  133. sum1, binx1, biny1, bc = binned_statistic_2d(x, y, v, 'sum', bins=5)
  134. sum2, binx2, biny2 = np.histogram2d(x, y, bins=5, weights=v)
  135. assert_allclose(sum1, sum2)
  136. assert_allclose(binx1, binx2)
  137. assert_allclose(biny1, biny2)
  138. def test_2d_mean(self):
  139. x = self.x
  140. y = self.y
  141. v = self.v
  142. stat1, binx1, biny1, bc = binned_statistic_2d(x, y, v, 'mean', bins=5)
  143. stat2, binx2, biny2, bc = binned_statistic_2d(x, y, v, np.mean, bins=5)
  144. assert_allclose(stat1, stat2)
  145. assert_allclose(binx1, binx2)
  146. assert_allclose(biny1, biny2)
  147. def test_2d_mean_unicode(self):
  148. x = self.x
  149. y = self.y
  150. v = self.v
  151. stat1, binx1, biny1, bc = binned_statistic_2d(
  152. x, y, v, u('mean'), bins=5)
  153. stat2, binx2, biny2, bc = binned_statistic_2d(x, y, v, np.mean, bins=5)
  154. assert_allclose(stat1, stat2)
  155. assert_allclose(binx1, binx2)
  156. assert_allclose(biny1, biny2)
  157. def test_2d_std(self):
  158. x = self.x
  159. y = self.y
  160. v = self.v
  161. stat1, binx1, biny1, bc = binned_statistic_2d(x, y, v, 'std', bins=5)
  162. stat2, binx2, biny2, bc = binned_statistic_2d(x, y, v, np.std, bins=5)
  163. assert_allclose(stat1, stat2)
  164. assert_allclose(binx1, binx2)
  165. assert_allclose(biny1, biny2)
  166. def test_2d_min(self):
  167. x = self.x
  168. y = self.y
  169. v = self.v
  170. stat1, binx1, biny1, bc = binned_statistic_2d(x, y, v, 'min', bins=5)
  171. stat2, binx2, biny2, bc = binned_statistic_2d(x, y, v, np.min, bins=5)
  172. assert_allclose(stat1, stat2)
  173. assert_allclose(binx1, binx2)
  174. assert_allclose(biny1, biny2)
  175. def test_2d_max(self):
  176. x = self.x
  177. y = self.y
  178. v = self.v
  179. stat1, binx1, biny1, bc = binned_statistic_2d(x, y, v, 'max', bins=5)
  180. stat2, binx2, biny2, bc = binned_statistic_2d(x, y, v, np.max, bins=5)
  181. assert_allclose(stat1, stat2)
  182. assert_allclose(binx1, binx2)
  183. assert_allclose(biny1, biny2)
  184. def test_2d_median(self):
  185. x = self.x
  186. y = self.y
  187. v = self.v
  188. stat1, binx1, biny1, bc = binned_statistic_2d(
  189. x, y, v, 'median', bins=5)
  190. stat2, binx2, biny2, bc = binned_statistic_2d(
  191. x, y, v, np.median, bins=5)
  192. assert_allclose(stat1, stat2)
  193. assert_allclose(binx1, binx2)
  194. assert_allclose(biny1, biny2)
  195. def test_2d_bincode(self):
  196. x = self.x[:20]
  197. y = self.y[:20]
  198. v = self.v[:20]
  199. count1, binx1, biny1, bc = binned_statistic_2d(
  200. x, y, v, 'count', bins=3)
  201. bc2 = np.array([17, 11, 6, 16, 11, 17, 18, 17, 17, 7, 6, 18, 16,
  202. 6, 11, 16, 6, 6, 11, 8])
  203. bcount = [(bc == i).sum() for i in np.unique(bc)]
  204. assert_allclose(bc, bc2)
  205. count1adj = count1[count1.nonzero()]
  206. assert_allclose(bcount, count1adj)
  207. def test_2d_multi_values(self):
  208. x = self.x
  209. y = self.y
  210. v = self.v
  211. w = self.w
  212. stat1v, binx1v, biny1v, bc1v = binned_statistic_2d(
  213. x, y, v, 'mean', bins=8)
  214. stat1w, binx1w, biny1w, bc1w = binned_statistic_2d(
  215. x, y, w, 'mean', bins=8)
  216. stat2, binx2, biny2, bc2 = binned_statistic_2d(
  217. x, y, [v, w], 'mean', bins=8)
  218. assert_allclose(stat2[0], stat1v)
  219. assert_allclose(stat2[1], stat1w)
  220. assert_allclose(binx1v, binx2)
  221. assert_allclose(biny1w, biny2)
  222. assert_allclose(bc1v, bc2)
  223. def test_2d_binnumbers_unraveled(self):
  224. x = self.x
  225. y = self.y
  226. v = self.v
  227. stat, edgesx, bcx = binned_statistic(x, v, 'mean', bins=20)
  228. stat, edgesy, bcy = binned_statistic(y, v, 'mean', bins=10)
  229. stat2, edgesx2, edgesy2, bc2 = binned_statistic_2d(
  230. x, y, v, 'mean', bins=(20, 10), expand_binnumbers=True)
  231. bcx3 = np.searchsorted(edgesx, x, side='right')
  232. bcy3 = np.searchsorted(edgesy, y, side='right')
  233. # `numpy.searchsorted` is non-inclusive on right-edge, compensate
  234. bcx3[x == x.max()] -= 1
  235. bcy3[y == y.max()] -= 1
  236. assert_allclose(bcx, bc2[0])
  237. assert_allclose(bcy, bc2[1])
  238. assert_allclose(bcx3, bc2[0])
  239. assert_allclose(bcy3, bc2[1])
  240. def test_dd_count(self):
  241. X = self.X
  242. v = self.v
  243. count1, edges1, bc = binned_statistic_dd(X, v, 'count', bins=3)
  244. count2, edges2 = np.histogramdd(X, bins=3)
  245. assert_allclose(count1, count2)
  246. assert_allclose(edges1, edges2)
  247. def test_dd_result_attributes(self):
  248. X = self.X
  249. v = self.v
  250. res = binned_statistic_dd(X, v, 'count', bins=3)
  251. attributes = ('statistic', 'bin_edges', 'binnumber')
  252. check_named_results(res, attributes)
  253. def test_dd_sum(self):
  254. X = self.X
  255. v = self.v
  256. sum1, edges1, bc = binned_statistic_dd(X, v, 'sum', bins=3)
  257. sum2, edges2 = np.histogramdd(X, bins=3, weights=v)
  258. assert_allclose(sum1, sum2)
  259. assert_allclose(edges1, edges2)
  260. def test_dd_mean(self):
  261. X = self.X
  262. v = self.v
  263. stat1, edges1, bc = binned_statistic_dd(X, v, 'mean', bins=3)
  264. stat2, edges2, bc = binned_statistic_dd(X, v, np.mean, bins=3)
  265. assert_allclose(stat1, stat2)
  266. assert_allclose(edges1, edges2)
  267. def test_dd_std(self):
  268. X = self.X
  269. v = self.v
  270. stat1, edges1, bc = binned_statistic_dd(X, v, 'std', bins=3)
  271. stat2, edges2, bc = binned_statistic_dd(X, v, np.std, bins=3)
  272. assert_allclose(stat1, stat2)
  273. assert_allclose(edges1, edges2)
  274. def test_dd_min(self):
  275. X = self.X
  276. v = self.v
  277. stat1, edges1, bc = binned_statistic_dd(X, v, 'min', bins=3)
  278. stat2, edges2, bc = binned_statistic_dd(X, v, np.min, bins=3)
  279. assert_allclose(stat1, stat2)
  280. assert_allclose(edges1, edges2)
  281. def test_dd_max(self):
  282. X = self.X
  283. v = self.v
  284. stat1, edges1, bc = binned_statistic_dd(X, v, 'max', bins=3)
  285. stat2, edges2, bc = binned_statistic_dd(X, v, np.max, bins=3)
  286. assert_allclose(stat1, stat2)
  287. assert_allclose(edges1, edges2)
  288. def test_dd_median(self):
  289. X = self.X
  290. v = self.v
  291. stat1, edges1, bc = binned_statistic_dd(X, v, 'median', bins=3)
  292. stat2, edges2, bc = binned_statistic_dd(X, v, np.median, bins=3)
  293. assert_allclose(stat1, stat2)
  294. assert_allclose(edges1, edges2)
  295. def test_dd_bincode(self):
  296. X = self.X[:20]
  297. v = self.v[:20]
  298. count1, edges1, bc = binned_statistic_dd(X, v, 'count', bins=3)
  299. bc2 = np.array([63, 33, 86, 83, 88, 67, 57, 33, 42, 41, 82, 83, 92,
  300. 32, 36, 91, 43, 87, 81, 81])
  301. bcount = [(bc == i).sum() for i in np.unique(bc)]
  302. assert_allclose(bc, bc2)
  303. count1adj = count1[count1.nonzero()]
  304. assert_allclose(bcount, count1adj)
  305. def test_dd_multi_values(self):
  306. X = self.X
  307. v = self.v
  308. w = self.w
  309. stat1v, edges1v, bc1v = binned_statistic_dd(X, v, np.std, bins=8)
  310. stat1w, edges1w, bc1w = binned_statistic_dd(X, w, np.std, bins=8)
  311. stat2, edges2, bc2 = binned_statistic_dd(X, [v, w], np.std, bins=8)
  312. assert_allclose(stat2[0], stat1v)
  313. assert_allclose(stat2[1], stat1w)
  314. assert_allclose(edges1v, edges2)
  315. assert_allclose(edges1w, edges2)
  316. assert_allclose(bc1v, bc2)
  317. def test_dd_binnumbers_unraveled(self):
  318. X = self.X
  319. v = self.v
  320. stat, edgesx, bcx = binned_statistic(X[:, 0], v, 'mean', bins=15)
  321. stat, edgesy, bcy = binned_statistic(X[:, 1], v, 'mean', bins=20)
  322. stat, edgesz, bcz = binned_statistic(X[:, 2], v, 'mean', bins=10)
  323. stat2, edges2, bc2 = binned_statistic_dd(
  324. X, v, 'mean', bins=(15, 20, 10), expand_binnumbers=True)
  325. assert_allclose(bcx, bc2[0])
  326. assert_allclose(bcy, bc2[1])
  327. assert_allclose(bcz, bc2[2])