test_hierarchy.py 41 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062
  1. #
  2. # Author: Damian Eads
  3. # Date: April 17, 2008
  4. #
  5. # Copyright (C) 2008 Damian Eads
  6. #
  7. # Redistribution and use in source and binary forms, with or without
  8. # modification, are permitted provided that the following conditions
  9. # are met:
  10. #
  11. # 1. Redistributions of source code must retain the above copyright
  12. # notice, this list of conditions and the following disclaimer.
  13. #
  14. # 2. Redistributions in binary form must reproduce the above
  15. # copyright notice, this list of conditions and the following
  16. # disclaimer in the documentation and/or other materials provided
  17. # with the distribution.
  18. #
  19. # 3. The name of the author may not be used to endorse or promote
  20. # products derived from this software without specific prior
  21. # written permission.
  22. #
  23. # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
  24. # OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  25. # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  26. # ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
  27. # DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  28. # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
  29. # GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  30. # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  31. # WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
  32. # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  33. # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  34. from __future__ import division, print_function, absolute_import
  35. import numpy as np
  36. from numpy.testing import assert_allclose, assert_equal, assert_, assert_warns
  37. import pytest
  38. from pytest import raises as assert_raises
  39. from scipy._lib.six import xrange, u
  40. import scipy.cluster.hierarchy
  41. from scipy.cluster.hierarchy import (
  42. ClusterWarning, linkage, from_mlab_linkage, to_mlab_linkage,
  43. num_obs_linkage, inconsistent, cophenet, fclusterdata, fcluster,
  44. is_isomorphic, single, leaders, complete, weighted, centroid,
  45. correspond, is_monotonic, maxdists, maxinconsts, maxRstat,
  46. is_valid_linkage, is_valid_im, to_tree, leaves_list, dendrogram,
  47. set_link_color_palette, cut_tree, optimal_leaf_ordering,
  48. _order_cluster_tree, _hierarchy, _LINKAGE_METHODS)
  49. from scipy.spatial.distance import pdist
  50. from scipy.cluster._hierarchy import Heap
  51. from . import hierarchy_test_data
  52. # Matplotlib is not a scipy dependency but is optionally used in dendrogram, so
  53. # check if it's available
  54. try:
  55. import matplotlib
  56. # and set the backend to be Agg (no gui)
  57. matplotlib.use('Agg')
  58. # before importing pyplot
  59. import matplotlib.pyplot as plt
  60. have_matplotlib = True
  61. except Exception:
  62. have_matplotlib = False
  63. class TestLinkage(object):
  64. def test_linkage_non_finite_elements_in_distance_matrix(self):
  65. # Tests linkage(Y) where Y contains a non-finite element (e.g. NaN or Inf).
  66. # Exception expected.
  67. y = np.zeros((6,))
  68. y[0] = np.nan
  69. assert_raises(ValueError, linkage, y)
  70. def test_linkage_empty_distance_matrix(self):
  71. # Tests linkage(Y) where Y is a 0x4 linkage matrix. Exception expected.
  72. y = np.zeros((0,))
  73. assert_raises(ValueError, linkage, y)
  74. def test_linkage_tdist(self):
  75. for method in ['single', 'complete', 'average', 'weighted', u('single')]:
  76. self.check_linkage_tdist(method)
  77. def check_linkage_tdist(self, method):
  78. # Tests linkage(Y, method) on the tdist data set.
  79. Z = linkage(hierarchy_test_data.ytdist, method)
  80. expectedZ = getattr(hierarchy_test_data, 'linkage_ytdist_' + method)
  81. assert_allclose(Z, expectedZ, atol=1e-10)
  82. def test_linkage_X(self):
  83. for method in ['centroid', 'median', 'ward']:
  84. self.check_linkage_q(method)
  85. def check_linkage_q(self, method):
  86. # Tests linkage(Y, method) on the Q data set.
  87. Z = linkage(hierarchy_test_data.X, method)
  88. expectedZ = getattr(hierarchy_test_data, 'linkage_X_' + method)
  89. assert_allclose(Z, expectedZ, atol=1e-06)
  90. y = scipy.spatial.distance.pdist(hierarchy_test_data.X,
  91. metric="euclidean")
  92. Z = linkage(y, method)
  93. assert_allclose(Z, expectedZ, atol=1e-06)
  94. def test_compare_with_trivial(self):
  95. rng = np.random.RandomState(0)
  96. n = 20
  97. X = rng.rand(n, 2)
  98. d = pdist(X)
  99. for method, code in _LINKAGE_METHODS.items():
  100. Z_trivial = _hierarchy.linkage(d, n, code)
  101. Z = linkage(d, method)
  102. assert_allclose(Z_trivial, Z, rtol=1e-14, atol=1e-15)
  103. def test_optimal_leaf_ordering(self):
  104. Z = linkage(hierarchy_test_data.ytdist, optimal_ordering=True)
  105. expectedZ = getattr(hierarchy_test_data, 'linkage_ytdist_single_olo')
  106. assert_allclose(Z, expectedZ, atol=1e-10)
  107. class TestLinkageTies(object):
  108. _expectations = {
  109. 'single': np.array([[0, 1, 1.41421356, 2],
  110. [2, 3, 1.41421356, 3]]),
  111. 'complete': np.array([[0, 1, 1.41421356, 2],
  112. [2, 3, 2.82842712, 3]]),
  113. 'average': np.array([[0, 1, 1.41421356, 2],
  114. [2, 3, 2.12132034, 3]]),
  115. 'weighted': np.array([[0, 1, 1.41421356, 2],
  116. [2, 3, 2.12132034, 3]]),
  117. 'centroid': np.array([[0, 1, 1.41421356, 2],
  118. [2, 3, 2.12132034, 3]]),
  119. 'median': np.array([[0, 1, 1.41421356, 2],
  120. [2, 3, 2.12132034, 3]]),
  121. 'ward': np.array([[0, 1, 1.41421356, 2],
  122. [2, 3, 2.44948974, 3]]),
  123. }
  124. def test_linkage_ties(self):
  125. for method in ['single', 'complete', 'average', 'weighted', 'centroid', 'median', 'ward']:
  126. self.check_linkage_ties(method)
  127. def check_linkage_ties(self, method):
  128. X = np.array([[-1, -1], [0, 0], [1, 1]])
  129. Z = linkage(X, method=method)
  130. expectedZ = self._expectations[method]
  131. assert_allclose(Z, expectedZ, atol=1e-06)
  132. class TestInconsistent(object):
  133. def test_inconsistent_tdist(self):
  134. for depth in hierarchy_test_data.inconsistent_ytdist:
  135. self.check_inconsistent_tdist(depth)
  136. def check_inconsistent_tdist(self, depth):
  137. Z = hierarchy_test_data.linkage_ytdist_single
  138. assert_allclose(inconsistent(Z, depth),
  139. hierarchy_test_data.inconsistent_ytdist[depth])
  140. class TestCopheneticDistance(object):
  141. def test_linkage_cophenet_tdist_Z(self):
  142. # Tests cophenet(Z) on tdist data set.
  143. expectedM = np.array([268, 295, 255, 255, 295, 295, 268, 268, 295, 295,
  144. 295, 138, 219, 295, 295])
  145. Z = hierarchy_test_data.linkage_ytdist_single
  146. M = cophenet(Z)
  147. assert_allclose(M, expectedM, atol=1e-10)
  148. def test_linkage_cophenet_tdist_Z_Y(self):
  149. # Tests cophenet(Z, Y) on tdist data set.
  150. Z = hierarchy_test_data.linkage_ytdist_single
  151. (c, M) = cophenet(Z, hierarchy_test_data.ytdist)
  152. expectedM = np.array([268, 295, 255, 255, 295, 295, 268, 268, 295, 295,
  153. 295, 138, 219, 295, 295])
  154. expectedc = 0.639931296433393415057366837573
  155. assert_allclose(c, expectedc, atol=1e-10)
  156. assert_allclose(M, expectedM, atol=1e-10)
  157. class TestMLabLinkageConversion(object):
  158. def test_mlab_linkage_conversion_empty(self):
  159. # Tests from/to_mlab_linkage on empty linkage array.
  160. X = np.asarray([])
  161. assert_equal(from_mlab_linkage([]), X)
  162. assert_equal(to_mlab_linkage([]), X)
  163. def test_mlab_linkage_conversion_single_row(self):
  164. # Tests from/to_mlab_linkage on linkage array with single row.
  165. Z = np.asarray([[0., 1., 3., 2.]])
  166. Zm = [[1, 2, 3]]
  167. assert_equal(from_mlab_linkage(Zm), Z)
  168. assert_equal(to_mlab_linkage(Z), Zm)
  169. def test_mlab_linkage_conversion_multiple_rows(self):
  170. # Tests from/to_mlab_linkage on linkage array with multiple rows.
  171. Zm = np.asarray([[3, 6, 138], [4, 5, 219],
  172. [1, 8, 255], [2, 9, 268], [7, 10, 295]])
  173. Z = np.array([[2., 5., 138., 2.],
  174. [3., 4., 219., 2.],
  175. [0., 7., 255., 3.],
  176. [1., 8., 268., 4.],
  177. [6., 9., 295., 6.]],
  178. dtype=np.double)
  179. assert_equal(from_mlab_linkage(Zm), Z)
  180. assert_equal(to_mlab_linkage(Z), Zm)
  181. class TestFcluster(object):
  182. def test_fclusterdata(self):
  183. for t in hierarchy_test_data.fcluster_inconsistent:
  184. self.check_fclusterdata(t, 'inconsistent')
  185. for t in hierarchy_test_data.fcluster_distance:
  186. self.check_fclusterdata(t, 'distance')
  187. for t in hierarchy_test_data.fcluster_maxclust:
  188. self.check_fclusterdata(t, 'maxclust')
  189. def check_fclusterdata(self, t, criterion):
  190. # Tests fclusterdata(X, criterion=criterion, t=t) on a random 3-cluster data set.
  191. expectedT = getattr(hierarchy_test_data, 'fcluster_' + criterion)[t]
  192. X = hierarchy_test_data.Q_X
  193. T = fclusterdata(X, criterion=criterion, t=t)
  194. assert_(is_isomorphic(T, expectedT))
  195. def test_fcluster(self):
  196. for t in hierarchy_test_data.fcluster_inconsistent:
  197. self.check_fcluster(t, 'inconsistent')
  198. for t in hierarchy_test_data.fcluster_distance:
  199. self.check_fcluster(t, 'distance')
  200. for t in hierarchy_test_data.fcluster_maxclust:
  201. self.check_fcluster(t, 'maxclust')
  202. def check_fcluster(self, t, criterion):
  203. # Tests fcluster(Z, criterion=criterion, t=t) on a random 3-cluster data set.
  204. expectedT = getattr(hierarchy_test_data, 'fcluster_' + criterion)[t]
  205. Z = single(hierarchy_test_data.Q_X)
  206. T = fcluster(Z, criterion=criterion, t=t)
  207. assert_(is_isomorphic(T, expectedT))
  208. def test_fcluster_monocrit(self):
  209. for t in hierarchy_test_data.fcluster_distance:
  210. self.check_fcluster_monocrit(t)
  211. for t in hierarchy_test_data.fcluster_maxclust:
  212. self.check_fcluster_maxclust_monocrit(t)
  213. def check_fcluster_monocrit(self, t):
  214. expectedT = hierarchy_test_data.fcluster_distance[t]
  215. Z = single(hierarchy_test_data.Q_X)
  216. T = fcluster(Z, t, criterion='monocrit', monocrit=maxdists(Z))
  217. assert_(is_isomorphic(T, expectedT))
  218. def check_fcluster_maxclust_monocrit(self, t):
  219. expectedT = hierarchy_test_data.fcluster_maxclust[t]
  220. Z = single(hierarchy_test_data.Q_X)
  221. T = fcluster(Z, t, criterion='maxclust_monocrit', monocrit=maxdists(Z))
  222. assert_(is_isomorphic(T, expectedT))
  223. class TestLeaders(object):
  224. def test_leaders_single(self):
  225. # Tests leaders using a flat clustering generated by single linkage.
  226. X = hierarchy_test_data.Q_X
  227. Y = pdist(X)
  228. Z = linkage(Y)
  229. T = fcluster(Z, criterion='maxclust', t=3)
  230. Lright = (np.array([53, 55, 56]), np.array([2, 3, 1]))
  231. L = leaders(Z, T)
  232. assert_equal(L, Lright)
  233. class TestIsIsomorphic(object):
  234. def test_is_isomorphic_1(self):
  235. # Tests is_isomorphic on test case #1 (one flat cluster, different labellings)
  236. a = [1, 1, 1]
  237. b = [2, 2, 2]
  238. assert_(is_isomorphic(a, b))
  239. assert_(is_isomorphic(b, a))
  240. def test_is_isomorphic_2(self):
  241. # Tests is_isomorphic on test case #2 (two flat clusters, different labelings)
  242. a = [1, 7, 1]
  243. b = [2, 3, 2]
  244. assert_(is_isomorphic(a, b))
  245. assert_(is_isomorphic(b, a))
  246. def test_is_isomorphic_3(self):
  247. # Tests is_isomorphic on test case #3 (no flat clusters)
  248. a = []
  249. b = []
  250. assert_(is_isomorphic(a, b))
  251. def test_is_isomorphic_4A(self):
  252. # Tests is_isomorphic on test case #4A (3 flat clusters, different labelings, isomorphic)
  253. a = [1, 2, 3]
  254. b = [1, 3, 2]
  255. assert_(is_isomorphic(a, b))
  256. assert_(is_isomorphic(b, a))
  257. def test_is_isomorphic_4B(self):
  258. # Tests is_isomorphic on test case #4B (3 flat clusters, different labelings, nonisomorphic)
  259. a = [1, 2, 3, 3]
  260. b = [1, 3, 2, 3]
  261. assert_(is_isomorphic(a, b) == False)
  262. assert_(is_isomorphic(b, a) == False)
  263. def test_is_isomorphic_4C(self):
  264. # Tests is_isomorphic on test case #4C (3 flat clusters, different labelings, isomorphic)
  265. a = [7, 2, 3]
  266. b = [6, 3, 2]
  267. assert_(is_isomorphic(a, b))
  268. assert_(is_isomorphic(b, a))
  269. def test_is_isomorphic_5(self):
  270. # Tests is_isomorphic on test case #5 (1000 observations, 2/3/5 random
  271. # clusters, random permutation of the labeling).
  272. for nc in [2, 3, 5]:
  273. self.help_is_isomorphic_randperm(1000, nc)
  274. def test_is_isomorphic_6(self):
  275. # Tests is_isomorphic on test case #5A (1000 observations, 2/3/5 random
  276. # clusters, random permutation of the labeling, slightly
  277. # nonisomorphic.)
  278. for nc in [2, 3, 5]:
  279. self.help_is_isomorphic_randperm(1000, nc, True, 5)
  280. def test_is_isomorphic_7(self):
  281. # Regression test for gh-6271
  282. assert_(not is_isomorphic([1, 2, 3], [1, 1, 1]))
  283. def help_is_isomorphic_randperm(self, nobs, nclusters, noniso=False, nerrors=0):
  284. for k in range(3):
  285. a = np.int_(np.random.rand(nobs) * nclusters)
  286. b = np.zeros(a.size, dtype=np.int_)
  287. P = np.random.permutation(nclusters)
  288. for i in xrange(0, a.shape[0]):
  289. b[i] = P[a[i]]
  290. if noniso:
  291. Q = np.random.permutation(nobs)
  292. b[Q[0:nerrors]] += 1
  293. b[Q[0:nerrors]] %= nclusters
  294. assert_(is_isomorphic(a, b) == (not noniso))
  295. assert_(is_isomorphic(b, a) == (not noniso))
  296. class TestIsValidLinkage(object):
  297. def test_is_valid_linkage_various_size(self):
  298. for nrow, ncol, valid in [(2, 5, False), (2, 3, False),
  299. (1, 4, True), (2, 4, True)]:
  300. self.check_is_valid_linkage_various_size(nrow, ncol, valid)
  301. def check_is_valid_linkage_various_size(self, nrow, ncol, valid):
  302. # Tests is_valid_linkage(Z) with linkage matrics of various sizes
  303. Z = np.asarray([[0, 1, 3.0, 2, 5],
  304. [3, 2, 4.0, 3, 3]], dtype=np.double)
  305. Z = Z[:nrow, :ncol]
  306. assert_(is_valid_linkage(Z) == valid)
  307. if not valid:
  308. assert_raises(ValueError, is_valid_linkage, Z, throw=True)
  309. def test_is_valid_linkage_int_type(self):
  310. # Tests is_valid_linkage(Z) with integer type.
  311. Z = np.asarray([[0, 1, 3.0, 2],
  312. [3, 2, 4.0, 3]], dtype=int)
  313. assert_(is_valid_linkage(Z) == False)
  314. assert_raises(TypeError, is_valid_linkage, Z, throw=True)
  315. def test_is_valid_linkage_empty(self):
  316. # Tests is_valid_linkage(Z) with empty linkage.
  317. Z = np.zeros((0, 4), dtype=np.double)
  318. assert_(is_valid_linkage(Z) == False)
  319. assert_raises(ValueError, is_valid_linkage, Z, throw=True)
  320. def test_is_valid_linkage_4_and_up(self):
  321. # Tests is_valid_linkage(Z) on linkage on observation sets between
  322. # sizes 4 and 15 (step size 3).
  323. for i in xrange(4, 15, 3):
  324. y = np.random.rand(i*(i-1)//2)
  325. Z = linkage(y)
  326. assert_(is_valid_linkage(Z) == True)
  327. def test_is_valid_linkage_4_and_up_neg_index_left(self):
  328. # Tests is_valid_linkage(Z) on linkage on observation sets between
  329. # sizes 4 and 15 (step size 3) with negative indices (left).
  330. for i in xrange(4, 15, 3):
  331. y = np.random.rand(i*(i-1)//2)
  332. Z = linkage(y)
  333. Z[i//2,0] = -2
  334. assert_(is_valid_linkage(Z) == False)
  335. assert_raises(ValueError, is_valid_linkage, Z, throw=True)
  336. def test_is_valid_linkage_4_and_up_neg_index_right(self):
  337. # Tests is_valid_linkage(Z) on linkage on observation sets between
  338. # sizes 4 and 15 (step size 3) with negative indices (right).
  339. for i in xrange(4, 15, 3):
  340. y = np.random.rand(i*(i-1)//2)
  341. Z = linkage(y)
  342. Z[i//2,1] = -2
  343. assert_(is_valid_linkage(Z) == False)
  344. assert_raises(ValueError, is_valid_linkage, Z, throw=True)
  345. def test_is_valid_linkage_4_and_up_neg_dist(self):
  346. # Tests is_valid_linkage(Z) on linkage on observation sets between
  347. # sizes 4 and 15 (step size 3) with negative distances.
  348. for i in xrange(4, 15, 3):
  349. y = np.random.rand(i*(i-1)//2)
  350. Z = linkage(y)
  351. Z[i//2,2] = -0.5
  352. assert_(is_valid_linkage(Z) == False)
  353. assert_raises(ValueError, is_valid_linkage, Z, throw=True)
  354. def test_is_valid_linkage_4_and_up_neg_counts(self):
  355. # Tests is_valid_linkage(Z) on linkage on observation sets between
  356. # sizes 4 and 15 (step size 3) with negative counts.
  357. for i in xrange(4, 15, 3):
  358. y = np.random.rand(i*(i-1)//2)
  359. Z = linkage(y)
  360. Z[i//2,3] = -2
  361. assert_(is_valid_linkage(Z) == False)
  362. assert_raises(ValueError, is_valid_linkage, Z, throw=True)
  363. class TestIsValidInconsistent(object):
  364. def test_is_valid_im_int_type(self):
  365. # Tests is_valid_im(R) with integer type.
  366. R = np.asarray([[0, 1, 3.0, 2],
  367. [3, 2, 4.0, 3]], dtype=int)
  368. assert_(is_valid_im(R) == False)
  369. assert_raises(TypeError, is_valid_im, R, throw=True)
  370. def test_is_valid_im_various_size(self):
  371. for nrow, ncol, valid in [(2, 5, False), (2, 3, False),
  372. (1, 4, True), (2, 4, True)]:
  373. self.check_is_valid_im_various_size(nrow, ncol, valid)
  374. def check_is_valid_im_various_size(self, nrow, ncol, valid):
  375. # Tests is_valid_im(R) with linkage matrics of various sizes
  376. R = np.asarray([[0, 1, 3.0, 2, 5],
  377. [3, 2, 4.0, 3, 3]], dtype=np.double)
  378. R = R[:nrow, :ncol]
  379. assert_(is_valid_im(R) == valid)
  380. if not valid:
  381. assert_raises(ValueError, is_valid_im, R, throw=True)
  382. def test_is_valid_im_empty(self):
  383. # Tests is_valid_im(R) with empty inconsistency matrix.
  384. R = np.zeros((0, 4), dtype=np.double)
  385. assert_(is_valid_im(R) == False)
  386. assert_raises(ValueError, is_valid_im, R, throw=True)
  387. def test_is_valid_im_4_and_up(self):
  388. # Tests is_valid_im(R) on im on observation sets between sizes 4 and 15
  389. # (step size 3).
  390. for i in xrange(4, 15, 3):
  391. y = np.random.rand(i*(i-1)//2)
  392. Z = linkage(y)
  393. R = inconsistent(Z)
  394. assert_(is_valid_im(R) == True)
  395. def test_is_valid_im_4_and_up_neg_index_left(self):
  396. # Tests is_valid_im(R) on im on observation sets between sizes 4 and 15
  397. # (step size 3) with negative link height means.
  398. for i in xrange(4, 15, 3):
  399. y = np.random.rand(i*(i-1)//2)
  400. Z = linkage(y)
  401. R = inconsistent(Z)
  402. R[i//2,0] = -2.0
  403. assert_(is_valid_im(R) == False)
  404. assert_raises(ValueError, is_valid_im, R, throw=True)
  405. def test_is_valid_im_4_and_up_neg_index_right(self):
  406. # Tests is_valid_im(R) on im on observation sets between sizes 4 and 15
  407. # (step size 3) with negative link height standard deviations.
  408. for i in xrange(4, 15, 3):
  409. y = np.random.rand(i*(i-1)//2)
  410. Z = linkage(y)
  411. R = inconsistent(Z)
  412. R[i//2,1] = -2.0
  413. assert_(is_valid_im(R) == False)
  414. assert_raises(ValueError, is_valid_im, R, throw=True)
  415. def test_is_valid_im_4_and_up_neg_dist(self):
  416. # Tests is_valid_im(R) on im on observation sets between sizes 4 and 15
  417. # (step size 3) with negative link counts.
  418. for i in xrange(4, 15, 3):
  419. y = np.random.rand(i*(i-1)//2)
  420. Z = linkage(y)
  421. R = inconsistent(Z)
  422. R[i//2,2] = -0.5
  423. assert_(is_valid_im(R) == False)
  424. assert_raises(ValueError, is_valid_im, R, throw=True)
  425. class TestNumObsLinkage(object):
  426. def test_num_obs_linkage_empty(self):
  427. # Tests num_obs_linkage(Z) with empty linkage.
  428. Z = np.zeros((0, 4), dtype=np.double)
  429. assert_raises(ValueError, num_obs_linkage, Z)
  430. def test_num_obs_linkage_1x4(self):
  431. # Tests num_obs_linkage(Z) on linkage over 2 observations.
  432. Z = np.asarray([[0, 1, 3.0, 2]], dtype=np.double)
  433. assert_equal(num_obs_linkage(Z), 2)
  434. def test_num_obs_linkage_2x4(self):
  435. # Tests num_obs_linkage(Z) on linkage over 3 observations.
  436. Z = np.asarray([[0, 1, 3.0, 2],
  437. [3, 2, 4.0, 3]], dtype=np.double)
  438. assert_equal(num_obs_linkage(Z), 3)
  439. def test_num_obs_linkage_4_and_up(self):
  440. # Tests num_obs_linkage(Z) on linkage on observation sets between sizes
  441. # 4 and 15 (step size 3).
  442. for i in xrange(4, 15, 3):
  443. y = np.random.rand(i*(i-1)//2)
  444. Z = linkage(y)
  445. assert_equal(num_obs_linkage(Z), i)
  446. class TestLeavesList(object):
  447. def test_leaves_list_1x4(self):
  448. # Tests leaves_list(Z) on a 1x4 linkage.
  449. Z = np.asarray([[0, 1, 3.0, 2]], dtype=np.double)
  450. to_tree(Z)
  451. assert_equal(leaves_list(Z), [0, 1])
  452. def test_leaves_list_2x4(self):
  453. # Tests leaves_list(Z) on a 2x4 linkage.
  454. Z = np.asarray([[0, 1, 3.0, 2],
  455. [3, 2, 4.0, 3]], dtype=np.double)
  456. to_tree(Z)
  457. assert_equal(leaves_list(Z), [0, 1, 2])
  458. def test_leaves_list_Q(self):
  459. for method in ['single', 'complete', 'average', 'weighted', 'centroid',
  460. 'median', 'ward']:
  461. self.check_leaves_list_Q(method)
  462. def check_leaves_list_Q(self, method):
  463. # Tests leaves_list(Z) on the Q data set
  464. X = hierarchy_test_data.Q_X
  465. Z = linkage(X, method)
  466. node = to_tree(Z)
  467. assert_equal(node.pre_order(), leaves_list(Z))
  468. def test_Q_subtree_pre_order(self):
  469. # Tests that pre_order() works when called on sub-trees.
  470. X = hierarchy_test_data.Q_X
  471. Z = linkage(X, 'single')
  472. node = to_tree(Z)
  473. assert_equal(node.pre_order(), (node.get_left().pre_order()
  474. + node.get_right().pre_order()))
  475. class TestCorrespond(object):
  476. def test_correspond_empty(self):
  477. # Tests correspond(Z, y) with empty linkage and condensed distance matrix.
  478. y = np.zeros((0,))
  479. Z = np.zeros((0,4))
  480. assert_raises(ValueError, correspond, Z, y)
  481. def test_correspond_2_and_up(self):
  482. # Tests correspond(Z, y) on linkage and CDMs over observation sets of
  483. # different sizes.
  484. for i in xrange(2, 4):
  485. y = np.random.rand(i*(i-1)//2)
  486. Z = linkage(y)
  487. assert_(correspond(Z, y))
  488. for i in xrange(4, 15, 3):
  489. y = np.random.rand(i*(i-1)//2)
  490. Z = linkage(y)
  491. assert_(correspond(Z, y))
  492. def test_correspond_4_and_up(self):
  493. # Tests correspond(Z, y) on linkage and CDMs over observation sets of
  494. # different sizes. Correspondence should be false.
  495. for (i, j) in (list(zip(list(range(2, 4)), list(range(3, 5)))) +
  496. list(zip(list(range(3, 5)), list(range(2, 4))))):
  497. y = np.random.rand(i*(i-1)//2)
  498. y2 = np.random.rand(j*(j-1)//2)
  499. Z = linkage(y)
  500. Z2 = linkage(y2)
  501. assert_equal(correspond(Z, y2), False)
  502. assert_equal(correspond(Z2, y), False)
  503. def test_correspond_4_and_up_2(self):
  504. # Tests correspond(Z, y) on linkage and CDMs over observation sets of
  505. # different sizes. Correspondence should be false.
  506. for (i, j) in (list(zip(list(range(2, 7)), list(range(16, 21)))) +
  507. list(zip(list(range(2, 7)), list(range(16, 21))))):
  508. y = np.random.rand(i*(i-1)//2)
  509. y2 = np.random.rand(j*(j-1)//2)
  510. Z = linkage(y)
  511. Z2 = linkage(y2)
  512. assert_equal(correspond(Z, y2), False)
  513. assert_equal(correspond(Z2, y), False)
  514. def test_num_obs_linkage_multi_matrix(self):
  515. # Tests num_obs_linkage with observation matrices of multiple sizes.
  516. for n in xrange(2, 10):
  517. X = np.random.rand(n, 4)
  518. Y = pdist(X)
  519. Z = linkage(Y)
  520. assert_equal(num_obs_linkage(Z), n)
  521. class TestIsMonotonic(object):
  522. def test_is_monotonic_empty(self):
  523. # Tests is_monotonic(Z) on an empty linkage.
  524. Z = np.zeros((0, 4))
  525. assert_raises(ValueError, is_monotonic, Z)
  526. def test_is_monotonic_1x4(self):
  527. # Tests is_monotonic(Z) on 1x4 linkage. Expecting True.
  528. Z = np.asarray([[0, 1, 0.3, 2]], dtype=np.double)
  529. assert_equal(is_monotonic(Z), True)
  530. def test_is_monotonic_2x4_T(self):
  531. # Tests is_monotonic(Z) on 2x4 linkage. Expecting True.
  532. Z = np.asarray([[0, 1, 0.3, 2],
  533. [2, 3, 0.4, 3]], dtype=np.double)
  534. assert_equal(is_monotonic(Z), True)
  535. def test_is_monotonic_2x4_F(self):
  536. # Tests is_monotonic(Z) on 2x4 linkage. Expecting False.
  537. Z = np.asarray([[0, 1, 0.4, 2],
  538. [2, 3, 0.3, 3]], dtype=np.double)
  539. assert_equal(is_monotonic(Z), False)
  540. def test_is_monotonic_3x4_T(self):
  541. # Tests is_monotonic(Z) on 3x4 linkage. Expecting True.
  542. Z = np.asarray([[0, 1, 0.3, 2],
  543. [2, 3, 0.4, 2],
  544. [4, 5, 0.6, 4]], dtype=np.double)
  545. assert_equal(is_monotonic(Z), True)
  546. def test_is_monotonic_3x4_F1(self):
  547. # Tests is_monotonic(Z) on 3x4 linkage (case 1). Expecting False.
  548. Z = np.asarray([[0, 1, 0.3, 2],
  549. [2, 3, 0.2, 2],
  550. [4, 5, 0.6, 4]], dtype=np.double)
  551. assert_equal(is_monotonic(Z), False)
  552. def test_is_monotonic_3x4_F2(self):
  553. # Tests is_monotonic(Z) on 3x4 linkage (case 2). Expecting False.
  554. Z = np.asarray([[0, 1, 0.8, 2],
  555. [2, 3, 0.4, 2],
  556. [4, 5, 0.6, 4]], dtype=np.double)
  557. assert_equal(is_monotonic(Z), False)
  558. def test_is_monotonic_3x4_F3(self):
  559. # Tests is_monotonic(Z) on 3x4 linkage (case 3). Expecting False
  560. Z = np.asarray([[0, 1, 0.3, 2],
  561. [2, 3, 0.4, 2],
  562. [4, 5, 0.2, 4]], dtype=np.double)
  563. assert_equal(is_monotonic(Z), False)
  564. def test_is_monotonic_tdist_linkage1(self):
  565. # Tests is_monotonic(Z) on clustering generated by single linkage on
  566. # tdist data set. Expecting True.
  567. Z = linkage(hierarchy_test_data.ytdist, 'single')
  568. assert_equal(is_monotonic(Z), True)
  569. def test_is_monotonic_tdist_linkage2(self):
  570. # Tests is_monotonic(Z) on clustering generated by single linkage on
  571. # tdist data set. Perturbing. Expecting False.
  572. Z = linkage(hierarchy_test_data.ytdist, 'single')
  573. Z[2,2] = 0.0
  574. assert_equal(is_monotonic(Z), False)
  575. def test_is_monotonic_Q_linkage(self):
  576. # Tests is_monotonic(Z) on clustering generated by single linkage on
  577. # Q data set. Expecting True.
  578. X = hierarchy_test_data.Q_X
  579. Z = linkage(X, 'single')
  580. assert_equal(is_monotonic(Z), True)
  581. class TestMaxDists(object):
  582. def test_maxdists_empty_linkage(self):
  583. # Tests maxdists(Z) on empty linkage. Expecting exception.
  584. Z = np.zeros((0, 4), dtype=np.double)
  585. assert_raises(ValueError, maxdists, Z)
  586. def test_maxdists_one_cluster_linkage(self):
  587. # Tests maxdists(Z) on linkage with one cluster.
  588. Z = np.asarray([[0, 1, 0.3, 4]], dtype=np.double)
  589. MD = maxdists(Z)
  590. expectedMD = calculate_maximum_distances(Z)
  591. assert_allclose(MD, expectedMD, atol=1e-15)
  592. def test_maxdists_Q_linkage(self):
  593. for method in ['single', 'complete', 'ward', 'centroid', 'median']:
  594. self.check_maxdists_Q_linkage(method)
  595. def check_maxdists_Q_linkage(self, method):
  596. # Tests maxdists(Z) on the Q data set
  597. X = hierarchy_test_data.Q_X
  598. Z = linkage(X, method)
  599. MD = maxdists(Z)
  600. expectedMD = calculate_maximum_distances(Z)
  601. assert_allclose(MD, expectedMD, atol=1e-15)
  602. class TestMaxInconsts(object):
  603. def test_maxinconsts_empty_linkage(self):
  604. # Tests maxinconsts(Z, R) on empty linkage. Expecting exception.
  605. Z = np.zeros((0, 4), dtype=np.double)
  606. R = np.zeros((0, 4), dtype=np.double)
  607. assert_raises(ValueError, maxinconsts, Z, R)
  608. def test_maxinconsts_difrow_linkage(self):
  609. # Tests maxinconsts(Z, R) on linkage and inconsistency matrices with
  610. # different numbers of clusters. Expecting exception.
  611. Z = np.asarray([[0, 1, 0.3, 4]], dtype=np.double)
  612. R = np.random.rand(2, 4)
  613. assert_raises(ValueError, maxinconsts, Z, R)
  614. def test_maxinconsts_one_cluster_linkage(self):
  615. # Tests maxinconsts(Z, R) on linkage with one cluster.
  616. Z = np.asarray([[0, 1, 0.3, 4]], dtype=np.double)
  617. R = np.asarray([[0, 0, 0, 0.3]], dtype=np.double)
  618. MD = maxinconsts(Z, R)
  619. expectedMD = calculate_maximum_inconsistencies(Z, R)
  620. assert_allclose(MD, expectedMD, atol=1e-15)
  621. def test_maxinconsts_Q_linkage(self):
  622. for method in ['single', 'complete', 'ward', 'centroid', 'median']:
  623. self.check_maxinconsts_Q_linkage(method)
  624. def check_maxinconsts_Q_linkage(self, method):
  625. # Tests maxinconsts(Z, R) on the Q data set
  626. X = hierarchy_test_data.Q_X
  627. Z = linkage(X, method)
  628. R = inconsistent(Z)
  629. MD = maxinconsts(Z, R)
  630. expectedMD = calculate_maximum_inconsistencies(Z, R)
  631. assert_allclose(MD, expectedMD, atol=1e-15)
  632. class TestMaxRStat(object):
  633. def test_maxRstat_invalid_index(self):
  634. for i in [3.3, -1, 4]:
  635. self.check_maxRstat_invalid_index(i)
  636. def check_maxRstat_invalid_index(self, i):
  637. # Tests maxRstat(Z, R, i). Expecting exception.
  638. Z = np.asarray([[0, 1, 0.3, 4]], dtype=np.double)
  639. R = np.asarray([[0, 0, 0, 0.3]], dtype=np.double)
  640. if isinstance(i, int):
  641. assert_raises(ValueError, maxRstat, Z, R, i)
  642. else:
  643. assert_raises(TypeError, maxRstat, Z, R, i)
  644. def test_maxRstat_empty_linkage(self):
  645. for i in range(4):
  646. self.check_maxRstat_empty_linkage(i)
  647. def check_maxRstat_empty_linkage(self, i):
  648. # Tests maxRstat(Z, R, i) on empty linkage. Expecting exception.
  649. Z = np.zeros((0, 4), dtype=np.double)
  650. R = np.zeros((0, 4), dtype=np.double)
  651. assert_raises(ValueError, maxRstat, Z, R, i)
  652. def test_maxRstat_difrow_linkage(self):
  653. for i in range(4):
  654. self.check_maxRstat_difrow_linkage(i)
  655. def check_maxRstat_difrow_linkage(self, i):
  656. # Tests maxRstat(Z, R, i) on linkage and inconsistency matrices with
  657. # different numbers of clusters. Expecting exception.
  658. Z = np.asarray([[0, 1, 0.3, 4]], dtype=np.double)
  659. R = np.random.rand(2, 4)
  660. assert_raises(ValueError, maxRstat, Z, R, i)
  661. def test_maxRstat_one_cluster_linkage(self):
  662. for i in range(4):
  663. self.check_maxRstat_one_cluster_linkage(i)
  664. def check_maxRstat_one_cluster_linkage(self, i):
  665. # Tests maxRstat(Z, R, i) on linkage with one cluster.
  666. Z = np.asarray([[0, 1, 0.3, 4]], dtype=np.double)
  667. R = np.asarray([[0, 0, 0, 0.3]], dtype=np.double)
  668. MD = maxRstat(Z, R, 1)
  669. expectedMD = calculate_maximum_inconsistencies(Z, R, 1)
  670. assert_allclose(MD, expectedMD, atol=1e-15)
  671. def test_maxRstat_Q_linkage(self):
  672. for method in ['single', 'complete', 'ward', 'centroid', 'median']:
  673. for i in range(4):
  674. self.check_maxRstat_Q_linkage(method, i)
  675. def check_maxRstat_Q_linkage(self, method, i):
  676. # Tests maxRstat(Z, R, i) on the Q data set
  677. X = hierarchy_test_data.Q_X
  678. Z = linkage(X, method)
  679. R = inconsistent(Z)
  680. MD = maxRstat(Z, R, 1)
  681. expectedMD = calculate_maximum_inconsistencies(Z, R, 1)
  682. assert_allclose(MD, expectedMD, atol=1e-15)
  683. class TestDendrogram(object):
  684. def test_dendrogram_single_linkage_tdist(self):
  685. # Tests dendrogram calculation on single linkage of the tdist data set.
  686. Z = linkage(hierarchy_test_data.ytdist, 'single')
  687. R = dendrogram(Z, no_plot=True)
  688. leaves = R["leaves"]
  689. assert_equal(leaves, [2, 5, 1, 0, 3, 4])
  690. def test_valid_orientation(self):
  691. Z = linkage(hierarchy_test_data.ytdist, 'single')
  692. assert_raises(ValueError, dendrogram, Z, orientation="foo")
  693. @pytest.mark.skipif(not have_matplotlib, reason="no matplotlib")
  694. def test_dendrogram_plot(self):
  695. for orientation in ['top', 'bottom', 'left', 'right']:
  696. self.check_dendrogram_plot(orientation)
  697. def check_dendrogram_plot(self, orientation):
  698. # Tests dendrogram plotting.
  699. Z = linkage(hierarchy_test_data.ytdist, 'single')
  700. expected = {'color_list': ['g', 'b', 'b', 'b', 'b'],
  701. 'dcoord': [[0.0, 138.0, 138.0, 0.0],
  702. [0.0, 219.0, 219.0, 0.0],
  703. [0.0, 255.0, 255.0, 219.0],
  704. [0.0, 268.0, 268.0, 255.0],
  705. [138.0, 295.0, 295.0, 268.0]],
  706. 'icoord': [[5.0, 5.0, 15.0, 15.0],
  707. [45.0, 45.0, 55.0, 55.0],
  708. [35.0, 35.0, 50.0, 50.0],
  709. [25.0, 25.0, 42.5, 42.5],
  710. [10.0, 10.0, 33.75, 33.75]],
  711. 'ivl': ['2', '5', '1', '0', '3', '4'],
  712. 'leaves': [2, 5, 1, 0, 3, 4]}
  713. fig = plt.figure()
  714. ax = fig.add_subplot(221)
  715. # test that dendrogram accepts ax keyword
  716. R1 = dendrogram(Z, ax=ax, orientation=orientation)
  717. assert_equal(R1, expected)
  718. # test that dendrogram accepts and handle the leaf_font_size and
  719. # leaf_rotation keywords
  720. R1a = dendrogram(Z, ax=ax, orientation=orientation,
  721. leaf_font_size=20, leaf_rotation=90)
  722. testlabel = (
  723. ax.get_xticklabels()[0]
  724. if orientation in ['top', 'bottom']
  725. else ax.get_yticklabels()[0]
  726. )
  727. assert_equal(testlabel.get_rotation(), 90)
  728. assert_equal(testlabel.get_size(), 20)
  729. R1a = dendrogram(Z, ax=ax, orientation=orientation,
  730. leaf_rotation=90)
  731. testlabel = (
  732. ax.get_xticklabels()[0]
  733. if orientation in ['top', 'bottom']
  734. else ax.get_yticklabels()[0]
  735. )
  736. assert_equal(testlabel.get_rotation(), 90)
  737. R1a = dendrogram(Z, ax=ax, orientation=orientation,
  738. leaf_font_size=20)
  739. testlabel = (
  740. ax.get_xticklabels()[0]
  741. if orientation in ['top', 'bottom']
  742. else ax.get_yticklabels()[0]
  743. )
  744. assert_equal(testlabel.get_size(), 20)
  745. plt.close()
  746. # test plotting to gca (will import pylab)
  747. R2 = dendrogram(Z, orientation=orientation)
  748. plt.close()
  749. assert_equal(R2, expected)
  750. @pytest.mark.skipif(not have_matplotlib, reason="no matplotlib")
  751. def test_dendrogram_truncate_mode(self):
  752. Z = linkage(hierarchy_test_data.ytdist, 'single')
  753. R = dendrogram(Z, 2, 'lastp', show_contracted=True)
  754. plt.close()
  755. assert_equal(R, {'color_list': ['b'],
  756. 'dcoord': [[0.0, 295.0, 295.0, 0.0]],
  757. 'icoord': [[5.0, 5.0, 15.0, 15.0]],
  758. 'ivl': ['(2)', '(4)'],
  759. 'leaves': [6, 9]})
  760. R = dendrogram(Z, 2, 'mtica', show_contracted=True)
  761. plt.close()
  762. assert_equal(R, {'color_list': ['g', 'b', 'b', 'b'],
  763. 'dcoord': [[0.0, 138.0, 138.0, 0.0],
  764. [0.0, 255.0, 255.0, 0.0],
  765. [0.0, 268.0, 268.0, 255.0],
  766. [138.0, 295.0, 295.0, 268.0]],
  767. 'icoord': [[5.0, 5.0, 15.0, 15.0],
  768. [35.0, 35.0, 45.0, 45.0],
  769. [25.0, 25.0, 40.0, 40.0],
  770. [10.0, 10.0, 32.5, 32.5]],
  771. 'ivl': ['2', '5', '1', '0', '(2)'],
  772. 'leaves': [2, 5, 1, 0, 7]})
  773. def test_dendrogram_colors(self):
  774. # Tests dendrogram plots with alternate colors
  775. Z = linkage(hierarchy_test_data.ytdist, 'single')
  776. set_link_color_palette(['c', 'm', 'y', 'k'])
  777. R = dendrogram(Z, no_plot=True,
  778. above_threshold_color='g', color_threshold=250)
  779. set_link_color_palette(['g', 'r', 'c', 'm', 'y', 'k'])
  780. color_list = R['color_list']
  781. assert_equal(color_list, ['c', 'm', 'g', 'g', 'g'])
  782. # reset color palette (global list)
  783. set_link_color_palette(None)
  784. def calculate_maximum_distances(Z):
  785. # Used for testing correctness of maxdists.
  786. n = Z.shape[0] + 1
  787. B = np.zeros((n-1,))
  788. q = np.zeros((3,))
  789. for i in xrange(0, n - 1):
  790. q[:] = 0.0
  791. left = Z[i, 0]
  792. right = Z[i, 1]
  793. if left >= n:
  794. q[0] = B[int(left) - n]
  795. if right >= n:
  796. q[1] = B[int(right) - n]
  797. q[2] = Z[i, 2]
  798. B[i] = q.max()
  799. return B
  800. def calculate_maximum_inconsistencies(Z, R, k=3):
  801. # Used for testing correctness of maxinconsts.
  802. n = Z.shape[0] + 1
  803. B = np.zeros((n-1,))
  804. q = np.zeros((3,))
  805. for i in xrange(0, n - 1):
  806. q[:] = 0.0
  807. left = Z[i, 0]
  808. right = Z[i, 1]
  809. if left >= n:
  810. q[0] = B[int(left) - n]
  811. if right >= n:
  812. q[1] = B[int(right) - n]
  813. q[2] = R[i, k]
  814. B[i] = q.max()
  815. return B
  816. def within_tol(a, b, tol):
  817. return np.abs(a - b).max() < tol
  818. def test_unsupported_uncondensed_distance_matrix_linkage_warning():
  819. assert_warns(ClusterWarning, linkage, [[0, 1], [1, 0]])
  820. def test_euclidean_linkage_value_error():
  821. for method in scipy.cluster.hierarchy._EUCLIDEAN_METHODS:
  822. assert_raises(ValueError, linkage, [[1, 1], [1, 1]],
  823. method=method, metric='cityblock')
  824. def test_2x2_linkage():
  825. Z1 = linkage([1], method='single', metric='euclidean')
  826. Z2 = linkage([[0, 1], [0, 0]], method='single', metric='euclidean')
  827. assert_allclose(Z1, Z2)
  828. def test_node_compare():
  829. np.random.seed(23)
  830. nobs = 50
  831. X = np.random.randn(nobs, 4)
  832. Z = scipy.cluster.hierarchy.ward(X)
  833. tree = to_tree(Z)
  834. assert_(tree > tree.get_left())
  835. assert_(tree.get_right() > tree.get_left())
  836. assert_(tree.get_right() == tree.get_right())
  837. assert_(tree.get_right() != tree.get_left())
  838. def test_cut_tree():
  839. np.random.seed(23)
  840. nobs = 50
  841. X = np.random.randn(nobs, 4)
  842. Z = scipy.cluster.hierarchy.ward(X)
  843. cutree = cut_tree(Z)
  844. assert_equal(cutree[:, 0], np.arange(nobs))
  845. assert_equal(cutree[:, -1], np.zeros(nobs))
  846. assert_equal(cutree.max(0), np.arange(nobs - 1, -1, -1))
  847. assert_equal(cutree[:, [-5]], cut_tree(Z, n_clusters=5))
  848. assert_equal(cutree[:, [-5, -10]], cut_tree(Z, n_clusters=[5, 10]))
  849. assert_equal(cutree[:, [-10, -5]], cut_tree(Z, n_clusters=[10, 5]))
  850. nodes = _order_cluster_tree(Z)
  851. heights = np.array([node.dist for node in nodes])
  852. assert_equal(cutree[:, np.searchsorted(heights, [5])],
  853. cut_tree(Z, height=5))
  854. assert_equal(cutree[:, np.searchsorted(heights, [5, 10])],
  855. cut_tree(Z, height=[5, 10]))
  856. assert_equal(cutree[:, np.searchsorted(heights, [10, 5])],
  857. cut_tree(Z, height=[10, 5]))
  858. def test_optimal_leaf_ordering():
  859. # test with the distance vector y
  860. Z = optimal_leaf_ordering(linkage(hierarchy_test_data.ytdist),
  861. hierarchy_test_data.ytdist)
  862. expectedZ = hierarchy_test_data.linkage_ytdist_single_olo
  863. assert_allclose(Z, expectedZ, atol=1e-10)
  864. # test with the observation matrix X
  865. Z = optimal_leaf_ordering(linkage(hierarchy_test_data.X, 'ward'),
  866. hierarchy_test_data.X)
  867. expectedZ = hierarchy_test_data.linkage_X_ward_olo
  868. assert_allclose(Z, expectedZ, atol=1e-06)
  869. def test_Heap():
  870. values = np.array([2, -1, 0, -1.5, 3])
  871. heap = Heap(values)
  872. pair = heap.get_min()
  873. assert_equal(pair['key'], 3)
  874. assert_equal(pair['value'], -1.5)
  875. heap.remove_min()
  876. pair = heap.get_min()
  877. assert_equal(pair['key'], 1)
  878. assert_equal(pair['value'], -1)
  879. heap.change_value(1, 2.5)
  880. pair = heap.get_min()
  881. assert_equal(pair['key'], 2)
  882. assert_equal(pair['value'], 0)
  883. heap.remove_min()
  884. heap.remove_min()
  885. heap.change_value(1, 10)
  886. pair = heap.get_min()
  887. assert_equal(pair['key'], 4)
  888. assert_equal(pair['value'], 3)
  889. heap.remove_min()
  890. pair = heap.get_min()
  891. assert_equal(pair['key'], 1)
  892. assert_equal(pair['value'], 10)