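"""Tests for scipy.sparse._sparsetools: exception propagation, threaded
execution, std::vector typemap coverage, int32/int64 index-overflow
regressions, dtype upcasting, and endianness handling."""
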
from __future__ import division, print_function, absolute_import

import sys
import os
import gc
import re
import threading

import numpy as np
from numpy.testing import assert_equal, assert_, assert_allclose
from scipy.sparse import (_sparsetools, coo_matrix, csr_matrix, csc_matrix,
                          bsr_matrix, dia_matrix)
from scipy.sparse.sputils import supported_dtypes
from scipy._lib._testutils import check_free_memory

import pytest
from pytest import raises as assert_raises


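# Check that an error raised inside the compiled _sparsetools module
# surfaces in Python as the expected exception type.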
def test_exception():
    assert_raises(MemoryError, _sparsetools.test_throw_error)


def test_threads():
    # Smoke test for parallel threaded execution; doesn't actually
    # check that code runs in parallel, but just that it produces
    # expected results.
    nthreads = 10
    niter = 100

    n = 20
    a = csr_matrix(np.ones([n, n]))
    bres = []

    class Worker(threading.Thread):
        def run(self):
            b = a.copy()
            for j in range(niter):
                _sparsetools.csr_plus_csr(n, n,
                                          a.indptr, a.indices, a.data,
                                          a.indptr, a.indices, a.data,
                                          b.indptr, b.indices, b.data)
            bres.append(b)

    threads = [Worker() for _ in range(nthreads)]
    for thread in threads:
        thread.start()
    for thread in threads:
        thread.join()

    for b in bres:
        assert_(np.all(b.toarray() == 2))


def test_regression_std_vector_dtypes():
    # Regression test for gh-3780, checking the std::vector typemaps
    # in sparsetools.cxx are complete.
    for dtype in supported_dtypes:
        ad = np.matrix([[1, 2], [3, 4]]).astype(dtype)
        a = csr_matrix(ad, dtype=dtype)

        # getcol is one function using std::vector typemaps, and should not fail
        assert_equal(a.getcol(0).todense(), ad[:,0])


@pytest.mark.slow
def test_nnz_overflow():
    # Regression test for gh-7230 / gh-7871, checking that coo_todense
    # with nnz > int32max doesn't overflow.
    nnz = np.iinfo(np.int32).max + 1

    # Ensure ~20 GB of RAM is free to run this test.
    check_free_memory((4 + 4 + 1) * nnz / 1e6 + 0.5)

    # Use nnz duplicate entries to keep the dense version small.
    row = np.zeros(nnz, dtype=np.int32)
    col = np.zeros(nnz, dtype=np.int32)
    data = np.zeros(nnz, dtype=np.int8)
    data[-1] = 4
    s = coo_matrix((data, (row, col)), shape=(1, 1), copy=False)

    # Sums nnz duplicates to produce a 1x1 array containing 4.
    d = s.toarray()
    assert_allclose(d, [[4]])


@pytest.mark.skipif(not (sys.platform.startswith('linux') and np.dtype(np.intp).itemsize >= 8),
                    reason="test requires 64-bit Linux")
class TestInt32Overflow(object):
    """
    Some of the sparsetools routines use dense 2D matrices whose
    total size is not bounded by the nnz of the sparse matrix. These
    routines used to suffer from int32 wraparounds; here, we try to
    check that the wraparounds don't occur any more.
    """
    # choose n large enough
    n = 50000

    def setup_method(self):
        assert self.n**2 > np.iinfo(np.int32).max

        # check there's enough memory even if everything is run at the
        # same time
        try:
            parallel_count = int(os.environ.get('PYTEST_XDIST_WORKER_COUNT', '1'))
        except ValueError:
            parallel_count = np.inf

        check_free_memory(3000 * parallel_count)

    def teardown_method(self):
        gc.collect()

    def test_coo_todense(self):
        # Check *_todense routines (cf. gh-2179)
        #
        # All of them in the end call coo_matrix.todense
        n = self.n
        i = np.array([0, n-1])
        j = np.array([0, n-1])
        data = np.array([1, 2], dtype=np.int8)
        m = coo_matrix((data, (i, j)))

        r = m.todense()
        assert_equal(r[0,0], 1)
        assert_equal(r[-1,-1], 2)
        del r
        gc.collect()

    @pytest.mark.slow
    def test_matvecs(self):
        # Check *_matvecs routines
        n = self.n
        i = np.array([0, n-1])
        j = np.array([0, n-1])
        data = np.array([1, 2], dtype=np.int8)
        m = coo_matrix((data, (i, j)))

        b = np.ones((n, n), dtype=np.int8)
        for sptype in (csr_matrix, csc_matrix, bsr_matrix):
            m2 = sptype(m)
            r = m2.dot(b)
            assert_equal(r[0,0], 1)
            assert_equal(r[-1,-1], 2)
            del r
            gc.collect()

        del b
        gc.collect()

    @pytest.mark.slow
    def test_dia_matvec(self):
        # Check: huge dia_matrix _matvec
        n = self.n
        data = np.ones((n, n), dtype=np.int8)
        offsets = np.arange(n)
        m = dia_matrix((data, offsets), shape=(n, n))
        v = np.ones(m.shape[1], dtype=np.int8)
        r = m.dot(v)
        assert_equal(r[0], np.int8(n))
        del data, offsets, m, v, r
        gc.collect()

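    # BSR operations exercised against huge matrices in the two tests below;
    # the most expensive ones are marked xslow.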
    _bsr_ops = [pytest.param("matmat", marks=pytest.mark.xslow),
                pytest.param("matvecs", marks=pytest.mark.xslow),
                "matvec",
                "diagonal",
                "sort_indices",
                pytest.param("transpose", marks=pytest.mark.xslow)]

    @pytest.mark.slow
    @pytest.mark.parametrize("op", _bsr_ops)
    def test_bsr_1_block(self, op):
        # Check: huge bsr_matrix (1-block)
        #
        # The point here is that indices inside a block may overflow.

        def get_matrix():
            n = self.n
            data = np.ones((1, n, n), dtype=np.int8)
            indptr = np.array([0, 1], dtype=np.int32)
            indices = np.array([0], dtype=np.int32)
            m = bsr_matrix((data, indices, indptr), blocksize=(n, n), copy=False)
            del data, indptr, indices
            return m

        gc.collect()
        try:
            getattr(self, "_check_bsr_" + op)(get_matrix)
        finally:
            gc.collect()

    @pytest.mark.slow
    @pytest.mark.parametrize("op", _bsr_ops)
    def test_bsr_n_block(self, op):
        # Check: huge bsr_matrix (n-block)
        #
        # The point here is that while indices within a block don't
        # overflow, accumulators across many blocks may.

        def get_matrix():
            n = self.n
            data = np.ones((n, n, 1), dtype=np.int8)
            indptr = np.array([0, n], dtype=np.int32)
            indices = np.arange(n, dtype=np.int32)
            m = bsr_matrix((data, indices, indptr), blocksize=(n, 1), copy=False)
            del data, indptr, indices
            return m

        gc.collect()
        try:
            getattr(self, "_check_bsr_" + op)(get_matrix)
        finally:
            gc.collect()

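    # Per-operation checks dispatched via getattr(self, "_check_bsr_" + op)
    # in the two tests above.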
    def _check_bsr_matvecs(self, m):
        m = m()
        n = self.n

        # _matvecs
        r = m.dot(np.ones((n, 2), dtype=np.int8))
        assert_equal(r[0,0], np.int8(n))

    def _check_bsr_matvec(self, m):
        m = m()
        n = self.n

        # _matvec
        r = m.dot(np.ones((n,), dtype=np.int8))
        assert_equal(r[0], np.int8(n))

    def _check_bsr_diagonal(self, m):
        m = m()
        n = self.n

        # _diagonal
        r = m.diagonal()
        assert_equal(r, np.ones(n))

    def _check_bsr_sort_indices(self, m):
        # _sort_indices
        m = m()
        m.sort_indices()

    def _check_bsr_transpose(self, m):
        # _transpose
        m = m()
        m.transpose()

    def _check_bsr_matmat(self, m):
        m = m()
        n = self.n

        # _bsr_matmat
        m2 = bsr_matrix(np.ones((n, 2), dtype=np.int8), blocksize=(m.blocksize[1], 2))
        m.dot(m2)  # shouldn't SIGSEGV
        del m2

        # _bsr_matmat
        m2 = bsr_matrix(np.ones((2, n), dtype=np.int8), blocksize=(2, m.blocksize[0]))
        m2.dot(m)  # shouldn't SIGSEGV


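# The product computed here would need more than 2**63 nonzeros; check that
# csr matmat raises an error instead of silently overflowing int64 indices.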
@pytest.mark.skip(reason="64-bit indices in sparse matrices not available")
def test_csr_matmat_int64_overflow():
    n = 3037000500
    assert n**2 > np.iinfo(np.int64).max

    # the test would take crazy amounts of memory
    check_free_memory(n * (8*2 + 1) * 3 / 1e6)

    # int64 overflow
    data = np.ones((n,), dtype=np.int8)
    indptr = np.arange(n+1, dtype=np.int64)
    indices = np.zeros(n, dtype=np.int64)
    a = csr_matrix((data, indices, indptr))
    b = a.T

    assert_raises(RuntimeError, a.dot, b)


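# Check csr_matvec dtype handling: mismatched output dtypes must be rejected,
# and the result must match the dense product when the output dtype is the
# upcast of the input dtypes.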
def test_upcast():
    a0 = csr_matrix([[np.pi, np.pi*1j], [3, 4]], dtype=complex)
    b0 = np.array([256+1j, 2**32], dtype=complex)

    for a_dtype in supported_dtypes:
        for b_dtype in supported_dtypes:
            msg = "(%r, %r)" % (a_dtype, b_dtype)

            if np.issubdtype(a_dtype, np.complexfloating):
                a = a0.copy().astype(a_dtype)
            else:
                a = a0.real.copy().astype(a_dtype)

            if np.issubdtype(b_dtype, np.complexfloating):
                b = b0.copy().astype(b_dtype)
            else:
                b = b0.real.copy().astype(b_dtype)

            if not (a_dtype == np.bool_ and b_dtype == np.bool_):
                c = np.zeros((2,), dtype=np.bool_)
                assert_raises(ValueError, _sparsetools.csr_matvec,
                              2, 2, a.indptr, a.indices, a.data, b, c)

            if ((np.issubdtype(a_dtype, np.complexfloating) and
                 not np.issubdtype(b_dtype, np.complexfloating)) or
                (not np.issubdtype(a_dtype, np.complexfloating) and
                 np.issubdtype(b_dtype, np.complexfloating))):
                c = np.zeros((2,), dtype=np.float64)
                assert_raises(ValueError, _sparsetools.csr_matvec,
                              2, 2, a.indptr, a.indices, a.data, b, c)

            c = np.zeros((2,), dtype=np.result_type(a_dtype, b_dtype))
            _sparsetools.csr_matvec(2, 2, a.indptr, a.indices, a.data, b, c)
            assert_allclose(c, np.dot(a.toarray(), b), err_msg=msg)


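# dia_matrix matvec should give identical results for little- and
# big-endian input data.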
def test_endianness():
    d = np.ones((3,4))
    offsets = [-1,0,1]

    a = dia_matrix((d.astype('<f8'), offsets), (4, 4))
    b = dia_matrix((d.astype('>f8'), offsets), (4, 4))
    v = np.arange(4)

    assert_allclose(a.dot(v), [1, 3, 6, 5])
    assert_allclose(b.dot(v), [1, 3, 6, 5])