dia.py 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420
  1. """Sparse DIAgonal format"""
  2. from __future__ import division, print_function, absolute_import
  3. __docformat__ = "restructuredtext en"
  4. __all__ = ['dia_matrix', 'isspmatrix_dia']
  5. import numpy as np
  6. from .base import isspmatrix, _formats, spmatrix
  7. from .data import _data_matrix
  8. from .sputils import (isshape, upcast_char, getdtype, get_index_dtype,
  9. get_sum_dtype, validateaxis, check_shape)
  10. from ._sparsetools import dia_matvec
  11. class dia_matrix(_data_matrix):
  12. """Sparse matrix with DIAgonal storage
  13. This can be instantiated in several ways:
  14. dia_matrix(D)
  15. with a dense matrix
  16. dia_matrix(S)
  17. with another sparse matrix S (equivalent to S.todia())
  18. dia_matrix((M, N), [dtype])
  19. to construct an empty matrix with shape (M, N),
  20. dtype is optional, defaulting to dtype='d'.
  21. dia_matrix((data, offsets), shape=(M, N))
  22. where the ``data[k,:]`` stores the diagonal entries for
  23. diagonal ``offsets[k]`` (See example below)
  24. Attributes
  25. ----------
  26. dtype : dtype
  27. Data type of the matrix
  28. shape : 2-tuple
  29. Shape of the matrix
  30. ndim : int
  31. Number of dimensions (this is always 2)
  32. nnz
  33. Number of nonzero elements
  34. data
  35. DIA format data array of the matrix
  36. offsets
  37. DIA format offset array of the matrix
  38. Notes
  39. -----
  40. Sparse matrices can be used in arithmetic operations: they support
  41. addition, subtraction, multiplication, division, and matrix power.
  42. Examples
  43. --------
  44. >>> import numpy as np
  45. >>> from scipy.sparse import dia_matrix
  46. >>> dia_matrix((3, 4), dtype=np.int8).toarray()
  47. array([[0, 0, 0, 0],
  48. [0, 0, 0, 0],
  49. [0, 0, 0, 0]], dtype=int8)
  50. >>> data = np.array([[1, 2, 3, 4]]).repeat(3, axis=0)
  51. >>> offsets = np.array([0, -1, 2])
  52. >>> dia_matrix((data, offsets), shape=(4, 4)).toarray()
  53. array([[1, 0, 3, 0],
  54. [1, 2, 0, 4],
  55. [0, 2, 3, 0],
  56. [0, 0, 3, 4]])
  57. """
  58. format = 'dia'
  59. def __init__(self, arg1, shape=None, dtype=None, copy=False):
  60. _data_matrix.__init__(self)
  61. if isspmatrix_dia(arg1):
  62. if copy:
  63. arg1 = arg1.copy()
  64. self.data = arg1.data
  65. self.offsets = arg1.offsets
  66. self._shape = check_shape(arg1.shape)
  67. elif isspmatrix(arg1):
  68. if isspmatrix_dia(arg1) and copy:
  69. A = arg1.copy()
  70. else:
  71. A = arg1.todia()
  72. self.data = A.data
  73. self.offsets = A.offsets
  74. self._shape = check_shape(A.shape)
  75. elif isinstance(arg1, tuple):
  76. if isshape(arg1):
  77. # It's a tuple of matrix dimensions (M, N)
  78. # create empty matrix
  79. self._shape = check_shape(arg1)
  80. self.data = np.zeros((0,0), getdtype(dtype, default=float))
  81. idx_dtype = get_index_dtype(maxval=max(self.shape))
  82. self.offsets = np.zeros((0), dtype=idx_dtype)
  83. else:
  84. try:
  85. # Try interpreting it as (data, offsets)
  86. data, offsets = arg1
  87. except Exception:
  88. raise ValueError('unrecognized form for dia_matrix constructor')
  89. else:
  90. if shape is None:
  91. raise ValueError('expected a shape argument')
  92. self.data = np.atleast_2d(np.array(arg1[0], dtype=dtype, copy=copy))
  93. self.offsets = np.atleast_1d(np.array(arg1[1],
  94. dtype=get_index_dtype(maxval=max(shape)),
  95. copy=copy))
  96. self._shape = check_shape(shape)
  97. else:
  98. #must be dense, convert to COO first, then to DIA
  99. try:
  100. arg1 = np.asarray(arg1)
  101. except Exception:
  102. raise ValueError("unrecognized form for"
  103. " %s_matrix constructor" % self.format)
  104. from .coo import coo_matrix
  105. A = coo_matrix(arg1, dtype=dtype, shape=shape).todia()
  106. self.data = A.data
  107. self.offsets = A.offsets
  108. self._shape = check_shape(A.shape)
  109. if dtype is not None:
  110. self.data = self.data.astype(dtype)
  111. #check format
  112. if self.offsets.ndim != 1:
  113. raise ValueError('offsets array must have rank 1')
  114. if self.data.ndim != 2:
  115. raise ValueError('data array must have rank 2')
  116. if self.data.shape[0] != len(self.offsets):
  117. raise ValueError('number of diagonals (%d) '
  118. 'does not match the number of offsets (%d)'
  119. % (self.data.shape[0], len(self.offsets)))
  120. if len(np.unique(self.offsets)) != len(self.offsets):
  121. raise ValueError('offset array contains duplicate values')
  122. def __repr__(self):
  123. format = _formats[self.getformat()][1]
  124. return "<%dx%d sparse matrix of type '%s'\n" \
  125. "\twith %d stored elements (%d diagonals) in %s format>" % \
  126. (self.shape + (self.dtype.type, self.nnz, self.data.shape[0],
  127. format))
  128. def _data_mask(self):
  129. """Returns a mask of the same shape as self.data, where
  130. mask[i,j] is True when data[i,j] corresponds to a stored element."""
  131. num_rows, num_cols = self.shape
  132. offset_inds = np.arange(self.data.shape[1])
  133. row = offset_inds - self.offsets[:,None]
  134. mask = (row >= 0)
  135. mask &= (row < num_rows)
  136. mask &= (offset_inds < num_cols)
  137. return mask
  138. def count_nonzero(self):
  139. mask = self._data_mask()
  140. return np.count_nonzero(self.data[mask])
  141. def getnnz(self, axis=None):
  142. if axis is not None:
  143. raise NotImplementedError("getnnz over an axis is not implemented "
  144. "for DIA format")
  145. M,N = self.shape
  146. nnz = 0
  147. for k in self.offsets:
  148. if k > 0:
  149. nnz += min(M,N-k)
  150. else:
  151. nnz += min(M+k,N)
  152. return int(nnz)
  153. getnnz.__doc__ = spmatrix.getnnz.__doc__
  154. count_nonzero.__doc__ = spmatrix.count_nonzero.__doc__
  155. def sum(self, axis=None, dtype=None, out=None):
  156. validateaxis(axis)
  157. if axis is not None and axis < 0:
  158. axis += 2
  159. res_dtype = get_sum_dtype(self.dtype)
  160. num_rows, num_cols = self.shape
  161. ret = None
  162. if axis == 0:
  163. mask = self._data_mask()
  164. x = (self.data * mask).sum(axis=0)
  165. if x.shape[0] == num_cols:
  166. res = x
  167. else:
  168. res = np.zeros(num_cols, dtype=x.dtype)
  169. res[:x.shape[0]] = x
  170. ret = np.matrix(res, dtype=res_dtype)
  171. else:
  172. row_sums = np.zeros(num_rows, dtype=res_dtype)
  173. one = np.ones(num_cols, dtype=res_dtype)
  174. dia_matvec(num_rows, num_cols, len(self.offsets),
  175. self.data.shape[1], self.offsets, self.data, one, row_sums)
  176. row_sums = np.matrix(row_sums)
  177. if axis is None:
  178. return row_sums.sum(dtype=dtype, out=out)
  179. if axis is not None:
  180. row_sums = row_sums.T
  181. ret = np.matrix(row_sums.sum(axis=axis))
  182. if out is not None and out.shape != ret.shape:
  183. raise ValueError("dimensions do not match")
  184. return ret.sum(axis=(), dtype=dtype, out=out)
  185. sum.__doc__ = spmatrix.sum.__doc__
  186. def _mul_vector(self, other):
  187. x = other
  188. y = np.zeros(self.shape[0], dtype=upcast_char(self.dtype.char,
  189. x.dtype.char))
  190. L = self.data.shape[1]
  191. M,N = self.shape
  192. dia_matvec(M,N, len(self.offsets), L, self.offsets, self.data, x.ravel(), y.ravel())
  193. return y
  194. def _mul_multimatrix(self, other):
  195. return np.hstack([self._mul_vector(col).reshape(-1,1) for col in other.T])
  196. def _setdiag(self, values, k=0):
  197. M, N = self.shape
  198. if values.ndim == 0:
  199. # broadcast
  200. values_n = np.inf
  201. else:
  202. values_n = len(values)
  203. if k < 0:
  204. n = min(M + k, N, values_n)
  205. min_index = 0
  206. max_index = n
  207. else:
  208. n = min(M, N - k, values_n)
  209. min_index = k
  210. max_index = k + n
  211. if values.ndim != 0:
  212. # allow also longer sequences
  213. values = values[:n]
  214. if k in self.offsets:
  215. self.data[self.offsets == k, min_index:max_index] = values
  216. else:
  217. self.offsets = np.append(self.offsets, self.offsets.dtype.type(k))
  218. m = max(max_index, self.data.shape[1])
  219. data = np.zeros((self.data.shape[0]+1, m), dtype=self.data.dtype)
  220. data[:-1,:self.data.shape[1]] = self.data
  221. data[-1, min_index:max_index] = values
  222. self.data = data
  223. def todia(self, copy=False):
  224. if copy:
  225. return self.copy()
  226. else:
  227. return self
  228. todia.__doc__ = spmatrix.todia.__doc__
  229. def transpose(self, axes=None, copy=False):
  230. if axes is not None:
  231. raise ValueError(("Sparse matrices do not support "
  232. "an 'axes' parameter because swapping "
  233. "dimensions is the only logical permutation."))
  234. num_rows, num_cols = self.shape
  235. max_dim = max(self.shape)
  236. # flip diagonal offsets
  237. offsets = -self.offsets
  238. # re-align the data matrix
  239. r = np.arange(len(offsets), dtype=np.intc)[:, None]
  240. c = np.arange(num_rows, dtype=np.intc) - (offsets % max_dim)[:, None]
  241. pad_amount = max(0, max_dim-self.data.shape[1])
  242. data = np.hstack((self.data, np.zeros((self.data.shape[0], pad_amount),
  243. dtype=self.data.dtype)))
  244. data = data[r, c]
  245. return dia_matrix((data, offsets), shape=(
  246. num_cols, num_rows), copy=copy)
  247. transpose.__doc__ = spmatrix.transpose.__doc__
  248. def diagonal(self, k=0):
  249. rows, cols = self.shape
  250. if k <= -rows or k >= cols:
  251. raise ValueError("k exceeds matrix dimensions")
  252. idx, = np.nonzero(self.offsets == k)
  253. first_col, last_col = max(0, k), min(rows + k, cols)
  254. if idx.size == 0:
  255. return np.zeros(last_col - first_col, dtype=self.data.dtype)
  256. return self.data[idx[0], first_col:last_col]
  257. diagonal.__doc__ = spmatrix.diagonal.__doc__
  258. def tocsc(self, copy=False):
  259. from .csc import csc_matrix
  260. if self.nnz == 0:
  261. return csc_matrix(self.shape, dtype=self.dtype)
  262. num_rows, num_cols = self.shape
  263. num_offsets, offset_len = self.data.shape
  264. offset_inds = np.arange(offset_len)
  265. row = offset_inds - self.offsets[:,None]
  266. mask = (row >= 0)
  267. mask &= (row < num_rows)
  268. mask &= (offset_inds < num_cols)
  269. mask &= (self.data != 0)
  270. idx_dtype = get_index_dtype(maxval=max(self.shape))
  271. indptr = np.zeros(num_cols + 1, dtype=idx_dtype)
  272. indptr[1:offset_len+1] = np.cumsum(mask.sum(axis=0))
  273. indptr[offset_len+1:] = indptr[offset_len]
  274. indices = row.T[mask.T].astype(idx_dtype, copy=False)
  275. data = self.data.T[mask.T]
  276. return csc_matrix((data, indices, indptr), shape=self.shape,
  277. dtype=self.dtype)
  278. tocsc.__doc__ = spmatrix.tocsc.__doc__
  279. def tocoo(self, copy=False):
  280. num_rows, num_cols = self.shape
  281. num_offsets, offset_len = self.data.shape
  282. offset_inds = np.arange(offset_len)
  283. row = offset_inds - self.offsets[:,None]
  284. mask = (row >= 0)
  285. mask &= (row < num_rows)
  286. mask &= (offset_inds < num_cols)
  287. mask &= (self.data != 0)
  288. row = row[mask]
  289. col = np.tile(offset_inds, num_offsets)[mask.ravel()]
  290. data = self.data[mask]
  291. from .coo import coo_matrix
  292. A = coo_matrix((data,(row,col)), shape=self.shape, dtype=self.dtype)
  293. A.has_canonical_format = True
  294. return A
  295. tocoo.__doc__ = spmatrix.tocoo.__doc__
  296. # needed by _data_matrix
  297. def _with_data(self, data, copy=True):
  298. """Returns a matrix with the same sparsity structure as self,
  299. but with different data. By default the structure arrays are copied.
  300. """
  301. if copy:
  302. return dia_matrix((data, self.offsets.copy()), shape=self.shape)
  303. else:
  304. return dia_matrix((data,self.offsets), shape=self.shape)
  305. def resize(self, *shape):
  306. shape = check_shape(shape)
  307. M, N = shape
  308. # we do not need to handle the case of expanding N
  309. self.data = self.data[:, :N]
  310. if (M > self.shape[0] and
  311. np.any(self.offsets + self.shape[0] < self.data.shape[1])):
  312. # explicitly clear values that were previously hidden
  313. mask = (self.offsets[:, None] + self.shape[0] <=
  314. np.arange(self.data.shape[1]))
  315. self.data[mask] = 0
  316. self._shape = shape
  317. resize.__doc__ = spmatrix.resize.__doc__
  318. def isspmatrix_dia(x):
  319. """Is x of dia_matrix type?
  320. Parameters
  321. ----------
  322. x
  323. object to check for being a dia matrix
  324. Returns
  325. -------
  326. bool
  327. True if x is a dia matrix, False otherwise
  328. Examples
  329. --------
  330. >>> from scipy.sparse import dia_matrix, isspmatrix_dia
  331. >>> isspmatrix_dia(dia_matrix([[5]]))
  332. True
  333. >>> from scipy.sparse import dia_matrix, csr_matrix, isspmatrix_dia
  334. >>> isspmatrix_dia(csr_matrix([[5]]))
  335. False
  336. """
  337. return isinstance(x, dia_matrix)