coo.py 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613
  1. """ A sparse matrix in COOrdinate or 'triplet' format"""
  2. from __future__ import division, print_function, absolute_import
  3. __docformat__ = "restructuredtext en"
  4. __all__ = ['coo_matrix', 'isspmatrix_coo']
  5. from warnings import warn
  6. import numpy as np
  7. from scipy._lib.six import zip as izip
  8. from ._sparsetools import coo_tocsr, coo_todense, coo_matvec
  9. from .base import isspmatrix, SparseEfficiencyWarning, spmatrix
  10. from .data import _data_matrix, _minmax_mixin
  11. from .sputils import (upcast, upcast_char, to_native, isshape, getdtype,
  12. get_index_dtype, downcast_intp_index, check_shape,
  13. check_reshape_kwargs)
  14. class coo_matrix(_data_matrix, _minmax_mixin):
  15. """
  16. A sparse matrix in COOrdinate format.
  17. Also known as the 'ijv' or 'triplet' format.
  18. This can be instantiated in several ways:
  19. coo_matrix(D)
  20. with a dense matrix D
  21. coo_matrix(S)
  22. with another sparse matrix S (equivalent to S.tocoo())
  23. coo_matrix((M, N), [dtype])
  24. to construct an empty matrix with shape (M, N)
  25. dtype is optional, defaulting to dtype='d'.
  26. coo_matrix((data, (i, j)), [shape=(M, N)])
  27. to construct from three arrays:
  28. 1. data[:] the entries of the matrix, in any order
  29. 2. i[:] the row indices of the matrix entries
  30. 3. j[:] the column indices of the matrix entries
  31. Where ``A[i[k], j[k]] = data[k]``. When shape is not
  32. specified, it is inferred from the index arrays
  33. Attributes
  34. ----------
  35. dtype : dtype
  36. Data type of the matrix
  37. shape : 2-tuple
  38. Shape of the matrix
  39. ndim : int
  40. Number of dimensions (this is always 2)
  41. nnz
  42. Number of nonzero elements
  43. data
  44. COO format data array of the matrix
  45. row
  46. COO format row index array of the matrix
  47. col
  48. COO format column index array of the matrix
  49. Notes
  50. -----
  51. Sparse matrices can be used in arithmetic operations: they support
  52. addition, subtraction, multiplication, division, and matrix power.
  53. Advantages of the COO format
  54. - facilitates fast conversion among sparse formats
  55. - permits duplicate entries (see example)
  56. - very fast conversion to and from CSR/CSC formats
  57. Disadvantages of the COO format
  58. - does not directly support:
  59. + arithmetic operations
  60. + slicing
  61. Intended Usage
  62. - COO is a fast format for constructing sparse matrices
  63. - Once a matrix has been constructed, convert to CSR or
  64. CSC format for fast arithmetic and matrix vector operations
  65. - By default when converting to CSR or CSC format, duplicate (i,j)
  66. entries will be summed together. This facilitates efficient
  67. construction of finite element matrices and the like. (see example)
  68. Examples
  69. --------
  70. >>> # Constructing an empty matrix
  71. >>> from scipy.sparse import coo_matrix
  72. >>> coo_matrix((3, 4), dtype=np.int8).toarray()
  73. array([[0, 0, 0, 0],
  74. [0, 0, 0, 0],
  75. [0, 0, 0, 0]], dtype=int8)
  76. >>> # Constructing a matrix using ijv format
  77. >>> row = np.array([0, 3, 1, 0])
  78. >>> col = np.array([0, 3, 1, 2])
  79. >>> data = np.array([4, 5, 7, 9])
  80. >>> coo_matrix((data, (row, col)), shape=(4, 4)).toarray()
  81. array([[4, 0, 9, 0],
  82. [0, 7, 0, 0],
  83. [0, 0, 0, 0],
  84. [0, 0, 0, 5]])
  85. >>> # Constructing a matrix with duplicate indices
  86. >>> row = np.array([0, 0, 1, 3, 1, 0, 0])
  87. >>> col = np.array([0, 2, 1, 3, 1, 0, 0])
  88. >>> data = np.array([1, 1, 1, 1, 1, 1, 1])
  89. >>> coo = coo_matrix((data, (row, col)), shape=(4, 4))
  90. >>> # Duplicate indices are maintained until implicitly or explicitly summed
  91. >>> np.max(coo.data)
  92. 1
  93. >>> coo.toarray()
  94. array([[3, 0, 1, 0],
  95. [0, 2, 0, 0],
  96. [0, 0, 0, 0],
  97. [0, 0, 0, 1]])
  98. """
  99. format = 'coo'
  100. def __init__(self, arg1, shape=None, dtype=None, copy=False):
  101. _data_matrix.__init__(self)
  102. if isinstance(arg1, tuple):
  103. if isshape(arg1):
  104. M, N = arg1
  105. self._shape = check_shape((M, N))
  106. idx_dtype = get_index_dtype(maxval=max(M, N))
  107. self.row = np.array([], dtype=idx_dtype)
  108. self.col = np.array([], dtype=idx_dtype)
  109. self.data = np.array([], getdtype(dtype, default=float))
  110. self.has_canonical_format = True
  111. else:
  112. try:
  113. obj, (row, col) = arg1
  114. except (TypeError, ValueError):
  115. raise TypeError('invalid input format')
  116. if shape is None:
  117. if len(row) == 0 or len(col) == 0:
  118. raise ValueError('cannot infer dimensions from zero '
  119. 'sized index arrays')
  120. M = np.max(row) + 1
  121. N = np.max(col) + 1
  122. self._shape = check_shape((M, N))
  123. else:
  124. # Use 2 steps to ensure shape has length 2.
  125. M, N = shape
  126. self._shape = check_shape((M, N))
  127. idx_dtype = get_index_dtype(maxval=max(self.shape))
  128. self.row = np.array(row, copy=copy, dtype=idx_dtype)
  129. self.col = np.array(col, copy=copy, dtype=idx_dtype)
  130. self.data = np.array(obj, copy=copy)
  131. self.has_canonical_format = False
  132. else:
  133. if isspmatrix(arg1):
  134. if isspmatrix_coo(arg1) and copy:
  135. self.row = arg1.row.copy()
  136. self.col = arg1.col.copy()
  137. self.data = arg1.data.copy()
  138. self._shape = check_shape(arg1.shape)
  139. else:
  140. coo = arg1.tocoo()
  141. self.row = coo.row
  142. self.col = coo.col
  143. self.data = coo.data
  144. self._shape = check_shape(coo.shape)
  145. self.has_canonical_format = False
  146. else:
  147. #dense argument
  148. M = np.atleast_2d(np.asarray(arg1))
  149. if M.ndim != 2:
  150. raise TypeError('expected dimension <= 2 array or matrix')
  151. else:
  152. self._shape = check_shape(M.shape)
  153. self.row, self.col = M.nonzero()
  154. self.data = M[self.row, self.col]
  155. self.has_canonical_format = True
  156. if dtype is not None:
  157. self.data = self.data.astype(dtype, copy=False)
  158. self._check()
  159. def reshape(self, *args, **kwargs):
  160. shape = check_shape(args, self.shape)
  161. order, copy = check_reshape_kwargs(kwargs)
  162. # Return early if reshape is not required
  163. if shape == self.shape:
  164. if copy:
  165. return self.copy()
  166. else:
  167. return self
  168. nrows, ncols = self.shape
  169. if order == 'C':
  170. # Upcast to avoid overflows: the coo_matrix constructor
  171. # below will downcast the results to a smaller dtype, if
  172. # possible.
  173. dtype = get_index_dtype(maxval=(ncols * max(0, nrows - 1) + max(0, ncols - 1)))
  174. flat_indices = np.multiply(ncols, self.row, dtype=dtype) + self.col
  175. new_row, new_col = divmod(flat_indices, shape[1])
  176. elif order == 'F':
  177. dtype = get_index_dtype(maxval=(nrows * max(0, ncols - 1) + max(0, nrows - 1)))
  178. flat_indices = np.multiply(nrows, self.col, dtype=dtype) + self.row
  179. new_col, new_row = divmod(flat_indices, shape[0])
  180. else:
  181. raise ValueError("'order' must be 'C' or 'F'")
  182. # Handle copy here rather than passing on to the constructor so that no
  183. # copy will be made of new_row and new_col regardless
  184. if copy:
  185. new_data = self.data.copy()
  186. else:
  187. new_data = self.data
  188. return coo_matrix((new_data, (new_row, new_col)),
  189. shape=shape, copy=False)
  190. reshape.__doc__ = spmatrix.reshape.__doc__
  191. def getnnz(self, axis=None):
  192. if axis is None:
  193. nnz = len(self.data)
  194. if nnz != len(self.row) or nnz != len(self.col):
  195. raise ValueError('row, column, and data array must all be the '
  196. 'same length')
  197. if self.data.ndim != 1 or self.row.ndim != 1 or \
  198. self.col.ndim != 1:
  199. raise ValueError('row, column, and data arrays must be 1-D')
  200. return int(nnz)
  201. if axis < 0:
  202. axis += 2
  203. if axis == 0:
  204. return np.bincount(downcast_intp_index(self.col),
  205. minlength=self.shape[1])
  206. elif axis == 1:
  207. return np.bincount(downcast_intp_index(self.row),
  208. minlength=self.shape[0])
  209. else:
  210. raise ValueError('axis out of bounds')
  211. getnnz.__doc__ = spmatrix.getnnz.__doc__
  212. def _check(self):
  213. """ Checks data structure for consistency """
  214. # index arrays should have integer data types
  215. if self.row.dtype.kind != 'i':
  216. warn("row index array has non-integer dtype (%s) "
  217. % self.row.dtype.name)
  218. if self.col.dtype.kind != 'i':
  219. warn("col index array has non-integer dtype (%s) "
  220. % self.col.dtype.name)
  221. idx_dtype = get_index_dtype(maxval=max(self.shape))
  222. self.row = np.asarray(self.row, dtype=idx_dtype)
  223. self.col = np.asarray(self.col, dtype=idx_dtype)
  224. self.data = to_native(self.data)
  225. if self.nnz > 0:
  226. if self.row.max() >= self.shape[0]:
  227. raise ValueError('row index exceeds matrix dimensions')
  228. if self.col.max() >= self.shape[1]:
  229. raise ValueError('column index exceeds matrix dimensions')
  230. if self.row.min() < 0:
  231. raise ValueError('negative row index found')
  232. if self.col.min() < 0:
  233. raise ValueError('negative column index found')
  234. def transpose(self, axes=None, copy=False):
  235. if axes is not None:
  236. raise ValueError(("Sparse matrices do not support "
  237. "an 'axes' parameter because swapping "
  238. "dimensions is the only logical permutation."))
  239. M, N = self.shape
  240. return coo_matrix((self.data, (self.col, self.row)),
  241. shape=(N, M), copy=copy)
  242. transpose.__doc__ = spmatrix.transpose.__doc__
  243. def resize(self, *shape):
  244. shape = check_shape(shape)
  245. new_M, new_N = shape
  246. M, N = self.shape
  247. if new_M < M or new_N < N:
  248. mask = np.logical_and(self.row < new_M, self.col < new_N)
  249. if not mask.all():
  250. self.row = self.row[mask]
  251. self.col = self.col[mask]
  252. self.data = self.data[mask]
  253. self._shape = shape
  254. resize.__doc__ = spmatrix.resize.__doc__
  255. def toarray(self, order=None, out=None):
  256. """See the docstring for `spmatrix.toarray`."""
  257. B = self._process_toarray_args(order, out)
  258. fortran = int(B.flags.f_contiguous)
  259. if not fortran and not B.flags.c_contiguous:
  260. raise ValueError("Output array must be C or F contiguous")
  261. M,N = self.shape
  262. coo_todense(M, N, self.nnz, self.row, self.col, self.data,
  263. B.ravel('A'), fortran)
  264. return B
  265. def tocsc(self, copy=False):
  266. """Convert this matrix to Compressed Sparse Column format
  267. Duplicate entries will be summed together.
  268. Examples
  269. --------
  270. >>> from numpy import array
  271. >>> from scipy.sparse import coo_matrix
  272. >>> row = array([0, 0, 1, 3, 1, 0, 0])
  273. >>> col = array([0, 2, 1, 3, 1, 0, 0])
  274. >>> data = array([1, 1, 1, 1, 1, 1, 1])
  275. >>> A = coo_matrix((data, (row, col)), shape=(4, 4)).tocsc()
  276. >>> A.toarray()
  277. array([[3, 0, 1, 0],
  278. [0, 2, 0, 0],
  279. [0, 0, 0, 0],
  280. [0, 0, 0, 1]])
  281. """
  282. from .csc import csc_matrix
  283. if self.nnz == 0:
  284. return csc_matrix(self.shape, dtype=self.dtype)
  285. else:
  286. M,N = self.shape
  287. idx_dtype = get_index_dtype((self.col, self.row),
  288. maxval=max(self.nnz, M))
  289. row = self.row.astype(idx_dtype, copy=False)
  290. col = self.col.astype(idx_dtype, copy=False)
  291. indptr = np.empty(N + 1, dtype=idx_dtype)
  292. indices = np.empty_like(row, dtype=idx_dtype)
  293. data = np.empty_like(self.data, dtype=upcast(self.dtype))
  294. coo_tocsr(N, M, self.nnz, col, row, self.data,
  295. indptr, indices, data)
  296. x = csc_matrix((data, indices, indptr), shape=self.shape)
  297. if not self.has_canonical_format:
  298. x.sum_duplicates()
  299. return x
  300. def tocsr(self, copy=False):
  301. """Convert this matrix to Compressed Sparse Row format
  302. Duplicate entries will be summed together.
  303. Examples
  304. --------
  305. >>> from numpy import array
  306. >>> from scipy.sparse import coo_matrix
  307. >>> row = array([0, 0, 1, 3, 1, 0, 0])
  308. >>> col = array([0, 2, 1, 3, 1, 0, 0])
  309. >>> data = array([1, 1, 1, 1, 1, 1, 1])
  310. >>> A = coo_matrix((data, (row, col)), shape=(4, 4)).tocsr()
  311. >>> A.toarray()
  312. array([[3, 0, 1, 0],
  313. [0, 2, 0, 0],
  314. [0, 0, 0, 0],
  315. [0, 0, 0, 1]])
  316. """
  317. from .csr import csr_matrix
  318. if self.nnz == 0:
  319. return csr_matrix(self.shape, dtype=self.dtype)
  320. else:
  321. M,N = self.shape
  322. idx_dtype = get_index_dtype((self.row, self.col),
  323. maxval=max(self.nnz, N))
  324. row = self.row.astype(idx_dtype, copy=False)
  325. col = self.col.astype(idx_dtype, copy=False)
  326. indptr = np.empty(M + 1, dtype=idx_dtype)
  327. indices = np.empty_like(col, dtype=idx_dtype)
  328. data = np.empty_like(self.data, dtype=upcast(self.dtype))
  329. coo_tocsr(M, N, self.nnz, row, col, self.data,
  330. indptr, indices, data)
  331. x = csr_matrix((data, indices, indptr), shape=self.shape)
  332. if not self.has_canonical_format:
  333. x.sum_duplicates()
  334. return x
  335. def tocoo(self, copy=False):
  336. if copy:
  337. return self.copy()
  338. else:
  339. return self
  340. tocoo.__doc__ = spmatrix.tocoo.__doc__
  341. def todia(self, copy=False):
  342. from .dia import dia_matrix
  343. self.sum_duplicates()
  344. ks = self.col - self.row # the diagonal for each nonzero
  345. diags, diag_idx = np.unique(ks, return_inverse=True)
  346. if len(diags) > 100:
  347. # probably undesired, should todia() have a maxdiags parameter?
  348. warn("Constructing a DIA matrix with %d diagonals "
  349. "is inefficient" % len(diags), SparseEfficiencyWarning)
  350. #initialize and fill in data array
  351. if self.data.size == 0:
  352. data = np.zeros((0, 0), dtype=self.dtype)
  353. else:
  354. data = np.zeros((len(diags), self.col.max()+1), dtype=self.dtype)
  355. data[diag_idx, self.col] = self.data
  356. return dia_matrix((data,diags), shape=self.shape)
  357. todia.__doc__ = spmatrix.todia.__doc__
  358. def todok(self, copy=False):
  359. from .dok import dok_matrix
  360. self.sum_duplicates()
  361. dok = dok_matrix((self.shape), dtype=self.dtype)
  362. dok._update(izip(izip(self.row,self.col),self.data))
  363. return dok
  364. todok.__doc__ = spmatrix.todok.__doc__
  365. def diagonal(self, k=0):
  366. rows, cols = self.shape
  367. if k <= -rows or k >= cols:
  368. raise ValueError("k exceeds matrix dimensions")
  369. diag = np.zeros(min(rows + min(k, 0), cols - max(k, 0)),
  370. dtype=self.dtype)
  371. diag_mask = (self.row + k) == self.col
  372. if self.has_canonical_format:
  373. row = self.row[diag_mask]
  374. data = self.data[diag_mask]
  375. else:
  376. row, _, data = self._sum_duplicates(self.row[diag_mask],
  377. self.col[diag_mask],
  378. self.data[diag_mask])
  379. diag[row + min(k, 0)] = data
  380. return diag
  381. diagonal.__doc__ = _data_matrix.diagonal.__doc__
  382. def _setdiag(self, values, k):
  383. M, N = self.shape
  384. if values.ndim and not len(values):
  385. return
  386. idx_dtype = self.row.dtype
  387. # Determine which triples to keep and where to put the new ones.
  388. full_keep = self.col - self.row != k
  389. if k < 0:
  390. max_index = min(M+k, N)
  391. if values.ndim:
  392. max_index = min(max_index, len(values))
  393. keep = np.logical_or(full_keep, self.col >= max_index)
  394. new_row = np.arange(-k, -k + max_index, dtype=idx_dtype)
  395. new_col = np.arange(max_index, dtype=idx_dtype)
  396. else:
  397. max_index = min(M, N-k)
  398. if values.ndim:
  399. max_index = min(max_index, len(values))
  400. keep = np.logical_or(full_keep, self.row >= max_index)
  401. new_row = np.arange(max_index, dtype=idx_dtype)
  402. new_col = np.arange(k, k + max_index, dtype=idx_dtype)
  403. # Define the array of data consisting of the entries to be added.
  404. if values.ndim:
  405. new_data = values[:max_index]
  406. else:
  407. new_data = np.empty(max_index, dtype=self.dtype)
  408. new_data[:] = values
  409. # Update the internal structure.
  410. self.row = np.concatenate((self.row[keep], new_row))
  411. self.col = np.concatenate((self.col[keep], new_col))
  412. self.data = np.concatenate((self.data[keep], new_data))
  413. self.has_canonical_format = False
  414. # needed by _data_matrix
  415. def _with_data(self,data,copy=True):
  416. """Returns a matrix with the same sparsity structure as self,
  417. but with different data. By default the index arrays
  418. (i.e. .row and .col) are copied.
  419. """
  420. if copy:
  421. return coo_matrix((data, (self.row.copy(), self.col.copy())),
  422. shape=self.shape, dtype=data.dtype)
  423. else:
  424. return coo_matrix((data, (self.row, self.col)),
  425. shape=self.shape, dtype=data.dtype)
  426. def sum_duplicates(self):
  427. """Eliminate duplicate matrix entries by adding them together
  428. This is an *in place* operation
  429. """
  430. if self.has_canonical_format:
  431. return
  432. summed = self._sum_duplicates(self.row, self.col, self.data)
  433. self.row, self.col, self.data = summed
  434. self.has_canonical_format = True
  435. def _sum_duplicates(self, row, col, data):
  436. # Assumes (data, row, col) not in canonical format.
  437. if len(data) == 0:
  438. return row, col, data
  439. order = np.lexsort((row, col))
  440. row = row[order]
  441. col = col[order]
  442. data = data[order]
  443. unique_mask = ((row[1:] != row[:-1]) |
  444. (col[1:] != col[:-1]))
  445. unique_mask = np.append(True, unique_mask)
  446. row = row[unique_mask]
  447. col = col[unique_mask]
  448. unique_inds, = np.nonzero(unique_mask)
  449. data = np.add.reduceat(data, unique_inds, dtype=self.dtype)
  450. return row, col, data
  451. def eliminate_zeros(self):
  452. """Remove zero entries from the matrix
  453. This is an *in place* operation
  454. """
  455. mask = self.data != 0
  456. self.data = self.data[mask]
  457. self.row = self.row[mask]
  458. self.col = self.col[mask]
  459. #######################
  460. # Arithmetic handlers #
  461. #######################
  462. def _add_dense(self, other):
  463. if other.shape != self.shape:
  464. raise ValueError('Incompatible shapes.')
  465. dtype = upcast_char(self.dtype.char, other.dtype.char)
  466. result = np.array(other, dtype=dtype, copy=True)
  467. fortran = int(result.flags.f_contiguous)
  468. M, N = self.shape
  469. coo_todense(M, N, self.nnz, self.row, self.col, self.data,
  470. result.ravel('A'), fortran)
  471. return np.matrix(result, copy=False)
  472. def _mul_vector(self, other):
  473. #output array
  474. result = np.zeros(self.shape[0], dtype=upcast_char(self.dtype.char,
  475. other.dtype.char))
  476. coo_matvec(self.nnz, self.row, self.col, self.data, other, result)
  477. return result
  478. def _mul_multivector(self, other):
  479. result = np.zeros((other.shape[1], self.shape[0]),
  480. dtype=upcast_char(self.dtype.char, other.dtype.char))
  481. for i, col in enumerate(other.T):
  482. coo_matvec(self.nnz, self.row, self.col, self.data, col, result[i])
  483. return result.T.view(type=type(other))
  484. def isspmatrix_coo(x):
  485. """Is x of coo_matrix type?
  486. Parameters
  487. ----------
  488. x
  489. object to check for being a coo matrix
  490. Returns
  491. -------
  492. bool
  493. True if x is a coo matrix, False otherwise
  494. Examples
  495. --------
  496. >>> from scipy.sparse import coo_matrix, isspmatrix_coo
  497. >>> isspmatrix_coo(coo_matrix([[5]]))
  498. True
  499. >>> from scipy.sparse import coo_matrix, csr_matrix, isspmatrix_coo
  500. >>> isspmatrix_coo(csr_matrix([[5]]))
  501. False
  502. """
  503. return isinstance(x, coo_matrix)