csc.py 7.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253
  1. """Compressed Sparse Column matrix format"""
  2. from __future__ import division, print_function, absolute_import
  3. __docformat__ = "restructuredtext en"
  4. __all__ = ['csc_matrix', 'isspmatrix_csc']
  5. import numpy as np
  6. from .base import spmatrix
  7. from ._sparsetools import csc_tocsr
  8. from . import _sparsetools
  9. from .sputils import upcast, isintlike, IndexMixin, get_index_dtype
  10. from .compressed import _cs_matrix
  11. class csc_matrix(_cs_matrix, IndexMixin):
  12. """
  13. Compressed Sparse Column matrix
  14. This can be instantiated in several ways:
  15. csc_matrix(D)
  16. with a dense matrix or rank-2 ndarray D
  17. csc_matrix(S)
  18. with another sparse matrix S (equivalent to S.tocsc())
  19. csc_matrix((M, N), [dtype])
  20. to construct an empty matrix with shape (M, N)
  21. dtype is optional, defaulting to dtype='d'.
  22. csc_matrix((data, (row_ind, col_ind)), [shape=(M, N)])
  23. where ``data``, ``row_ind`` and ``col_ind`` satisfy the
  24. relationship ``a[row_ind[k], col_ind[k]] = data[k]``.
  25. csc_matrix((data, indices, indptr), [shape=(M, N)])
  26. is the standard CSC representation where the row indices for
  27. column i are stored in ``indices[indptr[i]:indptr[i+1]]``
  28. and their corresponding values are stored in
  29. ``data[indptr[i]:indptr[i+1]]``. If the shape parameter is
  30. not supplied, the matrix dimensions are inferred from
  31. the index arrays.
  32. Attributes
  33. ----------
  34. dtype : dtype
  35. Data type of the matrix
  36. shape : 2-tuple
  37. Shape of the matrix
  38. ndim : int
  39. Number of dimensions (this is always 2)
  40. nnz
  41. Number of stored values, including explicit zeros
  42. data
  43. Data array of the matrix
  44. indices
  45. CSC format index array
  46. indptr
  47. CSC format index pointer array
  48. has_sorted_indices
  49. Whether indices are sorted
  50. Notes
  51. -----
  52. Sparse matrices can be used in arithmetic operations: they support
  53. addition, subtraction, multiplication, division, and matrix power.
  54. Advantages of the CSC format
  55. - efficient arithmetic operations CSC + CSC, CSC * CSC, etc.
  56. - efficient column slicing
  57. - fast matrix vector products (CSR, BSR may be faster)
  58. Disadvantages of the CSC format
  59. - slow row slicing operations (consider CSR)
  60. - changes to the sparsity structure are expensive (consider LIL or DOK)
  61. Examples
  62. --------
  63. >>> import numpy as np
  64. >>> from scipy.sparse import csc_matrix
  65. >>> csc_matrix((3, 4), dtype=np.int8).toarray()
  66. array([[0, 0, 0, 0],
  67. [0, 0, 0, 0],
  68. [0, 0, 0, 0]], dtype=int8)
  69. >>> row = np.array([0, 2, 2, 0, 1, 2])
  70. >>> col = np.array([0, 0, 1, 2, 2, 2])
  71. >>> data = np.array([1, 2, 3, 4, 5, 6])
  72. >>> csc_matrix((data, (row, col)), shape=(3, 3)).toarray()
  73. array([[1, 0, 4],
  74. [0, 0, 5],
  75. [2, 3, 6]])
  76. >>> indptr = np.array([0, 2, 3, 6])
  77. >>> indices = np.array([0, 2, 2, 0, 1, 2])
  78. >>> data = np.array([1, 2, 3, 4, 5, 6])
  79. >>> csc_matrix((data, indices, indptr), shape=(3, 3)).toarray()
  80. array([[1, 0, 4],
  81. [0, 0, 5],
  82. [2, 3, 6]])
  83. """
  84. format = 'csc'
  85. def transpose(self, axes=None, copy=False):
  86. if axes is not None:
  87. raise ValueError(("Sparse matrices do not support "
  88. "an 'axes' parameter because swapping "
  89. "dimensions is the only logical permutation."))
  90. M, N = self.shape
  91. from .csr import csr_matrix
  92. return csr_matrix((self.data, self.indices,
  93. self.indptr), (N, M), copy=copy)
  94. transpose.__doc__ = spmatrix.transpose.__doc__
  95. def __iter__(self):
  96. for r in self.tocsr():
  97. yield r
  98. def tocsc(self, copy=False):
  99. if copy:
  100. return self.copy()
  101. else:
  102. return self
  103. tocsc.__doc__ = spmatrix.tocsc.__doc__
  104. def tocsr(self, copy=False):
  105. M,N = self.shape
  106. idx_dtype = get_index_dtype((self.indptr, self.indices),
  107. maxval=max(self.nnz, N))
  108. indptr = np.empty(M + 1, dtype=idx_dtype)
  109. indices = np.empty(self.nnz, dtype=idx_dtype)
  110. data = np.empty(self.nnz, dtype=upcast(self.dtype))
  111. csc_tocsr(M, N,
  112. self.indptr.astype(idx_dtype),
  113. self.indices.astype(idx_dtype),
  114. self.data,
  115. indptr,
  116. indices,
  117. data)
  118. from .csr import csr_matrix
  119. A = csr_matrix((data, indices, indptr), shape=self.shape, copy=False)
  120. A.has_sorted_indices = True
  121. return A
  122. tocsr.__doc__ = spmatrix.tocsr.__doc__
  123. def __getitem__(self, key):
  124. # Use CSR to implement fancy indexing.
  125. row, col = self._unpack_index(key)
  126. # Things that return submatrices. row or col is a int or slice.
  127. if (isinstance(row, slice) or isinstance(col, slice) or
  128. isintlike(row) or isintlike(col)):
  129. return self.T[col, row].T
  130. # Things that return a sequence of values.
  131. else:
  132. return self.T[col, row]
  133. def nonzero(self):
  134. # CSC can't use _cs_matrix's .nonzero method because it
  135. # returns the indices sorted for self transposed.
  136. # Get row and col indices, from _cs_matrix.tocoo
  137. major_dim, minor_dim = self._swap(self.shape)
  138. minor_indices = self.indices
  139. major_indices = np.empty(len(minor_indices), dtype=self.indices.dtype)
  140. _sparsetools.expandptr(major_dim, self.indptr, major_indices)
  141. row, col = self._swap((major_indices, minor_indices))
  142. # Remove explicit zeros
  143. nz_mask = self.data != 0
  144. row = row[nz_mask]
  145. col = col[nz_mask]
  146. # Sort them to be in C-style order
  147. ind = np.argsort(row, kind='mergesort')
  148. row = row[ind]
  149. col = col[ind]
  150. return row, col
  151. nonzero.__doc__ = _cs_matrix.nonzero.__doc__
  152. def getrow(self, i):
  153. """Returns a copy of row i of the matrix, as a (1 x n)
  154. CSR matrix (row vector).
  155. """
  156. # we convert to CSR to maintain compatibility with old impl.
  157. # in spmatrix.getrow()
  158. return self._get_submatrix(i, slice(None)).tocsr()
  159. def getcol(self, i):
  160. """Returns a copy of column i of the matrix, as a (m x 1)
  161. CSC matrix (column vector).
  162. """
  163. M, N = self.shape
  164. i = int(i)
  165. if i < 0:
  166. i += N
  167. if i < 0 or i >= N:
  168. raise IndexError('index (%d) out of range' % i)
  169. idx = slice(*self.indptr[i:i+2])
  170. data = self.data[idx].copy()
  171. indices = self.indices[idx].copy()
  172. indptr = np.array([0, len(indices)], dtype=self.indptr.dtype)
  173. return csc_matrix((data, indices, indptr), shape=(M, 1),
  174. dtype=self.dtype, copy=False)
  175. # these functions are used by the parent class (_cs_matrix)
  176. # to remove redudancy between csc_matrix and csr_matrix
  177. def _swap(self, x):
  178. """swap the members of x if this is a column-oriented matrix
  179. """
  180. return x[1], x[0]
  181. def isspmatrix_csc(x):
  182. """Is x of csc_matrix type?
  183. Parameters
  184. ----------
  185. x
  186. object to check for being a csc matrix
  187. Returns
  188. -------
  189. bool
  190. True if x is a csc matrix, False otherwise
  191. Examples
  192. --------
  193. >>> from scipy.sparse import csc_matrix, isspmatrix_csc
  194. >>> isspmatrix_csc(csc_matrix([[5]]))
  195. True
  196. >>> from scipy.sparse import csc_matrix, csr_matrix, isspmatrix_csc
  197. >>> isspmatrix_csc(csr_matrix([[5]]))
  198. False
  199. """
  200. return isinstance(x, csc_matrix)