dok.py 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538
  1. """Dictionary Of Keys based matrix"""
  2. from __future__ import division, print_function, absolute_import
  3. __docformat__ = "restructuredtext en"
  4. __all__ = ['dok_matrix', 'isspmatrix_dok']
  5. import functools
  6. import operator
  7. import itertools
  8. import numpy as np
  9. from scipy._lib.six import zip as izip, xrange, iteritems, iterkeys, itervalues
  10. from .base import spmatrix, isspmatrix
  11. from .sputils import (isdense, getdtype, isshape, isintlike, isscalarlike,
  12. upcast, upcast_scalar, IndexMixin, get_index_dtype,
  13. check_shape)
  14. try:
  15. from operator import isSequenceType as _is_sequence
  16. except ImportError:
  17. def _is_sequence(x):
  18. return (hasattr(x, '__len__') or hasattr(x, '__next__')
  19. or hasattr(x, 'next'))
  20. class dok_matrix(spmatrix, IndexMixin, dict):
  21. """
  22. Dictionary Of Keys based sparse matrix.
  23. This is an efficient structure for constructing sparse
  24. matrices incrementally.
  25. This can be instantiated in several ways:
  26. dok_matrix(D)
  27. with a dense matrix, D
  28. dok_matrix(S)
  29. with a sparse matrix, S
  30. dok_matrix((M,N), [dtype])
  31. create the matrix with initial shape (M,N)
  32. dtype is optional, defaulting to dtype='d'
  33. Attributes
  34. ----------
  35. dtype : dtype
  36. Data type of the matrix
  37. shape : 2-tuple
  38. Shape of the matrix
  39. ndim : int
  40. Number of dimensions (this is always 2)
  41. nnz
  42. Number of nonzero elements
  43. Notes
  44. -----
  45. Sparse matrices can be used in arithmetic operations: they support
  46. addition, subtraction, multiplication, division, and matrix power.
  47. Allows for efficient O(1) access of individual elements.
  48. Duplicates are not allowed.
  49. Can be efficiently converted to a coo_matrix once constructed.
  50. Examples
  51. --------
  52. >>> import numpy as np
  53. >>> from scipy.sparse import dok_matrix
  54. >>> S = dok_matrix((5, 5), dtype=np.float32)
  55. >>> for i in range(5):
  56. ... for j in range(5):
  57. ... S[i, j] = i + j # Update element
  58. """
  59. format = 'dok'
  60. def __init__(self, arg1, shape=None, dtype=None, copy=False):
  61. dict.__init__(self)
  62. spmatrix.__init__(self)
  63. self.dtype = getdtype(dtype, default=float)
  64. if isinstance(arg1, tuple) and isshape(arg1): # (M,N)
  65. M, N = arg1
  66. self._shape = check_shape((M, N))
  67. elif isspmatrix(arg1): # Sparse ctor
  68. if isspmatrix_dok(arg1) and copy:
  69. arg1 = arg1.copy()
  70. else:
  71. arg1 = arg1.todok()
  72. if dtype is not None:
  73. arg1 = arg1.astype(dtype)
  74. dict.update(self, arg1)
  75. self._shape = check_shape(arg1.shape)
  76. self.dtype = arg1.dtype
  77. else: # Dense ctor
  78. try:
  79. arg1 = np.asarray(arg1)
  80. except Exception:
  81. raise TypeError('Invalid input format.')
  82. if len(arg1.shape) != 2:
  83. raise TypeError('Expected rank <=2 dense array or matrix.')
  84. from .coo import coo_matrix
  85. d = coo_matrix(arg1, dtype=dtype).todok()
  86. dict.update(self, d)
  87. self._shape = check_shape(arg1.shape)
  88. self.dtype = d.dtype
  89. def update(self, val):
  90. # Prevent direct usage of update
  91. raise NotImplementedError("Direct modification to dok_matrix element "
  92. "is not allowed.")
  93. def _update(self, data):
  94. """An update method for dict data defined for direct access to
  95. `dok_matrix` data. Main purpose is to be used for effcient conversion
  96. from other spmatrix classes. Has no checking if `data` is valid."""
  97. return dict.update(self, data)
  98. def set_shape(self, shape):
  99. new_matrix = self.reshape(shape, copy=False).asformat(self.format)
  100. self.__dict__ = new_matrix.__dict__
  101. dict.clear(self)
  102. dict.update(self, new_matrix)
  103. shape = property(fget=spmatrix.get_shape, fset=set_shape)
  104. def getnnz(self, axis=None):
  105. if axis is not None:
  106. raise NotImplementedError("getnnz over an axis is not implemented "
  107. "for DOK format.")
  108. return dict.__len__(self)
  109. def count_nonzero(self):
  110. return sum(x != 0 for x in itervalues(self))
  111. getnnz.__doc__ = spmatrix.getnnz.__doc__
  112. count_nonzero.__doc__ = spmatrix.count_nonzero.__doc__
  113. def __len__(self):
  114. return dict.__len__(self)
  115. def get(self, key, default=0.):
  116. """This overrides the dict.get method, providing type checking
  117. but otherwise equivalent functionality.
  118. """
  119. try:
  120. i, j = key
  121. assert isintlike(i) and isintlike(j)
  122. except (AssertionError, TypeError, ValueError):
  123. raise IndexError('Index must be a pair of integers.')
  124. if (i < 0 or i >= self.shape[0] or j < 0 or j >= self.shape[1]):
  125. raise IndexError('Index out of bounds.')
  126. return dict.get(self, key, default)
  127. def __getitem__(self, index):
  128. """If key=(i, j) is a pair of integers, return the corresponding
  129. element. If either i or j is a slice or sequence, return a new sparse
  130. matrix with just these elements.
  131. """
  132. zero = self.dtype.type(0)
  133. i, j = self._unpack_index(index)
  134. i_intlike = isintlike(i)
  135. j_intlike = isintlike(j)
  136. if i_intlike and j_intlike:
  137. i = int(i)
  138. j = int(j)
  139. if i < 0:
  140. i += self.shape[0]
  141. if i < 0 or i >= self.shape[0]:
  142. raise IndexError('Index out of bounds.')
  143. if j < 0:
  144. j += self.shape[1]
  145. if j < 0 or j >= self.shape[1]:
  146. raise IndexError('Index out of bounds.')
  147. return dict.get(self, (i,j), zero)
  148. elif ((i_intlike or isinstance(i, slice)) and
  149. (j_intlike or isinstance(j, slice))):
  150. # Fast path for slicing very sparse matrices
  151. i_slice = slice(i, i+1) if i_intlike else i
  152. j_slice = slice(j, j+1) if j_intlike else j
  153. i_indices = i_slice.indices(self.shape[0])
  154. j_indices = j_slice.indices(self.shape[1])
  155. i_seq = xrange(*i_indices)
  156. j_seq = xrange(*j_indices)
  157. newshape = (len(i_seq), len(j_seq))
  158. newsize = _prod(newshape)
  159. if len(self) < 2*newsize and newsize != 0:
  160. # Switch to the fast path only when advantageous
  161. # (count the iterations in the loops, adjust for complexity)
  162. #
  163. # We also don't handle newsize == 0 here (if
  164. # i/j_intlike, it can mean index i or j was out of
  165. # bounds)
  166. return self._getitem_ranges(i_indices, j_indices, newshape)
  167. i, j = self._index_to_arrays(i, j)
  168. if i.size == 0:
  169. return dok_matrix(i.shape, dtype=self.dtype)
  170. min_i = i.min()
  171. if min_i < -self.shape[0] or i.max() >= self.shape[0]:
  172. raise IndexError('Index (%d) out of range -%d to %d.' %
  173. (i.min(), self.shape[0], self.shape[0]-1))
  174. if min_i < 0:
  175. i = i.copy()
  176. i[i < 0] += self.shape[0]
  177. min_j = j.min()
  178. if min_j < -self.shape[1] or j.max() >= self.shape[1]:
  179. raise IndexError('Index (%d) out of range -%d to %d.' %
  180. (j.min(), self.shape[1], self.shape[1]-1))
  181. if min_j < 0:
  182. j = j.copy()
  183. j[j < 0] += self.shape[1]
  184. newdok = dok_matrix(i.shape, dtype=self.dtype)
  185. for key in itertools.product(xrange(i.shape[0]), xrange(i.shape[1])):
  186. v = dict.get(self, (i[key], j[key]), zero)
  187. if v:
  188. dict.__setitem__(newdok, key, v)
  189. return newdok
  190. def _getitem_ranges(self, i_indices, j_indices, shape):
  191. # performance golf: we don't want Numpy scalars here, they are slow
  192. i_start, i_stop, i_stride = map(int, i_indices)
  193. j_start, j_stop, j_stride = map(int, j_indices)
  194. newdok = dok_matrix(shape, dtype=self.dtype)
  195. for (ii, jj) in iterkeys(self):
  196. # ditto for numpy scalars
  197. ii = int(ii)
  198. jj = int(jj)
  199. a, ra = divmod(ii - i_start, i_stride)
  200. if a < 0 or a >= shape[0] or ra != 0:
  201. continue
  202. b, rb = divmod(jj - j_start, j_stride)
  203. if b < 0 or b >= shape[1] or rb != 0:
  204. continue
  205. dict.__setitem__(newdok, (a, b),
  206. dict.__getitem__(self, (ii, jj)))
  207. return newdok
  208. def __setitem__(self, index, x):
  209. if isinstance(index, tuple) and len(index) == 2:
  210. # Integer index fast path
  211. i, j = index
  212. if (isintlike(i) and isintlike(j) and 0 <= i < self.shape[0]
  213. and 0 <= j < self.shape[1]):
  214. v = np.asarray(x, dtype=self.dtype)
  215. if v.ndim == 0 and v != 0:
  216. dict.__setitem__(self, (int(i), int(j)), v[()])
  217. return
  218. i, j = self._unpack_index(index)
  219. i, j = self._index_to_arrays(i, j)
  220. if isspmatrix(x):
  221. x = x.toarray()
  222. # Make x and i into the same shape
  223. x = np.asarray(x, dtype=self.dtype)
  224. x, _ = np.broadcast_arrays(x, i)
  225. if x.shape != i.shape:
  226. raise ValueError("Shape mismatch in assignment.")
  227. if np.size(x) == 0:
  228. return
  229. min_i = i.min()
  230. if min_i < -self.shape[0] or i.max() >= self.shape[0]:
  231. raise IndexError('Index (%d) out of range -%d to %d.' %
  232. (i.min(), self.shape[0], self.shape[0]-1))
  233. if min_i < 0:
  234. i = i.copy()
  235. i[i < 0] += self.shape[0]
  236. min_j = j.min()
  237. if min_j < -self.shape[1] or j.max() >= self.shape[1]:
  238. raise IndexError('Index (%d) out of range -%d to %d.' %
  239. (j.min(), self.shape[1], self.shape[1]-1))
  240. if min_j < 0:
  241. j = j.copy()
  242. j[j < 0] += self.shape[1]
  243. dict.update(self, izip(izip(i.flat, j.flat), x.flat))
  244. if 0 in x:
  245. zeroes = x == 0
  246. for key in izip(i[zeroes].flat, j[zeroes].flat):
  247. if dict.__getitem__(self, key) == 0:
  248. # may have been superseded by later update
  249. del self[key]
  250. def __add__(self, other):
  251. if isscalarlike(other):
  252. res_dtype = upcast_scalar(self.dtype, other)
  253. new = dok_matrix(self.shape, dtype=res_dtype)
  254. # Add this scalar to every element.
  255. M, N = self.shape
  256. for key in itertools.product(xrange(M), xrange(N)):
  257. aij = dict.get(self, (key), 0) + other
  258. if aij:
  259. new[key] = aij
  260. # new.dtype.char = self.dtype.char
  261. elif isspmatrix_dok(other):
  262. if other.shape != self.shape:
  263. raise ValueError("Matrix dimensions are not equal.")
  264. # We could alternatively set the dimensions to the largest of
  265. # the two matrices to be summed. Would this be a good idea?
  266. res_dtype = upcast(self.dtype, other.dtype)
  267. new = dok_matrix(self.shape, dtype=res_dtype)
  268. dict.update(new, self)
  269. with np.errstate(over='ignore'):
  270. dict.update(new,
  271. ((k, new[k] + other[k]) for k in iterkeys(other)))
  272. elif isspmatrix(other):
  273. csc = self.tocsc()
  274. new = csc + other
  275. elif isdense(other):
  276. new = self.todense() + other
  277. else:
  278. return NotImplemented
  279. return new
  280. def __radd__(self, other):
  281. if isscalarlike(other):
  282. new = dok_matrix(self.shape, dtype=self.dtype)
  283. M, N = self.shape
  284. for key in itertools.product(xrange(M), xrange(N)):
  285. aij = dict.get(self, (key), 0) + other
  286. if aij:
  287. new[key] = aij
  288. elif isspmatrix_dok(other):
  289. if other.shape != self.shape:
  290. raise ValueError("Matrix dimensions are not equal.")
  291. new = dok_matrix(self.shape, dtype=self.dtype)
  292. dict.update(new, self)
  293. dict.update(new,
  294. ((k, self[k] + other[k]) for k in iterkeys(other)))
  295. elif isspmatrix(other):
  296. csc = self.tocsc()
  297. new = csc + other
  298. elif isdense(other):
  299. new = other + self.todense()
  300. else:
  301. return NotImplemented
  302. return new
  303. def __neg__(self):
  304. if self.dtype.kind == 'b':
  305. raise NotImplementedError('Negating a sparse boolean matrix is not'
  306. ' supported.')
  307. new = dok_matrix(self.shape, dtype=self.dtype)
  308. dict.update(new, ((k, -self[k]) for k in iterkeys(self)))
  309. return new
  310. def _mul_scalar(self, other):
  311. res_dtype = upcast_scalar(self.dtype, other)
  312. # Multiply this scalar by every element.
  313. new = dok_matrix(self.shape, dtype=res_dtype)
  314. dict.update(new, ((k, v * other) for k, v in iteritems(self)))
  315. return new
  316. def _mul_vector(self, other):
  317. # matrix * vector
  318. result = np.zeros(self.shape[0], dtype=upcast(self.dtype, other.dtype))
  319. for (i, j), v in iteritems(self):
  320. result[i] += v * other[j]
  321. return result
  322. def _mul_multivector(self, other):
  323. # matrix * multivector
  324. result_shape = (self.shape[0], other.shape[1])
  325. result_dtype = upcast(self.dtype, other.dtype)
  326. result = np.zeros(result_shape, dtype=result_dtype)
  327. for (i, j), v in iteritems(self):
  328. result[i,:] += v * other[j,:]
  329. return result
  330. def __imul__(self, other):
  331. if isscalarlike(other):
  332. dict.update(self, ((k, v * other) for k, v in iteritems(self)))
  333. return self
  334. return NotImplemented
  335. def __truediv__(self, other):
  336. if isscalarlike(other):
  337. res_dtype = upcast_scalar(self.dtype, other)
  338. new = dok_matrix(self.shape, dtype=res_dtype)
  339. dict.update(new, ((k, v / other) for k, v in iteritems(self)))
  340. return new
  341. return self.tocsr() / other
  342. def __itruediv__(self, other):
  343. if isscalarlike(other):
  344. dict.update(self, ((k, v / other) for k, v in iteritems(self)))
  345. return self
  346. return NotImplemented
  347. def __reduce__(self):
  348. # this approach is necessary because __setstate__ is called after
  349. # __setitem__ upon unpickling and since __init__ is not called there
  350. # is no shape attribute hence it is not possible to unpickle it.
  351. return dict.__reduce__(self)
  352. # What should len(sparse) return? For consistency with dense matrices,
  353. # perhaps it should be the number of rows? For now it returns the number
  354. # of non-zeros.
  355. def transpose(self, axes=None, copy=False):
  356. if axes is not None:
  357. raise ValueError("Sparse matrices do not support "
  358. "an 'axes' parameter because swapping "
  359. "dimensions is the only logical permutation.")
  360. M, N = self.shape
  361. new = dok_matrix((N, M), dtype=self.dtype, copy=copy)
  362. dict.update(new, (((right, left), val)
  363. for (left, right), val in iteritems(self)))
  364. return new
  365. transpose.__doc__ = spmatrix.transpose.__doc__
  366. def conjtransp(self):
  367. """Return the conjugate transpose."""
  368. M, N = self.shape
  369. new = dok_matrix((N, M), dtype=self.dtype)
  370. dict.update(new, (((right, left), np.conj(val))
  371. for (left, right), val in iteritems(self)))
  372. return new
  373. def copy(self):
  374. new = dok_matrix(self.shape, dtype=self.dtype)
  375. dict.update(new, self)
  376. return new
  377. copy.__doc__ = spmatrix.copy.__doc__
  378. def getrow(self, i):
  379. """Returns the i-th row as a (1 x n) DOK matrix."""
  380. new = dok_matrix((1, self.shape[1]), dtype=self.dtype)
  381. dict.update(new, (((0, j), self[i, j]) for j in xrange(self.shape[1])))
  382. return new
  383. def getcol(self, j):
  384. """Returns the j-th column as a (m x 1) DOK matrix."""
  385. new = dok_matrix((self.shape[0], 1), dtype=self.dtype)
  386. dict.update(new, (((i, 0), self[i, j]) for i in xrange(self.shape[0])))
  387. return new
  388. def tocoo(self, copy=False):
  389. from .coo import coo_matrix
  390. if self.nnz == 0:
  391. return coo_matrix(self.shape, dtype=self.dtype)
  392. idx_dtype = get_index_dtype(maxval=max(self.shape))
  393. data = np.fromiter(itervalues(self), dtype=self.dtype, count=self.nnz)
  394. row = np.fromiter((i for i, _ in iterkeys(self)), dtype=idx_dtype, count=self.nnz)
  395. col = np.fromiter((j for _, j in iterkeys(self)), dtype=idx_dtype, count=self.nnz)
  396. A = coo_matrix((data, (row, col)), shape=self.shape, dtype=self.dtype)
  397. A.has_canonical_format = True
  398. return A
  399. tocoo.__doc__ = spmatrix.tocoo.__doc__
  400. def todok(self, copy=False):
  401. if copy:
  402. return self.copy()
  403. return self
  404. todok.__doc__ = spmatrix.todok.__doc__
  405. def tocsc(self, copy=False):
  406. return self.tocoo(copy=False).tocsc(copy=copy)
  407. tocsc.__doc__ = spmatrix.tocsc.__doc__
  408. def resize(self, *shape):
  409. shape = check_shape(shape)
  410. newM, newN = shape
  411. M, N = self.shape
  412. if newM < M or newN < N:
  413. # Remove all elements outside new dimensions
  414. for (i, j) in list(iterkeys(self)):
  415. if i >= newM or j >= newN:
  416. del self[i, j]
  417. self._shape = shape
  418. resize.__doc__ = spmatrix.resize.__doc__
  419. def isspmatrix_dok(x):
  420. """Is x of dok_matrix type?
  421. Parameters
  422. ----------
  423. x
  424. object to check for being a dok matrix
  425. Returns
  426. -------
  427. bool
  428. True if x is a dok matrix, False otherwise
  429. Examples
  430. --------
  431. >>> from scipy.sparse import dok_matrix, isspmatrix_dok
  432. >>> isspmatrix_dok(dok_matrix([[5]]))
  433. True
  434. >>> from scipy.sparse import dok_matrix, csr_matrix, isspmatrix_dok
  435. >>> isspmatrix_dok(csr_matrix([[5]]))
  436. False
  437. """
  438. return isinstance(x, dok_matrix)
  439. def _prod(x):
  440. """Product of a list of numbers; ~40x faster vs np.prod for Python tuples"""
  441. if len(x) == 0:
  442. return 1
  443. return functools.reduce(operator.mul, x)