lil.py 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555
  1. """LInked List sparse matrix class
  2. """
  3. from __future__ import division, print_function, absolute_import
  4. __docformat__ = "restructuredtext en"
  5. __all__ = ['lil_matrix','isspmatrix_lil']
  6. from bisect import bisect_left
  7. import numpy as np
  8. from scipy._lib._version import NumpyVersion
  9. from scipy._lib.six import xrange, zip
  10. if NumpyVersion(np.__version__) >= '1.17.0':
  11. from scipy._lib._util import _broadcast_arrays
  12. else:
  13. from numpy import broadcast_arrays as _broadcast_arrays
  14. from .base import spmatrix, isspmatrix
  15. from .sputils import (getdtype, isshape, isscalarlike, IndexMixin,
  16. upcast_scalar, get_index_dtype, isintlike, check_shape,
  17. check_reshape_kwargs)
  18. from . import _csparsetools
  19. class lil_matrix(spmatrix, IndexMixin):
  20. """Row-based linked list sparse matrix
  21. This is a structure for constructing sparse matrices incrementally.
  22. Note that inserting a single item can take linear time in the worst case;
  23. to construct a matrix efficiently, make sure the items are pre-sorted by
  24. index, per row.
  25. This can be instantiated in several ways:
  26. lil_matrix(D)
  27. with a dense matrix or rank-2 ndarray D
  28. lil_matrix(S)
  29. with another sparse matrix S (equivalent to S.tolil())
  30. lil_matrix((M, N), [dtype])
  31. to construct an empty matrix with shape (M, N)
  32. dtype is optional, defaulting to dtype='d'.
  33. Attributes
  34. ----------
  35. dtype : dtype
  36. Data type of the matrix
  37. shape : 2-tuple
  38. Shape of the matrix
  39. ndim : int
  40. Number of dimensions (this is always 2)
  41. nnz
  42. Number of nonzero elements
  43. data
  44. LIL format data array of the matrix
  45. rows
  46. LIL format row index array of the matrix
  47. Notes
  48. -----
  49. Sparse matrices can be used in arithmetic operations: they support
  50. addition, subtraction, multiplication, division, and matrix power.
  51. Advantages of the LIL format
  52. - supports flexible slicing
  53. - changes to the matrix sparsity structure are efficient
  54. Disadvantages of the LIL format
  55. - arithmetic operations LIL + LIL are slow (consider CSR or CSC)
  56. - slow column slicing (consider CSC)
  57. - slow matrix vector products (consider CSR or CSC)
  58. Intended Usage
  59. - LIL is a convenient format for constructing sparse matrices
  60. - once a matrix has been constructed, convert to CSR or
  61. CSC format for fast arithmetic and matrix vector operations
  62. - consider using the COO format when constructing large matrices
  63. Data Structure
  64. - An array (``self.rows``) of rows, each of which is a sorted
  65. list of column indices of non-zero elements.
  66. - The corresponding nonzero values are stored in similar
  67. fashion in ``self.data``.
  68. """
  69. format = 'lil'
  70. def __init__(self, arg1, shape=None, dtype=None, copy=False):
  71. spmatrix.__init__(self)
  72. self.dtype = getdtype(dtype, arg1, default=float)
  73. # First get the shape
  74. if isspmatrix(arg1):
  75. if isspmatrix_lil(arg1) and copy:
  76. A = arg1.copy()
  77. else:
  78. A = arg1.tolil()
  79. if dtype is not None:
  80. A = A.astype(dtype)
  81. self._shape = check_shape(A.shape)
  82. self.dtype = A.dtype
  83. self.rows = A.rows
  84. self.data = A.data
  85. elif isinstance(arg1,tuple):
  86. if isshape(arg1):
  87. if shape is not None:
  88. raise ValueError('invalid use of shape parameter')
  89. M, N = arg1
  90. self._shape = check_shape((M, N))
  91. self.rows = np.empty((M,), dtype=object)
  92. self.data = np.empty((M,), dtype=object)
  93. for i in range(M):
  94. self.rows[i] = []
  95. self.data[i] = []
  96. else:
  97. raise TypeError('unrecognized lil_matrix constructor usage')
  98. else:
  99. # assume A is dense
  100. try:
  101. A = np.asmatrix(arg1)
  102. except TypeError:
  103. raise TypeError('unsupported matrix type')
  104. else:
  105. from .csr import csr_matrix
  106. A = csr_matrix(A, dtype=dtype).tolil()
  107. self._shape = check_shape(A.shape)
  108. self.dtype = A.dtype
  109. self.rows = A.rows
  110. self.data = A.data
  111. def __iadd__(self,other):
  112. self[:,:] = self + other
  113. return self
  114. def __isub__(self,other):
  115. self[:,:] = self - other
  116. return self
  117. def __imul__(self,other):
  118. if isscalarlike(other):
  119. self[:,:] = self * other
  120. return self
  121. else:
  122. return NotImplemented
  123. def __itruediv__(self,other):
  124. if isscalarlike(other):
  125. self[:,:] = self / other
  126. return self
  127. else:
  128. return NotImplemented
  129. # Whenever the dimensions change, empty lists should be created for each
  130. # row
  131. def getnnz(self, axis=None):
  132. if axis is None:
  133. return sum([len(rowvals) for rowvals in self.data])
  134. if axis < 0:
  135. axis += 2
  136. if axis == 0:
  137. out = np.zeros(self.shape[1], dtype=np.intp)
  138. for row in self.rows:
  139. out[row] += 1
  140. return out
  141. elif axis == 1:
  142. return np.array([len(rowvals) for rowvals in self.data], dtype=np.intp)
  143. else:
  144. raise ValueError('axis out of bounds')
  145. def count_nonzero(self):
  146. return sum(np.count_nonzero(rowvals) for rowvals in self.data)
  147. getnnz.__doc__ = spmatrix.getnnz.__doc__
  148. count_nonzero.__doc__ = spmatrix.count_nonzero.__doc__
  149. def __str__(self):
  150. val = ''
  151. for i, row in enumerate(self.rows):
  152. for pos, j in enumerate(row):
  153. val += " %s\t%s\n" % (str((i, j)), str(self.data[i][pos]))
  154. return val[:-1]
  155. def getrowview(self, i):
  156. """Returns a view of the 'i'th row (without copying).
  157. """
  158. new = lil_matrix((1, self.shape[1]), dtype=self.dtype)
  159. new.rows[0] = self.rows[i]
  160. new.data[0] = self.data[i]
  161. return new
  162. def getrow(self, i):
  163. """Returns a copy of the 'i'th row.
  164. """
  165. i = self._check_row_bounds(i)
  166. new = lil_matrix((1, self.shape[1]), dtype=self.dtype)
  167. new.rows[0] = self.rows[i][:]
  168. new.data[0] = self.data[i][:]
  169. return new
  170. def _check_row_bounds(self, i):
  171. if i < 0:
  172. i += self.shape[0]
  173. if i < 0 or i >= self.shape[0]:
  174. raise IndexError('row index out of bounds')
  175. return i
  176. def _check_col_bounds(self, j):
  177. if j < 0:
  178. j += self.shape[1]
  179. if j < 0 or j >= self.shape[1]:
  180. raise IndexError('column index out of bounds')
  181. return j
  182. def __getitem__(self, index):
  183. """Return the element(s) index=(i, j), where j may be a slice.
  184. This always returns a copy for consistency, since slices into
  185. Python lists return copies.
  186. """
  187. # Scalar fast path first
  188. if isinstance(index, tuple) and len(index) == 2:
  189. i, j = index
  190. # Use isinstance checks for common index types; this is
  191. # ~25-50% faster than isscalarlike. Other types are
  192. # handled below.
  193. if ((isinstance(i, int) or isinstance(i, np.integer)) and
  194. (isinstance(j, int) or isinstance(j, np.integer))):
  195. v = _csparsetools.lil_get1(self.shape[0], self.shape[1],
  196. self.rows, self.data,
  197. i, j)
  198. return self.dtype.type(v)
  199. # Utilities found in IndexMixin
  200. i, j = self._unpack_index(index)
  201. # Proper check for other scalar index types
  202. i_intlike = isintlike(i)
  203. j_intlike = isintlike(j)
  204. if i_intlike and j_intlike:
  205. v = _csparsetools.lil_get1(self.shape[0], self.shape[1],
  206. self.rows, self.data,
  207. i, j)
  208. return self.dtype.type(v)
  209. elif j_intlike or isinstance(j, slice):
  210. # column slicing fast path
  211. if j_intlike:
  212. j = self._check_col_bounds(j)
  213. j = slice(j, j+1)
  214. if i_intlike:
  215. i = self._check_row_bounds(i)
  216. i = xrange(i, i+1)
  217. i_shape = None
  218. elif isinstance(i, slice):
  219. i = xrange(*i.indices(self.shape[0]))
  220. i_shape = None
  221. else:
  222. i = np.atleast_1d(i)
  223. i_shape = i.shape
  224. if i_shape is None or len(i_shape) == 1:
  225. return self._get_row_ranges(i, j)
  226. i, j = self._index_to_arrays(i, j)
  227. if i.size == 0:
  228. return lil_matrix(i.shape, dtype=self.dtype)
  229. new = lil_matrix(i.shape, dtype=self.dtype)
  230. i, j = _prepare_index_for_memoryview(i, j)
  231. _csparsetools.lil_fancy_get(self.shape[0], self.shape[1],
  232. self.rows, self.data,
  233. new.rows, new.data,
  234. i, j)
  235. return new
  236. def _get_row_ranges(self, rows, col_slice):
  237. """
  238. Fast path for indexing in the case where column index is slice.
  239. This gains performance improvement over brute force by more
  240. efficient skipping of zeros, by accessing the elements
  241. column-wise in order.
  242. Parameters
  243. ----------
  244. rows : sequence or xrange
  245. Rows indexed. If xrange, must be within valid bounds.
  246. col_slice : slice
  247. Columns indexed
  248. """
  249. j_start, j_stop, j_stride = col_slice.indices(self.shape[1])
  250. col_range = xrange(j_start, j_stop, j_stride)
  251. nj = len(col_range)
  252. new = lil_matrix((len(rows), nj), dtype=self.dtype)
  253. _csparsetools.lil_get_row_ranges(self.shape[0], self.shape[1],
  254. self.rows, self.data,
  255. new.rows, new.data,
  256. rows,
  257. j_start, j_stop, j_stride, nj)
  258. return new
  259. def __setitem__(self, index, x):
  260. # Scalar fast path first
  261. if isinstance(index, tuple) and len(index) == 2:
  262. i, j = index
  263. # Use isinstance checks for common index types; this is
  264. # ~25-50% faster than isscalarlike. Scalar index
  265. # assignment for other types is handled below together
  266. # with fancy indexing.
  267. if ((isinstance(i, int) or isinstance(i, np.integer)) and
  268. (isinstance(j, int) or isinstance(j, np.integer))):
  269. x = self.dtype.type(x)
  270. if x.size > 1:
  271. # Triggered if input was an ndarray
  272. raise ValueError("Trying to assign a sequence to an item")
  273. _csparsetools.lil_insert(self.shape[0], self.shape[1],
  274. self.rows, self.data, i, j, x)
  275. return
  276. # General indexing
  277. i, j = self._unpack_index(index)
  278. # shortcut for common case of full matrix assign:
  279. if (isspmatrix(x) and isinstance(i, slice) and i == slice(None) and
  280. isinstance(j, slice) and j == slice(None)
  281. and x.shape == self.shape):
  282. x = lil_matrix(x, dtype=self.dtype)
  283. self.rows = x.rows
  284. self.data = x.data
  285. return
  286. i, j = self._index_to_arrays(i, j)
  287. if isspmatrix(x):
  288. x = x.toarray()
  289. # Make x and i into the same shape
  290. x = np.asarray(x, dtype=self.dtype)
  291. x, _ = _broadcast_arrays(x, i)
  292. if x.shape != i.shape:
  293. raise ValueError("shape mismatch in assignment")
  294. # Set values
  295. i, j, x = _prepare_index_for_memoryview(i, j, x)
  296. _csparsetools.lil_fancy_set(self.shape[0], self.shape[1],
  297. self.rows, self.data,
  298. i, j, x)
  299. def _mul_scalar(self, other):
  300. if other == 0:
  301. # Multiply by zero: return the zero matrix
  302. new = lil_matrix(self.shape, dtype=self.dtype)
  303. else:
  304. res_dtype = upcast_scalar(self.dtype, other)
  305. new = self.copy()
  306. new = new.astype(res_dtype)
  307. # Multiply this scalar by every element.
  308. for j, rowvals in enumerate(new.data):
  309. new.data[j] = [val*other for val in rowvals]
  310. return new
  311. def __truediv__(self, other): # self / other
  312. if isscalarlike(other):
  313. new = self.copy()
  314. # Divide every element by this scalar
  315. for j, rowvals in enumerate(new.data):
  316. new.data[j] = [val/other for val in rowvals]
  317. return new
  318. else:
  319. return self.tocsr() / other
  320. def copy(self):
  321. from copy import deepcopy
  322. new = lil_matrix(self.shape, dtype=self.dtype)
  323. new.data = deepcopy(self.data)
  324. new.rows = deepcopy(self.rows)
  325. return new
  326. copy.__doc__ = spmatrix.copy.__doc__
  327. def reshape(self, *args, **kwargs):
  328. shape = check_shape(args, self.shape)
  329. order, copy = check_reshape_kwargs(kwargs)
  330. # Return early if reshape is not required
  331. if shape == self.shape:
  332. if copy:
  333. return self.copy()
  334. else:
  335. return self
  336. new = lil_matrix(shape, dtype=self.dtype)
  337. if order == 'C':
  338. ncols = self.shape[1]
  339. for i, row in enumerate(self.rows):
  340. for col, j in enumerate(row):
  341. new_r, new_c = np.unravel_index(i * ncols + j, shape)
  342. new[new_r, new_c] = self[i, j]
  343. elif order == 'F':
  344. nrows = self.shape[0]
  345. for i, row in enumerate(self.rows):
  346. for col, j in enumerate(row):
  347. new_r, new_c = np.unravel_index(i + j * nrows, shape, order)
  348. new[new_r, new_c] = self[i, j]
  349. else:
  350. raise ValueError("'order' must be 'C' or 'F'")
  351. return new
  352. reshape.__doc__ = spmatrix.reshape.__doc__
  353. def resize(self, *shape):
  354. shape = check_shape(shape)
  355. new_M, new_N = shape
  356. M, N = self.shape
  357. if new_M < M:
  358. self.rows = self.rows[:new_M]
  359. self.data = self.data[:new_M]
  360. elif new_M > M:
  361. self.rows = np.resize(self.rows, new_M)
  362. self.data = np.resize(self.data, new_M)
  363. for i in range(M, new_M):
  364. self.rows[i] = []
  365. self.data[i] = []
  366. if new_N < N:
  367. for row, data in zip(self.rows, self.data):
  368. trunc = bisect_left(row, new_N)
  369. del row[trunc:]
  370. del data[trunc:]
  371. self._shape = shape
  372. resize.__doc__ = spmatrix.resize.__doc__
  373. def toarray(self, order=None, out=None):
  374. d = self._process_toarray_args(order, out)
  375. for i, row in enumerate(self.rows):
  376. for pos, j in enumerate(row):
  377. d[i, j] = self.data[i][pos]
  378. return d
  379. toarray.__doc__ = spmatrix.toarray.__doc__
  380. def transpose(self, axes=None, copy=False):
  381. return self.tocsr(copy=copy).transpose(axes=axes, copy=False).tolil(copy=False)
  382. transpose.__doc__ = spmatrix.transpose.__doc__
  383. def tolil(self, copy=False):
  384. if copy:
  385. return self.copy()
  386. else:
  387. return self
  388. tolil.__doc__ = spmatrix.tolil.__doc__
  389. def tocsr(self, copy=False):
  390. lst = [len(x) for x in self.rows]
  391. idx_dtype = get_index_dtype(maxval=max(self.shape[1], sum(lst)))
  392. indptr = np.cumsum([0] + lst, dtype=idx_dtype)
  393. indices = np.array([x for y in self.rows for x in y], dtype=idx_dtype)
  394. data = np.array([x for y in self.data for x in y], dtype=self.dtype)
  395. from .csr import csr_matrix
  396. return csr_matrix((data, indices, indptr), shape=self.shape)
  397. tocsr.__doc__ = spmatrix.tocsr.__doc__
  398. def _prepare_index_for_memoryview(i, j, x=None):
  399. """
  400. Convert index and data arrays to form suitable for passing to the
  401. Cython fancy getset routines.
  402. The conversions are necessary since to (i) ensure the integer
  403. index arrays are in one of the accepted types, and (ii) to ensure
  404. the arrays are writable so that Cython memoryview support doesn't
  405. choke on them.
  406. Parameters
  407. ----------
  408. i, j
  409. Index arrays
  410. x : optional
  411. Data arrays
  412. Returns
  413. -------
  414. i, j, x
  415. Re-formatted arrays (x is omitted, if input was None)
  416. """
  417. if i.dtype > j.dtype:
  418. j = j.astype(i.dtype)
  419. elif i.dtype < j.dtype:
  420. i = i.astype(j.dtype)
  421. if not i.flags.writeable or i.dtype not in (np.int32, np.int64):
  422. i = i.astype(np.intp)
  423. if not j.flags.writeable or j.dtype not in (np.int32, np.int64):
  424. j = j.astype(np.intp)
  425. if x is not None:
  426. if not x.flags.writeable:
  427. x = x.copy()
  428. return i, j, x
  429. else:
  430. return i, j
  431. def isspmatrix_lil(x):
  432. """Is x of lil_matrix type?
  433. Parameters
  434. ----------
  435. x
  436. object to check for being a lil matrix
  437. Returns
  438. -------
  439. bool
  440. True if x is a lil matrix, False otherwise
  441. Examples
  442. --------
  443. >>> from scipy.sparse import lil_matrix, isspmatrix_lil
  444. >>> isspmatrix_lil(lil_matrix([[5]]))
  445. True
  446. >>> from scipy.sparse import lil_matrix, csr_matrix, isspmatrix_lil
  447. >>> isspmatrix_lil(csr_matrix([[5]]))
  448. False
  449. """
  450. return isinstance(x, lil_matrix)