_gcrotmk.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487
  1. # Copyright (C) 2015, Pauli Virtanen <pav@iki.fi>
  2. # Distributed under the same license as Scipy.
  3. from __future__ import division, print_function, absolute_import
  4. import warnings
  5. import numpy as np
  6. from numpy.linalg import LinAlgError
  7. from scipy._lib.six import xrange
  8. from scipy.linalg import (get_blas_funcs, qr, solve, svd, qr_insert, lstsq)
  9. from scipy.sparse.linalg.isolve.utils import make_system
  10. __all__ = ['gcrotmk']
  11. def _fgmres(matvec, v0, m, atol, lpsolve=None, rpsolve=None, cs=(), outer_v=(),
  12. prepend_outer_v=False):
  13. """
  14. FGMRES Arnoldi process, with optional projection or augmentation
  15. Parameters
  16. ----------
  17. matvec : callable
  18. Operation A*x
  19. v0 : ndarray
  20. Initial vector, normalized to nrm2(v0) == 1
  21. m : int
  22. Number of GMRES rounds
  23. atol : float
  24. Absolute tolerance for early exit
  25. lpsolve : callable
  26. Left preconditioner L
  27. rpsolve : callable
  28. Right preconditioner R
  29. CU : list of (ndarray, ndarray)
  30. Columns of matrices C and U in GCROT
  31. outer_v : list of ndarrays
  32. Augmentation vectors in LGMRES
  33. prepend_outer_v : bool, optional
  34. Whether augmentation vectors come before or after
  35. Krylov iterates
  36. Raises
  37. ------
  38. LinAlgError
  39. If nans encountered
  40. Returns
  41. -------
  42. Q, R : ndarray
  43. QR decomposition of the upper Hessenberg H=QR
  44. B : ndarray
  45. Projections corresponding to matrix C
  46. vs : list of ndarray
  47. Columns of matrix V
  48. zs : list of ndarray
  49. Columns of matrix Z
  50. y : ndarray
  51. Solution to ||H y - e_1||_2 = min!
  52. res : float
  53. The final (preconditioned) residual norm
  54. """
  55. if lpsolve is None:
  56. lpsolve = lambda x: x
  57. if rpsolve is None:
  58. rpsolve = lambda x: x
  59. axpy, dot, scal, nrm2 = get_blas_funcs(['axpy', 'dot', 'scal', 'nrm2'], (v0,))
  60. vs = [v0]
  61. zs = []
  62. y = None
  63. res = np.nan
  64. m = m + len(outer_v)
  65. # Orthogonal projection coefficients
  66. B = np.zeros((len(cs), m), dtype=v0.dtype)
  67. # H is stored in QR factorized form
  68. Q = np.ones((1, 1), dtype=v0.dtype)
  69. R = np.zeros((1, 0), dtype=v0.dtype)
  70. eps = np.finfo(v0.dtype).eps
  71. breakdown = False
  72. # FGMRES Arnoldi process
  73. for j in xrange(m):
  74. # L A Z = C B + V H
  75. if prepend_outer_v and j < len(outer_v):
  76. z, w = outer_v[j]
  77. elif prepend_outer_v and j == len(outer_v):
  78. z = rpsolve(v0)
  79. w = None
  80. elif not prepend_outer_v and j >= m - len(outer_v):
  81. z, w = outer_v[j - (m - len(outer_v))]
  82. else:
  83. z = rpsolve(vs[-1])
  84. w = None
  85. if w is None:
  86. w = lpsolve(matvec(z))
  87. else:
  88. # w is clobbered below
  89. w = w.copy()
  90. w_norm = nrm2(w)
  91. # GCROT projection: L A -> (1 - C C^H) L A
  92. # i.e. orthogonalize against C
  93. for i, c in enumerate(cs):
  94. alpha = dot(c, w)
  95. B[i,j] = alpha
  96. w = axpy(c, w, c.shape[0], -alpha) # w -= alpha*c
  97. # Orthogonalize against V
  98. hcur = np.zeros(j+2, dtype=Q.dtype)
  99. for i, v in enumerate(vs):
  100. alpha = dot(v, w)
  101. hcur[i] = alpha
  102. w = axpy(v, w, v.shape[0], -alpha) # w -= alpha*v
  103. hcur[i+1] = nrm2(w)
  104. with np.errstate(over='ignore', divide='ignore'):
  105. # Careful with denormals
  106. alpha = 1/hcur[-1]
  107. if np.isfinite(alpha):
  108. w = scal(alpha, w)
  109. if not (hcur[-1] > eps * w_norm):
  110. # w essentially in the span of previous vectors,
  111. # or we have nans. Bail out after updating the QR
  112. # solution.
  113. breakdown = True
  114. vs.append(w)
  115. zs.append(z)
  116. # Arnoldi LSQ problem
  117. # Add new column to H=Q*R, padding other columns with zeros
  118. Q2 = np.zeros((j+2, j+2), dtype=Q.dtype, order='F')
  119. Q2[:j+1,:j+1] = Q
  120. Q2[j+1,j+1] = 1
  121. R2 = np.zeros((j+2, j), dtype=R.dtype, order='F')
  122. R2[:j+1,:] = R
  123. Q, R = qr_insert(Q2, R2, hcur, j, which='col',
  124. overwrite_qru=True, check_finite=False)
  125. # Transformed least squares problem
  126. # || Q R y - inner_res_0 * e_1 ||_2 = min!
  127. # Since R = [R'; 0], solution is y = inner_res_0 (R')^{-1} (Q^H)[:j,0]
  128. # Residual is immediately known
  129. res = abs(Q[0,-1])
  130. # Check for termination
  131. if res < atol or breakdown:
  132. break
  133. if not np.isfinite(R[j,j]):
  134. # nans encountered, bail out
  135. raise LinAlgError()
  136. # -- Get the LSQ problem solution
  137. # The problem is triangular, but the condition number may be
  138. # bad (or in case of breakdown the last diagonal entry may be
  139. # zero), so use lstsq instead of trtrs.
  140. y, _, _, _, = lstsq(R[:j+1,:j+1], Q[0,:j+1].conj())
  141. B = B[:,:j+1]
  142. return Q, R, B, vs, zs, y, res
  143. def gcrotmk(A, b, x0=None, tol=1e-5, maxiter=1000, M=None, callback=None,
  144. m=20, k=None, CU=None, discard_C=False, truncate='oldest',
  145. atol=None):
  146. """
  147. Solve a matrix equation using flexible GCROT(m,k) algorithm.
  148. Parameters
  149. ----------
  150. A : {sparse matrix, dense matrix, LinearOperator}
  151. The real or complex N-by-N matrix of the linear system.
  152. b : {array, matrix}
  153. Right hand side of the linear system. Has shape (N,) or (N,1).
  154. x0 : {array, matrix}
  155. Starting guess for the solution.
  156. tol, atol : float, optional
  157. Tolerances for convergence, ``norm(residual) <= max(tol*norm(b), atol)``.
  158. The default for ``atol`` is `tol`.
  159. .. warning::
  160. The default value for `atol` will be changed in a future release.
  161. For future compatibility, specify `atol` explicitly.
  162. maxiter : int, optional
  163. Maximum number of iterations. Iteration will stop after maxiter
  164. steps even if the specified tolerance has not been achieved.
  165. M : {sparse matrix, dense matrix, LinearOperator}, optional
  166. Preconditioner for A. The preconditioner should approximate the
  167. inverse of A. gcrotmk is a 'flexible' algorithm and the preconditioner
  168. can vary from iteration to iteration. Effective preconditioning
  169. dramatically improves the rate of convergence, which implies that
  170. fewer iterations are needed to reach a given error tolerance.
  171. callback : function, optional
  172. User-supplied function to call after each iteration. It is called
  173. as callback(xk), where xk is the current solution vector.
  174. m : int, optional
  175. Number of inner FGMRES iterations per each outer iteration.
  176. Default: 20
  177. k : int, optional
  178. Number of vectors to carry between inner FGMRES iterations.
  179. According to [2]_, good values are around m.
  180. Default: m
  181. CU : list of tuples, optional
  182. List of tuples ``(c, u)`` which contain the columns of the matrices
  183. C and U in the GCROT(m,k) algorithm. For details, see [2]_.
  184. The list given and vectors contained in it are modified in-place.
  185. If not given, start from empty matrices. The ``c`` elements in the
  186. tuples can be ``None``, in which case the vectors are recomputed
  187. via ``c = A u`` on start and orthogonalized as described in [3]_.
  188. discard_C : bool, optional
  189. Discard the C-vectors at the end. Useful if recycling Krylov subspaces
  190. for different linear systems.
  191. truncate : {'oldest', 'smallest'}, optional
  192. Truncation scheme to use. Drop: oldest vectors, or vectors with
  193. smallest singular values using the scheme discussed in [1,2].
  194. See [2]_ for detailed comparison.
  195. Default: 'oldest'
  196. Returns
  197. -------
  198. x : array or matrix
  199. The solution found.
  200. info : int
  201. Provides convergence information:
  202. * 0 : successful exit
  203. * >0 : convergence to tolerance not achieved, number of iterations
  204. References
  205. ----------
  206. .. [1] E. de Sturler, ''Truncation strategies for optimal Krylov subspace
  207. methods'', SIAM J. Numer. Anal. 36, 864 (1999).
  208. .. [2] J.E. Hicken and D.W. Zingg, ''A simplified and flexible variant
  209. of GCROT for solving nonsymmetric linear systems'',
  210. SIAM J. Sci. Comput. 32, 172 (2010).
  211. .. [3] M.L. Parks, E. de Sturler, G. Mackey, D.D. Johnson, S. Maiti,
  212. ''Recycling Krylov subspaces for sequences of linear systems'',
  213. SIAM J. Sci. Comput. 28, 1651 (2006).
  214. """
  215. A,M,x,b,postprocess = make_system(A,M,x0,b)
  216. if not np.isfinite(b).all():
  217. raise ValueError("RHS must contain only finite numbers")
  218. if truncate not in ('oldest', 'smallest'):
  219. raise ValueError("Invalid value for 'truncate': %r" % (truncate,))
  220. if atol is None:
  221. warnings.warn("scipy.sparse.linalg.gcrotmk called without specifying `atol`. "
  222. "The default value will change in the future. To preserve "
  223. "current behavior, set ``atol=tol``.",
  224. category=DeprecationWarning, stacklevel=2)
  225. atol = tol
  226. matvec = A.matvec
  227. psolve = M.matvec
  228. if CU is None:
  229. CU = []
  230. if k is None:
  231. k = m
  232. axpy, dot, scal = None, None, None
  233. r = b - matvec(x)
  234. axpy, dot, scal, nrm2 = get_blas_funcs(['axpy', 'dot', 'scal', 'nrm2'], (x, r))
  235. b_norm = nrm2(b)
  236. if discard_C:
  237. CU[:] = [(None, u) for c, u in CU]
  238. # Reorthogonalize old vectors
  239. if CU:
  240. # Sort already existing vectors to the front
  241. CU.sort(key=lambda cu: cu[0] is not None)
  242. # Fill-in missing ones
  243. C = np.empty((A.shape[0], len(CU)), dtype=r.dtype, order='F')
  244. us = []
  245. j = 0
  246. while CU:
  247. # More memory-efficient: throw away old vectors as we go
  248. c, u = CU.pop(0)
  249. if c is None:
  250. c = matvec(u)
  251. C[:,j] = c
  252. j += 1
  253. us.append(u)
  254. # Orthogonalize
  255. Q, R, P = qr(C, overwrite_a=True, mode='economic', pivoting=True)
  256. del C
  257. # C := Q
  258. cs = list(Q.T)
  259. # U := U P R^-1, back-substitution
  260. new_us = []
  261. for j in xrange(len(cs)):
  262. u = us[P[j]]
  263. for i in xrange(j):
  264. u = axpy(us[P[i]], u, u.shape[0], -R[i,j])
  265. if abs(R[j,j]) < 1e-12 * abs(R[0,0]):
  266. # discard rest of the vectors
  267. break
  268. u = scal(1.0/R[j,j], u)
  269. new_us.append(u)
  270. # Form the new CU lists
  271. CU[:] = list(zip(cs, new_us))[::-1]
  272. if CU:
  273. axpy, dot = get_blas_funcs(['axpy', 'dot'], (r,))
  274. # Solve first the projection operation with respect to the CU
  275. # vectors. This corresponds to modifying the initial guess to
  276. # be
  277. #
  278. # x' = x + U y
  279. # y = argmin_y || b - A (x + U y) ||^2
  280. #
  281. # The solution is y = C^H (b - A x)
  282. for c, u in CU:
  283. yc = dot(c, r)
  284. x = axpy(u, x, x.shape[0], yc)
  285. r = axpy(c, r, r.shape[0], -yc)
  286. # GCROT main iteration
  287. for j_outer in xrange(maxiter):
  288. # -- callback
  289. if callback is not None:
  290. callback(x)
  291. beta = nrm2(r)
  292. # -- check stopping condition
  293. beta_tol = max(atol, tol * b_norm)
  294. if beta <= beta_tol and (j_outer > 0 or CU):
  295. # recompute residual to avoid rounding error
  296. r = b - matvec(x)
  297. beta = nrm2(r)
  298. if beta <= beta_tol:
  299. j_outer = -1
  300. break
  301. ml = m + max(k - len(CU), 0)
  302. cs = [c for c, u in CU]
  303. try:
  304. Q, R, B, vs, zs, y, pres = _fgmres(matvec,
  305. r/beta,
  306. ml,
  307. rpsolve=psolve,
  308. atol=max(atol, tol*b_norm)/beta,
  309. cs=cs)
  310. y *= beta
  311. except LinAlgError:
  312. # Floating point over/underflow, non-finite result from
  313. # matmul etc. -- report failure.
  314. break
  315. #
  316. # At this point,
  317. #
  318. # [A U, A Z] = [C, V] G; G = [ I B ]
  319. # [ 0 H ]
  320. #
  321. # where [C, V] has orthonormal columns, and r = beta v_0. Moreover,
  322. #
  323. # || b - A (x + Z y + U q) ||_2 = || r - C B y - V H y - C q ||_2 = min!
  324. #
  325. # from which y = argmin_y || beta e_1 - H y ||_2, and q = -B y
  326. #
  327. #
  328. # GCROT(m,k) update
  329. #
  330. # Define new outer vectors
  331. # ux := (Z - U B) y
  332. ux = zs[0]*y[0]
  333. for z, yc in zip(zs[1:], y[1:]):
  334. ux = axpy(z, ux, ux.shape[0], yc) # ux += z*yc
  335. by = B.dot(y)
  336. for cu, byc in zip(CU, by):
  337. c, u = cu
  338. ux = axpy(u, ux, ux.shape[0], -byc) # ux -= u*byc
  339. # cx := V H y
  340. hy = Q.dot(R.dot(y))
  341. cx = vs[0] * hy[0]
  342. for v, hyc in zip(vs[1:], hy[1:]):
  343. cx = axpy(v, cx, cx.shape[0], hyc) # cx += v*hyc
  344. # Normalize cx, maintaining cx = A ux
  345. # This new cx is orthogonal to the previous C, by construction
  346. try:
  347. alpha = 1/nrm2(cx)
  348. if not np.isfinite(alpha):
  349. raise FloatingPointError()
  350. except (FloatingPointError, ZeroDivisionError):
  351. # Cannot update, so skip it
  352. continue
  353. cx = scal(alpha, cx)
  354. ux = scal(alpha, ux)
  355. # Update residual and solution
  356. gamma = dot(cx, r)
  357. r = axpy(cx, r, r.shape[0], -gamma) # r -= gamma*cx
  358. x = axpy(ux, x, x.shape[0], gamma) # x += gamma*ux
  359. # Truncate CU
  360. if truncate == 'oldest':
  361. while len(CU) >= k and CU:
  362. del CU[0]
  363. elif truncate == 'smallest':
  364. if len(CU) >= k and CU:
  365. # cf. [1,2]
  366. D = solve(R[:-1,:].T, B.T).T
  367. W, sigma, V = svd(D)
  368. # C := C W[:,:k-1], U := U W[:,:k-1]
  369. new_CU = []
  370. for j, w in enumerate(W[:,:k-1].T):
  371. c, u = CU[0]
  372. c = c * w[0]
  373. u = u * w[0]
  374. for cup, wp in zip(CU[1:], w[1:]):
  375. cp, up = cup
  376. c = axpy(cp, c, c.shape[0], wp)
  377. u = axpy(up, u, u.shape[0], wp)
  378. # Reorthogonalize at the same time; not necessary
  379. # in exact arithmetic, but floating point error
  380. # tends to accumulate here
  381. for cp, up in new_CU:
  382. alpha = dot(cp, c)
  383. c = axpy(cp, c, c.shape[0], -alpha)
  384. u = axpy(up, u, u.shape[0], -alpha)
  385. alpha = nrm2(c)
  386. c = scal(1.0/alpha, c)
  387. u = scal(1.0/alpha, u)
  388. new_CU.append((c, u))
  389. CU[:] = new_CU
  390. # Add new vector to CU
  391. CU.append((cx, ux))
  392. # Include the solution vector to the span
  393. CU.append((None, x.copy()))
  394. if discard_C:
  395. CU[:] = [(None, uz) for cz, uz in CU]
  396. return postprocess(x), j_outer + 1