  1. """
  2. Functions
  3. ---------
  4. .. autosummary::
  5. :toctree: generated/
  6. fmin_l_bfgs_b
  7. """
## License for the Python wrapper
## ==============================

## Copyright (c) 2004 David M. Cooke <cookedm@physics.mcmaster.ca>

## Permission is hereby granted, free of charge, to any person obtaining a
## copy of this software and associated documentation files (the "Software"),
## to deal in the Software without restriction, including without limitation
## the rights to use, copy, modify, merge, publish, distribute, sublicense,
## and/or sell copies of the Software, and to permit persons to whom the
## Software is furnished to do so, subject to the following conditions:

## The above copyright notice and this permission notice shall be included in
## all copies or substantial portions of the Software.

## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
## IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
## AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
## LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
## FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
## DEALINGS IN THE SOFTWARE.

## Modifications by Travis Oliphant and Enthought, Inc. for inclusion in SciPy

from __future__ import division, print_function, absolute_import

import numpy as np
from numpy import array, asarray, float64, int32, zeros
from . import _lbfgsb
from .optimize import (MemoizeJac, OptimizeResult,
                       _check_unknown_options, wrap_function,
                       _approx_fprime_helper)
from scipy.sparse.linalg import LinearOperator

__all__ = ['fmin_l_bfgs_b', 'LbfgsInvHessProduct']


def fmin_l_bfgs_b(func, x0, fprime=None, args=(),
                  approx_grad=0,
                  bounds=None, m=10, factr=1e7, pgtol=1e-5,
                  epsilon=1e-8,
                  iprint=-1, maxfun=15000, maxiter=15000, disp=None,
                  callback=None, maxls=20):
  42. """
  43. Minimize a function func using the L-BFGS-B algorithm.
  44. Parameters
  45. ----------
  46. func : callable f(x,*args)
  47. Function to minimise.
  48. x0 : ndarray
  49. Initial guess.
  50. fprime : callable fprime(x,*args), optional
  51. The gradient of `func`. If None, then `func` returns the function
  52. value and the gradient (``f, g = func(x, *args)``), unless
  53. `approx_grad` is True in which case `func` returns only ``f``.
  54. args : sequence, optional
  55. Arguments to pass to `func` and `fprime`.
  56. approx_grad : bool, optional
  57. Whether to approximate the gradient numerically (in which case
  58. `func` returns only the function value).
  59. bounds : list, optional
  60. ``(min, max)`` pairs for each element in ``x``, defining
  61. the bounds on that parameter. Use None or +-inf for one of ``min`` or
  62. ``max`` when there is no bound in that direction.
  63. m : int, optional
  64. The maximum number of variable metric corrections
  65. used to define the limited memory matrix. (The limited memory BFGS
  66. method does not store the full hessian but uses this many terms in an
  67. approximation to it.)
  68. factr : float, optional
  69. The iteration stops when
  70. ``(f^k - f^{k+1})/max{|f^k|,|f^{k+1}|,1} <= factr * eps``,
  71. where ``eps`` is the machine precision, which is automatically
  72. generated by the code. Typical values for `factr` are: 1e12 for
  73. low accuracy; 1e7 for moderate accuracy; 10.0 for extremely
  74. high accuracy. See Notes for relationship to `ftol`, which is exposed
  75. (instead of `factr`) by the `scipy.optimize.minimize` interface to
  76. L-BFGS-B.
  77. pgtol : float, optional
  78. The iteration will stop when
  79. ``max{|proj g_i | i = 1, ..., n} <= pgtol``
  80. where ``pg_i`` is the i-th component of the projected gradient.
  81. epsilon : float, optional
  82. Step size used when `approx_grad` is True, for numerically
  83. calculating the gradient
  84. iprint : int, optional
  85. Controls the frequency of output. ``iprint < 0`` means no output;
  86. ``iprint = 0`` print only one line at the last iteration;
  87. ``0 < iprint < 99`` print also f and ``|proj g|`` every iprint iterations;
  88. ``iprint = 99`` print details of every iteration except n-vectors;
  89. ``iprint = 100`` print also the changes of active set and final x;
  90. ``iprint > 100`` print details of every iteration including x and g.
  91. disp : int, optional
  92. If zero, then no output. If a positive number, then this over-rides
  93. `iprint` (i.e., `iprint` gets the value of `disp`).
  94. maxfun : int, optional
  95. Maximum number of function evaluations.
  96. maxiter : int, optional
  97. Maximum number of iterations.
  98. callback : callable, optional
  99. Called after each iteration, as ``callback(xk)``, where ``xk`` is the
  100. current parameter vector.
  101. maxls : int, optional
  102. Maximum number of line search steps (per iteration). Default is 20.
  103. Returns
  104. -------
  105. x : array_like
  106. Estimated position of the minimum.
  107. f : float
  108. Value of `func` at the minimum.
  109. d : dict
  110. Information dictionary.
  111. * d['warnflag'] is
  112. - 0 if converged,
  113. - 1 if too many function evaluations or too many iterations,
  114. - 2 if stopped for another reason, given in d['task']
  115. * d['grad'] is the gradient at the minimum (should be 0 ish)
  116. * d['funcalls'] is the number of function calls made.
  117. * d['nit'] is the number of iterations.
  118. See also
  119. --------
  120. minimize: Interface to minimization algorithms for multivariate
  121. functions. See the 'L-BFGS-B' `method` in particular. Note that the
  122. `ftol` option is made available via that interface, while `factr` is
  123. provided via this interface, where `factr` is the factor multiplying
  124. the default machine floating-point precision to arrive at `ftol`:
  125. ``ftol = factr * numpy.finfo(float).eps``.
  126. Notes
  127. -----
  128. License of L-BFGS-B (FORTRAN code):
  129. The version included here (in fortran code) is 3.0
  130. (released April 25, 2011). It was written by Ciyou Zhu, Richard Byrd,
  131. and Jorge Nocedal <nocedal@ece.nwu.edu>. It carries the following
  132. condition for use:
  133. This software is freely available, but we expect that all publications
  134. describing work using this software, or all commercial products using it,
  135. quote at least one of the references given below. This software is released
  136. under the BSD License.
  137. References
  138. ----------
  139. * R. H. Byrd, P. Lu and J. Nocedal. A Limited Memory Algorithm for Bound
  140. Constrained Optimization, (1995), SIAM Journal on Scientific and
  141. Statistical Computing, 16, 5, pp. 1190-1208.
  142. * C. Zhu, R. H. Byrd and J. Nocedal. L-BFGS-B: Algorithm 778: L-BFGS-B,
  143. FORTRAN routines for large scale bound constrained optimization (1997),
  144. ACM Transactions on Mathematical Software, 23, 4, pp. 550 - 560.
  145. * J.L. Morales and J. Nocedal. L-BFGS-B: Remark on Algorithm 778: L-BFGS-B,
  146. FORTRAN routines for large scale bound constrained optimization (2011),
  147. ACM Transactions on Mathematical Software, 38, 1.
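
    Examples
    --------
    A minimal, illustrative sketch (an editorial addition, not part of the
    original docstring); the objective, bounds and starting point are made
    up. Here `func` returns both the value and the gradient, so `fprime` is
    omitted.

    >>> import numpy as np
    >>> from scipy.optimize import fmin_l_bfgs_b
    >>> def func(x):  # hypothetical test objective
    ...     f = (x[0] - 1) ** 2 + (x[1] + 2) ** 2
    ...     g = np.array([2 * (x[0] - 1), 2 * (x[1] + 2)])
    ...     return f, g
    >>> x, f, d = fmin_l_bfgs_b(func, x0=np.zeros(2),
    ...                         bounds=[(0, None), (0, None)])
    >>> bool(np.allclose(x, [1, 0], atol=1e-4)) and d['warnflag'] == 0
    True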
  148. """
    # handle fprime/approx_grad
    if approx_grad:
        fun = func
        jac = None
    elif fprime is None:
        fun = MemoizeJac(func)
        jac = fun.derivative
    else:
        fun = func
        jac = fprime

    # build options
    if disp is None:
        disp = iprint
    opts = {'disp': disp,
            'iprint': iprint,
            'maxcor': m,
            'ftol': factr * np.finfo(float).eps,
            'gtol': pgtol,
            'eps': epsilon,
            'maxfun': maxfun,
            'maxiter': maxiter,
            'callback': callback,
            'maxls': maxls}

    res = _minimize_lbfgsb(fun, x0, args=args, jac=jac, bounds=bounds,
                           **opts)
    d = {'grad': res['jac'],
         'task': res['message'],
         'funcalls': res['nfev'],
         'nit': res['nit'],
         'warnflag': res['status']}
    f = res['fun']
    x = res['x']

    return x, f, d


def _minimize_lbfgsb(fun, x0, args=(), jac=None, bounds=None,
                     disp=None, maxcor=10, ftol=2.2204460492503131e-09,
                     gtol=1e-5, eps=1e-8, maxfun=15000, maxiter=15000,
                     iprint=-1, callback=None, maxls=20, **unknown_options):
  186. """
  187. Minimize a scalar function of one or more variables using the L-BFGS-B
  188. algorithm.
  189. Options
  190. -------
  191. disp : None or int
  192. If `disp is None` (the default), then the supplied version of `iprint`
  193. is used. If `disp is not None`, then it overrides the supplied version
  194. of `iprint` with the behaviour you outlined.
  195. maxcor : int
  196. The maximum number of variable metric corrections used to
  197. define the limited memory matrix. (The limited memory BFGS
  198. method does not store the full hessian but uses this many terms
  199. in an approximation to it.)
  200. ftol : float
  201. The iteration stops when ``(f^k -
  202. f^{k+1})/max{|f^k|,|f^{k+1}|,1} <= ftol``.
  203. gtol : float
  204. The iteration will stop when ``max{|proj g_i | i = 1, ..., n}
  205. <= gtol`` where ``pg_i`` is the i-th component of the
  206. projected gradient.
  207. eps : float
  208. Step size used for numerical approximation of the jacobian.
  209. maxfun : int
  210. Maximum number of function evaluations.
  211. maxiter : int
  212. Maximum number of iterations.
  213. maxls : int, optional
  214. Maximum number of line search steps (per iteration). Default is 20.
  215. Notes
  216. -----
  217. The option `ftol` is exposed via the `scipy.optimize.minimize` interface,
  218. but calling `scipy.optimize.fmin_l_bfgs_b` directly exposes `factr`. The
  219. relationship between the two is ``ftol = factr * numpy.finfo(float).eps``.
  220. I.e., `factr` multiplies the default machine floating-point precision to
  221. arrive at `ftol`.
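
    Examples
    --------
    An illustrative sketch (an editorial addition, not part of the original
    docstring); the quadratic objective is made up. These options are
    normally reached through `scipy.optimize.minimize` with
    ``method='L-BFGS-B'``:

    >>> import numpy as np
    >>> from scipy.optimize import minimize
    >>> res = minimize(lambda x: np.sum((x - 3) ** 2), np.zeros(2),
    ...                method='L-BFGS-B', bounds=[(0, 2), (0, 2)],
    ...                options={'ftol': 1e-10, 'maxcor': 5})
    >>> bool(res.success) and bool(np.allclose(res.x, [2, 2]))
    True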
  222. """
    _check_unknown_options(unknown_options)
    m = maxcor
    epsilon = eps
    pgtol = gtol
    factr = ftol / np.finfo(float).eps

    x0 = asarray(x0).ravel()
    n, = x0.shape

    if bounds is None:
        bounds = [(None, None)] * n
    if len(bounds) != n:
        raise ValueError('length of x0 != length of bounds')
    # unbounded variables must use None, not +-inf, for optimizer to work
    # properly
    bounds = [(None if l == -np.inf else l, None if u == np.inf else u) for
              l, u in bounds]

    if disp is not None:
        if disp == 0:
            iprint = -1
        else:
            iprint = disp
    n_function_evals, fun = wrap_function(fun, ())
    if jac is None:
        def func_and_grad(x):
            f = fun(x, *args)
            g = _approx_fprime_helper(x, fun, epsilon, args=args, f0=f)
            return f, g
    else:
        def func_and_grad(x):
            f = fun(x, *args)
            g = jac(x, *args)
            return f, g
    nbd = zeros(n, int32)
    low_bnd = zeros(n, float64)
    upper_bnd = zeros(n, float64)

    # nbd codes understood by the fortran routine:
    # 0 = unbounded, 1 = lower bound only, 2 = both bounds, 3 = upper only.
    bounds_map = {(None, None): 0,
                  (1, None): 1,
                  (1, 1): 2,
                  (None, 1): 3}
    for i in range(0, n):
        l, u = bounds[i]
        if l is not None:
            low_bnd[i] = l
            l = 1
        if u is not None:
            upper_bnd[i] = u
            u = 1
        nbd[i] = bounds_map[l, u]
    if not maxls > 0:
        raise ValueError('maxls must be positive.')

    x = array(x0, float64)
    f = array(0.0, float64)
    g = zeros((n,), float64)
    # workspace and state arrays required by the fortran routine setulb
    wa = zeros(2*m*n + 5*n + 11*m*m + 8*m, float64)
    iwa = zeros(3*n, int32)
    task = zeros(1, 'S60')
    csave = zeros(1, 'S60')
    lsave = zeros(4, int32)
    isave = zeros(44, int32)
    dsave = zeros(29, float64)

    task[:] = 'START'

    n_iterations = 0

    while 1:
        # x, f, g, wa, iwa, task, csave, lsave, isave, dsave = \
        _lbfgsb.setulb(m, x, low_bnd, upper_bnd, nbd, f, g, factr,
                       pgtol, wa, iwa, task, iprint, csave, lsave,
                       isave, dsave, maxls)
        task_str = task.tobytes()
        if task_str.startswith(b'FG'):
            # The minimization routine wants f and g at the current x.
            # Note that interruptions due to maxfun are postponed
            # until the completion of the current minimization iteration.
            # Overwrite f and g:
            f, g = func_and_grad(x)
        elif task_str.startswith(b'NEW_X'):
            # new iteration
            n_iterations += 1
            if callback is not None:
                callback(np.copy(x))

            if n_iterations >= maxiter:
                task[:] = 'STOP: TOTAL NO. of ITERATIONS REACHED LIMIT'
            elif n_function_evals[0] > maxfun:
                task[:] = ('STOP: TOTAL NO. of f AND g EVALUATIONS '
                           'EXCEEDS LIMIT')
        else:
            break
    task_str = task.tobytes().strip(b'\x00').strip()
    if task_str.startswith(b'CONV'):
        warnflag = 0
    elif n_function_evals[0] > maxfun or n_iterations >= maxiter:
        warnflag = 1
    else:
        warnflag = 2

    # These two portions of the workspace are described in the mainlb
    # subroutine in lbfgsb.f. See line 363.
    s = wa[0: m*n].reshape(m, n)
    y = wa[m*n: 2*m*n].reshape(m, n)

    # See lbfgsb.f line 160 for this portion of the workspace.
    # isave(31) = the total number of BFGS updates prior to the current
    # iteration
    n_bfgs_updates = isave[30]

    n_corrs = min(n_bfgs_updates, maxcor)
    hess_inv = LbfgsInvHessProduct(s[:n_corrs], y[:n_corrs])

    return OptimizeResult(fun=f, jac=g, nfev=n_function_evals[0],
                          nit=n_iterations, status=warnflag, message=task_str,
                          x=x, success=(warnflag == 0), hess_inv=hess_inv)


class LbfgsInvHessProduct(LinearOperator):
    """Linear operator for the L-BFGS approximate inverse Hessian.

    This operator computes the product of a vector with the approximate
    inverse of the Hessian of the objective function, using the L-BFGS
    limited memory approximation to the inverse Hessian, accumulated during
    the optimization.

    Objects of this class implement the ``scipy.sparse.linalg.LinearOperator``
    interface.

    Parameters
    ----------
    sk : array_like, shape=(n_corr, n)
        Array of `n_corr` most recent updates to the solution vector.
        (See [1]).
    yk : array_like, shape=(n_corr, n)
        Array of `n_corr` most recent updates to the gradient. (See [1]).

    References
    ----------
    .. [1] Nocedal, Jorge. "Updating quasi-Newton matrices with limited
       storage." Mathematics of computation 35.151 (1980): 773-782.
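
    Examples
    --------
    An illustrative sketch (an editorial addition, not part of the original
    docstring): the correction pairs below are hand-made, with positive
    curvature ``sk[i].dot(yk[i]) > 0`` as L-BFGS requires.

    >>> import numpy as np
    >>> from scipy.optimize import LbfgsInvHessProduct
    >>> sk = np.array([[1.0, 0.0, 0.0], [0.0, 1.0, 0.0]])
    >>> yk = np.array([[2.0, 0.0, 0.0], [0.0, 3.0, 0.0]])
    >>> hess_inv = LbfgsInvHessProduct(sk, yk)
    >>> hess_inv.matvec(np.ones(3)).shape
    (3,)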
  344. """
    def __init__(self, sk, yk):
        """Construct the operator."""
        if sk.shape != yk.shape or sk.ndim != 2:
            raise ValueError('sk and yk must have matching shape, (n_corrs, n)')
        n_corrs, n = sk.shape

        super(LbfgsInvHessProduct, self).__init__(
            dtype=np.float64, shape=(n, n))

        self.sk = sk
        self.yk = yk
        self.n_corrs = n_corrs
        # rho[i] = 1 / (y_i . s_i) is the usual BFGS curvature scaling.
        self.rho = 1 / np.einsum('ij,ij->i', sk, yk)

    def _matvec(self, x):
        """Efficient matrix-vector multiply with the BFGS matrices.

        This calculation is described in Section (4) of [1].

        Parameters
        ----------
        x : ndarray
            An array with shape (n,) or (n,1).

        Returns
        -------
        y : ndarray
            The matrix-vector product.
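
        Examples
        --------
        Illustrative only (an editorial addition, not part of the original
        docstring): a column-vector ``(n, 1)`` input is flattened before the
        two-loop recursion.

        >>> import numpy as np
        >>> from scipy.optimize import LbfgsInvHessProduct
        >>> op = LbfgsInvHessProduct(np.eye(2), 2 * np.eye(2))
        >>> op._matvec(np.ones((2, 1))).shape
        (2,)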
  367. """
        s, y, n_corrs, rho = self.sk, self.yk, self.n_corrs, self.rho
        q = np.array(x, dtype=self.dtype, copy=True)
        if q.ndim == 2 and q.shape[1] == 1:
            q = q.reshape(-1)

        alpha = np.zeros(n_corrs)

        # First loop of the two-loop recursion: walk backwards through the
        # stored corrections, accumulating the alpha coefficients.
        for i in range(n_corrs - 1, -1, -1):
            alpha[i] = rho[i] * np.dot(s[i], q)
            q = q - alpha[i] * y[i]

        r = q
        # Second loop: walk forwards, applying the corrections again.
        for i in range(n_corrs):
            beta = rho[i] * np.dot(y[i], r)
            r = r + s[i] * (alpha[i] - beta)

        return r

    def todense(self):
        """Return a dense array representation of this operator.

        Returns
        -------
        arr : ndarray, shape=(n, n)
            An array with the same shape and containing
            the same data represented by this `LinearOperator`.
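
        Examples
        --------
        A small consistency check (an editorial addition, not part of the
        original docstring): the dense matrix reproduces the action of the
        operator on an arbitrary vector.

        >>> import numpy as np
        >>> from scipy.optimize import LbfgsInvHessProduct
        >>> sk = np.array([[1.0, 0.0], [0.0, 1.0]])
        >>> yk = np.array([[2.0, 0.5], [0.5, 3.0]])
        >>> op = LbfgsInvHessProduct(sk, yk)
        >>> v = np.array([1.0, -1.0])
        >>> bool(np.allclose(op.todense().dot(v), op.matvec(v)))
        True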
  388. """
        s, y, n_corrs, rho = self.sk, self.yk, self.n_corrs, self.rho
        I = np.eye(*self.shape, dtype=self.dtype)
        Hk = I

        for i in range(n_corrs):
            A1 = I - s[i][:, np.newaxis] * y[i][np.newaxis, :] * rho[i]
            A2 = I - y[i][:, np.newaxis] * s[i][np.newaxis, :] * rho[i]

            Hk = np.dot(A1, np.dot(Hk, A2)) + (rho[i] * s[i][:, np.newaxis] *
                                               s[i][np.newaxis, :])
        return Hk