_trustregion_ncg.py

  1. """Newton-CG trust-region optimization."""
  2. from __future__ import division, print_function, absolute_import
  3. import math
  4. import numpy as np
  5. import scipy.linalg
  6. from ._trustregion import (_minimize_trust_region, BaseQuadraticSubproblem)
  7. __all__ = []


def _minimize_trust_ncg(fun, x0, args=(), jac=None, hess=None, hessp=None,
                        **trust_region_options):
    """
    Minimization of a scalar function of one or more variables using
    the Newton conjugate gradient trust-region algorithm.

    Options
    -------
    initial_trust_radius : float
        Initial trust-region radius.
    max_trust_radius : float
        Maximum value of the trust-region radius. No steps longer than
        this value will be proposed.
    eta : float
        Trust-region acceptance stringency for proposed steps.
    gtol : float
        The gradient norm must be less than `gtol` before successful
        termination.
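
    Examples
    --------
    A minimal usage sketch through the public `scipy.optimize.minimize`
    interface, which dispatches to this routine for ``method='trust-ncg'``
    (the convergence check below is illustrative, not a reference result):

    >>> import numpy as np
    >>> from scipy.optimize import minimize, rosen, rosen_der, rosen_hess
    >>> x0 = np.array([1.3, 0.7, 0.8, 1.9, 1.2])
    >>> res = minimize(rosen, x0, method='trust-ncg', jac=rosen_der,
    ...                hess=rosen_hess, options={'gtol': 1e-8})
    >>> bool(np.allclose(res.x, np.ones_like(x0)))
    True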
  25. """
    if jac is None:
        raise ValueError('Jacobian is required for Newton-CG trust-region '
                         'minimization')
    if hess is None and hessp is None:
        raise ValueError('Either the Hessian or the Hessian-vector product '
                         'is required for Newton-CG trust-region minimization')
    return _minimize_trust_region(fun, x0, args=args, jac=jac, hess=hess,
                                  hessp=hessp, subproblem=CGSteihaugSubproblem,
                                  **trust_region_options)


class CGSteihaugSubproblem(BaseQuadraticSubproblem):
    """Quadratic subproblem solved by a conjugate gradient method"""
    def solve(self, trust_radius):
        """
        Solve the subproblem using a conjugate gradient method.

        Parameters
        ----------
        trust_radius : float
            We are allowed to wander only this far away from the origin.

        Returns
        -------
        p : ndarray
            The proposed step.
        hits_boundary : bool
            True if the proposed step is on the boundary of the trust region.

        Notes
        -----
        This is algorithm (7.2) of Nocedal and Wright 2nd edition.
        Only the function that computes the Hessian-vector product is
        required.  The Hessian itself is not required, and the Hessian does
        not need to be positive semidefinite.
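
        The returned step approximately minimizes the local quadratic model
        ``fun + dot(jac, p) + 0.5 * dot(p, hessp(p))`` of the objective,
        subject to ``||p|| <= trust_radius``.

        Examples
        --------
        A minimal sketch of reaching this subproblem through the public
        interface with only a Hessian-vector product supplied (the
        convergence check is illustrative, not a reference result):

        >>> from scipy.optimize import (minimize, rosen, rosen_der,
        ...                             rosen_hess_prod)
        >>> res = minimize(rosen, [1.3, 0.7], method='trust-ncg',
        ...                jac=rosen_der, hessp=rosen_hess_prod)
        >>> bool(res.success)
        True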
  56. """

        # define the step at the origin (the zero vector)
        p_origin = np.zeros_like(self.jac)

        # define a tolerance on the residual norm at which to stop
        # the conjugate gradient iteration
        tolerance = min(0.5, math.sqrt(self.jac_mag)) * self.jac_mag

        # Stop immediately if the gradient norm is already within the
        # tolerance; the zero step is then an interior solution.
        if self.jac_mag < tolerance:
            hits_boundary = False
            return p_origin, hits_boundary

        # init the state for the first iteration
        z = p_origin
        r = self.jac
        d = -r

        # Search for the min of the approximation of the objective function.
        while True:

            # do an iteration
            Bd = self.hessp(d)
            dBd = np.dot(d, Bd)
            if dBd <= 0:
                # Look at the two boundary points.
                # Find both values of t to get the boundary points such that
                # ||z + t d|| == trust_radius
                # and then choose the one with the predicted min value.
                ta, tb = self.get_boundaries_intersections(z, d, trust_radius)
                pa = z + ta * d
                pb = z + tb * d
                if self(pa) < self(pb):
                    p_boundary = pa
                else:
                    p_boundary = pb
                hits_boundary = True
                return p_boundary, hits_boundary
            r_squared = np.dot(r, r)
            alpha = r_squared / dBd
            z_next = z + alpha * d
            if scipy.linalg.norm(z_next) >= trust_radius:
                # Find t >= 0 to get the boundary point such that
                # ||z + t d|| == trust_radius
                ta, tb = self.get_boundaries_intersections(z, d, trust_radius)
                p_boundary = z + tb * d
                hits_boundary = True
                return p_boundary, hits_boundary
            r_next = r + alpha * Bd
            r_next_squared = np.dot(r_next, r_next)
            if math.sqrt(r_next_squared) < tolerance:
                hits_boundary = False
                return z_next, hits_boundary
            beta_next = r_next_squared / r_squared
            d_next = -r_next + beta_next * d

            # update the state for the next iteration
            z = z_next
            r = r_next
            d = d_next