# test_hessian_update_strategy.py

from __future__ import division, print_function, absolute_import
import numpy as np
from copy import deepcopy
from numpy.linalg import norm
from numpy.testing import (TestCase, assert_array_almost_equal,
                           assert_array_equal, assert_array_less)
from scipy.optimize import (BFGS,
                            SR1,
                            HessianUpdateStrategy,
                            minimize)

class Rosenbrock:
    """Rosenbrock function.

    The following optimization problem:
        minimize sum(100.0*(x[1:] - x[:-1]**2.0)**2.0 + (1 - x[:-1])**2.0)
    """

    def __init__(self, n=2, random_state=0):
        rng = np.random.RandomState(random_state)
        self.x0 = rng.uniform(-1, 1, n)
        self.x_opt = np.ones(n)

    def fun(self, x):
        x = np.asarray(x)
        r = np.sum(100.0 * (x[1:] - x[:-1]**2.0)**2.0 + (1 - x[:-1])**2.0,
                   axis=0)
        return r

    def grad(self, x):
        x = np.asarray(x)
        xm = x[1:-1]
        xm_m1 = x[:-2]
        xm_p1 = x[2:]
        der = np.zeros_like(x)
        der[1:-1] = (200 * (xm - xm_m1**2) -
                     400 * (xm_p1 - xm**2) * xm - 2 * (1 - xm))
        der[0] = -400 * x[0] * (x[1] - x[0]**2) - 2 * (1 - x[0])
        der[-1] = 200 * (x[-1] - x[-2]**2)
        return der

    def hess(self, x):
        x = np.atleast_1d(x)
        H = np.diag(-400 * x[:-1], 1) - np.diag(400 * x[:-1], -1)
        diagonal = np.zeros(len(x), dtype=x.dtype)
        diagonal[0] = 1200 * x[0]**2 - 400 * x[1] + 2
        diagonal[-1] = 200
        diagonal[1:-1] = 202 + 1200 * x[1:-1]**2 - 400 * x[2:]
        H = H + np.diag(diagonal)
        return H
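

# Illustrative usage sketch, added for context and not part of the original
# test module: BFGS and SR1 are intended to be passed as the ``hess`` argument
# of ``minimize`` with the 'trust-constr' method, which maintains the
# quasi-Newton approximation internally.  The tolerance below is an assumption.
def _example_minimize_rosenbrock_with_bfgs():
    prob = Rosenbrock(n=5)
    result = minimize(prob.fun, prob.x0, method='trust-constr',
                      jac=prob.grad, hess=BFGS())
    # The minimizer of the Rosenbrock function is the all-ones vector.
    assert_array_almost_equal(result.x, prob.x_opt, decimal=4)
    return result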


class TestHessianUpdateStrategy(TestCase):

    def test_hessian_initialization(self):
        quasi_newton = (BFGS(), SR1())

        for qn in quasi_newton:
            qn.initialize(5, 'hess')
            B = qn.get_matrix()

            assert_array_equal(B, np.eye(5))

    # For this list of points it is known that no exception occurs
    # during the Hessian updates, hence no update is skipped or damped.
    def test_rosenbrock_with_no_exception(self):
        # Define auxiliary problem
        prob = Rosenbrock(n=5)
        # Define iteration points
        x_list = [[0.0976270, 0.4303787, 0.2055267, 0.0897663, -0.15269040],
                  [0.1847239, 0.0505757, 0.2123832, 0.0255081, 0.00083286],
                  [0.2142498, -0.0188480, 0.0503822, 0.0347033, 0.03323606],
                  [0.2071680, -0.0185071, 0.0341337, -0.0139298, 0.02881750],
                  [0.1533055, -0.0322935, 0.0280418, -0.0083592, 0.01503699],
                  [0.1382378, -0.0276671, 0.0266161, -0.0074060, 0.02801610],
                  [0.1651957, -0.0049124, 0.0269665, -0.0040025, 0.02138184],
                  [0.2354930, 0.0443711, 0.0173959, 0.0041872, 0.00794563],
                  [0.4168118, 0.1433867, 0.0111714, 0.0126265, -0.00658537],
                  [0.4681972, 0.2153273, 0.0225249, 0.0152704, -0.00463809],
                  [0.6023068, 0.3346815, 0.0731108, 0.0186618, -0.00371541],
                  [0.6415743, 0.3985468, 0.1324422, 0.0214160, -0.00062401],
                  [0.7503690, 0.5447616, 0.2804541, 0.0539851, 0.00242230],
                  [0.7452626, 0.5644594, 0.3324679, 0.0865153, 0.00454960],
                  [0.8059782, 0.6586838, 0.4229577, 0.1452990, 0.00976702],
                  [0.8549542, 0.7226562, 0.4991309, 0.2420093, 0.02772661],
                  [0.8571332, 0.7285741, 0.5279076, 0.2824549, 0.06030276],
                  [0.8835633, 0.7727077, 0.5957984, 0.3411303, 0.09652185],
                  [0.9071558, 0.8299587, 0.6771400, 0.4402896, 0.17469338],
                  [0.9190793, 0.8486480, 0.7163332, 0.5083780, 0.26107691],
                  [0.9371223, 0.8762177, 0.7653702, 0.5773109, 0.32181041],
                  [0.9554613, 0.9119893, 0.8282687, 0.6776178, 0.43162744],
                  [0.9545744, 0.9099264, 0.8270244, 0.6822220, 0.45237623],
                  [0.9688112, 0.9351710, 0.8730961, 0.7546601, 0.56622448],
                  [0.9743227, 0.9491953, 0.9005150, 0.8086497, 0.64505437],
                  [0.9807345, 0.9638853, 0.9283012, 0.8631675, 0.73812581],
                  [0.9886746, 0.9777760, 0.9558950, 0.9123417, 0.82726553],
                  [0.9899096, 0.9803828, 0.9615592, 0.9255600, 0.85822149],
                  [0.9969510, 0.9935441, 0.9864657, 0.9726775, 0.94358663],
                  [0.9979533, 0.9960274, 0.9921724, 0.9837415, 0.96626288],
                  [0.9995981, 0.9989171, 0.9974178, 0.9949954, 0.99023356],
                  [1.0002640, 1.0005088, 1.0010594, 1.0021161, 1.00386912],
                  [0.9998903, 0.9998459, 0.9997795, 0.9995484, 0.99916305],
                  [1.0000008, 0.9999905, 0.9999481, 0.9998903, 0.99978047],
                  [1.0000004, 0.9999983, 1.0000001, 1.0000031, 1.00000297],
                  [0.9999995, 1.0000003, 1.0000005, 1.0000001, 1.00000032],
                  [0.9999999, 0.9999997, 0.9999994, 0.9999989, 0.99999786],
                  [0.9999999, 0.9999999, 0.9999999, 0.9999999, 0.99999991]]
        # Evaluate gradients and form the update pairs (s, y)
        grad_list = [prob.grad(x) for x in x_list]
        delta_x = [np.array(x_list[i+1])-np.array(x_list[i])
                   for i in range(len(x_list)-1)]
        delta_grad = [grad_list[i+1]-grad_list[i]
                      for i in range(len(grad_list)-1)]
        # Check that the curvature condition s.T y > 0 holds for every pair
        # (a sketch of the textbook BFGS formulas follows this test).
        for i in range(len(delta_x)):
            s = delta_x[i]
            y = delta_grad[i]
            if np.dot(s, y) <= 0:
                raise ArithmeticError()
        # Define quasi-Newton update
        for quasi_newton in (BFGS(init_scale=1, min_curvature=1e-4),
                             SR1(init_scale=1)):
            hess = deepcopy(quasi_newton)
            inv_hess = deepcopy(quasi_newton)
            hess.initialize(len(x_list[0]), 'hess')
            inv_hess.initialize(len(x_list[0]), 'inv_hess')
            # Compare the Hessian approximation with its inverse counterpart
            # and with the true Hessian at each iterate
            for i in range(len(delta_x)):
                s = delta_x[i]
                y = delta_grad[i]
                hess.update(s, y)
                inv_hess.update(s, y)
                B = hess.get_matrix()
                H = inv_hess.get_matrix()
                assert_array_almost_equal(np.linalg.inv(B), H, decimal=10)
                B_true = prob.hess(x_list[i+1])
                assert_array_less(norm(B - B_true)/norm(B_true), 0.1)
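
    # Sketch added for illustration (not part of the original suite): one step
    # of the textbook BFGS recursions underlying the consistency check above.
    # Whenever the curvature condition s.T y > 0 holds, the direct update
    #     B+ = B - (B s s.T B) / (s.T B s) + (y y.T) / (y.T s)
    # and the inverse update
    #     H+ = (I - rho*s y.T) H (I - rho*y s.T) + rho*s s.T,  rho = 1/(y.T s)
    # keep both approximations symmetric positive definite with H+ = inv(B+).
    # scipy's BFGS may additionally damp or skip an update, so this helper
    # mirrors only the plain, undamped case.
    @staticmethod
    def _reference_bfgs_step(B, H, s, y):
        Bs = B.dot(s)
        B_new = B - np.outer(Bs, Bs) / s.dot(Bs) + np.outer(y, y) / y.dot(s)
        rho = 1.0 / y.dot(s)
        V = np.eye(len(s)) - rho * np.outer(s, y)
        H_new = V.dot(H).dot(V.T) + rho * np.outer(s, s)
        return B_new, H_new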

    def test_SR1_skip_update(self):
        # Define auxiliary problem
        prob = Rosenbrock(n=5)
        # Define iteration points
        x_list = [[0.0976270, 0.4303787, 0.2055267, 0.0897663, -0.15269040],
                  [0.1847239, 0.0505757, 0.2123832, 0.0255081, 0.00083286],
                  [0.2142498, -0.0188480, 0.0503822, 0.0347033, 0.03323606],
                  [0.2071680, -0.0185071, 0.0341337, -0.0139298, 0.02881750],
                  [0.1533055, -0.0322935, 0.0280418, -0.0083592, 0.01503699],
                  [0.1382378, -0.0276671, 0.0266161, -0.0074060, 0.02801610],
                  [0.1651957, -0.0049124, 0.0269665, -0.0040025, 0.02138184],
                  [0.2354930, 0.0443711, 0.0173959, 0.0041872, 0.00794563],
                  [0.4168118, 0.1433867, 0.0111714, 0.0126265, -0.00658537],
                  [0.4681972, 0.2153273, 0.0225249, 0.0152704, -0.00463809],
                  [0.6023068, 0.3346815, 0.0731108, 0.0186618, -0.00371541],
                  [0.6415743, 0.3985468, 0.1324422, 0.0214160, -0.00062401],
                  [0.7503690, 0.5447616, 0.2804541, 0.0539851, 0.00242230],
                  [0.7452626, 0.5644594, 0.3324679, 0.0865153, 0.00454960],
                  [0.8059782, 0.6586838, 0.4229577, 0.1452990, 0.00976702],
                  [0.8549542, 0.7226562, 0.4991309, 0.2420093, 0.02772661],
                  [0.8571332, 0.7285741, 0.5279076, 0.2824549, 0.06030276],
                  [0.8835633, 0.7727077, 0.5957984, 0.3411303, 0.09652185],
                  [0.9071558, 0.8299587, 0.6771400, 0.4402896, 0.17469338]]
        # Evaluate gradients and form the update pairs (s, y)
        grad_list = [prob.grad(x) for x in x_list]
        delta_x = [np.array(x_list[i+1])-np.array(x_list[i])
                   for i in range(len(x_list)-1)]
        delta_grad = [grad_list[i+1]-grad_list[i]
                      for i in range(len(grad_list)-1)]
        hess = SR1(init_scale=1, min_denominator=1e-2)
        hess.initialize(len(x_list[0]), 'hess')
        # Apply the updates that are known to be accepted
        for i in range(len(delta_x)-1):
            s = delta_x[i]
            y = delta_grad[i]
            hess.update(s, y)
        # Test skipped update: the last pair must leave the matrix unchanged
        B = np.copy(hess.get_matrix())
        s = delta_x[17]
        y = delta_grad[17]
        hess.update(s, y)
        B_updated = np.copy(hess.get_matrix())
        assert_array_equal(B, B_updated)
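
    # Sketch added for illustration (not part of the original suite): the SR1
    # rank-one update is
    #     B+ = B + (y - B s)(y - B s).T / ((y - B s).T s)
    # and, as the SR1 docstring describes, the update is skipped whenever the
    # denominator is too small.  The normalization used below
    # (min_denominator * ||s|| * ||y - B s||) is an assumption about the
    # implementation; the last (s, y) pair above is expected to trip the
    # guard, which is why the matrix is asserted to stay unchanged.
    @staticmethod
    def _reference_sr1_step(B, s, y, min_denominator=1e-8):
        residual = y - B.dot(s)
        denominator = residual.dot(s)
        if abs(denominator) <= min_denominator * norm(s) * norm(residual):
            # Denominator too small: keep the current approximation.
            return B
        return B + np.outer(residual, residual) / denominator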

    def test_BFGS_skip_update(self):
        # Define auxiliary problem
        prob = Rosenbrock(n=5)
        # Define iteration points
        x_list = [[0.0976270, 0.4303787, 0.2055267, 0.0897663, -0.15269040],
                  [0.1847239, 0.0505757, 0.2123832, 0.0255081, 0.00083286],
                  [0.2142498, -0.0188480, 0.0503822, 0.0347033, 0.03323606],
                  [0.2071680, -0.0185071, 0.0341337, -0.0139298, 0.02881750],
                  [0.1533055, -0.0322935, 0.0280418, -0.0083592, 0.01503699],
                  [0.1382378, -0.0276671, 0.0266161, -0.0074060, 0.02801610],
                  [0.1651957, -0.0049124, 0.0269665, -0.0040025, 0.02138184]]
        # Evaluate gradients and form the update pairs (s, y)
        grad_list = [prob.grad(x) for x in x_list]
        delta_x = [np.array(x_list[i+1])-np.array(x_list[i])
                   for i in range(len(x_list)-1)]
        delta_grad = [grad_list[i+1]-grad_list[i]
                      for i in range(len(grad_list)-1)]
        hess = BFGS(init_scale=1, min_curvature=10)
        hess.initialize(len(x_list[0]), 'hess')
        # Apply the updates that are known to be accepted
        for i in range(len(delta_x)-1):
            s = delta_x[i]
            y = delta_grad[i]
            hess.update(s, y)
        # Test skipped update: with this very strict curvature threshold the
        # last pair must be rejected and the matrix left unchanged
        B = np.copy(hess.get_matrix())
        s = delta_x[5]
        y = delta_grad[5]
        hess.update(s, y)
        B_updated = np.copy(hess.get_matrix())
        assert_array_equal(B, B_updated)
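
    # Sketch added for illustration (not part of the original suite): BFGS
    # protects positive definiteness with a curvature test assumed here to
    # have the form s.T y <= min_curvature * s.T B s (based on the BFGS
    # docstring; the exact scaling is an implementation detail).  With the
    # deliberately strict min_curvature=10 used above, the final pair fails
    # the test, so the update is skipped and the matrix stays unchanged.
    @staticmethod
    def _curvature_is_acceptable(B, s, y, min_curvature):
        # True when a plain BFGS update would be considered safe to apply.
        return s.dot(y) > min_curvature * s.dot(B.dot(s))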