_differentiable_functions.py

from __future__ import division, print_function, absolute_import

import numpy as np
import scipy.sparse as sps

from ._numdiff import approx_derivative, group_columns
from ._hessian_update_strategy import HessianUpdateStrategy
from scipy.sparse.linalg import LinearOperator

FD_METHODS = ('2-point', '3-point', 'cs')


class ScalarFunction(object):
    """Scalar function and its derivatives.

    This class defines a scalar function F: R^n -> R and methods for
    computing or approximating its first and second derivatives.

    Notes
    -----
    This class implements memoization logic. There are methods `fun`,
    `grad`, `hess` and corresponding attributes `f`, `g` and `H`. The
    following points should be kept in mind:

    1. Use only the public methods `fun`, `grad` and `hess`.
    2. After one of the methods is called, the corresponding attribute
       will be set. However, a subsequent call with a different argument
       of *any* of the methods may overwrite the attribute.
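
    Examples
    --------
    A minimal usage sketch. The objective ``f`` and gradient ``g`` below
    are hypothetical, shown only to illustrate the calling convention;
    they are not part of this module:

    >>> import numpy as np
    >>> def f(x):  # hypothetical quadratic objective
    ...     return x[0]**2 + x[1]**2
    >>> def g(x):  # its analytic gradient
    ...     return 2 * x
    >>> sf = ScalarFunction(f, np.array([1.0, 2.0]), (), g, '2-point',
    ...                     None, (-np.inf, np.inf))
    >>> sf.fun(np.array([3.0, 4.0]))
    25.0
    >>> sf.grad(np.array([3.0, 4.0]))
    array([6., 8.])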
    """
    def __init__(self, fun, x0, args, grad, hess, finite_diff_rel_step,
                 finite_diff_bounds):
        if not callable(grad) and grad not in FD_METHODS:
            raise ValueError("`grad` must be either callable or one of {}."
                             .format(FD_METHODS))

        if not (callable(hess) or hess in FD_METHODS
                or isinstance(hess, HessianUpdateStrategy)):
            raise ValueError("`hess` must be either callable, "
                             "HessianUpdateStrategy or one of {}."
                             .format(FD_METHODS))

        if grad in FD_METHODS and hess in FD_METHODS:
            raise ValueError("Whenever the gradient is estimated via "
                             "finite-differences, we require the Hessian "
                             "to be estimated using one of the "
                             "quasi-Newton strategies.")
        self.x = np.atleast_1d(x0).astype(float)
        self.n = self.x.size
        self.nfev = 0
        self.ngev = 0
        self.nhev = 0
        self.f_updated = False
        self.g_updated = False
        self.H_updated = False

        finite_diff_options = {}
        if grad in FD_METHODS:
            finite_diff_options["method"] = grad
            finite_diff_options["rel_step"] = finite_diff_rel_step
            finite_diff_options["bounds"] = finite_diff_bounds
        if hess in FD_METHODS:
            finite_diff_options["method"] = hess
            finite_diff_options["rel_step"] = finite_diff_rel_step
            finite_diff_options["as_linear_operator"] = True
        # Function evaluation
        def fun_wrapped(x):
            self.nfev += 1
            return fun(x, *args)

        def update_fun():
            self.f = fun_wrapped(self.x)
        self._update_fun_impl = update_fun
        self._update_fun()
        # Gradient evaluation
        if callable(grad):
            def grad_wrapped(x):
                self.ngev += 1
                return np.atleast_1d(grad(x, *args))

            def update_grad():
                self.g = grad_wrapped(self.x)
        elif grad in FD_METHODS:
            def update_grad():
                self._update_fun()
                self.g = approx_derivative(fun_wrapped, self.x, f0=self.f,
                                           **finite_diff_options)
        self._update_grad_impl = update_grad
        self._update_grad()
        # Hessian evaluation
        if callable(hess):
            self.H = hess(x0, *args)
            self.H_updated = True
            self.nhev += 1

            if sps.issparse(self.H):
                def hess_wrapped(x):
                    self.nhev += 1
                    return sps.csr_matrix(hess(x, *args))
                self.H = sps.csr_matrix(self.H)
            elif isinstance(self.H, LinearOperator):
                def hess_wrapped(x):
                    self.nhev += 1
                    return hess(x, *args)
            else:
                def hess_wrapped(x):
                    self.nhev += 1
                    return np.atleast_2d(np.asarray(hess(x, *args)))
                self.H = np.atleast_2d(np.asarray(self.H))

            def update_hess():
                self.H = hess_wrapped(self.x)
        elif hess in FD_METHODS:
            def update_hess():
                self._update_grad()
                self.H = approx_derivative(grad_wrapped, self.x, f0=self.g,
                                           **finite_diff_options)
                return self.H
            update_hess()
            self.H_updated = True
        elif isinstance(hess, HessianUpdateStrategy):
            self.H = hess
            self.H.initialize(self.n, 'hess')
            self.H_updated = True
            self.x_prev = None
            self.g_prev = None

            def update_hess():
                self._update_grad()
                self.H.update(self.x - self.x_prev, self.g - self.g_prev)
        self._update_hess_impl = update_hess

        if isinstance(hess, HessianUpdateStrategy):
            def update_x(x):
                self._update_grad()
                self.x_prev = self.x
                self.g_prev = self.g
                self.x = x
                self.f_updated = False
                self.g_updated = False
                self.H_updated = False
                self._update_hess()
        else:
            def update_x(x):
                self.x = x
                self.f_updated = False
                self.g_updated = False
                self.H_updated = False
        self._update_x_impl = update_x

    def _update_fun(self):
        if not self.f_updated:
            self._update_fun_impl()
            self.f_updated = True

    def _update_grad(self):
        if not self.g_updated:
            self._update_grad_impl()
            self.g_updated = True

    def _update_hess(self):
        if not self.H_updated:
            self._update_hess_impl()
            self.H_updated = True

    def fun(self, x):
        if not np.array_equal(x, self.x):
            self._update_x_impl(x)
        self._update_fun()
        return self.f

    def grad(self, x):
        if not np.array_equal(x, self.x):
            self._update_x_impl(x)
        self._update_grad()
        return self.g

    def hess(self, x):
        if not np.array_equal(x, self.x):
            self._update_x_impl(x)
        self._update_hess()
        return self.H


class VectorFunction(object):
    """Vector function and its derivatives.

    This class defines a vector function F: R^n -> R^m and methods for
    computing or approximating its first and second derivatives.

    Notes
    -----
    This class implements memoization logic. There are methods `fun`,
    `jac`, `hess` and corresponding attributes `f`, `J` and `H`. The
    following points should be kept in mind:

    1. Use only the public methods `fun`, `jac` and `hess`.
    2. After one of the methods is called, the corresponding attribute
       will be set. However, a subsequent call with a different argument
       of *any* of the methods may overwrite the attribute.
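
    Examples
    --------
    A minimal usage sketch. The vector function ``c`` and the Hessian
    callable ``c_hess`` below are hypothetical, for illustration only;
    the Jacobian is estimated by 2-point finite differences:

    >>> import numpy as np
    >>> def c(x):  # hypothetical vector function
    ...     return np.array([x[0]**2 + x[1], x[0] - x[1]])
    >>> def c_hess(x, v):  # Hessian of v.dot(c(x)), illustrative
    ...     return v[0] * np.array([[2.0, 0.0], [0.0, 0.0]])
    >>> vf = VectorFunction(c, np.array([1.0, 1.0]), '2-point', c_hess,
    ...                     None, None, (-np.inf, np.inf), None)
    >>> vf.fun(np.array([2.0, 1.0]))
    array([5., 1.])
    >>> np.allclose(vf.jac(np.array([2.0, 1.0])), [[4.0, 1.0], [1.0, -1.0]])
    True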
    """
    def __init__(self, fun, x0, jac, hess,
                 finite_diff_rel_step, finite_diff_jac_sparsity,
                 finite_diff_bounds, sparse_jacobian):
        if not callable(jac) and jac not in FD_METHODS:
            raise ValueError("`jac` must be either callable or one of {}."
                             .format(FD_METHODS))

        if not (callable(hess) or hess in FD_METHODS
                or isinstance(hess, HessianUpdateStrategy)):
            raise ValueError("`hess` must be either callable, "
                             "HessianUpdateStrategy or one of {}."
                             .format(FD_METHODS))

        if jac in FD_METHODS and hess in FD_METHODS:
            raise ValueError("Whenever the Jacobian is estimated via "
                             "finite-differences, we require the Hessian to "
                             "be estimated using one of the quasi-Newton "
                             "strategies.")
        self.x = np.atleast_1d(x0).astype(float)
        self.n = self.x.size
        self.nfev = 0
        self.njev = 0
        self.nhev = 0
        self.f_updated = False
        self.J_updated = False
        self.H_updated = False

        finite_diff_options = {}
        if jac in FD_METHODS:
            finite_diff_options["method"] = jac
            finite_diff_options["rel_step"] = finite_diff_rel_step
            if finite_diff_jac_sparsity is not None:
                sparsity_groups = group_columns(finite_diff_jac_sparsity)
                finite_diff_options["sparsity"] = (finite_diff_jac_sparsity,
                                                   sparsity_groups)
            finite_diff_options["bounds"] = finite_diff_bounds
            self.x_diff = np.copy(self.x)
        if hess in FD_METHODS:
            finite_diff_options["method"] = hess
            finite_diff_options["rel_step"] = finite_diff_rel_step
            finite_diff_options["as_linear_operator"] = True
            self.x_diff = np.copy(self.x)
        # Function evaluation
        def fun_wrapped(x):
            self.nfev += 1
            return np.atleast_1d(fun(x))

        def update_fun():
            self.f = fun_wrapped(self.x)
        self._update_fun_impl = update_fun
        update_fun()

        self.v = np.zeros_like(self.f)
        self.m = self.v.size
        # Jacobian evaluation
        if callable(jac):
            self.J = jac(self.x)
            self.J_updated = True
            self.njev += 1

            if (sparse_jacobian or
                    sparse_jacobian is None and sps.issparse(self.J)):
                def jac_wrapped(x):
                    self.njev += 1
                    return sps.csr_matrix(jac(x))
                self.J = sps.csr_matrix(self.J)
                self.sparse_jacobian = True
            elif sps.issparse(self.J):
                def jac_wrapped(x):
                    self.njev += 1
                    return jac(x).toarray()
                self.J = self.J.toarray()
                self.sparse_jacobian = False
            else:
                def jac_wrapped(x):
                    self.njev += 1
                    return np.atleast_2d(jac(x))
                self.J = np.atleast_2d(self.J)
                self.sparse_jacobian = False

            def update_jac():
                self.J = jac_wrapped(self.x)
        elif jac in FD_METHODS:
            self.J = approx_derivative(fun_wrapped, self.x, f0=self.f,
                                       **finite_diff_options)
            self.J_updated = True

            if (sparse_jacobian or
                    sparse_jacobian is None and sps.issparse(self.J)):
                def update_jac():
                    self._update_fun()
                    self.J = sps.csr_matrix(
                        approx_derivative(fun_wrapped, self.x, f0=self.f,
                                          **finite_diff_options))
                self.J = sps.csr_matrix(self.J)
                self.sparse_jacobian = True
            elif sps.issparse(self.J):
                def update_jac():
                    self._update_fun()
                    self.J = approx_derivative(fun_wrapped, self.x, f0=self.f,
                                               **finite_diff_options).toarray()
                self.J = self.J.toarray()
                self.sparse_jacobian = False
            else:
                def update_jac():
                    self._update_fun()
                    self.J = np.atleast_2d(
                        approx_derivative(fun_wrapped, self.x, f0=self.f,
                                          **finite_diff_options))
                self.J = np.atleast_2d(self.J)
                self.sparse_jacobian = False
        self._update_jac_impl = update_jac
        # Hessian evaluation
        if callable(hess):
            self.H = hess(self.x, self.v)
            self.H_updated = True
            self.nhev += 1

            if sps.issparse(self.H):
                def hess_wrapped(x, v):
                    self.nhev += 1
                    return sps.csr_matrix(hess(x, v))
                self.H = sps.csr_matrix(self.H)
            elif isinstance(self.H, LinearOperator):
                def hess_wrapped(x, v):
                    self.nhev += 1
                    return hess(x, v)
            else:
                def hess_wrapped(x, v):
                    self.nhev += 1
                    return np.atleast_2d(np.asarray(hess(x, v)))
                self.H = np.atleast_2d(np.asarray(self.H))

            def update_hess():
                self.H = hess_wrapped(self.x, self.v)
        elif hess in FD_METHODS:
            def jac_dot_v(x, v):
                return jac_wrapped(x).T.dot(v)

            def update_hess():
                self._update_jac()
                self.H = approx_derivative(jac_dot_v, self.x,
                                           f0=self.J.T.dot(self.v),
                                           args=(self.v,),
                                           **finite_diff_options)
            update_hess()
            self.H_updated = True
        elif isinstance(hess, HessianUpdateStrategy):
            self.H = hess
            self.H.initialize(self.n, 'hess')
            self.H_updated = True
            self.x_prev = None
            self.J_prev = None

            def update_hess():
                self._update_jac()
                # If v is updated before x, then x_prev and J_prev are
                # still None, hence this check.
                if self.x_prev is not None and self.J_prev is not None:
                    delta_x = self.x - self.x_prev
                    delta_g = self.J.T.dot(self.v) - self.J_prev.T.dot(self.v)
                    self.H.update(delta_x, delta_g)
        self._update_hess_impl = update_hess

        if isinstance(hess, HessianUpdateStrategy):
            def update_x(x):
                self._update_jac()
                self.x_prev = self.x
                self.J_prev = self.J
                self.x = x
                self.f_updated = False
                self.J_updated = False
                self.H_updated = False
                self._update_hess()
        else:
            def update_x(x):
                self.x = x
                self.f_updated = False
                self.J_updated = False
                self.H_updated = False
        self._update_x_impl = update_x

    def _update_v(self, v):
        if not np.array_equal(v, self.v):
            self.v = v
            self.H_updated = False

    def _update_x(self, x):
        if not np.array_equal(x, self.x):
            self._update_x_impl(x)

    def _update_fun(self):
        if not self.f_updated:
            self._update_fun_impl()
            self.f_updated = True

    def _update_jac(self):
        if not self.J_updated:
            self._update_jac_impl()
            self.J_updated = True

    def _update_hess(self):
        if not self.H_updated:
            self._update_hess_impl()
            self.H_updated = True

    def fun(self, x):
        self._update_x(x)
        self._update_fun()
        return self.f

    def jac(self, x):
        self._update_x(x)
        self._update_jac()
        return self.J

    def hess(self, x, v):
        # v should be updated before x.
        self._update_v(v)
        self._update_x(x)
        self._update_hess()
        return self.H


class LinearVectorFunction(object):
    """Linear vector function and its derivatives.

    Defines a linear function F = A x, where x is an n-dimensional vector
    and A is an m-by-n matrix. The Jacobian is constant and equal to A. The
    Hessian is identically zero and is returned as a csr matrix.
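
    Examples
    --------
    A minimal sketch; the matrix values are illustrative only:

    >>> import numpy as np
    >>> A = np.array([[1.0, 2.0], [3.0, 4.0]])
    >>> lvf = LinearVectorFunction(A, np.array([1.0, 1.0]), None)
    >>> lvf.fun(np.array([1.0, 0.0]))
    array([1., 3.])
    >>> lvf.jac(np.array([1.0, 0.0]))
    array([[1., 2.],
           [3., 4.]])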
    """
    def __init__(self, A, x0, sparse_jacobian):
        if sparse_jacobian or sparse_jacobian is None and sps.issparse(A):
            self.J = sps.csr_matrix(A)
            self.sparse_jacobian = True
        elif sps.issparse(A):
            self.J = A.toarray()
            self.sparse_jacobian = False
        else:
            self.J = np.atleast_2d(A)
            self.sparse_jacobian = False

        self.m, self.n = self.J.shape
        self.x = np.atleast_1d(x0).astype(float)
        self.f = self.J.dot(self.x)
        self.f_updated = True
        self.v = np.zeros(self.m, dtype=float)
        self.H = sps.csr_matrix((self.n, self.n))

    def _update_x(self, x):
        if not np.array_equal(x, self.x):
            self.x = x
            self.f_updated = False

    def fun(self, x):
        self._update_x(x)
        if not self.f_updated:
            self.f = self.J.dot(x)
            self.f_updated = True
        return self.f

    def jac(self, x):
        self._update_x(x)
        return self.J

    def hess(self, x, v):
        self._update_x(x)
        self.v = v
        return self.H


class IdentityVectorFunction(LinearVectorFunction):
    """Identity vector function and its derivatives.

    The Jacobian is the identity matrix, returned as a dense array when
    `sparse_jacobian=False` and as a csr matrix otherwise. The Hessian is
    identically zero and is returned as a csr matrix.
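
    Examples
    --------
    A minimal sketch with a dense Jacobian:

    >>> import numpy as np
    >>> ivf = IdentityVectorFunction(np.array([1.0, 2.0]), False)
    >>> ivf.jac(np.array([1.0, 2.0]))
    array([[1., 0.],
           [0., 1.]])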
    """
    def __init__(self, x0, sparse_jacobian):
        n = len(x0)
        if sparse_jacobian or sparse_jacobian is None:
            A = sps.eye(n, format='csr')
            sparse_jacobian = True
        else:
            A = np.eye(n)
            sparse_jacobian = False
        super(IdentityVectorFunction, self).__init__(A, x0, sparse_jacobian)