Source code for pysit.optimization.lbfgs



import time
import copy
from collections import deque

import numpy as np

from pysit.optimization.optimization import OptimizationBase

__all__ = ['LBFGS']

__docformat__ = "restructuredtext en"

class LBFGS(OptimizationBase):

    def __init__(self, objective, memory_length=None, reset_on_new_inner_loop_call=True, *args, **kwargs):
        OptimizationBase.__init__(self, objective, *args, **kwargs)

        self.prev_alpha = None
        self.prev_model = None
        # collections.deque uses maxlen=None to indicate an unbounded memory
        self.memory_length = memory_length
        self.reset_on_new_inner_loop_call = reset_on_new_inner_loop_call

        self._reset_memory()

    def _reset_memory(self):
        self.memory = deque([], maxlen=self.memory_length)
        self._reset_line_search = True
        self.prev_model = None
    def inner_loop(self, *args, **kwargs):

        if self.reset_on_new_inner_loop_call:
            self._reset_memory()

        OptimizationBase.inner_loop(self, *args, **kwargs)
    def _select_step(self, shots, current_objective_value, gradient, iteration, objective_arguments, **kwargs):
        """Compute the L-BFGS update for a set of shots.

        Gives the step s as a function of the gradient vector.  Implemented
        as on p. 178 of Nocedal and Wright.

        Parameters
        ----------
        shots : list of pysit.Shot
            List of Shots for which to compute the step.
        current_objective_value : float
            Objective value at the current model.
        gradient : ndarray
            Gradient vector.
        iteration : int
            Current iteration index.
        objective_arguments : dict
            Extra arguments passed through to the objective evaluation.

        """

        mem = self.memory

        q = copy.deepcopy(gradient)

        x_k = copy.deepcopy(self.base_model)  # Not sure if the copy is required

        # Fix the 'y' variable, since we saved the previous gradient and just
        # computed the next gradient.  That is, assume we are at k+1.  The
        # right side of memory is k, but we did not compute y_k yet.  In the
        # y_k slot we stored a copy of the (negative) gradient, so y_k is that
        # negative gradient plus the current gradient.
        #
        # Use the same idea for the 's' variable, which is defined as the
        # model difference.  We cannot reuse the variable 'step' from the end
        # of the previous iteration, because in inner_loop() the model is
        # updated as self.base_model += step, and the model parameter class
        # can enforce bounds and do other types of postprocessing.
        if len(mem) > 0:
            mem[-1][2] += gradient  # y
            mem[-1][1] = x_k - self.prev_model  # s; subtraction yields a model perturbation, which is linear
            mem[-1][0] = 1. / mem[-1][2].inner_product(mem[-1][1])  # rho
            gamma = mem[-1][1].inner_product(mem[-1][2]) / mem[-1][2].inner_product(mem[-1][2])
        else:
            gamma = 1.0

        # Two-loop recursion (a standalone numpy version appears after this
        # class).  First loop: newest to oldest pairs.
        alphas = []
        for rho, s, y in reversed(mem):
            alpha = rho * s.inner_product(q)
            t = alpha * y
            q -= t
            alphas.append(alpha)

        alphas.reverse()

        # Apply the initial Hessian scaling, then the second loop, oldest to
        # newest pairs.
        r = gamma * q
        for alpha, m in zip(alphas, mem):
            rho, s, y = m
            beta = rho * y.inner_product(r)
            r += (alpha - beta) * s

        # Search in the opposite direction
        direction = -1.0 * r

        alpha0_kwargs = {'reset': False}
        if self._reset_line_search:
            alpha0_kwargs = {'reset': True}
            self._reset_line_search = False

        alpha = self.select_alpha(shots, gradient, direction, objective_arguments,
                                  current_objective_value=current_objective_value,
                                  alpha0_kwargs=alpha0_kwargs, **kwargs)

        self._print(' alpha {0}'.format(alpha))
        self.store_history('alpha', iteration, alpha)

        step = alpha * direction

        # These copy calls might be removable.
        self.prev_model = x_k
        self.memory.append([None, None, copy.deepcopy(-1 * gradient)])

        return step

    def _compute_alpha0(self, phi0, grad0, reset=False, *args, **kwargs):
        if reset:
            return phi0 / (grad0.norm() * np.prod(self.solver.mesh.deltas))**2
        else:
            return 1.0
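
# ---------------------------------------------------------------------------
# Standalone sketch (not part of pysit): the same two-loop recursion used in
# LBFGS._select_step above, rewritten with plain numpy arrays instead of
# pysit model/gradient objects so the algorithm (Nocedal and Wright, p. 178)
# can be read in isolation.  `lbfgs_direction` is a hypothetical helper name;
# `memory` holds (rho, s, y) triples ordered oldest to newest, mirroring
# self.memory above.
# ---------------------------------------------------------------------------
def lbfgs_direction(gradient, memory):
    """Return the approximate Newton direction -H_k * gradient."""
    q = np.array(gradient, dtype=float)

    # First loop: newest to oldest, peel off each stored pair's contribution.
    alphas = []
    for rho, s, y in reversed(memory):
        alpha = rho * s.dot(q)
        q -= alpha * y
        alphas.append(alpha)
    alphas.reverse()

    # Initial Hessian scaling gamma = s'y / y'y from the newest pair
    # (the same gamma computed in _select_step).
    if len(memory) > 0:
        _, s, y = memory[-1]
        gamma = s.dot(y) / y.dot(y)
    else:
        gamma = 1.0

    # Second loop: oldest to newest, add the corrections back in.
    r = gamma * q
    for alpha, (rho, s, y) in zip(alphas, memory):
        beta = rho * y.dot(r)
        r += (alpha - beta) * s

    # Search in the direction opposite to r, as in _select_step.
    return -r

# With an empty memory the sketch returns -gradient (steepest descent), which
# is why _compute_alpha0 above rescales the very first trial step length as
#   alpha_0 = phi0 / (||grad0|| * prod(mesh.deltas))**2
# and thereafter returns 1.0, since the L-BFGS direction is already
# approximately unit-scaled.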