% Source: DDP/DDP.m, master branch of the yifan-hou/DDP repository
% (586 lines, 499 loc, 16.3 KB)
% DDP - Differential dynamic programming,
% 		solves the optimal control problem for second-order differentiable discrete-time systems:
%        minimize sum_i L(x(:,i),u(:,i)) + Final(x(:,end))
%        s.t.  x(:,i+1) = f(x(:,i),u(:,i))
% 		Implemented based on:
% 			<Jacobson D, Mayne D. Differential dynamic programming[J]. 1970>
% 		Regularization, line search and input constraint handling are adapted from:
% 			<Tassa Y, Mansard N, Todorov E. Control-limited differential dynamic programming[C]>
% 		The QP used here is implemented by Yuval Y. Tassa:
% 			https://www.mathworks.com/matlabcentral/fileexchange/52069-ilqg-ddp-trajectory-optimization
% x0	- nx by 1, initial state
% N 	- length of horizon
% unom	- nu by N, initial control sequence, could be all zeros
% model	- a structure of function handles describing the problem:
% 	model.f 	- system dynamics and its Jacobians:
% 			[xnew,fx,fu] = model.f(x,u,i)
% 	model.L 	- running cost:
%			[L] = model.L(x,u,i)
% 	model.Final	- final cost and its derivatives:
% 			[F,Fx,Fxx] = model.Final(x)
% 	model.H 	- pseudo-Hamiltonian and its derivatives:
% 			H(x,u,lambda) = L(x,u) + lambda^T*f(x,u)
% 			[H,Hx,Hxx,Hu,Hux,Huu] = model.H(x,u,lambda,i)
% para 	- a structure describing optional parameters.
% 			see 'DDP_getDefaultPara.m' for details
% uLB 	- nu by N, lower bound on control; pass [] for an unconstrained problem
% uUB 	- nu by N, upper bound on control (only used when uLB is non-empty)
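% xnom, unom	-  the optimized nominal state-action pair.
% alpha_		-  nu by N, the feedforward control correction from the last backward pass.
% beta_			-  the optimized linear feedback gain matrices.
% 				   cell array of size N, elements are nu by nx
% 				   (only entries 1..N-1 are filled in).
% 				   a control is calculated as:
% 						u(:,i) = unom(:,i) + beta_{i}*(xnow - xnom(:,i))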
% info 			- a structure containing description of the solution
%				   exitflag: positive means success, negative means failure
%				 		2: small change in cost
%				 		1: small gradient
%				 		-1: ill conditioned
%				   exitinfo: the corresponding info.
% 				   iter: the number of iterations used.
% 		Yifan Hou
% 		houyf11@gmail.com
function [xnom,unom, alpha_,beta_,info] = DDP(x0,N, unom, model,uLB,uUB,para)
% function [xnom,unom, alpha_,beta_,info] = DDP(x0,N, unom, f,L,F,H,para,uLB,uUB)
%
% Each main iteration performs:
%   1. a forward pass rolling out the nominal controls (nominal cost Vnom),
%   2. a backward pass building the quadratic value model and the
%      feedforward (alpha_) / feedback (beta_) corrections,
%   3. a backtracking line search over the step length epsilon,
%   4. acceptance/rejection of the candidate and adaptation of the
%      regularization added to Ci.
%
% Inputs (as used by this function):
%   x0     - initial state (nx = length(x0))
%   N      - horizon length
%   unom   - nu by N initial control sequence
%   model  - struct with function handles f, L, Final, H, called as
%              [xnew,fx,fu]            = model.f(x,u,i)
%              L                       = model.L(x,u,i)
%              [F,Fx,Fxx]              = model.Final(x)
%              [H,Hx,Hxx,Hu,Hux,Huu]   = model.H(x,u,lambda,i)
%   uLB    - control lower bounds (column i used at step i); [] disables constraints
%   uUB    - control upper bounds (only read when uLB is non-empty)
%   para   - struct; fields read here: Ci_reg_init, Ci_reg_localloop,
%            Ci_reg_fac, Ci_reg_Min, Ci_reg_Max, maxIter, maxInnerLoop,
%            tolGrad, MinCostChange, c, detail, updatePara
%
% Outputs:
%   xnom, unom - nominal state/control trajectories after the last accepted step
%   alpha_     - nu by N feedforward corrections from the last backward pass
%   beta_      - 1 by N cell of feedback gains (entries 1..N-1 are set)
%   info       - struct with exitflag, exitinfo, iter, iter_success

print_head  = 10;           % re-print the table header every print_head rows
print_count = print_head;

% dimensions
nx = length(x0);
nu = size(unom,1);

% regularization for all time steps
Ci_reg_base = para.Ci_reg_init;
Ci_reg_ramp = 1;

% the problem is treated as box-constrained whenever a lower bound is supplied
is_constrained = ~isempty(uLB);

info.exitflag = 0;
% default message so that info.exitinfo always exists, even when the
% iteration budget runs out without hitting a termination test
info.exitinfo = 'maximum number of iterations reached';

for iter = 1:para.maxIter
    info.iter = iter;

    % ---------------------------------------------------
    % 			step one: forward pass
    % 		get nominal states/gradients
    % ---------------------------------------------------
    xnom      = zeros(nx,N);
    xnom(:,1) = x0;
    fx   = cell([N-1,1]);
    fu   = cell([N-1,1]);
    Vnom = 0;   % nominal cost accumulator
    for i = 1:N-1
        [xnom(:,i+1), fx{i}, fu{i}] = model.f(xnom(:,i),unom(:,i),i);
        Vnom = Vnom + model.L(xnom(:,i),unom(:,i),i);
    end
    Vnom = Vnom + model.Final(xnom(:,N));

    % ---------------------------------------------------
    % 			step two: backward pass
    % 		get Value model / direction of new solution
    % ---------------------------------------------------
    a      = zeros(1,N); % a(i) = V(i,xnom(i)) - Vnom(i,xnom(i)) is the expected change in value
    Vx     = zeros(nx,N);
    Vxx    = cell([1,N]);
    Vxx{N} = zeros(nx);
    alpha_ = zeros(nu,N);
    beta_  = cell([1,N]);
    flag_backpass_success = true;

    % boundary condition
    [~,Vx(:,N),Vxx{N}] = model.Final(xnom(:,N));

    for i = N-1:-1:1
        [~,Hx,Hxx,Hu,Hux,Huu] = model.H(xnom(:,i),unom(:,i),Vx(:,i+1),i);
        Ai = Hxx + fx{i}'*Vxx{i+1}*fx{i};
        Bi = Hux + fu{i}'*Vxx{i+1}*fx{i};
        Ci = Huu + fu{i}'*Vxx{i+1}*fu{i};

        % add regularization if Ci is ill-conditioned; note that the
        % regularization accumulates over the local loop
        for r = 0:para.Ci_reg_localloop
            % regularization coefficient
            Ci_reg  = Ci_reg_base*2^r;
            Ci      = Ci + Ci_reg*eye(nu);
            cond_Ci = rcond(Ci);
            if cond_Ci > 100*eps
                break;
            end
        end
        if (cond_Ci < 100*eps)
            flag_backpass_success = false;
            break;
        end

        if is_constrained == false
            alpha_(:,i) = -Ci\Hu;
            beta_{i}    = -Ci\Bi;
        else
            % solve QP for constrained problem (warm-started from step i+1)
            [alpha_(:,i),result,R,free] = boxQP(Ci, Hu, uLB(:,i)-unom(:,i), uUB(:,i)-unom(:,i), alpha_(:,i+1));
            if result < 1
                flag_backpass_success  = false;
                break;
            end
            % feedback acts only on the unclamped (free) control dimensions
            % beta_{i}    = -Ci\Bi;
            beta_{i}    = zeros(nu,nx);
            if any(free)
                Lfree            = -R\(R'\Bi(free,:));
                beta_{i}(free,:) = Lfree;
            end
            %        options = optimoptions('quadprog','Display','off');
            % 	    [Alpha, ~, EXITFLAG] = quadprog(Ci,Hu,[],[],[],[],uLB(:,i)-unom(:,i),uUB(:,i)-unom(:,i),alpha_(:,i+1),options);
            % 	    Beta    = -Ci\Bi;
        end

        % 	a(i) = a(i+1) - epsilon*(1-epsilon/2)*Hu'*(Ci\Hu);
        % 	here we only calculate for epsilon = 1.
        %   a(i) for other epsilon: multiply by 2*epsilon(1-epsilon/2)
        if is_constrained  % has to use full equation
            a(i)    = a(i+1) + Hu'*alpha_(:,i) + 0.5*alpha_(:,i)'*Ci*alpha_(:,i);
            Vx(:,i) = Hx + beta_{i}'*Hu + (Ci*beta_{i}+Bi)'*alpha_(:,i);
            Vxx{i}  = Ai + beta_{i}'*Ci*beta_{i} + beta_{i}'*Bi + Bi'*beta_{i};
        else % cancel a lot of terms
            a(i)    = a(i+1) + 0.5*Hu'*alpha_(:,i);
            Vx(:,i) = Hx + beta_{i}'*Hu;
            Vxx{i}  = Ai - beta_{i}'*Ci*beta_{i};
        end
        % keep the value Hessian symmetric against round-off
        Vxx{i}  = .5*(Vxx{i} + Vxx{i}');
    end

    flag_linesearch_success = false;
    if flag_backpass_success
        % termination checking(success): small gradient
        g_norm = mean(max(abs(alpha_) ./ (abs(unom)+1),[],1));
        if (g_norm < para.tolGrad) && (Ci_reg < 1e-5)
            info.exitflag = 1;
            info.exitinfo = 'small gradient';
            if para.detail >= 1
                disp('Terminated with success: small gradient');
            end
            break;
        end

        % ---------------------------------------------------
        % 		line search for step length
        % ---------------------------------------------------
        x      = zeros(nx,N);
        x(:,1) = x0;
        u      = zeros(nu,N);
        inner_count = 1;
        epsilon     = 1; % step length
        % a0: expected change of value.
        % 	should be negative
        a0 = a(1);
        while true
            % ---------------------------------------------------
            % 			step three: forward pass
            %	 	    test new policy with epsilon
            % ---------------------------------------------------
            flag_violation = false;
            for i = 1:N-1
                deltaX = x(:,i) - xnom(:,i);
                u(:,i) = unom(:,i) + epsilon*alpha_(:,i) + beta_{i}*deltaX;
                if is_constrained
                    % saturate every control dimension (previously only
                    % rows 1 and 2 were handled, which assumed nu == 2)
                    for k = 1:nu
                        u(k,i) = truncate(u(k,i), uLB(k,i), uUB(k,i));
                    end
                    % if any(u(:,i) < uLB(:,i)) || any(u(:,i) > uUB(:,i))
                    %     flag_violation = true;
                    %     break;
                    % end
                end
                x(:,i+1) = model.f(x(:,i),u(:,i),i);
            end

            % ---------------------------------------------------
            % 			step four: evaluation
            % 		Calculate actual change in cost
            %   	determine whether to stop line search
            % ---------------------------------------------------
            V = 0;   % cost of the candidate trajectory
            for i = 1:N-1
                V = V + model.L(x(:,i),u(:,i),i);
            end
            V = V + model.Final(x(:,N));

            % deltaV: Actual change in value
            % 	should be negative
            deltaV = V - Vnom;
            if para.detail >= 2
                fprintf('Inner Iter: %d, deltaV: %lf\n', inner_count, deltaV);
            end

            if a0 < 0 % a0 should be negative
                V_improvement = deltaV/a0; %should be positive
            else
                V_improvement = -sign(deltaV);
                disp('[WARNING] Expected change in Value is positive');
            end

            if (V_improvement > para.c) && ~flag_violation
                flag_linesearch_success = true;
                break;
            end

            % line search: halve the step and rescale the expected change
            epsilon = 0.5*epsilon;
            a0 = 2*epsilon*(1-epsilon/2)*a(1);
            inner_count = inner_count + 1;
            if inner_count > para.maxInnerLoop
                break;
            end
        end % end loop line search
    end % end if flag_backpass_success

    % ---------------------------------------------------
    % 			step five: Update solution
    % ---------------------------------------------------
    % print head lines
    if para.detail >= 1 && print_count == print_head
        print_count = 0;
        fprintf('%-12s','Iter','Value','reduction','expected','gradient','log10(reg)');
        fprintf('\n');
    end

    if flag_linesearch_success
        % accept new solution; x is the rollout produced by u, so the
        % returned pair stays consistent even if we terminate below
        unom = u;
        xnom = x;

        % print status
        if para.detail >= 1
            fprintf('%-12d%-12.6g%-12.3g%-12.3g%-12.3g%-12.1f\n', ...
                iter, V, deltaV, a0, g_norm, log10(Ci_reg_base));
            print_count = print_count + 1;
        end

        % shrink regularization on Ci
        Ci_reg_ramp = min(Ci_reg_ramp / para.Ci_reg_fac, 1/para.Ci_reg_fac);
        Ci_reg_base = Ci_reg_base * Ci_reg_ramp * (Ci_reg_base > para.Ci_reg_Min); % might be zero

        % termination checking(success): small change in cost
        if abs(deltaV) < para.MinCostChange
            info.exitflag = 2;
            info.exitinfo = 'small cost change';
            if para.detail >= 1
                disp('Terminated with success: change in cost very small');
            end
            break;
        end
    else
        % do not accept new solution
        % print status
        if para.detail >= 1
            if flag_backpass_success
                fprintf('%-12d%-12s%-12.3g%-12.3g%-12.3g%-12.1f\n', ...
                    iter, 'LS-FAIL', deltaV, a0, g_norm, log10(Ci_reg_base));
            else
                fprintf('%-12d%-12s%-12s%-12s%-12s%-12.1f\n', ...
                    iter, 'BP-FAIL', '-', '-', '-', log10(Ci_reg_base));
            end
            print_count = print_count + 1;
        end

        % increase regularization
        Ci_reg_ramp = max(Ci_reg_ramp * para.Ci_reg_fac, para.Ci_reg_fac);
        Ci_reg_base = max(Ci_reg_base * Ci_reg_ramp, para.Ci_reg_Min);

        % termination checking(failure): ill condition
        if Ci_reg_base > para.Ci_reg_Max
            info.exitflag = -1;
            info.exitinfo = 'ill-conditioned';
            if para.detail >= 1
                disp('Terminated with failure: ill conditioned');
            end
            break;
        end
    end

    info.iter_success = true;
    % optional user hook that may adapt the parameters between iterations
    if ~isempty(para.updatePara)
        para = para.updatePara(para,info);
    end
end % end main iteration

if ~flag_backpass_success
    info.exitflag = -1;
end
% % experimental: add one more pass to obtain full feedback gain matrix
% for i = N-1:-1:1
%     [~,Hx,Hxx,Hu,Hux,Huu] = H(xnom(:,i),unom(:,i),Vx(:,i+1),i);
%     Ai = Hxx + fx{i}'*Vxx{i+1}*fx{i};
%     Bi = Hux + fu{i}'*Vxx{i+1}*fx{i};
%     Ci = Huu + fu{i}'*Vxx{i+1}*fu{i};
%     % add regularization if Ci is ill-conditioned
%     for r = 0:para.Ci_reg_localloop
%         % regularization coefficient
%         Ci_reg = Ci_reg_base*2^r;
%         Ci = Ci + Ci_reg*eye(nu);
%         cond_Ci = rcond(Ci);
%         if cond_Ci > 100*eps
%             break;
%         end
%     end
%     % 	if (cond_Ci < 100*eps)
%     % 		flag_backpass_success = false;
%     % 		break;
%     % 	end
%     tempbeta_    = -Ci\Bi;
%     beta_{i}(1,:) = tempbeta_(1,:);
%     % % solve QP for constrained problem
%     % [alpha_(:,i),result,R,free] = boxQP(Ci, Hu, uLB(:,i)-unom(:,i), uUB(:,i)-unom(:,i), alpha_(:,i+1));
%     % if result < 1
%     % 	flag_backpass_success  = false;
%     % 	break;
%     % end
%     Vx(:,i) = Hx + beta_{i}'*Hu + (Ci*beta_{i}+Bi)'*alpha_(:,i);
%     Vxx{i}  = Ai + beta_{i}'*Ci*beta_{i} + beta_{i}'*Bi + Bi'*beta_{i};
%     Vxx{i}  = .5*(Vxx{i} + Vxx{i}');
% end

if para.detail > 0.5
    fprintf('DDP terminated with number of iterations: %d\n', iter);
end
end % end function
function x = truncate(x, lb, ub)
% TRUNCATE  Saturate x to the interval [lb, ub].
%   Returns ub when x > ub, lb when x < lb, and x unchanged otherwise.
%   The upper bound is tested first, so if lb > ub the result is lb.
%   Written for scalar arguments (that is how DDP calls it).
    if x > ub
        x = ub;
    end
    if x < lb
        x = lb;
    end
end
% @INPROCEEDINGS{
% author={Tassa, Y. and Mansard, N. and Todorov, E.},
% booktitle={Robotics and Automation (ICRA), 2014 IEEE International Conference on},
% title={Control-Limited Differential Dynamic Programming},
% year={2014}, month={May}, doi={10.1109/ICRA.2014.6907001}}
% function [x,result,Hfree,free,trace] = boxQP(H,g,lower,upper,x0,options)
function [x,result,Hfree,free] = boxQP(H,g,lower,upper,x0) %,options)
% Minimize 0.5*x'*H*x + x'*g  s.t. lower<=x<=upper
% using a projected-Newton iteration with an Armijo line search.
%
%  inputs:
%     H            - positive definite matrix   (n * n)
%     g            - bias vector                (n)
%     lower        - lower bounds               (n)
%     upper        - upper bounds               (n)
%
%   optional inputs:
%     x0           - initial state              (n)
%
%  outputs:
%     x            - solution                   (n)
%     result       - result type (roughly, higher is better):
%                      -1 Hessian is not positive definite
%                       0 no descent direction found
%                       1 maximum main iterations exceeded
%                       2 maximum line-search iterations exceeded
%                       4 improvement smaller than tolerance
%                       5 gradient norm smaller than tolerance
%                       6 all dimensions are clamped
%     Hfree        - subspace cholesky factor   (n_free * n_free)
%     free         - set of free dimensions     (n)

% if nargin > 5
%     options        = num2cell(options(:));
%     [maxIter, minGrad, minRelImprove, stepDec, minStep, Armijo, print] = deal(options{:});
% else % defaults
maxIter        = 100;       % maximum number of iterations
minGrad        = 1e-8;      % minimum norm of non-fixed gradient
minRelImprove  = 1e-8;      % minimum relative improvement
stepDec        = 0.6;       % factor for decreasing stepsize
minStep        = 1e-22;     % minimal stepsize for linesearch
Armijo         = 0.1;       % Armijo parameter (fraction of linear improvement required)
print          = 0;         % verbosity
% end

% initial state
n        = size(H,1);
clamped  = false(n,1);
free     = true(n,1);
oldvalue = 0;
result   = 0;
gnorm    = 0;
nfactor  = 0;
Hfree    = zeros(n);

% initial state
if nargin > 4 && numel(x0)==n
    x = clamp(x0(:), lower, upper);
else
    % midpoint of the finite bounds, computed with base MATLAB only
    % (same values as nanmean(LU,2) after non-finite entries are set to NaN;
    % rows with no finite bound give 0/0 = NaN and are zeroed just below)
    LU          = [lower upper];
    isFin       = isfinite(LU);
    LU(~isFin)  = 0;
    x           = sum(LU,2) ./ sum(isFin,2);
end
x(~isfinite(x)) = 0;

% initial objective value
value    = x'*g + 0.5*x'*H*x;

if print > 0
    fprintf('==========\nStarting box-QP, dimension %-3d, initial value: %-12.3f\n',n, value);
end

% main loop
for iter = 1:maxIter

    if result ~=0
        break;
    end

    % check relative improvement
    if( iter>1 && (oldvalue - value) < minRelImprove*abs(oldvalue) )
        result = 4;
        break;
    end
    oldvalue = value;

    % get gradient
    grad     = g + H*x;

    % find clamped dimensions
    old_clamped                     = clamped;
    clamped                         = false(n,1);
    clamped((x == lower)&(grad>0))  = true;
    clamped((x == upper)&(grad<0))  = true;
    free                            = ~clamped;

    % check for all clamped
    if all(clamped)
        result = 6;
        break;
    end

    % factorize if clamped has changed
    if iter == 1
        factorize    = true;
    else
        factorize    = any(old_clamped ~= clamped);
    end

    if factorize
        [Hfree, indef]  = chol(H(free,free));
        if indef
            result = -1;
            break
        end
        nfactor            = nfactor + 1;
    end

    % check gradient norm
    gnorm  = norm(grad(free));
    if gnorm < minGrad
        result = 5;
        break;
    end

    % get search direction
    grad_clamped   = g  + H*(x.*clamped);
    search         = zeros(n,1);
    search(free)   = -Hfree\(Hfree'\grad_clamped(free)) - x(free);

    % check for descent direction
    sdotg          = sum(search.*grad);
    if sdotg >= 0 % (should not happen)
        break
    end

    % armijo linesearch
    step  = 1;
    nstep = 0;
    xc    = clamp(x+step*search, lower, upper);
    vc    = xc'*g + 0.5*xc'*H*xc;
    while (vc - oldvalue)/(step*sdotg) < Armijo
        step  = step*stepDec;
        nstep = nstep+1;
        xc    = clamp(x+step*search, lower, upper);
        vc    = xc'*g + 0.5*xc'*H*xc;
        if step<minStep
            result = 2;
            break
        end
    end

    if print > 1
        fprintf('iter %-3d  value % -9.5g |g| %-9.3g  reduction %-9.3g  linesearch %g^%-2d  n_clamped %d\n', ...
            iter, vc, gnorm, oldvalue-vc, stepDec, nstep, sum(clamped));
    end

    % if nargout > 4
    %     trace(iter).x        = x; %#ok<*AGROW>
    %     trace(iter).xc       = xc;
    %     trace(iter).value    = value;
    %     trace(iter).search   = search;
    %     trace(iter).clamped  = clamped;
    %     trace(iter).nfactor  = nfactor;
    % end

    % accept candidate
    x     = xc;
    value = vc;
end

% Report "max iterations" only when nothing more specific was recorded;
% otherwise a result set on the final iteration (in particular -1,
% non-positive-definite Hessian) would be overwritten by a success code.
if iter >= maxIter && result == 0
    result = 1;
end

results = { 'Hessian is not positive definite',...          % result = -1
            'No descent direction found',...                % result = 0    SHOULD NOT OCCUR
            'Maximum main iterations exceeded',...          % result = 1
            'Maximum line-search iterations exceeded',...   % result = 2
            'No bounds, returning Newton point',...         % result = 3
            'Improvement smaller than tolerance',...        % result = 4
            'Gradient norm smaller than tolerance',...      % result = 5
            'All dimensions are clamped'};                  % result = 6

if print > 0
    fprintf('RESULT: %s.\niterations %d  gradient %-12.6g final value %-12.6g  factorizations %d\n',...
        results{result+2}, iter, gnorm, value, nfactor);
end
end
function xc = clamp(x, lower, upper)
% CLAMP  Element-wise projection of x onto the box [lower, upper].
%   The upper bound is applied first, then the lower bound, so where
%   lower > upper the result is lower.
	xc = max(lower, min(upper, x));
end
% (Residue from the GitHub file-viewer page, not part of DDP.m:
%  "Provide feedback", "Saved searches", "Use saved searches to filter your
%  results more quickly", "Files / Expand file tree", "DDP.m", "Latest commit",
%  "History", "File metadata and controls".)