#14 Add scenario description for pendulum in matlab.
nrontsis authored and kyr-pol committed Jan 15, 2019
1 parent 8855fe5 commit e0dc994
Showing 8 changed files with 464 additions and 0 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -5,4 +5,5 @@ __pycache__
*.swp
octave-workspace
*.egg
examples/pendulum-matlab/pilcoV0.9/**
tmp
81 changes: 81 additions & 0 deletions examples/pendulum-matlab/matlab-envinroment/draw_pendulum.m
@@ -0,0 +1,81 @@
%% draw_pendulum.m
% *Summary:* Draw the pendulum system with reward, applied torque,
% and predictive uncertainty of the tip of the pendulum
%
% function draw_pendulum(theta, torque, cost, text1, text2, M, S)
%
%
% *Input arguments:*
%
%   theta        angle of the pendulum
%   torque       torque applied to the pendulum
% cost cost structure
% .fcn function handle (it is assumed to use saturating cost)
% .<> other fields that are passed to cost
% text1 (optional) text field 1
% text2 (optional) text field 2
% M (optional) mean of state
% S (optional) covariance of state
%
%
% Copyright (C) 2008-2013 by
% Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen.
%
% Last modified: 2013-03-18


function draw_pendulum(theta, torque, cost, text1, text2, M, S)
%% Code

l = 0.6;
xmin = -1.2*l;
xmax = 1.2*l;
umax = 0.5;
height = 0;

% Draw pendulum
pendulum = [0, 0; l*sin(theta), -l*cos(theta)];
clf; hold on
plot(pendulum(:,1), pendulum(:,2),'r','linewidth',4)

% plot ellipses around tips of pendulum (if M, S exist)
try
if max(max(S))>0
err = linspace(-1,1,100)*sqrt(S(2,2));
plot(l*sin(M(2)+2*err),-l*cos(M(2)+2*err),'b','linewidth',1)
plot(l*sin(M(2)+err),-l*cos(M(2)+err),'b','linewidth',2)
plot(l*sin(M(2)),-l*cos(M(2)),'b.','markersize',20)
end
catch
end

% Draw useful information
% target location
plot(0,l,'k+','MarkerSize',20);
plot([xmin, xmax], [-height, -height],'k','linewidth',2)
% joint
plot(0,0,'k.','markersize',24)
plot(0,0,'y.','markersize',14)
% tip of pendulum
plot(l*sin(theta),-l*cos(theta),'k.','markersize',24)
plot(l*sin(theta),-l*cos(theta),'y.','markersize',14)
plot(0,-2*l,'.w','markersize',0.005)
% applied torque
plot([0 torque/umax*xmax],[-0.5, -0.5],'g','linewidth',10);
% immediate reward
reward = 1-cost.fcn(cost,[0, theta]',zeros(2));
plot([0 reward*xmax],[-0.7, -0.7],'y', 'linewidth',10);
text(0,-0.5,'applied torque')
text(0,-0.7,'immediate reward')
if exist('text1','var')
text(0,-0.9, text1)
end
if exist('text2','var')
text(0,-1.1, text2)
end

set(gca,'DataAspectRatio',[1 1 1],'XLim',[xmin xmax],'YLim',[-2*l 2*l]);
axis off;
drawnow;
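For reference, a minimal call sketch (not part of this commit): the cost field values below are illustrative assumptions (the real ones come from settings_pendulum.m), and loss_pendulum.m plus the PILCO utilities gTrig/lossSat must be on the MATLAB path.

theta  = pi/4;  torque = 1.0;        % example state and control
cost.fcn    = @loss_pendulum;        % saturating cost added later in this diff
cost.p      = 1;                     % assumed pendulum length
cost.width  = 0.25;                  % assumed cost width
cost.angle  = 2;                     % the angle is the second state variable [dtheta theta]
cost.target = [0 pi]';               % assumed target: zero velocity, pendulum upright
M = [0 pi/4]'; S = 0.05*eye(2);      % example predicted state mean and covariance
draw_pendulum(theta, torque, cost, 'trial # 1', 'T=4 sec', M, S);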
29 changes: 29 additions & 0 deletions examples/pendulum-matlab/matlab-envinroment/draw_rollout_pendulum.m
@@ -0,0 +1,29 @@
%% draw_rollout_pendulum.m
% *Summary:* Script to draw the most recent pendulum trajectory
%
% Copyright (C) 2008-2013 by
% Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen.
%
% Last modified: 2013-03-27
%
%% High-Level Steps
% # For each time step, plot the observed trajectory

%% Code

% Loop over states in trajectory
for r = 1:size(xx,1)
cost.t = r;
if exist('j','var') && ~isempty(M{j})
draw_pendulum(latent{j}(r,2), latent{j}(r,end), cost, ...
['trial # ' num2str(j+J) ', T=' num2str(H*dt) ' sec'], ...
['total experience (after this trial): ' num2str(dt*size(x,1)) ...
' sec'], M{j}(:,r), Sigma{j}(:,:,r));
else
draw_pendulum(latent{jj}(r,2), latent{jj}(r,end), cost, ...
['(random) trial # ' num2str(1) ', T=' num2str(H*dt) ' sec'], ...
['total experience (after this trial): ' num2str(dt*size(x,1)) ...
' sec'])
end
pause(dt);
end
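As written this is a script, not a function: it reads the rollout data from the base workspace. Roughly what it expects to find there, inferred from the calls above (these variables are populated by pendulum_learn.m and the PILCO toolbox scripts, which are not part of this hunk):

% Workspace variables used by draw_rollout_pendulum.m (inferred from the script):
%   xx                     most recent rollout, one state per row
%   latent{j}, latent{jj}  latent state trajectories recorded so far
%   M{j}, Sigma{j}         predicted state means/covariances (controlled trials only)
%   j or jj                index of the current controlled or random trial
%   J, H, dt               number of random trials, horizon, sampling time
%   x                      accumulated training inputs (used for the experience counter)
%   cost                   cost structure handed on to draw_pendulum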
44 changes: 44 additions & 0 deletions examples/pendulum-matlab/matlab-envinroment/dynamics_pendulum.m
@@ -0,0 +1,44 @@
%% dynamics_pendulum.m
% *Summary:* Implements the ODE for simulating the pendulum dynamics, where
% an input torque f can be applied
%
% function dz = dynamics_pendulum(t,z,u)
%
%
% *Input arguments:*
%
% t current time step (called from ODE solver)
% z state [2 x 1]
%   u        torque f(t) applied to the pendulum, passed as a function handle of t
%
% *Output arguments:*
%
%   dz       state derivative with respect to time                [2 x 1]
%
% Note: It is assumed that the state variables are of the following order:
% dtheta: [rad/s] angular velocity of pendulum
% theta: [rad] angle of pendulum
%
% A detailed derivation of the dynamics can be found in:
%
% M.P. Deisenroth:
% Efficient Reinforcement Learning Using Gaussian Processes, Appendix C,
% KIT Scientific Publishing, 2010.
%
%
% Copyright (C) 2008-2013 by
% Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen.
%
% Last modified: 2013-03-18

function dz = dynamics_pendulum(t,z,u)
%% Code

l = 1; % [m] length of pendulum
m = 1; % [kg] mass of pendulum
g = 9.82; % [m/s^2] acceleration of gravity
b = 0.01; % [s*Nm/rad] friction coefficient

dz = zeros(2,1);
dz(1) = ( u(t) - b*z(1) - m*g*l*sin(z(2))/2 ) / (m*l^2/3);
dz(2) = z(1);
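A minimal open-loop simulation sketch (not part of the commit; the torque profile, initial state, and horizon are arbitrary assumptions):

u  = @(t) 2*sin(3*t);                              % assumed open-loop torque profile [Nm]
z0 = [0; 0];                                       % [dtheta; theta]: at rest, hanging down
[t, z] = ode45(@(t,z) dynamics_pendulum(t,z,u), [0 5], z0);
plot(t, z(:,2)), xlabel('t [s]'), ylabel('\theta [rad]')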
110 changes: 110 additions & 0 deletions examples/pendulum-matlab/matlab-envinroment/loss_pendulum.m
@@ -0,0 +1,110 @@
%% loss_pendulum.m
% *Summary:* Pendulum loss function; the loss is
% $1-\exp(-0.5*d^2*a)$, where $a>0$ and $d^2$ is the squared difference
% between the actual and desired position of the tip of the pendulum.
% The mean and the variance of the loss are computed by averaging over the
% Gaussian distribution of the state $p(x) = \mathcal N(m,s)$ with mean $m$
% and covariance matrix $s$.
% Derivatives of these quantities are computed when desired.
%
%
% function [L, dLdm, dLds, S2] = loss_pendulum(cost, m, s)
%
%
% *Input arguments:*
%
% cost cost structure
%   .p         length of the pendulum                              [1 x 1 ]
% .width array of widths of the cost (summed together)
% .expl (optional) exploration parameter
% .angle (optional) array of angle indices
% .target target state [D x 1 ]
% m mean of state distribution [D x 1 ]
% s covariance matrix for the state distribution [D x D ]
%
% *Output arguments:*
%
% L expected cost [1 x 1 ]
% dLdm derivative of expected cost wrt. state mean vector [1 x D ]
% dLds derivative of expected cost wrt. state covariance matrix [1 x D^2]
% S2 variance of cost [1 x 1 ]
%
% Copyright (C) 2008-2014 by
% Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen.
%
% Last modified: 2014-01-09
%
%% High-Level Steps
% # Precomputations
% # Define static penalty as distance from target setpoint
% # Trigonometric augmentation
% # Calculate loss

function [L, dLdm, dLds, S2] = loss_pendulum(cost, m, s)
%% Code
if isfield(cost,'width'); cw = cost.width; else cw = 1; end
if ~isfield(cost,'expl') || isempty(cost.expl); b = 0; else b = cost.expl; end


% 1. Some precomputations
D0 = size(s,2); % state dimension
D1 = D0 + 2*length(cost.angle); % state dimension (with sin/cos)

M = zeros(D1,1); M(1:D0) = m; S = zeros(D1); S(1:D0,1:D0) = s;
Mdm = [eye(D0); zeros(D1-D0,D0)]; Sdm = zeros(D1*D1,D0);
Mds = zeros(D1,D0*D0); Sds = kron(Mdm,Mdm);

% 2. Define static penalty as distance from target setpoint
ell = cost.p;
Q = zeros(D1); Q(D0+1:D0+2,D0+1:D0+2) = eye(2)*ell^2;

% 3. Trigonometric augmentation
if D1-D0 > 0
target = [cost.target(:); ...
gTrig(cost.target(:), zeros(numel(cost.target)), cost.angle)];

i = 1:D0; k = D0+1:D1;
[M(k), S(k,k), C, mdm, sdm, Cdm, mds, sds, Cds] = ...
gTrig(M(i),S(i,i),cost.angle);
[S, Mdm, Mds, Sdm, Sds] = ...
fillIn(S,C,mdm,sdm,Cdm,mds,sds,Cds,Mdm,Sdm,Mds,Sds,i,k,D1);
end

% 4. Calculate loss
L = 0; dLdm = zeros(1,D0); dLds = zeros(1,D0*D0); S2 = 0;
for i = 1:length(cw) % scale mixture of immediate costs
cost.z = target; cost.W = Q/cw(i)^2;
[r, rdM, rdS, s2, s2dM, s2dS] = lossSat(cost, M, S);

L = L + r; S2 = S2 + s2;
dLdm = dLdm + rdM(:)'*Mdm + rdS(:)'*Sdm;
dLds = dLds + rdM(:)'*Mds + rdS(:)'*Sds;

if (b~=0 || ~isempty(b)) && abs(s2)>1e-12
L = L + b*sqrt(s2);
dLdm = dLdm + b/sqrt(s2) * ( s2dM(:)'*Mdm + s2dS(:)'*Sdm )/2;
dLds = dLds + b/sqrt(s2) * ( s2dM(:)'*Mds + s2dS(:)'*Sds )/2;
end
end

% normalize
n = length(cw); L = L/n; dLdm = dLdm/n; dLds = dLds/n; S2 = S2/n;

% Fill in covariance matrix...and derivatives ----------------------------
function [S, Mdm, Mds, Sdm, Sds] = ...
fillIn(S,C,mdm,sdm,Cdm,mds,sds,Cds,Mdm,Sdm,Mds,Sds,i,k,D)
X = reshape(1:D*D,[D D]); XT = X'; % vectorised indices
I=0*X; I(i,i)=1; ii=X(I==1)'; I=0*X; I(k,k)=1; kk=X(I==1)';
I=0*X; I(i,k)=1; ik=X(I==1)'; ki=XT(I==1)';

Mdm(k,:) = mdm*Mdm(i,:) + mds*Sdm(ii,:); % chainrule
Mds(k,:) = mdm*Mds(i,:) + mds*Sds(ii,:);
Sdm(kk,:) = sdm*Mdm(i,:) + sds*Sdm(ii,:);
Sds(kk,:) = sdm*Mds(i,:) + sds*Sds(ii,:);
dCdm = Cdm*Mdm(i,:) + Cds*Sdm(ii,:);
dCds = Cdm*Mds(i,:) + Cds*Sds(ii,:);

S(i,k) = S(i,i)*C; S(k,i) = S(i,k)'; % off-diagonal
SS = kron(eye(length(k)),S(i,i)); CC = kron(C',eye(length(i)));
Sdm(ik,:) = SS*dCdm + CC*Sdm(ii,:); Sdm(ki,:) = Sdm(ik,:);
Sds(ik,:) = SS*dCds + CC*Sds(ii,:); Sds(ki,:) = Sds(ik,:);
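A minimal evaluation sketch (not part of the commit): the cost fields are assumed values standing in for settings_pendulum.m, and gTrig/lossSat from the PILCO toolbox must be on the path.

m = [0 0]';  s = 0.01*eye(2);                % example Gaussian state distribution N(m,s)
cost.p      = 1;      cost.width  = 0.25;    % assumed pendulum length and cost width
cost.angle  = 2;      cost.target = [0 pi]'; % angle index and assumed upright target
[L, dLdm, dLds, S2] = loss_pendulum(cost, m, s);  % expected cost, its gradients, and its variance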
46 changes: 46 additions & 0 deletions examples/pendulum-matlab/matlab-envinroment/pendulum_learn.m
@@ -0,0 +1,46 @@
%% pendulum_learn.m
% *Summary:* Script to learn a controller for the pendulum swingup
%
% Copyright (C) 2008-2013 by
% Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen.
%
% Last modified: 2013-03-27
%
%% High-Level Steps
% # Load parameters
% # Create J initial trajectories by applying random controls
% # Controlled learning (train dynamics model, policy learning, policy
% application)

%% Code

% 1. Initialization
clear all; close all;
settings_pendulum; % load scenario-specific settings
basename = 'pendulum_'; % filename used for saving data

% 2. Initial J random rollouts
for jj = 1:J
[xx, yy, realCost{jj}, latent{jj}] = ...
rollout(gaussian(mu0, S0), struct('maxU',policy.maxU), H, plant, cost);
x = [x; xx]; y = [y; yy]; % augment training sets for dynamics model
if plotting.verbosity > 0; % visualization of trajectory
if ~ishandle(1); figure(1); else set(0,'CurrentFigure',1); end; clf(1);
draw_rollout_pendulum;
end
end

mu0Sim(odei,:) = mu0; S0Sim(odei,odei) = S0;
mu0Sim = mu0Sim(dyno); S0Sim = S0Sim(dyno,dyno);

% 3. Controlled learning (N iterations)
for j = 1:N
trainDynModel; % train (GP) dynamics model
learnPolicy; % learn policy
applyController; % apply controller to system
disp(['controlled trial # ' num2str(j)]);
if plotting.verbosity > 0; % visualization of trajectory
if ~ishandle(1); figure(1); else set(0,'CurrentFigure',1); end; clf(1);
draw_rollout_pendulum;
end
end
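Running the scenario presumably requires the original PILCO toolbox on the MATLAB path; the new .gitignore entry suggests it is expected to live under examples/pendulum-matlab/pilcoV0.9/. A rough launch sketch (the directory layout and path setup are assumptions):

cd examples/pendulum-matlab/matlab-envinroment
addpath(genpath('../pilcoV0.9'));    % PILCO v0.9 toolbox (git-ignored above; obtain separately)
pendulum_learn                       % loads settings_pendulum, runs random rollouts, then learning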