#14 Add scenario description for pendulum in matlab.
nrontsis authored and kyr-pol committed Jan 15, 2019
1 parent 8855fe5 commit e0dc994
Showing 8 changed files with 464 additions and 0 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -5,4 +5,5 @@ __pycache__
*.swp
octave-workspace
*.egg
examples/pendulum-matlab/pilcoV0.9/**
tmp
81 changes: 81 additions & 0 deletions examples/pendulum-matlab/matlab-envinroment/draw_pendulum.m
@@ -0,0 +1,81 @@
%% draw_pendulum.m
% *Summary:* Draw the pendulum system with reward, applied torque,
% and predictive uncertainty of the tip of the pendulum
%
% function draw_pendulum(theta, torque, cost, text1, text2, M, S)
%
%
% *Input arguments:*
%
%   theta        angle of the pendulum
%   torque       torque applied to the pendulum
% cost cost structure
% .fcn function handle (it is assumed to use saturating cost)
% .<> other fields that are passed to cost
% text1 (optional) text field 1
% text2 (optional) text field 2
% M (optional) mean of state
% S (optional) covariance of state
%
%
% Copyright (C) 2008-2013 by
% Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen.
%
% Last modified: 2013-03-18


function draw_pendulum(theta, torque, cost, text1, text2, M, S)
%% Code

l = 0.6;
xmin = -1.2*l;
xmax = 1.2*l;
umax = 0.5;
height = 0;

% Draw pendulum
pendulum = [0, 0; l*sin(theta), -l*cos(theta)];
clf; hold on
plot(pendulum(:,1), pendulum(:,2),'r','linewidth',4)

% plot ellipses around tips of pendulum (if M, S exist)
try
if max(max(S))>0
err = linspace(-1,1,100)*sqrt(S(2,2));
plot(l*sin(M(2)+2*err),-l*cos(M(2)+2*err),'b','linewidth',1)
plot(l*sin(M(2)+err),-l*cos(M(2)+err),'b','linewidth',2)
plot(l*sin(M(2)),-l*cos(M(2)),'b.','markersize',20)
end
catch
end

% Draw useful information
% target location
plot(0,l,'k+','MarkerSize',20);
plot([xmin, xmax], [-height, -height],'k','linewidth',2)
% joint
plot(0,0,'k.','markersize',24)
plot(0,0,'y.','markersize',14)
% tip of pendulum
plot(l*sin(theta),-l*cos(theta),'k.','markersize',24)
plot(l*sin(theta),-l*cos(theta),'y.','markersize',14)
plot(0,-2*l,'.w','markersize',0.005)
% applied torque
plot([0 torque/umax*xmax],[-0.5, -0.5],'g','linewidth',10);
% immediate reward
reward = 1-cost.fcn(cost,[0, theta]',zeros(2));
plot([0 reward*xmax],[-0.7, -0.7],'y', 'linewidth',10);
text(0,-0.5,'applied torque')
text(0,-0.7,'immediate reward')
if exist('text1','var')
text(0,-0.9, text1)
end
if exist('text2','var')
text(0,-1.1, text2)
end

set(gca,'DataAspectRatio',[1 1 1],'XLim',[xmin xmax],'YLim',[-2*l 2*l]);
axis off;
drawnow;
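For reference, a minimal call sketch (not part of this commit): the cost field values below are illustrative assumptions (the real ones come from settings_pendulum.m), and loss_pendulum.m plus the PILCO utilities gTrig/lossSat must be on the MATLAB path.

theta  = pi/4;  torque = 1.0;        % example state and control
cost.fcn    = @loss_pendulum;        % saturating cost added later in this diff
cost.p      = 1;                     % assumed pendulum length
cost.width  = 0.25;                  % assumed cost width
cost.angle  = 2;                     % the angle is the second state variable [dtheta theta]
cost.target = [0 pi]';               % assumed target: zero velocity, pendulum upright
M = [0 pi/4]'; S = 0.05*eye(2);      % example predicted state mean and covariance
draw_pendulum(theta, torque, cost, 'trial # 1', 'T=4 sec', M, S);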
29 changes: 29 additions & 0 deletions examples/pendulum-matlab/matlab-envinroment/draw_rollout_pendulum.m
@@ -0,0 +1,29 @@
%% draw_rollout_pendulum.m
% *Summary:* Script to draw the most recent pendulum trajectory
%
% Copyright (C) 2008-2013 by
% Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen.
%
% Last modified: 2013-03-27
%
%% High-Level Steps
% # For each time step, plot the observed trajectory

%% Code

% Loop over states in trajectory
for r = 1:size(xx,1)
cost.t = r;
if exist('j','var') && ~isempty(M{j})
draw_pendulum(latent{j}(r,2), latent{j}(r,end), cost, ...
['trial # ' num2str(j+J) ', T=' num2str(H*dt) ' sec'], ...
['total experience (after this trial): ' num2str(dt*size(x,1)) ...
' sec'], M{j}(:,r), Sigma{j}(:,:,r));
else
draw_pendulum(latent{jj}(r,2), latent{jj}(r,end), cost, ...
['(random) trial # ' num2str(1) ', T=' num2str(H*dt) ' sec'], ...
['total experience (after this trial): ' num2str(dt*size(x,1)) ...
' sec'])
end
pause(dt);
end
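As written this is a script, not a function: it reads the rollout data from the base workspace. Roughly what it expects to find there, inferred from the calls above (these variables are populated by pendulum_learn.m and the PILCO toolbox scripts, which are not part of this hunk):

% Workspace variables used by draw_rollout_pendulum.m (inferred from the script):
%   xx                     most recent rollout, one state per row
%   latent{j}, latent{jj}  latent state trajectories recorded so far
%   M{j}, Sigma{j}         predicted state means/covariances (controlled trials only)
%   j or jj                index of the current controlled or random trial
%   J, H, dt               number of random trials, horizon, sampling time
%   x                      accumulated training inputs (used for the experience counter)
%   cost                   cost structure handed on to draw_pendulum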
44 changes: 44 additions & 0 deletions examples/pendulum-matlab/matlab-envinroment/dynamics_pendulum.m
@@ -0,0 +1,44 @@
%% dynamics_pendulum.m
% *Summary:* Implements the ODE for simulating the pendulum dynamics, where
% an input torque f can be applied
%
% function dz = dynamics_pendulum(t,z,u)
%
%
% *Input arguments:*
%
% t current time step (called from ODE solver)
% z state [2 x 1]
%   u        torque f(t) applied to the pendulum, passed as a function handle of t
%
% *Output arguments:*
%
%   dz       state derivative with respect to time                [2 x 1]
%
% Note: It is assumed that the state variables are of the following order:
% dtheta: [rad/s] angular velocity of pendulum
% theta: [rad] angle of pendulum
%
% A detailed derivation of the dynamics can be found in:
%
% M.P. Deisenroth:
% Efficient Reinforcement Learning Using Gaussian Processes, Appendix C,
% KIT Scientific Publishing, 2010.
%
%
% Copyright (C) 2008-2013 by
% Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen.
%
% Last modified: 2013-03-18

function dz = dynamics_pendulum(t,z,u)
%% Code

l = 1; % [m] length of pendulum
m = 1; % [kg] mass of pendulum
g = 9.82; % [m/s^2] acceleration of gravity
b = 0.01; % [s*Nm/rad] friction coefficient

dz = zeros(2,1);
dz(1) = ( u(t) - b*z(1) - m*g*l*sin(z(2))/2 ) / (m*l^2/3);
dz(2) = z(1);
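A minimal open-loop simulation sketch (not part of the commit; the torque profile, initial state, and horizon are arbitrary assumptions):

u  = @(t) 2*sin(3*t);                              % assumed open-loop torque profile [Nm]
z0 = [0; 0];                                       % [dtheta; theta]: at rest, hanging down
[t, z] = ode45(@(t,z) dynamics_pendulum(t,z,u), [0 5], z0);
plot(t, z(:,2)), xlabel('t [s]'), ylabel('\theta [rad]')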
110 changes: 110 additions & 0 deletions examples/pendulum-matlab/matlab-envinroment/loss_pendulum.m
@@ -0,0 +1,110 @@
%% loss_pendulum.m
% *Summary:* Pendulum loss function; the loss is
% $1-\exp(-0.5*d^2*a)$, where $a>0$ and $d^2$ is the squared difference
% between the actual and desired position of the tip of the pendulum.
% The mean and the variance of the loss are computed by averaging over the
% Gaussian distribution of the state $p(x) = \mathcal N(m,s)$ with mean $m$
% and covariance matrix $s$.
% Derivatives of these quantities are computed when desired.
%
%
% function [L, dLdm, dLds, S2] = loss_pendulum(cost, m, s)
%
%
% *Input arguments:*
%
% cost cost structure
%   .p         length of the pendulum                              [1 x 1 ]
% .width array of widths of the cost (summed together)
% .expl (optional) exploration parameter
% .angle (optional) array of angle indices
% .target target state [D x 1 ]
% m mean of state distribution [D x 1 ]
% s covariance matrix for the state distribution [D x D ]
%
% *Output arguments:*
%
% L expected cost [1 x 1 ]
% dLdm derivative of expected cost wrt. state mean vector [1 x D ]
% dLds derivative of expected cost wrt. state covariance matrix [1 x D^2]
% S2 variance of cost [1 x 1 ]
%
% Copyright (C) 2008-2014 by
% Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen.
%
% Last modified: 2014-01-09
%
%% High-Level Steps
% # Precomputations
% # Define static penalty as distance from target setpoint
% # Trigonometric augmentation
% # Calculate loss

function [L, dLdm, dLds, S2] = loss_pendulum(cost, m, s)
%% Code
if isfield(cost,'width'); cw = cost.width; else cw = 1; end
if ~isfield(cost,'expl') || isempty(cost.expl); b = 0; else b = cost.expl; end


% 1. Some precomputations
D0 = size(s,2); % state dimension
D1 = D0 + 2*length(cost.angle); % state dimension (with sin/cos)

M = zeros(D1,1); M(1:D0) = m; S = zeros(D1); S(1:D0,1:D0) = s;
Mdm = [eye(D0); zeros(D1-D0,D0)]; Sdm = zeros(D1*D1,D0);
Mds = zeros(D1,D0*D0); Sds = kron(Mdm,Mdm);

% 2. Define static penalty as distance from target setpoint
ell = cost.p;
Q = zeros(D1); Q(D0+1:D0+2,D0+1:D0+2) = eye(2)*ell^2;

% 3. Trigonometric augmentation
if D1-D0 > 0
target = [cost.target(:); ...
gTrig(cost.target(:), zeros(numel(cost.target)), cost.angle)];

i = 1:D0; k = D0+1:D1;
[M(k), S(k,k), C, mdm, sdm, Cdm, mds, sds, Cds] = ...
gTrig(M(i),S(i,i),cost.angle);
[S, Mdm, Mds, Sdm, Sds] = ...
fillIn(S,C,mdm,sdm,Cdm,mds,sds,Cds,Mdm,Sdm,Mds,Sds,i,k,D1);
end

% 4. Calculate loss
L = 0; dLdm = zeros(1,D0); dLds = zeros(1,D0*D0); S2 = 0;
for i = 1:length(cw) % scale mixture of immediate costs
cost.z = target; cost.W = Q/cw(i)^2;
[r, rdM, rdS, s2, s2dM, s2dS] = lossSat(cost, M, S);

L = L + r; S2 = S2 + s2;
dLdm = dLdm + rdM(:)'*Mdm + rdS(:)'*Sdm;
dLds = dLds + rdM(:)'*Mds + rdS(:)'*Sds;

if (b~=0 || ~isempty(b)) && abs(s2)>1e-12
L = L + b*sqrt(s2);
dLdm = dLdm + b/sqrt(s2) * ( s2dM(:)'*Mdm + s2dS(:)'*Sdm )/2;
dLds = dLds + b/sqrt(s2) * ( s2dM(:)'*Mds + s2dS(:)'*Sds )/2;
end
end

% normalize
n = length(cw); L = L/n; dLdm = dLdm/n; dLds = dLds/n; S2 = S2/n;

% Fill in covariance matrix...and derivatives ----------------------------
function [S, Mdm, Mds, Sdm, Sds] = ...
fillIn(S,C,mdm,sdm,Cdm,mds,sds,Cds,Mdm,Sdm,Mds,Sds,i,k,D)
X = reshape(1:D*D,[D D]); XT = X'; % vectorised indices
I=0*X; I(i,i)=1; ii=X(I==1)'; I=0*X; I(k,k)=1; kk=X(I==1)';
I=0*X; I(i,k)=1; ik=X(I==1)'; ki=XT(I==1)';

Mdm(k,:) = mdm*Mdm(i,:) + mds*Sdm(ii,:); % chainrule
Mds(k,:) = mdm*Mds(i,:) + mds*Sds(ii,:);
Sdm(kk,:) = sdm*Mdm(i,:) + sds*Sdm(ii,:);
Sds(kk,:) = sdm*Mds(i,:) + sds*Sds(ii,:);
dCdm = Cdm*Mdm(i,:) + Cds*Sdm(ii,:);
dCds = Cdm*Mds(i,:) + Cds*Sds(ii,:);

S(i,k) = S(i,i)*C; S(k,i) = S(i,k)'; % off-diagonal
SS = kron(eye(length(k)),S(i,i)); CC = kron(C',eye(length(i)));
Sdm(ik,:) = SS*dCdm + CC*Sdm(ii,:); Sdm(ki,:) = Sdm(ik,:);
Sds(ik,:) = SS*dCds + CC*Sds(ii,:); Sds(ki,:) = Sds(ik,:);
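A minimal evaluation sketch (not part of the commit): the cost fields are assumed values standing in for settings_pendulum.m, and gTrig/lossSat from the PILCO toolbox must be on the path.

m = [0 0]';  s = 0.01*eye(2);                % example Gaussian state distribution N(m,s)
cost.p      = 1;      cost.width  = 0.25;    % assumed pendulum length and cost width
cost.angle  = 2;      cost.target = [0 pi]'; % angle index and assumed upright target
[L, dLdm, dLds, S2] = loss_pendulum(cost, m, s);  % expected cost, its gradients, and its variance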
46 changes: 46 additions & 0 deletions examples/pendulum-matlab/matlab-envinroment/pendulum_learn.m
@@ -0,0 +1,46 @@
%% pendulum_learn.m
% *Summary:* Script to learn a controller for the pendulum swingup
%
% Copyright (C) 2008-2013 by
% Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen.
%
% Last modified: 2013-03-27
%
%% High-Level Steps
% # Load parameters
% # Create J initial trajectories by applying random controls
% # Controlled learning (train dynamics model, policy learning, policy
% application)

%% Code

% 1. Initialization
clear all; close all;
settings_pendulum; % load scenario-specific settings
basename = 'pendulum_'; % filename used for saving data

% 2. Initial J random rollouts
for jj = 1:J
[xx, yy, realCost{jj}, latent{jj}] = ...
rollout(gaussian(mu0, S0), struct('maxU',policy.maxU), H, plant, cost);
x = [x; xx]; y = [y; yy]; % augment training sets for dynamics model
if plotting.verbosity > 0; % visualization of trajectory
if ~ishandle(1); figure(1); else set(0,'CurrentFigure',1); end; clf(1);
draw_rollout_pendulum;
end
end

mu0Sim(odei,:) = mu0; S0Sim(odei,odei) = S0;
mu0Sim = mu0Sim(dyno); S0Sim = S0Sim(dyno,dyno);

% 3. Controlled learning (N iterations)
for j = 1:N
trainDynModel; % train (GP) dynamics model
learnPolicy; % learn policy
applyController; % apply controller to system
disp(['controlled trial # ' num2str(j)]);
if plotting.verbosity > 0; % visualization of trajectory
if ~ishandle(1); figure(1); else set(0,'CurrentFigure',1); end; clf(1);
draw_rollout_pendulum;
end
end
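Running the scenario presumably requires the original PILCO toolbox on the MATLAB path; the new .gitignore entry suggests it is expected to live under examples/pendulum-matlab/pilcoV0.9/. A rough launch sketch (the directory layout and path setup are assumptions):

cd examples/pendulum-matlab/matlab-envinroment
addpath(genpath('../pilcoV0.9'));    % PILCO v0.9 toolbox (git-ignored above; obtain separately)
pendulum_learn                       % loads settings_pendulum, runs random rollouts, then learning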