-
Notifications
You must be signed in to change notification settings - Fork 84
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
#14 Add scenario description for pendulum in matlab.
- Loading branch information
Showing
8 changed files
with
464 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,4 +5,5 @@ __pycache__ | |
*.swp | ||
octave-workspace | ||
*.egg | ||
examples/pendulum-matlab/pilcoV0.9/** | ||
tmp |
81 changes: 81 additions & 0 deletions
81
examples/pendulum-matlab/matlab-envinroment/draw_pendulum.m
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,81 @@ | ||
%% draw_pendulum.m | ||
% *Summary:* Draw the pendulum system with reward, applied torque, | ||
% and predictive uncertainty of the tips of the pendulums | ||
% | ||
% function draw_pendulum(theta, torque, cost, text1, text2, M, S) | ||
% | ||
% | ||
% *Input arguments:* | ||
% | ||
% theta1 angle of inner pendulum | ||
% theta2 angle of outer pendulum | ||
% f1 torque applied to inner pendulum | ||
% f2 torque applied to outer pendulum | ||
% cost cost structure | ||
% .fcn function handle (it is assumed to use saturating cost) | ||
% .<> other fields that are passed to cost | ||
% text1 (optional) text field 1 | ||
% text2 (optional) text field 2 | ||
% M (optional) mean of state | ||
% S (optional) covariance of state | ||
% | ||
% | ||
% Copyright (C) 2008-2013 by | ||
% Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen. | ||
% | ||
% Last modified: 2013-03-18 | ||
|
||
|
||
function draw_pendulum(theta, torque, cost, text1, text2, M, S) | ||
%% Code | ||
|
||
l = 0.6; | ||
xmin = -1.2*l; | ||
xmax = 1.2*l; | ||
umax = 0.5; | ||
height = 0; | ||
|
||
% Draw pendulum | ||
pendulum = [0, 0; l*sin(theta), -l*cos(theta)]; | ||
clf; hold on | ||
plot(pendulum(:,1), pendulum(:,2),'r','linewidth',4) | ||
|
||
% plot ellipses around tips of pendulum (if M, S exist) | ||
try | ||
if max(max(S))>0 | ||
err = linspace(-1,1,100)*sqrt(S(2,2)); | ||
plot(l*sin(M(2)+2*err),-l*cos(M(2)+2*err),'b','linewidth',1) | ||
plot(l*sin(M(2)+err),-l*cos(M(2)+err),'b','linewidth',2) | ||
plot(l*sin(M(2)),-l*cos(M(2)),'b.','markersize',20) | ||
end | ||
catch | ||
end | ||
|
||
% Draw useful information | ||
% target location | ||
plot(0,l,'k+','MarkerSize',20); | ||
plot([xmin, xmax], [-height, -height],'k','linewidth',2) | ||
% joint | ||
plot(0,0,'k.','markersize',24) | ||
plot(0,0,'y.','markersize',14) | ||
% tip of pendulum | ||
plot(l*sin(theta),-l*cos(theta),'k.','markersize',24) | ||
plot(l*sin(theta),-l*cos(theta),'y.','markersize',14) | ||
plot(0,-2*l,'.w','markersize',0.005) | ||
% applied torque | ||
plot([0 torque/umax*xmax],[-0.5, -0.5],'g','linewidth',10); | ||
% immediate reward | ||
reward = 1-cost.fcn(cost,[0, theta]',zeros(2)); | ||
plot([0 reward*xmax],[-0.7, -0.7],'y', 'linewidth',10); | ||
text(0,-0.5,'applied torque') | ||
text(0,-0.7,'immediate reward') | ||
if exist('text1','var') | ||
text(0,-0.9, text1) | ||
end | ||
if exist('text2','var') | ||
text(0,-1.1, text2) | ||
end | ||
|
||
set(gca,'DataAspectRatio',[1 1 1],'XLim',[xmin xmax],'YLim',[-2*l 2*l]); | ||
axis off; | ||
drawnow; |
29 changes: 29 additions & 0 deletions
29
examples/pendulum-matlab/matlab-envinroment/draw_rollout_pendulum.m
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
%% draw_rollout_pendulum.m | ||
% *Summary:* Script to draw a trajectory of the most recent pendulum trajectory | ||
% | ||
% Copyright (C) 2008-2013 by | ||
% Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen. | ||
% | ||
% Last modified: 2013-03-27 | ||
% | ||
%% High-Level Steps | ||
% # For each time step, plot the observed trajectory | ||
|
||
%% Code | ||
|
||
% Loop over states in trajectory | ||
for r = 1:size(xx,1) | ||
cost.t = r; | ||
if exist('j','var') && ~isempty(M{j}) | ||
draw_pendulum(latent{j}(r,2), latent{j}(r,end), cost, ... | ||
['trial # ' num2str(j+J) ', T=' num2str(H*dt) ' sec'], ... | ||
['total experience (after this trial): ' num2str(dt*size(x,1)) ... | ||
' sec'], M{j}(:,r), Sigma{j}(:,:,r)); | ||
else | ||
draw_pendulum(latent{jj}(r,2), latent{jj}(r,end), cost, ... | ||
['(random) trial # ' num2str(1) ', T=' num2str(H*dt) ' sec'], ... | ||
['total experience (after this trial): ' num2str(dt*size(x,1)) ... | ||
' sec']) | ||
end | ||
pause(dt); | ||
end |
44 changes: 44 additions & 0 deletions
44
examples/pendulum-matlab/matlab-envinroment/dynamics_pendulum.m
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
%% dynamics_pendulum.m | ||
% *Summary:* Implements ths ODE for simulating the pendulum dynamics, where | ||
% an input torque f can be applied | ||
% | ||
% function dz = dynamics_pendulum(t,z,u) | ||
% | ||
% | ||
% *Input arguments:* | ||
% | ||
% t current time step (called from ODE solver) | ||
% z state [2 x 1] | ||
% u (optional): torque f(t) applied to pendulum | ||
% | ||
% *Output arguments:* | ||
% | ||
% dz if 3 input arguments: state derivative wrt time | ||
% | ||
% Note: It is assumed that the state variables are of the following order: | ||
% dtheta: [rad/s] angular velocity of pendulum | ||
% theta: [rad] angle of pendulum | ||
% | ||
% A detailed derivation of the dynamics can be found in: | ||
% | ||
% M.P. Deisenroth: | ||
% Efficient Reinforcement Learning Using Gaussian Processes, Appendix C, | ||
% KIT Scientific Publishing, 2010. | ||
% | ||
% | ||
% Copyright (C) 2008-2013 by | ||
% Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen. | ||
% | ||
% Last modified: 2013-03-18 | ||
|
||
function dz = dynamics_pendulum(t,z,u) | ||
%% Code | ||
|
||
l = 1; % [m] length of pendulum | ||
m = 1; % [kg] mass of pendulum | ||
g = 9.82; % [m/s^2] acceleration of gravity | ||
b = 0.01; % [s*Nm/rad] friction coefficient | ||
|
||
dz = zeros(2,1); | ||
dz(1) = ( u(t) - b*z(1) - m*g*l*sin(z(2))/2 ) / (m*l^2/3); | ||
dz(2) = z(1); |
110 changes: 110 additions & 0 deletions
110
examples/pendulum-matlab/matlab-envinroment/loss_pendulum.m
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,110 @@ | ||
%% loss_pendulum.m | ||
% *Summary:* Pendulum loss function; the loss is | ||
% $1-\exp(-0.5*d^2*a)$, where $a>0$ and $d^2$ is the squared difference | ||
% between the actual and desired position of the tip of the pendulum. | ||
% The mean and the variance of the loss are computed by averaging over the | ||
% Gaussian distribution of the state $p(x) = \mathcal N(m,s)$ with mean $m$ | ||
% and covariance matrix $s$. | ||
% Derivatives of these quantities are computed when desired. | ||
% | ||
% | ||
% function [L, dLdm, dLds, S2] = loss_pendulum(cost, m, s) | ||
% | ||
% | ||
% *Input arguments:* | ||
% | ||
% cost cost structure | ||
% .p lengths of the pendulum [1 x 1 ] | ||
% .width array of widths of the cost (summed together) | ||
% .expl (optional) exploration parameter | ||
% .angle (optional) array of angle indices | ||
% .target target state [D x 1 ] | ||
% m mean of state distribution [D x 1 ] | ||
% s covariance matrix for the state distribution [D x D ] | ||
% | ||
% *Output arguments:* | ||
% | ||
% L expected cost [1 x 1 ] | ||
% dLdm derivative of expected cost wrt. state mean vector [1 x D ] | ||
% dLds derivative of expected cost wrt. state covariance matrix [1 x D^2] | ||
% S2 variance of cost [1 x 1 ] | ||
% | ||
% Copyright (C) 2008-2014 by | ||
% Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen. | ||
% | ||
% Last modified: 2014-01-09 | ||
% | ||
%% High-Level Steps | ||
% # Precomputations | ||
% # Define static penalty as distance from target setpoint | ||
% # Trigonometric augmentation | ||
% # Calculate loss | ||
|
||
function [L, dLdm, dLds, S2] = loss_pendulum(cost, m, s) | ||
%% Code | ||
if isfield(cost,'width'); cw = cost.width; else cw = 1; end | ||
if ~isfield(cost,'expl') || isempty(cost.expl); b = 0; else b = cost.expl; end | ||
|
||
|
||
% 1. Some precomputations | ||
D0 = size(s,2); % state dimension | ||
D1 = D0 + 2*length(cost.angle); % state dimension (with sin/cos) | ||
|
||
M = zeros(D1,1); M(1:D0) = m; S = zeros(D1); S(1:D0,1:D0) = s; | ||
Mdm = [eye(D0); zeros(D1-D0,D0)]; Sdm = zeros(D1*D1,D0); | ||
Mds = zeros(D1,D0*D0); Sds = kron(Mdm,Mdm); | ||
|
||
% 2. Define static penalty as distance from target setpoint | ||
ell = cost.p; | ||
Q = zeros(D1); Q(D0+1:D0+2,D0+1:D0+2) = eye(2)*ell^2; | ||
|
||
% 3. Trigonometric augmentation | ||
if D1-D0 > 0 | ||
target = [cost.target(:); ... | ||
gTrig(cost.target(:), zeros(numel(cost.target)), cost.angle)]; | ||
|
||
i = 1:D0; k = D0+1:D1; | ||
[M(k), S(k,k), C, mdm, sdm, Cdm, mds, sds, Cds] = ... | ||
gTrig(M(i),S(i,i),cost.angle); | ||
[S, Mdm, Mds, Sdm, Sds] = ... | ||
fillIn(S,C,mdm,sdm,Cdm,mds,sds,Cds,Mdm,Sdm,Mds,Sds,i,k,D1); | ||
end | ||
|
||
% 4. Calculate loss | ||
L = 0; dLdm = zeros(1,D0); dLds = zeros(1,D0*D0); S2 = 0; | ||
for i = 1:length(cw) % scale mixture of immediate costs | ||
cost.z = target; cost.W = Q/cw(i)^2; | ||
[r, rdM, rdS, s2, s2dM, s2dS] = lossSat(cost, M, S); | ||
|
||
L = L + r; S2 = S2 + s2; | ||
dLdm = dLdm + rdM(:)'*Mdm + rdS(:)'*Sdm; | ||
dLds = dLds + rdM(:)'*Mds + rdS(:)'*Sds; | ||
|
||
if (b~=0 || ~isempty(b)) && abs(s2)>1e-12 | ||
L = L + b*sqrt(s2); | ||
dLdm = dLdm + b/sqrt(s2) * ( s2dM(:)'*Mdm + s2dS(:)'*Sdm )/2; | ||
dLds = dLds + b/sqrt(s2) * ( s2dM(:)'*Mds + s2dS(:)'*Sds )/2; | ||
end | ||
end | ||
|
||
% normalize | ||
n = length(cw); L = L/n; dLdm = dLdm/n; dLds = dLds/n; S2 = S2/n; | ||
|
||
% Fill in covariance matrix...and derivatives ---------------------------- | ||
function [S, Mdm, Mds, Sdm, Sds] = ... | ||
fillIn(S,C,mdm,sdm,Cdm,mds,sds,Cds,Mdm,Sdm,Mds,Sds,i,k,D) | ||
X = reshape(1:D*D,[D D]); XT = X'; % vectorised indices | ||
I=0*X; I(i,i)=1; ii=X(I==1)'; I=0*X; I(k,k)=1; kk=X(I==1)'; | ||
I=0*X; I(i,k)=1; ik=X(I==1)'; ki=XT(I==1)'; | ||
|
||
Mdm(k,:) = mdm*Mdm(i,:) + mds*Sdm(ii,:); % chainrule | ||
Mds(k,:) = mdm*Mds(i,:) + mds*Sds(ii,:); | ||
Sdm(kk,:) = sdm*Mdm(i,:) + sds*Sdm(ii,:); | ||
Sds(kk,:) = sdm*Mds(i,:) + sds*Sds(ii,:); | ||
dCdm = Cdm*Mdm(i,:) + Cds*Sdm(ii,:); | ||
dCds = Cdm*Mds(i,:) + Cds*Sds(ii,:); | ||
|
||
S(i,k) = S(i,i)*C; S(k,i) = S(i,k)'; % off-diagonal | ||
SS = kron(eye(length(k)),S(i,i)); CC = kron(C',eye(length(i))); | ||
Sdm(ik,:) = SS*dCdm + CC*Sdm(ii,:); Sdm(ki,:) = Sdm(ik,:); | ||
Sds(ik,:) = SS*dCds + CC*Sds(ii,:); Sds(ki,:) = Sds(ik,:); |
46 changes: 46 additions & 0 deletions
46
examples/pendulum-matlab/matlab-envinroment/pendulum_learn.m
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
%% pendulum_learn.m | ||
% *Summary:* Script to learn a controller for the pendulum swingup | ||
% | ||
% Copyright (C) 2008-2013 by | ||
% Marc Deisenroth, Andrew McHutchon, Joe Hall, and Carl Edward Rasmussen. | ||
% | ||
% Last modified: 2013-03-27 | ||
% | ||
%% High-Level Steps | ||
% # Load parameters | ||
% # Create J initial trajectories by applying random controls | ||
% # Controlled learning (train dynamics model, policy learning, policy | ||
% application) | ||
|
||
%% Code | ||
|
||
% 1. Initialization | ||
clear all; close all; | ||
settings_pendulum; % load scenario-specific settings | ||
basename = 'pendulum_'; % filename used for saving data | ||
|
||
% 2. Initial J random rollouts | ||
for jj = 1:J | ||
[xx, yy, realCost{jj}, latent{jj}] = ... | ||
rollout(gaussian(mu0, S0), struct('maxU',policy.maxU), H, plant, cost); | ||
x = [x; xx]; y = [y; yy]; % augment training sets for dynamics model | ||
if plotting.verbosity > 0; % visualization of trajectory | ||
if ~ishandle(1); figure(1); else set(0,'CurrentFigure',1); end; clf(1); | ||
draw_rollout_pendulum; | ||
end | ||
end | ||
|
||
mu0Sim(odei,:) = mu0; S0Sim(odei,odei) = S0; | ||
mu0Sim = mu0Sim(dyno); S0Sim = S0Sim(dyno,dyno); | ||
|
||
% 3. Controlled learning (N iterations) | ||
for j = 1:N | ||
trainDynModel; % train (GP) dynamics model | ||
learnPolicy; % learn policy | ||
applyController; % apply controller to system | ||
disp(['controlled trial # ' num2str(j)]); | ||
if plotting.verbosity > 0; % visualization of trajectory | ||
if ~ishandle(1); figure(1); else set(0,'CurrentFigure',1); end; clf(1); | ||
draw_rollout_pendulum; | ||
end | ||
end |
Oops, something went wrong.