// demo-grid.wppl
//
// WebPPL model of an agent on a W x H grid maze. For a goal cell g and a
// remaining horizon t, `policy` is a distribution over actions weighted by
// exp(action value), and `V` is the expected value of following that policy.
// The script prints V(s, 0, 200) for every cell s.
//
// Cells are numbered row by row: s = x + W * y, with x in [0, W) and
// y in [0, H).

var H = 10; // number of rows (range of y)
var W = 5; // number of columns (range of x)

// Discount applied to the value of the successor state at each step.
var discount = 0.9;

// Returns the array [0, 1, ..., i - 1]; an empty array when i <= 0.
var make_states = function(i) {
  return i <= 0 ? [] : make_states(i - 1).concat([i - 1]);
};

// Every cell index of the grid.
var S = make_states(H * W);

// Not referenced anywhere in this file.
// NOTE(review): presumably candidate goal cells -- confirm before removing.
var G = [0, 4];

// Action ids; each one indexes a row of coord_actions.
var A = [0, 1, 2, 3]; // left, right, up, down

// [dx, dy] applied by each action. "Up" means y + 1, which is the *next* row
// of the `maze` literal below (the literal lists y = 0 first).
var coord_actions = [
  [-1, 0], // 0: x - 1
  [1, 0], // 1: x + 1
  [0, 1], // 2: y + 1
  [0, -1] // 3: y - 1
];

// Grid layout, one row of W cells per line, y = 0 first.
// 1 marks a cell that cannot be entered (see Tr); 0 marks a free cell.
var maze = [
  0, 0, 0, 0, 0,
  0, 1, 0, 1, 0,
  0, 1, 1, 1, 0,
  0, 1, 0, 1, 0,
  0, 1, 0, 1, 0,
  0, 1, 0, 1, 0,
  0, 1, 0, 1, 0,
  0, 1, 0, 0, 0,
  0, 1, 0, 0, 0,
  0, 0, 0, 0, 0
];

// Restricts v to the closed interval [lo, hi].
var clamp = function(v, lo, hi) {
  return v < lo ? lo : (v > hi ? hi : v);
};

// Deterministic transition function.
//   s: current cell index, a: action id (index into coord_actions).
// Returns the cell reached by taking a from s. Moves that would leave the
// grid are clamped to the border; moves into a cell marked 1 in `maze` leave
// the agent at s.
var Tr = function(s, a) {
  var x = s % W;
  var y = Math.floor(s / W);
  var delta = coord_actions[a];

  var next_x = clamp(x + delta[0], 0, W - 1);
  var next_y = clamp(y + delta[1], 0, H - 1);

  var next_state = next_x + W * next_y;
  return maze[next_state] === 1 ? s : next_state;
};

// Reward: 1 when the agent is on the goal cell g, 0 otherwise.
// The action a is accepted for signature uniformity but not used.
var R = function(s, a, g) {
  return s === g ? 1.0 : 0.0;
};

// An episode ends as soon as the agent stands on the goal cell.
var is_terminating = function(s, g) {
  return s === g;
};

// Value of taking action a in state s with goal g and t steps remaining:
// the immediate reward plus, unless the horizon is exhausted (t <= 0) or s is
// terminal, the discounted value of the successor state.
// Tr draws no random choices, so the inner Infer has a single outcome; the
// expectation wrapper is kept from the original formulation.
var action_value = function(s, a, g, t) {
  var future = (t <= 0 || is_terminating(s, g)) ?
      0.0 :
      discount * expectation(Infer(function() {
        var s_ = Tr(s, a);
        return V(s_, g, t - 1);
      }));
  return R(s, a, g) + future;
};

// Distribution over actions in state s for goal g with t steps remaining.
// Actions are drawn uniformly from A and reweighted by exp(action value).
// No inverse-temperature scaling is applied (original note: factor(beta * value)).
// Results are memoised per (s, g, t) by dp.cache.
var policy = dp.cache(function(s, g, t) {
  return Infer(function() {
    var a = uniformDraw(A);
    factor(action_value(s, a, g, t));
    return a;
  });
});

// Expected value of state s for goal g with t steps remaining when actions
// are sampled from policy(s, g, t). Memoised per (s, g, t) by dp.cache.
var V = dp.cache(function(s, g, t) {
  return expectation(Infer(function() {
    var a = sample(policy(s, g, t));
    return action_value(s, a, g, t);
  }));
});

// Value of every cell for goal cell 0 with a horizon of 200 steps.
var out = map(function(s) {
  return V(s, 0, 200);
}, S);

console.log(out);