Merge pull request #21 from danielz9999/LM-Cut

Implements LMCut heuristic.
JuliaPlanners · Sep 6, 2024 · 97f1fe9 · 97f1fe9
2 parents 7e53990 + cdd1105
commit 97f1fe9
Show file tree

Hide file tree

Showing 6 changed files with 275 additions and 9 deletions.
diff --git a/src/heuristics/heuristics.jl b/src/heuristics/heuristics.jl
@@ -117,3 +117,4 @@ include("pgraph.jl")
 include("hsp.jl")
 include("ff.jl")
 include("reachability.jl")
+include("lmcut.jl")
diff --git a/src/heuristics/lmcut.jl b/src/heuristics/lmcut.jl
@@ -0,0 +1,234 @@
+## The LM-Cut Heuristic ##
+export LMCut
+
+"""
+    LMCut()
+
+A landmark-based heuristic [1], which builds on top of the relaxed planning
+graph heuristic [`HMax`](@ref). This heuristic iteratively finds sets of
+actions through which any relaxed plan must pass (action landmarks), adding the
+cost of the least costly action in each landmark to the total heuristic value.
+This cost is then subtracted from the cost of every action in that landmark,
+and the process is repeated until the h-max cost of the goal is driven to zero.
+The value of the heuristic is thus the sum of the minimum action costs across
+all landmarks found.
+
+[1] M. Helmert and C. Domshlak, "Landmarks, Critical Paths and Abstractions:
+What's the Difference Anyway?," ICAPS (2009), vol. 19 no. 1, pp. 162-169.
+<https://doi.org/10.1609/icaps.v19i1.13370>
+"""
+mutable struct LMCut <: Heuristic
+    dynamic_goal::Bool # Flag whether goal-relevant information is dynamic
+    goal_hash::Union{Nothing,UInt} # Hash of most recently pre-computed goal
+    statics::Vector{Symbol} # Static domain fluents
+    graph::PlanningGraph # Precomputed planning graph
+    action_costs::Vector{Float32} # Precomputed action costs
+    LMCut() = new() # Fields are left uninitialized; `precompute!` assigns them
+end
+
+# An `LMCut` heuristic counts as precomputed once its `graph` field is assigned.
+function is_precomputed(h::LMCut)
+    return isdefined(h, :graph)
+end
+
+# Print the heuristic as `<summary>(precomputed=<true|false>)`.
+function Base.show(io::IO, h::LMCut)
+    flag = is_precomputed(h)
+    print(io, summary(h), "(", "precomputed=$(flag)", ")")
+end
+
+function precompute!(h::LMCut, domain::Domain, state::State)
+    # No goal specification is given, so the goal is treated as dynamic
+    # and no goal hash is recorded yet
+    h.goal_hash = nothing
+    h.dynamic_goal = true
+    # Infer the static fluents first, since the planning graph is built from them
+    h.statics = infer_static_fluents(domain)
+    h.graph = build_planning_graph(domain, state; statics=h.statics)
+    # Assign unit cost to every action whose index lies strictly after the
+    # first `n_axioms` entries and before the last `n_goals` entries;
+    # all remaining entries cost nothing
+    graph = h.graph
+    last_nongoal_idx = length(graph.actions) - graph.n_goals
+    h.action_costs = Float32[
+        (graph.n_axioms < act_idx <= last_nongoal_idx) ? 1.0f0 : 0.0f0
+        for act_idx in eachindex(graph.actions)
+    ]
+    return h
+end
+
+function precompute!(h::LMCut, domain::Domain, state::State, spec::Specification)
+    # A goal specification is given, so the goal is treated as fixed
+    # and its hash is remembered
+    h.dynamic_goal = false
+    h.goal_hash = hash(get_goal_terms(spec))
+    # Infer the static fluents first, since the planning graph is built from them
+    h.statics = infer_static_fluents(domain)
+    h.graph = build_planning_graph(domain, state, spec; statics=h.statics)
+    # Fill in the cost of each action: entries among the first `n_axioms` or
+    # the last `n_goals` indices are free, all others take the cost given by
+    # the specification (or unit cost if it defines none)
+    n_actions = length(h.graph.actions)
+    last_nongoal_idx = n_actions - h.graph.n_goals
+    costs = Vector{Float32}(undef, n_actions)
+    for (act_idx, act) in enumerate(h.graph.actions)
+        if act_idx <= h.graph.n_axioms || act_idx > last_nongoal_idx
+            costs[act_idx] = 0.0f0
+        elseif has_action_cost(spec)
+            costs[act_idx] = Float32(get_action_cost(spec, act.term))
+        else
+            costs[act_idx] = 1.0f0
+        end
+    end
+    h.action_costs = costs
+    return h
+end
+
+# Computes the LM-Cut estimate for `state` under `spec`.
+#
+# Returns `Inf32`-valued goal cost if the goal is unreachable in the relaxed
+# problem, zero if the h-max cost of the goal is already zero, and otherwise
+# the sum of the costs of the landmark cuts that are found.
+function compute(h::LMCut, domain::Domain, state::State, spec::Specification)
+    # If necessary, update planning graph with new goal
+    if h.dynamic_goal && hash(get_goal_terms(spec)) != h.goal_hash
+        h.graph = update_pgraph_goal!(h.graph, domain, state, spec;
+                                      statics=h.statics)
+        h.goal_hash = hash(get_goal_terms(spec))
+        # The set of goal actions may have changed, so recompute all costs
+        n_actions = length(h.graph.actions)
+        resize!(h.action_costs, n_actions)
+        for (act_idx, act) in enumerate(h.graph.actions)
+            if h.graph.n_axioms < act_idx <= n_actions - h.graph.n_goals
+                h.action_costs[act_idx] = has_action_cost(spec) ?
+                    Float32(get_action_cost(spec, act.term)) : 1.0f0
+            else
+                h.action_costs[act_idx] = 0.0f0
+            end
+        end
+    end
+    # Compute set of initial facts
+    init_idxs = pgraph_init_idxs(h.graph, domain, state)
+    # Calculate relaxed costs of facts and the h-max value
+    cond_costs, _, _, goal_cost =
+        relaxed_pgraph_search(domain, state, spec, maximum, h.graph;
+                              action_costs = h.action_costs)
+    # Terminate early if goal is unreachable
+    goal_cost == Inf32 && return goal_cost
+    hval = 0.0f0
+    # Terminate early if the goal is already reachable at zero cost: no cut
+    # can be extracted in that case, and running the loop body anyway could
+    # add a spurious (or infinite) landmark cost to the heuristic value
+    iszero(goal_cost) && return hval
+    # Iteratively find landmark cuts and sum their costs, working on a copy
+    # so that the precomputed action costs are left untouched
+    action_costs = copy(h.action_costs)
+    for _ in 1:length(h.graph.actions)
+        # Find the supporters for each action
+        supporters = find_supporters(h.graph, cond_costs)
+        # Construct the justification graph
+        jgraph = build_justification_graph(h.graph, supporters, action_costs)
+        # Extract the goal zone
+        goal_zone = extract_goal_zone(jgraph)
+        # Extract the landmarks and their cost (the pregoal zone is not needed)
+        _, landmark_idxs, landmark_cost =
+            extract_pregoal_zone_and_landmarks(jgraph, goal_zone,
+                                               init_idxs, action_costs)
+        # Update heuristic value and action costs
+        hval += landmark_cost
+        for idx in landmark_idxs
+            action_costs[idx] -= landmark_cost
+        end
+        # Re-calculate relaxed costs to each fact
+        cond_costs, _, _, goal_cost =
+            relaxed_pgraph_search(domain, state, spec, maximum, h.graph;
+                                  action_costs = action_costs)
+        # Terminate once goal cost has been reduced to zero
+        iszero(goal_cost) && break
+    end
+    return hval
+end
+
+"Finds the most costly precondition (i.e. supporter) of each action."
+function find_supporters(pgraph::PlanningGraph, cond_costs::Vector{T}) where {T <: Real}
+    # Each action's preconditions are a list of clauses, each clause being a
+    # list of alternative condition indices. A clause is represented by its
+    # cheapest condition, and the supporter is the representative of the most
+    # expensive clause. Ties are broken in favour of the earliest candidate.
+    return map(eachindex(pgraph.actions)) do act_idx
+        supporter = nothing
+        supporter_cost = typemin(T)
+        for clause in pgraph.act_parents[act_idx]
+            # Pick the cheapest condition within this clause
+            cheapest = nothing
+            cheapest_cost = typemax(T)
+            for cond_idx in clause
+                cost = cond_costs[cond_idx]
+                if cheapest === nothing || cost < cheapest_cost
+                    cheapest, cheapest_cost = cond_idx, cost
+                end
+            end
+            # Keep this clause if it is costlier than the best one so far
+            if supporter === nothing || cheapest_cost > supporter_cost
+                supporter, supporter_cost = cheapest, cheapest_cost
+            end
+        end
+        # Asserts that a supporter was actually found for this action
+        return supporter::Int
+    end
+end
+
+"Justification graph used by the LMCut heuristic."
+struct JustificationGraph
+    # Forward adjacency list: for each node, the outgoing edges stored as
+    # `(action index, child node index)` pairs
+    fadjlist::Vector{Vector{Tuple{Int, Int}}}
+    # Backward adjacency list: for each node, the indices of its parent nodes
+    badjlist::Vector{Vector{Int}}
+end
+
+# Creates an edgeless justification graph with one node per condition,
+# plus one additional node at index `n_conditions + 1`.
+function JustificationGraph(n_conditions::Int)
+    node_range = 1:n_conditions+1
+    forward = map(_ -> Tuple{Int, Int}[], node_range)
+    backward = map(_ -> Int[], node_range)
+    return JustificationGraph(forward, backward)
+end
+
+"Constructs a justification graph from the relaxed planning graph."
+function build_justification_graph(
+    pgraph::PlanningGraph, supporters::Vector{Int}, action_costs::Vector{<:Real}
+)
+    n_conditions = length(pgraph.conditions)
+    # The node after the last condition serves as the dummy goal node
+    goal_node = n_conditions + 1
+    n_actions = length(pgraph.actions)
+    last_nongoal_idx = n_actions - pgraph.n_goals
+    jgraph = JustificationGraph(n_conditions)
+    for act_idx in 1:n_actions
+        supporter = supporters[act_idx]
+        is_free = action_costs[act_idx] == 0
+        # Link the action's supporter to every condition the action achieves;
+        # backward edges are only recorded for zero-cost actions
+        for child in pgraph.act_children[act_idx]
+            push!(jgraph.fadjlist[supporter], (act_idx, child))
+            if is_free
+                push!(jgraph.badjlist[child], supporter)
+            end
+        end
+        # Goal actions additionally link their supporter to the dummy goal node
+        if act_idx > last_nongoal_idx
+            push!(jgraph.fadjlist[supporter], (act_idx, goal_node))
+            push!(jgraph.badjlist[goal_node], supporter)
+        end
+    end
+    return jgraph
+end
+
+"Extract goal zone from the justification graph."
+function extract_goal_zone(jgraph::JustificationGraph)
+    # Breadth-first traversal over backward edges, seeded with the last node
+    # of the graph. The seed itself is not inserted into the zone; only nodes
+    # reached as parents are.
+    frontier = Int[length(jgraph.fadjlist)]
+    goal_zone = Set{Int}()
+    head = 1
+    while head <= length(frontier)
+        node = frontier[head]
+        head += 1
+        for parent in jgraph.badjlist[node]
+            parent in goal_zone && continue
+            push!(goal_zone, parent)
+            push!(frontier, parent)
+        end
+    end
+    return goal_zone
+end
+
+"Extract pregoal zone and action landmarks from the justification graph."
+function extract_pregoal_zone_and_landmarks(
+    jgraph::JustificationGraph, goal_zone::Set{Int},
+    init_idxs::BitVector, action_costs::Vector{<:Real}
+)
+    # Breadth-first traversal over forward edges, seeded with the initial facts
+    frontier = findall(init_idxs)
+    pregoal_zone = Set{Int}(frontier)
+    landmark_idxs = Set{Int}()
+    landmark_cost = Inf32
+    head = 1
+    while head <= length(frontier)
+        node = frontier[head]
+        head += 1
+        for (act_idx, child) in jgraph.fadjlist[node]
+            if child in goal_zone
+                # The edge crosses into the goal zone, so its action is a
+                # landmark; track the cheapest landmark seen so far
+                push!(landmark_idxs, act_idx)
+                landmark_cost = min(landmark_cost, action_costs[act_idx])
+            elseif !in(child, pregoal_zone)
+                # Otherwise the child joins the pregoal zone and is expanded later
+                push!(pregoal_zone, child)
+                push!(frontier, child)
+            end
+        end
+    end
+    return pregoal_zone, landmark_idxs, landmark_cost
+end
diff --git a/src/heuristics/pgraph.jl b/src/heuristics/pgraph.jl
@@ -298,7 +298,8 @@ end
 
 "Compute relaxed costs and paths to each fact node of a planning graph."
 function relaxed_pgraph_search(domain::Domain, state::State, spec::Specification,
-                               accum_op::Function, graph::PlanningGraph)
+                               accum_op::Function, graph::PlanningGraph;
+                               action_costs = nothing)
     # Initialize fact costs, precondition flags,  etc.
     n_actions = length(graph.actions)
     n_conds = length(graph.conditions)
@@ -315,14 +316,14 @@ function relaxed_pgraph_search(domain::Domain, state::State, spec::Specification
 
     # Perform Djikstra / uniform-cost search until goals are reached
     goal_idx, goal_cost = nothing, Inf32
-    first_goal_idx = n_actions - graph.n_goals
+    last_nongoal_idx = n_actions - graph.n_goals
     while !isempty(queue) && isnothing(goal_idx)
         # Dequeue nearest fact/condition
         cond_idx = dequeue!(queue)
         # Iterate over child actions
         for (act_idx, precond_idx) in graph.cond_children[cond_idx]
             # Check if goal action is reached
-            is_goal = act_idx > first_goal_idx
+            is_goal = act_idx > last_nongoal_idx
             # Skip actions with no children
             !is_goal && isempty(graph.act_children[act_idx]) && continue
             # Skip actions already achieved
@@ -347,8 +348,12 @@ function relaxed_pgraph_search(domain::Domain, state::State, spec::Specification
                 path_cost = accum_op(act_parents) do precond_parents
                     minimum(costs[p] for p in precond_parents)
                 end
-                act_cost = has_action_cost(spec) ?
-                    get_action_cost(spec, graph.actions[act_idx].term) : 1
+                if isnothing(action_costs)
+                    act_cost = has_action_cost(spec) ?
+                        get_action_cost(spec, graph.actions[act_idx].term) : 1
+                else
+                    act_cost = action_costs[act_idx]
+                end
                 next_cost = path_cost + act_cost
                 next_dist = accum_op === maximum && !has_action_cost(spec) ?
                     next_cost : dists[cond_idx] + 1

diff --git a/src/heuristics/reachability.jl b/src/heuristics/reachability.jl
@@ -26,7 +26,7 @@ is_precomputed(h::ReachabilityHeuristic) = isdefined(h, :absdom)
 function precompute!(h::ReachabilityHeuristic,
                      domain::Domain, state::State, spec::Specification)
     # Store abstracted domain
-    h.absdom, _ = abstracted(domain, state)
+    h.absdom, _ = abstracted(domain, state; autowiden=true)
     return h
 end
 

diff --git a/test/heuristics.jl b/test/heuristics.jl
@@ -206,6 +206,32 @@ bw_act_spec = ActionGoal(pddl"(stack a ?x)", pddl"(on ?x c)")
 
 end
 
+@testset "LM-Cut Heuristic" begin
+
+# Heuristic values are checked against lower and upper bounds,
+# not against exact values
+lmcut = LMCut()
+@test 4 >= lmcut(blocksworld, bw_state, bw_problem.goal) >= 2
+@test 7 >= lmcut(wgc_domain, wgc_state, wgc_problem.goal) >= 1
+@test 4 >= lmcut(bw_axioms, ba_state, ba_problem.goal) >= 2
+
+# Test dynamic goal updating
+# (precomputing without a goal, then supplying the goal to `compute`)
+precompute!(lmcut, blocksworld, bw_state)
+@test 4 >= compute(lmcut, blocksworld, bw_state, bw_problem.goal) >= 2
+precompute!(lmcut, wgc_domain, wgc_state)
+@test 7 >= compute(lmcut, wgc_domain, wgc_state, wgc_problem.goal) >= 1
+precompute!(lmcut, bw_axioms, ba_state)
+@test 4 >= compute(lmcut, bw_axioms, ba_state, ba_problem.goal) >= 2
+
+# Test that LM-Cut always dominates HMax
+# (its value must be at least the HMax value on the same problem)
+hmax = HMax()
+@test lmcut(blocksworld, bw_state, bw_problem.goal) >=
+    hmax(blocksworld, bw_state, bw_problem.goal)
+@test lmcut(wgc_domain, wgc_state, wgc_problem.goal) >=
+    hmax(wgc_domain, wgc_state, wgc_problem.goal)
+@test lmcut(bw_axioms, ba_state, ba_problem.goal) >=
+    hmax(bw_axioms, ba_state, ba_problem.goal)
+
+end
+
 @testset "Reachability Heuristics" begin
 
 reachability = ReachabilityHeuristic()

diff --git a/test/planners.jl b/test/planners.jl
@@ -147,7 +147,7 @@ sol = planner(doors_keys_gems, dkg_state, dkg_spec)
                             "(unlock key1 door1)", "(right)", "(right)",
                             "(up)", "(up)", "(pickup gem1)")
 
-planner = GreedyPlanner(HAdd())
+planner = GreedyPlanner(LMCut())
 sol = planner(blocksworld, bw_state, bw_spec)
 @test is_goal(bw_spec, blocksworld, sol.trajectory[end])
 @test collect(sol) == @pddl("(pick-up a)", "(stack a b)",
@@ -188,7 +188,7 @@ sol = planner(doors_keys_gems, dkg_state, dkg_spec)
                             "(unlock key1 door1)", "(right)", "(right)",
                             "(up)", "(up)", "(pickup gem1)")
 
-planner = AStarPlanner(HAdd())
+planner = AStarPlanner(LMCut())
 sol = planner(blocksworld, bw_state, bw_spec)
 @test is_goal(bw_spec, blocksworld, sol.trajectory[end])
 @test collect(sol) == @pddl("(pick-up a)", "(stack a b)",
@@ -432,7 +432,7 @@ actions, trajectory = simulator(sol, doors_keys_gems, dkg_state, dkg_spec)
                        "(up)", "(up)", "(pickup gem1)")
 @test get_value(sol, dkg_state) == -9.0
 
-planner = RTDP(heuristic=HAdd(), rollout_noise=1.0, n_rollouts=10)
+planner = RTDP(heuristic=LMCut(), rollout_noise=1.0, n_rollouts=10)
 sol = planner(blocksworld, bw_state, bw_spec)
 actions, trajectory = simulator(sol, blocksworld, bw_state, bw_spec)
 @test is_goal(bw_spec, blocksworld, trajectory[end])