HZ: avoid convergence if bisected

secant2 calls update, and update might switch to bisection in the U3 step. If we did bisect, the line-search progress step L2 testing whether the interval is shrinking fast enough is nonsensical (we might have shrunk multiple iterations of bisection, but that is not an indication that the secant model was "working"). Consequently, this reports back about whether bisection was engaged in `update`, and if so skip any kind of convergence assessment and do another iteration. Fixes JuliaNLSolvers#173.
timholy · Jan 28, 2024 · 0dc986c · 0dc986c
1 parent ded667a
commit 0dc986c
Showing 1 changed file with 40 additions and 34 deletions.
diff --git a/src/hagerzhang.jl b/src/hagerzhang.jl
@@ -203,10 +203,10 @@ function (ls::HagerZhang)(ϕ, ϕdϕ,
         else
             # We'll still going downhill, expand the interval and try again.
             # Reaching this branch means that dphi_c < 0 and phi_c <= phi_0 + ϵ_k
-            # So cold = c has a lower objective than phi_0 up to epsilon. 
+            # So cold = c has a lower objective than phi_0 up to epsilon.
             # This makes it a viable step to return if bracketing fails.
 
-            # Bracketing can fail if no cold < c <= alphamax can be found with finite phi_c and dphi_c. 
+            # Bracketing can fail if no cold < c <= alphamax can be found with finite phi_c and dphi_c.
             # Going back to the loop with c = cold will only result in infinite cycling.
             # So returning (cold, phi_cold) and exiting the line search is the best move.
             cold = c
@@ -266,42 +266,47 @@ function (ls::HagerZhang)(ϕ, ϕdϕ,
             mayterminate[] = false # reset in case another initial guess is used next
             return a, values[ia] # lsr.value[ia]
         end
-        iswolfe, iA, iB = secant2!(ϕdϕ, alphas, values, slopes, ia, ib, phi_lim, delta, sigma, display)
+        iswolfe, iA, iB, bisected = secant2!(ϕdϕ, alphas, values, slopes, ia, ib, phi_lim, delta, sigma, display)
         if iswolfe
             mayterminate[] = false # reset in case another initial guess is used next
             return alphas[iA], values[iA] # lsr.value[iA]
         end
         A = alphas[iA]
         B = alphas[iB]
         @assert B > A
-        if B - A < gamma * (b - a)
-            if display & LINESEARCH > 0
-                println("Linesearch: secant succeeded")
-            end
-            if nextfloat(values[ia]) >= values[ib] && nextfloat(values[iA]) >= values[iB]
-                # It's so flat, secant didn't do anything useful, time to quit
+        if !bisected
+            if B - A < gamma * (b - a)
                 if display & LINESEARCH > 0
-                    println("Linesearch: secant suggests it's flat")
+                    println("Linesearch: secant succeeded")
                 end
-                mayterminate[] = false # reset in case another initial guess is used next
-                return A, values[iA]
+                if nextfloat(values[ia]) >= values[ib] && nextfloat(values[iA]) >= values[iB]
+                    # It's so flat, secant didn't do anything useful, time to quit
+                    if display & LINESEARCH > 0
+                        println("Linesearch: secant suggests it's flat")
+                    end
+                    mayterminate[] = false # reset in case another initial guess is used next
+                    return A, values[iA]
+                end
+                ia = iA
+                ib = iB
+            else
+                # Secant is converging too slowly, use bisection
+                if display & LINESEARCH > 0
+                    println("Linesearch: secant failed, using bisection")
+                end
+                c = (A + B) / convert(T, 2)
+
+                phi_c, dphi_c = ϕdϕ(c)
+                @assert isfinite(phi_c) && isfinite(dphi_c)
+                push!(alphas, c)
+                push!(values, phi_c)
+                push!(slopes, dphi_c)
+
+                ia, ib = update!(ϕdϕ, alphas, values, slopes, iA, iB, length(alphas), phi_lim, display)
             end
+        else
             ia = iA
             ib = iB
-        else
-            # Secant is converging too slowly, use bisection
-            if display & LINESEARCH > 0
-                println("Linesearch: secant failed, using bisection")
-            end
-            c = (A + B) / convert(T, 2)
-
-            phi_c, dphi_c = ϕdϕ(c)
-            @assert isfinite(phi_c) && isfinite(dphi_c)
-            push!(alphas, c)
-            push!(values, phi_c)
-            push!(slopes, dphi_c)
-
-            ia, ib = update!(ϕdϕ, alphas, values, slopes, iA, iB, length(alphas), phi_lim, display)
         end
         iter += 1
     end
@@ -373,14 +378,15 @@ function secant2!(ϕdϕ,
     push!(slopes, dphi_c)
 
     ic = length(alphas)
+    bisected = false
     if satisfies_wolfe(c, phi_c, dphi_c, phi_0, dphi_0, phi_lim, delta, sigma)
         if display & SECANT2 > 0
             println("secant2: first c satisfied Wolfe conditions")
         end
-        return true, ic, ic
+        return true, ic, ic, bisected
     end
 
-    iA, iB = update!(ϕdϕ, alphas, values, slopes, ia, ib, ic, phi_lim, display)
+    iA, iB, bisected = update!(ϕdϕ, alphas, values, slopes, ia, ib, ic, phi_lim, display)
     if display & SECANT2 > 0
         println("secant2: iA = ", iA, ", iB = ", iB, ", ic = ", ic)
     end
@@ -412,14 +418,14 @@ function secant2!(ϕdϕ,
             if display & SECANT2 > 0
                 println("secant2: second c satisfied Wolfe conditions")
             end
-            return true, ic, ic
+            return true, ic, ic, bisected
         end
         iA, iB = update!(ϕdϕ, alphas, values, slopes, iA, iB, ic, phi_lim, display)
     end
     if display & SECANT2 > 0
         println("secant2 output: a = ", alphas[iA], ", b = ", alphas[iB])
     end
-    return false, iA, iB
+    return false, iA, iB, bisected
 end
 
 # HZ, stages U0-U3
@@ -457,22 +463,22 @@ function update!(ϕdϕ,
                 ", dphi_c = ", dphi_c)
     end
     if c < a || c > b
-        return ia, ib #, 0, 0  # it's out of the bracketing interval
+        return ia, ib, false #, 0, 0  # it's out of the bracketing interval
     end
     if dphi_c >= zeroT
-        return ia, ic #, 0, 0  # replace b with a closer point
+        return ia, ic, false #, 0, 0  # replace b with a closer point
     end
     # We know dphi_c < 0. However, phi may not be monotonic between a
     # and c, so check that the value is also smaller than phi_0.  (It's
     # more dangerous to replace a than b, since we're leaving the
     # secure environment of alpha=0; that's why we didn't check this
     # above.)
     if phi_c <= phi_lim
-        return ic, ib#, 0, 0  # replace a
+        return ic, ib, false#, 0, 0  # replace a
     end
     # phi_c is bigger than phi_0, which implies that the minimum
     # lies between a and c. Find it via bisection.
-    return bisect!(ϕdϕ, alphas, values, slopes, ia, ic, phi_lim, display)
+    return (bisect!(ϕdϕ, alphas, values, slopes, ia, ic, phi_lim, display)..., true)
 end
 
 # HZ, stage U3 (with theta=0.5)