Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

HZ: avoid convergence if bisected #174

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
74 changes: 40 additions & 34 deletions src/hagerzhang.jl
Original file line number Diff line number Diff line change
Expand Up @@ -203,10 +203,10 @@ function (ls::HagerZhang)(ϕ, ϕdϕ,
else
# We're still going downhill, expand the interval and try again.
# Reaching this branch means that dphi_c < 0 and phi_c <= phi_0 + ϵ_k
# So cold = c has a lower objective than phi_0 up to epsilon.
# So cold = c has a lower objective than phi_0 up to epsilon.
# This makes it a viable step to return if bracketing fails.

# Bracketing can fail if no cold < c <= alphamax can be found with finite phi_c and dphi_c.
# Bracketing can fail if no cold < c <= alphamax can be found with finite phi_c and dphi_c.
# Going back to the loop with c = cold will only result in infinite cycling.
# So returning (cold, phi_cold) and exiting the line search is the best move.
cold = c
Expand Down Expand Up @@ -266,42 +266,47 @@ function (ls::HagerZhang)(ϕ, ϕdϕ,
mayterminate[] = false # reset in case another initial guess is used next
return a, values[ia] # lsr.value[ia]
end
iswolfe, iA, iB = secant2!(ϕdϕ, alphas, values, slopes, ia, ib, phi_lim, delta, sigma, display)
iswolfe, iA, iB, bisected = secant2!(ϕdϕ, alphas, values, slopes, ia, ib, phi_lim, delta, sigma, display)
if iswolfe
mayterminate[] = false # reset in case another initial guess is used next
return alphas[iA], values[iA] # lsr.value[iA]
end
A = alphas[iA]
B = alphas[iB]
@assert B > A
if B - A < gamma * (b - a)
if display & LINESEARCH > 0
println("Linesearch: secant succeeded")
end
if nextfloat(values[ia]) >= values[ib] && nextfloat(values[iA]) >= values[iB]
# It's so flat, secant didn't do anything useful, time to quit
if !bisected
if B - A < gamma * (b - a)
if display & LINESEARCH > 0
println("Linesearch: secant suggests it's flat")
println("Linesearch: secant succeeded")
end
mayterminate[] = false # reset in case another initial guess is used next
return A, values[iA]
if nextfloat(values[ia]) >= values[ib] && nextfloat(values[iA]) >= values[iB]
# It's so flat, secant didn't do anything useful, time to quit
if display & LINESEARCH > 0
println("Linesearch: secant suggests it's flat")
end
mayterminate[] = false # reset in case another initial guess is used next
return A, values[iA]
end
ia = iA
ib = iB
else
# Secant is converging too slowly, use bisection
if display & LINESEARCH > 0
println("Linesearch: secant failed, using bisection")
end
c = (A + B) / convert(T, 2)

phi_c, dphi_c = ϕdϕ(c)
@assert isfinite(phi_c) && isfinite(dphi_c)
push!(alphas, c)
push!(values, phi_c)
push!(slopes, dphi_c)

ia, ib = update!(ϕdϕ, alphas, values, slopes, iA, iB, length(alphas), phi_lim, display)
end
else
ia = iA
ib = iB
else
# Secant is converging too slowly, use bisection
if display & LINESEARCH > 0
println("Linesearch: secant failed, using bisection")
end
c = (A + B) / convert(T, 2)

phi_c, dphi_c = ϕdϕ(c)
@assert isfinite(phi_c) && isfinite(dphi_c)
push!(alphas, c)
push!(values, phi_c)
push!(slopes, dphi_c)

ia, ib = update!(ϕdϕ, alphas, values, slopes, iA, iB, length(alphas), phi_lim, display)
end
iter += 1
end
Expand Down Expand Up @@ -373,14 +378,15 @@ function secant2!(ϕdϕ,
push!(slopes, dphi_c)

ic = length(alphas)
bisected = false
if satisfies_wolfe(c, phi_c, dphi_c, phi_0, dphi_0, phi_lim, delta, sigma)
if display & SECANT2 > 0
println("secant2: first c satisfied Wolfe conditions")
end
return true, ic, ic
return true, ic, ic, bisected
end

iA, iB = update!(ϕdϕ, alphas, values, slopes, ia, ib, ic, phi_lim, display)
iA, iB, bisected = update!(ϕdϕ, alphas, values, slopes, ia, ib, ic, phi_lim, display)
if display & SECANT2 > 0
println("secant2: iA = ", iA, ", iB = ", iB, ", ic = ", ic)
end
Expand Down Expand Up @@ -412,14 +418,14 @@ function secant2!(ϕdϕ,
if display & SECANT2 > 0
println("secant2: second c satisfied Wolfe conditions")
end
return true, ic, ic
return true, ic, ic, bisected
end
iA, iB = update!(ϕdϕ, alphas, values, slopes, iA, iB, ic, phi_lim, display)
end
if display & SECANT2 > 0
println("secant2 output: a = ", alphas[iA], ", b = ", alphas[iB])
end
return false, iA, iB
return false, iA, iB, bisected
end

# HZ, stages U0-U3
Expand Down Expand Up @@ -457,22 +463,22 @@ function update!(ϕdϕ,
", dphi_c = ", dphi_c)
end
if c < a || c > b
return ia, ib #, 0, 0 # it's out of the bracketing interval
return ia, ib, false #, 0, 0 # it's out of the bracketing interval
end
if dphi_c >= zeroT
return ia, ic #, 0, 0 # replace b with a closer point
return ia, ic, false #, 0, 0 # replace b with a closer point
end
# We know dphi_c < 0. However, phi may not be monotonic between a
# and c, so check that the value is also smaller than phi_0. (It's
# more dangerous to replace a than b, since we're leaving the
# secure environment of alpha=0; that's why we didn't check this
# above.)
if phi_c <= phi_lim
return ic, ib#, 0, 0 # replace a
return ic, ib, false#, 0, 0 # replace a
end
# phi_c is bigger than phi_lim even though dphi_c < 0, which implies
# that a minimum lies between a and c. Find it via bisection.
return bisect!(ϕdϕ, alphas, values, slopes, ia, ic, phi_lim, display)
return (bisect!(ϕdϕ, alphas, values, slopes, ia, ic, phi_lim, display)..., true)
end

# HZ, stage U3 (with theta=0.5)
Expand Down
Loading