Merge pull request #464 from bknueven/prox_approx_4

Prox approx part 3
Pyomo · Dec 3, 2024 · 1a04420 · 1a04420
2 parents 0a8026c + 57c5dd8
commit 1a04420
Show file tree

Hide file tree

Showing 3 changed files with 134 additions and 54 deletions.
diff --git a/mpisppy/phbase.py b/mpisppy/phbase.py
@@ -8,6 +8,7 @@
 ###############################################################################
 import time
 import logging
+import math
 
 import numpy as np
 import mpisppy.MPI as MPI
@@ -689,6 +690,10 @@ def attach_PH_to_objective(self, add_duals, add_prox, add_smooth=0):
                 self.prox_approx_tol = self.options['proximal_linearization_tolerance']
             else:
                 self.prox_approx_tol = 1.e-1
+            # The proximal approximation code now checks the tolerance based on the x-coordinates
+            # as opposed to the y-coordinates. Therefore, we will use the square root of the
+            # y-coordinate tolerance.
+            self.prox_approx_tol = math.sqrt(self.prox_approx_tol)
         else:
             self._prox_approx = False
 
@@ -734,7 +739,7 @@ def attach_PH_to_objective(self, add_duals, add_prox, add_smooth=0):
                     elif self._prox_approx:
                         xvarsqrd = scenario._mpisppy_model.xsqvar[ndn_i]
                         scenario._mpisppy_data.xsqvar_prox_approx[ndn_i] = \
-                                ProxApproxManager(xvar, xvarsqrd, xbars[ndn_i], scenario._mpisppy_model.xsqvar_cuts, ndn_i)
+                                ProxApproxManager(scenario._mpisppy_model, xvar, ndn_i)
                     else:
                         xvarsqrd = xvar**2
                     prox_expr += (scenario._mpisppy_model.rho[ndn_i] / 2.0) * \

diff --git a/mpisppy/utils/prox_approx.py b/mpisppy/utils/prox_approx.py
@@ -8,42 +8,50 @@
 ###############################################################################
 
 from math import isclose
+from array import array
+from bisect import bisect
 from pyomo.core.expr.numeric_expr import LinearExpression
 
 # helpers for distance from y = x**2
-def _f(val, x_pnt, y_pnt):
-    return (( val - x_pnt )**2 + ( val**2 - y_pnt )**2)/2.
-def _df(val, x_pnt, y_pnt):
-    #return 2*(val - x_pnt) + 4*(val**2 - y_pnt)*val
-    return val*(1 - 2*y_pnt + 2*val*val) - x_pnt
-def _d2f(val, x_pnt, y_pnt):
-    return 1 + 6*val*val - 2*y_pnt
+# def _f(val, x_pnt, y_pnt):
+#     return (( val - x_pnt )**2 + ( val**2 - y_pnt )**2)/2.
+# def _df(val, x_pnt, y_pnt):
+#     #return 2*(val - x_pnt) + 4*(val**2 - y_pnt)*val
+#     return val*(1 - 2*y_pnt + 2*val*val) - x_pnt
+# def _d2f(val, x_pnt, y_pnt):
+#     return 1 + 6*val*val - 2*y_pnt
+# def _newton_step(val, x_pnt, y_pnt):
+#     return val - _df(val, x_pnt, y_pnt) / _d2f(val, x_pnt, y_pnt)
 
 def _newton_step(val, x_pnt, y_pnt):
-    return val - _df(val, x_pnt, y_pnt) / _d2f(val, x_pnt, y_pnt)
+    return val - (val * (1 - 2*y_pnt + 2*val*val) - x_pnt) / (1 + 6*val*val - 2*y_pnt)
 
 class ProxApproxManager:
     __slots__ = ()
 
-    def __new__(cls, xvar, xvarsqrd, xbar, xsqvar_cuts, ndn_i):
+    def __new__(cls, mpisppy_model, xvar, ndn_i):
         if xvar.is_integer():
-            return ProxApproxManagerDiscrete(xvar, xvarsqrd, xbar, xsqvar_cuts, ndn_i)
+            return ProxApproxManagerDiscrete(mpisppy_model, xvar, ndn_i)
         else:
-            return ProxApproxManagerContinuous(xvar, xvarsqrd, xbar, xsqvar_cuts, ndn_i)
+            return ProxApproxManagerContinuous(mpisppy_model, xvar, ndn_i)
 
 class _ProxApproxManager:
     '''
     A helper class to manage proximal approximations
     '''
     __slots__ = ()
 
-    def __init__(self, xvar, xvarsqrd, xbar, xsqvar_cuts, ndn_i):
+    def __init__(self, mpisppy_model, xvar, ndn_i):
         self.xvar = xvar
-        self.xvarsqrd = xvarsqrd
-        self.xbar = xbar
+        self.xvarsqrd = mpisppy_model.xsqvar[ndn_i]
+        self.cuts = mpisppy_model.xsqvar_cuts
+        self.xbar = mpisppy_model.xbars[ndn_i]
+        self.rho = mpisppy_model.rho[ndn_i]
+        self.W = mpisppy_model.W[ndn_i]
         self.var_index = ndn_i
-        self.cuts = xsqvar_cuts
         self.cut_index = 0
+        self.cut_values = array("d")
+        self.cut_values.append(0.0)
         self._store_bounds()
 
     def _store_bounds(self):
@@ -56,61 +64,122 @@ def _store_bounds(self):
         else:
             self.ub = self.xvar.ub
 
-    def add_cut(self, val, persistent_solver=None):
+    def add_cut(self, val, tolerance, persistent_solver):
         '''
         create a cut at val
         '''
         pass
 
+    def check_and_add_value(self, val, tolerance):
+        idx = bisect(self.cut_values, val)
+        # self.cut_values is empty, has one element
+        # or we're appending to the end
+        if idx == len(self.cut_values):
+            if val - self.cut_values[idx-1] < tolerance:
+                return False
+            else:
+                self.cut_values.insert(idx, val)
+                return True
+        # here we're at the beginning
+        if idx == 0:
+            if self.cut_values[idx] - val < tolerance:
+                return False
+            else:
+                self.cut_values.insert(idx, val)
+                return True
+        # in the middle
+        if self.cut_values[idx] - val < tolerance:
+            return False
+        if val - self.cut_values[idx-1] < tolerance:
+            return False
+        self.cut_values.insert(idx, val)
+        return True
+
+    def add_cuts(self, x_val, tolerance, persistent_solver):
+        x_bar = self.xbar.value
+        rho = self.rho.value
+        W = self.W.value
+
+        num_cuts = self.add_cut(x_val, tolerance, persistent_solver)
+        # rotate x_val around x_bar, the minimizer of (\rho / 2)(x - x_bar)^2
+        # to create a vertex at this point
+        rotated_x_val_x_bar = 2*x_bar - x_val
+        if not isclose(x_val, rotated_x_val_x_bar, abs_tol=tolerance):
+            num_cuts += self.add_cut(rotated_x_val_x_bar, tolerance, persistent_solver)
+        # aug_lagrange_point, is the minimizer of w\cdot x + (\rho / 2)(x - x_bar)^2
+        # to create a vertex at this point
+        aug_lagrange_point = -W / rho + x_bar
+        if not isclose(x_val, aug_lagrange_point, abs_tol=tolerance):
+            num_cuts += self.add_cut(2*aug_lagrange_point - x_val, tolerance, persistent_solver)
+        # finally, create another vertex at the aug_lagrange_point by rotating
+        # rotated_x_val_x_bar around the aug_lagrange_point
+        if not isclose(rotated_x_val_x_bar, aug_lagrange_point, abs_tol=tolerance):
+            num_cuts += self.add_cut(2*aug_lagrange_point - rotated_x_val_x_bar, tolerance, persistent_solver)
+        # If we only added 0 or 1 cuts initially, add up to two more
+        # to capture something of the proximal term. This can happen
+        # when x_bar == x_val and W == 0.
+        if self.cut_index <= 1:
+            lb, ub = self.xvar.bounds
+            upval = 1
+            dnval = 1
+            if ub is not None:
+                # after a lot of calculus, you can show that
+                # this point minimizes the error in the approximation
+                upval = 2.0 * (ub - x_val) / 3.0
+            if lb is not None:
+                dnval = 2.0 * (x_val - lb) / 3.0
+            num_cuts += self.add_cut(x_val + max(upval, tolerance+1e-06), tolerance, persistent_solver)
+            num_cuts += self.add_cut(x_val - max(dnval, tolerance+1e-06), tolerance, persistent_solver)
+        # print(f"{x_val=}, {x_bar=}, {W=}")
+        # print(f"{self.cut_values=}")
+        # print(f"{self.cut_index=}")
+        return num_cuts
+
     def check_tol_add_cut(self, tolerance, persistent_solver=None):
         '''
         add a cut if the tolerance is not satified
         '''
+        if self.xvar.fixed:
+            # don't do anything for fixed variables
+            return 0
         x_pnt = self.xvar.value
-        x_bar = self.xbar.value
         y_pnt = self.xvarsqrd.value
-        f_val = x_pnt**2
+        # f_val = x_pnt**2
 
-        #print(f"y-distance: {actual_val - measured_val})")
+        # print(f"{x_pnt=}, {y_pnt=}, {f_val=}")
+        # print(f"y-distance: {actual_val - measured_val})")
         if y_pnt is None:
-            self.add_cut(x_pnt, persistent_solver)
-            if not isclose(x_pnt, x_bar, abs_tol=1e-6):
-                self.add_cut(2*x_bar - x_pnt, persistent_solver)
-            return True
-
-        if (f_val - y_pnt) > tolerance:
-            '''
-            In this case, we project the point x_pnt, y_pnt onto
-            the curve y = x**2 by finding the minimum distance
-            between y = x**2 and x_pnt, y_pnt.
-
-            This involves solving a cubic equation, so instead
-            we start at x_pnt, y_pnt and run newtons algorithm
-            to get an approximate good-enough solution.
-            '''
-            this_val = x_pnt
-            #print(f"initial distance: {_f(this_val, x_pnt, y_pnt)**(0.5)}")
-            #print(f"this_val: {this_val}")
+            y_pnt = 0.0
+        # We project the point x_pnt, y_pnt onto
+        # the curve y = x**2 by finding the minimum distance
+        # between y = x**2 and x_pnt, y_pnt.
+        # This involves solving a cubic equation, so instead
+        # we start at x_pnt, y_pnt and run newtons algorithm
+        # to get an approximate good-enough solution.
+        this_val = x_pnt
+        # print(f"initial distance: {_f(this_val, x_pnt, y_pnt)**(0.5)}")
+        # print(f"this_val: {this_val}")
+        next_val = _newton_step(this_val, x_pnt, y_pnt)
+        while not isclose(this_val, next_val, rel_tol=1e-6, abs_tol=1e-6):
+            # print(f"newton step distance: {_f(next_val, x_pnt, y_pnt)**(0.5)}")
+            # print(f"next_val: {next_val}")
+            this_val = next_val
             next_val = _newton_step(this_val, x_pnt, y_pnt)
-            while not isclose(this_val, next_val, rel_tol=1e-6, abs_tol=1e-6):
-                #print(f"newton step distance: {_f(next_val, x_pnt, y_pnt)**(0.5)}")
-                #print(f"next_val: {next_val}")
-                this_val = next_val
-                next_val = _newton_step(this_val, x_pnt, y_pnt)
-            self.add_cut(next_val, persistent_solver)
-            if not isclose(next_val, x_bar, abs_tol=1e-6):
-                self.add_cut(2*x_bar - next_val, persistent_solver)
-            return True
-        return False
+        x_pnt = next_val
+        return self.add_cuts(x_pnt, tolerance,  persistent_solver)
 
 class ProxApproxManagerContinuous(_ProxApproxManager):
 
-    def add_cut(self, val, persistent_solver=None):
+    def add_cut(self, val, tolerance, persistent_solver):
         '''
         create a cut at val using a taylor approximation
         '''
-        # handled by bound
-        if val == 0:
+        lb, ub = self.xvar.bounds
+        if lb is not None and val < lb:
+            val = lb
+        if ub is not None and val > ub:
+            val = ub
+        if not self.check_and_add_value(val, tolerance):
             return 0
         # f'(a) = 2*val
         # f(a) - f'(a)a = val*val - 2*val*val
@@ -145,11 +214,16 @@ def _compute_mb(val):
 
 class ProxApproxManagerDiscrete(_ProxApproxManager):
 
-    def add_cut(self, val, persistent_solver=None):
+    def add_cut(self, val, tolerance, persistent_solver):
         '''
         create up to two cuts at val, exploiting integrality
         '''
         val = int(round(val))
+        if tolerance > 1:
+            # TODO: We should consider how to handle this, maybe.
+            #       Tolerances less than or equal 1 won't affect
+            #       discrete cuts
+            pass
 
         ## cuts are indexed by the x-value to the right
         ## e.g., the cut for (2,3) is indexed by 3

diff --git a/mpisppy/utils/sputils.py b/mpisppy/utils/sputils.py
@@ -983,8 +983,9 @@ def nonant_cost_coeffs(s):
     for var in repn.nonlinear_vars:
         if id(var) in s._mpisppy_data.varid_to_nonant_index:
             raise RuntimeError(
-                "Found nonlinear variables in the objective function. "
-                f"Variable {var} has nonlinear interactions in the objective funtion"
+                "A call to nonant_cost_coefficient found nonlinear variables in the objective function. "
+                f"Variable {var} has nonlinear interactions in the objective funtion. "
+                "Consider using gradient-based rho."
             )
     return cost_coefs