From 458d69c200d531ffdc7670239a1a57788d821ef0 Mon Sep 17 00:00:00 2001
From: Jiong Zhang <jiongz@amazon.com>
Date: Mon, 18 Dec 2023 15:01:08 -0800
Subject: [PATCH] Add target clipping and update init point for platt transform
 (#272)

---
 pecos/core/base.py           | 24 +++++++++++++++++------
 pecos/core/utils/newton.hpp  | 37 +++++++++++++++++++++---------------
 test/pecos/core/test_clib.py |  4 ++--
 3 files changed, 42 insertions(+), 23 deletions(-)
diff --git a/pecos/core/base.py b/pecos/core/base.py
index 36752d1..8ab467e 100644
--- a/pecos/core/base.py
+++ b/pecos/core/base.py
@@ -2058,25 +2058,37 @@ def link_calibrator_methods(self):
             [c_uint64, POINTER(c_double), POINTER(c_double), POINTER(c_double)],
         )
 
-    def fit_platt_transform(self, logits, tgt_prob):
+    def fit_platt_transform(self, logits, targets, clip_tgt_prob=True):
         """Python to C/C++ interface for platt transfrom fit.
 
         Ref: https://www.csie.ntu.edu.tw/~cjlin/papers/plattprob.pdf
 
         Args:
             logits (ndarray): 1-d array of logit with length N.
-            tgt_prob (ndarray): 1-d array of target probability scores within [0, 1] with length N.
+            targets (ndarray): 1-d array of target probability scores within [0, 1] with length N.
+            clip_tgt_prob (bool): whether to clip the target probability to
+                [1/(prior0 + 2), 1 - 1/(prior1 + 2)]
+                where prior1 = sum(targets), prior0 = N - prior1
         Returns:
             A, B: coefficients for Platt's scale.
         """
         assert isinstance(logits, np.ndarray)
-        assert isinstance(tgt_prob, np.ndarray)
-        assert len(logits) == len(tgt_prob)
-        assert logits.dtype == tgt_prob.dtype
+        assert isinstance(targets, np.ndarray)
+        assert len(logits) == len(targets)
+        assert logits.dtype == targets.dtype
 
-        if tgt_prob.min() < 0 or tgt_prob.max() > 1.0:
+        if targets.min() < 0 or targets.max() > 1.0:
             raise ValueError("Target probability out of bound!")
 
+        min_prob, max_prob = 0.0, 1.0
+        if clip_tgt_prob:
+            prior1 = np.sum(targets)
+            prior0 = len(targets) - prior1
+            min_prob = 1.0 / (prior0 + 2.0)
+            max_prob = (prior1 + 1.0) / (prior1 + 2.0)
+
+        tgt_prob = np.clip(targets, min_prob, max_prob)
+
         AB = np.array([0, 0], dtype=np.float64)
 
         if tgt_prob.dtype == np.float32:
diff --git a/pecos/core/utils/newton.hpp b/pecos/core/utils/newton.hpp
index f6c088e..9fb7197 100644
--- a/pecos/core/utils/newton.hpp
+++ b/pecos/core/utils/newton.hpp
@@ -280,23 +280,29 @@ namespace pecos {
 
     template <typename value_type>
     uint32_t fit_platt_transform(size_t num_samples, const value_type *logits, const value_type *tgt_probs, double& A, double& B) {
-	// define the return code
-	enum {
-	    SUCCESS=0,
-	    LINE_SEARCH_FAIL=1,
-	    MAX_ITER_REACHED=2,
-	};
+        // define the return code
+        enum {
+            SUCCESS=0,
+            LINE_SEARCH_FAIL=1,
+            MAX_ITER_REACHED=2,
+        };
 
         // hyper parameters
         int max_iter = 100; // Maximal number of iterations
         double min_step = 1e-10;    // Minimal step taken in line search
         double sigma = 1e-12; // For numerically strict PD of Hessian
-        double eps = 1e-6;
+        double eps = 1e-5;
+
+        // calculate prior of B
+        double prior1 = 0;
+        for (size_t i = 0; i < num_samples; i++) {
+            prior1 += tgt_probs[i];
+        }
+        double prior0 = double(num_samples) - prior1;
 
-	int iter;
 
         // Initial Point and Initial Fun Value
-        A = 0.0; B = 1.0;
+        A = 0.0; B = log((prior0 + 1.0) / (prior1 + 1.0));
         double fval = 0.0;
 
         for (size_t i = 0; i < num_samples; i++) {
@@ -307,17 +313,18 @@ namespace pecos {
                 fval += (tgt_probs[i] - 1) * fApB + log(1 + exp(fApB));
             }
         }
+        int iter;
         for (iter = 0; iter < max_iter; iter++) {
             // Update Gradient and Hessian (use H' = H + sigma I)
             double h11 = sigma;
             double h22 = sigma; // numerically ensures strict PD
             double h21 = 0.0;
-            double g1 = 0.0;
-            double g2 = 0.0;
+            double g1 = A * sigma;
+            double g2 = B * sigma;
 
             for (size_t i = 0; i < num_samples; i++) {
                 double fApB = logits[i] * A + B;
-		double p = 0, q = 0;
+                double p = 0, q = 0;
                 if (fApB >= 0) {
                     p = exp(-fApB) / (1.0 + exp(-fApB));
                     q = 1.0 / (1.0 + exp(-fApB));
@@ -376,15 +383,15 @@ namespace pecos {
 
             if (stepsize < min_step) {
                 printf("WARNING: fit_platt_transform: Line search fails\n");
-		return LINE_SEARCH_FAIL;
+                return LINE_SEARCH_FAIL;
             }
         }
 
         if (iter >= max_iter) {
             printf("WARNING: fit_platt_transform: Reaching maximal iterations\n");
-	    return MAX_ITER_REACHED;
+            return MAX_ITER_REACHED;
         }
-	return SUCCESS;
+        return SUCCESS;
     }
 } // namespace pecos
 #endif
diff --git a/test/pecos/core/test_clib.py b/test/pecos/core/test_clib.py
index a6ee072..45ec02d 100644
--- a/test/pecos/core/test_clib.py
+++ b/test/pecos/core/test_clib.py
@@ -78,12 +78,12 @@ def test_platt_scale():
 
     orig = np.arange(-15, 15, 1, dtype=np.float32)
     tgt = np.array([1.0 / (1 + np.exp(A * t + B)) for t in orig], dtype=np.float32)
-    At, Bt = clib.fit_platt_transform(orig, tgt)
+    At, Bt = clib.fit_platt_transform(orig, tgt, clip_tgt_prob=False)
     assert B == approx(Bt, abs=1e-6), f"Platt_scale B error: {B} != {Bt}"
     assert A == approx(At, abs=1e-6), f"Platt_scale A error: {A} != {At}"
 
     orig = np.arange(-15, 15, 1, dtype=np.float64)
     tgt = np.array([1.0 / (1 + np.exp(A * t + B)) for t in orig], dtype=np.float64)
-    At, Bt = clib.fit_platt_transform(orig, tgt)
+    At, Bt = clib.fit_platt_transform(orig, tgt, clip_tgt_prob=False)
     assert B == approx(Bt, abs=1e-6), f"Platt_scale B error: {B} != {Bt}"
     assert A == approx(At, abs=1e-6), f"Platt_scale A error: {A} != {At}"