From 9e0b3ca9d769fdca5f9a2f900901668e97c7831a Mon Sep 17 00:00:00 2001
From: Yu-Xiang Wang
Date: Thu, 25 Feb 2021 09:51:28 -0800
Subject: [PATCH] two new unit tests on the converters

---
 autodp/converter.py                          | 30 +++++++++----
 autodp/dp_bank.py                            |  5 ++-
 autodp/fdp_bank.py                           | 16 +++++--
 autodp/utils.py                              | 17 +++++++
 test/unit_test_approxdp_to_fdp_conversion.py | 43 ++++++++++++++++++
 test/unit_test_fdp_to_approxdp_conversion.py | 47 ++++++++++++++++++++
 6 files changed, 144 insertions(+), 14 deletions(-)
 create mode 100644 test/unit_test_approxdp_to_fdp_conversion.py
 create mode 100644 test/unit_test_fdp_to_approxdp_conversion.py

diff --git a/autodp/converter.py b/autodp/converter.py
index bd1dd64..fd27657 100644
--- a/autodp/converter.py
+++ b/autodp/converter.py
@@ -599,7 +599,10 @@ def fun1(logx):
 
     def fun2(logx):
         assert(logx <= 0)
-        grad_l, grad_h = fdp_grad(np.exp(logx))
+        if np.isneginf(logx):
+            grad_l, grad_h = fdp_grad(0)
+        else:
+            grad_l, grad_h = fdp_grad(np.exp(logx))
 
         log_neg_grad_l = np.log(-grad_l)
         log_neg_grad_h = np.log(-grad_h)
@@ -635,7 +638,9 @@ def normal_equation(logx):
             return min(abs(high),abs(low))
 
         # find x such that y = 1-\delta
-        bound1 = np.log(1-np.exp(fun1(np.log(1-delta))))
+        tmp = fun1(np.log(1 - delta))
+        bound1 = np.log(-tmp - tmp**2 / 2 - tmp**3 / 6)
+        #bound1 = np.log(1-np.exp(fun1(np.log(1-delta))))
 
         #results = minimize_scalar(normal_equation, bounds=[-np.inf,0], bracket=[-1,-2])
         results = minimize_scalar(normal_equation, method="Bounded", bounds=[bound1,0], options={'xatol': 1e-6, 'maxiter': 500, 'disp': 0})
@@ -648,15 +653,22 @@ def normal_equation(logx):
             raise RuntimeError(f"'find_logx' fails to find the tangent line: {results.message}")
 
     def approxdp(delta):
-        logx = find_logx(delta)
-        log_one_minus_f = fun1(logx)
-        # log_neg_grad_l, log_neg_grad_h = fun2(logx)
-        s, mag = utils.stable_log_diff_exp(log_one_minus_f,np.log(delta))
-        eps = mag - logx
-        if eps < 0:
+        if delta == 0:
+            logx = -np.inf
+            log_neg_grad_l, log_neg_grad_h = fun2(logx)
+            return log_neg_grad_l
+        elif delta == 1:
             return 0.0
         else:
-            return eps
+            logx = find_logx(delta)
+            log_one_minus_f = fun1(logx)
+            # log_neg_grad_l, log_neg_grad_h = fun2(logx)
+            s, mag = utils.stable_log_diff_exp(log_one_minus_f,np.log(delta))
+            eps = mag - logx
+            if eps < 0:
+                return 0.0
+            else:
+                return eps
 
     #approxdp(1e-3)
 
diff --git a/autodp/dp_bank.py b/autodp/dp_bank.py
index c148bda..8b47603 100644
--- a/autodp/dp_bank.py
+++ b/autodp/dp_bank.py
@@ -77,8 +77,9 @@ def fun(x):
             return np.inf
         else:
             return get_logdelta_ana_gaussian(sigma, x) - np.log(delta)
-    # The following by default uses the 'secant' method for finding
-    results = root_scalar(fun, x0=0, x1=5)
+
+    eps_upperbound = 1/2/sigma**2+1/sigma*np.sqrt(2*np.log(1/delta))
+    results = root_scalar(fun,bracket=[0, eps_upperbound])
     if results.converged:
         return results.root
     else:
diff --git a/autodp/fdp_bank.py b/autodp/fdp_bank.py
index 5450046..38f91a6 100644
--- a/autodp/fdp_bank.py
+++ b/autodp/fdp_bank.py
@@ -5,6 +5,7 @@
 
 import numpy as np
 from scipy.stats import norm
+from autodp import utils
 
 
 def fDP_gaussian(params, fpr):
@@ -51,7 +52,15 @@ def log_one_minus_fdp_gaussian(params, logfpr):
     if sigma == 0:
         return 0
     else:
-        return norm.logsf(norm.ppf(1-np.exp(logfpr))-1/sigma)
+        if np.isneginf(logfpr):
+            return -np.inf
+        else:
+
+            norm_ppf_one_minus_fpr = utils.stable_norm_ppf_one_minus_x(logfpr)
+
+            return norm.logsf(norm_ppf_one_minus_fpr-1/sigma)
+
+
 
 
 def log_neg_fdp_grad_gaussian(params, logfpr):
@@ -72,6 +81,7 @@ def log_neg_fdp_grad_gaussian(params, logfpr):
         elif logfpr == 0: #fpr == 1:
             return -np.inf, -np.inf
         else:
-            grad = -(norm.ppf(1 - np.exp(logfpr))
-                     - 1 / sigma) ** 2 / 2 + (norm.ppf(1 - np.exp(logfpr))) ** 2 / 2
+            norm_ppf_one_minus_fpr = utils.stable_norm_ppf_one_minus_x(logfpr)
+            grad = -(norm_ppf_one_minus_fpr
+                     - 1 / sigma) ** 2 / 2 + norm_ppf_one_minus_fpr ** 2 / 2
             return grad, grad
diff --git a/autodp/utils.py b/autodp/utils.py
index 5054321..0fbbffb 100644
--- a/autodp/utils.py
+++ b/autodp/utils.py
@@ -1,5 +1,6 @@
 import numpy as np
 from scipy.special import gammaln, comb
+from scipy.stats import norm
 import math
 
 def stable_logsumexp(x):
@@ -86,6 +87,22 @@ def stable_inplace_diff_in_log(vec, signs, n=-1):
             vec[j] = stable_logsumexp_two(vec[j], vec[j + 1])
             signs[j] = signs[j + 1]
 
+def stable_norm_ppf_one_minus_x(logx):
+    """
+    This function compute the normal inverse CDF at 1-x by taking logx as an input.
+    Numerical stability has been taken into account with two levels of approximations.
+    :param logx:
+    :return: ppf(1-e^logx)
+    """
+    if logx < -20:
+        # asymptotic approximation
+        norm_ppf_one_minus_x = np.sqrt(-2 * logx - np.log(-2*logx) - np.log(2*np.pi))
+    # # if logx < -10:
+    # #     norm_ppf_one_minus_x = norm.ppf(-logx - logx ** 2 / 2 - logx ** 3 / 6)
+    else:
+        norm_ppf_one_minus_x = norm.ppf(1 - np.exp(logx))
+    return norm_ppf_one_minus_x
+
 
 def get_forward_diffs(fun, n):
     """ This is the key function for computing up to nth order forward difference evaluated at 0"""
diff --git a/test/unit_test_approxdp_to_fdp_conversion.py b/test/unit_test_approxdp_to_fdp_conversion.py
new file mode 100644
index 0000000..0aa4533
--- /dev/null
+++ b/test/unit_test_approxdp_to_fdp_conversion.py
@@ -0,0 +1,43 @@
+from autodp.mechanism_zoo import GaussianMechanism
+from autodp.fdp_bank import fDP_gaussian
+
+import numpy as np
+
+from absl.testing import absltest
+from absl.testing import parameterized
+
+params = [0.05, 0.1, 0.2, 0.5,1.0, 2.0,5.0, 10.0]
+
+
+def _fdp_conversion(sigma):
+
+    # Using the log(1-f(fpr)) and log(- \partial f(fpr)) that are implemented dedicatedly
+
+    fpr_list = np.linspace(0, 1, 21)
+
+    # analytical gaussian implementation (privacy profile)
+    gm2 = GaussianMechanism(sigma, name='GM2', RDP_off=True)
+
+    # direct f-DP implementation
+    fdp = lambda x: fDP_gaussian({'sigma': sigma},x)
+
+    fdp_direct = fdp(fpr_list)
+
+    # the fdp is converted by numerical methods from privacy profile.
+    fdp_converted = np.array([gm2.get_fDP(fpr) for fpr in fpr_list])
+
+    return fdp_direct - fdp_converted
+
+
+
+class Test_approxDP2fDP_Conversion(parameterized.TestCase):
+
+    @parameterized.parameters(p for p in params)
+    def test_fdp_conversion(self, sigma):
+        max_diff = _fdp_conversion(sigma)
+        self.assertSequenceAlmostEqual(max_diff, np.zeros_like(max_diff), places=4)
+
+
+if __name__ == '__main__':
+    absltest.main()
+
diff --git a/test/unit_test_fdp_to_approxdp_conversion.py b/test/unit_test_fdp_to_approxdp_conversion.py
new file mode 100644
index 0000000..e556d2a
--- /dev/null
+++ b/test/unit_test_fdp_to_approxdp_conversion.py
@@ -0,0 +1,47 @@
+from autodp.mechanism_zoo import GaussianMechanism
+from autodp.dp_bank import get_eps_ana_gaussian
+
+import numpy as np
+
+from absl.testing import absltest
+from absl.testing import parameterized
+
+params = [0.05, 0.1, 0.2, 0.5,1.0, 2.0, 5.0, 10.0]
+
+
+def _fdp_conversion(sigma):
+
+    delta_list = [0,1e-8, 1e-6, 1e-4, 1e-2, 0.3, 0.5, 1]
+
+    # f-DP implementation
+    gm3 = GaussianMechanism(sigma, name='GM3', RDP_off=True, approxDP_off=True, fdp_off=False)
+
+    # direct approxdp implementation
+    agm = lambda x: get_eps_ana_gaussian(sigma, x)
+
+    eps_direct = np.array([agm(delta) for delta in delta_list])
+
+    # the fdp is converted by numerical methods from privacy profile.
+    eps_converted = np.array([gm3.get_approxDP(delta) for delta in delta_list])
+    max_diff = eps_direct - eps_converted
+
+    rel_diff = max_diff / (eps_direct+1e-10)
+
+    if np.isinf(eps_direct[0]) and np.isinf(eps_converted[0]):
+        rel_diff[0] = 0
+    return rel_diff
+
+
+_fdp_conversion(0.05)
+
+class Test_approxDP2fDP_Conversion(parameterized.TestCase):
+
+    @parameterized.parameters(p for p in params)
+    def test_fdp_conversion(self, sigma):
+        max_diff = _fdp_conversion(sigma)
+        self.assertSequenceAlmostEqual(max_diff, np.zeros_like(max_diff), places=2)
+
+
+if __name__ == '__main__':
+    absltest.main()
+
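Why the asymptotic branch in stable_norm_ppf_one_minus_x is needed: as logx becomes very
negative, 1 - np.exp(logx) loses the tail's floating-point resolution and, once np.exp(logx)
drops below machine epsilon, rounds to exactly 1.0, so norm.ppf(1 - np.exp(logx)) returns inf.
The minimal sketch below (illustrative only, not part of the commit) reproduces this and checks
the expansion sqrt(-2*logx - log(-2*logx) - log(2*pi)) used for logx < -20 in autodp/utils.py
above against SciPy's norm.isf, which serves here purely as an independent reference.

    # Illustrative sketch: the expansion is the one added to autodp/utils.py in this patch.
    import numpy as np
    from scipy.stats import norm

    logx = -40.0
    x = np.exp(logx)                 # ~4.2e-18, below float64 machine epsilon

    naive = norm.ppf(1 - x)          # 1 - x rounds to 1.0, so this returns inf
    asymptotic = np.sqrt(-2 * logx - np.log(-2 * logx) - np.log(2 * np.pi))
    reference = norm.isf(x)          # numerically stable Phi^{-1}(1 - x)

    print(naive, asymptotic, reference)   # inf, ~8.59, ~8.59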