fix issue in pull request rdp_dp

yuxiangw · Nov 6, 2023 · 92daefd · 92daefd
1 parent 0abd50c
commit 92daefd
Show file tree

Hide file tree

Showing 3 changed files with 319 additions and 62 deletions.
diff --git a/autodp/dp_bank.py b/autodp/dp_bank.py
@@ -21,8 +21,8 @@ def get_eps_rdp(func, delta):
     :param delta:
     :return: The corresponding epsilon
     """
-    assert(delta >= 0)
-    acct = rdp_acct.anaRDPacct(m=10,m_max=10)
+    assert (delta >= 0)
+    acct = rdp_acct.anaRDPacct(m=10, m_max=10)
     acct.compose_mechanism(func)
     return acct.get_eps(delta)
 
@@ -34,38 +34,38 @@ def get_eps_rdp_subsampled(func, delta, prob):
     :param delta:
     :return: The corresponding epsilon
     """
-    assert(delta >= 0)
-    assert(prob >=0)
-    if prob==0:
+    assert (delta >= 0)
+    assert (prob >= 0)
+    if prob == 0:
         return 0
     elif prob == 1:
-        return get_eps_rdp(func,delta)
+        return get_eps_rdp(func, delta)
     else:
         acct = rdp_acct.anaRDPacct()
-        acct.compose_subsampled_mechanism(func,prob)
+        acct.compose_subsampled_mechanism(func, prob)
         return acct.get_eps(delta)
 
 
 # Get the eps and delta for a single Gaussian mechanism
 def get_eps_gaussian(sigma, delta):
     """ This function calculates the eps for Gaussian Mech given sigma and delta"""
-    assert(delta >= 0)
-    func = lambda x: rdp_bank.RDP_gaussian({'sigma':sigma},x)
-    return get_eps_rdp(func,delta)
+    assert (delta >= 0)
+    func = lambda x: rdp_bank.RDP_gaussian({'sigma': sigma}, x)
+    return get_eps_rdp(func, delta)
 
 
-def get_logdelta_ana_gaussian(sigma,eps):
+def get_logdelta_ana_gaussian(sigma, eps):
     """ This function calculates the delta parameter for analytical gaussian mechanism given eps"""
-    assert(eps>=0)
+    assert (eps >= 0)
     s, mag = utils.stable_log_diff_exp(norm.logcdf(0.5 / sigma - eps * sigma),
-                                       eps + norm.logcdf(-0.5/sigma - eps * sigma))
+                                       eps + norm.logcdf(-0.5 / sigma - eps * sigma))
     return mag
 
 
 def get_eps_ana_gaussian(sigma, delta):
     """ This function calculates the gaussian mechanism given sigma and delta using analytical GM"""
     # Basically inverting the above function by solving a nonlinear equation
-    assert(delta >=0 and delta <=1)
+    assert (delta >= 0 and delta <= 1)
 
     if delta == 0:
         return np.inf
@@ -78,71 +78,94 @@ def fun(x):
         else:
             return get_logdelta_ana_gaussian(sigma, x) - np.log(delta)
 
-    eps_upperbound = 1/2/sigma**2+1/sigma*np.sqrt(2*np.log(1/delta))
-    results = root_scalar(fun,bracket=[0, eps_upperbound])
+    eps_upperbound = 1 / 2 / sigma ** 2 + 1 / sigma * np.sqrt(2 * np.log(1 / delta))
+    results = root_scalar(fun, bracket=[0, eps_upperbound])
     if results.converged:
         return results.root
     else:
         raise RuntimeError(f"Failed to find epsilon: {results.flag}")
 
-def get_eps_gaussian_svt(x, sigma, delta, k, c, c_tilde):
+
+# def get_eps_gaussian_svt(x, sigma, delta, k, c, c_tilde):
+def get_eps_gaussian_svt(params, delta):
     """
-    submodule for generalized SVT with Gaussian noise
+    Implements Theorem 12 (stage-wise length-capped Gaussian SVT) in Zhu et.al., NeurIPS-20.
+
     we want to partition c into [c/c'] parts, each part using (k choose c')
     need to check whether (k choose c') > log(1/delta')
-    k is the maximam number of queries to answer for each chunk
-    x is log delta for each chunk, it needs to be negative
+    k is the maximum number of queries to answer for each chunk
+    x is delta for each chunk, it needs to be negative
     :param x:
     :param sigma:
     :param delta:
     :return:
     """
-    acct = dp_acct.DP_acct()
-    per_delta = np.exp(x) # per_delta for each c' chunk
-    coeff = comb(k,c_tilde)
-    assert per_delta < 1.0/(coeff)
-    #compute the eps per step with 1/(sigma_1**2) + sqrt(2/simga_1**2 *(log k + log(1/epr_delta)))
-    # compose eps for each chunk
-    while c:
-        if c>= c_tilde:
-            c = c - c_tilde
-        else:
-            # the remaining part c // c_tilde
-            c = 0
-            c_tilde = c
-            coeff = comb(k, c_tilde)
-        per_eps = (1.0+c_tilde) / (2*sigma ** 2) + np.sqrt((1.0 +c_tilde) / (2*sigma ** 2) * (np.log(coeff) - x))
-        acct.update_DPlosses(per_eps, per_delta)
-
-    compose_eps = acct.get_eps(delta)
-    return compose_eps
-
-
-def get_eps_laplace(b,delta):
-    assert(delta >= 0)
-    func = lambda x: rdp_bank.RDP_laplace({'b':b},x)
-    return get_eps_rdp(func,delta)
+    # {'sigma': params['sigma'], 'k': params['k'], 'c': params['c']}
+    sigma = params['sigma']
+    k = params.get('k', 1)
+    c = params.get('c', 1)
+    c_tilde = params.get('c_tilde', int(np.sqrt(c)))  # default c_tilde is sqrt(c)
+    coeff = comb(k, c_tilde)
+
+    # the function below returns the composed epsilon if each chunk is assigned a e^x budget of delta (the delta'
+    # used in Theorem 12).
+    def search_per_delta_each_chunk(x, sigma, c, c_tilde, k, coeff):
+        acct = dp_acct.DP_acct()
+        per_delta = np.exp(x)  # per_delta for each c' chunk
+        assert per_delta < 1.0 / (coeff)
+        # compute the eps per step with 1/(sigma_1**2) + sqrt(2/simga_1**2 *(log k + log(1/epr_delta)))
+        # compose eps for each chunk
+        while c:
+            if c >= c_tilde:
+                c = c - c_tilde
+            else:
+                # the remaining part c // c_tilde
+                c = 0
+                c_tilde = c
+                coeff = comb(k, c_tilde)
+            per_eps = (1.0 + c_tilde) / (2 * sigma ** 2) + np.sqrt(
+                (1.0 + c_tilde) / (2 * sigma ** 2) * (np.log(coeff) - x))
+            acct.update_DPlosses(per_eps, per_delta)
+        compose_eps = acct.get_eps(delta)
+        return compose_eps  # composed epsilon over c/c' chunks.
+
+    fun = lambda x: search_per_delta_each_chunk(x, sigma, c, c_tilde, k, coeff)
+    # the bound of per chunk delta (delta').
+    const = 10
+    right_bound = min(delta / (c_tilde + 1), 1.0 / coeff)
+    left_bound =delta / (c_tilde * 10)
+    # ensures the left bound is smaller than the right bound
+    while 10 * left_bound > right_bound:
+        const = const * 10
+        left_bound = left_bound * 0.1
+    results = minimize_scalar(fun, method='Bounded', bounds=[np.log(left_bound), np.log(right_bound)], options={'disp': False})
+    return results.fun
+
+
+def get_eps_laplace(b, delta):
+    assert (delta >= 0)
+    func = lambda x: rdp_bank.RDP_laplace({'b': b}, x)
+    return get_eps_rdp(func, delta)
 
 
-def get_eps_randresp(p,delta):
-    assert(delta >= 0)
-    func = lambda x: rdp_bank.RDP_randresponse({'p':p},x)
+def get_eps_randresp(p, delta):
+    assert (delta >= 0)
+    func = lambda x: rdp_bank.RDP_randresponse({'p': p}, x)
     return get_eps_rdp(func, delta)
 
 
+def get_eps_randresp_optimal(p, delta):
+    assert (delta >= 0)
+    if p < 0.5:
+        p = 1 - p
 
-def get_eps_randresp_optimal(p,delta):
-    assert(delta >= 0)
-    if p<0.5:
-        p = 1-p
-
-    if p==0 or p==1:
+    if p == 0 or p == 1:
         return np.inf
 
-    eps_max = np.log(p) - np.log(1-p)
+    eps_max = np.log(p) - np.log(1 - p)
     if delta == 0:
         return eps_max
-    elif delta >= 2*p - 1:
+    elif delta >= 2 * p - 1:
         return 0.0
     else:
-        return np.log(p-delta) - np.log(1 - p)
+        return np.log(p - delta) - np.log(1 - p)
diff --git a/autodp/mechanism_zoo.py b/autodp/mechanism_zoo.py
@@ -379,7 +379,7 @@ class GaussianSVT_Mechanism(Mechanism):
     The Gaussian-based SVT mechanism is described in
     https://papers.nips.cc/paper/2020/file/e9bf14a419d77534105016f5ec122d62-Paper.pdf
 
-    The mechanism takes the parameter k and c.k is the maximum length before
+    The mechanism takes the parameter k and c. k is the maximum length before
     the algorithm stops. c is the cut-off parameter.  Setting rdp_c_1 = True implies that
     we use RDP-based Gaussian-SVT with c=1, else c>1.
 
@@ -401,15 +401,16 @@ def __init__(self,params,name='GaussianSVT', rdp_c_1=True):
 class LaplaceSVT_Mechanism(Mechanism):
     """
     Laplace SVT (c>=1) with a RDP description.
-    The mechanism takes the parameter k and sigma.
-    k is the maximum length before the algorithm stops
+    b: the noise scale to perturb the threshold and the query.
+    k: the maximum length before the algorithm steps. k could be infinite.
+
     We provide the RDP implementation and pure-DP implementation
     """
     def __init__(self,params,name='GaussianSVT'):
         Mechanism.__init__(self)
         self.name=name
-        self.params={'b':params['b'],'k':params['k'], 'c':params['c']}
-
+        valid_keys = ['b', 'k', 'c']
+        self.params = dict(filter(lambda tuple: tuple[0] in valid_keys, params.items()))
         new_rdp = lambda x: rdp_bank.RDP_svt_laplace(self.params, x)
         self.propagate_updates(new_rdp, 'RDP')