-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdp_sums.ec
319 lines (277 loc) · 8.56 KB
/
dp_sums.ec
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
require import Int IntExtra Real RealExtra Ring.
require import Distr List Aprhl StdRing StdOrder StdBigop.
(*---*) import IntID IntOrder RField RealOrder.
op eps: { real | 0%r <= eps } as ge0_eps.
hint exact : ge0_eps.
(*
so far we have assumed that we have only one data point
and we know that this is the data that may differ. However,
in general we are interested in situations where we have a whole
dataset
*)
module M1 = {
proc sum (ls : int list) : int = {
var s : int <- 0;
var z : int;
var i : int;
i <- 0;
while (i < size ls) {
s <- s + (nth 0 ls i);
i <- i + 1;
}
z <$ lap eps s;
return z;
}
}.
(*
To reason about this example we need to formulate
our notion of adjacency. We will start with some additional notion.
*)
pred eq_in_range (ms ns : int list, i j : int) =
forall (k : int),
i <= k <= j => nth 0 ms k = nth 0 ns k.
lemma eq_in_range_succ (ms ns : int list, i j : int) :
eq_in_range ms ns i j => eq_in_range ms ns (i + 1) j.
proof.
move => eir_i.
rewrite /eq_in_range => k le_iplus1_k_j.
rewrite eir_i /#.
qed.
pred adjacent_e (ms ns : int list) (i:int)=
size ms = size ns /\
0 <= i < size ms /\
eq_in_range ms ns 0 (i - 1) /\
`|nth 0 ms i - nth 0 ns i| <= 1 /\
eq_in_range ms ns (i + 1) (size ms - 1).
(*
here is our definition of adjacency
*)
pred adjacent (ms ns : int list) = exists i, adjacent_e ms ns i.
(*
we can prove some lemma about this.
*)
lemma size_eq_adjacent (ms ns : int list) :
adjacent ms ns => size ms = size ns.
proof.
rewrite /adjacent. rewrite /adjacent_e => [#].
smt().
qed.
lemma adjacent_sub_abs_bound (ms ns : int list, i : int) :
adjacent_e ms ns i => 0 <= i < size ms =>
`|nth 0 ms i - nth 0 ns i| <= 1.
proof.
rewrite /adjacent_e. smt().
qed.
lemma adjacent_ne_sub_eq_after (ms ns : int list, i : int) :
adjacent_e ms ns i => 0 <= i < size ms => nth 0 ms i <> nth 0 ns i =>
eq_in_range ms ns (i + 1) (size ms - 1).
proof.
rewrite /adjacent_e. smt().
qed.
(*
we can now prove the previous program differentially private
*)
lemma lem1 :
aequiv
[[ eps & 0%r]
M1.sum ~ M1.sum : adjacent ls{1} ls{2} ==> res{1} = res{2}].
proof.
proc.
seq 2 2: (adjacent ls{1} ls{2} /\ ={i, s} /\ i{1} = 0 /\ s{1} = 0).
auto. toequiv. auto.
seq 1 1 : (`|s{1} - s{2}| <= 1).
toequiv.
while
(adjacent ls{1} ls{2} /\ ={i} /\ 0 <= i{1} <= size ls{1} /\
(! ={s} =>
`|s{1} - s{2}| <= 1 /\
eq_in_range ls{1} ls{2} i{1} (size ls{1} - 1))).
wp. skip. progress.
smt().
smt().
smt(adjacent_sub_abs_bound).
smt(adjacent_ne_sub_eq_after).
smt(size_eq_adjacent).
smt(size_eq_adjacent).
auto; progress.
rewrite size_ge0.
smt(size_eq_adjacent).
smt(size_eq_adjacent).
smt(addzN).
lap (0) 1.
qed.
(*
the previous example still adds noise only at the end.
What shall we do if we want to add noise in an iterative way?
Let's consider releasing all the partial sums of a vector:
e.g. on input [1,2,3,4,5] we want to release [1,3,6,10,15]
*)
(* Here a first approach where we add noise to each sum *)
module M2 = {
proc dummy_sum (ls : int list) : int list = {
var s :int <- 0;
var output : int list;
var z : int;
var i :int <- 0;
output = [];
while (i < size ls ) {
s = s + (nth 0 ls i);
i <- i + 1;
(* notice that the laplace noise is now added in the loop *)
z <$ lap eps s;
output = z :: output;
}
return output;
}
}.
(* helper lemma *)
lemma bigops_help : forall n,
0<=n => eps * n%r = bigi predT (fun (_ : int) => eps) 0 n.
proof.
move => n H. rewrite sumr_const count_predT size_range max_ler.
smt().
rewrite intmulr; auto.
qed.
(*
we now want to prove that the budget that we spend depends on the size of the list, that is we want to prove that dummy_sum is n*eps-DP
*)
lemma dummy_sum n : 0<=n => aequiv [ [ (eps *n%r) & 0%r]
M2.dummy_sum ~ M2.dummy_sum
: (adjacent ls{1} ls{2} /\ n = size ls{1})
==> res{2} = res{1} ].
proof.
move => H.
proc.
seq 3 3: (adjacent ls{1} ls{2} /\ ={i, s, output} /\ i{1} = 0
/\ s{1} = 0 /\ 0<= n /\ n = size ls{1} ).
toequiv; auto.
(*
we could try to use the usual while rule but this would not bring us very far.
Instead, we use the approximate while rule which takes as additional parameters
two functions describing how the privacy budget change at each iteration, and how
the number of iterations decreases.
*)
awhile [ (fun _ => eps) & (fun _ => 0%r) ] n [n-i-1]
(adjacent ls{1} ls{2} /\ ={i, output} /\ 0 <= i{1} <= n /\
(! ={s} => `|s{1} - s{2}| <= 1 /\
eq_in_range ls{1} ls{2} i{1} (n - 1)) /\
0<= n /\ n = size ls{1});
first 3 try (auto; progress;smt(ge0_eps)).
smt(bigops_help).
rewrite sumr_const intmulr;auto.
move => v.
seq 2 2:
(adjacent ls{1} ls{2} /\ ={i, output} /\ 0 <= i{1} <= size ls{1} /\
(! ={s} => `|s{1} - s{2}| <= 1 /\
eq_in_range ls{1} ls{2} i{1} (size ls{1} - 1)) /\
0<= n /\ n = size ls{1} /\ v=n-i{1}).
toequiv; auto; progress.
smt(). smt().
smt(adjacent_sub_abs_bound).
smt(adjacent_ne_sub_eq_after).
smt().
seq 1 1:
(adjacent ls{1} ls{2} /\ ={i, output} /\ 0 <= i{1} <= size ls{1} /\
(! ={s} => `|s{1} - s{2}| <= 1 /\
eq_in_range ls{1} ls{2} i{1} (size ls{1} - 1)) /\
0<= n /\ n = size ls{1} /\ v=n-i{1} /\ z{1}=z{2}) <[ eps & 0%r ]>.
lap (0) 1.
auto; progress.
smt().
toequiv; auto; progress.
smt().
smt().
qed.
(* Here another approach where we add noise to each element *)
module M3 = {
proc noisy_sum (ls : int list) : int list = {
var s :int <- 0;
var output : int list;
var z : int;
var i :int <- 0;
output = [];
while (i < size ls ) {
(* we add noise before summing *)
z <$ lap eps (nth 0 ls i);
s <- s + z;
output <- z :: output;
i <- i + 1;
}
return output;
}
}.
(* other helper lemmas *)
lemma count_iota_0 : forall n,
count (transpose (=) n) (iota_ 0 n) = 0.
proof.
move => n.
apply count_eq0. rewrite hasPn.
move => x.
smt(mem_iota).
qed.
lemma count_iota : forall n j,
n=size(iota_ 0 n) => 0<=j< n => (count (fun (i : int) => j = n-i-1) (iota_ 0 n)) = 1.
proof.
elim/natind.
move => n Hn j Hs Hj. smt(size_iota).
move => n Hn HI j Hs Hj.
rewrite iotaSr. assumption. simplify. rewrite -cats1. rewrite count_cat.
rewrite /count. case (j=0) => H. rewrite H.
simplify.
have ? : (n+1-n-1=0). smt().
rewrite H0. simplify.
have ? :(fun i => i = n) =(fun i => 0=n+1-i-1). smt().
rewrite -H1. rewrite count_iota_0. smt().
have ? : (n+1-n-1=0). smt().
rewrite H0 H. simplify.
have ? :(fun i => j-1 = n-i-1) =(fun i => j=n+1-i-1). smt().
rewrite -H1. rewrite HI. smt(size_iota). smt(). smt().
qed.
lemma bigi_eps : forall n j,
0<= j < n=>
eps = bigi (fun (i : int) => j = n - i - 1) (fun (_ : int) => eps) 0 n.
proof.
move => ? ? ?. rewrite big_const. rewrite count_iota. smt(size_iota). assumption.
smt(iter1).
qed.
(* We can now prove that that noisy_sum is eps-DP *)
lemma noisy_sum n j : 0<= j < n => aequiv [ [ eps & 0%r]
M3.noisy_sum ~ M3.noisy_sum
: adjacent_e ls{1} ls{2} j /\ n=size ls{1}
==> res{2} = res{1} ].
proof.
move => H. proc.
seq 3 3: (adjacent_e ls{1} ls{2} j /\ ={i, s, output} /\ i{1} = 0
/\ s{1} = 0 /\ n=size ls{1}).
toequiv; auto.
(* notice the budget function we are using for epsilon *)
awhile [ (fun x => if j=n-x -1 then eps else 0%r ) & (fun _ => 0%r) ] n [n -i-1]
(adjacent_e ls{1} ls{2} j /\ ={i, output} /\ 0 <= i{1} <= size ls{1} /\ n=size ls{1} /\
(i{1}=j =>
`|(nth 0 ls{1} i{1}) - (nth 0 ls{2} i{2}) | <= 1 /\
eq_in_range ls{1} ls{2} (i{1}+1) (size ls{1} - 1)) /\
0<= size ls{1}); first 4 try (auto; progress;smt(ge0_eps)).
search pred1. search iota_. rewrite /bigi.
rewrite -big_mkcond. apply bigi_eps => //. rewrite sumr_const intmulr; smt().
move => k.
have H1: (j = n - k - 1)\/ (j <> n - k - 1). smt().
elim H1 => [?|?].
wp. progress.
lap 0 1. smt().
progress. smt().
progress. smt().
smt(). smt().
smt(adjacent_sub_abs_bound).
smt(adjacent_ne_sub_eq_after).
smt(size_eq_adjacent).
wp.
lap 0 0. smt().
progress.
smt(size_eq_adjacent).
auto; progress. smt().
smt().
smt(adjacent_sub_abs_bound).
smt(adjacent_ne_sub_eq_after).
smt(size_eq_adjacent).
smt(size_eq_adjacent).
qed.