Skip to content
Snippets Groups Projects
Commit 2476e3bf authored by Michele Alberti's avatar Michele Alberti
Browse files

[stdlib] Define standalone theory for csv dataset robustness.

parent fd4f4338
No related branches found
No related tags found
No related merge requests found
...@@ -280,13 +280,10 @@ let caisar_builtins : caisar_env CRE.built_in_theories list = ...@@ -280,13 +280,10 @@ let caisar_builtins : caisar_env CRE.built_in_theories list =
in in
(* Dataset *) (* Dataset *)
let read_dataset : _ CRE.builtin = let read_dataset_csv : _ CRE.builtin =
fun engine ls vl ty -> fun engine ls vl ty ->
match vl with match vl with
| [ | [ Term { t_node = Tconst (ConstStr dataset); _ } ] ->
Term { t_node = Tconst (ConstStr dataset); _ };
Term { t_node = Tapp ({ ls_name = { id_string = "CSV"; _ }; _ }, []); _ };
] ->
let { env; cwd; _ } = CRE.user_env engine in let { env; cwd; _ } = CRE.user_env engine in
let caisar_op = let caisar_op =
let filename = Stdlib.Filename.concat cwd dataset in let filename = Stdlib.Filename.concat cwd dataset in
...@@ -295,7 +292,49 @@ let caisar_builtins : caisar_env CRE.built_in_theories list = ...@@ -295,7 +292,49 @@ let caisar_builtins : caisar_env CRE.built_in_theories list =
Dataset (ds, dataset) Dataset (ds, dataset)
in in
value_term (term_of_caisar_op engine caisar_op ty) value_term (term_of_caisar_op engine caisar_op ty)
| [ Term _t1; Term _t2 ] -> reconstruct () | [ Term _t1 ] -> reconstruct ()
| _ -> invalid_arg (error_message ls)
in
let min_label : _ CRE.builtin =
fun engine ls vl _ty ->
match vl with
| [ Term ({ t_node = Tapp (ls1, _); _ } as _t1) ] -> (
match caisar_op_of_ls engine ls1 with
| Dataset (_, DS_csv csv) ->
(* TODO: test for non-empty csv. *)
let min_label =
List.fold_left csv ~init:None ~f:(fun min_label elt ->
let label = Int.of_string (List.hd_exn elt) in
Some
(match min_label with
| None -> label
| Some min_label -> min min_label label))
in
let min_label = Option.value_exn min_label in
value_int (BigInt.of_int min_label)
| _ -> assert false)
| [ Term _t1 ] -> reconstruct ()
| _ -> invalid_arg (error_message ls)
in
let max_label : _ CRE.builtin =
fun engine ls vl _ty ->
match vl with
| [ Term ({ t_node = Tapp (ls1, _); _ } as _t1) ] -> (
match caisar_op_of_ls engine ls1 with
| Dataset (_, DS_csv csv) ->
(* TODO: test for non-empty csv. *)
let max_label =
List.fold_left csv ~init:None ~f:(fun max_label elt ->
let label = Int.of_string (List.hd_exn elt) in
Some
(match max_label with
| None -> label
| Some max_label -> max max_label label))
in
let max_label = Option.value_exn max_label in
value_int (BigInt.of_int max_label)
| _ -> assert false)
| [ Term _t1 ] -> reconstruct ()
| _ -> invalid_arg (error_message ls) | _ -> invalid_arg (error_message ls)
in in
...@@ -316,7 +355,14 @@ let caisar_builtins : caisar_env CRE.built_in_theories list = ...@@ -316,7 +355,14 @@ let caisar_builtins : caisar_env CRE.built_in_theories list =
([ "read_neural_network" ], None, read_neural_network); ([ "read_neural_network" ], None, read_neural_network);
([ Ident.op_infix "@@" ], None, apply_neural_network); ([ Ident.op_infix "@@" ], None, apply_neural_network);
] ); ] );
([ "dataset" ], "Dataset", [], [ ([ "read_dataset" ], None, read_dataset) ]); ( [ "dataset" ],
"DatasetCSV",
[],
[
([ "read_dataset_csv" ], None, read_dataset_csv);
([ "min_label" ], None, min_label);
([ "max_label" ], None, max_label);
] );
] ]
let bounded_quant engine vs ~cond : CRE.bounded_quant_result option = let bounded_quant engine vs ~cond : CRE.bounded_quant_result option =
......
...@@ -160,19 +160,19 @@ let register_ovo_support () = ...@@ -160,19 +160,19 @@ let register_ovo_support () =
let vectors = Term.Hls.create 10 let vectors = Term.Hls.create 10
let float64_t_ty env = let ty_float64_t env =
let th = Env.read_theory env [ "ieee_float" ] "Float64" in let th = Env.read_theory env [ "ieee_float" ] "Float64" in
Ty.ty_app (Theory.ns_find_ts th.th_export [ "t" ]) [] Ty.ty_app (Theory.ns_find_ts th.th_export [ "t" ]) []
let vector_ty env ty_elt = let ty_vector env ty_elt =
let th = Env.read_theory env [ "vector" ] "Vector" in let th = Env.read_theory env [ "vector" ] "Vector" in
Ty.ty_app (Theory.ns_find_ts th.th_export [ "vector" ]) [ ty_elt ] Ty.ty_app (Theory.ns_find_ts th.th_export [ "vector" ]) [ ty_elt ]
let create_vector = let create_vector =
Env.Wenv.memoize 13 (fun env -> Env.Wenv.memoize 13 (fun env ->
let h = Hashtbl.create (module Int) in let h = Hashtbl.create (module Int) in
let ty_elt = float64_t_ty env in let ty_elt = ty_float64_t env in
let ty = vector_ty env ty_elt in let ty = ty_vector env ty_elt in
Hashtbl.findi_or_add h ~default:(fun length -> Hashtbl.findi_or_add h ~default:(fun length ->
let ls = let ls =
let id = Ident.id_fresh "vector" in let id = Ident.id_fresh "vector" in
...@@ -208,7 +208,7 @@ let fresh_nn_ls env name = ...@@ -208,7 +208,7 @@ let fresh_nn_ls env name =
let create_nn_nnet = let create_nn_nnet =
Env.Wenv.memoize 13 (fun env -> Env.Wenv.memoize 13 (fun env ->
let h = Hashtbl.create (module String) in let h = Hashtbl.create (module String) in
let ty_elt = float64_t_ty env in let ty_elt = ty_float64_t env in
Hashtbl.findi_or_add h ~default:(fun filename -> Hashtbl.findi_or_add h ~default:(fun filename ->
let ls = fresh_nn_ls env "nnet_nn" in let ls = fresh_nn_ls env "nnet_nn" in
let nn = let nn =
...@@ -230,7 +230,7 @@ let create_nn_nnet = ...@@ -230,7 +230,7 @@ let create_nn_nnet =
let create_nn_onnx = let create_nn_onnx =
Env.Wenv.memoize 13 (fun env -> Env.Wenv.memoize 13 (fun env ->
let h = Hashtbl.create (module String) in let h = Hashtbl.create (module String) in
let ty_elt = float64_t_ty env in let ty_elt = ty_float64_t env in
Hashtbl.findi_or_add h ~default:(fun filename -> Hashtbl.findi_or_add h ~default:(fun filename ->
let ls = fresh_nn_ls env "onnx_nn" in let ls = fresh_nn_ls env "onnx_nn" in
let onnx = let onnx =
...@@ -269,11 +269,9 @@ let datasets = Term.Hls.create 10 ...@@ -269,11 +269,9 @@ let datasets = Term.Hls.create 10
let fresh_dataset_csv_ls env name = let fresh_dataset_csv_ls env name =
let ty = let ty =
let ty_elt = vector_ty env (float64_t_ty env) in let ty_feature = ty_float64_t env in
let th = Env.read_theory env [ "dataset" ] "Dataset" in let th = Env.read_theory env [ "dataset" ] "DatasetCSV" in
Ty.ty_app Ty.ty_app (Theory.ns_find_ts th.th_export [ "dataset" ]) [ ty_feature ]
(Theory.ns_find_ts th.th_export [ "dataset" ])
[ Ty.ty_int; ty_elt ]
in in
let id = Ident.id_fresh name in let id = Ident.id_fresh name in
Term.create_fsymbol id [] ty Term.create_fsymbol id [] ty
......
...@@ -22,21 +22,32 @@ ...@@ -22,21 +22,32 @@
(** {1 Datasets} *) (** {1 Datasets} *)
(** {2 Generic Datasets} *) (** {2 CSV Datasets}
theory Dataset A dataset in CSV format is such that each element is given as:
- first column is the label,
- rest of the columns are the vector features.
*)
theory DatasetCSV
use int.Int
use vector.Vector use vector.Vector
type dataset 'a 'b = vector ('a, 'b) type label_ = int
type format = CSV type features 'a = vector 'a
type dataset 'a = vector (label_, features 'a)
function read_dataset_csv (f: string) : dataset 'a
function read_dataset (f: string) (k: format) : dataset 'a 'b function min_label (d: dataset 'a) : label_
function max_label (d: dataset 'a) : label_
predicate forall_ (d: dataset 'a 'b) (f: 'a -> 'b -> bool) = predicate forall_ (d: dataset 'a) (f: label_ -> features 'a -> bool) =
Vector.forall_ d (fun e -> let a, b = e in f a b) Vector.forall_ d (fun e -> let i, a = e in f i a)
function foreach (d: dataset 'a 'b) (f: 'a -> 'b -> 'c) : vector 'c = function foreach (d: dataset 'a) (f: label_ -> features 'a -> 'b) : vector 'b =
Vector.foreach d (fun e -> let a, b = e in f a b) Vector.foreach d (fun e -> let i, a = e in f i a)
end end
...@@ -2,5 +2,5 @@ ...@@ -2,5 +2,5 @@
(section (section
(site (site
(caisar stdlib))) (caisar stdlib)))
(files caisar.mlw vector.mlw nn.mlw dataset.mlw) (files caisar.mlw robustness.mlw vector.mlw nn.mlw dataset.mlw)
(package caisar)) (package caisar))
...@@ -22,8 +22,43 @@ ...@@ -22,8 +22,43 @@
(** {1 Robustness} *) (** {1 Robustness} *)
(** {2 Dataset Robustness} *) (** {2 Robustness of CSV Datasets} *)
theory DatasetRobustness theory RobustDatasetCSV
use ieee_float.Float64
use int.Int
use vector.Vector
use nn.NeuralNetwork
use dataset.DatasetCSV
type elt = features t
predicate valid_label (min_label: label_) (max_label: label_) (l: label_) =
min_label <= l <= max_label
predicate valid_elt (e: elt) =
forall v: index. valid_index e v -> (0.0: t) .<= e[v] .<= (1.0: t)
predicate bounded_by_epsilon (e: elt) (eps: t) =
forall v: index. valid_index e v -> .- eps .<= e[v] .<= eps
predicate robust_around (advises: elt -> label_ -> bool) (eps: t) (l: label_) (e: elt) =
forall perturbed_elt: elt.
has_length perturbed_elt (length e) ->
valid_elt perturbed_elt ->
let perturbation = perturbed_elt - e in
bounded_by_epsilon perturbation eps ->
advises perturbed_elt l
predicate advises (valid_label: label_ -> bool) (nn: nn) (e: elt) (l: label_) =
valid_label l ->
forall j: int. valid_label j -> j <> l -> (nn @@ e)[l] .>= (nn @@ e)[j]
predicate robust (nn: nn) (d: dataset t) (eps: t) =
let min_label = DatasetCSV.min_label d in
let max_label = DatasetCSV.max_label d in
let advises = advises (valid_label min_label max_label) nn in
DatasetCSV.forall_ d (robust_around advises eps)
end end
\ No newline at end of file
Test interpret on dataset Test interpret on dataset
$ cat - > dataset.csv << EOF $ cat - > dataset.csv << EOF
> 1,0.0,1.0,0.784313725,0.019607843,0.776470588 > 2,0.0,1.0,0.784313725,0.019607843,0.776470588
> 0,0.941176471,0,0,0.011764706,0.039215686 > 0,0.941176471,0,0,0.011764706,0.039215686
> EOF > EOF
...@@ -13,39 +13,13 @@ Test interpret on dataset ...@@ -13,39 +13,13 @@ Test interpret on dataset
> theory T > theory T
> use ieee_float.Float64 > use ieee_float.Float64
> use int.Int > use int.Int
> use vector.Vector
> use nn.NeuralNetwork > use nn.NeuralNetwork
> use dataset.Dataset > use dataset.DatasetCSV
> > use robustness.RobustDatasetCSV
> type image = vector t
> type label_ = int
>
> predicate valid_image (i: image) =
> forall v: index. valid_index i v -> (0.0: t) .<= i[v] .<= (1.0: t)
>
> predicate valid_label (l: label_) = 0 <= l <= 2
>
> predicate advises (n: nn) (i: image) (l: label_) =
> valid_label l ->
> forall j: int. valid_label j -> j <> l -> (n@@i)[l] .> (n@@i)[j]
>
> predicate bounded_by_epsilon (i: image) (eps: t) =
> forall v: index. valid_index i v -> .- eps .<= i[v] .<= eps
>
> predicate robust_around (n: nn) (eps: t) (l: label_) (i: image) =
> forall perturbed_image: image.
> has_length perturbed_image (length i) ->
> valid_image perturbed_image ->
> let perturbation = perturbed_image - i in
> bounded_by_epsilon perturbation eps ->
> advises n perturbed_image l
>
> predicate robust (n: nn) (d: dataset label_ image) (eps: t) =
> Dataset.forall_ d (robust_around n eps)
> >
> goal G: > goal G:
> let nn = read_neural_network "TestNetwork.nnet" NNet in > let nn = read_neural_network "TestNetwork.nnet" NNet in
> let dataset = read_dataset "dataset.csv" CSV in > let dataset = read_dataset_csv "dataset.csv" in
> let eps = (0.0100000000000000002081668171172168513294309377670288085937500000:t) in > let eps = (0.0100000000000000002081668171172168513294309377670288085937500000:t) in
> robust nn dataset eps > robust nn dataset eps
> end > end
...@@ -71,7 +45,7 @@ Test interpret on dataset ...@@ -71,7 +45,7 @@ Test interpret on dataset
x3 <= 0.029607843000000001743021726952065364457666873931884765625 x3 <= 0.029607843000000001743021726952065364457666873931884765625
x4 >= 0.7664705880000000082219457908649928867816925048828125 x4 >= 0.7664705880000000082219457908649928867816925048828125
x4 <= 0.7864705880000000259855141848674975335597991943359375 x4 <= 0.7864705880000000259855141848674975335597991943359375
+y0 -y1 >= 0 +y0 -y2 >= 0
[DEBUG]{ProverSpec} Prover-tailored specification: [DEBUG]{ProverSpec} Prover-tailored specification:
x0 >= 0.0 x0 >= 0.0
...@@ -94,7 +68,7 @@ Test interpret on dataset ...@@ -94,7 +68,7 @@ Test interpret on dataset
x3 <= 0.029607843000000001743021726952065364457666873931884765625 x3 <= 0.029607843000000001743021726952065364457666873931884765625
x4 >= 0.7664705880000000082219457908649928867816925048828125 x4 >= 0.7664705880000000082219457908649928867816925048828125
x4 <= 0.7864705880000000259855141848674975335597991943359375 x4 <= 0.7864705880000000259855141848674975335597991943359375
+y2 -y1 >= 0 +y1 -y2 >= 0
[DEBUG]{ProverSpec} Prover-tailored specification: [DEBUG]{ProverSpec} Prover-tailored specification:
x0 >= 0.0 x0 >= 0.0
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment