diff --git a/src/interpretation.ml b/src/interpretation.ml
index f35d6e976de0c4aabde0e131844439ca74a498c6..f7901c003c85f8663cf340d7d436c3cd4c8806a9 100644
--- a/src/interpretation.ml
+++ b/src/interpretation.ml
@@ -46,7 +46,13 @@ type data = D_csv of string list [@@deriving show]
 
 type caisar_op =
   | NeuralNetwork of nn
-  | Dataset of dataset
+  | Dataset of Term.lsymbol * dataset
+      [@printer
+        fun fmt (ls, ds) ->
+          Fmt.pf fmt "%a %a"
+            Fmt.(option Language.pp_dataset)
+            (Language.lookup_dataset_csv ls)
+            pp_dataset ds]
   | Data of data
   | Vector of Term.lsymbol
       [@printer
@@ -68,6 +74,7 @@ let ls_of_caisar_op engine caisar_op ty_args ty =
     match caisar_op with
     | NeuralNetwork (NNet n | ONNX n) -> n
     | Vector v -> v
+    | Dataset (d, _) -> d
     | _ -> Term.create_lsymbol id ty_args ty
   in
   Hashtbl.Poly.add_exn caisar_env.ls_of_caisar_op ~key:caisar_op ~data:ls;
@@ -120,6 +127,7 @@ let caisar_builtins : caisar_env CRE.built_in_theories list =
   let error_message ls =
     Fmt.str "Invalid arguments for '%a'" Pretty.print_ls ls
   in
 
+  (* Vector *)
   let vget : _ CRE.builtin =
    fun engine ls vl ty ->
@@ -130,7 +138,7 @@ let caisar_builtins : caisar_env CRE.built_in_theories list =
     ] -> (
       let i = Number.to_small_integer i in
       match caisar_op_of_ls engine ls1 with
-      | Dataset (DS_csv csv) ->
+      | Dataset (_, DS_csv csv) ->
         let row = List.nth_exn csv i in
         let label, features =
           match row with
@@ -139,14 +147,14 @@ let caisar_builtins : caisar_env CRE.built_in_theories list =
         in
         let ty_features =
           match ty with
-          | Some { ty_node = Tyapp (_, [ ty; _ ]); _ } -> Some ty
+          | Some { ty_node = Tyapp (_, [ _; ty ]); _ } -> Some ty
           | _ -> assert false
         in
         let t_features, t_label =
           ( term_of_caisar_op engine (Data (D_csv features)) ty_features,
             Term.t_int_const (BigInt.of_int (Int.of_string label)) )
         in
-        value_term (Term.t_tuple [ t_features; t_label ])
+        value_term (Term.t_tuple [ t_label; t_features ])
       | Vector v ->
         let n = Option.value_exn (Language.lookup_vector v) in
         assert (List.length tl1 = n && i <= n);
@@ -160,7 +168,7 @@ let caisar_builtins : caisar_env CRE.built_in_theories list =
     match vl with
     | [ Term { t_node = Tapp (ls, []); _ } ] -> (
       match caisar_op_of_ls engine ls with
-      | Dataset (DS_csv csv) -> value_int (BigInt.of_int (Csv.lines csv))
+      | Dataset (_, DS_csv csv) -> value_int (BigInt.of_int (Csv.lines csv))
       | Vector v ->
         value_int (BigInt.of_int (Option.value_exn (Language.lookup_vector v)))
       | Data (D_csv data) -> value_int (BigInt.of_int (List.length data))
@@ -234,7 +242,7 @@ let caisar_builtins : caisar_env CRE.built_in_theories list =
           Vector (Language.create_vector env n)
         in
         Eval (term_of_caisar_op ~args engine caisar_op ty)
-      | Dataset (DS_csv csv) -> value_int (BigInt.of_int (Csv.lines csv))
+      | Dataset (_, DS_csv csv) -> value_int (BigInt.of_int (Csv.lines csv))
      | Data _ | NeuralNetwork _ -> assert false)
    | [ Term _t1; Term _t2 ] -> reconstruct ()
    | _ -> invalid_arg (error_message ls)
@@ -279,11 +287,12 @@ let caisar_builtins : caisar_env CRE.built_in_theories list =
        Term { t_node = Tconst (ConstStr dataset); _ };
        Term { t_node = Tapp ({ ls_name = { id_string = "CSV"; _ }; _ }, []); _ };
      ] ->
-      let { cwd; _ } = CRE.user_env engine in
+      let { env; cwd; _ } = CRE.user_env engine in
       let caisar_op =
         let filename = Caml.Filename.concat cwd dataset in
+        let ds = Language.create_dataset_csv env filename in
         let dataset = DS_csv (Csv.load filename) in
-        Dataset dataset
+        Dataset (ds, dataset)
       in
       value_term (term_of_caisar_op engine caisar_op ty)
     | [ Term _t1; Term _t2 ] -> reconstruct ()
@@ -347,11 +356,23 @@ let bounded_quant engine vs ~cond : CRE.bounded_quant_result option =
   | _ -> None
 
 let declare_language_lsymbols caisar_env task =
-  (* Declare [Language] logic symbols for neural networks and vectors only. *)
+  (* Declare [Language] logic symbols only. *)
   Term.Hls.fold
     (fun ls _ task ->
       if Language.mem_vector ls || Language.mem_nn ls
+         || Language.mem_dataset_csv ls
       then
+        (* Add meta for neural network and dataset declarations. *)
+        let task =
+          match (Language.lookup_nn ls, Language.lookup_dataset_csv ls) with
+          | None, None -> task
+          | Some { nn_filename; _ }, None ->
+            Task.add_meta task Utils.meta_nn_filename [ MAstr nn_filename ]
+          | None, Some (CSV ds_filename) ->
+            Task.add_meta task Utils.meta_dataset_filename [ MAstr ds_filename ]
+          | Some _, Some _ -> assert false
+        in
+        (* Add actual logic symbol declaration. *)
         let decl = Decl.create_param_decl ls in
         Task.add_decl task decl
       else task)
diff --git a/src/language.ml b/src/language.ml
index 930d45d96733928241162cb021fbb6dcca4ed287..6d9a186638aa5d1270848491c0e6c5280b89906d 100644
--- a/src/language.ml
+++ b/src/language.ml
@@ -156,7 +156,7 @@ let register_ovo_support () =
   Env.register_format ~desc:"OVO format" Pmodule.mlw_language "OVO" [ "ovo" ]
     (fun env _ filename _ -> ovo_parser env filename)
 
-(* -- Vector *)
+(* -- Vectors *)
 
 let vectors = Term.Hls.create 10
 
@@ -164,14 +164,15 @@ let float64_t_ty env =
   let th = Env.read_theory env [ "ieee_float" ] "Float64" in
   Ty.ty_app (Theory.ns_find_ts th.th_export [ "t" ]) []
 
+let vector_ty env ty_elt =
+  let th = Env.read_theory env [ "interpretation" ] "Vector" in
+  Ty.ty_app (Theory.ns_find_ts th.th_export [ "vector" ]) [ ty_elt ]
+
 let create_vector =
   Env.Wenv.memoize 13 (fun env ->
     let h = Hashtbl.create (module Int) in
     let ty_elt = float64_t_ty env in
-    let ty =
-      let th = Env.read_theory env [ "interpretation" ] "Vector" in
-      Ty.ty_app (Theory.ns_find_ts th.th_export [ "vector" ]) [ ty_elt ]
-    in
+    let ty = vector_ty env ty_elt in
     Hashtbl.findi_or_add h ~default:(fun length ->
       let ls =
         let id = Ident.id_fresh "vector" in
@@ -183,7 +184,7 @@ let create_vector =
 let lookup_vector = Term.Hls.find_opt vectors
 let mem_vector = Term.Hls.mem vectors
 
-(* -- Classifier *)
+(* -- Neural Networks *)
 
 type nn = {
   nn_nb_inputs : int;
@@ -258,3 +259,32 @@ let create_nn_onnx =
 
 let lookup_nn = Term.Hls.find_opt nets
 let mem_nn = Term.Hls.mem nets
+
+(* -- Datasets *)
+
+type dataset = CSV of string [@@deriving show]
+
+let datasets = Term.Hls.create 10
+
+let fresh_dataset_csv_ls env name =
+  let ty =
+    let ty_elt = vector_ty env (float64_t_ty env) in
+    let th = Env.read_theory env [ "interpretation" ] "Dataset" in
+    Ty.ty_app
+      (Theory.ns_find_ts th.th_export [ "dataset" ])
+      [ Ty.ty_int; ty_elt ]
+  in
+  let id = Ident.id_fresh name in
+  Term.create_fsymbol id [] ty
+
+let create_dataset_csv =
+  Env.Wenv.memoize 13 (fun env ->
+    (* TODO: Should use actual dataset element types. *)
+    let h = Hashtbl.create (module String) in
+    Hashtbl.findi_or_add h ~default:(fun filename ->
+      let ls = fresh_dataset_csv_ls env "dataset_csv" in
+      Term.Hls.add datasets ls (CSV filename);
+      ls))
+
+let lookup_dataset_csv = Term.Hls.find_opt datasets
+let mem_dataset_csv = Term.Hls.mem datasets
diff --git a/src/language.mli b/src/language.mli
index f1558fba07149d35809bb4a30cf9c5a79676e9de..09f10ade8b88a3361cb53ba890b83855c853cba0 100644
--- a/src/language.mli
+++ b/src/language.mli
@@ -63,13 +63,13 @@ val ovo_parser : Env.env -> string -> Pmodule.pmodule Wstdlib.Mstr.t
 (* [ovo_parser env filename] parses and creates the theories corresponding to
    the given ovo [filename]. The result is memoized. *)
 
-(** -- Vector *)
+(** {2 Vectors} *)
 
 val create_vector : Env.env -> int -> Term.lsymbol
 val lookup_vector : Term.lsymbol -> int option
 val mem_vector : Term.lsymbol -> bool
 
-(** -- Neural Network *)
+(** {2 Neural Networks} *)
 
 type nn = private {
   nn_nb_inputs : int;
@@ -84,3 +84,11 @@ val create_nn_nnet : Env.env -> string -> Term.lsymbol
 val create_nn_onnx : Env.env -> string -> Term.lsymbol
 val lookup_nn : Term.lsymbol -> nn option
 val mem_nn : Term.lsymbol -> bool
+
+(** {2 Datasets} *)
+
+type dataset = private CSV of string [@@deriving show]
+
+val create_dataset_csv : Env.env -> string -> Term.lsymbol
+val lookup_dataset_csv : Term.lsymbol -> dataset option
+val mem_dataset_csv : Term.lsymbol -> bool
diff --git a/src/proof_strategy.ml b/src/proof_strategy.ml
index 609e5aae415791e2d26f1d5eaba6288a1d47a130..25d4c9b5f98573504ebd46ebfd0dcfd22ec2ff9e 100644
--- a/src/proof_strategy.ml
+++ b/src/proof_strategy.ml
@@ -55,5 +55,13 @@ let apply_native_nn_prover task =
   let trans =
     Trans.seq [ Introduction.introduce_premises; Native_nn_prover.trans ]
   in
-  let tasks = Trans.apply Split_goal.split_goal_full task in
+  let tasks =
+    match Task.on_meta_excl Utils.meta_dataset_filename task with
+    | None -> [ task ]
+    | Some _ ->
+      (* Property verification on a dataset typically involves goals with huge
+         toplevel conjunctions, one conjunct for each element of the dataset:
+         first thing to do is to destruct such toplevel conjunctions. *)
+      Trans.apply Split_goal.split_goal_full task
+  in
   do_apply_prover ~lookup ~trans tasks
diff --git a/src/transformations/native_nn_prover.ml b/src/transformations/native_nn_prover.ml
index a357e564c34762d5390a44229601411cbcce98bc..090820a1cca9550030ea2415c4c247e14b16433e 100644
--- a/src/transformations/native_nn_prover.ml
+++ b/src/transformations/native_nn_prover.ml
@@ -111,19 +111,14 @@ let simplify_nn_application input_vars output_vars =
     (fun task_hd task ->
       match task_hd.task_decl.td_node with
       | Decl { d_node = Dparam ls; _ } ->
-        (* Add meta for neural network and input variable declarations. Note
-           that each meta needs to appear before the corresponding declaration
-           in order to be leveraged by prover printers. *)
+        (* Add meta for input variable declarations. Note that each meta needs
+           to appear before the corresponding declaration in order to be
+           leveraged by prover printers. *)
         let task =
-          match (Term.Mls.find_opt ls input_vars, Language.lookup_nn ls) with
-          | None, None -> task
-          | Some index, None ->
+          match Term.Mls.find_opt ls input_vars with
+          | None -> task
+          | Some index ->
             Task.add_meta task Utils.meta_input [ MAls ls; MAint index ]
-          | None, Some { nn_filename; _ } ->
-            Task.add_meta task Utils.meta_nn_filename [ MAstr nn_filename ]
-          | Some _, Some _ ->
-            (* [ls] cannot be an input variable and a nn at the same time. *)
-            assert false
         in
         Task.add_tdecl task task_hd.task_decl
       | Decl ({ d_node = Dprop (Pgoal, _, _); _ } as decl) ->
diff --git a/src/transformations/utils.ml b/src/transformations/utils.ml
index 7d702728500364025cb874275d076d6c599bf70c..fa4e92e885701c8c8774baa0566a76b9dddf3027 100644
--- a/src/transformations/utils.ml
+++ b/src/transformations/utils.ml
@@ -25,16 +25,22 @@ open Why3
 let meta_input =
   Theory.(
     register_meta "caisar_input"
-      ~desc:"Indicates the input position in the neural network"
+      ~desc:"Indicates an input position among the inputs of the neural network"
       [ MTlsymbol; MTint ])
 
 let meta_output =
   Theory.(
     register_meta "caisar_output"
-      ~desc:"Indicates the output position in the neural network"
+      ~desc:
+        "Indicates an output position among the outputs of the neural network"
      [ MTlsymbol; MTint ])
 
 let meta_nn_filename =
   Theory.(
     register_meta_excl "caisar_nnet_or_onnx"
-      ~desc:"Indicates the filename of the network" [ MTstring ])
+      ~desc:"Indicates the filename of the neural network" [ MTstring ])
+
+let meta_dataset_filename =
+  Theory.(
+    register_meta_excl "caisar_dataset"
+      ~desc:"Indicates the filename of the dataset" [ MTstring ])
diff --git a/src/transformations/utils.mli b/src/transformations/utils.mli
index 3c94d06a393278bc21bfc72649b654851d5e03f1..1f40cfc3b7917e6f0ea2e24cf379de2cee967624 100644
--- a/src/transformations/utils.mli
+++ b/src/transformations/utils.mli
@@ -23,10 +23,13 @@
 open Why3
 
 val meta_input : Theory.meta
-(** Indicate the input position. *)
+(** Indicates an input position among the inputs of the neural network. *)
 
 val meta_output : Theory.meta
-(** Indicate the output position. *)
+(** Indicates an output position among the outputs of the neural network. *)
 
 val meta_nn_filename : Theory.meta
-(** The filename of the nnet or onnx model. *)
+(** The filename of the neural network. *)
+
+val meta_dataset_filename : Theory.meta
+(** The filename of the dataset. *)
diff --git a/tests/interpretation_dataset.t b/tests/interpretation_dataset.t
index eb6ef3e756420ce07c278a303cb10e6e138722a2..8ce1d6076853bf3f506625105d98ae35f5600ea9 100644
--- a/tests/interpretation_dataset.t
+++ b/tests/interpretation_dataset.t
@@ -14,7 +14,6 @@ Test interpret on dataset
   $ caisar verify --format whyml --prover Marabou --ltag=ProverSpec - 2>&1 <<EOF | ./filter_tmpdir.sh
   > theory T
   >   use ieee_float.Float64
-  >   use bool.Bool
   >   use int.Int
   >   use interpretation.Vector
   >   use interpretation.NeuralNetwork
@@ -35,7 +34,7 @@ Test interpret on dataset
   >   predicate bounded_by_epsilon (i: image) (eps: t) =
   >     forall v: index. valid_index i v -> .- eps .<= i[v] .<= eps
   >
-  >   predicate robust_around (n: nn) (eps: t) (i: image) (l: label_) =
+  >   predicate robust_around (n: nn) (eps: t) (l: label_) (i: image) =
   >     forall perturbed_image: image.
   >       has_length perturbed_image (length i) ->
   >       valid_image perturbed_image ->
@@ -43,7 +42,7 @@ Test interpret on dataset
   >       bounded_by_epsilon perturbation eps ->
   >       advises n perturbed_image l
   >
-  >   predicate robust (n: nn) (d: dataset image label_) (eps: t) =
+  >   predicate robust (n: nn) (d: dataset label_ image) (eps: t) =
   >     Dataset.forall_ d (robust_around n eps)
   >
   >   goal G:
diff --git a/tests/pyrat.t b/tests/pyrat.t
index f34d87e691374242e9157fda35ac14909bdd0016..4c36cd0903f0aedcd140eea9e243a4611c96f20e 100644
--- a/tests/pyrat.t
+++ b/tests/pyrat.t
@@ -33,54 +33,21 @@ Test verify
   [DEBUG]{ProverSpec} Prover-tailored specification:
   0.0 <= x0
   x0 <= 0.5
-  0.0 < y0
+  0.0 < y0 and y0 < 0.5
 
   [DEBUG]{ProverSpec} Prover-tailored specification:
   0.0 <= x0
   x0 <= 0.5
-  y0 < 0.5
-
-  [DEBUG]{ProverSpec} Prover-tailored specification:
-  0.0 <= x0
-  x0 <= 0.5
-  0.5 <= x1
-  x1 <= 1.0
-  (0.0 < y0 or 0.5 < y0)
-
-  [DEBUG]{ProverSpec} Prover-tailored specification:
-  0.0 <= x0
-  x0 <= 0.5
-  0.5 <= x1
-  x1 <= 1.0
-  0.0 < y1
-
-  [DEBUG]{ProverSpec} Prover-tailored specification:
-  0.0 <= x0
-  x0 <= 0.5
-  0.5 <= x1
-  x1 <= 1.0
-  y1 < 0.5
-
-  [DEBUG]{ProverSpec} Prover-tailored specification:
-  0.375 <= x0
-  x0 <= 0.75
-  0.5 <= x1
-  x1 <= 1.0
-  (0.0 < y0 or 0.5 < y0)
-
-  [DEBUG]{ProverSpec} Prover-tailored specification:
-  0.375 <= x0
-  x0 <= 0.75
   0.5 <= x1
   x1 <= 1.0
-  0.0 < y1
+  (0.0 < y0 or 0.5 < y0) and 0.0 < y1 and y1 < 0.5
 
   [DEBUG]{ProverSpec} Prover-tailored specification:
   0.375 <= x0
   x0 <= 0.75
   0.5 <= x1
   x1 <= 1.0
-  y1 < 0.5
+  (0.0 < y0 or 0.5 < y0) and 0.0 < y1 and y1 < 0.5
 
   Goal G: Unknown ()
   Goal H: Unknown ()
diff --git a/tests/pyrat_onnx.t b/tests/pyrat_onnx.t
index 2c983e67eebfc3482421d316e73561bbd752c61a..f6512e2bc0419ff272fc531ef9f4d5caa1f52898 100644
--- a/tests/pyrat_onnx.t
+++ b/tests/pyrat_onnx.t
@@ -26,11 +26,6 @@ Test verify
   [DEBUG]{ProverSpec} Prover-tailored specification:
   0.0 <= x0
   x0 <= 0.5
-  0.0 < y0
-
-  [DEBUG]{ProverSpec} Prover-tailored specification:
-  0.0 <= x0
-  x0 <= 0.5
-  y0 < 0.5
+  0.0 < y0 and y0 < 0.5
 
   Goal G: Unknown ()