Skip to content
Snippets Groups Projects
Commit 9b38a9fb authored by Michele Alberti's avatar Michele Alberti
Browse files

[proof_strategy] Split task goal only in presence of datasets.

Avoid splitting when not necessary, as it may incur a performance hit. Indeed,
splitting means more (smaller) goals to verify, hence more calls to provers.
parent 32378657
No related branches found
No related tags found
No related merge requests found
......@@ -46,7 +46,13 @@ type data = D_csv of string list [@@deriving show]
type caisar_op =
| NeuralNetwork of nn
| Dataset of dataset
| Dataset of Term.lsymbol * dataset
[@printer
fun fmt (ls, ds) ->
Fmt.pf fmt "%a %a"
Fmt.(option Language.pp_dataset)
(Language.lookup_dataset_csv ls)
pp_dataset ds]
| Data of data
| Vector of Term.lsymbol
[@printer
......@@ -68,6 +74,7 @@ let ls_of_caisar_op engine caisar_op ty_args ty =
match caisar_op with
| NeuralNetwork (NNet n | ONNX n) -> n
| Vector v -> v
| Dataset (d, _) -> d
| _ -> Term.create_lsymbol id ty_args ty
in
Hashtbl.Poly.add_exn caisar_env.ls_of_caisar_op ~key:caisar_op ~data:ls;
......@@ -130,7 +137,7 @@ let caisar_builtins : caisar_env CRE.built_in_theories list =
] -> (
let i = Number.to_small_integer i in
match caisar_op_of_ls engine ls1 with
| Dataset (DS_csv csv) ->
| Dataset (_, DS_csv csv) ->
let row = List.nth_exn csv i in
let label, features =
match row with
......@@ -160,7 +167,7 @@ let caisar_builtins : caisar_env CRE.built_in_theories list =
match vl with
| [ Term { t_node = Tapp (ls, []); _ } ] -> (
match caisar_op_of_ls engine ls with
| Dataset (DS_csv csv) -> value_int (BigInt.of_int (Csv.lines csv))
| Dataset (_, DS_csv csv) -> value_int (BigInt.of_int (Csv.lines csv))
| Vector v ->
value_int (BigInt.of_int (Option.value_exn (Language.lookup_vector v)))
| Data (D_csv data) -> value_int (BigInt.of_int (List.length data))
......@@ -234,7 +241,7 @@ let caisar_builtins : caisar_env CRE.built_in_theories list =
Vector (Language.create_vector env n)
in
Eval (term_of_caisar_op ~args engine caisar_op ty)
| Dataset (DS_csv csv) -> value_int (BigInt.of_int (Csv.lines csv))
| Dataset (_, DS_csv csv) -> value_int (BigInt.of_int (Csv.lines csv))
| Data _ | NeuralNetwork _ -> assert false)
| [ Term _t1; Term _t2 ] -> reconstruct ()
| _ -> invalid_arg (error_message ls)
......@@ -279,11 +286,12 @@ let caisar_builtins : caisar_env CRE.built_in_theories list =
Term { t_node = Tconst (ConstStr dataset); _ };
Term { t_node = Tapp ({ ls_name = { id_string = "CSV"; _ }; _ }, []); _ };
] ->
let { cwd; _ } = CRE.user_env engine in
let { env; cwd; _ } = CRE.user_env engine in
let caisar_op =
let filename = Caml.Filename.concat cwd dataset in
let ds = Language.create_dataset_csv env filename in
let dataset = DS_csv (Csv.load filename) in
Dataset dataset
Dataset (ds, dataset)
in
value_term (term_of_caisar_op engine caisar_op ty)
| [ Term _t1; Term _t2 ] -> reconstruct ()
......@@ -347,11 +355,23 @@ let bounded_quant engine vs ~cond : CRE.bounded_quant_result option =
| _ -> None
let declare_language_lsymbols caisar_env task =
(* Declare [Language] logic symbols for neural networks and vectors only. *)
(* Declare [Language] logic symbols only. *)
Term.Hls.fold
(fun ls _ task ->
if Language.mem_vector ls || Language.mem_nn ls
|| Language.mem_dataset_csv ls
then
(* Add meta for neural network and dataset declarations. *)
let task =
match (Language.lookup_nn ls, Language.lookup_dataset_csv ls) with
| None, None -> task
| Some { nn_filename; _ }, None ->
Task.add_meta task Utils.meta_nn_filename [ MAstr nn_filename ]
| None, Some (CSV ds_filename) ->
Task.add_meta task Utils.meta_dataset_filename [ MAstr ds_filename ]
| Some _, Some _ -> assert false
in
(* Add actual logic symbol declaration. *)
let decl = Decl.create_param_decl ls in
Task.add_decl task decl
else task)
......
......@@ -156,7 +156,7 @@ let register_ovo_support () =
Env.register_format ~desc:"OVO format" Pmodule.mlw_language "OVO" [ "ovo" ]
(fun env _ filename _ -> ovo_parser env filename)
(* -- Vector *)
(* -- Vectors *)
let vectors = Term.Hls.create 10
......@@ -164,14 +164,15 @@ let float64_t_ty env =
let th = Env.read_theory env [ "ieee_float" ] "Float64" in
Ty.ty_app (Theory.ns_find_ts th.th_export [ "t" ]) []
let vector_ty env ty_elt =
let th = Env.read_theory env [ "interpretation" ] "Vector" in
Ty.ty_app (Theory.ns_find_ts th.th_export [ "vector" ]) [ ty_elt ]
let create_vector =
Env.Wenv.memoize 13 (fun env ->
let h = Hashtbl.create (module Int) in
let ty_elt = float64_t_ty env in
let ty =
let th = Env.read_theory env [ "interpretation" ] "Vector" in
Ty.ty_app (Theory.ns_find_ts th.th_export [ "vector" ]) [ ty_elt ]
in
let ty = vector_ty env ty_elt in
Hashtbl.findi_or_add h ~default:(fun length ->
let ls =
let id = Ident.id_fresh "vector" in
......@@ -183,7 +184,7 @@ let create_vector =
let lookup_vector = Term.Hls.find_opt vectors
let mem_vector = Term.Hls.mem vectors
(* -- Classifier *)
(* -- Neural Networks *)
type nn = {
nn_nb_inputs : int;
......@@ -258,3 +259,32 @@ let create_nn_onnx =
let lookup_nn = Term.Hls.find_opt nets
let mem_nn = Term.Hls.mem nets
(* -- Datasets *)
type dataset = CSV of string [@@deriving show]
let datasets = Term.Hls.create 10
let fresh_dataset_csv_ls env name =
let ty =
let ty_elt = vector_ty env (float64_t_ty env) in
let th = Env.read_theory env [ "interpretation" ] "Dataset" in
Ty.ty_app
(Theory.ns_find_ts th.th_export [ "dataset" ])
[ ty_elt; Ty.ty_int ]
in
let id = Ident.id_fresh name in
Term.create_fsymbol id [] ty
let create_dataset_csv =
Env.Wenv.memoize 13 (fun env ->
(* TODO: Should use actual dataset element types. *)
let h = Hashtbl.create (module String) in
Hashtbl.findi_or_add h ~default:(fun filename ->
let ls = fresh_dataset_csv_ls env "dataset_csv" in
Term.Hls.add datasets ls (CSV filename);
ls))
let lookup_dataset_csv = Term.Hls.find_opt datasets
let mem_dataset_csv = Term.Hls.mem datasets
......@@ -63,13 +63,13 @@ val ovo_parser : Env.env -> string -> Pmodule.pmodule Wstdlib.Mstr.t
(* [ovo_parser env filename] parses and creates the theories corresponding to
the given ovo [filename]. The result is memoized. *)
(** -- Vector *)
(** {2 Vectors} *)
val create_vector : Env.env -> int -> Term.lsymbol
val lookup_vector : Term.lsymbol -> int option
val mem_vector : Term.lsymbol -> bool
(** -- Neural Network *)
(** {2 Neural Networks} *)
type nn = private {
nn_nb_inputs : int;
......@@ -84,3 +84,11 @@ val create_nn_nnet : Env.env -> string -> Term.lsymbol
val create_nn_onnx : Env.env -> string -> Term.lsymbol
val lookup_nn : Term.lsymbol -> nn option
val mem_nn : Term.lsymbol -> bool
(** {2 Datasets} *)
type dataset = private CSV of string [@@deriving show]
val create_dataset_csv : Env.env -> string -> Term.lsymbol
val lookup_dataset_csv : Term.lsymbol -> dataset option
val mem_dataset_csv : Term.lsymbol -> bool
......@@ -55,5 +55,13 @@ let apply_native_nn_prover task =
let trans =
Trans.seq [ Introduction.introduce_premises; Native_nn_prover.trans ]
in
let tasks = Trans.apply Split_goal.split_goal_full task in
let tasks =
match Task.on_meta_excl Utils.meta_dataset_filename task with
| None -> [ task ]
| Some _ ->
(* Property verification on a dataset typically involves goals with huge
toplevel conjunctions, one conjunct for each element of the dataset:
first thing to do is to destruct such toplevel conjunctions. *)
Trans.apply Split_goal.split_goal_full task
in
do_apply_prover ~lookup ~trans tasks
......@@ -111,19 +111,14 @@ let simplify_nn_application input_vars output_vars =
(fun task_hd task ->
match task_hd.task_decl.td_node with
| Decl { d_node = Dparam ls; _ } ->
(* Add meta for neural network and input variable declarations. Note
that each meta needs to appear before the corresponding declaration
in order to be leveraged by prover printers. *)
(* Add meta for input variable declarations. Note that each meta needs
to appear before the corresponding declaration in order to be
leveraged by prover printers. *)
let task =
match (Term.Mls.find_opt ls input_vars, Language.lookup_nn ls) with
| None, None -> task
| Some index, None ->
match Term.Mls.find_opt ls input_vars with
| None -> task
| Some index ->
Task.add_meta task Utils.meta_input [ MAls ls; MAint index ]
| None, Some { nn_filename; _ } ->
Task.add_meta task Utils.meta_nn_filename [ MAstr nn_filename ]
| Some _, Some _ ->
(* [ls] cannot be an input variable and a nn at the same time. *)
assert false
in
Task.add_tdecl task task_hd.task_decl
| Decl ({ d_node = Dprop (Pgoal, _, _); _ } as decl) ->
......
......@@ -25,16 +25,22 @@ open Why3
let meta_input =
Theory.(
register_meta "caisar_input"
~desc:"Indicates the input position in the neural network"
~desc:"Indicates an input position among the inputs of the neural network"
[ MTlsymbol; MTint ])
let meta_output =
Theory.(
register_meta "caisar_output"
~desc:"Indicates the output position in the neural network"
~desc:
"Indicates an output position among the outputs of the neural network"
[ MTlsymbol; MTint ])
let meta_nn_filename =
Theory.(
register_meta_excl "caisar_nnet_or_onnx"
~desc:"Indicates the filename of the network" [ MTstring ])
~desc:"Indicates the filename of the neural network" [ MTstring ])
let meta_dataset_filename =
Theory.(
register_meta_excl "caisar_dataset"
~desc:"Indicates the filename of the dataset" [ MTstring ])
......@@ -23,10 +23,13 @@
open Why3
val meta_input : Theory.meta
(** Indicate the input position. *)
(** Indicates an input position among the inputs of the neural network. *)
val meta_output : Theory.meta
(** Indicate the output position. *)
(** Indicates an output position among the outputs of the neural network. *)
val meta_nn_filename : Theory.meta
(** The filename of the nnet or onnx model. *)
(** The filename of the neural network. *)
val meta_dataset_filename : Theory.meta
(** The filename of the dataset. *)
......@@ -33,54 +33,21 @@ Test verify
[DEBUG]{ProverSpec} Prover-tailored specification:
0.0 <= x0
x0 <= 0.5
0.0 < y0
0.0 < y0 and y0 < 0.5
[DEBUG]{ProverSpec} Prover-tailored specification:
0.0 <= x0
x0 <= 0.5
y0 < 0.5
[DEBUG]{ProverSpec} Prover-tailored specification:
0.0 <= x0
x0 <= 0.5
0.5 <= x1
x1 <= 1.0
(0.0 < y0 or 0.5 < y0)
[DEBUG]{ProverSpec} Prover-tailored specification:
0.0 <= x0
x0 <= 0.5
0.5 <= x1
x1 <= 1.0
0.0 < y1
[DEBUG]{ProverSpec} Prover-tailored specification:
0.0 <= x0
x0 <= 0.5
0.5 <= x1
x1 <= 1.0
y1 < 0.5
[DEBUG]{ProverSpec} Prover-tailored specification:
0.375 <= x0
x0 <= 0.75
0.5 <= x1
x1 <= 1.0
(0.0 < y0 or 0.5 < y0)
[DEBUG]{ProverSpec} Prover-tailored specification:
0.375 <= x0
x0 <= 0.75
0.5 <= x1
x1 <= 1.0
0.0 < y1
(0.0 < y0 or 0.5 < y0) and 0.0 < y1 and y1 < 0.5
[DEBUG]{ProverSpec} Prover-tailored specification:
0.375 <= x0
x0 <= 0.75
0.5 <= x1
x1 <= 1.0
y1 < 0.5
(0.0 < y0 or 0.5 < y0) and 0.0 < y1 and y1 < 0.5
Goal G: Unknown ()
Goal H: Unknown ()
......@@ -26,11 +26,6 @@ Test verify
[DEBUG]{ProverSpec} Prover-tailored specification:
0.0 <= x0
x0 <= 0.5
0.0 < y0
[DEBUG]{ProverSpec} Prover-tailored specification:
0.0 <= x0
x0 <= 0.5
y0 < 0.5
0.0 < y0 and y0 < 0.5
Goal G: Unknown ()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment