Skip to content
Snippets Groups Projects
Commit 93e5b94f authored by Michele Alberti's avatar Michele Alberti
Browse files

[verification] Introduce verification result type.

Decouple the verification process from its result logging.
parent cdc44d86
No related branches found
No related tags found
No related merge requests found
...@@ -119,16 +119,34 @@ let timelimit_of_string s = ...@@ -119,16 +119,34 @@ let timelimit_of_string s =
| [ v ], [ "h" ] -> Int.of_string v * 3600 | [ v ], [ "h" ] -> Int.of_string v * 3600
| _ -> invalid_arg "Unrecognized time limit" | _ -> invalid_arg "Unrecognized time limit"
let verify ?format ~loadpath ?memlimit ?timelimit ?dataset ?outfile prover files let log_theory_answer =
= let open Why3 in
Wstdlib.Mstr.iter (fun theory_name task_answers ->
Logs.info (fun m ->
m "@[Verification results for theory `%s'@]" theory_name);
List.iter task_answers
~f:(fun { Verification.id; result; additional_info } ->
let additional_info =
match additional_info with Generic s -> s | Dataset _ -> None
in
Logs.app (fun m ->
m "@[Goal %a:@ %a%a@]" Pretty.print_pr id
Call_provers.print_prover_answer result
Fmt.(option ~none:nop (any " " ++ string))
additional_info)))
let verify ?format ~loadpath ?memlimit ?timelimit ?dataset prover files =
let debug = log_level_is_debug () in let debug = log_level_is_debug () in
let memlimit = Option.map memlimit ~f:memlimit_of_string in let memlimit = Option.map memlimit ~f:memlimit_of_string in
let timelimit = Option.map timelimit ~f:timelimit_of_string in let timelimit = Option.map timelimit ~f:timelimit_of_string in
List.iter let theory_answers =
~f: List.map files
(Verification.verify ~debug ?format ~loadpath ?memlimit ?timelimit ~f:
?dataset ?outfile prover) (Verification.verify ~debug ?format ~loadpath ?memlimit ?timelimit
files ?dataset prover)
in
List.iter theory_answers ~f:log_theory_answer;
theory_answers
let verify_json ?memlimit ?timelimit ?outfile json = let verify_json ?memlimit ?timelimit ?outfile json =
let jin = Result.ok_exn (Verification.JSON.input_of_string json) in let jin = Result.ok_exn (Verification.JSON.input_of_string json) in
...@@ -147,11 +165,11 @@ let verify_json ?memlimit ?timelimit ?outfile json = ...@@ -147,11 +165,11 @@ let verify_json ?memlimit ?timelimit ?outfile json =
| None -> Option.map jin.time_limit ~f:Int.to_string | None -> Option.map jin.time_limit ~f:Int.to_string
| Some _ -> timelimit | Some _ -> timelimit
in in
let outfile = let _outfile =
(* Precedence to the command line option, if any. *) (* Precedence to the command line option, if any. *)
match outfile with None -> jin.output_file | Some _ -> outfile match outfile with None -> jin.output_file | Some _ -> outfile
in in
verify ~loadpath:[] ?memlimit ?timelimit ~dataset ?outfile prover [ file ] verify ~loadpath:[] ?memlimit ?timelimit ~dataset prover [ file ]
let exec_cmd cmdname cmd = let exec_cmd cmdname cmd =
Logs.debug (fun m -> m "Command `%s'" cmdname); Logs.debug (fun m -> m "Command `%s'" cmdname);
...@@ -251,7 +269,8 @@ let verify_cmd = ...@@ -251,7 +269,8 @@ let verify_cmd =
Term.( Term.(
const (fun format loadpath memlimit timelimit prover dataset files _ -> const (fun format loadpath memlimit timelimit prover dataset files _ ->
exec_cmd cmdname (fun () -> exec_cmd cmdname (fun () ->
verify ?format ~loadpath ?memlimit ?timelimit ?dataset prover files)) ignore
(verify ?format ~loadpath ?memlimit ?timelimit ?dataset prover files)))
$ format $ loadpath $ memlimit $ timelimit $ prover $ dataset $ files $ format $ loadpath $ memlimit $ timelimit $ prover $ dataset $ files
$ setup_logs) $ setup_logs)
in in
......
...@@ -187,6 +187,18 @@ module File = struct ...@@ -187,6 +187,18 @@ module File = struct
| Json j -> JSON.pretty fmt j | Json j -> JSON.pretty fmt j
end end
type theory_answer = answer list Wstdlib.Mstr.t
and answer = {
id : Decl.prsymbol;
result : Call_provers.prover_answer;
additional_info : additional_info;
}
and additional_info =
| Generic of string option
| Dataset of Csv.t
let () = let () =
Language.register_nnet_support (); Language.register_nnet_support ();
Language.register_onnx_support (); Language.register_onnx_support ();
...@@ -253,45 +265,44 @@ let combine_prover_answers answers = ...@@ -253,45 +265,44 @@ let combine_prover_answers answers =
| Call_provers.Valid, r | r, Call_provers.Valid -> r | Call_provers.Valid, r | r, Call_provers.Valid -> r
| _ -> acc) | _ -> acc)
let record_dataset_results ~dataset ~dataset_answers prover_answer outfile = (* let record_dataset_results ~dataset ~dataset_answers prover_answer outfile =
let percentage_valid = * let percentage_valid =
let count = * let count =
List.count dataset_answers ~f:(fun prover_answer -> * List.count dataset_answers ~f:(fun prover_answer ->
match prover_answer with Call_provers.Valid -> true | _ -> false) * match prover_answer with Call_provers.Valid -> true | _ -> false)
in * in
Float.of_int count /. Float.of_int (List.length dataset_answers) *. 100. * Float.of_int count /. Float.of_int (List.length dataset_answers) *. 100.
in * in
let dataset_results = * let dataset_results =
let csv_in_channel = Csv.of_channel (Stdlib.open_in dataset) in * let csv_in_channel = Csv.of_channel (Stdlib.open_in dataset) in
let dataset_results, csv_out_channel = * let dataset_results, csv_out_channel =
let filename, out_channel = * let filename, out_channel =
Filename.open_temp_file "caisar" "results.csv" * Filename.open_temp_file "caisar" "results.csv"
in * in
(filename, Csv.to_channel out_channel) * (filename, Csv.to_channel out_channel)
in * in
let dataset = Csv.input_all csv_in_channel in * let dataset = Csv.input_all csv_in_channel in
Csv.close_in csv_in_channel; * Csv.close_in csv_in_channel;
if List.length dataset = List.length dataset_answers * if List.length dataset = List.length dataset_answers
then * then
List.iter2_exn dataset dataset_answers ~f:(fun record prover_answer -> * List.iter2_exn dataset dataset_answers ~f:(fun record prover_answer ->
let result = * let result =
Fmt.str "%a" Call_provers.print_prover_answer prover_answer * Fmt.str "%a" Call_provers.print_prover_answer prover_answer
in * in
let record = result :: record in * let record = result :: record in
Csv.output_record csv_out_channel record); * Csv.output_record csv_out_channel record);
Csv.close_out csv_out_channel; * Csv.close_out csv_out_channel;
dataset_results * dataset_results
in * in
let output = * let output =
{ JSON.result = prover_answer; percentage_valid; dataset_results } * { JSON.result = prover_answer; percentage_valid; dataset_results }
in * in
let out_channel = Stdlib.open_out outfile in * let out_channel = Stdlib.open_out outfile in
Yojson.Safe.to_channel out_channel (JSON.output_to_yojson output); * Yojson.Safe.to_channel out_channel (JSON.output_to_yojson output);
Logs.info (fun m -> m "@[Results recorded in file '%s'@]" outfile); * Logs.info (fun m -> m "@[Results recorded in file '%s'@]" outfile);
Stdlib.close_out out_channel * Stdlib.close_out out_channel *)
let answer_dataset ?outfile limit config env prover config_prover driver dataset let answer_dataset limit config env prover config_prover driver dataset task =
task =
let dataset_predicate = let dataset_predicate =
let on_model ls = let on_model ls =
let message = let message =
...@@ -316,31 +327,42 @@ let answer_dataset ?outfile limit config env prover config_prover driver dataset ...@@ -316,31 +327,42 @@ let answer_dataset ?outfile limit config env prover config_prover driver dataset
Dataset.tasks_of_nn_csv_predicate env dataset_predicate Dataset.tasks_of_nn_csv_predicate env dataset_predicate
|> List.map ~f:(Driver.prepare_task driver) |> List.map ~f:(Driver.prepare_task driver)
in in
let tasks =
(* We turn each task in [dataset_tasks] into a list (ie, conjunction) of
disjunctions of tasks. *)
match prover with
| Prover.Marabou -> List.map ~f:(Trans.apply Split.split_all) dataset_tasks
| _ -> [ dataset_tasks ]
in
let command = Whyconf.get_complete_command ~with_steps:false config_prover in let command = Whyconf.get_complete_command ~with_steps:false config_prover in
let command = let command =
let nn_file = Unix.realpath dataset_predicate.model.filename in let nn_file = Unix.realpath dataset_predicate.model.filename in
Re__Core.replace_string nnet_or_onnx ~by:nn_file command Re__Core.replace_string nnet_or_onnx ~by:nn_file command
in in
let dataset_answers = let dataset_answers =
List.map tasks ~f:(fun dataset_elt_tasks -> match prover with
let dataset_elt_answers = | Prover.Marabou ->
List.map dataset_elt_tasks (* We turn each task in [dataset_tasks] into a list (ie, conjunction) of
~f:(call_prover_on_task limit config command driver) disjunctions of tasks. *)
in let tasks = List.map ~f:(Trans.apply Split.split_all) dataset_tasks in
combine_prover_answers dataset_elt_answers) List.map tasks ~f:(fun dataset_elt_tasks ->
let dataset_elt_answers =
List.map dataset_elt_tasks
~f:(call_prover_on_task limit config command driver)
in
combine_prover_answers dataset_elt_answers)
| _ ->
List.map dataset_tasks
~f:(call_prover_on_task limit config command driver)
in in
let prover_answer = combine_prover_answers dataset_answers in let prover_answer = combine_prover_answers dataset_answers in
Option.iter let additional_info =
~f:(record_dataset_results ~dataset ~dataset_answers prover_answer) let csv_in_channel = Csv.of_channel (Stdlib.open_in dataset) in
outfile; let dataset = Csv.input_all csv_in_channel in
(prover_answer, None) Csv.close_in csv_in_channel;
if List.length dataset <> List.length dataset_answers
then failwith "Inconsistent number of prover answers and dataset records"
else
List.map2_exn dataset dataset_answers ~f:(fun record prover_answer ->
let result =
Fmt.str "%a" Call_provers.print_prover_answer prover_answer
in
result :: record)
in
(prover_answer, additional_info)
let answer_generic limit config env prover config_prover driver task = let answer_generic limit config env prover config_prover driver task =
let task = Proof_strategy.apply_native_nn_prover env task in let task = Proof_strategy.apply_native_nn_prover env task in
...@@ -366,26 +388,31 @@ let answer_generic limit config env prover config_prover driver task = ...@@ -366,26 +388,31 @@ let answer_generic limit config env prover config_prover driver task =
let prover_answer = combine_prover_answers answers in let prover_answer = combine_prover_answers answers in
(prover_answer, None) (prover_answer, None)
let call_prover ?dataset ?outfile ~limit config env prover config_prover driver let call_prover ?dataset ~limit config env prover config_prover driver task =
task = let result, additional_info =
let prover_answer, additional_info =
match prover with match prover with
| Prover.Saver -> answer_saver limit config env config_prover dataset task | Prover.Saver ->
let prover_answer, additional_info =
answer_saver limit config env config_prover dataset task
in
(prover_answer, Generic additional_info)
| (Marabou | Pyrat | Nnenum) when Option.is_some dataset -> | (Marabou | Pyrat | Nnenum) when Option.is_some dataset ->
let dataset = Unix.realpath (Option.value_exn dataset) in let dataset = Unix.realpath (Option.value_exn dataset) in
answer_dataset ?outfile limit config env prover config_prover driver let prover_answer, additional_info =
dataset task answer_dataset limit config env prover config_prover driver dataset task
in
(prover_answer, Dataset additional_info)
| Marabou | Pyrat | CVC5 | Nnenum -> | Marabou | Pyrat | CVC5 | Nnenum ->
answer_generic limit config env prover config_prover driver task let prover_answer, additional_info =
answer_generic limit config env prover config_prover driver task
in
(prover_answer, Generic additional_info)
in in
Logs.app (fun m -> let id = Task.task_goal task in
m "@[Goal %a:@ %a%a@]" Pretty.print_pr (Task.task_goal task) { id; result; additional_info }
Call_provers.print_prover_answer prover_answer
Fmt.(option ~none:nop (any " " ++ string))
additional_info)
let verify ?(debug = false) ?format ~loadpath ?memlimit ?timelimit ?dataset let verify ?(debug = false) ?format ~loadpath ?memlimit ?timelimit ?dataset
?outfile prover file = prover file =
if debug then Debug.set_flag (Debug.lookup_flag "call_prover"); if debug then Debug.set_flag (Debug.lookup_flag "call_prover");
let env, config = create_env ~debug loadpath in let env, config = create_env ~debug loadpath in
let main = Whyconf.get_main config in let main = Whyconf.get_main config in
...@@ -408,8 +435,8 @@ let verify ?(debug = false) ?format ~loadpath ?memlimit ?timelimit ?dataset ...@@ -408,8 +435,8 @@ let verify ?(debug = false) ?format ~loadpath ?memlimit ?timelimit ?dataset
mlw_files mlw_files
| Json json -> JSON.mstr_theory_of_json env json | Json json -> JSON.mstr_theory_of_json env json
in in
Wstdlib.Mstr.iter Wstdlib.Mstr.map
(fun _ theory -> (fun theory ->
let tasks = Task.split_theory theory None None in let tasks = Task.split_theory theory None None in
let config_prover = let config_prover =
let altern = let altern =
...@@ -433,9 +460,7 @@ let verify ?(debug = false) ?format ~loadpath ?memlimit ?timelimit ?dataset ...@@ -433,9 +460,7 @@ let verify ?(debug = false) ?format ~loadpath ?memlimit ?timelimit ?dataset
in in
Driver.load_driver_absolute env file config_prover.extra_drivers Driver.load_driver_absolute env file config_prover.extra_drivers
in in
List.iter List.map
~f: ~f:(call_prover ?dataset ~limit main env prover config_prover driver)
(call_prover ?dataset ?outfile ~limit main env prover config_prover
driver)
tasks) tasks)
mstr_theory mstr_theory
...@@ -20,6 +20,8 @@ ...@@ -20,6 +20,8 @@
(* *) (* *)
(**************************************************************************) (**************************************************************************)
open Why3
module JSON : sig module JSON : sig
type input = private { type input = private {
prover : Prover.t; prover : Prover.t;
...@@ -35,7 +37,7 @@ module JSON : sig ...@@ -35,7 +37,7 @@ module JSON : sig
normalization : Dataset.normalization option; normalization : Dataset.normalization option;
kind : Dataset.property; kind : Dataset.property;
} }
[@@deriving yojson, show] [@@deriving yojson { strict = false }, show]
type output = private { type output = private {
result : prover_answer; result : prover_answer;
...@@ -43,7 +45,7 @@ module JSON : sig ...@@ -43,7 +45,7 @@ module JSON : sig
dataset_results : string; dataset_results : string;
} }
and prover_answer = Why3.Call_provers.prover_answer = and prover_answer = Call_provers.prover_answer =
| Valid | Valid
| Invalid | Invalid
| Timeout | Timeout
...@@ -66,6 +68,18 @@ module File : sig ...@@ -66,6 +68,18 @@ module File : sig
val pretty : Format.formatter -> t -> unit val pretty : Format.formatter -> t -> unit
end end
type theory_answer = answer list Wstdlib.Mstr.t
and answer = private {
id : Decl.prsymbol;
result : Call_provers.prover_answer;
additional_info : additional_info;
}
and additional_info = private
| Generic of string option
| Dataset of Csv.t
val verify : val verify :
?debug:bool -> ?debug:bool ->
?format:string -> ?format:string ->
...@@ -73,13 +87,11 @@ val verify : ...@@ -73,13 +87,11 @@ val verify :
?memlimit:int -> ?memlimit:int ->
?timelimit:int -> ?timelimit:int ->
?dataset:string -> ?dataset:string ->
?outfile:string ->
Prover.t -> Prover.t ->
File.t -> File.t ->
unit theory_answer
(** [verify debug format loadpath memlimit timeout dataset outfile prover file] (** [verify debug format loadpath memlimit timeout dataset prover file] launches
launches a verification of the given [file] with the provided [prover] and a verification of the given [file] with the provided [prover] and [dataset].
[dataset].
@param debug when set, enables debug information. @param debug when set, enables debug information.
@param format is the [file] format. @param format is the [file] format.
...@@ -90,4 +102,6 @@ val verify : ...@@ -90,4 +102,6 @@ val verify :
@param dataset @param dataset
is the filepath of a dataset (eg, in CSV format) possibly appearing in is the filepath of a dataset (eg, in CSV format) possibly appearing in
[file]. [file].
@param outfile is the filepath where recording the verification results. *) @return
for each theory, an [answer] for each goal of the theory, respecting the
order of appearance. *)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment