Skip to content
Snippets Groups Projects
Commit f054eea1 authored by Michele Alberti
Browse files

[verification] Rework additional information for dataset results and simplify...

[verification] Rework additional information for dataset results and simplify its JSON serialization.
parent 87ae4989
No related branches found
No related tags found
No related merge requests found
...@@ -43,7 +43,7 @@ type output = { ...@@ -43,7 +43,7 @@ type output = {
id : string; id : string;
prover_answer : prover_answer; prover_answer : prover_answer;
percentage_valid : float; percentage_valid : float;
dataset_results : string; dataset_results : prover_answer list;
} }
and prover_answer = Call_provers.prover_answer = and prover_answer = Call_provers.prover_answer =
......
...@@ -42,7 +42,7 @@ type output = { ...@@ -42,7 +42,7 @@ type output = {
id : string; id : string;
prover_answer : prover_answer; prover_answer : prover_answer;
percentage_valid : float; percentage_valid : float;
dataset_results : string; dataset_results : prover_answer list;
} }
and prover_answer = Call_provers.prover_answer = and prover_answer = Call_provers.prover_answer =
......
...@@ -149,40 +149,30 @@ let verify ?format ~loadpath ?memlimit ?timelimit ?dataset prover ?prover_altern ...@@ -149,40 +149,30 @@ let verify ?format ~loadpath ?memlimit ?timelimit ?dataset prover ?prover_altern
List.iter theory_answers ~f:log_theory_answer; List.iter theory_answers ~f:log_theory_answer;
theory_answers theory_answers
let record_dataset_results id prover_answer dataset outfile = let record_json_output id prover_answer dataset_results file =
let percentage_valid = let percentage_valid =
let nb_valid = let nb_valid =
List.count dataset ~f:(fun record -> List.count dataset_results ~f:(fun prover_answer ->
match List.hd_exn record with "Valid" -> true | _ -> false) match prover_answer with Why3.Call_provers.Valid -> true | _ -> false)
in in
let nb_valid = Float.of_int nb_valid in let nb_valid = Float.of_int nb_valid in
let total = Float.of_int (List.length dataset) in let total = Float.of_int (List.length dataset_results) in
nb_valid /. total *. 100. nb_valid /. total *. 100.
in in
let dataset_results =
let dataset_results, csv_out_channel =
let filename, out_channel =
Caml.Filename.open_temp_file "caisar" "results.csv"
in
(filename, Csv.to_channel out_channel)
in
Csv.output_all csv_out_channel dataset;
Csv.close_out csv_out_channel;
dataset_results
in
let output = { JSON.id; prover_answer; percentage_valid; dataset_results } in let output = { JSON.id; prover_answer; percentage_valid; dataset_results } in
let out_channel = Stdlib.open_out outfile in let out_channel = Stdlib.open_out file in
Yojson.Safe.to_channel out_channel (JSON.output_to_yojson output); Yojson.Safe.to_channel out_channel (JSON.output_to_yojson output);
Logs.info (fun m -> m "@[Results recorded in file '%s'@]" outfile); Logs.info (fun m -> m "@[Results recorded in file '%s'@]" file);
Stdlib.close_out out_channel Stdlib.close_out out_channel
let record_theory_answers id outfile = let record_verification_result id verification_result file =
Why3.Wstdlib.Mstr.iter (fun theory_name task_answers -> Why3.Wstdlib.Mstr.iter
match task_answers with (fun name (answers : Verification.answer list) ->
| [ { Verification.prover_answer; additional_info = Dataset dataset; _ } ] match answers with
-> | [ { prover_answer; additional_info = Dataset dataset_results; _ } ] ->
record_dataset_results id prover_answer dataset outfile record_json_output id prover_answer dataset_results file
| _ -> failwith (Fmt.str "Unexpected answers for theory '%s'" theory_name)) | _ -> failwith (Fmt.str "Unexpected answers for theory '%s'" name))
verification_result
let verify_json ?memlimit ?timelimit ?outfile json = let verify_json ?memlimit ?timelimit ?outfile json =
let jin = JSON.input_of_string json in let jin = JSON.input_of_string json in
...@@ -202,16 +192,16 @@ let verify_json ?memlimit ?timelimit ?outfile json = ...@@ -202,16 +192,16 @@ let verify_json ?memlimit ?timelimit ?outfile json =
(* Precedence to the command line option, if any. *) (* Precedence to the command line option, if any. *)
match outfile with Some _ -> outfile | None -> jin.output_file match outfile with Some _ -> outfile | None -> jin.output_file
in in
let file = Result.ok_or_failwith (Verification.File.of_json_input jin) in let infile = Result.ok_or_failwith (Verification.File.of_json_input jin) in
let theory_answers = let verification_results =
verify ~loadpath:[] ?memlimit ?timelimit ~dataset:jin.property.dataset verify ~loadpath:[] ?memlimit ?timelimit ~dataset:jin.property.dataset
jin.prover [ file ] jin.prover [ infile ]
in in
match theory_answers with match verification_results with
| [] -> assert false (* We always build one theory from the provided JSON. *) | [] -> assert false (* We always build one theory from the provided JSON. *)
| [ theory_answer ] -> | [ verification_result ] ->
Option.iter outfile ~f:(fun outfile -> Option.iter outfile
record_theory_answers jin.id outfile theory_answer) ~f:(record_verification_result jin.id verification_result)
| _ -> failwith "Unexpected more than one theory from a JSON file" | _ -> failwith "Unexpected more than one theory from a JSON file"
let exec_cmd cmdname cmd = let exec_cmd cmdname cmd =
......
...@@ -57,7 +57,7 @@ module File = struct ...@@ -57,7 +57,7 @@ module File = struct
| JSON jin -> JSON.pretty_input fmt jin | JSON jin -> JSON.pretty_input fmt jin
end end
type theory_answer = answer list Wstdlib.Mstr.t type verification_result = answer list Wstdlib.Mstr.t
and answer = { and answer = {
id : Decl.prsymbol; id : Decl.prsymbol;
...@@ -67,7 +67,7 @@ and answer = { ...@@ -67,7 +67,7 @@ and answer = {
and additional_info = and additional_info =
| Generic of string option | Generic of string option
| Dataset of Csv.t | Dataset of Call_provers.prover_answer list
let () = let () =
Language.register_nnet_support (); Language.register_nnet_support ();
...@@ -116,12 +116,12 @@ let answer_saver limit config env config_prover dataset task = ...@@ -116,12 +116,12 @@ let answer_saver limit config env config_prover dataset task =
match answer.prover_answer with match answer.prover_answer with
| Call_provers.Unknown "" -> | Call_provers.Unknown "" ->
let additional_info = Fmt.str "%d/%d" answer.nb_proved answer.nb_total in let additional_info = Fmt.str "%d/%d" answer.nb_proved answer.nb_total in
(Call_provers.Unknown additional_info, None) (Call_provers.Unknown additional_info, Generic None)
| Call_provers.Unknown _ -> | Call_provers.Unknown _ ->
assert false (* By construction of SAVer's Unknown answer. *) assert false (* By construction of SAVer's Unknown answer. *)
| prover_answer -> | prover_answer ->
let additional_info = Fmt.str "(%d/%d)" answer.nb_proved answer.nb_total in let additional_info = Fmt.str "(%d/%d)" answer.nb_proved answer.nb_total in
(prover_answer, Some additional_info) (prover_answer, Generic (Some additional_info))
let answer_aimos limit config env config_prover dataset task aimos_config = let answer_aimos limit config env config_prover dataset task aimos_config =
let predicate = let predicate =
...@@ -156,7 +156,7 @@ let answer_aimos limit config env config_prover dataset task aimos_config = ...@@ -156,7 +156,7 @@ let answer_aimos limit config env config_prover dataset task aimos_config =
let answer = let answer =
AIMOS.call_prover limit config config_prover predicate aimos_filename AIMOS.call_prover limit config config_prover predicate aimos_filename
in in
let additional_info = None in let additional_info = Generic None in
(answer, additional_info) (answer, additional_info)
let nnet_or_onnx = Re__Core.(compile (str "%{nnet-onnx}")) let nnet_or_onnx = Re__Core.(compile (str "%{nnet-onnx}"))
...@@ -230,12 +230,7 @@ let answer_dataset limit config env prover config_prover driver dataset task = ...@@ -230,12 +230,7 @@ let answer_dataset limit config env prover config_prover driver dataset task =
Csv.close_in csv_in_channel; Csv.close_in csv_in_channel;
if List.length dataset <> List.length dataset_answers if List.length dataset <> List.length dataset_answers
then failwith "Inconsistent number of prover answers and dataset records" then failwith "Inconsistent number of prover answers and dataset records"
else else Dataset dataset_answers
List.map2_exn dataset dataset_answers ~f:(fun record prover_answer ->
let result =
Fmt.str "%a" Call_provers.print_prover_answer prover_answer
in
result :: record)
in in
(prover_answer, additional_info) (prover_answer, additional_info)
...@@ -261,33 +256,21 @@ let answer_generic limit config env prover config_prover driver task = ...@@ -261,33 +256,21 @@ let answer_generic limit config env prover config_prover driver task =
List.map tasks ~f:(call_prover_on_task limit config command driver) List.map tasks ~f:(call_prover_on_task limit config command driver)
in in
let prover_answer = combine_prover_answers answers in let prover_answer = combine_prover_answers answers in
(prover_answer, None) let additional_info = Generic None in
(prover_answer, additional_info)
let call_prover ?dataset ~limit config env prover config_prover driver task = let call_prover ?dataset ~limit config env prover config_prover driver task =
let prover_answer, additional_info = let prover_answer, additional_info =
match prover with match prover with
| Prover.Saver -> | Prover.Saver -> answer_saver limit config env config_prover dataset task
let prover_answer, additional_info =
answer_saver limit config env config_prover dataset task
in
(prover_answer, Generic additional_info)
| Aimos -> | Aimos ->
let prover_answer, additional_info = (* TODO: add real config file *)
(* TODO: add real config file *) answer_aimos limit config env config_prover dataset task None
answer_aimos limit config env config_prover dataset task None
in
(prover_answer, Generic additional_info)
| (Marabou | Pyrat | Nnenum) when Option.is_some dataset -> | (Marabou | Pyrat | Nnenum) when Option.is_some dataset ->
let dataset = Unix.realpath (Option.value_exn dataset) in let dataset = Unix.realpath (Option.value_exn dataset) in
let prover_answer, additional_info = answer_dataset limit config env prover config_prover driver dataset task
answer_dataset limit config env prover config_prover driver dataset task
in
(prover_answer, Dataset additional_info)
| Marabou | Pyrat | CVC5 | Nnenum -> | Marabou | Pyrat | CVC5 | Nnenum ->
let prover_answer, additional_info = answer_generic limit config env prover config_prover driver task
answer_generic limit config env prover config_prover driver task
in
(prover_answer, Generic additional_info)
in in
let id = Task.task_goal task in let id = Task.task_goal task in
{ id; prover_answer; additional_info } { id; prover_answer; additional_info }
......
...@@ -30,7 +30,7 @@ module File : sig ...@@ -30,7 +30,7 @@ module File : sig
val pretty : Format.formatter -> t -> unit val pretty : Format.formatter -> t -> unit
end end
type theory_answer = answer list Wstdlib.Mstr.t type verification_result = answer list Wstdlib.Mstr.t
and answer = private { and answer = private {
id : Decl.prsymbol; id : Decl.prsymbol;
...@@ -40,7 +40,8 @@ and answer = private { ...@@ -40,7 +40,8 @@ and answer = private {
and additional_info = private and additional_info = private
| Generic of string option | Generic of string option
| Dataset of Csv.t | Dataset of Call_provers.prover_answer list
(** A prover answer per data point. *)
val verify : val verify :
?debug:bool -> ?debug:bool ->
...@@ -52,8 +53,8 @@ val verify : ...@@ -52,8 +53,8 @@ val verify :
Prover.t -> Prover.t ->
?prover_altern:string -> ?prover_altern:string ->
File.t -> File.t ->
theory_answer verification_result
(** [verify debug format loadpath memlimit timelimit dataset prover prover_altern file] (** [verify ?debug ?format ~loadpath ?memlimit ?timelimit ?dataset prover ?prover_altern file]
launches a verification of the given [file] with the provided [prover] and launches a verification of the given [file] with the provided [prover] and
[dataset]. [dataset].
......
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment.