Skip to content
Snippets Groups Projects
Commit f054eea1 authored by Michele Alberti's avatar Michele Alberti
Browse files

[verification] Rework additional information for dataset results and simplify...

[verification] Rework additional information for dataset results and simplify its JSON serialization.
parent 87ae4989
No related branches found
No related tags found
No related merge requests found
......@@ -43,7 +43,7 @@ type output = {
id : string;
prover_answer : prover_answer;
percentage_valid : float;
dataset_results : string;
dataset_results : prover_answer list;
}
and prover_answer = Call_provers.prover_answer =
......
......@@ -42,7 +42,7 @@ type output = {
id : string;
prover_answer : prover_answer;
percentage_valid : float;
dataset_results : string;
dataset_results : prover_answer list;
}
and prover_answer = Call_provers.prover_answer =
......
......@@ -149,40 +149,30 @@ let verify ?format ~loadpath ?memlimit ?timelimit ?dataset prover ?prover_altern
List.iter theory_answers ~f:log_theory_answer;
theory_answers
let record_dataset_results id prover_answer dataset outfile =
let record_json_output id prover_answer dataset_results file =
let percentage_valid =
let nb_valid =
List.count dataset ~f:(fun record ->
match List.hd_exn record with "Valid" -> true | _ -> false)
List.count dataset_results ~f:(fun prover_answer ->
match prover_answer with Why3.Call_provers.Valid -> true | _ -> false)
in
let nb_valid = Float.of_int nb_valid in
let total = Float.of_int (List.length dataset) in
let total = Float.of_int (List.length dataset_results) in
nb_valid /. total *. 100.
in
let dataset_results =
let dataset_results, csv_out_channel =
let filename, out_channel =
Caml.Filename.open_temp_file "caisar" "results.csv"
in
(filename, Csv.to_channel out_channel)
in
Csv.output_all csv_out_channel dataset;
Csv.close_out csv_out_channel;
dataset_results
in
let output = { JSON.id; prover_answer; percentage_valid; dataset_results } in
let out_channel = Stdlib.open_out outfile in
let out_channel = Stdlib.open_out file in
Yojson.Safe.to_channel out_channel (JSON.output_to_yojson output);
Logs.info (fun m -> m "@[Results recorded in file '%s'@]" outfile);
Logs.info (fun m -> m "@[Results recorded in file '%s'@]" file);
Stdlib.close_out out_channel
let record_theory_answers id outfile =
Why3.Wstdlib.Mstr.iter (fun theory_name task_answers ->
match task_answers with
| [ { Verification.prover_answer; additional_info = Dataset dataset; _ } ]
->
record_dataset_results id prover_answer dataset outfile
| _ -> failwith (Fmt.str "Unexpected answers for theory '%s'" theory_name))
let record_verification_result id verification_result file =
Why3.Wstdlib.Mstr.iter
(fun name (answers : Verification.answer list) ->
match answers with
| [ { prover_answer; additional_info = Dataset dataset_results; _ } ] ->
record_json_output id prover_answer dataset_results file
| _ -> failwith (Fmt.str "Unexpected answers for theory '%s'" name))
verification_result
let verify_json ?memlimit ?timelimit ?outfile json =
let jin = JSON.input_of_string json in
......@@ -202,16 +192,16 @@ let verify_json ?memlimit ?timelimit ?outfile json =
(* Precedence to the command line option, if any. *)
match outfile with Some _ -> outfile | None -> jin.output_file
in
let file = Result.ok_or_failwith (Verification.File.of_json_input jin) in
let theory_answers =
let infile = Result.ok_or_failwith (Verification.File.of_json_input jin) in
let verification_results =
verify ~loadpath:[] ?memlimit ?timelimit ~dataset:jin.property.dataset
jin.prover [ file ]
jin.prover [ infile ]
in
match theory_answers with
match verification_results with
| [] -> assert false (* We always build one theory from the provided JSON. *)
| [ theory_answer ] ->
Option.iter outfile ~f:(fun outfile ->
record_theory_answers jin.id outfile theory_answer)
| [ verification_result ] ->
Option.iter outfile
~f:(record_verification_result jin.id verification_result)
| _ -> failwith "Unexpected more than one theory from a JSON file"
let exec_cmd cmdname cmd =
......
......@@ -57,7 +57,7 @@ module File = struct
| JSON jin -> JSON.pretty_input fmt jin
end
type theory_answer = answer list Wstdlib.Mstr.t
type verification_result = answer list Wstdlib.Mstr.t
and answer = {
id : Decl.prsymbol;
......@@ -67,7 +67,7 @@ and answer = {
and additional_info =
| Generic of string option
| Dataset of Csv.t
| Dataset of Call_provers.prover_answer list
let () =
Language.register_nnet_support ();
......@@ -116,12 +116,12 @@ let answer_saver limit config env config_prover dataset task =
match answer.prover_answer with
| Call_provers.Unknown "" ->
let additional_info = Fmt.str "%d/%d" answer.nb_proved answer.nb_total in
(Call_provers.Unknown additional_info, None)
(Call_provers.Unknown additional_info, Generic None)
| Call_provers.Unknown _ ->
assert false (* By construction of SAVer's Unknown answer. *)
| prover_answer ->
let additional_info = Fmt.str "(%d/%d)" answer.nb_proved answer.nb_total in
(prover_answer, Some additional_info)
(prover_answer, Generic (Some additional_info))
let answer_aimos limit config env config_prover dataset task aimos_config =
let predicate =
......@@ -156,7 +156,7 @@ let answer_aimos limit config env config_prover dataset task aimos_config =
let answer =
AIMOS.call_prover limit config config_prover predicate aimos_filename
in
let additional_info = None in
let additional_info = Generic None in
(answer, additional_info)
let nnet_or_onnx = Re__Core.(compile (str "%{nnet-onnx}"))
......@@ -230,12 +230,7 @@ let answer_dataset limit config env prover config_prover driver dataset task =
Csv.close_in csv_in_channel;
if List.length dataset <> List.length dataset_answers
then failwith "Inconsistent number of prover answers and dataset records"
else
List.map2_exn dataset dataset_answers ~f:(fun record prover_answer ->
let result =
Fmt.str "%a" Call_provers.print_prover_answer prover_answer
in
result :: record)
else Dataset dataset_answers
in
(prover_answer, additional_info)
......@@ -261,33 +256,21 @@ let answer_generic limit config env prover config_prover driver task =
List.map tasks ~f:(call_prover_on_task limit config command driver)
in
let prover_answer = combine_prover_answers answers in
(prover_answer, None)
let additional_info = Generic None in
(prover_answer, additional_info)
let call_prover ?dataset ~limit config env prover config_prover driver task =
let prover_answer, additional_info =
match prover with
| Prover.Saver ->
let prover_answer, additional_info =
answer_saver limit config env config_prover dataset task
in
(prover_answer, Generic additional_info)
| Prover.Saver -> answer_saver limit config env config_prover dataset task
| Aimos ->
let prover_answer, additional_info =
(* TODO: add real config file *)
answer_aimos limit config env config_prover dataset task None
in
(prover_answer, Generic additional_info)
(* TODO: add real config file *)
answer_aimos limit config env config_prover dataset task None
| (Marabou | Pyrat | Nnenum) when Option.is_some dataset ->
let dataset = Unix.realpath (Option.value_exn dataset) in
let prover_answer, additional_info =
answer_dataset limit config env prover config_prover driver dataset task
in
(prover_answer, Dataset additional_info)
answer_dataset limit config env prover config_prover driver dataset task
| Marabou | Pyrat | CVC5 | Nnenum ->
let prover_answer, additional_info =
answer_generic limit config env prover config_prover driver task
in
(prover_answer, Generic additional_info)
answer_generic limit config env prover config_prover driver task
in
let id = Task.task_goal task in
{ id; prover_answer; additional_info }
......
......@@ -30,7 +30,7 @@ module File : sig
val pretty : Format.formatter -> t -> unit
end
type theory_answer = answer list Wstdlib.Mstr.t
type verification_result = answer list Wstdlib.Mstr.t
and answer = private {
id : Decl.prsymbol;
......@@ -40,7 +40,8 @@ and answer = private {
and additional_info = private
| Generic of string option
| Dataset of Csv.t
| Dataset of Call_provers.prover_answer list
(** A prover answer per data point. *)
val verify :
?debug:bool ->
......@@ -52,8 +53,8 @@ val verify :
Prover.t ->
?prover_altern:string ->
File.t ->
theory_answer
(** [verify debug format loadpath memlimit timelimit dataset prover prover_altern file]
verification_result
(** [verify ?debug ?format ~loadpath ?memlimit ?timelimit ?dataset prover ?prover_altern file]
launches a verification of the given [file] with the provided [prover] and
[dataset].
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment