From 93e5b94ff4497cc5aebf34b2800b3389fc3aa9f0 Mon Sep 17 00:00:00 2001 From: Michele Alberti <michele.alberti@cea.fr> Date: Thu, 8 Dec 2022 11:52:33 +0100 Subject: [PATCH] [verification] Introduce verification result type. Decouple the verification process from its result logging. --- src/main.ml | 39 +++++++--- src/verification.ml | 173 +++++++++++++++++++++++++------------------ src/verification.mli | 30 ++++++-- 3 files changed, 150 insertions(+), 92 deletions(-) diff --git a/src/main.ml b/src/main.ml index 85c830e..85ab60a 100644 --- a/src/main.ml +++ b/src/main.ml @@ -119,16 +119,34 @@ let timelimit_of_string s = | [ v ], [ "h" ] -> Int.of_string v * 3600 | _ -> invalid_arg "Unrecognized time limit" -let verify ?format ~loadpath ?memlimit ?timelimit ?dataset ?outfile prover files - = +let log_theory_answer = + let open Why3 in + Wstdlib.Mstr.iter (fun theory_name task_answers -> + Logs.info (fun m -> + m "@[Verification results for theory `%s'@]" theory_name); + List.iter task_answers + ~f:(fun { Verification.id; result; additional_info } -> + let additional_info = + match additional_info with Generic s -> s | Dataset _ -> None + in + Logs.app (fun m -> + m "@[Goal %a:@ %a%a@]" Pretty.print_pr id + Call_provers.print_prover_answer result + Fmt.(option ~none:nop (any " " ++ string)) + additional_info))) + +let verify ?format ~loadpath ?memlimit ?timelimit ?dataset prover files = let debug = log_level_is_debug () in let memlimit = Option.map memlimit ~f:memlimit_of_string in let timelimit = Option.map timelimit ~f:timelimit_of_string in - List.iter - ~f: - (Verification.verify ~debug ?format ~loadpath ?memlimit ?timelimit - ?dataset ?outfile prover) - files + let theory_answers = + List.map files + ~f: + (Verification.verify ~debug ?format ~loadpath ?memlimit ?timelimit + ?dataset prover) + in + List.iter theory_answers ~f:log_theory_answer; + theory_answers let verify_json ?memlimit ?timelimit ?outfile json = let jin = Result.ok_exn (Verification.JSON.input_of_string json) in @@ -147,11 +165,11 @@ let verify_json ?memlimit ?timelimit ?outfile json = | None -> Option.map jin.time_limit ~f:Int.to_string | Some _ -> timelimit in - let outfile = + let _outfile = (* Precedence to the command line option, if any. *) match outfile with None -> jin.output_file | Some _ -> outfile in - verify ~loadpath:[] ?memlimit ?timelimit ~dataset ?outfile prover [ file ] + verify ~loadpath:[] ?memlimit ?timelimit ~dataset prover [ file ] let exec_cmd cmdname cmd = Logs.debug (fun m -> m "Command `%s'" cmdname); @@ -251,7 +269,8 @@ let verify_cmd = Term.( const (fun format loadpath memlimit timelimit prover dataset files _ -> exec_cmd cmdname (fun () -> - verify ?format ~loadpath ?memlimit ?timelimit ?dataset prover files)) + ignore + (verify ?format ~loadpath ?memlimit ?timelimit ?dataset prover files))) $ format $ loadpath $ memlimit $ timelimit $ prover $ dataset $ files $ setup_logs) in diff --git a/src/verification.ml b/src/verification.ml index 7f7c7df..5329e3f 100644 --- a/src/verification.ml +++ b/src/verification.ml @@ -187,6 +187,18 @@ module File = struct | Json j -> JSON.pretty fmt j end +type theory_answer = answer list Wstdlib.Mstr.t + +and answer = { + id : Decl.prsymbol; + result : Call_provers.prover_answer; + additional_info : additional_info; +} + +and additional_info = + | Generic of string option + | Dataset of Csv.t + let () = Language.register_nnet_support (); Language.register_onnx_support (); @@ -253,45 +265,44 @@ let combine_prover_answers answers = | Call_provers.Valid, r | r, Call_provers.Valid -> r | _ -> acc) -let record_dataset_results ~dataset ~dataset_answers prover_answer outfile = - let percentage_valid = - let count = - List.count dataset_answers ~f:(fun prover_answer -> - match prover_answer with Call_provers.Valid -> true | _ -> false) - in - Float.of_int count /. Float.of_int (List.length dataset_answers) *. 100. - in - let dataset_results = - let csv_in_channel = Csv.of_channel (Stdlib.open_in dataset) in - let dataset_results, csv_out_channel = - let filename, out_channel = - Filename.open_temp_file "caisar" "results.csv" - in - (filename, Csv.to_channel out_channel) - in - let dataset = Csv.input_all csv_in_channel in - Csv.close_in csv_in_channel; - if List.length dataset = List.length dataset_answers - then - List.iter2_exn dataset dataset_answers ~f:(fun record prover_answer -> - let result = - Fmt.str "%a" Call_provers.print_prover_answer prover_answer - in - let record = result :: record in - Csv.output_record csv_out_channel record); - Csv.close_out csv_out_channel; - dataset_results - in - let output = - { JSON.result = prover_answer; percentage_valid; dataset_results } - in - let out_channel = Stdlib.open_out outfile in - Yojson.Safe.to_channel out_channel (JSON.output_to_yojson output); - Logs.info (fun m -> m "@[Results recorded in file '%s'@]" outfile); - Stdlib.close_out out_channel +(* let record_dataset_results ~dataset ~dataset_answers prover_answer outfile = + * let percentage_valid = + * let count = + * List.count dataset_answers ~f:(fun prover_answer -> + * match prover_answer with Call_provers.Valid -> true | _ -> false) + * in + * Float.of_int count /. Float.of_int (List.length dataset_answers) *. 100. + * in + * let dataset_results = + * let csv_in_channel = Csv.of_channel (Stdlib.open_in dataset) in + * let dataset_results, csv_out_channel = + * let filename, out_channel = + * Filename.open_temp_file "caisar" "results.csv" + * in + * (filename, Csv.to_channel out_channel) + * in + * let dataset = Csv.input_all csv_in_channel in + * Csv.close_in csv_in_channel; + * if List.length dataset = List.length dataset_answers + * then + * List.iter2_exn dataset dataset_answers ~f:(fun record prover_answer -> + * let result = + * Fmt.str "%a" Call_provers.print_prover_answer prover_answer + * in + * let record = result :: record in + * Csv.output_record csv_out_channel record); + * Csv.close_out csv_out_channel; + * dataset_results + * in + * let output = + * { JSON.result = prover_answer; percentage_valid; dataset_results } + * in + * let out_channel = Stdlib.open_out outfile in + * Yojson.Safe.to_channel out_channel (JSON.output_to_yojson output); + * Logs.info (fun m -> m "@[Results recorded in file '%s'@]" outfile); + * Stdlib.close_out out_channel *) -let answer_dataset ?outfile limit config env prover config_prover driver dataset - task = +let answer_dataset limit config env prover config_prover driver dataset task = let dataset_predicate = let on_model ls = let message = @@ -316,31 +327,42 @@ let answer_dataset ?outfile limit config env prover config_prover driver dataset Dataset.tasks_of_nn_csv_predicate env dataset_predicate |> List.map ~f:(Driver.prepare_task driver) in - let tasks = - (* We turn each task in [dataset_tasks] into a list (ie, conjunction) of - disjunctions of tasks. *) - match prover with - | Prover.Marabou -> List.map ~f:(Trans.apply Split.split_all) dataset_tasks - | _ -> [ dataset_tasks ] - in let command = Whyconf.get_complete_command ~with_steps:false config_prover in let command = let nn_file = Unix.realpath dataset_predicate.model.filename in Re__Core.replace_string nnet_or_onnx ~by:nn_file command in let dataset_answers = - List.map tasks ~f:(fun dataset_elt_tasks -> - let dataset_elt_answers = - List.map dataset_elt_tasks - ~f:(call_prover_on_task limit config command driver) - in - combine_prover_answers dataset_elt_answers) + match prover with + | Prover.Marabou -> + (* We turn each task in [dataset_tasks] into a list (ie, conjunction) of + disjunctions of tasks. *) + let tasks = List.map ~f:(Trans.apply Split.split_all) dataset_tasks in + List.map tasks ~f:(fun dataset_elt_tasks -> + let dataset_elt_answers = + List.map dataset_elt_tasks + ~f:(call_prover_on_task limit config command driver) + in + combine_prover_answers dataset_elt_answers) + | _ -> + List.map dataset_tasks + ~f:(call_prover_on_task limit config command driver) in let prover_answer = combine_prover_answers dataset_answers in - Option.iter - ~f:(record_dataset_results ~dataset ~dataset_answers prover_answer) - outfile; - (prover_answer, None) + let additional_info = + let csv_in_channel = Csv.of_channel (Stdlib.open_in dataset) in + let dataset = Csv.input_all csv_in_channel in + Csv.close_in csv_in_channel; + if List.length dataset <> List.length dataset_answers + then failwith "Inconsistent number of prover answers and dataset records" + else + List.map2_exn dataset dataset_answers ~f:(fun record prover_answer -> + let result = + Fmt.str "%a" Call_provers.print_prover_answer prover_answer + in + result :: record) + in + (prover_answer, additional_info) let answer_generic limit config env prover config_prover driver task = let task = Proof_strategy.apply_native_nn_prover env task in @@ -366,26 +388,31 @@ let answer_generic limit config env prover config_prover driver task = let prover_answer = combine_prover_answers answers in (prover_answer, None) -let call_prover ?dataset ?outfile ~limit config env prover config_prover driver - task = - let prover_answer, additional_info = +let call_prover ?dataset ~limit config env prover config_prover driver task = + let result, additional_info = match prover with - | Prover.Saver -> answer_saver limit config env config_prover dataset task + | Prover.Saver -> + let prover_answer, additional_info = + answer_saver limit config env config_prover dataset task + in + (prover_answer, Generic additional_info) | (Marabou | Pyrat | Nnenum) when Option.is_some dataset -> let dataset = Unix.realpath (Option.value_exn dataset) in - answer_dataset ?outfile limit config env prover config_prover driver - dataset task + let prover_answer, additional_info = + answer_dataset limit config env prover config_prover driver dataset task + in + (prover_answer, Dataset additional_info) | Marabou | Pyrat | CVC5 | Nnenum -> - answer_generic limit config env prover config_prover driver task + let prover_answer, additional_info = + answer_generic limit config env prover config_prover driver task + in + (prover_answer, Generic additional_info) in - Logs.app (fun m -> - m "@[Goal %a:@ %a%a@]" Pretty.print_pr (Task.task_goal task) - Call_provers.print_prover_answer prover_answer - Fmt.(option ~none:nop (any " " ++ string)) - additional_info) + let id = Task.task_goal task in + { id; result; additional_info } let verify ?(debug = false) ?format ~loadpath ?memlimit ?timelimit ?dataset - ?outfile prover file = + prover file = if debug then Debug.set_flag (Debug.lookup_flag "call_prover"); let env, config = create_env ~debug loadpath in let main = Whyconf.get_main config in @@ -408,8 +435,8 @@ let verify ?(debug = false) ?format ~loadpath ?memlimit ?timelimit ?dataset mlw_files | Json json -> JSON.mstr_theory_of_json env json in - Wstdlib.Mstr.iter - (fun _ theory -> + Wstdlib.Mstr.map + (fun theory -> let tasks = Task.split_theory theory None None in let config_prover = let altern = @@ -433,9 +460,7 @@ let verify ?(debug = false) ?format ~loadpath ?memlimit ?timelimit ?dataset in Driver.load_driver_absolute env file config_prover.extra_drivers in - List.iter - ~f: - (call_prover ?dataset ?outfile ~limit main env prover config_prover - driver) + List.map + ~f:(call_prover ?dataset ~limit main env prover config_prover driver) tasks) mstr_theory diff --git a/src/verification.mli b/src/verification.mli index 8ed9023..327c836 100644 --- a/src/verification.mli +++ b/src/verification.mli @@ -20,6 +20,8 @@ (* *) (**************************************************************************) +open Why3 + module JSON : sig type input = private { prover : Prover.t; @@ -35,7 +37,7 @@ module JSON : sig normalization : Dataset.normalization option; kind : Dataset.property; } - [@@deriving yojson, show] + [@@deriving yojson { strict = false }, show] type output = private { result : prover_answer; @@ -43,7 +45,7 @@ module JSON : sig dataset_results : string; } - and prover_answer = Why3.Call_provers.prover_answer = + and prover_answer = Call_provers.prover_answer = | Valid | Invalid | Timeout @@ -66,6 +68,18 @@ module File : sig val pretty : Format.formatter -> t -> unit end +type theory_answer = answer list Wstdlib.Mstr.t + +and answer = private { + id : Decl.prsymbol; + result : Call_provers.prover_answer; + additional_info : additional_info; +} + +and additional_info = private + | Generic of string option + | Dataset of Csv.t + val verify : ?debug:bool -> ?format:string -> @@ -73,13 +87,11 @@ val verify : ?memlimit:int -> ?timelimit:int -> ?dataset:string -> - ?outfile:string -> Prover.t -> File.t -> - unit -(** [verify debug format loadpath memlimit timeout dataset outfile prover file] - launches a verification of the given [file] with the provided [prover] and - [dataset]. + theory_answer +(** [verify debug format loadpath memlimit timeout dataset prover file] launches + a verification of the given [file] with the provided [prover] and [dataset]. @param debug when set, enables debug information. @param format is the [file] format. @@ -90,4 +102,6 @@ val verify : @param dataset is the filepath of a dataset (eg, in CSV format) possibly appearing in [file]. - @param outfile is the filepath where recording the verification results. *) + @return + for each theory, an [answer] for each goal of the theory, respecting the + order of appearance. *) -- GitLab