From 9f344c60022edb709d97b6a5dd8139cf00f80d20 Mon Sep 17 00:00:00 2001 From: Julien Girard <julien.girard2@cea.fr> Date: Mon, 22 May 2023 15:27:33 +0200 Subject: [PATCH] [tests] Initial display of prepared tasks for provers --- src/aimos.ml | 3 + src/verification.ml | 6 + tests/aimos.t | 25 ++++ tests/cvc5.t | 285 +++++++++++++++++++++++++++++++++++++++++++- tests/pyrat.t | 36 +++++- tests/pyrat_onnx.t | 9 +- 6 files changed, 360 insertions(+), 4 deletions(-) diff --git a/src/aimos.ml b/src/aimos.ml index f51a3b1..76fe9ea 100644 --- a/src/aimos.ml +++ b/src/aimos.ml @@ -94,6 +94,9 @@ let build_command config_prover amplitude in let command = Whyconf.get_complete_command ~with_steps:false config_prover in + Logs.info (fun m -> + m "@[Task sent to prover:\n%s@]" + (Stdio.In_channel.read_all (Unix.realpath aimos_config))); Re__Core.replace_string re_aimos_file ~by:aimos_config command let re_aimos_output = Re__Pcre.regexp "((,\\s)(\\d+\\.\\d+))" diff --git a/src/verification.ml b/src/verification.ml index 93df264..415a0f2 100644 --- a/src/verification.ml +++ b/src/verification.ml @@ -175,6 +175,11 @@ let combine_prover_answers answers = | Call_provers.Valid, r | r, Call_provers.Valid -> r | _ -> acc) +let print_prover_task driver task = + let print fm = ignore (Driver.print_task_prepared driver fm task) in + Logs.info (fun m -> m "@[Task sent to prover:\n%t@]" print) + +(* TODO: does printing the task means printing the csv here?*) let answer_dataset limit config env prover config_prover driver dataset task = let dataset_predicate = let on_model ls = @@ -256,6 +261,7 @@ let answer_generic limit config prover config_prover driver ~proof_strategy task List.map tasks ~f:(call_prover_on_task limit config command driver)) in let prover_answer = combine_prover_answers answers in + List.iter ~f:(fun t -> print_prover_task driver t) tasks; let additional_info = Generic None in (prover_answer, additional_info) diff --git a/tests/aimos.t b/tests/aimos.t index 1d53b28..5d318b9 100644 --- a/tests/aimos.t +++ b/tests/aimos.t @@ -23,3 +23,28 @@ Test verify > end > EOF [caisar] Goal G: Valid + + $ caisar verify -L . --format whyml --prover=AIMOS --dataset=test_data.csv --verbosity=info - 2>&1 <<EOF + > theory AIMOS_oracle + > use TestNetwork.AsArray + > use ieee_float.Float64 + > use caisar.DatasetClassification + > use caisar.DatasetClassificationProps + > + > goal G: meta_robust model dataset (1.0:t) ("reluplex_rotation":string) ("config/custom_transformations.py":string) ("classif_min":string) (0:int) (0:int) (0:int) + > end + > EOF + [caisar][INFO] Task sent to prover: + options: + plot: false + inputs_path: $TESTCASE_ROOT/test_data.csv + transformations: + - name: reluplex_rotation + custom_t_path: config/custom_transformations.py + models: + - defaults: + models_path: /home/julien/LAISER/CAISAR/caisar/_build/default/tests/TestNetwork.nnet + out_mode: classif_min + + [caisar][INFO] Verification results for theory 'AIMOS_oracle' + [caisar] Goal G: Valid diff --git a/tests/cvc5.t b/tests/cvc5.t index bfca315..87aeec0 100644 --- a/tests/cvc5.t +++ b/tests/cvc5.t @@ -7,7 +7,7 @@ Test verify $ PATH=$(pwd)/bin:$PATH $ caisar verify -L . --format whyml --prover=CVC5 - 2>&1 <<EOF | ./filter_tmpdir.sh - > theory T + > theory CVC5 > use TestNetworkONNX.AsTuple as TN > use ieee_float.Float64 > @@ -18,3 +18,286 @@ Test verify > end > EOF [caisar] Goal G: Unknown (unknown) + $ caisar verify -L . --format whyml --prover=CVC5 --verbosity=info - 2>&1 <<EOF | ./filter_tmpdir.sh + > theory CVC5_oracle + > use TestNetworkONNX.AsTuple as TN + > use ieee_float.Float64 + > + > goal G: forall x1 x2 x3. + > (0.0:t) .< x1 .< (0.5:t) -> + > let (y1,y2) = TN.nn_apply x1 x2 x3 in + > (0.0:t) .< y1 .< (0.5:t) /\ (0.0:t) .< y2 .< (1.0:t) + > end + > EOF + [caisar][INFO] Task sent to prover: + ;; produced by cvc5.drv ;; + (set-logic ALL) + (set-info :smt-lib-version 2.6) + ;;; generated by SMT-LIB2 driver + ;;; SMT-LIB2 driver: bit-vectors, common part + ;;; SMT-LIB2: integer arithmetic + ;;; SMT-LIB2: real arithmetic + (define-fun fp.isFinite64 ((x Float64)) Bool (not (or (fp.isInfinite x) (fp.isNaN x)))) + (define-fun fp.isIntegral64 ((x Float64)) Bool (or (fp.isZero x) (and (fp.isNormal x) (= x (fp.roundToIntegral RNE x))))) + (declare-sort uni 0) + + (declare-sort ty 0) + + ;; "sort" + (declare-fun sort (ty + uni) Bool) + + ;; "witness" + (declare-fun witness (ty) uni) + + ;; "witness_sort" + (assert + (forall ((a ty)) (sort a (witness a)))) + + ;; "int" + (declare-fun int () ty) + + ;; "real" + (declare-fun real () ty) + + ;; "t" + (declare-fun t () ty) + + ;; "mode" + (declare-fun mode () ty) + + ;; "add_div" + (assert + (forall ((x Real) (y Real) (z Real)) + (=> + (not (= z 0.0)) + (= (/ (+ x y) z) (+ (/ x z) (/ y z)))))) + + ;; "sub_div" + (assert + (forall ((x Real) (y Real) (z Real)) + (=> + (not (= z 0.0)) + (= (/ (- x y) z) (- (/ x z) (/ y z)))))) + + ;; "neg_div" + (assert + (forall ((x Real) (y Real)) + (=> + (not (= y 0.0)) + (= (/ (- x) y) (- (/ x y)))))) + + ;; "assoc_mul_div" + (assert + (forall ((x Real) (y Real) (z Real)) + (=> + (not (= z 0.0)) + (= (/ (* x y) z) (* x (/ y z)))))) + + ;; "assoc_div_mul" + (assert + (forall ((x Real) (y Real) (z Real)) + (=> + (and + (not (= y 0.0)) + (not (= z 0.0))) + (= (/ (/ x y) z) (/ x (* y z)))))) + + ;; "assoc_div_div" + (assert + (forall ((x Real) (y Real) (z Real)) + (=> + (and + (not (= y 0.0)) + (not (= z 0.0))) + (= (/ x (/ y z)) (/ (* x z) y))))) + + ;; "CompatOrderMult" + (assert + (forall ((x Real) (y Real) (z Real)) + (=> + (<= x y) + (=> + (<= 0.0 z) + (<= (* x z) (* y z)))))) + + ;; "max_real" + (declare-fun max_real () Real) + + ;; "max_int" + (declare-fun max_int () Int) + + ;; "sqr" + (declare-fun sqr (Real) Real) + + ;; "sqr'def" + (assert + (forall ((x Real)) (= (sqr x) (* x x)))) + + ;; "sqrt" + (declare-fun sqrt1 (Real) Real) + + ;; "Sqrt_positive" + (assert + (forall ((x Real)) + (=> (<= 0.0 x) (<= 0.0 (sqrt1 x))))) + + ;; "Sqrt_square" + (assert + (forall ((x Real)) + (=> (<= 0.0 x) (= (sqr (sqrt1 x)) x)))) + + ;; "Square_sqrt" + (assert + (forall ((x Real)) + (=> (<= 0.0 x) (= (sqrt1 (* x x)) x)))) + + ;; "Sqrt_mul" + (assert + (forall ((x Real) (y Real)) + (=> + (and (<= 0.0 x) (<= 0.0 y)) + (= (sqrt1 (* x y)) (* (sqrt1 x) + (sqrt1 + y)))))) + + ;; "Sqrt_le" + (assert + (forall ((x Real) (y Real)) + (=> + (and (<= 0.0 x) (<= x y)) + (<= (sqrt1 x) (sqrt1 y))))) + + ;; "relu" + (declare-fun relu (Float64) Float64) + + ;; "relu'def" + (assert + (forall ((a Float64)) + (ite (fp.lt (fp #b0 #b00000000000 #b0000000000000000000000000000000000000000000000000000) a) + (= (relu a) a) + (= (relu a) (fp #b0 #b00000000000 #b0000000000000000000000000000000000000000000000000000))))) + + (declare-datatypes ((tuple2 2)) + ((par (a + a1) ((Tuple2 + (Tuple2_proj a)(Tuple2_proj1 a1)))))) + + ;; "tuple2" + (declare-fun tuple21 (ty + ty) ty) + + ;; "Tuple2" + (declare-fun Tuple21 (ty + ty + uni + uni) uni) + + ;; "Tuple2_sort" + (assert + (forall ((a2 ty) (a3 ty)) + (forall ((x uni) (x1 uni)) + (sort + (tuple21 a3 a2) + (Tuple21 a3 a2 x x1))))) + + ;; "Tuple2_proj_1" + (declare-fun Tuple2_proj_1 (ty + ty + uni) uni) + + ;; "Tuple2_proj_1_sort" + (assert + (forall ((a2 ty) (a3 ty)) + (forall ((x uni)) + (sort a3 (Tuple2_proj_1 a3 a2 x))))) + + ;; "Tuple2_proj_1'def" + (assert + (forall ((a2 ty) (a3 ty)) + (forall ((u uni) (u1 uni)) + (=> + (sort a3 u) + (= (Tuple2_proj_1 + a3 + a2 + (Tuple21 a3 a2 u u1)) u))))) + + ;; "Tuple2_proj_2" + (declare-fun Tuple2_proj_2 (ty + ty + uni) uni) + + ;; "Tuple2_proj_2_sort" + (assert + (forall ((a2 ty) (a3 ty)) + (forall ((x uni)) + (sort a2 (Tuple2_proj_2 a3 a2 x))))) + + ;; "Tuple2_proj_2'def" + (assert + (forall ((a2 ty) (a3 ty)) + (forall ((u uni) (u1 uni)) + (=> + (sort a2 u1) + (= (Tuple2_proj_2 + a3 + a2 + (Tuple21 a3 a2 u u1)) u1))))) + + ;; "tuple2_inversion" + (assert + (forall ((a2 ty) (a3 ty)) + (forall ((u uni)) + (=> + (sort (tuple21 a3 a2) u) + (= u (Tuple21 + a3 + a2 + (Tuple2_proj_1 a3 a2 u) + (Tuple2_proj_2 a3 a2 u))))))) + + ;; Goal "G" + ;; File "stdin", line 5, characters 7-8 + (assert + (not + (forall ((x1 Float64) (x2 Float64) (x3 Float64)) + (=> + (and + (fp.lt (fp #b0 #b00000000000 #b0000000000000000000000000000000000000000000000000000) x1) + (fp.lt x1 (fp #b0 #b01111111110 #b0000000000000000000000000000000000000000000000000000))) + (match (let ((n_id_1_2 x3)) + (let ((n_id_1_1 x2)) + (let ((n_id_1_0 x1)) + (let ((n_id_2_1 (fp.add RNE (fp.add RNE (fp.add RNE (fp #b0 #b00000000000 #b0000000000000000000000000000000000000000000000000000) (fp.mul RNE n_id_1_0 (fp #b1 #b01111111100 #b0010110110101110110001100000000000000000000000000000))) (fp.mul RNE n_id_1_1 (fp #b0 #b01111111100 #b0001010101101010011100100000000000000000000000000000))) (fp.mul RNE n_id_1_2 (fp #b1 #b01111111100 #b0001110001000101101000100000000000000000000000000000))))) + (let ((n_id_2_0 (fp.add RNE (fp.add RNE (fp.add RNE (fp #b0 #b00000000000 #b0000000000000000000000000000000000000000000000000000) (fp.mul RNE n_id_1_0 (fp #b1 #b01111111100 #b0010110110101110110001100000000000000000000000000000))) (fp.mul RNE n_id_1_1 (fp #b0 #b01111111100 #b0001010101101010011100100000000000000000000000000000))) (fp.mul RNE n_id_1_2 (fp #b1 #b01111111100 #b0001110001000101101000100000000000000000000000000000))))) + (let ((n_id_3_1 (fp.add RNE n_id_2_1 (fp #b1 #b01111111110 #b0001011011111110000011100000000000000000000000000000)))) + (let ((n_id_3_0 (fp.add RNE n_id_2_0 (fp #b0 #b01111111101 #b0110010010001010001001100000000000000000000000000000)))) + (let ((n_id_4_1 + (relu + n_id_3_1))) + (let ((n_id_4_0 + (relu + n_id_3_0))) + (let ((n_id_5_1 (fp.add RNE (fp.add RNE (fp #b0 #b00000000000 #b0000000000000000000000000000000000000000000000000000) (fp.mul RNE n_id_4_0 (fp #b1 #b01111111101 #b0111110011110010111010000000000000000000000000000000))) (fp.mul RNE n_id_4_1 (fp #b0 #b01111111110 #b0100011001000110100010000000000000000000000000000000))))) + (let ((n_id_5_0 (fp.add RNE (fp.add RNE (fp #b0 #b00000000000 #b0000000000000000000000000000000000000000000000000000) (fp.mul RNE n_id_4_0 (fp #b1 #b01111111101 #b0111110011110010111010000000000000000000000000000000))) (fp.mul RNE n_id_4_1 (fp #b0 #b01111111110 #b0100011001000110100010000000000000000000000000000000))))) + (let ((n_id_6_1 (fp.add RNE n_id_5_1 (fp #b1 #b01111111110 #b0011101101010011010001100000000000000000000000000000)))) + (let ((n_id_6_0 (fp.add RNE n_id_5_0 (fp #b0 #b01111111110 #b0011000001100111100001000000000000000000000000000000)))) + (let ((y2 n_id_6_1)) + (let ((y1 n_id_6_0)) + (Tuple2 + y1 + y2)))))))))))))))) ( + ((Tuple2 x x4) (and + (and + (fp.lt (fp #b0 #b00000000000 #b0000000000000000000000000000000000000000000000000000) x) + (fp.lt x (fp #b0 #b01111111110 #b0000000000000000000000000000000000000000000000000000))) + (and + (fp.lt (fp #b0 #b00000000000 #b0000000000000000000000000000000000000000000000000000) x4) + (fp.lt x4 (fp #b0 #b01111111111 #b0000000000000000000000000000000000000000000000000000))))))))))) + + (check-sat) + (get-info :reason-unknown) + + [caisar][INFO] Verification results for theory 'CVC5_oracle' + [caisar] Goal G: Unknown (unknown) diff --git a/tests/pyrat.t b/tests/pyrat.t index a59926e..8de49de 100644 --- a/tests/pyrat.t +++ b/tests/pyrat.t @@ -7,7 +7,8 @@ Test verify $ PATH=$(pwd)/bin:$PATH $ caisar verify -L . --format whyml --prover=PyRAT - 2>&1 <<EOF | ./filter_tmpdir.sh - > theory T + > theory PyRAT + > use TestNetwork.AsTuple > use ieee_float.Float64 > use bool.Bool > use int.Int @@ -31,3 +32,36 @@ Test verify > EOF [caisar] Goal G: Unknown () [caisar] Goal H: Unknown () + + $ caisar verify -L . --format whyml --prover=PyRAT --verbosity=info - 2>&1 <<EOF | ./filter_tmpdir.sh + > theory PyRAT_oracle + > use TestNetwork.AsTuple + > use ieee_float.Float64 + > use bool.Bool + > use int.Int + > use interpretation.Vector + > use interpretation.NeuralNetwork + > + > constant nn: nn = read_neural_network "TestNetwork.nnet" NNet + > + > goal H: + > forall i: vector t. + > has_length i 5 -> + > ((0.0:t) .<= i[0] .<= (0.5:t) \/ (0.375:t) .<= i[0] .<= (0.75:t)) /\ (0.5:t) .<= i[1] .<= (1.0:t) -> + > ((0.0:t) .< (nn@@i)[0] \/ (0.5:t) .< (nn@@i)[0]) /\ (0.0:t) .< (nn@@i)[1] .< (0.5:t) + > end + > EOF + [caisar][INFO] Task sent to prover: + 0.0 < x0 + x0 < 0.5 + 0.5 < x1 + x1 < 1.0 + (0.0 < y0 or 0.5 < y0) and 0.0 < y1 and y1 < 0.5 + [caisar][INFO] Task sent to prover: + 0.375 < x0 + x0 < 0.75 + 0.5 < x1 + x1 < 1.0 + (0.0 < y0 or 0.5 < y0) and 0.0 < y1 and y1 < 0.5 + [caisar][INFO] Verification results for theory 'PyRAT_oracle' + [caisar] Goal H: Unknown () diff --git a/tests/pyrat_onnx.t b/tests/pyrat_onnx.t index 168624e..280fd97 100644 --- a/tests/pyrat_onnx.t +++ b/tests/pyrat_onnx.t @@ -6,8 +6,8 @@ Test verify $ PATH=$(pwd)/bin:$PATH - $ caisar verify -L . --format whyml --prover=PyRAT - 2>&1 <<EOF | ./filter_tmpdir.sh - > theory T + $ caisar verify -L . --format whyml --prover=PyRAT --verbosity=info - 2>&1 <<EOF | ./filter_tmpdir.sh + > theory PyRAT_ONNX > use ieee_float.Float64 > use bool.Bool > use int.Int @@ -23,4 +23,9 @@ Test verify > (0.0:t) .< (nn@@i)[0] .< (0.5:t) > end > EOF + [caisar][INFO] Task sent to prover: + 0.0 < x0 + x0 < 0.5 + 0.0 < y0 and y0 < 0.5 + [caisar][INFO] Verification results for theory 'T' [caisar] Goal G: Unknown () -- GitLab