From f1371ff0ab4262b93dc620d5cbc751a98ffbf53e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?David=20B=C3=BChler?= <david.buhler@cea.fr>
Date: Fri, 22 Mar 2024 16:48:26 +0100
Subject: [PATCH] [Eva] Eval_terms: reduces on valid_string and
 valid_read_string ACSL predicates.

---
 src/plugins/eva/legacy/eval_terms.ml      | 62 +++++++++++++++++++++++
 tests/builtins/oracle/strchr.res.oracle   | 15 ++----
 tests/builtins/oracle/strlen.res.oracle   |  2 +-
 tests/builtins/oracle/strnlen2.res.oracle |  2 +-
 tests/builtins/oracle/wcslen.res.oracle   |  5 +-
 tests/libc/oracle/libgen_h.res.oracle     |  4 +-
 tests/libc/oracle/stdlib_c_env.res.oracle |  8 +--
 7 files changed, 73 insertions(+), 25 deletions(-)

diff --git a/src/plugins/eva/legacy/eval_terms.ml b/src/plugins/eva/legacy/eval_terms.ml
index 709c0be2138..f1bb7553e45 100644
--- a/src/plugins/eva/legacy/eval_terms.ml
+++ b/src/plugins/eva/legacy/eval_terms.ml
@@ -1990,6 +1990,59 @@ and reduce_by_valid env positive access (tset: term) =
   in
   do_one env tset
 
+(* Reduces the possible value of [arg] by assuming it points to a valid string
+   (or not if [positive] is false), for reading or writing according to [access].
+   This reduces the possible value of [arg] to a valid pointer (thus only
+   considering the first character of the string), and filters out bases that
+   cannot be a valid string because strlen returns bottom.
+   This reduction could be improved by also reducing offsets according to the
+   position of \0 in the pointed strings. *)
+and reduce_by_valid_string ~alarm_mode env positive ~wide ~access arg =
+  (* First, reduces [arg] assuming it is a valid pointer. *)
+  let env = reduce_by_valid env positive access arg in
+  try
+    let exact_location = eval_term_as_exact_locs ~alarm_mode env arg in
+    (* Reduce the cvalue [v]:
+       - if [positive] holds, remove bases which cannot be a valid string
+         as the proper strlen builtin returns bottom;
+       - if [positive] is false, remove bases which are a valid string,
+         as the proper strlen builtin returns no alarm. *)
+    let reduce v =
+      let wrapper =
+        if wide
+        then Builtins_string.frama_c_wcslen_wrapper
+        else Builtins_string.frama_c_strlen_wrapper
+      in
+      let aux base offset acc =
+        let value = Cvalue.V.inject base offset in
+        let v, alarms = apply_logic_builtin wrapper env [value] in
+        if (positive && Cvalue.V.is_bottom v)
+        || (not positive && not alarms)
+        then acc
+        else Cvalue.V.add base offset acc
+      in
+      Cvalue.V.fold_i aux v Cvalue.V.bottom
+    in
+    match exact_location with
+    | Logic_var logic_var ->
+      let cvalue = LogicVarEnv.find logic_var env.logic_vars in
+      let cvalue = reduce cvalue in
+      if V.is_bottom cvalue then raise Reduce_to_bottom;
+      add_logic_var env logic_var cvalue
+    | Location (typ_loc, locs) ->
+      let aux loc env =
+        let state = env_current_state env in
+        let v = find_or_alarm ~alarm_mode state loc in
+        let v = Cvalue_forward.reinterpret typ_loc v in
+        let v' = reduce v in
+        if V.is_bottom v' then raise Reduce_to_bottom;
+        if V.equal v' v then env else
+          let state' = Cvalue.Model.reduce_previous_binding state loc v' in
+          overwrite_current_state env state'
+      in
+      Eval_op.apply_on_all_locs aux locs env
+  with Not_an_exact_loc | LogicEvalError _ -> env
+
 (* reduce [tl] so that [rl rel tr] holds *)
 and reduce_by_left_relation ~alarm_mode env positive tl rel tr =
   try
@@ -2144,6 +2197,15 @@ and reduce_by_known_papp ~alarm_mode env positive li _labels args =
         Eval_op.apply_on_all_locs aux locsl env
     end
 
+  | "valid_read_string", [arg] ->
+    reduce_by_valid_string ~alarm_mode env positive ~wide:false ~access:Read arg
+  | "valid_string", [arg] ->
+    reduce_by_valid_string ~alarm_mode env positive ~wide:false ~access:Write arg
+  | "valid_read_wstring", [arg] ->
+    reduce_by_valid_string ~alarm_mode env positive ~wide:true ~access:Read arg
+  | "valid_wstring", [arg] ->
+    reduce_by_valid_string ~alarm_mode env positive ~wide:true ~access:Write arg
+
   | _ -> (* Do not fail here. We can be asked to reduce on predicates that we
             can evaluate, but on which we are not able to reduce on (yet ?).*)
     env
diff --git a/tests/builtins/oracle/strchr.res.oracle b/tests/builtins/oracle/strchr.res.oracle
index 4dff3499825..28d8bac402b 100644
--- a/tests/builtins/oracle/strchr.res.oracle
+++ b/tests/builtins/oracle/strchr.res.oracle
@@ -613,8 +613,6 @@
 [eva] strchr.c:524: Call to builtin strchr
 [eva:alarm] strchr.c:524: Warning: 
   function strchr: precondition 'valid_string_s' got status unknown.
-[eva:alarm] strchr.c:524: Warning: 
-  pointer subtraction. assert \base_addr(_ss_0) ≡ \base_addr(s);
 [eva] strchr.c:524: Frama_C_show_each_mystrchr: [-1..26]
 [eva] strchr.c:525: assertion got status valid.
 [eva] computing for function init_array_nondet <- strchr_unbounded <- main.
@@ -653,9 +651,7 @@
   signed overflow. assert -2147483648 ≤ _ss - s;
 [eva:alarm] strchr.c:538: Warning: 
   signed overflow. assert _ss - s ≤ 2147483647;
-[eva] strchr.c:538: 
-  Frama_C_show_each_mystrchr:
-  {{ garbled mix of &{s; "hello"} (origin: Arithmetic {strchr.c:537}) }}
+[eva] strchr.c:538: Frama_C_show_each_mystrchr: [-2147483648..2147483647]
 [eva] Recording results for strchr_invalid
 [eva] Done for function strchr_invalid
 [eva] computing for function strchr_garbled_mix_in_char <- main.
@@ -677,7 +673,7 @@
 [eva:garbled-mix:summary] 
   Origins of garbled mix generated during analysis:
     strchr.c:537: arithmetic operation on addresses
-      (read in 4 statements, propagated through 4 statements)
+      (read in 1 statement, propagated through 2 statements)
       garbled mix of &{s; "hello"}
     strchr.c:543: arithmetic operation on addresses
       (read in 1 statement, propagated through 2 statements)
@@ -743,11 +739,8 @@
    [3] ∈ {0}
   z3 ∈ {3}
 [eva:final-states] Values at end of function strchr_invalid:
-  s ∈
-   {{ garbled mix of &{s; "hello"} (origin: Arithmetic {strchr.c:537}) }}
-  unused ∈
-        {{ garbled mix of &{s; "hello"}
-         (origin: Arithmetic {strchr.c:537}) }}
+  s ∈ {{ &s + {0; 1; 2; 3} ; "hello" + {0; 1; 2; 3; 4; 5} }}
+  unused ∈ [--..--]
 [eva:final-states] Values at end of function strchr_large:
   Frama_C_entropy_source ∈ [--..--]
   c ∈ {0}
diff --git a/tests/builtins/oracle/strlen.res.oracle b/tests/builtins/oracle/strlen.res.oracle
index f49d683946a..1998d07eb2c 100644
--- a/tests/builtins/oracle/strlen.res.oracle
+++ b/tests/builtins/oracle/strlen.res.oracle
@@ -448,7 +448,7 @@
   offset4 ∈ {-1; 0; 1; 2}
   offset5 ∈ [-4..7]
   offset6 ∈ [-10..0]
-  p ∈ {{ &buf + [-10..0] }}
+  p ∈ {{ &buf[0] }}
 [eva:final-states] Values at end of function small_sets:
   s ∈ {{ "b\000c" }}
   p ∈ {{ "b\000c" + {0; 2} }}
diff --git a/tests/builtins/oracle/strnlen2.res.oracle b/tests/builtins/oracle/strnlen2.res.oracle
index d1ba49e93a3..f554968beef 100644
--- a/tests/builtins/oracle/strnlen2.res.oracle
+++ b/tests/builtins/oracle/strnlen2.res.oracle
@@ -877,7 +877,7 @@
   offset4 ∈ {-1; 0; 1; 2}
   offset5 ∈ [-4..7]
   offset6 ∈ [-10..0]
-  p ∈ {{ &buf + [-10..0] }}
+  p ∈ {{ &buf[0] }}
 [eva:final-states] Values at end of function no_zero_but_ok:
   s[0..9] ∈ {1}
    [10] ∈ {0}
diff --git a/tests/builtins/oracle/wcslen.res.oracle b/tests/builtins/oracle/wcslen.res.oracle
index 5130d9010d0..a5de0cd7797 100644
--- a/tests/builtins/oracle/wcslen.res.oracle
+++ b/tests/builtins/oracle/wcslen.res.oracle
@@ -478,8 +478,7 @@
             [1] ∈ {0}
 [eva:final-states] Values at end of function negative_offsets:
   Frama_C_entropy_source ∈ [--..--]
-  buf[0..88] ∈ {65} or UNINITIALIZED
-     [89..98] ∈ {0; 65} or UNINITIALIZED
+  buf[0..98] ∈ {65} or UNINITIALIZED
      [99] ∈ {0}
   len1 ∈ UNINITIALIZED
   len2 ∈ UNINITIALIZED
@@ -491,7 +490,7 @@
   offset4 ∈ {-1; 0; 1; 2}
   offset5 ∈ [-4..7]
   offset6 ∈ [-10..0]
-  p ∈ {{ &buf + [-40..0],0%4 }}
+  p ∈ {{ &buf[0] }}
 [eva:final-states] Values at end of function small_sets:
   s ∈ {{ L"b\000c" }}
   p ∈ {{ L"b\000c" + {0; 8} }}
diff --git a/tests/libc/oracle/libgen_h.res.oracle b/tests/libc/oracle/libgen_h.res.oracle
index a5a008b319c..9e5405c1b59 100644
--- a/tests/libc/oracle/libgen_h.res.oracle
+++ b/tests/libc/oracle/libgen_h.res.oracle
@@ -42,7 +42,7 @@
   __fc_dirname[0..4095] ∈ [--..--]
   path[0..127] ∈ [--..--]
   base ∈ {{ &__fc_basename[0] ; &path[0] }}
-  base2 ∈ {{ NULL ; &__fc_basename[0] }}
+  base2 ∈ {{ &__fc_basename[0] }}
   dir ∈ {{ &__fc_dirname[0] ; &path[0] }}
-  dir2 ∈ {{ NULL ; &__fc_dirname[0] }}
+  dir2 ∈ {{ &__fc_dirname[0] }}
   __retres ∈ {0}
diff --git a/tests/libc/oracle/stdlib_c_env.res.oracle b/tests/libc/oracle/stdlib_c_env.res.oracle
index d22196b6d84..1f2d4bb2994 100644
--- a/tests/libc/oracle/stdlib_c_env.res.oracle
+++ b/tests/libc/oracle/stdlib_c_env.res.oracle
@@ -163,8 +163,6 @@
 [eva:alarm] FRAMAC_SHARE/libc/stdlib.c:137: Warning: 
   function strchr: precondition 'valid_string_s' got status unknown.
 [eva] FRAMAC_SHARE/libc/stdlib.c:141: Call to builtin strlen
-[eva:alarm] FRAMAC_SHARE/libc/stdlib.c:141: Warning: 
-  function strlen: precondition 'valid_string_s' got status unknown.
 [eva] FRAMAC_SHARE/libc/stdlib.c:147: 
   Reusing old results for call to __fc_initenv
 [eva] computing for function Frama_C_nondet <- setenv <- main.
@@ -237,11 +235,7 @@
 [eva] computing for function unsetenv <- main.
   Called from stdlib_c_env.c:24.
 [eva] FRAMAC_SHARE/libc/stdlib.c:167: Call to builtin strchr
-[eva:alarm] FRAMAC_SHARE/libc/stdlib.c:167: Warning: 
-  function strchr: precondition 'valid_string_s' got status unknown.
 [eva] FRAMAC_SHARE/libc/stdlib.c:171: Call to builtin strlen
-[eva:alarm] FRAMAC_SHARE/libc/stdlib.c:171: Warning: 
-  function strlen: precondition 'valid_string_s' got status unknown.
 [eva] FRAMAC_SHARE/libc/stdlib.c:177: 
   Reusing old results for call to __fc_initenv
 [eva] computing for function Frama_C_nondet <- unsetenv <- main.
@@ -311,7 +305,7 @@
    [8..9] ∈ {0}
   i2 ∈ [--..--]
   r1 ∈ {{ NULL ; &s[0] ; &__fc_env_strings + [0..63] ; "BLA=1" }}
-  r2 ∈ {{ NULL ; &s[0] ; &__fc_env_strings + [0..63] ; "BLA=1" }}
+  r2 ∈ {{ &s[0] ; &__fc_env_strings + [0..63] ; "BLA=1" }}
   i3 ∈ {-1; 0}
   i4 ∈ {-1; 0}
   i5 ∈ {-1; 0}
-- 
GitLab