From eb0d991eb60d302f33cfe7e32a997735d9fd34fd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?David=20B=C3=BChler?= <david.buhler@cea.fr>
Date: Thu, 21 Mar 2024 14:11:06 +0100
Subject: [PATCH] [Eva] Fixes garbled mix origins id.

Do not add a unique id into the origin datatype, as this interacts badly with
hashconsing.
Instead, binds each origin to a unique id in the global table used to record
garbled mix history.
---
 .../abstract_interp/locations.ml              |   7 +-
 src/kernel_services/abstract_interp/origin.ml | 128 ++++++++++++------
 .../abstract_interp/origin.mli                |   3 +
 .../oracle/multidim-relations.res.oracle      |   3 -
 4 files changed, 90 insertions(+), 51 deletions(-)

diff --git a/src/kernel_services/abstract_interp/locations.ml b/src/kernel_services/abstract_interp/locations.ml
index 4e951a71e40..768e9c12c42 100644
--- a/src/kernel_services/abstract_interp/locations.ml
+++ b/src/kernel_services/abstract_interp/locations.ml
@@ -196,10 +196,9 @@ module Location_Bytes = struct
     if Base.Hptset.(equal b empty || equal b Base.null_set) then
       top_int
     else begin
-      if Base.Hptset.mem Base.null b then
-        Top (Base.SetLattice.inject b, o)
-      else
-        Top (Base.(SetLattice.inject (Hptset.add null b)), o)
+      let bases = Base.SetLattice.inject Base.(Hptset.add null b) in
+      Origin.register bases o;
+      Top (bases, o)
     end
 
   (** some functions can reduce a garbled mix, make sure to normalize
diff --git a/src/kernel_services/abstract_interp/origin.ml b/src/kernel_services/abstract_interp/origin.ml
index eb11a9658de..99acf93c89c 100644
--- a/src/kernel_services/abstract_interp/origin.ml
+++ b/src/kernel_services/abstract_interp/origin.ml
@@ -48,23 +48,10 @@ let kind_label = function
 type location = Cil_datatype.Location.t
 
 type tt =
-  | Origin of { kind: kind; loc: location; id: int; }
+  | Origin of { kind: kind; loc: location; }
   | Well
   | Unknown
 
-(* Unique id for each origin. Used to keep the oldest origin when a garbled
-   mix may have several origins. *)
-module Id = State_builder.Counter (struct let name = "Origin.Id" end)
-
-let current kind =
-  let id = Id.next () in
-  let loc = Current_loc.get () in
-  Origin { kind; loc; id; }
-
-let well = Well
-let unknown = Unknown
-let is_unknown t = t = Unknown
-
 module Prototype = struct
   include Datatype.Serializable_undefined
   type t = tt
@@ -99,9 +86,9 @@ end
 
 include Datatype.Make_with_collections (Prototype)
 
-let pretty_as_reason fmt org =
-  if not (is_unknown org)
-  then Format.fprintf fmt " because of %a" pretty org
+let pretty_as_reason fmt origin =
+  if origin <> Unknown
+  then Format.fprintf fmt " because of %a" pretty origin
 
 let descr = function
   | Unknown -> "unknown origin"
@@ -114,17 +101,16 @@ let descr = function
     | Merge -> "imprecise merge of addresses"
     | Arith -> "arithmetic operation on addresses"
 
-(* Keep the oldest known origin: it is probably the most relevant origin, as
-   subsequent ones may have been created because of the first. *)
-let join t1 t2 =
-  if t1 == t2 then t1 else
-    match t1, t2 with
-    | Unknown, x | x, Unknown -> x
-    | Well, _ | _, Well -> Well
-    | Origin o1, Origin o2 -> if o1.id <= o2.id then t1 else t2
+
+let current kind = Origin { kind; loc = Current_loc.get (); }
+
+let well = Well
+let unknown = Unknown
+let is_unknown t = t = Unknown
 
 
 (* For each garbled mix origin, keep track of:
+   - a unique id used to keep the oldest origin when merging two garbled mix.
    - the number of writes (according to [register_write] below), i.e. the number
      of times a garbled mix with this origin has been written in a state during
      the analysis.
@@ -137,35 +123,90 @@ let join t1 t2 =
 
    These info are printed at the end of an Eva analysis. *)
 
-module History_Info = struct
-  let name = "Origin.History"
-  let dependencies = []
-  let size = 32
-end
-
 module LocSet = Cil_datatype.Location.Set
 
-(* Locations of writes, locations of reads, related bases. *)
-module History_Data = Datatype.Triple (LocSet) (LocSet) (Base.SetLattice)
-module History = State_builder.Hashtbl (Hashtbl) (History_Data) (History_Info)
+module History = struct
+
+  module Counter = State_builder.Counter (struct let name = "Origin.Id.Counter" end)
 
-let clear () = Id.reset (); History.clear ()
+  (* Locations of writes, locations of reads, related bases. *)
+  module Data = Datatype.Triple (LocSet) (LocSet) (Base.SetLattice)
+  module IdData = Datatype.Pair (Datatype.Int) (Data)
+
+  module History_Info = struct
+    let name = "Origin.History"
+    let dependencies = []
+    let size = 10
+  end
+
+  module Table = State_builder.Hashtbl (Hashtbl) (IdData) (History_Info)
+
+  let create () =
+    let id = Counter.next () in
+    id, (LocSet.empty, LocSet.empty, Base.SetLattice.bottom)
+
+  let register origin =
+    if not (Table.mem origin)
+    then Table.replace origin (create ())
+
+  let apply ~change origin =
+    let id, data = try Table.find origin with Not_found -> create () in
+    Table.replace origin (id, change data)
+
+  let cardinal_writes origin =
+    try let (_, (w, _r, _b)) = Table.find origin in LocSet.cardinal w
+    with Not_found -> 0
+
+  let id origin =
+    try fst (Table.find origin)
+    with Not_found -> Stdlib.max_int
+
+  let to_list () = List.of_seq (Table.to_seq ())
+
+  let clear () = Counter.reset (); Table.clear ()
+end
+
+let clear () = History.clear ()
+
+(* Keep the oldest known origin: it is probably the most relevant origin, as
+   subsequent ones may have been created because of the first. *)
+let join t1 t2 =
+  if t1 == t2 then t1 else
+    match t1, t2 with
+    | Unknown, x | x, Unknown -> x
+    | Well, _ | _, Well -> Well
+    | Origin _, Origin _ -> if History.id t1 < History.id t2 then t1 else t2
 
 let is_current = function
   | Unknown | Well -> false
   | Origin { loc } -> Cil_datatype.Location.equal loc (Current_loc.get ())
 
-(* Returns true if the origin has never been registered and is related to the
-   current location. *)
+(* Registers the creation of the garbled mix of some origins: Misalign_write and
+   Merge, which are directly written in the cvalue states, and Arith which can
+   be created by many cvalue operations.
+   Does not register:
+   - Leaf origins, as garbled mix are often created by assigns clause and then
+     reduced to precise values by postconditions. Garbled mix with leaf origin
+     are thus registered by [register_write] called at the end of the
+     interpretation of a function specification.
+   - Misalign_read, which should be registered as a read by the cvalue domain.
+     This avoids prioritizing garbled mix created by the bitwise domain.
+   - Unknown and Well origins, which are processed separately by the [join]. *)
+let register _bases t =
+  match t with
+  | Origin { kind = Misalign_write | Merge | Arith } -> History.register t
+  | Origin { kind = Leaf | Misalign_read } | Unknown | Well -> ()
+
+(* Registers a write of a garbled mix of known origin.
+   Returns true if it is related to the current location. *)
 let register_write bases t =
   if is_unknown t then false else
     let current_loc = Current_loc.get () in
-    let is_new = not (History.mem t) in
+    let is_new = History.cardinal_writes t = 0 in
     let change (w, r, b) =
       LocSet.add current_loc w, r, Base.SetLattice.join b bases
     in
-    let create _ = LocSet.singleton current_loc, LocSet.empty, bases in
-    ignore (History.memo ~change create t);
+    History.apply ~change t;
     is_new && is_current t
 
 (* Registers a read only if the current location is not that of the origin. *)
@@ -175,14 +216,13 @@ let register_read bases t =
     let change (w, r, b) =
       w, LocSet.add current_loc r, Base.SetLattice.join b bases
     in
-    let create _ = LocSet.empty, LocSet.singleton current_loc, bases in
-    ignore (History.memo ~change create t)
+    History.apply ~change t
 
 (* Returns the list of recorded origins, sorted by number of reads.
    Origins with no reads are filtered. *)
 let get_history () =
-  let list = List.of_seq (History.to_seq ()) in
-  let count (origin, (w, r, bases)) =
+  let list = History.to_list () in
+  let count (origin, (_id, (w, r, bases))) =
     if LocSet.is_empty r
     then None
     else Some (origin, (LocSet.cardinal w, LocSet.cardinal r, bases))
diff --git a/src/kernel_services/abstract_interp/origin.mli b/src/kernel_services/abstract_interp/origin.mli
index 44f23b22189..35d51309cd8 100644
--- a/src/kernel_services/abstract_interp/origin.mli
+++ b/src/kernel_services/abstract_interp/origin.mli
@@ -54,6 +54,9 @@ val descr: t -> string
 
 val join: t -> t -> t
 
+(** Records the creation of an imprecise value of the given bases. *)
+val register: Base.SetLattice.t -> t -> unit
+
 (** Records the write of an imprecise value of the given bases,
     with the given origin.
     Returns [true] if the given origin has never been written before,
diff --git a/tests/value/oracle/multidim-relations.res.oracle b/tests/value/oracle/multidim-relations.res.oracle
index cd237bf6d63..b1030ddaf97 100644
--- a/tests/value/oracle/multidim-relations.res.oracle
+++ b/tests/value/oracle/multidim-relations.res.oracle
@@ -53,9 +53,6 @@
     multidim-relations.c:19: imprecise merge of addresses
       (read in 2 statements, propagated through 0 statements)
       garbled mix of &{g; h}
-    multidim-relations.c:18: imprecise merge of addresses
-      (read in 1 statement, propagated through 0 statements)
-      garbled mix of &{g; h}
 [eva] ====== VALUES COMPUTED ======
 [eva:final-states] Values at end of function init_array:
   t[0]{.kind; .[bits 8 to 31]#} ∈ {0; 1} repeated %8 
-- 
GitLab