From 7dd8cb6a3b25bcd4c46035c4a996f107f113d721 Mon Sep 17 00:00:00 2001
From: Virgile Prevosto <virgile.prevosto@m4x.org>
Date: Mon, 26 Feb 2024 19:27:26 +0100
Subject: [PATCH] [parser] towards better locations for syntax errors

---
 src/kernel_internals/parsing/cparser.mly      | 83 +++++++++++--------
 src/kernel_internals/parsing/errorloc.ml      | 43 +++++-----
 src/kernel_internals/parsing/errorloc.mli     | 10 +--
 tests/syntax/oracle/error_end_decl.res.oracle | 16 ++++
 4 files changed, 90 insertions(+), 62 deletions(-)

diff --git a/src/kernel_internals/parsing/cparser.mly b/src/kernel_internals/parsing/cparser.mly
index f47a10fb825..7f288f6aea6 100644
--- a/src/kernel_internals/parsing/cparser.mly
+++ b/src/kernel_internals/parsing/cparser.mly
@@ -89,14 +89,15 @@ and isJUSTBASE = function
   | PARENTYPE (_, d, _) -> isJUSTBASE d
   | _ -> false
 
-let announceFunctionName ((n, decl, _, _):name) =
+let announceFunctionName ((n, decl, _, loc):name) =
   !Lexerhack.add_identifier n;
   (* Start a context that includes the parameter names and the whole body.
    * Will pop when we finish parsing the function body *)
   !Lexerhack.push_context ();
   (try findProto decl
    with NoProto ->
-     Errorloc.parse_error "Cannot find the prototype in a function definition");
+     Errorloc.parse_error
+       ~loc "Cannot find the prototype in a function definition");
   currentFunctionName := n
 
 let check_funspec_abrupt_clauses fname spec =
@@ -106,8 +107,8 @@ let check_funspec_abrupt_clauses fname spec =
 	(function
 	| (Cil_types.Normal | Cil_types.Exits),_ -> ()
 	| (Cil_types.Breaks | Cil_types.Continues | Cil_types.Returns),
-          { Logic_ptree.tp_statement = { lexpr_loc = (loc,_)}} ->
-          Errorloc.parse_error ~source:loc
+          { Logic_ptree.tp_statement = { lexpr_loc = loc}} ->
+          Errorloc.parse_error ~loc
             "Specification of function %s can only contain ensures or \
                  exits post-conditions" fname)
 	bhv.Logic_ptree.b_post_cond)
@@ -206,18 +207,18 @@ let doOldParDecl (names: string list)
   let args = List.map findOneName names in
   (args, isva)
 
-let int64_to_char value =
+let int64_to_char ~loc value =
   if (Int64.compare value (Int64.of_int 255) > 0) ||
     (Int64.compare value Int64.zero < 0) then
-    Errorloc.parse_error "integral literal 0x%Lx too big" value
+    Errorloc.parse_error ~loc "integral literal 0x%Lx too big" value
   else
     Char.chr (Int64.to_int value)
 
 (* takes a not-nul-terminated list, and converts it to a string. *)
-let intlist_to_string str =
+let intlist_to_string ~loc str =
   let buffer = Buffer.create 64 in
   let add_char c =
-    Buffer.add_char buffer (int64_to_char c)
+    Buffer.add_char buffer (int64_to_char ~loc c)
   in
   let add_char_list l = List.iter add_char l in
   List.iter add_char_list str ;
@@ -276,13 +277,14 @@ let transformOffsetOf (speclist, dtype) member =
   let rec replaceBase e =
     let node = match e.expr_node with
       | VARIABLE field ->
-	  MEMBEROFPTR (castExpr, field)
+        MEMBEROFPTR (castExpr, field)
       | MEMBEROF (base, field) ->
-	  MEMBEROF (replaceBase base, field)
+        MEMBEROF (replaceBase base, field)
       | INDEX (base, index) ->
-	  INDEX (replaceBase base, index)
+        INDEX (replaceBase base, index)
       | _ ->
-	Errorloc.parse_error "malformed offset expression in offsetof macro"
+        Errorloc.parse_error ~loc:e.expr_loc
+          "malformed offset expression in offsetof macro"
     in { e with expr_node = node }
   in
   let memberExpr = replaceBase member in
@@ -479,14 +481,17 @@ global:
 | EXTERN string_constant declaration
     { LINKAGE (fst $2, (*handleLoc*) (snd $2), [ $3 ]) }
 | EXTERN string_constant LBRACE globals RBRACE
-    { LINKAGE (fst $2, (*handleLoc*) (snd $2),
-                 List.map
-                   (fun (x,y) ->
-                      if x then
-                        Errorloc.parse_error "invalid ghost in extern linkage \
-                                              specification"
-		      else y)
-                   $4)  }
+    { LINKAGE (
+      fst $2, (*handleLoc*) (snd $2),
+      List.map
+        (fun (x,y) ->
+           if x then
+             let loc = Cabshelper.get_definitionloc y in
+             Errorloc.parse_error ~loc
+               "invalid ghost in extern linkage specification"
+           else y)
+      $4)
+    }
 | ASM LPAREN string_constant RPAREN SEMICOLON
                                         { GLOBASM (fst $3, (*handleLoc*) $1) }
 | pragma                                { $1 }
@@ -759,7 +764,9 @@ constant:
 string_constant:
 /* Now that we know this constant isn't part of a wstring, convert it
    back to a string for easy viewing. */
-    string_list                         { intlist_to_string (fst $1), snd $1 }
+    string_list                         {
+      let loc = Cil_datatype.Location.of_lexing_loc $loc in
+      intlist_to_string ~loc (fst $1), snd $1 }
 ;
 
 string_list:
@@ -979,7 +986,7 @@ statement:
           let loc = Cil_datatype.Location.of_lexing_loc $loc($1) in
           match $4 with
           | [] -> (* should not happen if grammar is written correctly *)
-            Errorloc.parse_error "empty statement after label"
+            Errorloc.parse_error ~loc "empty statement after label"
           | s :: others -> no_ghost [LABEL($1,s,loc)] @ others }
 
 |   CASE expression COLON annotated_statement
@@ -1109,7 +1116,8 @@ generic_selection: /* ISO C11 6.5.1.1 */
       }
 |   GENERIC LPAREN assignment_expression RPAREN
       {
-        Errorloc.parse_error
+        let loc = Cil_datatype.Location.of_lexing_loc $loc in
+        Errorloc.parse_error ~loc
           "_Generic requires at least one generic association";
       }
 
@@ -1442,12 +1450,14 @@ pointer_opt:
 ;
 
 type_name: /* (* ISO 6.7.6 *) */
-  decl_spec_list abstract_decl { let d, a = $2 in
-                                 if a <> [] then
-                                   Errorloc.parse_error
-                                     "attributes in type name";
-                                 (fst $1, d)
-                               }
+  specif=decl_spec_list name=abstract_decl {
+    let d, a = name in
+    if a <> [] then begin
+      let loc = Cil_datatype.Location.of_lexing_loc ($loc(name)) in
+      Errorloc.parse_error ~loc "attributes in type name"
+    end;
+    (fst specif, d)
+  }
 | decl_spec_list               { (fst $1, JUSTBASE) }
 ;
 abstract_decl: /* (* ISO 6.7.6. *) */
@@ -1556,9 +1566,8 @@ cvspec:
     {
       let annot, loc = $1 in
       if String.compare annot "\\ghost" = 0 then begin
-        let start = $startpos in
-        let source = Cil_datatype.Position.of_lexing_pos start in
-        Errorloc.parse_error ~source "Use of \\ghost out of ghost code"
+        let loc = Cil_datatype.Location.of_lexing_loc $loc in
+        Errorloc.parse_error ~loc "Use of \\ghost out of ghost code"
       end else
         SpecCV(CV_ATTRIBUTE_ANNOT annot), loc
     }
@@ -1834,8 +1843,14 @@ asmattr:
 |    CONST asmattr                      { ("const", []) :: $2 }
 ;
 asmtemplate:
-    one_string                         { [intlist_to_string [fst $1]] }
-|   one_string asmtemplate             { intlist_to_string [fst $1] :: $2 }
+    line=one_string                         {
+      let loc = Cil_datatype.Location.of_lexing_loc $loc in
+      [intlist_to_string ~loc [fst line]]
+    }
+|   line=one_string rest=asmtemplate             {
+  let loc = Cil_datatype.Location.of_lexing_loc $loc(line) in
+  intlist_to_string ~loc [fst line] :: rest
+}
 ;
 asmoutputs:
   /* empty */           { None }
diff --git a/src/kernel_internals/parsing/errorloc.ml b/src/kernel_internals/parsing/errorloc.ml
index 5ff43c012ec..8a7715c735a 100644
--- a/src/kernel_internals/parsing/errorloc.ml
+++ b/src/kernel_internals/parsing/errorloc.ml
@@ -250,38 +250,37 @@ let pp_location fmt (pos_start, pos_end) =
     Format.fprintf fmt "Location: between %a and %a"
       pp_pos pos_start pp_pos pos_end
 
-let parse_error ?source msg =
+let parse_error ?loc msg =
   let current = Option.get !current in
-  let last_pos =
-    match source with
-    | None ->
-      Cil_datatype.Position.of_lexing_pos current.lexbuf.Lexing.lex_curr_p
-    | Some s -> s
-  in
   (* there are cases when we are called before menhir has requested at
      least two tokens, ending up in an assertion failure. Unfortunately,
      ErrorReports API does not allow us to check whether the buffer is
      empty or not.
   *)
+  let all_pos = Stack.create() in
   let () =
-    try ignore (MenhirLib.ErrorReports.last current.menhir_pos) with _ -> ()
-  in
-  let start_pos =
+    (* this is absolutely not a hack and used MenhirLib exactly as intended. *)
     try
-      let pos,_ =
+      let pp loc = Stack.push loc all_pos; "" in
+      ignore (MenhirLib.ErrorReports.show pp current.menhir_pos)
+    with _ -> ()
+  in
+  let start_pos,last_pos =
+    match loc with
+    | Some (s,l) -> s, l
+    | None ->
+      if Stack.is_empty all_pos then
         Cil_datatype.Location.of_lexing_loc
-          (MenhirLib.ErrorReports.last current.menhir_pos)
-      in
-      if Cil_datatype.Position.compare pos last_pos <= 0 then pos
+          (current.lexbuf.Lexing.lex_start_p, current.lexbuf.Lexing.lex_curr_p)
       else
-        (* during interaction between C and ACSL parser, it might happen,
-           at least as long as we haven't completed the move to menhir for
-           ACSL, that the start_pos seen by menhir is after the current position
-           of the (shared) lexbuf. This would lead to confusing error messages,
-           so we drop the one from menhir.
-        *)
-        last_pos
-    with _ -> last_pos
+        let _,start_pos = Stack.pop all_pos in
+        let last_pos =
+          if Stack.is_empty all_pos then
+            current.lexbuf.Lexing.lex_start_p
+          else
+            fst (Stack.pop all_pos)
+        in
+        Cil_datatype.Location.of_lexing_loc (start_pos, last_pos)
   in
   let pretty_token fmt token =
     (* prints more detailed information around the erroneous token;
diff --git a/src/kernel_internals/parsing/errorloc.mli b/src/kernel_internals/parsing/errorloc.mli
index e067ebd1d20..2dba434819b 100644
--- a/src/kernel_internals/parsing/errorloc.mli
+++ b/src/kernel_internals/parsing/errorloc.mli
@@ -84,14 +84,12 @@ val pp_context_from_file:
 val pp_location: Format.formatter -> Cil_types.location -> unit
 
 (** Emits the corresponding error message with some location information.
-    If given, [source] will be treated as the last position of the offending
-    expression that led to the error. It defaults to the current position of
-    the lexbuf currently in use (i.e. {!startParsing} must have been called
-    before that, and no {!finishParsing} call must have been done in between).
-    The start position will be inferred from menhir's error reporting mecanisms.
+    If not given, [location] will be considered to be between the end of
+    the forelast token read by the parser and the start of the last token,
+    i.e. we assume the parser has read an unexpected token.
 *)
 val parse_error:
-  ?source:Filepath.position -> ('a, Format.formatter, unit, 'b) format4 -> 'a
+  ?loc:Cil_types.location -> ('a, Format.formatter, unit, 'b) format4 -> 'a
 
 val had_errors : unit -> bool
 (** Has an error been raised since the last call to {!clear_errors}? *)
diff --git a/tests/syntax/oracle/error_end_decl.res.oracle b/tests/syntax/oracle/error_end_decl.res.oracle
index e69de29bb2d..ffb6550d934 100644
--- a/tests/syntax/oracle/error_end_decl.res.oracle
+++ b/tests/syntax/oracle/error_end_decl.res.oracle
@@ -0,0 +1,16 @@
+[kernel] Parsing error_end_decl.i (no preprocessing)
+[kernel] error_end_decl.i:7: 
+  syntax error:
+  Location: between lines 7 and 12
+  5     
+  6     /*@ assigns *p \from \nothing; */
+  
+  7     void f(char *p)
+  8     
+  9     /*@
+  10      assigns \result \from \nothing;
+  11      ensures \result == 0;
+  12    */
+  
+  13    char g(char *p);
+[kernel] Frama-C aborted: invalid user input.
-- 
GitLab