From 21466b3847ad750a9a07620c4fe4726cde9643e0 Mon Sep 17 00:00:00 2001
From: Virgile Prevosto <virgile.prevosto@m4x.org>
Date: Wed, 28 Feb 2024 09:25:01 +0100
Subject: [PATCH] [lexer] use currentLoc instead of directly lexbuf;

Also, give a better localization of unfinished `/@ ... @/`
annotations in ghost code.
---
 src/kernel_internals/parsing/clexer.mll       | 52 +++++++++++--------
 .../oracle/ghost_multiline_annot.1.res.oracle |  7 ++-
 2 files changed, 32 insertions(+), 27 deletions(-)

diff --git a/src/kernel_internals/parsing/clexer.mll b/src/kernel_internals/parsing/clexer.mll
index 84b23d1063a..2e95e6c11da 100644
--- a/src/kernel_internals/parsing/clexer.mll
+++ b/src/kernel_internals/parsing/clexer.mll
@@ -52,11 +52,8 @@ module H = Hashtbl
 module E = Errorloc
 
 let currentLoc () = E.currentLoc ()
-let parse_error lexbuf msg =
-  let loc =
-    Cil_datatype.Location.of_lexing_loc
-      (lexbuf.Lexing.lex_start_p,lexbuf.Lexing.lex_curr_p)
-  in
+
+let parse_error ?(loc=currentLoc()) msg =
   E.parse_error ~loc msg
 
 (* Convert char into Int64 *)
@@ -73,9 +70,15 @@ let enter_ghost_code () = ghost_code := true
 let exit_ghost_code () = ghost_code := false
 
 let ghost_annot = ref false
+let ghost_annot_start = ref Cil_datatype.Location.unknown
 let is_ghost_annot () = !ghost_annot
-let enter_ghost_annot () = ghost_annot := true
-let exit_ghost_annot () = ghost_annot := false
+let get_ghost_annot_start () = !ghost_annot_start
+let enter_ghost_annot () =
+  ghost_annot := true;
+  ghost_annot_start:= currentLoc()
+let exit_ghost_annot () =
+  ghost_annot := false;
+  ghost_annot_start := Cil_datatype.Location.unknown
 
 let add_comment c = Cabshelper.Comments.add (currentLoc()) c
 
@@ -279,7 +282,7 @@ let finish () =
 
 
 (*** escape character management ***)
-let scan_escape lexbuf = function
+let scan_escape = function
   | 'n' -> int64_of_char '\n'
   | 'r' -> int64_of_char '\r'
   | 't' -> int64_of_char '\t'
@@ -296,7 +299,7 @@ let scan_escape lexbuf = function
   | '[' -> int64_of_char '['
   | '%' -> int64_of_char '%'
   | '\\' -> int64_of_char '\\'
-  | other -> parse_error lexbuf "Unrecognized escape sequence: \\%c" other
+  | other -> parse_error "Unrecognized escape sequence: \\%c" other
 
 let scan_hex_escape str =
   let radix = Int64.of_int 16 in
@@ -330,7 +333,7 @@ let lex_oct_escape remainder lexbuf =
 
 let lex_simple_escape remainder lexbuf =
   let lexchar = Lexing.lexeme_char lexbuf 1 in
-  let prefix = scan_escape lexbuf lexchar in
+  let prefix = scan_escape lexchar in
   prefix :: remainder lexbuf
 
 let lex_unescaped remainder lexbuf =
@@ -529,7 +532,7 @@ rule initial = parse
     {
     if is_ghost_code () || is_oneline_ghost ()
     then GHOST (currentLoc())
-    else parse_error lexbuf "Use of \\ghost out of ghost code"
+    else parse_error "Use of \\ghost out of ghost code"
     }
 
   | blank {initial lexbuf} | '\n'
@@ -629,7 +632,7 @@ rule initial = parse
     if c = !annot_char then
       if is_ghost_code () || is_oneline_ghost ()
       then (enter_ghost_annot () ; annot_lex initial annot_first_token lexbuf)
-      else parse_error lexbuf "This kind of annotation is valid only inside ghost code"
+      else parse_error "This kind of annotation is valid only inside ghost code"
     else (lexbuf.Lexing.lex_curr_pos <- lexbuf.Lexing.lex_curr_pos - 1 ; SLASH)
     }
 
@@ -686,7 +689,7 @@ rule initial = parse
   | _ as c
     {
     if is_ghost_code() && c = '@' then initial lexbuf
-    else parse_error lexbuf "Invalid symbol"
+    else parse_error "Invalid symbol"
     }
 
 
@@ -697,7 +700,7 @@ and might_end_ghost = parse
 
 and comment buffer = parse
   | "*/" {  }
-  | eof  { parse_error lexbuf "Unterminated C comment" }
+  | eof  { parse_error "Unterminated C comment" }
   | _    { lex_comment comment buffer lexbuf }
 
 
@@ -794,7 +797,7 @@ and str = parse
   | hex_escape  { lex_hex_escape str lexbuf }
   | oct_escape  { lex_oct_escape str lexbuf }
   | escape      { lex_simple_escape str lexbuf }
-  | eof         { parse_error lexbuf "unterminated string" }
+  | eof         { parse_error "unterminated string" }
   | _           { lex_unescaped str lexbuf }
 
 
@@ -803,14 +806,14 @@ and chr =  parse
   | hex_escape  { lex_hex_escape chr lexbuf }
   | oct_escape  { lex_oct_escape chr lexbuf }
   | escape      { lex_simple_escape chr lexbuf }
-  | eof         { parse_error lexbuf "unterminated char" }
+  | eof         { parse_error "unterminated char" }
   | _           { lex_unescaped chr lexbuf }
 
 
 and annot_first_token = parse
   | "ghost" ((blank| '\\'?'\n' | ghost_comments)* as comments) "else"
     {
-    if is_oneline_ghost () then parse_error lexbuf "nested ghost code" ;
+    if is_oneline_ghost () then parse_error "nested ghost code" ;
     Buffer.clear buf ;
     let loc = currentLoc () in
     do_ghost_else_comments true comments ;
@@ -820,7 +823,7 @@ and annot_first_token = parse
 
   | "ghost"
     {
-    if is_oneline_ghost () then parse_error lexbuf "nested ghost code" ;
+    if is_oneline_ghost () then parse_error "nested ghost code" ;
     Buffer.clear buf ;
     enter_ghost_code () ;
     LGHOST
@@ -835,12 +838,15 @@ and annot_token = parse
   | "*/"
     {
     if is_ghost_annot ()
-    then parse_error lexbuf "Ghost multi-line annotation not terminated" ;
+    then begin
+      let loc = (fst (get_ghost_annot_start()), snd (currentLoc())) in
+      parse_error ~loc "Ghost multi-line annotation not terminated"
+    end;
     let s = Buffer.contents buf in
     make_annot ~one_line:false initial lexbuf s
     }
 
-  | eof  { parse_error lexbuf "Unterminated annotation" }
+  | eof  { parse_error "Unterminated annotation" }
   | '\n' { E.newline() ; Buffer.add_char buf '\n' ; annot_token lexbuf }
   | _ as c
     {
@@ -861,14 +867,14 @@ and annot_one_line = parse
   | "ghost" ((blank|"\\\n")+ as comments) "else"
     {
     do_ghost_else_comments false comments ;
-    if is_oneline_ghost () then parse_error lexbuf "nested ghost code" ;
+    if is_oneline_ghost () then parse_error "nested ghost code" ;
     enter_oneline_ghost () ;
     LGHOST_ELSE (currentLoc ())
     }
 
   | "ghost"
     {
-    if is_oneline_ghost () then parse_error lexbuf "nested ghost code" ;
+    if is_oneline_ghost () then parse_error "nested ghost code" ;
     enter_oneline_ghost () ;
     LGHOST
     }
@@ -879,7 +885,7 @@ and annot_one_line = parse
 
 and annot_one_line_logic = parse
   | '\n'    { make_annot ~one_line:true initial lexbuf (Buffer.contents buf) }
-  | eof     { parse_error lexbuf "Invalid C file: should end with a newline" }
+  | eof     { parse_error "Invalid C file: should end with a newline" }
   | _ as c  { Buffer.add_char buf c ; annot_one_line_logic lexbuf }
 
 
diff --git a/tests/syntax/oracle/ghost_multiline_annot.1.res.oracle b/tests/syntax/oracle/ghost_multiline_annot.1.res.oracle
index 1d4cfa1f5eb..4e2d22bea4b 100644
--- a/tests/syntax/oracle/ghost_multiline_annot.1.res.oracle
+++ b/tests/syntax/oracle/ghost_multiline_annot.1.res.oracle
@@ -1,11 +1,10 @@
 [kernel] Parsing ghost_multiline_annot.c (with preprocessing)
-[kernel] ghost_multiline_annot.c:34: 
+[kernel] ghost_multiline_annot.c:35: 
   Ghost multi-line annotation not terminated:
-  Location: between lines 34 and 41
-  32    {
+  Location: between lines 35 and 41
   33      /*@ ghost
-  
   34          int x = 10;
+  
   35          /@ loop invariant x >= 0;
   36             loop assigns x;
   37             loop variant x;
-- 
GitLab