Commit d55f7566 authored by Maxime Jacquemin, committed by Andre Maroneze

[Lexer] do some refactoring

parent 1ba0da7f
@@ -53,6 +53,9 @@ module E = Errorloc

let currentLoc () = E.currentLoc ()

(* Convert char into Int64 *)
let int64_of_char c = Int64.of_int (Char.code c)
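(* Illustration, not part of the commit: int64_of_char 'A' is 65L. These Int64
   codes are what the lexer stores in the integer lists carried by CST_STRING
   and CST_CHAR tokens. *)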
let one_line_ghost = ref false
let is_oneline_ghost () = !one_line_ghost
let enter_oneline_ghost () = one_line_ghost := true
@@ -68,274 +71,222 @@ let is_ghost_annot () = !ghost_annot

let enter_ghost_annot () = ghost_annot := true
let exit_ghost_annot () = ghost_annot := false

let add_comment c = Cabshelper.Comments.add (currentLoc()) c

(*
** Keyword hashtable
*)
let lexicon = H.create 211
let add key builder = H.add lexicon key builder
let remove key = H.remove lexicon key

let is_c_keyword s = Hashtbl.mem lexicon s

(* Search for a keyword, default to variable name, as opposed to type *)
let scan_ident id =
  let here = currentLoc () in
  try (H.find lexicon id) here
  with Not_found -> IDENT id

let valid key builder =
  add key builder

let unsupported key =
  let msg f k = Format.fprintf f "%s is currently unsupported by Frama-C." k in
  add key (fun loc -> Kernel.abort ~source:(fst loc) "%a" msg key)

let warning_C11 key builder =
  let warning () = Kernel.(warning ~wkey:wkey_c11 "%s is a C11 keyword" key) in
  add key (fun loc -> warning () ; builder loc)

let thread_keyword () =
  let wkey = Kernel.wkey_conditional_feature in
  let s = "__thread is a GCC extension, use a GCC-based machdep to enable it" in
  let warning () = Kernel.warning ~wkey "%s" s ; IDENT "__thread" in
  add "__thread" (fun loc -> if Cil.gccMode () then THREAD loc else warning ())

let filename_keyword () =
  let convert acc c = int64_of_char c :: acc in
  let path (loc : Cil_types.location) = (fst loc).pos_path in
  let filename loc = Filepath.Normalized.to_pretty_string (path loc) in
  let ints loc = List.rev (String.fold_left convert [] (filename loc)) in
  add "__FC_FILENAME__" (fun loc -> CST_STRING (ints loc, loc))
let init_lexicon () =
  H.clear lexicon ;
  Logic_env.reset_typenames () ;
  Logic_env.builtin_types_as_typenames () ;
  valid "auto" (fun loc -> AUTO loc) ;
  valid "const" (fun loc -> CONST loc) ;
  valid "__const" (fun loc -> CONST loc) ;
  valid "__const__" (fun loc -> CONST loc) ;
  valid "static" (fun loc -> STATIC loc) ;
  valid "extern" (fun loc -> EXTERN loc) ;
  valid "long" (fun loc -> LONG loc) ;
  valid "short" (fun loc -> SHORT loc) ;
  valid "register" (fun loc -> REGISTER loc) ;
  valid "signed" (fun loc -> SIGNED loc) ;
  valid "__signed" (fun loc -> SIGNED loc) ;
  valid "unsigned" (fun loc -> UNSIGNED loc) ;
  valid "volatile" (fun loc -> VOLATILE loc) ;
  valid "__volatile" (fun loc -> VOLATILE loc) ;
  (* WW: see /usr/include/sys/cdefs.h for why __signed and __volatile
   * are accepted GCC-isms *)
  valid "char" (fun loc -> CHAR loc) ;
  valid "_Bool" (fun loc -> BOOL loc) ;
  valid "int" (fun loc -> INT loc) ;
  valid "float" (fun loc -> FLOAT loc) ;
  valid "double" (fun loc -> DOUBLE loc) ;
  valid "void" (fun loc -> VOID loc) ;
  valid "enum" (fun loc -> ENUM loc) ;
  valid "struct" (fun loc -> STRUCT loc) ;
  valid "typedef" (fun loc -> TYPEDEF loc) ;
  valid "union" (fun loc -> UNION loc) ;
  valid "break" (fun loc -> BREAK loc) ;
  valid "continue" (fun loc -> CONTINUE loc) ;
  valid "goto" (fun loc -> GOTO loc) ;
  valid "return" (fun loc -> RETURN loc) ;
  valid "switch" (fun loc -> SWITCH loc) ;
  valid "case" (fun loc -> CASE loc) ;
  valid "default" (fun loc -> DEFAULT loc) ;
  valid "while" (fun loc -> WHILE loc) ;
  valid "do" (fun loc -> DO loc) ;
  valid "for" (fun loc -> FOR loc) ;
  valid "if" (fun loc -> IF loc) ;
  valid "else" (fun _ -> ELSE) ;
  (*** Implementation specific keywords ***)
  valid "__signed__" (fun loc -> SIGNED loc) ;
  valid "__inline__" (fun loc -> INLINE loc) ;
  valid "inline" (fun loc -> INLINE loc) ;
  valid "__inline" (fun loc -> INLINE loc) ;
  warning_C11 "_Noreturn" (fun loc -> NORETURN loc) ;
  warning_C11 "_Static_assert" (fun loc -> STATIC_ASSERT loc) ;
  valid "__attribute__" (fun loc -> ATTRIBUTE loc) ;
  valid "__attribute" (fun loc -> ATTRIBUTE loc) ;
  valid "_Nullable" (fun loc -> NOP_ATTRIBUTE loc) ;
  valid "__blockattribute__" (fun _ -> BLOCKATTRIBUTE) ;
  valid "__blockattribute" (fun _ -> BLOCKATTRIBUTE) ;
  valid "__asm__" (fun loc -> ASM loc) ;
  valid "asm" (fun loc -> ASM loc) ;
  valid "__typeof__" (fun loc -> TYPEOF loc) ;
  valid "__typeof" (fun loc -> TYPEOF loc) ;
  valid "typeof" (fun loc -> TYPEOF loc) ;
  valid "__alignof" (fun loc -> ALIGNOF loc) ;
  valid "__alignof__" (fun loc -> ALIGNOF loc) ;
  valid "__volatile__" (fun loc -> VOLATILE loc) ;
  valid "__volatile" (fun loc -> VOLATILE loc) ;
  valid "__FUNCTION__" (fun loc -> FUNCTION__ loc) ;
  valid "__func__" (fun loc -> FUNCTION__ loc) ; (* ISO 6.4.2.2 *)
  valid "__PRETTY_FUNCTION__" (fun loc -> PRETTY_FUNCTION__ loc) ;
  valid "__label__" (fun _ -> LABEL__) ;
  (*** weimer: GCC arcana ***)
  valid "__restrict" (fun loc -> RESTRICT loc) ;
  valid "restrict" (fun loc -> RESTRICT loc) ;
  (**** MS VC ***)
  valid "__int64" (fun _ -> INT64 (currentLoc ())) ;
  valid "__int32" (fun loc -> INT loc) ;
  valid "_cdecl" (fun _ -> MSATTR ("_cdecl", currentLoc ())) ;
  valid "__cdecl" (fun _ -> MSATTR ("__cdecl", currentLoc ())) ;
  valid "_stdcall" (fun _ -> MSATTR ("_stdcall", currentLoc ())) ;
  valid "__stdcall" (fun _ -> MSATTR ("__stdcall", currentLoc ())) ;
  valid "_fastcall" (fun _ -> MSATTR ("_fastcall", currentLoc ())) ;
  valid "__fastcall" (fun _ -> MSATTR ("__fastcall", currentLoc ())) ;
  valid "__w64" (fun _ -> MSATTR("__w64", currentLoc ())) ;
  valid "__declspec" (fun loc -> DECLSPEC loc) ;
  (* !! we turn forceinline into inline *)
  valid "__forceinline" (fun loc -> INLINE loc) ;
  (* Some files produced by 'GCC -E' expect this type to be defined *)
  valid "__builtin_va_list" (fun _ -> NAMED_TYPE "__builtin_va_list") ;
  valid "__builtin_va_arg" (fun loc -> BUILTIN_VA_ARG loc) ;
  valid "__builtin_types_compatible_p" (fun loc -> BUILTIN_TYPES_COMPAT loc) ;
  valid "__builtin_offsetof" (fun loc -> BUILTIN_OFFSETOF loc) ;
  warning_C11 "_Thread_local" (fun loc -> THREAD_LOCAL loc) ;
  (* We recognize __thread for GCC machdeps *)
  thread_keyword () ;
  filename_keyword () ;
  (* The following C11 tokens are not yet supported, so we provide some
     helpful error messages. Usage of 'fatal' instead of 'error' below
     prevents duplicate error messages due to parsing errors. *)
  unsupported "_Alignas" ;
  unsupported "_Alignof" ;
  unsupported "_Complex" ;
  warning_C11 "_Generic" (fun loc -> GENERIC loc) ;
  unsupported "_Imaginary"
(* Mark an identifier as a type name. The old mapping is preserved and will
 * be reinstated when we exit this context *)
let add_type name =
  add name (fun _ -> NAMED_TYPE name) ;
  Logic_env.add_typename name

let remove_type name =
  remove name ; Logic_env.remove_typename name

let context : string list list ref = ref [ [] ]

let push_context _ = context := [] :: !context

let pop_context _ =
  match !context with
  | [] -> Kernel.fatal "Empty context stack"
  | con :: sub -> context := sub ; List.iter remove_type con

(* Mark an identifier as a variable name. The old mapping is preserved and
 * will be reinstated when we exit this context *)
let add_identifier name =
  match !context with
  | [] -> Kernel.fatal "Empty context stack"
  | con :: sub ->
    context := (name :: con) :: sub ;
    add name (fun _ -> IDENT name) ;
    Logic_env.hide_typename name

let typedef_decl = ref false
let is_typedef () = !typedef_decl
let set_typedef () = typedef_decl := true
let reset_typedef () = typedef_decl := false
(*
** Buffer processor
*)

let init ~filename lexer =
  init_lexicon () ; E.startParsing filename lexer

let finish () =
  E.finishParsing () ; Logic_env.reset_typenames ()
(*** escape character management ***)

let scan_escape = function
  | 'n' -> int64_of_char '\n'
  | 'r' -> int64_of_char '\r'
  | 't' -> int64_of_char '\t'
  | 'b' -> int64_of_char '\b'
  | 'f' -> int64_of_char '\012' (* ASCII code 12 *)
  | 'v' -> int64_of_char '\011' (* ASCII code 11 *)
  | 'a' -> int64_of_char '\007' (* ASCII code 7 *)
  | 'e' | 'E' -> int64_of_char '\027' (* ASCII code 27. This is a GCC extension *)
  | '\'' -> int64_of_char '\''
  | '"' -> int64_of_char '"' (* '"' *)
  | '?' -> int64_of_char '?'
  | '(' -> int64_of_char '('
  | '{' -> int64_of_char '{'
  | '[' -> int64_of_char '['
  | '%' -> int64_of_char '%'
  | '\\' -> int64_of_char '\\'
  | other -> E.parse_error "Unrecognized escape sequence: \\%c" other
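(* Illustration, not part of the commit: scan_escape 'n' is 10L (the code of
   '\n'), while an unknown escape such as "\z" is reported via E.parse_error. *)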
let scan_hex_escape str =
  let radix = Int64.of_int 16 in
@@ -373,77 +324,49 @@ let lex_simple_escape remainder lexbuf =
  prefix :: remainder lexbuf

let lex_unescaped remainder lexbuf =
  let prefix = int64_of_char (Lexing.lexeme_char lexbuf 0) in
  prefix :: remainder lexbuf

let lex_comment remainder buffer lexbuf =
  let s = Lexing.lexeme lexbuf in
  if s = "\n" then E.newline() ;
  Option.iter (fun b -> Buffer.add_string b s) buffer ;
  remainder buffer lexbuf
(* We ignore the last line if it does not contain "\n" as it means that
   it directly precedes the "else" and thus contains only whitespaces *)
let rec iter_comments ends_with_n f = function
  | [] -> ()
  | [ _ ] when not ends_with_n -> ()
  | x :: l -> f x ; iter_comments ends_with_n f l

let do_ghost_else_comments register comments =
  let last = String.length comments in
  (* note that comments contains at least a blank *)
  let ends_with_n = '\n' = String.get comments (last - 1) in
  let comments = String.split_on_char '\n' comments in
  let comments = List.map String.trim comments in
  let print_comments = Kernel.PrintComments.get () in
  let do_comment s = register && print_comments && not (String.equal "" s) in
  let process_line s = E.newline () ; if do_comment s then add_comment s in
  iter_comments ends_with_n process_line comments
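(* Illustration, not part of the commit: with comments = " foo\nbar", only
   "foo" is processed; the last line carries no trailing '\n', so it merely
   separates the ghost marker from the "else" and is skipped. *)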
let do_lex_comment ?(first_string="") remainder lexbuf =
  if Kernel.PrintComments.get () then
    begin
      let b = Buffer.create 80 in
      let () = Buffer.add_string b first_string in
      let () = remainder (Some b) lexbuf in
      add_comment (Buffer.contents b)
    end
  else remainder None lexbuf

let do_oneline_ghost ~first_string comment lexbuf initial =
  do_lex_comment ~first_string comment lexbuf ;
  E.newline () ;
  if is_oneline_ghost ()
  then (exit_oneline_ghost () ; RGHOST)
  else initial lexbuf
(* Pragmas get explicit end-of-line tokens.
 * Elsewhere they are silently discarded as whitespace. *)

@@ -451,11 +374,9 @@ let pragmaLine = ref false

let annot_char = ref '@'

(* prevent the C lexer interpretation of comments *)
let prevent _ x = annot_char := if x then '@' else '\000'
let () = Kernel.ReadAnnot.add_set_hook prevent

let annot_start_pos = ref Cabshelper.cabslu
let buf = Buffer.create 1024
@@ -464,40 +385,39 @@ let save_current_pos () =
  annot_start_pos := currentLoc ()

let annot_lex initial rule lexbuf =
  save_current_pos () ;
  Buffer.clear buf ;
  try rule lexbuf
  with Parsing.Parse_error ->
    let start = Lexing.lexeme_start_p lexbuf in
    let source = Cil_datatype.Position.of_lexing_pos start in
    Kernel.warning ~wkey:Kernel.wkey_annot_error ~source "skipping annotation" ;
    initial lexbuf

let make_annot ~one_line default lexbuf s =
  let start = snd !annot_start_pos in
  match Logic_lexer.annot (start, s) with
  | Some (stop, token) ->
    lexbuf.Lexing.lex_curr_p <- Cil_datatype.Position.to_lexing_pos stop ;
    if one_line then E.newline () ;
    (match token with
     | Logic_ptree.Adecl d -> DECL d
     | Logic_ptree.Aspec -> SPEC (start, s)
     (* At this point, we only have identified a function spec. Complete
        parsing of the annotation will only occur in the cparser.mly rule. *)
     | Logic_ptree.Acode_annot (loc,a) -> CODE_ANNOT (a, loc)
     | Logic_ptree.Aloop_annot (loc,a) -> LOOP_ANNOT (a,loc)
     | Logic_ptree.Aattribute_annot (loc,a) -> ATTRIBUTE_ANNOT (a, loc))
  | None ->
    (* an error occurred and the annotation is discarded. Find a normal token. *)
    default lexbuf
let parse_error lexbuf msg =
  let loc = lexbuf.Lexing.lex_curr_p in
  let source = Cil_datatype.Position.of_lexing_pos loc in
  E.parse_error ~source msg

(* Initialize the pointer in Errormsg *)
let () =
  Lexerhack.add_type := add_type ;
  Lexerhack.push_context := push_context ;
  Lexerhack.pop_context := pop_context ;
@@ -507,18 +427,18 @@ let () =
  Lexerhack.reset_typedef := reset_typedef
}
let decdigit = ['0'-'9']
let octdigit = ['0'-'7']
let hexdigit = ['0'-'9' 'a'-'f' 'A'-'F']
let binarydigit = ['0' '1']
let letter = ['a'- 'z' 'A'-'Z']
let usuffix = ['u' 'U']
let lsuffix = "l" | "L" | "ll" | "LL"
let intsuffix =
  lsuffix | usuffix | usuffix lsuffix | lsuffix usuffix | usuffix ? "i64"
let hexprefix = '0' ['x' 'X']
let binaryprefix = '0' ['b' 'B']
@@ -530,144 +450,138 @@ let binarynum = binaryprefix binarydigit+ intsuffix?

let exponent = ['e' 'E']['+' '-']? decdigit+
let fraction = '.' decdigit+
let decfloat =
  (intnum? fraction)
  | (intnum exponent)
  | (intnum? fraction exponent)
  | (intnum '.')
  | (intnum '.' exponent)

let hexfraction = hexdigit* '.' hexdigit+ | hexdigit+ '.'
let binexponent = ['p' 'P'] ['+' '-']? decdigit+
let hexfloat =
  hexprefix hexfraction binexponent
  | hexprefix hexdigit+ binexponent

let floatsuffix = ['f' 'F' 'l' 'L']
let floatnum = (decfloat | hexfloat) floatsuffix?
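(* Illustration, not part of the commit: "1.5e3f", ".25" and "0x1.8p3" all
   match floatnum; the optional trailing letter comes from floatsuffix. *)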
let ident = (letter|'_')(letter|decdigit|'_'|'$')*

(* \026 is the dos EOF character *)
let blank = [' ' '\t' '\012' '\r' '\026']+
let escape = '\\' _
let hex_escape = '\\' ['x' 'X'] hexdigit+
let oct_escape = '\\' octdigit octdigit? octdigit?

(* Solaris-style pragmas *)
let solaris_pragmas =
  "ident" | "section" | "option" | "asm" | "use_section" | "weak"
  | "redefine_extname" | "TCS_align"

(* Embedded world *)
let embedded_pragmas = "global_register" | "location"

(* Pragmas that are not parsed by CIL. We lex them as PRAGMA_LINE tokens *)
let no_parse_pragma = "warning" | "GCC" | solaris_pragmas | embedded_pragmas

let ghost_comments = "//\n" | ("//" [^'\n''@'] ([^'\n']*("\\\n")?)* '\n')
rule initial = parse

(* Skip special doxygen comments. Use of '@' instead of '!annot_char' is
   intentional *)
| "/*" ("" | "@{" | "@}" as suf)
  {
    do_lex_comment ~first_string:suf comment lexbuf ;
    initial lexbuf
  }
| "/*" ([^ '*' '\n'] as c)
  {
    let first_string = String.make 1 c in
    if c = !annot_char
    then annot_lex initial annot_first_token lexbuf
    else (do_lex_comment ~first_string comment lexbuf ; initial lexbuf)
  }

(* See comment for "/*@{" above *)
| "//" ("" | "@{" | "@}" as suf)
  { do_oneline_ghost ~first_string:suf onelinecomment lexbuf initial }
| "//" ([^ '\n'] as c) | "//" ([^ '\n'] as c)
{ if c = !annot_char then begin {
annot_lex initial annot_one_line lexbuf let first_string = String.make 1 c in
end else if c = !annot_char
begin then annot_lex initial annot_one_line lexbuf
do_lex_comment ~first_string:(String.make 1 c) onelinecomment lexbuf; else do_oneline_ghost ~first_string onelinecomment lexbuf initial
E.newline();
if is_oneline_ghost () then
begin
exit_oneline_ghost ();
RGHOST
end
else
begin
initial lexbuf
end
end
} }
| "\\ghost" | "\\ghost"
{ if is_ghost_code () || is_oneline_ghost () then begin {
GHOST (currentLoc()) if is_ghost_code () || is_oneline_ghost ()
end else begin then GHOST (currentLoc())
parse_error lexbuf "Use of \\ghost out of ghost code" else parse_error lexbuf "Use of \\ghost out of ghost code"
end }
| blank {initial lexbuf} | '\n'
{
E.newline () ;
if !pragmaLine then (pragmaLine := false ; PRAGMA_EOL)
else if is_oneline_ghost () then (exit_oneline_ghost () ; RGHOST)
else initial lexbuf
} }
| blank {initial lexbuf} | '\\' '\r' * '\n'
| '\n' { E.newline (); {
if !pragmaLine then E.newline () ;
begin
pragmaLine := false;
PRAGMA_EOL
end
else if is_oneline_ghost () then
begin
exit_oneline_ghost ();
RGHOST
end
else begin
initial lexbuf
end }
| '\\' '\r' * '\n' { E.newline ();
initial lexbuf initial lexbuf
} }
| '#' { hash lexbuf }
| "%:" { hash lexbuf }
| "_Pragma" { PRAGMA (currentLoc ()) }
| '\''
  {
    let start = Lexing.lexeme_start_p lexbuf in
    let content = chr lexbuf in
    let last = Lexing.lexeme_end_p lexbuf in
    CST_CHAR (content, Cil_datatype.Location.of_lexing_loc (start,last))
  }
| "L'"
  {
    let start = Lexing.lexeme_start_p lexbuf in
    let content = chr lexbuf in
    let last = Lexing.lexeme_end_p lexbuf in
    CST_WCHAR (content, Cil_datatype.Location.of_lexing_loc (start,last))
  }
| '"'
  {
    let start = Lexing.lexeme_start_p lexbuf in
    let content = str lexbuf in
    let last = Lexing.lexeme_end_p lexbuf in
    CST_STRING (content, Cil_datatype.Location.of_lexing_loc (start,last))
  }
| "L\""
  {
    let start = Lexing.lexeme_start_p lexbuf in
    let content = str lexbuf in
    let last = Lexing.lexeme_end_p lexbuf in
    CST_WSTRING(content, Cil_datatype.Location.of_lexing_loc (start,last))
  }
| floatnum { CST_FLOAT (Lexing.lexeme lexbuf, currentLoc ()) }

(* GCC Extension for binary numbers *)
| binarynum { CST_INT (Lexing.lexeme lexbuf, currentLoc ()) }
| hexnum { CST_INT (Lexing.lexeme lexbuf, currentLoc ()) }
| octnum { CST_INT (Lexing.lexeme lexbuf, currentLoc ()) }
| intnum { CST_INT (Lexing.lexeme lexbuf, currentLoc ()) }
@@ -697,21 +611,23 @@ rule initial = parse

| "->" { ARROW }
| '+' { PLUS (currentLoc ()) }
| '-' { MINUS (currentLoc ()) }
| '*'
  {
    if is_ghost_code ()
    then might_end_ghost lexbuf
    else STAR (currentLoc ())
  }
| "/" ([^ '\n'] as c)
  {
    if c = !annot_char then
      if is_ghost_code () || is_oneline_ghost ()
      then (enter_ghost_annot () ; annot_lex initial annot_first_token lexbuf)
      else parse_error lexbuf "This kind of annotation is valid only inside ghost code"
    else (lexbuf.Lexing.lex_curr_pos <- lexbuf.Lexing.lex_curr_pos - 1 ; SLASH)
  }
| '/' { SLASH }
| '%' { PERCENT }
| '!' { EXCLAM (currentLoc ()) }
@@ -721,152 +637,174 @@ rule initial = parse

| '|' { PIPE }
| '^' { CIRC }
| '?' { QUEST }
| ':'
  {
    if Cabshelper.is_attr_test ()
    then (Cabshelper.pop_attr_test () ; COLON2)
    else COLON
  }
| '~' { TILDE (currentLoc ()) }
| '{' { LBRACE (currentLoc ()) }
| '}' { RBRACE (currentLoc ()) }
| "<%" { LBRACE (currentLoc ()) }
| "%>" { RBRACE (currentLoc ()) }
| '[' { LBRACKET }
| ']' { RBRACKET }
| "<:" { LBRACKET }
| ":>" { RBRACKET }
| '(' { LPAREN (currentLoc ()) }
| ')' { RPAREN }
| ';' { SEMICOLON (currentLoc ()) }
| ',' { COMMA }
| '.' { DOT }
| "sizeof" { SIZEOF (currentLoc ()) }
| "__asm" { ASM (currentLoc ()) }

(* If we see __pragma we consume it and the matching parentheses as well *)
| "__pragma" { let _ = matchingpars 0 lexbuf in initial lexbuf }

(* __extension__ is a black. The parser runs into some conflicts if we let it
   pass *)
| "__extension__" { initial lexbuf }
| ident { scan_ident (Lexing.lexeme lexbuf) }
| eof
  {
    if is_oneline_ghost()
    then (exit_oneline_ghost () ; RGHOST)
    else EOF
  }
| _ as c
  {
    if is_ghost_code() && c = '@' then initial lexbuf
    else parse_error lexbuf "Invalid symbol"
  }

and might_end_ghost = parse
| '/' { exit_ghost_code() ; RGHOST }
| "" { STAR (currentLoc()) }
and comment buffer = parse
| "*/" { }
| eof { parse_error lexbuf "Unterminated C comment" }
| _ { lex_comment comment buffer lexbuf }

and onelinecomment buffer = parse
| "*/"
  {
    (* end of multiline comment *)
    if is_ghost_code ()
    then lexbuf.Lexing.lex_curr_pos <- lexbuf.Lexing.lex_curr_pos - 2
    else lex_comment onelinecomment buffer lexbuf
  }
| '\n' | eof { }
| _ { lex_comment onelinecomment buffer lexbuf }
and matchingpars parsopen = parse
| '\n' { E.newline (); matchingpars parsopen lexbuf }
| blank { matchingpars parsopen lexbuf }
| '(' { matchingpars (parsopen + 1) lexbuf }
| ')' { if parsopen > 1 then matchingpars (parsopen - 1) lexbuf }
| "/*" { do_lex_comment comment lexbuf ; matchingpars parsopen lexbuf }
| '"' { let _ = str lexbuf in matchingpars parsopen lexbuf }
| _ { matchingpars parsopen lexbuf }
(* # <line number> <file name> ... *) (* # <line number> <file name> ... *)
and hash = parse and hash = parse
'\n' { E.newline (); initial lexbuf} | '\n' { E.newline (); initial lexbuf}
| blank { hash lexbuf} | blank { hash lexbuf}
| intnum { (* We are seeing a line number. This is the number for the
* next line *) (* We are seeing a line number. This is the number for the next line *)
| intnum
{
let s = Lexing.lexeme lexbuf in let s = Lexing.lexeme lexbuf in
let lineno = try let msg () = Kernel.warning "Bad line number in preprocessed file: %s" s in
int_of_string s let lineno = try int_of_string s with Failure _ -> msg () ; -1 in
with Failure _ ->
(* the int is too big. *)
Kernel.warning "Bad line number in preprocessed file: %s" s;
(-1)
in
E.setCurrentLine (lineno - 1) ; E.setCurrentLine (lineno - 1) ;
(* A file name may follow *) (* A file name may follow *)
file lexbuf } file lexbuf
| "line" { hash lexbuf } (* MSVC line number info *) }
(* For pragmas with irregular syntax, like #pragma warning,
* we parse them as a whole line. *) (* MSVC line number info *)
| "line" { hash lexbuf }
(* For pragmas with irregular syntax, like #pragma warning, we parse them as a
whole line. *)
| "pragma" blank (no_parse_pragma as pragmaName) | "pragma" blank (no_parse_pragma as pragmaName)
{ let here = currentLoc () in {
let here = currentLoc () in
PRAGMA_LINE (pragmaName ^ pragma lexbuf, here) PRAGMA_LINE (pragmaName ^ pragma lexbuf, here)
} }
| "pragma" { pragmaLine := true ; PRAGMA (currentLoc ()) } | "pragma" { pragmaLine := true ; PRAGMA (currentLoc ()) }
| _ { endline lexbuf } | _ { endline lexbuf }
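(* Illustration, not part of the commit: a preprocessor marker such as
   [# 42 "foo.c"] goes through this rule, which records the line number and
   then lets [file] below record the (hypothetical) name "foo.c" for the
   following tokens. *)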
and file = parse
| '\n' { E.newline () ; initial lexbuf }
| blank { file lexbuf }

(* The //-ending file directive is a GCC extension that provides the CWD of
   the preprocessor when the file was preprocessed. *)
| '"' ([^ '\012' '\t' '"']* as d) "//\""
  {
    E.setCurrentWorkingDirectory d ;
    endline lexbuf
  }
| '"' (([^ '\012' '\t' '"']|"\\\"")* as f) '"'
  {
    let unescape = Str.regexp_string "\\\"" in
    let f = Str.global_replace unescape "\"" f in
    E.setCurrentFile f ;
    endline lexbuf
  }
| _ { endline lexbuf }

and endline = parse
| '\n' { E.newline () ; initial lexbuf }
| eof { EOF }
| _ { endline lexbuf }

and pragma = parse
| '\n' { E.newline () ; "" }
| _ { let cur = Lexing.lexeme lexbuf in cur ^ (pragma lexbuf) }
and str = parse
(* no nul termination in CST_STRING '"' *)
| '"' { [] }
| hex_escape { lex_hex_escape str lexbuf }
| oct_escape { lex_oct_escape str lexbuf }
| escape { lex_simple_escape str lexbuf }
| eof { parse_error lexbuf "unterminated string" }
| _ { lex_unescaped str lexbuf }

and chr = parse
| '\'' { [] }
| hex_escape { lex_hex_escape chr lexbuf }
| oct_escape { lex_oct_escape chr lexbuf }
| escape { lex_simple_escape chr lexbuf }
| eof { parse_error lexbuf "unterminated char" }
| _ { lex_unescaped chr lexbuf }
and annot_first_token = parse
| "ghost" ((blank| '\\'?'\n' | ghost_comments)* as comments) "else"
  {
    if is_oneline_ghost () then parse_error lexbuf "nested ghost code" ;
    Buffer.clear buf ;
    let loc = currentLoc () in
@@ -874,54 +812,78 @@ and annot_first_token = parse
    enter_ghost_code () ;
    LGHOST_ELSE (loc)
  }
| "ghost"
  {
    if is_oneline_ghost () then parse_error lexbuf "nested ghost code" ;
    Buffer.clear buf ;
    enter_ghost_code () ;
    LGHOST
  }
| ' '|'@'|'\t'|'\r' as c { Buffer.add_char buf c ; annot_first_token lexbuf }
| '\n' { E.newline() ; Buffer.add_char buf '\n' ; annot_first_token lexbuf }
| "" { annot_token lexbuf }
and annot_token = parse
| "*/"
  {
    if is_ghost_annot ()
    then parse_error lexbuf "Ghost multi-line annotation not terminated" ;
    let s = Buffer.contents buf in
    make_annot ~one_line:false initial lexbuf s
  }
| eof { parse_error lexbuf "Unterminated annotation" }
| '\n' { E.newline() ; Buffer.add_char buf '\n' ; annot_token lexbuf }
| _ as c
  {
    if is_ghost_annot () && c = !annot_char
    then might_end_ghost_annot lexbuf
    else (Buffer.add_char buf c ; annot_token lexbuf)
  }

and might_end_ghost_annot = parse
| '/' { exit_ghost_annot ();
        let s = Buffer.contents buf in
        make_annot ~one_line:false initial lexbuf s }
| "" { Buffer.add_char buf !annot_char; annot_token lexbuf }
and annot_one_line = parse
| "ghost" ((blank|"\\\n")+ as comments) "else"
  {
    do_ghost_else_comments false comments ;
    if is_oneline_ghost () then parse_error lexbuf "nested ghost code" ;
    enter_oneline_ghost () ;
    LGHOST_ELSE (currentLoc ())
  }
| "ghost"
  {
    if is_oneline_ghost () then parse_error lexbuf "nested ghost code" ;
    enter_oneline_ghost () ;
    LGHOST
  }
| ' '|'@'|'\t'|'\r' as c { Buffer.add_char buf c ; annot_one_line lexbuf }
| "" { annot_one_line_logic lexbuf }

and annot_one_line_logic = parse
| '\n' { make_annot ~one_line:true initial lexbuf (Buffer.contents buf) }
| eof { parse_error lexbuf "Invalid C file: should end with a newline" }
| _ as c { Buffer.add_char buf c ; annot_one_line_logic lexbuf }
{
  (* Catch the exceptions raised by the lexer itself *)
  let initial lexbuf =
    try initial lexbuf
    with Failure _ -> raise Parsing.Parse_error
}

(*
...