Commit d55f7566
Authored 1 year ago by Maxime Jacquemin; committed by Andre Maroneze 1 year ago
[Lexer] do some refactoring
Parent: 1ba0da7f
Changes: 1 changed file
src/kernel_internals/parsing/clexer.mll: 412 additions, 450 deletions (+412 −450)
@@ -53,6 +53,9 @@ module E = Errorloc
let currentLoc () = E.currentLoc ()

(* Convert char into Int64 *)
let int64_of_char c = Int64.of_int (Char.code c)

let one_line_ghost = ref false
let is_oneline_ghost () = !one_line_ghost
let enter_oneline_ghost () = one_line_ghost := true
@@ -68,274 +71,222 @@ let is_ghost_annot () = !ghost_annot
let enter_ghost_annot () = ghost_annot := true
let exit_ghost_annot () = ghost_annot := false

let add_comment c = Cabshelper.Comments.add (currentLoc ()) c

(*
** Keyword hashtable
*)
let lexicon = H.create 211

let add key builder = H.add lexicon key builder
let remove key = H.remove lexicon key

let is_c_keyword s = Hashtbl.mem lexicon s

(* Search for a keyword, default to variable name, as opposed to type *)
let scan_ident id =
  let here = currentLoc () in
  try (H.find lexicon id) here
  with Not_found -> IDENT id

let valid key builder = add key builder

let unsupported key =
  let msg f k = Format.fprintf f "%s is currently unsupported by Frama-C." k in
  add key (fun loc -> Kernel.abort ~source:(fst loc) "%a" msg key)

let warning_C11 key builder =
  let warning () = Kernel.(warning ~wkey:wkey_c11 "%s is a C11 keyword" key) in
  add key (fun loc -> warning () ; builder loc)

let thread_keyword () =
  let wkey = Kernel.wkey_conditional_feature in
  let s = "__thread is a GCC extension, use a GCC-based machdep to enable it" in
  let warning () = Kernel.warning ~wkey "%s" s ; IDENT "__thread" in
  add "__thread" (fun loc -> if Cil.gccMode () then THREAD loc else warning ())

let filename_keyword () =
  let convert acc c = int64_of_char c :: acc in
  let path (loc : Cil_types.location) = (fst loc).pos_path in
  let filename loc = Filepath.Normalized.to_pretty_string (path loc) in
  let ints loc = List.rev (String.fold_left convert [] (filename loc)) in
  add "__FC_FILENAME__" (fun loc -> CST_STRING (ints loc, loc))

let init_lexicon () =
  H.clear lexicon ;
  Logic_env.reset_typenames () ;
  Logic_env.builtin_types_as_typenames () ;
  valid "auto" (fun loc -> AUTO loc) ;
  valid "const" (fun loc -> CONST loc) ;
  valid "__const" (fun loc -> CONST loc) ;
  valid "__const__" (fun loc -> CONST loc) ;
  valid "static" (fun loc -> STATIC loc) ;
  valid "extern" (fun loc -> EXTERN loc) ;
  valid "long" (fun loc -> LONG loc) ;
  valid "short" (fun loc -> SHORT loc) ;
  valid "register" (fun loc -> REGISTER loc) ;
  valid "signed" (fun loc -> SIGNED loc) ;
  valid "__signed" (fun loc -> SIGNED loc) ;
  valid "unsigned" (fun loc -> UNSIGNED loc) ;
  valid "volatile" (fun loc -> VOLATILE loc) ;
  valid "__volatile" (fun loc -> VOLATILE loc) ;
  (* WW: see /usr/include/sys/cdefs.h for why __signed and __volatile
   * are accepted GCC-isms *)
  valid "char" (fun loc -> CHAR loc) ;
  valid "_Bool" (fun loc -> BOOL loc) ;
  valid "int" (fun loc -> INT loc) ;
  valid "float" (fun loc -> FLOAT loc) ;
  valid "double" (fun loc -> DOUBLE loc) ;
  valid "void" (fun loc -> VOID loc) ;
  valid "enum" (fun loc -> ENUM loc) ;
  valid "struct" (fun loc -> STRUCT loc) ;
  valid "typedef" (fun loc -> TYPEDEF loc) ;
  valid "union" (fun loc -> UNION loc) ;
  valid "break" (fun loc -> BREAK loc) ;
  valid "continue" (fun loc -> CONTINUE loc) ;
  valid "goto" (fun loc -> GOTO loc) ;
  valid "return" (fun loc -> RETURN loc) ;
  valid "switch" (fun loc -> SWITCH loc) ;
  valid "case" (fun loc -> CASE loc) ;
  valid "default" (fun loc -> DEFAULT loc) ;
  valid "while" (fun loc -> WHILE loc) ;
  valid "do" (fun loc -> DO loc) ;
  valid "for" (fun loc -> FOR loc) ;
  valid "if" (fun loc -> IF loc) ;
  valid "else" (fun _ -> ELSE) ;
  (*** Implementation specific keywords ***)
  valid "__signed__" (fun loc -> SIGNED loc) ;
  valid "__inline__" (fun loc -> INLINE loc) ;
  valid "inline" (fun loc -> INLINE loc) ;
  valid "__inline" (fun loc -> INLINE loc) ;
  warning_C11 "_Noreturn" (fun loc -> NORETURN loc) ;
  warning_C11 "_Static_assert" (fun loc -> STATIC_ASSERT loc) ;
  valid "__attribute__" (fun loc -> ATTRIBUTE loc) ;
  valid "__attribute" (fun loc -> ATTRIBUTE loc) ;
  valid "_Nullable" (fun loc -> NOP_ATTRIBUTE loc) ;
  valid "__blockattribute__" (fun _ -> BLOCKATTRIBUTE) ;
  valid "__blockattribute" (fun _ -> BLOCKATTRIBUTE) ;
  valid "__asm__" (fun loc -> ASM loc) ;
  valid "asm" (fun loc -> ASM loc) ;
  valid "__typeof__" (fun loc -> TYPEOF loc) ;
  valid "__typeof" (fun loc -> TYPEOF loc) ;
  valid "typeof" (fun loc -> TYPEOF loc) ;
  valid "__alignof" (fun loc -> ALIGNOF loc) ;
  valid "__alignof__" (fun loc -> ALIGNOF loc) ;
  valid "__volatile__" (fun loc -> VOLATILE loc) ;
  valid "__volatile" (fun loc -> VOLATILE loc) ;
  valid "__FUNCTION__" (fun loc -> FUNCTION__ loc) ;
  valid "__func__" (fun loc -> FUNCTION__ loc) ; (* ISO 6.4.2.2 *)
  valid "__PRETTY_FUNCTION__" (fun loc -> PRETTY_FUNCTION__ loc) ;
  valid "__label__" (fun _ -> LABEL__) ;
  (*** weimer: GCC arcana ***)
  valid "__restrict" (fun loc -> RESTRICT loc) ;
  valid "restrict" (fun loc -> RESTRICT loc) ;
  (**** MS VC ***)
  valid "__int64" (fun _ -> INT64 (currentLoc ())) ;
  valid "__int32" (fun loc -> INT loc) ;
  valid "_cdecl" (fun _ -> MSATTR ("_cdecl", currentLoc ())) ;
  valid "__cdecl" (fun _ -> MSATTR ("__cdecl", currentLoc ())) ;
  valid "_stdcall" (fun _ -> MSATTR ("_stdcall", currentLoc ())) ;
  valid "__stdcall" (fun _ -> MSATTR ("__stdcall", currentLoc ())) ;
  valid "_fastcall" (fun _ -> MSATTR ("_fastcall", currentLoc ())) ;
  valid "__fastcall" (fun _ -> MSATTR ("__fastcall", currentLoc ())) ;
  valid "__w64" (fun _ -> MSATTR ("__w64", currentLoc ())) ;
  valid "__declspec" (fun loc -> DECLSPEC loc) ;
  (* !! we turn forceinline into inline *)
  valid "__forceinline" (fun loc -> INLINE loc) ;
  (* Some files produced by 'GCC -E' expect this type to be defined *)
  valid "__builtin_va_list" (fun _ -> NAMED_TYPE "__builtin_va_list") ;
  valid "__builtin_va_arg" (fun loc -> BUILTIN_VA_ARG loc) ;
  valid "__builtin_types_compatible_p" (fun loc -> BUILTIN_TYPES_COMPAT loc) ;
  valid "__builtin_offsetof" (fun loc -> BUILTIN_OFFSETOF loc) ;
  warning_C11 "_Thread_local" (fun loc -> THREAD_LOCAL loc) ;
  (* We recognize __thread for GCC machdeps *)
  thread_keyword () ;
  filename_keyword () ;
  (* The following C11 tokens are not yet supported, so we provide some
     helpful error messages. Usage of 'fatal' instead of 'error' below
     prevents duplicate error messages due to parsing errors. *)
  unsupported "_Alignas" ;
  unsupported "_Alignof" ;
  unsupported "_Complex" ;
  warning_C11 "_Generic" (fun loc -> GENERIC loc) ;
  unsupported "_Imaginary"

(* Mark an identifier as a type name. The old mapping is preserved and will
 * be reinstated when we exit this context *)
let add_type name =
  add name (fun _ -> NAMED_TYPE name) ;
  Logic_env.add_typename name

let remove_type name =
  remove name ;
  Logic_env.remove_typename name

let typedef_decl = ref false
let is_typedef () = !typedef_decl
let set_typedef () = typedef_decl := true
let reset_typedef () = typedef_decl := false

let context : string list list ref = ref [[]]

let push_context _ = context := [] :: !context

let pop_context _ =
  match !context with
  | [] -> Kernel.fatal "Empty context stack"
  | con :: sub ->
    context := sub ;
    List.iter remove_type con

(* Mark an identifier as a variable name. The old mapping is preserved and
 * will be reinstated when we exit this context *)
let add_identifier name =
  match !context with
  | [] -> Kernel.fatal "Empty context stack"
  | con :: sub ->
    context := (name :: con) :: sub ;
    add name (fun _ -> IDENT name) ;
    Logic_env.hide_typename name

(*
** Buffer processor
*)

let init ~filename lexer =
  init_lexicon () ;
  E.startParsing filename lexer

let finish () =
  E.finishParsing () ;
  Logic_env.reset_typenames ()

(*** escape character management ***)

let scan_escape = function
  | 'n' -> int64_of_char '\n'
  | 'r' -> int64_of_char '\r'
  | 't' -> int64_of_char '\t'
  | 'b' -> int64_of_char '\b'
  | 'f' -> int64_of_char '\012' (* ASCII code 12 *)
  | 'v' -> int64_of_char '\011' (* ASCII code 11 *)
  | 'a' -> int64_of_char '\007' (* ASCII code 7 *)
  | 'e' | 'E' -> int64_of_char '\027' (* ASCII code 27. This is a GCC extension *)
  | '\'' -> int64_of_char '\''
  | '"' -> int64_of_char '"' (* '"' *)
  | '?' -> int64_of_char '?'
  | '(' -> int64_of_char '('
  | '{' -> int64_of_char '{'
  | '[' -> int64_of_char '['
  | '%' -> int64_of_char '%'
  | '\\' -> int64_of_char '\\'
  | other -> E.parse_error "Unrecognized escape sequence: \\%c" other

let scan_hex_escape str =
  let radix = Int64.of_int 16 in
@@ -373,77 +324,49 @@ let lex_simple_escape remainder lexbuf =
  prefix :: remainder lexbuf

let lex_unescaped remainder lexbuf =
  let prefix = int64_of_char (Lexing.lexeme_char lexbuf 0) in
  prefix :: remainder lexbuf

let lex_comment remainder buffer lexbuf =
  let s = Lexing.lexeme lexbuf in
  if s = "\n" then E.newline () ;
  Option.iter (fun b -> Buffer.add_string b s) buffer ;
  remainder buffer lexbuf

(* We ignore the last line if it does not contain "\n" as it means that
   it directly precedes the "else" and thus contains only whitespaces *)
let rec iter_comments ends_with_n f = function
  | [] -> ()
  | [ _ ] when not ends_with_n -> ()
  | x :: l -> f x ; iter_comments ends_with_n f l

let do_ghost_else_comments register comments =
  let last = String.length comments in
  (* note that comments contains at least a blank *)
  let ends_with_n = '\n' = String.get comments (last - 1) in
  let comments = String.split_on_char '\n' comments in
  let comments = List.map String.trim comments in
  let print_comments = Kernel.PrintComments.get () in
  let do_comment s = register && print_comments && not (String.equal "" s) in
  let process_line s = E.newline () ; if do_comment s then add_comment s in
  iter_comments ends_with_n process_line comments

let do_lex_comment ?(first_string="") remainder lexbuf =
  if Kernel.PrintComments.get () then
    begin
      let b = Buffer.create 80 in
      let () = Buffer.add_string b first_string in
      let () = remainder (Some b) lexbuf in
      add_comment (Buffer.contents b)
    end
  else remainder None lexbuf

let do_oneline_ghost ~first_string comment lexbuf initial =
  do_lex_comment ~first_string comment lexbuf ;
  E.newline () ;
  if is_oneline_ghost ()
  then (exit_oneline_ghost () ; RGHOST)
  else initial lexbuf

(* Pragmas get explicit end-of-line tokens.
 * Elsewhere they are silently discarded as whitespace. *)
@@ -451,11 +374,9 @@ let pragmaLine = ref false

let annot_char = ref '@'

(* prevent the C lexer interpretation of comments *)
let prevent _ x = annot_char := if x then '@' else '\000'
let () = Kernel.ReadAnnot.add_set_hook prevent

let annot_start_pos = ref Cabshelper.cabslu
let buf = Buffer.create 1024
@@ -464,40 +385,39 @@ let save_current_pos () =
  annot_start_pos := currentLoc ()

let annot_lex initial rule lexbuf =
  save_current_pos () ;
  Buffer.clear buf ;
  try rule lexbuf
  with Parsing.Parse_error ->
    let start = Lexing.lexeme_start_p lexbuf in
    let source = Cil_datatype.Position.of_lexing_pos start in
    Kernel.warning ~wkey:Kernel.wkey_annot_error ~source "skipping annotation" ;
    initial lexbuf

let make_annot ~one_line default lexbuf s =
  let start = snd !annot_start_pos in
  match Logic_lexer.annot (start, s) with
  (* error occured and annotation is discarded. Find a normal token. *)
  | None -> default lexbuf
  | Some (stop, token) ->
    lexbuf.Lexing.lex_curr_p <- Cil_datatype.Position.to_lexing_pos stop ;
    if one_line then E.newline () ;
    match token with
    | Logic_ptree.Adecl d -> DECL d
    | Logic_ptree.Aspec -> SPEC (start, s)
    (* At this point, we only have identified a function spec. Complete
       parsing of the annotation will only occur in the cparser.mly rule. *)
    | Logic_ptree.Acode_annot (loc, a) -> CODE_ANNOT (a, loc)
    | Logic_ptree.Aloop_annot (loc, a) -> LOOP_ANNOT (a, loc)
    | Logic_ptree.Aattribute_annot (loc, a) -> ATTRIBUTE_ANNOT (a, loc)

let parse_error lexbuf msg =
  let loc = lexbuf.Lexing.lex_curr_p in
  let source = Cil_datatype.Position.of_lexing_pos loc in
  E.parse_error ~source msg

(* Initialize the pointer in Errormsg *)
let () =
  Lexerhack.add_type := add_type ;
  Lexerhack.push_context := push_context ;
  Lexerhack.pop_context := pop_context ;
@@ -507,18 +427,18 @@ let () =
  Lexerhack.reset_typedef := reset_typedef
}

let decdigit = ['0'-'9']
let octdigit = ['0'-'7']
let hexdigit = ['0'-'9' 'a'-'f' 'A'-'F']
let binarydigit = ['0' '1']
let letter = ['a'-'z' 'A'-'Z']
let usuffix = ['u' 'U']
let lsuffix = "l" | "L" | "ll" | "LL"
let intsuffix = lsuffix | usuffix | usuffix lsuffix | lsuffix usuffix | usuffix ? "i64"
let hexprefix = '0' ['x' 'X']
let binaryprefix = '0' ['b' 'B']
@@ -530,144 +450,138 @@ let binarynum = binaryprefix binarydigit+ intsuffix?
...
@@ -530,144 +450,138 @@ let binarynum = binaryprefix binarydigit+ intsuffix?
let
exponent
=
[
'
e'
'
E'
][
'
+
'
'
-
'
]
?
decdigit
+
let
exponent
=
[
'
e'
'
E'
][
'
+
'
'
-
'
]
?
decdigit
+
let
fraction
=
'.'
decdigit
+
let
fraction
=
'.'
decdigit
+
let
decfloat
=
(
intnum
?
fraction
)
let
decfloat
=
(
intnum
?
fraction
)
|
(
intnum
exponent
)
|
(
intnum
exponent
)
|
(
intnum
?
fraction
exponent
)
|
(
intnum
?
fraction
exponent
)
|
(
intnum
'.'
)
|
(
intnum
'.'
)
|
(
intnum
'.'
exponent
)
|
(
intnum
'.'
exponent
)
let
hexfraction
=
hexdigit
*
'.'
hexdigit
+
|
hexdigit
+
'.'
let
hexfraction
=
hexdigit
*
'.'
hexdigit
+
|
hexdigit
+
'.'
let
binexponent
=
[
'
p'
'
P'
]
[
'
+
'
'
-
'
]
?
decdigit
+
let
binexponent
=
[
'
p'
'
P'
]
[
'
+
'
'
-
'
]
?
decdigit
+
let
hexfloat
=
hexprefix
hexfraction
binexponent
let
hexfloat
=
hexprefix
hexfraction
binexponent
|
hexprefix
hexdigit
+
binexponent
|
hexprefix
hexdigit
+
binexponent
let
floatsuffix
=
[
'
f'
'
F'
'
l'
'
L'
]
let
floatsuffix
=
[
'
f'
'
F'
'
l'
'
L'
]
let
floatnum
=
(
decfloat
|
hexfloat
)
floatsuffix
?
let
floatnum
=
(
decfloat
|
hexfloat
)
floatsuffix
?
let
ident
=
(
letter
|
'
_'
)(
letter
|
decdigit
|
'
_'
|
'
$
'
)
*
let
ident
=
(
letter
|
'
_'
)(
letter
|
decdigit
|
'
_'
|
'
$
'
)
*
let
blank
=
[
'
'
'\t'
'\012'
'\r'
'\026'
(*this is the plain old DOS eof char*)
]
+
(* \026 is the dos EOF character *)
let
blank
=
[
'
'
'\t'
'\012'
'\r'
'\026'
]
+
let
escape
=
'\\'
_
let
escape
=
'\\'
_
let
hex_escape
=
'\\'
[
'
x'
'
X'
]
hexdigit
+
let
hex_escape
=
'\\'
[
'
x'
'
X'
]
hexdigit
+
let
oct_escape
=
'\\'
octdigit
octdigit
?
octdigit
?
let
oct_escape
=
'\\'
octdigit
octdigit
?
octdigit
?
(* Pragmas that are not parsed by CIL. We lex them as PRAGMA_LINE tokens *)
(* Solaris-style pragmas *)
let
no_parse_pragma
=
let
solaris_pragmas
=
"warning"
|
"GCC"
"ident"
|
"section"
|
"option"
|
"asm"
|
"use_section"
|
"weak"
(* Solaris-style pragmas: *)
|
"redefine_extname"
|
"TCS_align"
|
"ident"
|
"section"
|
"option"
|
"asm"
|
"use_section"
|
"weak"
|
"redefine_extname"
|
"TCS_align"
(* Embedded world *)
(* Embedded world *)
|
"global_register"
|
"location"
let
embedded_pragmas
=
"global_register"
|
"location"
(* Pragmas that are not parsed by CIL. We lex them as PRAGMA_LINE tokens *)
let
no_parse_pragma
=
"warning"
|
"GCC"
|
solaris_pragmas
|
embedded_pragmas
let
ghost_comments
=
"//
\n
"
|
(
"//"
[
^
'\n'
'
@
'
]
([
^
'\n'
]
*
(
"
\\\n
"
)
?
)
*
'\n'
)
let
ghost_comments
=
"//
\n
"
|
(
"//"
[
^
'\n'
'
@
'
]
([
^
'\n'
]
*
(
"
\\\n
"
)
?
)
*
'\n'
)
rule
initial
=
parse
rule
initial
=
parse
|
"/*"
(
""
|
"@{"
|
"@}"
as
suf
)
(* Skip special doxygen comments. Use of '@'
instead of '!annot_char' is intentional *)
(* Skip special doxygen comments. Use of '@' instead of '!annot_char' is
intentional *)
|
"/*"
(
""
|
"@{"
|
"@}"
as
suf
)
{
{
do_lex_comment
~
first_string
:
suf
comment
lexbuf
;
do_lex_comment
~
first_string
:
suf
comment
lexbuf
;
initial
lexbuf
initial
lexbuf
}
}
|
"/*"
([
^
'
*
'
'\n'
]
as
c
)
|
"/*"
([
^
'
*
'
'\n'
]
as
c
)
{
if
c
=
!
annot_char
then
begin
{
annot_lex
initial
annot_first_token
lexbuf
let
first_string
=
String
.
make
1
c
in
end
else
if
c
=
!
annot_char
begin
then
annot_lex
initial
annot_first_token
lexbuf
do_lex_comment
~
first_string
:
(
String
.
make
1
c
)
comment
lexbuf
;
else
(
do_lex_comment
~
first_string
comment
lexbuf
;
initial
lexbuf
)
initial
lexbuf
end
}
}
|
"//"
(
""
|
"@{"
|
"@}"
as
suf
)
(* See comment for "/*@{" above *)
(* See comment for "/*@{" above *)
|
"//"
(
""
|
"@{"
|
"@}"
as
suf
)
{
{
do_lex_comment
~
first_string
:
suf
onelinecomment
lexbuf
;
do_oneline_ghost
~
first_string
:
suf
onelinecomment
lexbuf
initial
E
.
newline
()
;
if
is_oneline_ghost
()
then
begin
exit_oneline_ghost
()
;
RGHOST
end
else
begin
initial
lexbuf
end
}
}
|
"//"
([
^
'\n'
]
as
c
)
|
"//"
([
^
'\n'
]
as
c
)
{
if
c
=
!
annot_char
then
begin
{
annot_lex
initial
annot_one_line
lexbuf
let
first_string
=
String
.
make
1
c
in
end
else
if
c
=
!
annot_char
begin
then
annot_lex
initial
annot_one_line
lexbuf
do_lex_comment
~
first_string
:
(
String
.
make
1
c
)
onelinecomment
lexbuf
;
else
do_oneline_ghost
~
first_string
onelinecomment
lexbuf
initial
E
.
newline
()
;
if
is_oneline_ghost
()
then
begin
exit_oneline_ghost
()
;
RGHOST
end
else
begin
initial
lexbuf
end
end
}
}
|
"
\\
ghost"
|
"
\\
ghost"
{
if
is_ghost_code
()
||
is_oneline_ghost
()
then
begin
{
GHOST
(
currentLoc
()
)
if
is_ghost_code
()
||
is_oneline_ghost
()
end
else
begin
then
GHOST
(
currentLoc
()
)
parse_error
lexbuf
"Use of
\\
ghost out of ghost code"
else
parse_error
lexbuf
"Use of
\\
ghost out of ghost code"
end
}
|
blank
{
initial
lexbuf
}
|
'\n'
{
E
.
newline
()
;
if
!
pragmaLine
then
(
pragmaLine
:=
false
;
PRAGMA_EOL
)
else
if
is_oneline_ghost
()
then
(
exit_oneline_ghost
()
;
RGHOST
)
else
initial
lexbuf
}
}
|
blank
{
initial
lexbuf
}
|
'\\'
'\r'
*
'\n'
|
'\n'
{
E
.
newline
()
;
{
if
!
pragmaLine
then
E
.
newline
()
;
begin
pragmaLine
:=
false
;
PRAGMA_EOL
end
else
if
is_oneline_ghost
()
then
begin
exit_oneline_ghost
()
;
RGHOST
end
else
begin
initial
lexbuf
end
}
|
'\\'
'\r'
*
'\n'
{
E
.
newline
()
;
initial
lexbuf
initial
lexbuf
}
}
|
'
#
'
{
hash
lexbuf
}
|
'
#
'
{
hash
lexbuf
}
|
"%:"
{
hash
lexbuf
}
|
"%:"
{
hash
lexbuf
}
|
"_Pragma"
{
PRAGMA
(
currentLoc
()
)
}
|
"_Pragma"
{
PRAGMA
(
currentLoc
()
)
}
|
'\''
{
|
'\''
{
let
start
=
Lexing
.
lexeme_start_p
lexbuf
in
let
start
=
Lexing
.
lexeme_start_p
lexbuf
in
let
content
=
chr
lexbuf
in
let
content
=
chr
lexbuf
in
let
last
=
Lexing
.
lexeme_end_p
lexbuf
in
let
last
=
Lexing
.
lexeme_end_p
lexbuf
in
CST_CHAR
(
content
,
Cil_datatype
.
Location
.
of_lexing_loc
(
start
,
last
))
CST_CHAR
(
content
,
Cil_datatype
.
Location
.
of_lexing_loc
(
start
,
last
))
}
}
|
"L'"
{
|
"L'"
{
let
start
=
Lexing
.
lexeme_start_p
lexbuf
in
let
start
=
Lexing
.
lexeme_start_p
lexbuf
in
let
content
=
chr
lexbuf
in
let
content
=
chr
lexbuf
in
let
last
=
Lexing
.
lexeme_end_p
lexbuf
in
let
last
=
Lexing
.
lexeme_end_p
lexbuf
in
CST_WCHAR
(
content
,
Cil_datatype
.
Location
.
of_lexing_loc
(
start
,
last
))
CST_WCHAR
(
content
,
Cil_datatype
.
Location
.
of_lexing_loc
(
start
,
last
))
}
}
|
'
"' {
|
'
"'
{
let start = Lexing.lexeme_start_p lexbuf in
let start = Lexing.lexeme_start_p lexbuf in
let content = str lexbuf in
let content = str lexbuf in
let last = Lexing.lexeme_end_p lexbuf in
let last = Lexing.lexeme_end_p lexbuf in
CST_STRING (content, Cil_datatype.Location.of_lexing_loc (start,last))
CST_STRING (content, Cil_datatype.Location.of_lexing_loc (start,last))
}
}
| "
L
\
""
{
| "
L
\
""
{
let
start
=
Lexing
.
lexeme_start_p
lexbuf
in
let
start
=
Lexing
.
lexeme_start_p
lexbuf
in
let
content
=
str
lexbuf
in
let
content
=
str
lexbuf
in
let
last
=
Lexing
.
lexeme_end_p
lexbuf
in
let
last
=
Lexing
.
lexeme_end_p
lexbuf
in
CST_WSTRING
(
content
,
Cil_datatype
.
Location
.
of_lexing_loc
(
start
,
last
))
CST_WSTRING
(
content
,
Cil_datatype
.
Location
.
of_lexing_loc
(
start
,
last
))
}
}
|
floatnum
{
CST_FLOAT
(
Lexing
.
lexeme
lexbuf
,
currentLoc
()
)
}
|
floatnum
{
CST_FLOAT
(
Lexing
.
lexeme
lexbuf
,
currentLoc
()
)
}
|
binarynum
{
(* GCC Extension for binary numbers *)
CST_INT
(
Lexing
.
lexeme
lexbuf
,
currentLoc
()
)}
(* GCC Extension for binary numbers *)
|
binarynum
{
CST_INT
(
Lexing
.
lexeme
lexbuf
,
currentLoc
()
)
}
|
hexnum
{
CST_INT
(
Lexing
.
lexeme
lexbuf
,
currentLoc
()
)
}
|
hexnum
{
CST_INT
(
Lexing
.
lexeme
lexbuf
,
currentLoc
()
)
}
|
octnum
{
CST_INT
(
Lexing
.
lexeme
lexbuf
,
currentLoc
()
)
}
|
octnum
{
CST_INT
(
Lexing
.
lexeme
lexbuf
,
currentLoc
()
)
}
|
intnum
{
CST_INT
(
Lexing
.
lexeme
lexbuf
,
currentLoc
()
)
}
|
intnum
{
CST_INT
(
Lexing
.
lexeme
lexbuf
,
currentLoc
()
)
}
@@ -697,21 +611,23 @@ rule initial = parse
  | "->" { ARROW }
  | '+' { PLUS (currentLoc ()) }
  | '-' { MINUS (currentLoc ()) }
  | '*'
    {
      if is_ghost_code ()
      then might_end_ghost lexbuf
      else STAR (currentLoc ())
    }
  | "/" ([^ '\n'] as c)
    {
      if c = !annot_char then
        if is_ghost_code () || is_oneline_ghost ()
        then (enter_ghost_annot () ; annot_lex initial annot_first_token lexbuf)
        else parse_error lexbuf "This kind of annotation is valid only inside ghost code"
      else (lexbuf.Lexing.lex_curr_pos <- lexbuf.Lexing.lex_curr_pos - 1 ; SLASH)
    }
  | '/' { SLASH }
  | '%' { PERCENT }
  | '!' { EXCLAM (currentLoc ()) }
@@ -721,152 +637,174 @@ rule initial = parse
  | '|' { PIPE }
  | '^' { CIRC }
  | '?' { QUEST }
  | ':'
    {
      if Cabshelper.is_attr_test ()
      then (Cabshelper.pop_attr_test () ; COLON2)
      else COLON
    }
  | '~' { TILDE (currentLoc ()) }
  | '{' { LBRACE (currentLoc ()) }
  | '}' { RBRACE (currentLoc ()) }
  | "<%" { LBRACE (currentLoc ()) }
  | "%>" { RBRACE (currentLoc ()) }
  | '[' { LBRACKET }
  | ']' { RBRACKET }
  | "<:" { LBRACKET }
  | ":>" { RBRACKET }
  | '(' { LPAREN (currentLoc ()) }
  | ')' { RPAREN }
  | ';' { SEMICOLON (currentLoc ()) }
  | ',' { COMMA }
  | '.' { DOT }
  | "sizeof" { SIZEOF (currentLoc ()) }
  | "__asm" { ASM (currentLoc ()) }
  (* If we see __pragma we consume it and the matching parentheses as well *)
  | "__pragma"
    {
      let _ = matchingpars 0 lexbuf in
      initial lexbuf
    }
  (* __extension__ is a black. The parser runs into some conflicts if we let it
     pass *)
  | "__extension__" { initial lexbuf }
  | ident { scan_ident (Lexing.lexeme lexbuf) }
  | eof
    {
      if is_oneline_ghost ()
      then (exit_oneline_ghost () ; RGHOST)
      else EOF
    }
  | _ as c
    {
      if is_ghost_code () && c = '@'
      then initial lexbuf
      else parse_error lexbuf "Invalid symbol"
    }

and might_end_ghost = parse
  | '/' { exit_ghost_code () ; RGHOST }
  | "" { STAR (currentLoc ()) }

and comment buffer = parse
  | "*/" { }
  | eof { parse_error lexbuf "Unterminated C comment" }
  | _ { lex_comment comment buffer lexbuf }

and onelinecomment buffer = parse
  | "*/"
    {
      (* end of multiline comment *)
      if is_ghost_code ()
      then lexbuf.Lexing.lex_curr_pos <- lexbuf.Lexing.lex_curr_pos - 2
      else lex_comment onelinecomment buffer lexbuf
    }
  | '\n' | eof { }
  | _ { lex_comment onelinecomment buffer lexbuf }

and matchingpars parsopen = parse
  | '\n' { E.newline () ; matchingpars parsopen lexbuf }
  | blank { matchingpars parsopen lexbuf }
  | '(' { matchingpars (parsopen + 1) lexbuf }
  | ')' { if parsopen > 1 then matchingpars (parsopen - 1) lexbuf }
  | "/*" { do_lex_comment comment lexbuf ; matchingpars parsopen lexbuf }
  | '"' { let _ = str lexbuf in matchingpars parsopen lexbuf }
  | _ { matchingpars parsopen lexbuf }

(* # <line number> <file name> ... *)
and hash = parse
  | '\n' { E.newline () ; initial lexbuf }
  | blank { hash lexbuf }
  (* We are seeing a line number. This is the number for the next line *)
  | intnum
    {
      let s = Lexing.lexeme lexbuf in
      let msg () = Kernel.warning "Bad line number in preprocessed file: %s" s in
      let lineno = try int_of_string s with Failure _ -> msg () ; -1 in
      E.setCurrentLine (lineno - 1) ;
      (* A file name may follow *)
      file lexbuf
    }
  (* MSVC line number info *)
  | "line" { hash lexbuf }
  (* For pragmas with irregular syntax, like #pragma warning, we parse them as a
     whole line. *)
  | "pragma" blank (no_parse_pragma as pragmaName)
    {
      let here = currentLoc () in
      PRAGMA_LINE (pragmaName ^ pragma lexbuf, here)
    }
  | "pragma" { pragmaLine := true ; PRAGMA (currentLoc ()) }
  | _ { endline lexbuf }

and file = parse
  | '\n' { E.newline () ; initial lexbuf }
  | blank { file lexbuf }
  (* The //-ending file directive is a GCC extension that provides the CWD of
     the preprocessor when the file was preprocessed. *)
  | '"' ([^ '\012' '\t' '"']* as d) "//\""
    {
      E.setCurrentWorkingDirectory d ;
      endline lexbuf
    }
  | '"' (([^ '\012' '\t' '"'] | "\\\"")* as f) '"'
    {
      let unescape = Str.regexp_string "\\\"" in
      let f = Str.global_replace unescape "\"" f in
      E.setCurrentFile f ;
      endline lexbuf
    }
  | _ { endline lexbuf }

and endline = parse
  | '\n' { E.newline () ; initial lexbuf }
  | eof { EOF }
  | _ { endline lexbuf }

and pragma = parse
  | '\n' { E.newline () ; "" }
  | _
    {
      let cur = Lexing.lexeme lexbuf in
      cur ^ (pragma lexbuf)
    }

and str = parse
  (* no nul terminiation in CST_STRING '"' *)
  | '"' { [] }
  | hex_escape { lex_hex_escape str lexbuf }
  | oct_escape { lex_oct_escape str lexbuf }
  | escape { lex_simple_escape str lexbuf }
  | eof { parse_error lexbuf "unterminated string" }
  | _ { lex_unescaped str lexbuf }

and chr = parse
  | '\'' { [] }
  | hex_escape { lex_hex_escape chr lexbuf }
  | oct_escape { lex_oct_escape chr lexbuf }
  | escape { lex_simple_escape chr lexbuf }
  | eof { parse_error lexbuf "unterminated char" }
  | _ { lex_unescaped chr lexbuf }

and annot_first_token = parse
  | "ghost" ((blank | '\\'? '\n' | ghost_comments)* as comments) "else"
    {
      if is_oneline_ghost () then parse_error lexbuf "nested ghost code" ;
      Buffer.clear buf ;
      let loc = currentLoc () in
@@ -874,54 +812,78 @@ and annot_first_token = parse
      enter_ghost_code () ;
      LGHOST_ELSE (loc)
    }
  | "ghost"
    {
      if is_oneline_ghost () then parse_error lexbuf "nested ghost code" ;
      Buffer.clear buf ;
      enter_ghost_code () ;
      LGHOST
    }
  | ' '|'@'|'\t'|'\r' as c { Buffer.add_char buf c ; annot_first_token lexbuf }
  | '\n' { E.newline() ; Buffer.add_char buf '\n' ; annot_first_token lexbuf }
  | "" { annot_token lexbuf }

and annot_token = parse
  | "*/"
    {
      if is_ghost_annot ()
      then parse_error lexbuf "Ghost multi-line annotation not terminated" ;
      let s = Buffer.contents buf in
      make_annot ~one_line:false initial lexbuf s
    }
  | eof { parse_error lexbuf "Unterminated annotation" }
  | '\n' { E.newline() ; Buffer.add_char buf '\n' ; annot_token lexbuf }
  | _ as c
    {
      if is_ghost_annot () && c = !annot_char
      then might_end_ghost_annot lexbuf
      else (Buffer.add_char buf c ; annot_token lexbuf)
    }

and might_end_ghost_annot = parse
  | '/' { exit_ghost_annot ();
          let s = Buffer.contents buf in
          make_annot ~one_line:false initial lexbuf s }
  | "" { Buffer.add_char buf !annot_char; annot_token lexbuf }

and annot_one_line = parse
  | "ghost" ((blank|"\\\n")+ as comments) "else"
    {
      do_ghost_else_comments false comments ;
      if is_oneline_ghost () then parse_error lexbuf "nested ghost code" ;
      enter_oneline_ghost () ;
      LGHOST_ELSE (currentLoc ())
    }
  | "ghost"
    {
      if is_oneline_ghost () then parse_error lexbuf "nested ghost code" ;
      enter_oneline_ghost () ;
      LGHOST
    }
  | ' '|'@'|'\t'|'\r' as c { Buffer.add_char buf c ; annot_one_line lexbuf }
  | "" { annot_one_line_logic lexbuf }

and annot_one_line_logic = parse
  | '\n' { make_annot ~one_line:true initial lexbuf (Buffer.contents buf) }
  | eof { parse_error lexbuf "Invalid C file: should end with a newline" }
  | _ as c { Buffer.add_char buf c ; annot_one_line_logic lexbuf }

{
  (* Catch the exceptions raised by the lexer itself *)
  let initial lexbuf =
    try initial lexbuf
    with Failure _ -> raise Parsing.Parse_error
}

(*
...
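A minimal sketch of how an ocamllex entry point such as initial is typically driven, purely for illustration; Clexer.initial and Cparser.EOF are assumed names used only in this example and are not part of the commit:

  (* Illustrative driver: repeatedly call the generated rule until EOF.     *)
  (* Assumes Clexer.initial : Lexing.lexbuf -> Cparser.token (hypothetical  *)
  (* usage, not the way Frama-C's own parser drives this lexer).            *)
  let tokens_of_string s =
    let lexbuf = Lexing.from_string s in
    let rec drive acc =
      match Clexer.initial lexbuf with
      | Cparser.EOF -> List.rev acc
      | tok -> drive (tok :: acc)
    in
    drive []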