Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
F
frama-c
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Deploy
Releases
Container Registry
Model registry
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
pub
frama-c
Commits
1e912a30
Commit
1e912a30
authored
3 years ago
by
Andre Maroneze
Committed by
Virgile Prevosto
3 years ago
Browse files
Options
Downloads
Patches
Plain Diff
[Makefile] use OCaml-based, more efficient, tool to check for non-UTF-8 files
parent
68742159
No related branches found
Branches containing commit
No related tags found
Tags containing commit
No related merge requests found
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
.gitignore
+1
-0
1 addition, 0 deletions
.gitignore
Makefile
+37
-12
37 additions, 12 deletions
Makefile
bin/isutf8.ml
+73
-0
73 additions, 0 deletions
bin/isutf8.ml
with
111 additions
and
12 deletions
.gitignore
+
1
−
0
View file @
1e912a30
...
@@ -65,6 +65,7 @@ autom4te.cache
...
@@ -65,6 +65,7 @@ autom4te.cache
/devel_tools/fc-memuse
/devel_tools/fc-memuse
/bin/ocamldep_transitive_closure
/bin/ocamldep_transitive_closure
/bin/check_newlines
/bin/check_newlines
/bin/isutf8
#share
#share
/share/Makefile.config
/share/Makefile.config
...
...
This diff is collapsed.
Click to expand it.
Makefile
+
37
−
12
View file @
1e912a30
...
@@ -2151,9 +2151,31 @@ else
...
@@ -2151,9 +2151,31 @@ else
$(
OCAMLC
)
unix.cma
$<
-o
$@
$(
OCAMLC
)
unix.cma
$<
-o
$@
endif
endif
check-newlines-clean
:
$(
RM
)
$(
CHECK_NEWLINES
)
bin/check_newlines.cm
*
bin/check_newlines.o
clean
::
check-newlines-clean
ISUTF8
:=
./bin/isutf8
$(
EXE
)
$(ISUTF8)
:
bin/isutf8.ml
$(
PRINT_MAKING
)
$@
ifeq
($(OCAMLBEST),opt)
$(
OCAMLOPT
)
$<
-o
$@
else
$(
OCAMLC
)
$<
-o
$@
endif
isutf8-clean
:
$(
RM
)
$(
ISUTF8
)
bin/isutf8.cm
*
bin/isutf8.o
clean
::
isutf8-clean
FILES_WITHOUT_NEWLINE
:=
\
FILES_WITHOUT_NEWLINE
:=
\
VERSION
\
VERSION
\
VERSION_CODENAME
\
VERSION_CODENAME
BINARY_DISTRIB_FILES
:=
\
$(
sort
$(
wildcard ivette/src/dome/doc/template/static/fonts/
*
))
\
$(
sort
$(
wildcard ivette/src/dome/doc/template/static/fonts/
*
))
\
$(
sort
$(
wildcard share/
*
.ico share/
*
.png share/theme/
*
/
*
.png
))
$(
sort
$(
wildcard share/
*
.ico share/
*
.png share/theme/
*
/
*
.png
))
...
@@ -2163,6 +2185,13 @@ TESTS_WITHOUT_NEWLINE := \
...
@@ -2163,6 +2185,13 @@ TESTS_WITHOUT_NEWLINE := \
tests/verisec/suite/programs/apps/sendmail/CVE-1999-0047/mime7to8/array_vs_pointer.ods
\
tests/verisec/suite/programs/apps/sendmail/CVE-1999-0047/mime7to8/array_vs_pointer.ods
\
tests/verisec/suite/programs/apps/sendmail/CVE-1999-0047/mime7to8/data_testing.ods
\
tests/verisec/suite/programs/apps/sendmail/CVE-1999-0047/mime7to8/data_testing.ods
\
BINARY_DISTRIB_TESTS
:=
\
tests/misc/oracle/interpreted_automata_dataflow_backward.dot
\
tests/misc/oracle/interpreted_automata_dataflow_forward.dot
\
tests/verisec/suite/programs/apps/SpamAssassin/BID-6679/message_write/test
\
tests/verisec/suite/programs/apps/sendmail/CVE-1999-0047/mime7to8/array_vs_pointer.ods
\
tests/verisec/suite/programs/apps/sendmail/CVE-1999-0047/mime7to8/data_testing.ods
\
# OPEN_SOURCE: set it to 'yes' if you want to check open source headers
# OPEN_SOURCE: set it to 'yes' if you want to check open source headers
# STRICT_HEADERS: set it to 'yes' if you want to consider warnings as errors
# STRICT_HEADERS: set it to 'yes' if you want to consider warnings as errors
# The target check-headers does the following checks:
# The target check-headers does the following checks:
...
@@ -2175,7 +2204,7 @@ TESTS_WITHOUT_NEWLINE := \
...
@@ -2175,7 +2204,7 @@ TESTS_WITHOUT_NEWLINE := \
# because identical headers but with different encodings are not exactly
# because identical headers but with different encodings are not exactly
# easy to distinguish
# easy to distinguish
.PHONY
:
check-headers
.PHONY
:
check-headers
check-headers
:
$(HDRCK) $(CHECK_NEWLINES)
check-headers
:
$(HDRCK) $(CHECK_NEWLINES)
$(ISUTF8)
$(
PRINT
)
"Checking
$(
DISTRIB_HEADERS
)
headers (OPEN_SOURCE=
$(
OPEN_SOURCE
)
, CURRENT_HEADERS=
$(
CURRENT_HEADERS
)
)..."
$(
PRINT
)
"Checking
$(
DISTRIB_HEADERS
)
headers (OPEN_SOURCE=
$(
OPEN_SOURCE
)
, CURRENT_HEADERS=
$(
CURRENT_HEADERS
)
)..."
$(
PRINT
)
"- HEADER_SPEC_FILE=
$(
HEADER_SPEC_FILE
)
"
$(
PRINT
)
"- HEADER_SPEC_FILE=
$(
HEADER_SPEC_FILE
)
"
$(
PRINT
)
"- CURRENT_HEADER_DIRS=
$(
CURRENT_HEADER_DIRS
)
"
$(
PRINT
)
"- CURRENT_HEADER_DIRS=
$(
CURRENT_HEADER_DIRS
)
"
...
@@ -2185,16 +2214,12 @@ check-headers: $(HDRCK) $(CHECK_NEWLINES)
...
@@ -2185,16 +2214,12 @@ check-headers: $(HDRCK) $(CHECK_NEWLINES)
$(
file
>
distrib_tests.tmp
)
$(
foreach O,
$(
DISTRIB_TESTS
)
,
$(
file
>>
distrib_tests.tmp,
$O
))
$(
file
>
distrib_tests.tmp
)
$(
foreach O,
$(
DISTRIB_TESTS
)
,
$(
file
>>
distrib_tests.tmp,
$O
))
$(
file
>
header_exceptions.tmp
)
$(
foreach O,
$(
HEADER_EXCEPTIONS
)
,
$(
file
>>
header_exceptions.tmp,
$O
))
$(
file
>
header_exceptions.tmp
)
$(
foreach O,
$(
HEADER_EXCEPTIONS
)
,
$(
file
>>
header_exceptions.tmp,
$O
))
echo
"Checking that distributed files terminate with a newline..."
echo
"Checking that distributed files terminate with a newline..."
$(
CHECK_NEWLINES
)
distrib_files.tmp
$(
FILES_WITHOUT_NEWLINE
)
$(
CHECK_NEWLINES
)
distrib_files.tmp
$(
FILES_WITHOUT_NEWLINE
)
$(
BINARY_DISTRIB_FILES
)
$(
CHECK_NEWLINES
)
distrib_tests.tmp
$(
TESTS_WITHOUT_NEWLINE
)
$(
CHECK_NEWLINES
)
distrib_tests.tmp
$(
TESTS_WITHOUT_NEWLINE
)
$(
BINARY_DISTRIB_TESTS
)
@
if
command
-v
file
>
/dev/null 2>/dev/null
;
then
\
echo
"Checking that distributed files do not use iso-8859..."
echo
"Checking that distributed files do not use iso-8859..."
;
\
$(
ISUTF8
)
distrib_files.tmp
$(
BINARY_DISTRIB_FILES
)
file
--mime-encoding
-f
distrib_files.tmp
-f
distrib_tests.tmp |
\
$(
ISUTF8
)
distrib_tests.tmp
$(
BINARY_DISTRIB_TESTS
)
grep
"iso-8859"
\
echo
"Checking headers..."
|
$(
SED
)
"s/^/error: invalid encoding in /"
\
|
(
!
grep
"error: invalid encoding"
)
;
\
else
echo
"command 'file' not found, skipping encoding checks"
;
\
fi
$(
HDRCK
)
\
$(
HDRCK
)
\
$(
HDRCK_EXTRA
)
\
$(
HDRCK_EXTRA
)
\
$(
addprefix
-header-dirs
,
$(
CURRENT_HEADER_DIRS
))
\
$(
addprefix
-header-dirs
,
$(
CURRENT_HEADER_DIRS
))
\
...
...
This diff is collapsed.
Click to expand it.
bin/isutf8.ml
0 → 100644
+
73
−
0
View file @
1e912a30
module
StringSet
=
Set
.
Make
(
String
)
exception
False
(* [is_valid_utf8 filename] reads [filename] in binary mode and returns
   [true] iff its contents are structurally well-formed UTF-8: every lead
   byte of the form 110xxxxx, 1110xxxx or 11110xxx is followed by exactly
   1, 2 or 3 continuation bytes of the form 10xxxxxx, and the file does not
   end in the middle of a sequence.  Only these bit patterns are checked;
   overlong encodings, surrogates and code points above U+10FFFF are not
   rejected.

   If the file cannot be opened or read ([Sys_error]), a message is printed
   on standard output and the file is ignored, i.e. [true] is returned.

   The input channel is closed on every path, so that checking a long list
   of files does not exhaust the process's file descriptors (which would
   make the remaining files silently fall into the "could not open" case). *)
let is_valid_utf8 filename =
  let chunk_size = 1024 in
  let buf = Bytes.create chunk_size in
  (* [scan pending i n] checks bytes [i .. n-1] of [buf], where [pending] is
     the number of continuation bytes still expected from a sequence started
     earlier (possibly in a previous chunk).  Returns [Some pending'] with
     the updated count if no invalid byte was found, [None] otherwise. *)
  let rec scan pending i n =
    if i >= n then Some pending
    else
      let c = Bytes.get_uint8 buf i in
      if pending > 0 then
        (* inside a multi-byte sequence: only 10xxxxxx is acceptable *)
        if c lsr 6 = 2 then scan (pending - 1) (i + 1) n else None
      else if c <= 127 then scan 0 (i + 1) n      (* ASCII *)
      else if c lsr 5 = 6 then scan 1 (i + 1) n   (* 110xxxxx *)
      else if c lsr 4 = 14 then scan 2 (i + 1) n  (* 1110xxxx *)
      else if c lsr 3 = 30 then scan 3 (i + 1) n  (* 11110xxx *)
      else None (* stray continuation byte or invalid lead byte *)
  in
  (* Reads the channel chunk by chunk, carrying the pending continuation
     count across chunk boundaries.  [input] returns 0 at end of file. *)
  let rec check_channel ic pending =
    let n_bytes_read = input ic buf 0 chunk_size in
    if n_bytes_read = 0 then pending = 0
    else
      match scan pending 0 n_bytes_read with
      | None -> false
      | Some pending -> check_channel ic pending
  in
  try
    let ic = open_in_bin filename in
    Fun.protect
      ~finally:(fun () -> close_in_noerr ic)
      (fun () -> check_channel ic 0)
  with Sys_error _ ->
    (* possibly a non-existing file (e.g. with spaces); ignoring *)
    Format.printf "could not open, ignoring file: %s@." filename;
    true
(* Command-line entry point.

   usage: isutf8 file_list.txt [ignore1 ignore2 ...]

   The first argument names a text file listing, one per line, the files to
   be checked.  Every command-line argument is also treated as a file name
   to be skipped.  Each listed file that is not skipped and fails
   [is_valid_utf8] is reported on standard output.  The exit status is 1 if
   at least one file was reported, and 0 otherwise (including when no
   argument is given, in which case only the usage line is printed). *)
let () =
  let argv = Sys.argv in
  if Array.length argv < 2 then begin
    Format.printf "usage: %s file_list.txt [ignore1 ignore2 ...]@." argv.(0);
    exit 0
  end;
  let list_channel = open_in argv.(1) in
  let skipped = StringSet.of_list (List.tl (Array.to_list argv)) in
  (* Consumes the list line by line; returns the number of invalid files
     once the end of the list is reached, closing the channel on the way. *)
  let rec count_invalid found =
    match input_line list_channel with
    | exception End_of_file ->
      close_in list_channel;
      found
    | path ->
      if StringSet.mem path skipped || is_valid_utf8 path then
        count_invalid found
      else begin
        Format.printf "error: invalid UTF-8 in file: %s@." path;
        count_invalid (found + 1)
      end
  in
  let n_invalid = count_invalid 0 in
  if n_invalid > 0 then begin
    Format.printf "Found %d file(s) with errors.@." n_invalid;
    exit 1
  end;
  exit 0
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment