Skip to content
Snippets Groups Projects
Commit 1e912a30 authored by Andre Maroneze's avatar Andre Maroneze Committed by Virgile Prevosto
Browse files

[Makefile] use OCaml-based, more efficient, tool to check for non-UTF-8 files

parent 68742159
No related branches found
No related tags found
No related merge requests found
...@@ -65,6 +65,7 @@ autom4te.cache ...@@ -65,6 +65,7 @@ autom4te.cache
/devel_tools/fc-memuse /devel_tools/fc-memuse
/bin/ocamldep_transitive_closure /bin/ocamldep_transitive_closure
/bin/check_newlines /bin/check_newlines
/bin/isutf8
#share #share
/share/Makefile.config /share/Makefile.config
......
...@@ -2151,9 +2151,31 @@ else ...@@ -2151,9 +2151,31 @@ else
$(OCAMLC) unix.cma $< -o $@ $(OCAMLC) unix.cma $< -o $@
endif endif
check-newlines-clean:
$(RM) $(CHECK_NEWLINES) bin/check_newlines.cm* bin/check_newlines.o
clean:: check-newlines-clean
# Path of the helper executable compiled from bin/isutf8.ml; check-headers
# runs it on the lists of distributed files to detect non-UTF-8 content.
ISUTF8:=./bin/isutf8$(EXE)
# Build the helper with $(OCAMLOPT) when OCAMLBEST is 'opt', and with
# $(OCAMLC) otherwise.
$(ISUTF8): bin/isutf8.ml
$(PRINT_MAKING) $@
ifeq ($(OCAMLBEST),opt)
$(OCAMLOPT) $< -o $@
else
$(OCAMLC) $< -o $@
endif
# Remove the executable and the compilation by-products left in bin/.
# The double-colon rule below adds this cleanup to 'clean'.
isutf8-clean:
$(RM) $(ISUTF8) bin/isutf8.cm* bin/isutf8.o
clean:: isutf8-clean
FILES_WITHOUT_NEWLINE := \ FILES_WITHOUT_NEWLINE := \
VERSION \ VERSION \
VERSION_CODENAME \ VERSION_CODENAME
BINARY_DISTRIB_FILES := \
$(sort $(wildcard ivette/src/dome/doc/template/static/fonts/*)) \ $(sort $(wildcard ivette/src/dome/doc/template/static/fonts/*)) \
$(sort $(wildcard share/*.ico share/*.png share/theme/*/*.png)) $(sort $(wildcard share/*.ico share/*.png share/theme/*/*.png))
...@@ -2163,6 +2185,13 @@ TESTS_WITHOUT_NEWLINE := \ ...@@ -2163,6 +2185,13 @@ TESTS_WITHOUT_NEWLINE := \
tests/verisec/suite/programs/apps/sendmail/CVE-1999-0047/mime7to8/array_vs_pointer.ods \ tests/verisec/suite/programs/apps/sendmail/CVE-1999-0047/mime7to8/array_vs_pointer.ods \
tests/verisec/suite/programs/apps/sendmail/CVE-1999-0047/mime7to8/data_testing.ods \ tests/verisec/suite/programs/apps/sendmail/CVE-1999-0047/mime7to8/data_testing.ods \
BINARY_DISTRIB_TESTS := \
tests/misc/oracle/interpreted_automata_dataflow_backward.dot \
tests/misc/oracle/interpreted_automata_dataflow_forward.dot \
tests/verisec/suite/programs/apps/SpamAssassin/BID-6679/message_write/test \
tests/verisec/suite/programs/apps/sendmail/CVE-1999-0047/mime7to8/array_vs_pointer.ods \
tests/verisec/suite/programs/apps/sendmail/CVE-1999-0047/mime7to8/data_testing.ods \
# OPEN_SOURCE: set it to 'yes' if you want to check open source headers # OPEN_SOURCE: set it to 'yes' if you want to check open source headers
# STRICT_HEADERS: set it to 'yes' if you want to consider warnings as errors # STRICT_HEADERS: set it to 'yes' if you want to consider warnings as errors
# The target check-headers does the following checks: # The target check-headers does the following checks:
...@@ -2175,7 +2204,7 @@ TESTS_WITHOUT_NEWLINE := \ ...@@ -2175,7 +2204,7 @@ TESTS_WITHOUT_NEWLINE := \
# because identical headers but with different encodings are not exactly # because identical headers but with different encodings are not exactly
# easy to distinguish # easy to distinguish
.PHONY: check-headers .PHONY: check-headers
check-headers: $(HDRCK) $(CHECK_NEWLINES) check-headers: $(HDRCK) $(CHECK_NEWLINES) $(ISUTF8)
$(PRINT) "Checking $(DISTRIB_HEADERS) headers (OPEN_SOURCE=$(OPEN_SOURCE), CURRENT_HEADERS=$(CURRENT_HEADERS))..." $(PRINT) "Checking $(DISTRIB_HEADERS) headers (OPEN_SOURCE=$(OPEN_SOURCE), CURRENT_HEADERS=$(CURRENT_HEADERS))..."
$(PRINT) "- HEADER_SPEC_FILE=$(HEADER_SPEC_FILE)" $(PRINT) "- HEADER_SPEC_FILE=$(HEADER_SPEC_FILE)"
$(PRINT) "- CURRENT_HEADER_DIRS=$(CURRENT_HEADER_DIRS)" $(PRINT) "- CURRENT_HEADER_DIRS=$(CURRENT_HEADER_DIRS)"
...@@ -2185,16 +2214,12 @@ check-headers: $(HDRCK) $(CHECK_NEWLINES) ...@@ -2185,16 +2214,12 @@ check-headers: $(HDRCK) $(CHECK_NEWLINES)
$(file >distrib_tests.tmp) $(foreach O,$(DISTRIB_TESTS),$(file >>distrib_tests.tmp,$O)) $(file >distrib_tests.tmp) $(foreach O,$(DISTRIB_TESTS),$(file >>distrib_tests.tmp,$O))
$(file >header_exceptions.tmp) $(foreach O,$(HEADER_EXCEPTIONS),$(file >>header_exceptions.tmp,$O)) $(file >header_exceptions.tmp) $(foreach O,$(HEADER_EXCEPTIONS),$(file >>header_exceptions.tmp,$O))
echo "Checking that distributed files terminate with a newline..." echo "Checking that distributed files terminate with a newline..."
$(CHECK_NEWLINES) distrib_files.tmp $(FILES_WITHOUT_NEWLINE) $(CHECK_NEWLINES) distrib_files.tmp $(FILES_WITHOUT_NEWLINE) $(BINARY_DISTRIB_FILES)
$(CHECK_NEWLINES) distrib_tests.tmp $(TESTS_WITHOUT_NEWLINE) $(CHECK_NEWLINES) distrib_tests.tmp $(TESTS_WITHOUT_NEWLINE) $(BINARY_DISTRIB_TESTS)
@if command -v file >/dev/null 2>/dev/null; then \ echo "Checking that distributed files do not use iso-8859..."
echo "Checking that distributed files do not use iso-8859..."; \ $(ISUTF8) distrib_files.tmp $(BINARY_DISTRIB_FILES)
file --mime-encoding -f distrib_files.tmp -f distrib_tests.tmp | \ $(ISUTF8) distrib_tests.tmp $(BINARY_DISTRIB_TESTS)
grep "iso-8859" \ echo "Checking headers..."
| $(SED) "s/^/error: invalid encoding in /" \
| ( ! grep "error: invalid encoding" ); \
else echo "command 'file' not found, skipping encoding checks"; \
fi
$(HDRCK) \ $(HDRCK) \
$(HDRCK_EXTRA) \ $(HDRCK_EXTRA) \
$(addprefix -header-dirs ,$(CURRENT_HEADER_DIRS)) \ $(addprefix -header-dirs ,$(CURRENT_HEADER_DIRS)) \
......
module StringSet = Set.Make(String)
(* Raised internally by [is_valid_utf8] as soon as an invalid byte is seen. *)
exception False

(* [is_valid_utf8 filename] reads [filename] in binary mode, 1024 bytes at a
   time, and returns [true] iff its contents are structurally well-formed
   UTF-8:
   - bytes <= 127 stand alone;
   - a lead byte 110xxxxx, 1110xxxx or 11110xxx must be followed by exactly
     1, 2 or 3 continuation bytes of the form 10xxxxxx;
   - any other byte >= 128 in lead position is rejected;
   - the file must not end in the middle of a multi-byte sequence.
   Only the byte structure is checked: overlong encodings and surrogate code
   points are not rejected.

   If the file cannot be opened or read ([Sys_error]), a message is printed
   on standard output and the file is considered valid ([true]).

   The input channel is always closed before returning, whatever the
   outcome; otherwise checking a long list of files exhausts the available
   file descriptors and later files are silently skipped. *)
let is_valid_utf8 filename =
  let chunk = 1024 in
  let buf = Bytes.create chunk in
  try
    let ic = open_in_bin filename in
    (* number of continuation bytes still expected for the current sequence;
       it persists across chunks, so sequences may straddle a chunk boundary *)
    let extra = ref 0 in
    let check_byte c =
      if !extra > 0 then begin
        decr extra;
        (* continuation bytes must look like 10xxxxxx *)
        if c lsr 6 <> 2 then raise False
      end
      else if c > 127 then begin
        if c lsr 5 = 6 then extra := 1          (* 110xxxxx *)
        else if c lsr 4 = 14 then extra := 2    (* 1110xxxx *)
        else if c lsr 3 = 30 then extra := 3    (* 11110xxx *)
        else raise False
      end
    in
    let rec scan () =
      let n_bytes_read = input ic buf 0 chunk in
      if n_bytes_read = 0 then
        (* end of file: valid only if no sequence is left unfinished *)
        !extra = 0
      else begin
        for i = 0 to n_bytes_read - 1 do
          check_byte (Bytes.get_uint8 buf i)
        done;
        scan ()
      end
    in
    (* close the channel on every exit path: normal end of file, invalid
       byte, or a read error propagating to the outer handler *)
    Fun.protect
      ~finally:(fun () -> close_in_noerr ic)
      (fun () -> try scan () with False -> false)
  with
  | Sys_error _ ->
    (* possibly a non-existing file (e.g. with spaces); ignoring *)
    Format.printf "could not open, ignoring file: %s@." filename;
    true
(* Entry point.
   Command line: [Sys.argv.(1)] names a text file listing, one per line, the
   files to check; every following argument is a file name that must be
   skipped. Each invalid file is reported on standard output. The exit code
   is 1 if at least one listed file is not valid UTF-8, and 0 otherwise
   (including when the usage message is printed for lack of arguments). *)
let () =
  let argc = Array.length Sys.argv in
  if argc < 2 then begin
    Format.printf "usage: %s file_list.txt [ignore1 ignore2 ...]@." Sys.argv.(0);
    exit 0
  end;
  let listing = open_in Sys.argv.(1) in
  (* every argument after the program name goes into the skip set *)
  let skipped =
    StringSet.of_list (Array.to_list (Array.sub Sys.argv 1 (argc - 1)))
  in
  (* [scan bad] consumes the remaining lines of the listing and returns the
     total number of invalid files, [bad] being the count so far *)
  let rec scan bad =
    match input_line listing with
    | exception End_of_file ->
      close_in listing;
      bad
    | name ->
      if StringSet.mem name skipped || is_valid_utf8 name then scan bad
      else begin
        Format.printf "error: invalid UTF-8 in file: %s@." name;
        scan (bad + 1)
      end
  in
  let bad = scan 0 in
  if bad > 0 then begin
    Format.printf "Found %d file(s) with errors.@." bad;
    exit 1
  end else
    exit 0
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment