From 68742159d1fe4bc0b1a15fd558efb818f8e041ee Mon Sep 17 00:00:00 2001
From: Andre Maroneze <andre.maroneze@cea.fr>
Date: Fri, 16 Jul 2021 12:39:28 +0200
Subject: [PATCH] [Makefile] more efficient, OCaml-based, tool to check for trailing newlines

---
Review notes (ignored by git-am):
 - bin/check_newlines.ml: the usage error now exits with status 2 (as the
   removed shell script did) instead of 0, so a bad invocation fails the
   Makefile rule.
 - bin/check_newlines.ml: the "could not open" message now ends with "@."
   so it is newline-terminated and flushed.
 - bin/check_newlines.ml: the ignore set is built from argv[2..] only; the
   list file (argv[1]) is no longer part of it.
 - bin/check_newlines.ml: "while true; do" rewritten as "while true do".
 - The blob id on the "index" line of bin/check_newlines.ml predates these
   edits.
 - Indentation of context and removed lines was restored by hand; verify it
   against the tree before applying.

 .gitignore            |  1 +
 Makefile              | 28 +++++++++++++++++++---
 bin/check_newline.sh  | 36 -----------------------------
 bin/check_newlines.ml | 54 +++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 80 insertions(+), 39 deletions(-)
 delete mode 100755 bin/check_newline.sh
 create mode 100644 bin/check_newlines.ml

diff --git a/.gitignore b/.gitignore
index 10c4807deeb..f674592c232 100644
--- a/.gitignore
+++ b/.gitignore
@@ -64,6 +64,7 @@ autom4te.cache
 /devel_tools/fc-time
 /devel_tools/fc-memuse
 /bin/ocamldep_transitive_closure
+/bin/check_newlines
 
 #share
 /share/Makefile.config
diff --git a/Makefile b/Makefile
index c71e627f4fc..4f8cb85784b 100644
--- a/Makefile
+++ b/Makefile
@@ -2141,6 +2141,28 @@ clean:: hdrck-clean
 CURRENT_HEADERS?=open-source
 CURRENT_HEADER_DIRS?=$(addsuffix /$(CURRENT_HEADERS),$(HEADER_DIRS))
 
+CHECK_NEWLINES:=./bin/check_newlines$(EXE)
+
+$(CHECK_NEWLINES): bin/check_newlines.ml
+	$(PRINT_MAKING) $@
+ifeq ($(OCAMLBEST),opt)
+	$(OCAMLOPT) unix.cmxa $< -o $@
+else
+	$(OCAMLC) unix.cma $< -o $@
+endif
+
+FILES_WITHOUT_NEWLINE := \
+  VERSION \
+  VERSION_CODENAME \
+  $(sort $(wildcard ivette/src/dome/doc/template/static/fonts/*)) \
+  $(sort $(wildcard share/*.ico share/*.png share/theme/*/*.png))
+
+TESTS_WITHOUT_NEWLINE := \
+  tests/spec/unfinished-oneline-acsl-comment.i \
+  tests/verisec/suite/programs/apps/SpamAssassin/BID-6679/message_write/test \
+  tests/verisec/suite/programs/apps/sendmail/CVE-1999-0047/mime7to8/array_vs_pointer.ods \
+  tests/verisec/suite/programs/apps/sendmail/CVE-1999-0047/mime7to8/data_testing.ods \
+
 # OPEN_SOURCE: set it to 'yes' if you want to check open source headers
 # STRICT_HEADERS: set it to 'yes' if you want to consider warnings as errors
 # The target check-headers does the following checks:
@@ -2153,7 +2175,7 @@ CURRENT_HEADER_DIRS?=$(addsuffix /$(CURRENT_HEADERS),$(HEADER_DIRS))
 # because identical headers but with different encodings are not exactly
 # easy to distinguish
 .PHONY: check-headers
-check-headers: $(HDRCK)
+check-headers: $(HDRCK) $(CHECK_NEWLINES)
 	$(PRINT) "Checking $(DISTRIB_HEADERS) headers (OPEN_SOURCE=$(OPEN_SOURCE), CURRENT_HEADERS=$(CURRENT_HEADERS))..."
 	$(PRINT) "- HEADER_SPEC_FILE=$(HEADER_SPEC_FILE)"
 	$(PRINT) "- CURRENT_HEADER_DIRS=$(CURRENT_HEADER_DIRS)"
@@ -2163,8 +2185,8 @@ check-headers: $(HDRCK)
 	$(file >distrib_tests.tmp) $(foreach O,$(DISTRIB_TESTS),$(file >>distrib_tests.tmp,$O))
 	$(file >header_exceptions.tmp) $(foreach O,$(HEADER_EXCEPTIONS),$(file >>header_exceptions.tmp,$O))
 	echo "Checking that distributed files terminate with a newline..."
-	bin/check_newline.sh distrib_files.tmp
-	bin/check_newline.sh distrib_tests.tmp
+	$(CHECK_NEWLINES) distrib_files.tmp $(FILES_WITHOUT_NEWLINE)
+	$(CHECK_NEWLINES) distrib_tests.tmp $(TESTS_WITHOUT_NEWLINE)
 	@if command -v file >/dev/null 2>/dev/null; then \
 		echo "Checking that distributed files do not use iso-8859..."; \
 		file --mime-encoding -f distrib_files.tmp -f distrib_tests.tmp | \
diff --git a/bin/check_newline.sh b/bin/check_newline.sh
deleted file mode 100755
index d2ff0d06892..00000000000
--- a/bin/check_newline.sh
+++ /dev/null
@@ -1,36 +0,0 @@
-#!/bin/bash -e
-
-# $1: file containing the list of files to check
-# prints a warning for each file not finishing with a newline,
-# unless it is one of a few well-known exceptions (e.g. VERSION).
-# Note: requires the 'file' command-line tool to check which files are text.
-
-if [ $# -lt 1 ]; then
-    echo "usage: $0 file_list.txt"
-    exit 2
-fi
-
-is_likely_text_file() {
-    case $(file -b --mime-type - < "$1") in
-        (text/*) echo "1"
-    esac
-}
-
-errors=0
-
-IFS=''
-while read file
-do
-    if [ -n "$(is_likely_text_file "$file")" ]; then
-        x=$(tail -c 1 "$file")
-        if [ "$x" != "" ] && [ "$file" != "VERSION" ] && [ "$file" != "VERSION_CODENAME" ]; then
-            echo "error: no newline at end of file: $file"
-            errors=$((errors+1))
-        fi
-    fi
-done < <(file -f "$1" --mime | grep '\btext' | cut -d: -f1)
-
-if [ $errors -gt 0 ]; then
-    echo "Found $errors file(s) with errors."
-    exit 1
-fi
diff --git a/bin/check_newlines.ml b/bin/check_newlines.ml
new file mode 100644
index 00000000000..df74f3ae3dc
--- /dev/null
+++ b/bin/check_newlines.ml
@@ -0,0 +1,54 @@
+module StringSet = Set.Make(String)
+
+(* true if the last byte is a newline; empty or unopenable files count as ok *)
+let is_last_byte_newline filename =
+  try
+    let ic = open_in filename in
+    try
+      let fd = Unix.descr_of_in_channel ic in
+      ignore (Unix.lseek fd (-1) Unix.SEEK_END);
+      let buf = Bytes.create 1 in
+      let n_bytes_read = Unix.read fd buf 0 1 in
+      close_in ic;
+      n_bytes_read <= 0 || Bytes.get buf 0 = '\n'
+    with
+    | Unix.Unix_error _ ->
+      (* probably an empty file; ignoring *)
+      close_in ic;
+      true
+  with
+  | Sys_error _ ->
+    (* possibly a non-existing file (e.g. with spaces); ignoring *)
+    Format.printf "could not open, ignoring file: %s@." filename;
+    true
+
+(* usage: first argument is a file name containing a list of files
+   (one per line) to be checked; the remaining arguments are a list of
+   files to be ignored during checking
+   (i.e. they do not terminate with newlines). *)
+let () =
+  if Array.length Sys.argv < 2 then begin
+    Format.printf "usage: %s file_list.txt [ignore1 ignore2 ...]@." Sys.argv.(0);
+    exit 2
+  end;
+  let errors = ref 0 in
+  let file_list_ic = open_in Sys.argv.(1) in
+  let to_ignore = StringSet.of_list (List.tl (List.tl (Array.to_list Sys.argv))) in
+  begin
+    try
+      while true do
+        let filename = input_line file_list_ic in
+        if not (StringSet.mem filename to_ignore) &&
+           not (is_last_byte_newline filename) then begin
+          incr errors;
+          Format.printf "error: no newline at end of file: %s@." filename
+        end
+      done
+    with End_of_file ->
+      close_in file_list_ic
+  end;
+  if !errors > 0 then begin
+    Format.printf "Found %d file(s) with errors.@." !errors;
+    exit 1
+  end else
+    exit 0
-- 
GitLab