diff --git a/Makefile b/Makefile index d54194c4305fd821607a814895512f6cf094bd5d..9b37922a1720555b0839db0ca6f186064b4dda95 100644 --- a/Makefile +++ b/Makefile @@ -285,6 +285,7 @@ DISTRIB_FILES:=\ share/analysis-scripts/results_display.py \ share/analysis-scripts/script_for_creduce_fatal.sh \ share/analysis-scripts/script_for_creduce_non_fatal.sh \ + share/analysis-scripts/source_filter.py \ share/analysis-scripts/summary.py \ share/analysis-scripts/template.mk \ $(wildcard share/emacs/*.el) share/autocomplete_frama-c \ @@ -1998,6 +1999,7 @@ install:: install-lib-$(OCAMLBEST) share/analysis-scripts/results_display.py \ share/analysis-scripts/script_for_creduce_fatal.sh \ share/analysis-scripts/script_for_creduce_non_fatal.sh \ + share/analysis-scripts/source_filter.py \ share/analysis-scripts/summary.py \ share/analysis-scripts/template.mk \ $(FRAMAC_DATADIR)/analysis-scripts diff --git a/headers/header_spec.txt b/headers/header_spec.txt index 1b6649697bff9801fa9a205532acf3c923c33b6f..7eef44ba09bb4562be65708698e210d792abaa1a 100644 --- a/headers/header_spec.txt +++ b/headers/header_spec.txt @@ -144,6 +144,7 @@ share/analysis-scripts/README.md: .ignore share/analysis-scripts/results_display.py: CEA_LGPL share/analysis-scripts/script_for_creduce_fatal.sh: .ignore share/analysis-scripts/script_for_creduce_non_fatal.sh: .ignore +share/analysis-scripts/source_filter.py: CEA_LGPL share/analysis-scripts/summary.py: CEA_LGPL share/analysis-scripts/template.mk: .ignore share/compliance/c11_functions.json: .ignore diff --git a/share/analysis-scripts/build.py b/share/analysis-scripts/build.py index 88a0762d0d29a6b5ddf33700c44e972891749c16..9528e62b30d75f83d5686d6d0ef89b16ed1843c9 100755 --- a/share/analysis-scripts/build.py +++ b/share/analysis-scripts/build.py @@ -37,6 +37,7 @@ import sys import subprocess import function_finder +import source_filter script_dir = os.path.dirname(sys.argv[0]) @@ -195,8 +196,7 @@ def copy_fc_stubs(): # [funcname] in [filename]. 
# [has_args] is used to distinguish between main(void) and main(int, char**). def find_definitions(funcname, filename): - with open(filename, encoding="ascii", errors='ignore') as data: - file_content = data.read() + file_content = source_filter.open_and_filter(filename, not under_test) file_lines = file_content.splitlines(keepends=True) newlines = function_finder.compute_newline_offsets(file_lines) defs = function_finder.find_definitions_and_declarations(True, False, filename, file_content, file_lines, newlines, funcname) diff --git a/share/analysis-scripts/build_callgraph.py b/share/analysis-scripts/build_callgraph.py index 6ab8ab2ab6b9848228dcc16a7c28bbb28c27792d..a1047a70e0eaa9d92044396ed947d5178c2a0822 100755 --- a/share/analysis-scripts/build_callgraph.py +++ b/share/analysis-scripts/build_callgraph.py @@ -30,6 +30,7 @@ import re import sys import function_finder +import source_filter under_test = os.getenv("PTESTS_TESTING") @@ -79,8 +80,7 @@ class Callgraph: def compute(files): cg = Callgraph() for f in files: - with open(f, encoding="ascii", errors='ignore') as data: - file_content = data.read() + file_content = source_filter.open_and_filter(f, not under_test) file_lines = file_content.splitlines(keepends=True) newlines = function_finder.compute_newline_offsets(file_lines) defs = function_finder.find_definitions_and_declarations(True, False, f, file_content, file_lines, newlines) diff --git a/share/analysis-scripts/estimate_difficulty.py b/share/analysis-scripts/estimate_difficulty.py index e7549fefd304cd5d7e27759c2101af0e4bad7fd3..c0bade9d03dfa84f9ba2674bb935ecca68ebd0d6 100755 --- a/share/analysis-scripts/estimate_difficulty.py +++ b/share/analysis-scripts/estimate_difficulty.py @@ -36,6 +36,7 @@ import tempfile import build_callgraph import function_finder +import source_filter #TODO : avoid relativizing paths when introducing too many ".." 
#TODO : accept directory as argument (--full-tree), and then do glob **/*.{c,i} inside

re_include = re.compile(r'\s*#\s*include\s*("|<)([^">]+)("|>)')


def grep_includes_in_file(filename):
    """Yield a (line_number, kind, header) tuple for each #include in a file.

    The file is read through source_filter.open_and_filter(); the external
    filters are applied unless the module-level `under_test` flag is set
    (that flag is defined outside this chunk).

    - line_number is 1-based and refers to the text returned by
      open_and_filter(), which may differ from the file on disk when the
      filters are applied;
    - kind is the opening delimiter matched by re_include: '"' or '<';
    - header is the text between the delimiters.
    """
    file_content = source_filter.open_and_filter(filename, not under_test)
    # open_and_filter() returns a string, not a file object, so iterate over
    # its lines directly. Splitting on "\n" only (instead of splitlines())
    # keeps characters such as form feeds from being counted as line breaks.
    for lineno, line in enumerate(file_content.split("\n"), start=1):
        m = re_include.match(line)
        if m:
            yield (lineno, m.group(1), m.group(2))
# This file provides some functions to open and filter source files
# before they are used by other scripts. These filters help improve
# the efficiency of regex-based heuristics.

# These filters require external tools, either in the PATH, or in
# environment variables (the latter has higher priority than the former).
# - scc (a fork including option -k), to remove C comments (variable SCC);
# - astyle, to re-indent lines (variable ASTYLE)
# If a tool is absent, the filter is equivalent to a no-op.

# The filter functions receive the text to process as a string (such as
# the contents of a file, or the output of a previous filter) and return
# a string containing the output. They abort execution in case of errors
# when running the filters. Note that an absent tool does _not_ lead to
# an error.

import os
from pathlib import Path
import shutil
import subprocess
import sys

# warnings about missing commands are disabled during testing
emit_warns = os.getenv("PTESTS_TESTING") is None

# Commands already reported as missing (each one is reported only once)
warned = {}


def get_command(command, env_var_name):
    """Return a Path to the binary for `command`, or None if it is not found.

    The environment variable `env_var_name` has priority; if it is unset or
    empty, `command` is looked up in the PATH. The value taken from the
    environment variable is not checked for existence here.

    The first time a given command is not found, an informational message
    is printed (unless warnings are disabled via `emit_warns`).
    """
    p = os.getenv(env_var_name) or shutil.which(command)
    if p:
        return Path(p)
    if emit_warns and command not in warned:
        print(f"info: optional external command '{command}' not found in PATH; consider installing it or setting environment variable {env_var_name}")
        warned[command] = True
    return None


def run_and_check(command_and_args, input_data):
    """Run a command, feeding it `input_data` on stdin, and return its stdout.

    Input and output are strings; they are encoded/decoded as ASCII and
    encoding errors are ignored. The command's stderr is not captured.

    Exits the program (via sys.exit) if the command cannot be started or
    returns a non-zero exit status.
    """
    try:
        return subprocess.check_output(
            command_and_args,
            input=input_data,
            stderr=None,
            encoding="ascii",
            errors="ignore",
        )
    except (subprocess.CalledProcessError, OSError) as e:
        # OSError covers a binary that cannot be executed at all, e.g. when
        # the environment variable points to a non-existent path.
        sys.exit(f"error running command: {command_and_args}\n{e}")


def filter_with_scc(input_data):
    """Return `input_data` processed by `scc -k`, or unchanged if scc is absent."""
    scc = get_command("scc", "SCC")
    if not scc:
        return input_data
    return run_and_check([scc, "-k"], input_data)


def filter_with_astyle(input_data):
    """Return `input_data` re-indented by astyle, or unchanged if astyle is absent."""
    astyle = get_command("astyle", "ASTYLE")
    if not astyle:
        return input_data
    return run_and_check(
        [astyle, "--keep-one-line-blocks", "--keep-one-line-statements"],
        input_data,
    )


def open_and_filter(filename, apply_filters):
    """Return the contents of `filename` as a string, optionally filtered.

    If `apply_filters` is true, the contents go through filter_with_scc and
    then filter_with_astyle; otherwise they are returned as read.
    """
    # we ignore encoding errors and use ASCII to avoid issues when
    # opening files with different encodings (UTF-8, ISO-8859, etc)
    with open(filename, "r", encoding="ascii", errors='ignore') as f:
        data = f.read()
    if apply_filters:
        data = filter_with_astyle(filter_with_scc(data))
    return data