diff --git a/.gitignore b/.gitignore index 5b85a81865966bf791ed8dc91d533bb33db4be4a..521dcc53cced8219c7d446af100c882601af237f 100644 --- a/.gitignore +++ b/.gitignore @@ -189,6 +189,15 @@ Makefile.plugin.generated # WP/Coq Generated file .lia.cache +# analysis-scripts +share/analysis-scripts/build +share/analysis-scripts/fced-dist +share/analysis-scripts/fced-dist-prepare* +share/analysis-scripts/fc-estimate-difficulty.*spec +share/analysis-scripts/fc-estimate-difficulty +share/analysis-scripts/fc-estimate-difficulty.exe +share/analysis-scripts/libc_metrics.json + # generated ML files /src/libraries/utils/json.ml diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 9c8fa8d5ef8b62aab214cc09258852fa0a200f40..8968f1d721d01296a2463213c85d7d5dce6b73d3 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -7,7 +7,7 @@ stages: variables: CURRENT: $CI_COMMIT_REF_NAME - DEFAULT: "stable/manganese" + DEFAULT: "master" FRAMA_CI_OPT: "--override frama-c:$CI_COMMIT_REF_NAME,$CI_COMMIT_SHA" OCAML: "4_08" diff --git a/Makefile b/Makefile index 6b917cf38ef061f06e297d00a414b01557f93c12..0401f30e2cd4bed63bfc9449a1edd2c6cef4fd18 100644 --- a/Makefile +++ b/Makefile @@ -265,7 +265,9 @@ DISTRIB_FILES:=\ share/analysis-scripts/detect_recursion.py \ share/analysis-scripts/epilogue.mk \ share/analysis-scripts/estimate_difficulty.py \ + share/analysis-scripts/external_tool.py \ share/analysis-scripts/fc_stubs.c \ + share/analysis-scripts/fclog.py \ share/analysis-scripts/find_fun.py \ share/analysis-scripts/flamegraph.pl \ share/analysis-scripts/frama_c_results.py \ @@ -1982,7 +1984,9 @@ install:: install-lib-$(OCAMLBEST) share/analysis-scripts/clone.sh \ share/analysis-scripts/creduce.sh \ share/analysis-scripts/epilogue.mk \ + share/analysis-scripts/external_tool.py \ share/analysis-scripts/fc_stubs.c \ + share/analysis-scripts/fclog.py \ share/analysis-scripts/find_fun.py \ share/analysis-scripts/flamegraph.pl \ share/analysis-scripts/frama_c_results.py \ diff --git a/headers/header_spec.txt b/headers/header_spec.txt index e34347acdd061e025cfff74a0507b0f5dfac4d46..39f3f30674713e60820f7f3b03b3003e9a6736ed 100644 --- a/headers/header_spec.txt +++ b/headers/header_spec.txt @@ -123,7 +123,9 @@ share/analysis-scripts/creduce.sh: CEA_LGPL share/analysis-scripts/detect_recursion.py: CEA_LGPL share/analysis-scripts/epilogue.mk: CEA_LGPL share/analysis-scripts/estimate_difficulty.py: CEA_LGPL +share/analysis-scripts/external_tool.py: CEA_LGPL share/analysis-scripts/fc_stubs.c: .ignore +share/analysis-scripts/fclog.py: CEA_LGPL share/analysis-scripts/frama_c_results.py: CEA_LGPL share/analysis-scripts/cmd-dep.sh: .ignore share/analysis-scripts/concat-csv.sh: .ignore diff --git a/share/analysis-scripts/build.py b/share/analysis-scripts/build.py index 854fb82d011f36cb5fa593fee0b5e3a5a78e64d9..c4982e88e0743a5f0ec998b3be688b87b75df4db 100755 --- a/share/analysis-scripts/build.py +++ b/share/analysis-scripts/build.py @@ -34,6 +34,7 @@ import re import shutil import sys import subprocess +import typing import function_finder import source_filter @@ -47,6 +48,13 @@ parser = argparse.ArgumentParser( for analysis with Frama-C. Tries to use a build_commands.json file if available.""" ) +parser.add_argument( + "--base", + metavar="DIR", + default=".", + help="base directory used for pretty-printing relative paths (default: PWD)", + type=Path, +) parser.add_argument( "--debug", metavar="FILE", @@ -58,6 +66,7 @@ parser.add_argument( metavar="FILE", default="build_commands.json", help="path to JBDB (default: build_commands.json)", + type=Path, ) parser.add_argument( "--machdep", metavar="MACHDEP", help="analysis machdep (default: Frama-C's default)" @@ -68,6 +77,11 @@ parser.add_argument( default="main", help="name of the main function (default: main)", ) +parser.add_argument( + "--no-source-filter", + action="store_true", + help="disable source filters (less precise, but speeds up large projects)", +) parser.add_argument( "--sources", metavar="FILE", @@ -79,15 +93,17 @@ parser.add_argument( "--targets", metavar="FILE", nargs="+", - help="targets to build. When using --sources, " + "only a single target is allowed.", + help="targets to build. When using --sources, only a single target is allowed.", type=Path, ) args = parser.parse_args() +base = args.base force = args.force jbdb_path = args.jbdb machdep = args.machdep main = args.main +no_source_filter = args.no_source_filter sources = args.sources targets = args.targets debug = args.debug @@ -112,21 +128,20 @@ dot_framac_dir = Path(".frama-c") # Check required environment variables and commands in the PATH ############### -framac_bin = os.getenv("FRAMAC_BIN") -if not framac_bin: - sys.exit("error: FRAMAC_BIN not in environment (set by frama-c-script)") -framac_bin = Path(framac_bin) +framac_bin = Path( + os.getenv("FRAMAC_BIN") + or sys.exit("error: FRAMAC_BIN not in environment (set by frama-c-script)") +) under_test = os.getenv("PTESTS_TESTING") # Prepare blug-related variables and functions ################################ -blug = os.getenv("BLUG") -if not blug: - blug = shutil.which("blug") - if not blug: - sys.exit("error: path to 'blug' binary must be in PATH or variable BLUG") -blug = Path(blug) +blug = Path( + os.getenv("BLUG") + or shutil.which("blug") + or sys.exit("error: path to 'blug' binary must be in PATH or variable BLUG") +) blug_dir = blug.resolve().parent # to import blug_jbdb sys.path.insert(0, blug_dir.as_posix()) @@ -189,14 +204,18 @@ def make_target_name(target): return prettify(target).replace("/", "_").replace(".", "_") -# sources are pretty-printed relatively to the .frama-c directory, where the -# GNUmakefile will reside -def rel_prefix(path): - return path if os.path.isabs(path) else os.path.relpath(path, start=dot_framac_dir) +def rel_path(path: Path, base: Path) -> str: + """Return a relative path to the .frama-c directory, if path is relative to base. + Otherwise, return an absolute path. Typically, base is the parent of the .frama-c directory.""" + try: + path.resolve().relative_to(base.resolve()) # Fails if path is not inside base + return os.path.relpath(path, start=dot_framac_dir) + except ValueError: + return str(path.resolve()) -def pretty_sources(sources): - return [f" {rel_prefix(source)} \\" for source in sources] +def pretty_sources(sources, base): + return [f" {rel_path(source, base)} \\" for source in sorted(sources)] def lines_of_file(path): @@ -229,7 +248,7 @@ def copy_fc_stubs(): # [funcname] in [filename]. # [has_args] is used to distinguish between main(void) and main(int, char**). def find_definitions(funcname, filename): - file_content = source_filter.open_and_filter(filename, not under_test) + file_content = source_filter.open_and_filter(filename, not under_test and not no_source_filter) file_lines = file_content.splitlines(keepends=True) newlines = function_finder.compute_newline_offsets(file_lines) defs = function_finder.find_definitions_and_declarations( @@ -255,7 +274,7 @@ def list_partition(f, l): l1 = [] l2 = [] for e in l: - if f(l): + if f(e): l1.append(e) else: l2.append(e) @@ -340,7 +359,7 @@ if unknown_sources: # We also need to check if the main function uses a 'main(void)'-style # signature, to patch fc_stubs.c. -main_definitions = {} +main_definitions: dict[Path, list[typing.Tuple[Path, str, str]]] = {} for target, sources in sources_map.items(): main_definitions[target] = [] for source in sources: @@ -405,18 +424,18 @@ if jbdb_path: insert_lines_after( template, "^FCFLAGS", - [f" -json-compilation-database {rel_prefix(jbdb_path)} \\"], + [f" -json-compilation-database {rel_path(jbdb_path, base)} \\"], ) -targets_eva = [f" {make_target_name(target)}.eva \\" for target in targets] +targets_eva = [f" {make_target_name(target)}.eva \\" for target in sorted(targets)] replace_line(template, "^TARGETS = main.eva", "TARGETS = \\") insert_lines_after(template, r"^TARGETS = \\", targets_eva) delete_line(template, r"^main.parse: \\") delete_line(template, r"^ main.c \\") -for target, sources in reversed(sources_map.items()): +for target, sources in reversed(sorted(sources_map.items())): pp_target = make_target_name(target) - new_lines = [f"{pp_target}.parse: \\"] + pretty_sources(sources) + [""] + new_lines = [f"{pp_target}.parse: \\"] + pretty_sources(sources, base) + [""] if any(d[2] for d in main_definitions[target]): logging.debug( "target %s has main with args, adding -main eva_main to its FCFLAGS", diff --git a/share/analysis-scripts/build_callgraph.py b/share/analysis-scripts/build_callgraph.py index 8edc32655e804bacfbb7dd8ca1ccdc1faee21bc0..f9a6d60a4ddea7bda496a9e74b60a73a567b0ad1 100755 --- a/share/analysis-scripts/build_callgraph.py +++ b/share/analysis-scripts/build_callgraph.py @@ -26,6 +26,7 @@ for a given function name, via heuristic syntactic matching.""" import os import sys +import typing import function_finder import source_filter @@ -44,10 +45,10 @@ class Callgraph: """ # maps each caller to the list of its callees - succs = {} + succs: dict[str, list[str]] = {} # maps (caller, callee) to the list of call locations - edges = {} + edges: dict[typing.Tuple[str, str], list[typing.Tuple[str, int]]] = {} def add_edge(self, caller, callee, loc): if (caller, callee) in self.edges: diff --git a/share/analysis-scripts/estimate_difficulty.py b/share/analysis-scripts/estimate_difficulty.py index c8dcf2155da073f69ca173d02f251eaf3e19223b..c4f3b7833da1747ea08232b4c47f43efd2291f79 100755 --- a/share/analysis-scripts/estimate_difficulty.py +++ b/share/analysis-scripts/estimate_difficulty.py @@ -26,18 +26,23 @@ of analyzing a new code base with Frama-C.""" import argparse +import glob import json +import logging import os from pathlib import Path import re import subprocess +import sys import tempfile +import typing import build_callgraph +import external_tool +import fclog import source_filter # TODO : avoid relativizing paths when introducing too many ".." ; -# TODO : accept directory as argument (--full-tree), and then do glob **/*.{c,i} inside # TODO : try to check the presence of compiler builtins # TODO : try to check for pragmas # TODO : detect absence of 'main' function (library) @@ -47,34 +52,75 @@ parser = argparse.ArgumentParser( Estimates the difficulty of analyzing a given code base""" ) parser.add_argument( - "--header-dirs", - "-d", - metavar="DIR", + "paths", nargs="+", - help="directories containing headers (default: .frama-c)", + help="source files and directories. \ +If a directory <dir> is specified, it is recursively explored, as if '<dir>/**/*.[ci]' \ +had been specified.", + type=Path, ) -parser.add_argument("files", nargs="+", help="source files") -args = vars(parser.parse_args()) - -header_dirs = args["header_dirs"] -if not header_dirs: - header_dirs = [] -files = args["files"] +parser.add_argument( + "--debug", + metavar="FILE", + help="enable debug mode and redirect output to the specified file", +) +parser.add_argument( + "--verbose", + action="store_true", + help="enable verbose output; if --debug is set, output is redirected to the same file.", +) +parser.add_argument( + "--no-cloc", + action="store_true", + help="disable usage of external tool 'cloc', even if available.", +) +args = parser.parse_args() +paths = args.paths +debug = args.debug +no_cloc = args.no_cloc +verbose = args.verbose under_test = os.getenv("PTESTS_TESTING") -# gather information from several sources +fclog.init(debug, verbose) + +### Auxiliary functions ####################################################### + + +def collect_files_and_local_dirs( + paths: typing.Iterable[Path], +) -> typing.Tuple[set[Path], set[Path]]: + """Returns the list of directories (and their subdirectories) containing + the specified paths. Note that this also includes subdirectories which do not + themselves contain any .c files, but which may contain .h files.""" + dirs: set[Path] = set() + files: set[Path] = set() + for p in paths: + if p.is_dir(): + files = files.union([Path(p) for p in glob.glob(f"{p}/**/*.[chi]", recursive=True)]) + dirs.add(p) + else: + files.add(p) + dirs.add(p.parent) + local_dirs = {Path(s[0]) for d in dirs for s in os.walk(d)} + if not files: + sys.exit( + "error: no source files (.c/.i) found in provided paths: " + + " ".join([str(p) for p in paths]) + ) + return files, local_dirs def extract_keys(l): return [list(key.keys())[0] for key in l] -def get_framac_libc_function_statuses(framac, framac_share): +def get_framac_libc_function_statuses( + framac: typing.Optional[Path], framac_share: Path +) -> typing.Tuple[list[str], list[str]]: if framac: (_handler, metrics_tmpfile) = tempfile.mkstemp(prefix="fc_script_est_diff", suffix=".json") - if debug: - print("metrics_tmpfile: %s", metrics_tmpfile) + logging.debug("metrics_tmpfile: %s", metrics_tmpfile) fc_runtime = framac_share / "libc" / "__fc_runtime.c" fc_libc_headers = framac_share / "libc" / "__fc_libc.h" subprocess.run( @@ -104,10 +150,8 @@ def get_framac_libc_function_statuses(framac, framac_share): return (defined, spec_only) -re_include = re.compile(r'\s*#\s*include\s*("|<)([^">]+)("|>)') - - -def grep_includes_in_file(filename): +def grep_includes_in_file(filename: Path): + re_include = re.compile(r'\s*#\s*include\s*("|<)([^">]+)("|>)') file_content = source_filter.open_and_filter(filename, not under_test) i = 0 for line in file_content.splitlines(): @@ -119,9 +163,9 @@ def grep_includes_in_file(filename): yield (i, kind, header) -def get_includes(files): - quote_includes = {} - chevron_includes = {} +def get_includes(files: typing.Iterable[Path]): + quote_includes: dict[Path, list[typing.Tuple[Path, int]]] = {} + chevron_includes: dict[Path, list[typing.Tuple[Path, int]]] = {} for filename in files: for line, kind, header in grep_includes_in_file(filename): if kind == "<": @@ -136,13 +180,56 @@ def get_includes(files): return chevron_includes, quote_includes +def is_local_header(local_dirs: typing.Iterable[Path], header: Path): + for d in local_dirs: + if (d / header).exists(): + return True + return False + + +def grep_keywords(keywords: list[str], filename: Path) -> dict[str, int]: + with open(filename, "r") as f: + found: dict[str, int] = {} + for line in f: + for kw in keywords: + if kw in line: + if kw in found: + found[kw] += 1 + else: + found[kw] = 1 + return found + + +def pretty_keyword_count(found: dict[str, int]) -> str: + res = "" + for kw, count in sorted(found.items()): + if res: + res += ", " + res += f" {kw} ({count} line{'s' if count > 1 else ''})" + return res + + +### End of auxiliary functions ################################################ + debug = os.getenv("DEBUG") verbose = False +files, local_dirs = collect_files_and_local_dirs(paths) + +score = { + "recursion": 0, + "libc": 0, + "includes": 0, + "malloc": 0, + "keywords": 0, + "asm": 0, +} + framac_bin = os.getenv("FRAMAC_BIN") if not framac_bin: - print( - "Running script in no-Frama-C mode (set FRAMAC_BIN to the directory containing frama-c if you want to use it)." + logging.info( + "Running script in no-Frama-C mode (set FRAMAC_BIN to the directory" + + " containing frama-c if you want to use it)." ) framac = None script_dir = os.path.dirname(os.path.realpath(__file__)) @@ -153,10 +240,28 @@ else: subprocess.check_output([framac, "-no-autoload-plugins", "-print-share-path"]).decode() ) -print("Building callgraph...") + +if not no_cloc: + cloc = external_tool.get_command("cloc", "CLOC") + if cloc: + data = external_tool.run_and_check( + [str(cloc), "--hide-rate", "--progress-rate=0", "--csv"] + list(str(f) for f in files), + "", + ) + data = data.splitlines() + [nfiles, _sum, nblank, ncomment, ncode] = data[-1].split(",") + nlines = int(nblank) + int(ncomment) + int(ncode) + logging.info( + "Processing %d file(s), approx. %d lines of code (out of %d lines)", + int(nfiles), + int(ncode), + nlines, + ) + +logging.info("Building callgraph...") cg = build_callgraph.compute(files) -print("Computing data about libc/POSIX functions...") +logging.info("Computing data about libc/POSIX functions...") libc_defined_functions, libc_specified_functions = get_framac_libc_function_statuses( framac, framac_share ) @@ -172,7 +277,7 @@ with open(framac_share / "compliance" / "posix_identifiers.json", encoding="utf- posix_identifiers = all_data["data"] posix_headers = all_data["headers"] -recursive_cycles = [] +recursive_cycles: list[typing.Tuple[typing.Tuple[str, int], list[typing.Tuple[str, str]]]] = [] reported_recursive_pairs = set() build_callgraph.compute_recursive_cycles(cg, recursive_cycles) for (cycle_start_loc, cycle) in recursive_cycles: @@ -185,16 +290,22 @@ for (cycle_start_loc, cycle) in recursive_cycles: continue reported_recursive_pairs.add(cycle[-1]) (filename, line) = cycle_start_loc - (x, y) = cycle[0] - pretty_cycle = f"{x} -> {y}" - for (x, y) in cycle[1:]: - pretty_cycle += f" -> {y}" - print(f"[recursion] found recursive cycle near {filename}:{line}: {pretty_cycle}") - -callees = [callee for (_, callee) in list(cg.edges.keys())] -callees = set(callees) + + def pretty_cycle(cycle): + (x, y) = cycle[0] + res = f"{x} -> {y}" + for (x, y) in cycle[1:]: + res += f" -> {y}" + return res + + logging.info( + "[recursion] found recursive cycle near %s:%d: %s", filename, line, pretty_cycle(cycle) + ) + score["recursion"] += 1 + +callees = set(callee for (_, callee) in list(cg.edges.keys())) used_headers = set() -print(f"Estimating difficulty for {len(callees)} function calls...") +logging.info("Estimating difficulty for %d function calls...", len(callees)) warnings = 0 for callee in sorted(callees): @@ -203,8 +314,10 @@ for callee in sorted(callees): global warnings if status == "warning": warnings += 1 - if verbose or debug or status == "warning": - print(f"- {status}: {callee} ({standard}) {reason}") + if status == "warning": + logging.warning("%s (%s) %s", callee, standard, reason) + else: + logging.log(fclog.VERBOSE, "%s: %s (%s) %s", status, callee, standard, reason) try: is_problematic = posix_identifiers[callee]["notes"]["fc-support"] == "problematic" @@ -258,55 +371,56 @@ for callee in sorted(callees): if not status_emitted: callee_status("warning", standard, "has neither code nor spec in Frama-C's libc") -print(f"Function-related warnings: {warnings}") +logging.info("Function-related warnings: %d", warnings) +score["libc"] = warnings -if (verbose or debug) and used_headers: - print("used headers:") - for header in sorted(used_headers): - print(f" <{header}>") +logging.log( + fclog.VERBOSE, + "Used POSIX headers:\n%s", + "\n".join([f" <{header}>" for header in sorted(used_headers)]), +) (chevron_includes, quote_includes) = get_includes(files) - -def is_local_header(header_dirs, header): - for d in header_dirs: - path = Path(d) - if Path(path / header).exists(): - return True - return False - - -print(f"Estimating difficulty for {len(chevron_includes)} '#include <header>' directives...") +logging.info( + "Estimating difficulty for %d '#include <header>' directives...", len(chevron_includes) +) non_posix_headers = [] header_warnings = 0 for header in sorted(chevron_includes, key=str.casefold): + if not header.lower().endswith(".h"): + continue # ignore included non-header files if header in posix_headers: fc_support = posix_headers[header]["fc-support"] if fc_support == "unsupported": header_warnings += 1 - print(f"- WARNING: included header <{header}> is explicitly unsupported by Frama-C") + logging.warning("included header <%s> is explicitly unsupported by Frama-C", header) else: - if verbose or debug: - c11_or_posix = "C11" if header in c11_headers else "POSIX" - print(f"- note: included {c11_or_posix} header ") + logging.log( + fclog.VERBOSE, + "included %s header %s", + "C11" if header in c11_headers else "POSIX", + header, + ) else: - if is_local_header(header_dirs, header): - if verbose or debug: - print(f"- ok: included header <{header}> seems to be available locally") + if is_local_header(local_dirs, header): + logging.log( + fclog.VERBOSE, "ok: included header <%s> seems to be available locally", header + ) else: non_posix_headers.append(header) header_warnings += 1 - print(f"- warning: included non-POSIX header <{header}>") -print(f"Header-related warnings: {header_warnings}") - + logging.warning("included non-POSIX header <%s>", header) +logging.info("Header-related warnings: %d", header_warnings) +score["includes"] = header_warnings # dynamic allocation dynalloc_functions = set(["malloc", "calloc", "free", "realloc", "alloca", "mmap"]) dyncallees = dynalloc_functions.intersection(callees) if dyncallees: - print(f"- note: calls to dynamic allocation functions: {', '.join(sorted(dyncallees))}") - + logging.info("Calls to dynamic allocation functions: %s", ", ".join(sorted(dyncallees))) + score["malloc"] = len(dyncallees) # unsupported C11-specific features @@ -320,21 +434,21 @@ c11_unsupported = [ "alignof", # stdalign.h may use these symbols ] -for keyword in c11_unsupported: - out = subprocess.Popen( - ["grep", "-n", "\\b" + keyword + "\\b"] + files + ["/dev/null"], - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, - ) - lines = out.communicate()[0].decode("utf-8").splitlines() - if lines: - n = len(lines) - print( - f"- warning: found {n} line{'s' if n > 1 else ''} with occurrences of " - f"unsupported C11 construct '{keyword}'" - ) +logging.info("Checking presence of unsupported C11 features...") + +for fi in files: + found = grep_keywords(c11_unsupported, fi) + if found: + logging.warning("unsupported keyword(s) in %s:%s", fi, pretty_keyword_count(found)) + score["keywords"] += len(found) # assembly code if "asm" in callees or "__asm" in callees or "__asm__" in callees: - print("- warning: code seems to contain inline assembly ('asm(...)')") + logging.warning("code seems to contain inline assembly ('asm(...)')") + score["asm"] = 1 + +logging.info( + "Overall difficulty score:\n%s", + "\n".join([k + ": " + str(v) for (k, v) in sorted(score.items())]), +) diff --git a/share/analysis-scripts/external_tool.py b/share/analysis-scripts/external_tool.py new file mode 100644 index 0000000000000000000000000000000000000000..b660423228dd89ce923e80d4988219aed179d9b9 --- /dev/null +++ b/share/analysis-scripts/external_tool.py @@ -0,0 +1,76 @@ +# -*- coding: utf-8 -*- +########################################################################## +# # +# This file is part of Frama-C. # +# # +# Copyright (C) 2007-2022 # +# CEA (Commissariat à l'énergie atomique et aux énergies # +# alternatives) # +# # +# you can redistribute it and/or modify it under the terms of the GNU # +# Lesser General Public License as published by the Free Software # +# Foundation, version 2.1. # +# # +# It is distributed in the hope that it will be useful, # +# but WITHOUT ANY WARRANTY; without even the implied warranty of # +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # +# GNU Lesser General Public License for more details. # +# # +# See the GNU Lesser General Public License version 2.1 # +# for more details (enclosed in the file licenses/LGPLv2.1). # +# # +########################################################################## + +"""This file provides utility functions to use external tools, either +available in the PATH or via PyInstaller.""" + +import os +from pathlib import Path +import shutil +import subprocess +import sys +import typing + +# warnings about missing commands are disabled during testing +emit_warns = os.getenv("PTESTS_TESTING") is None + +# Cache for get_command +cached_commands: dict[str, typing.Union[Path, None]] = {} + + +def resource_path(relative_path: str) -> str: + """Get absolute path to resource; only used by the pyinstaller standalone distribution""" + base_path = getattr(sys, "_MEIPASS", os.path.dirname(os.path.abspath(__file__))) + return os.path.join(base_path, relative_path) + + +def get_command(command: str, env_var_name: str) -> typing.Union[Path, None]: + """Returns a Path to the command; priority goes to the environment variable, + then in the PATH, then in the resource directory (for a pyinstaller binary).""" + if command in cached_commands: + return cached_commands[command] + p = Path(os.getenv(env_var_name) or shutil.which(command) or resource_path(command)) + if p.exists(): + cached_commands[command] = p + return p + else: + if emit_warns: + print( + f"info: optional external command '{command}' not found in PATH;", + f"consider installing it or setting environment variable {env_var_name}", + ) + cached_commands[command] = None + return None + + +def run_and_check(command_and_args: list[str], input_data: str): + try: + return subprocess.check_output( + command_and_args, + input=input_data, + stderr=None, + encoding="ascii", + errors="ignore", + ) + except subprocess.CalledProcessError as e: + sys.exit(f"error running command: {command_and_args}\n{e}") diff --git a/share/analysis-scripts/fc-estimate-difficulty.mk b/share/analysis-scripts/fc-estimate-difficulty.mk new file mode 100644 index 0000000000000000000000000000000000000000..8cfae714209e0fe9458c16346c021170883b272d --- /dev/null +++ b/share/analysis-scripts/fc-estimate-difficulty.mk @@ -0,0 +1,142 @@ +# Produces a self-contained binary for 'frama-c-script estimate-difficulty' +# on a machine without Frama-C. +# Works on Linux and macOS. The Windows binary is compiled and tested using +# a Docker image with wine and pyinstaller (usage of Docker avoids issues with +# staging versions in e.g. Fedora, which may cause weird error messages when +# running in Windows). The Linux binary is tested on a bare-bones Alpine Linux, +# to ensure it does not depend on dynamic libraries. +# The macOS version needs to be tested by hand. + +help: + @echo "targets:" + @echo "fc-estimate-difficulty : Linux/macOS version (depending on host system)" + @echo "fc-estimate-difficulty.exe : Windows cross-compiled version (from Linux)" + @echo "clean : Erase working directory" + @echo "test-fc-estimate-difficulty : Test Linux/macOS version" + @echo "test-fc-estimate-difficulty.exe : Test Windows version" + +os := $(shell uname -s) + +workdir = fced-dist-prepare +distdir = fced-dist + +libc_metrics.json: ../libc/__fc_libc.h ../libc/__fc_runtime.c +ifeq ($(wildcard $(FRAMAC)),) + $(error FRAMAC must be set to the path of the 'frama-c' binary) +endif + FRAMAC_SHARE="$(shell $(FRAMAC) -no-autoload-plugins -print-share-path)" + rm -f $@ + $(FRAMAC) \ + $^ \ + -no-autoload-plugins -load-module metrics \ + -metrics -metrics-libc -metrics-output $@ + chmod -w $@ # generated file: prevent accidental overwriting + +PY_DEPS := \ + build_callgraph.py \ + estimate_difficulty.py \ + external_tool.py \ + fclog.py \ + function_finder.py \ + source_filter.py \ + +COMPLIANCE := $(wildcard ../compliance/*.json) +TOOLS := $(workdir)/scc $(workdir)/astyle +COMMON_DEPS := $(PY_DEPS) $(COMPLIANCE) libc_metrics.json + +fc-estimate-difficulty: EXE= +fc-estimate-difficulty.exe: EXE=".exe" + +fc-estimate-difficulty: WINE= +fc-estimate-difficulty.exe: WINE="/home/andr/git/wine/wine" + +fc-estimate-difficulty: PSEP=: +fc-estimate-difficulty.exe: PSEP=; + +fc-estimate-difficulty: $(COMMON_DEPS) $(TOOLS) +fc-estimate-difficulty.exe: \ + $(COMMON_DEPS) $(addsuffix .exe,$(TOOLS)) fced-win.Dockerfile + +fc-estimate-difficulty: + pyinstaller estimate_difficulty.py \ + -F \ + -n $@ \ + --distpath $(distdir) \ + --noconfirm \ + --add-data "libc_metrics.json:share" \ + --add-data "../compliance/*.json:share/compliance" \ + --add-binary "$(workdir)/scc:." \ + --add-binary "$(workdir)/astyle:." +ifeq ($(os),Linux) + @echo "Linux: running staticx" + staticx "$(distdir)/$@" $@ +else + @echo "NOT running staticx (macOS?)" + mv "$(distdir)/$@" $@ +endif + +fc-estimate-difficulty.exe: + mkdir -p $(workdir)/compliance + cp ../compliance/*.json $(workdir)/compliance + docker build . -f fced-win.Dockerfile -t fced-win + docker cp $$(docker create --rm fced-win):/fced/$@ $@ + +$(workdir)/scc: $(workdir)/scc-snapshots-fc-1.1.0 + make -C $< clean + make -C $< scc $(if $(findstring Linux,$(os)),CFLAGS="-static") -j + cp $(workdir)/scc-snapshots-fc-1.1.0/scc $@ + +$(workdir)/scc.exe: $(workdir)/scc-snapshots-fc-1.1.0 + make -C $< clean + make -C $< scc CC=x86_64-w64-mingw32-gcc -j + cp $(workdir)/scc-snapshots-fc-1.1.0/scc.exe $@ + +$(workdir)/scc-snapshots-fc-1.1.0: + mkdir -p $(workdir) + wget 'https://github.com/Frama-C/scc-snapshots/archive/refs/tags/fc-1.1.0.tar.gz' -O $(workdir)/fc-1.1.0.tar.gz + cd $(workdir) && tar xvf fc-1.1.0.tar.gz + touch $@ + +$(workdir)/astyle: $(workdir)/astyle_3.1 + make -C $(workdir)/astyle_3.1/build/gcc clean + make -C $(workdir)/astyle_3.1/build/gcc \ + $(if $(findstring Linux,$(os)),LDFLAGS="-static" CFLAGS="-static -static-libgcc -static-libstdc++") -j + cp $(workdir)/astyle_3.1/build/gcc/bin/astyle $@ + +$(workdir)/astyle.exe: $(workdir)/astyle_3.1 + make -C $(workdir)/astyle_3.1/build/gcc clean + make -C $(workdir)/astyle_3.1/build/gcc LDFLAGS="-static" CFLAGS="-static -static-libgcc -static-libstdc++" CXX=x86_64-w64-mingw32-g++ -j + cp $(workdir)/astyle_3.1/build/gcc/bin/astyle.exe $@ + +$(workdir)/astyle_3.1: + mkdir -p $(workdir) + wget 'https://downloads.sourceforge.net/project/astyle/astyle/astyle%203.1/astyle_3.1_linux.tar.gz' -O $(workdir)/astyle_3.1_linux.tar.gz + cd $(workdir) && tar xvf astyle_3.1_linux.tar.gz + rm -rf $(workdir)/astyle_3.1 + mv $(workdir)/astyle $(workdir)/astyle_3.1 + sed -i.bak 's/^CXX = g++/CXX ?= g++/' $(workdir)/astyle_3.1/build/gcc/Makefile + sed -i.bak 's/^CXX = g++/CXX ?= g++/' $(workdir)/astyle_3.1/build/gcc/Makefile + touch $@ + +clean: + rm -rf $(workdir) $(distdir) fc-estimate-difficulty.spec fc-estimate-difficulty.exe.spec + +distclean: clean + rm -f fc-estimate-difficulty fc-estimate-difficulty.exe libc_metrics.json + +test-fc-estimate-difficulty: fc-estimate-difficulty +ifeq ($(os),Linux) + docker build . -f fced-lin.Dockerfile -t fced-lin +else + ./$< fced-test | \ + grep -A9 "Overall difficulty score" | \ + grep -v 0 | \ + grep -q ': ' +endif + @echo "Done testing $^." + +test-fc-estimate-difficulty.exe: fc-estimate-difficulty.exe + @file $< | grep -q "MS Windows" + @echo "Done testing $< (actual testing done inside Docker image)" + +.PHONY: clean distclean test-fc-estimate-difficulty test-fc-estimate-difficulty.exe diff --git a/share/analysis-scripts/fced-lin.Dockerfile b/share/analysis-scripts/fced-lin.Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..9bf2aeed8b47f5c490cd3cf2ed8560e13e839506 --- /dev/null +++ b/share/analysis-scripts/fced-lin.Dockerfile @@ -0,0 +1,8 @@ +FROM alpine:3.15 +COPY fc-estimate-difficulty / +RUN mkdir /fced-test +COPY fced-test fced-test/ +RUN ./fc-estimate-difficulty fced-test | \ + grep -A9 "Overall difficulty score" | \ + grep -v 0 | \ + grep -q ': ' diff --git a/share/analysis-scripts/fced-test/a.c b/share/analysis-scripts/fced-test/a.c new file mode 100644 index 0000000000000000000000000000000000000000..9e3b8f5980c5180127180e585cbdecaa2d7f1d90 --- /dev/null +++ b/share/analysis-scripts/fced-test/a.c @@ -0,0 +1,11 @@ +#include <stdlib.h> +#include <X> + +volatile int v; + +int main() { + asm("mov ax, ax"); + if (v) return main(); + malloc(42); + return 0; +} diff --git a/share/analysis-scripts/fced-test/a.h b/share/analysis-scripts/fced-test/a.h new file mode 100644 index 0000000000000000000000000000000000000000..5e1d66f91d23143601cf71bfc6bd47adff296ffa --- /dev/null +++ b/share/analysis-scripts/fced-test/a.h @@ -0,0 +1 @@ +#include <a.h> diff --git a/share/analysis-scripts/fced-win.Dockerfile b/share/analysis-scripts/fced-win.Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..1ae0faf9d29a083de5abbb267facb0ac4df6f67e --- /dev/null +++ b/share/analysis-scripts/fced-win.Dockerfile @@ -0,0 +1,21 @@ +FROM tobix/pywine:3.10 +RUN mkdir /fced +COPY *.py /fced/ +RUN mkdir /fced/dist-prepare +COPY fced-dist-prepare /fced/dist-prepare/ +COPY fced-test /fced/fced-test/ +COPY libc_metrics.json /fced/ +WORKDIR /fced +RUN wine pyinstaller estimate_difficulty.py \ + -F \ + -n fc-estimate-difficulty.exe \ + --distpath . \ + --noconfirm \ + --add-data "libc_metrics.json;share" \ + --add-data "dist-prepare/compliance/*.json;share/compliance" \ + --add-binary "dist-prepare/scc.exe;." \ + --add-binary "dist-prepare/astyle.exe;." +RUN wine fc-estimate-difficulty.exe fced-test | \ + grep -A9 "Overall difficulty score" | \ + grep -v 0 | \ + grep -q ': ' diff --git a/share/analysis-scripts/fclog.py b/share/analysis-scripts/fclog.py new file mode 100644 index 0000000000000000000000000000000000000000..9355972bdd4d5b11d30243655fe4b3136d157446 --- /dev/null +++ b/share/analysis-scripts/fclog.py @@ -0,0 +1,77 @@ +# -*- coding: utf-8 -*- +########################################################################## +# # +# This file is part of Frama-C. # +# # +# Copyright (C) 2007-2022 # +# CEA (Commissariat à l'énergie atomique et aux énergies # +# alternatives) # +# # +# you can redistribute it and/or modify it under the terms of the GNU # +# Lesser General Public License as published by the Free Software # +# Foundation, version 2.1. # +# # +# It is distributed in the hope that it will be useful, # +# but WITHOUT ANY WARRANTY; without even the implied warranty of # +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # +# GNU Lesser General Public License for more details. # +# # +# See the GNU Lesser General Public License version 2.1 # +# for more details (enclosed in the file licenses/LGPLv2.1). # +# # +########################################################################## + +"""Contains functions to perform logging-related configuration common to + several scripts.""" + +import logging +import sys + +# extra log level used in messages +VERBOSE = 15 + + +def init(debug, verbose): + """Initializes the logging mechanism. 'debug' is the filename to redirect to + (or special values 'stdout'/'stderr'). If None, disable debug. + If 'verbose' is true, output VERBOSE messages. If both are set, the level + is 'debug'.""" + + logging.addLevelName(VERBOSE, "") + logging.addLevelName(logging.ERROR, "ERROR: ") + logging.addLevelName(logging.WARN, "WARNING: ") + logging.addLevelName(logging.INFO, "") + log_level = logging.DEBUG if debug else VERBOSE if verbose else logging.INFO + log_format = "%(levelname)s%(message)s" + + # special values for debug filename + if debug == "stdout" or not debug: + logging.basicConfig(stream=sys.stdout, level=log_level, format=log_format) + # This very ugly hack seems necessary to avoid BrokenPipeErrors when the + # output of the calling script is truncated (via head, grep -q, etc). + # Note that, on Windows, there is an OSError with "22 Invalid argument" + # instead of a BrokenPipeError. + original_flush = sys.stdout.flush + + def new_flush(): + try: + original_flush() + except BrokenPipeError: + sys.stdout = None + sys.exit(0) + except OSError as e: + if sys.platform == "win32" and e.errno == 22: + sys.stdout = None + sys.exit(0) + raise e + + sys.stdout.flush = new_flush + elif debug == "stderr": + logging.basicConfig(stream=sys.stderr, level=log_level, format=log_format) + else: + logging.basicConfig( + filename=debug, + level=log_level, + filemode="w", + format=log_format, + ) diff --git a/share/analysis-scripts/list_files.py b/share/analysis-scripts/list_files.py index 691ef6ac04e66ffe535defeb241a95ed2beb9183..c147503889bcdaac5bdf3e14c94ae684edd75277 100755 --- a/share/analysis-scripts/list_files.py +++ b/share/analysis-scripts/list_files.py @@ -42,7 +42,7 @@ if not arg.exists(): sys.exit(f"error: file '{arg}' not found") # check if arg has a known extension -def is_known_c_extension(ext): +def is_known_c_extension(ext: str) -> bool: return ext in (".c", ".i", ".ci", ".h") @@ -51,11 +51,10 @@ fcmake_pwd = Path(pwd) / ".frama-c" # pwd as seen by the Frama-C makefile with open(arg) as data: jcdb = json.load(data) jcdb_dir = arg.parent -includes = set() -defines = set() +includes: set[Path] = set() +defines: set[Path] = set() files = set() # set of pairs of (file, file_for_fcmake) for entry in jcdb: - arg_includes = [] # before normalization if not "file" in entry: # ignore entries without a filename continue @@ -81,9 +80,9 @@ print("") files_defining_main = set() re_main = re.compile(r"(int|void)\s+main\s*\([^)]*\)\s*\{") -for (filename, file_for_fcmake) in files: - assert os.path.exists(filename), "file does not exist: %s" % filename - with open(filename, "r") as content_file: +for (f, file_for_fcmake) in files: + assert os.path.exists(f), "file does not exist: %s" % f + with open(f, "r") as content_file: content = content_file.read() res = re.search(re_main, content) if res is not None: diff --git a/share/analysis-scripts/make_wrapper.py b/share/analysis-scripts/make_wrapper.py index 7025db38a3d861520ef5a8fca21fb4f43a6a3d25..6f353d8f67c26a8cba4af8d12c2467721e32a1e7 100755 --- a/share/analysis-scripts/make_wrapper.py +++ b/share/analysis-scripts/make_wrapper.py @@ -31,6 +31,8 @@ import os import re import subprocess import sys +import typing + from functools import partial # Check if GNU make is available and has the minimal required version @@ -78,12 +80,14 @@ framac_script = f"{framac_bin}/frama-c-script" output_lines = [] cmd_list = [make_cmd, "-C", make_dir] + args with subprocess.Popen(cmd_list, stdout=subprocess.PIPE, stderr=subprocess.PIPE) as proc: + if not proc.stdout: + sys.exit(f"error capturing stdout for cmd: {cmd_list}") while True: - line = proc.stdout.readline() - if line: - sys.stdout.buffer.write(line) + line_bytes = proc.stdout.readline() + if line_bytes: + sys.stdout.buffer.write(line_bytes) sys.stdout.flush() - output_lines.append(line.decode("utf-8")) + output_lines.append(line_bytes.decode("utf-8")) else: break @@ -99,7 +103,7 @@ for line in lines: match = re_recursive_call_start.search(line) if match: - def _action(): + def action_msg(): print( "Consider patching, stubbing or adding an ACSL " + "specification to the recursive call, " @@ -125,7 +129,7 @@ for line in lines: # note: this ending line can also match re_missing_spec tip = { "message": "Found recursive call at:\n" + "".join(msg_lines), - "action": _action, + "action": action_msg, } tips.append(tip) break @@ -136,7 +140,7 @@ for line in lines: if match: fname = match.group(1) - def _action(fname): + def action_find_fun(fname): out = subprocess.Popen( [framac_script, "find-fun", "-C", make_dir, fname], stdout=subprocess.PIPE, @@ -178,7 +182,7 @@ for line in lines: + fname + "\n" + " Looking for files defining it...", - "action": partial(_action, fname), + "action": partial(action_find_fun, fname), } tips.append(tip) @@ -193,6 +197,6 @@ if tips != []: if counter > 1: print("") print("*** recommendation #" + str(counter) + " ***") - print(str(counter) + ". " + tip["message"]) + print(str(counter) + ". " + typing.cast(str, tip["message"])) counter += 1 - tip["action"]() + typing.cast(typing.Callable, tip["action"])() diff --git a/share/analysis-scripts/normalize_jcdb.py b/share/analysis-scripts/normalize_jcdb.py index 64bf31d6aed119023c217a821387c792b8e26c52..4b221b65f56c94305ab348605bdcfc86dc00fae4 100755 --- a/share/analysis-scripts/normalize_jcdb.py +++ b/share/analysis-scripts/normalize_jcdb.py @@ -44,7 +44,6 @@ if not arg.exists(): with open(arg) as data: jcdb_json = json.load(data) jcdb_dir = arg.parent -out_json = {} replacements = set() diff --git a/share/analysis-scripts/source_filter.py b/share/analysis-scripts/source_filter.py index c255e89c28a0906f14964152f4fbb18ad914db00..752cef5b07b43ef011b4ab3ba4bbd657f70d70c9 100644 --- a/share/analysis-scripts/source_filter.py +++ b/share/analysis-scripts/source_filter.py @@ -37,82 +37,33 @@ the efficiency of regex-based heuristics.""" # of errors when running the filters. Note that an absent tool # does _not_ lead to an error. -import os from pathlib import Path -import shutil -import subprocess import sys -# warnings about missing commands are disabled during testing -emit_warns = os.getenv("PTESTS_TESTING") is None +import external_tool -# Cache for get_command -cached_commands = {} - -def resource_path(relative_path): - """Get absolute path to resource; only used by the pyinstaller standalone distribution""" - base_path = getattr(sys, "_MEIPASS", os.path.dirname(os.path.abspath(__file__))) - return os.path.join(base_path, relative_path) - - -def get_command(command, env_var_name): - """Returns a Path to the command; priority goes to the environment variable, - then in the PATH, then in the resource directory (for a pyinstaller binary).""" - if command in cached_commands: - return cached_commands[command] - p = os.getenv(env_var_name) - if p: - p = Path(p) - else: - p = shutil.which(command) - if p: - p = Path(p) - else: - p = Path(resource_path(command)) - if not p.exists(): - if emit_warns: - print( - f"info: optional external command '{command}' not found in PATH; " - f"consider installing it or setting environment variable {env_var_name}" - ) - p = None - cached_commands[command] = p - return p - - -def run_and_check(command_and_args, input_data): - try: - return subprocess.check_output( - command_and_args, - input=input_data, - stderr=None, - encoding="ascii", - errors="ignore", - ) - except subprocess.CalledProcessError as e: - sys.exit(f"error running command: {command_and_args}\n{e}") - - -def filter_with_scc(input_data): - scc = get_command("scc", "SCC") +def filter_with_scc(input_data: str) -> str: + scc_bin = "scc" if sys.platform != "win32" else "scc.exe" + scc = external_tool.get_command(scc_bin, "SCC") if scc: - return run_and_check([scc, "-k"], input_data) + return external_tool.run_and_check([str(scc), "-k", "-b"], input_data) else: return input_data -def filter_with_astyle(input_data): - astyle = get_command("astyle", "ASTYLE") +def filter_with_astyle(input_data: str) -> str: + astyle_bin = "astyle" if sys.platform != "win32" else "astyle.exe" + astyle = external_tool.get_command(astyle_bin, "ASTYLE") if astyle: - return run_and_check( - [astyle, "--keep-one-line-blocks", "--keep-one-line-statements"], input_data + return external_tool.run_and_check( + [str(astyle), "--keep-one-line-blocks", "--keep-one-line-statements"], input_data ) else: return input_data -def open_and_filter(filename, apply_filters): +def open_and_filter(filename: Path, apply_filters: bool) -> str: # we ignore encoding errors and use ASCII to avoid issues when # opening files with different encodings (UTF-8, ISO-8859, etc) with open(filename, "r", encoding="ascii", errors="ignore") as f: diff --git a/tests/fc_script/estimate_difficulty.i b/tests/fc_script/estimate_difficulty.i index 23b63dd6c5250076eb5eb8e74f0197b0bd9a12cc..e52e49bebdeba046b2d1e5be6821c9cac219aa02 100644 --- a/tests/fc_script/estimate_difficulty.i +++ b/tests/fc_script/estimate_difficulty.i @@ -1,6 +1,6 @@ /* run.config NOFRAMAC: testing frama-c-script, not frama-c itself - EXECNOW: LOG @PTEST_NAME@.res LOG @PTEST_NAME@.err PTESTS_TESTING=1 %{bin:frama-c-script} estimate-difficulty @PTEST_FILE@ > @PTEST_NAME@.res 2> @PTEST_NAME@.err + EXECNOW: LOG @PTEST_NAME@.res LOG @PTEST_NAME@.err PTESTS_TESTING=1 %{bin:frama-c-script} estimate-difficulty --no-cloc @PTEST_FILE@ > @PTEST_NAME@.res 2> @PTEST_NAME@.err */ // these includes are not actually used by the compiler @@ -25,4 +25,8 @@ int main() { strlen(); // no warning ccosl(); // warning: neither code nor spec dprintf(); // no warning: neither code nor spec, but handled by Variadic + __asm__("xor ax, ax"); + malloc(2); + alignof(char*); + _Complex cc; } diff --git a/tests/fc_script/oracle/estimate_difficulty.res b/tests/fc_script/oracle/estimate_difficulty.res index 30f1b80034aa927ce043a821e1aba9c17be0a536..c2388cab71d69c4cb16e73185e5f625471dc75d2 100644 --- a/tests/fc_script/oracle/estimate_difficulty.res +++ b/tests/fc_script/oracle/estimate_difficulty.res @@ -1,10 +1,21 @@ Building callgraph... Computing data about libc/POSIX functions... [recursion] found recursive cycle near estimate_difficulty.i:18: f -> f -Estimating difficulty for 7 function calls... -- warning: ccosl (POSIX) has neither code nor spec in Frama-C's libc -- warning: setjmp (POSIX) is known to be problematic for code analysis +Estimating difficulty for 10 function calls... +WARNING: ccosl (POSIX) has neither code nor spec in Frama-C's libc +WARNING: setjmp (POSIX) is known to be problematic for code analysis Function-related warnings: 2 Estimating difficulty for 3 '#include <header>' directives... -- WARNING: included header <complex.h> is explicitly unsupported by Frama-C +WARNING: included header <complex.h> is explicitly unsupported by Frama-C Header-related warnings: 1 +Calls to dynamic allocation functions: malloc +Checking presence of unsupported C11 features... +WARNING: unsupported keyword(s) in estimate_difficulty.i: _Complex (1 line), alignof (1 line) +WARNING: code seems to contain inline assembly ('asm(...)') +Overall difficulty score: +asm: 1 +includes: 1 +keywords: 2 +libc: 2 +malloc: 1 +recursion: 1