From ab5db2c6c248132c9611332c52f46f334f0a5f31 Mon Sep 17 00:00:00 2001 From: Andre Maroneze <andre.maroneze@cea.fr> Date: Wed, 23 Feb 2022 17:24:41 +0100 Subject: [PATCH] [analysis-scripts] improve and distribute estimate-difficulty executable Allow estimate-difficulty to be exported (in Linux/macOS/Windows) as independent executable, so that it can be used without having to install Frama-C. --- .gitattributes | 5 + .gitignore | 9 + share/analysis-scripts/build.py | 59 ++-- share/analysis-scripts/build_callgraph.py | 4 +- share/analysis-scripts/estimate_difficulty.py | 301 +++++++++++++----- share/analysis-scripts/external_tool.py | 81 +++++ .../fc-estimate-difficulty.mk | 159 +++++++++ share/analysis-scripts/fced-lin.Dockerfile | 8 + share/analysis-scripts/fced-test/a.c | 11 + share/analysis-scripts/fced-test/a.h | 1 + share/analysis-scripts/fced-win.Dockerfile | 21 ++ share/analysis-scripts/fclog.py | 77 +++++ share/analysis-scripts/print_callgraph.py | 3 +- share/analysis-scripts/source_filter.py | 70 +--- share/dune | 2 + .../estimate-difficulty.c | 11 +- tests/fc_script/estimate-difficulty.t/run.t | 27 +- 17 files changed, 662 insertions(+), 187 deletions(-) create mode 100644 share/analysis-scripts/external_tool.py create mode 100644 share/analysis-scripts/fc-estimate-difficulty.mk create mode 100644 share/analysis-scripts/fced-lin.Dockerfile create mode 100644 share/analysis-scripts/fced-test/a.c create mode 100644 share/analysis-scripts/fced-test/a.h create mode 100644 share/analysis-scripts/fced-win.Dockerfile create mode 100644 share/analysis-scripts/fclog.py diff --git a/.gitattributes b/.gitattributes index 98a33b2e7a7..b3719b15315 100644 --- a/.gitattributes +++ b/.gitattributes @@ -280,6 +280,11 @@ README* header_spec=.ignore /doc/aorai/Makefile header_spec=AORAI_LGPL +/share/analysis-scripts/fc-estimate-difficulty.mk header_spec=.ignore +/share/analysis-scripts/fced-lin.Dockerfile header_spec=.ignore +/share/analysis-scripts/fced-win.Dockerfile header_spec=.ignore +/share/analysis-scripts/fced-test/a.c header_spec=.ignore +/share/analysis-scripts/fced-test/a.h header_spec=.ignore /share/analysis-scripts/flamegraph.pl header_spec=CDDL /share/emacs/acsl.el header_spec=CEA_PR_LGPL /share/libc/argz.h header_spec=CEA_FSF_LGPL diff --git a/.gitignore b/.gitignore index acdc5c99766..90dbf023094 100644 --- a/.gitignore +++ b/.gitignore @@ -186,6 +186,15 @@ Makefile.plugin.generated # WP/Coq Generated file .lia.cache +# analysis-scripts +share/analysis-scripts/build +share/analysis-scripts/fced-dist +share/analysis-scripts/fced-dist-prepare* +share/analysis-scripts/fc-estimate-difficulty.*spec +share/analysis-scripts/fc-estimate-difficulty +share/analysis-scripts/fc-estimate-difficulty.exe +share/analysis-scripts/libc_metrics.json + # generated ML files /src/libraries/utils/json.ml diff --git a/share/analysis-scripts/build.py b/share/analysis-scripts/build.py index a2d4f6c6628..43b967f965e 100755 --- a/share/analysis-scripts/build.py +++ b/share/analysis-scripts/build.py @@ -52,6 +52,13 @@ parser = argparse.ArgumentParser( for analysis with Frama-C. Tries to use a build_commands.json file if available.""" ) +parser.add_argument( + "--base", + metavar="DIR", + default=".", + help="base directory used for pretty-printing relative paths (default: PWD)", + type=Path, +) parser.add_argument( "--debug", metavar="FILE", @@ -91,11 +98,12 @@ parser.add_argument( "--targets", metavar="FILE", nargs="+", - help="targets to build. When using --sources, " + "only a single target is allowed.", + help="targets to build. When using --sources, only a single target is allowed.", type=Path, ) args = parser.parse_args() +base = args.base force = args.force jbdb_path = args.jbdb machdep = args.machdep @@ -125,21 +133,20 @@ dot_framac_dir = Path(".frama-c") # Check required environment variables and commands in the PATH ############### -framac_bin_str = os.getenv("FRAMAC_BIN") -if not framac_bin_str: - sys.exit("error: FRAMAC_BIN not in environment (set by frama-c-script)") -framac_bin = Path(framac_bin_str) +framac_bin = Path( + os.getenv("FRAMAC_BIN") + or sys.exit("error: FRAMAC_BIN not in environment (set by frama-c-script)") +) under_test = os.getenv("PTESTS_TESTING") # Prepare blug-related variables and functions ################################ -blug_str = os.getenv("BLUG") -if not blug_str: - blug_str = shutil.which("blug") - if not blug_str: - sys.exit("error: path to 'blug' binary must be in PATH or variable BLUG") -blug = Path(blug_str) +blug = Path( + os.getenv("BLUG") + or shutil.which("blug") + or sys.exit("error: path to 'blug' binary must be in PATH or variable BLUG") +) blug_dir = blug.resolve().parent # to import blug_jbdb sys.path.insert(0, blug_dir.as_posix()) @@ -202,20 +209,18 @@ def make_target_name(target: Path) -> str: return prettify(str(target)).replace("/", "_").replace(".", "_") -# sources are pretty-printed relatively to the .frama-c directory, where the -# GNUmakefile will reside -def rel_prefix(path: Path) -> str: - # heuristics: try a relative path, but if too many ".."'s, then give up - # and use an absolute one. - relp = os.path.relpath(path, start=dot_framac_dir) - if relp.startswith(os.path.join("..", "..")): - return str(path) - else: - return relp +def rel_path(path: Path, base: Path) -> str: + """Return a relative path to the .frama-c directory, if path is relative to base. + Otherwise, return an absolute path. Typically, base is the parent of the .frama-c directory.""" + try: + path.resolve().relative_to(base.resolve()) # Fails if path is not inside base + return os.path.relpath(path, start=dot_framac_dir) + except ValueError: + return str(path.resolve()) -def pretty_sources(sources: list[Path]) -> list[str]: - return [f" {rel_prefix(source)} \\" for source in sorted(sources)] +def pretty_sources(sources: list[Path], base: Path) -> list[str]: + return [f" {rel_path(source, base)} \\" for source in sorted(sources)] def lines_of_file(path: Path) -> list[str]: @@ -248,7 +253,9 @@ def copy_fc_stubs() -> Path: # [funcname] in [filename]. # [has_args] is used to distinguish between main(void) and main(int, char**). def find_definitions(funcname: str, filename: str) -> list[tuple[str, bool]]: - file_content = source_filter.open_and_filter(filename, not under_test and do_filter_source) + file_content = source_filter.open_and_filter( + Path(filename), not under_test and do_filter_source + ) file_lines = file_content.splitlines(keepends=True) newlines = function_finder.compute_newline_offsets(file_lines) defs = function_finder.find_definitions_and_declarations( @@ -425,7 +432,7 @@ if jbdb_path: insert_lines_after( template, "^FCFLAGS", - [f" -json-compilation-database {rel_prefix(jbdb_path)} \\"], + [f" -json-compilation-database {rel_path(jbdb_path, base)} \\"], ) targets_eva = [f" {make_target_name(target)}.eva \\" for target in sorted(targets)] @@ -436,7 +443,7 @@ delete_line(template, r"^main.parse: \\") delete_line(template, r"^ main.c \\") for target, sources in sorted(sources_map.items(), reverse=True): pp_target = make_target_name(target) - new_lines = [f"{pp_target}.parse: \\"] + pretty_sources(sources) + [""] + new_lines = [f"{pp_target}.parse: \\"] + pretty_sources(sources, base) + [""] if any(d[2] for d in main_definitions[target]): logging.debug( "target %s has main with args, adding -main eva_main to its FCFLAGS", diff --git a/share/analysis-scripts/build_callgraph.py b/share/analysis-scripts/build_callgraph.py index 4adf25b358f..0f790b0545e 100755 --- a/share/analysis-scripts/build_callgraph.py +++ b/share/analysis-scripts/build_callgraph.py @@ -24,8 +24,8 @@ """This script finds files containing likely declarations and definitions for a given function name, via heuristic syntactic matching.""" -from __future__ import annotations import os +from pathlib import Path import sys import function_finder @@ -70,7 +70,7 @@ class Callgraph: return f"Callgraph({self.succs}, {self.edges})" -def compute(files): +def compute(files: set[Path]): cg = Callgraph() for f in files: file_content = source_filter.open_and_filter(f, not under_test) diff --git a/share/analysis-scripts/estimate_difficulty.py b/share/analysis-scripts/estimate_difficulty.py index f0fa8cfc5a8..3c7ad122290 100755 --- a/share/analysis-scripts/estimate_difficulty.py +++ b/share/analysis-scripts/estimate_difficulty.py @@ -25,20 +25,24 @@ """This script uses several heuristics to try and estimate the difficulty of analyzing a new code base with Frama-C.""" -from __future__ import annotations import argparse +import glob import json +import logging import os from pathlib import Path import re import subprocess +import sys import tempfile +from typing import Iterable import build_callgraph +import external_tool +import fclog import source_filter # TODO : avoid relativizing paths when introducing too many ".." ; -# TODO : accept directory as argument (--full-tree), and then do glob **/*.{c,i} inside # TODO : try to check the presence of compiler builtins # TODO : try to check for pragmas # TODO : detect absence of 'main' function (library) @@ -48,34 +52,81 @@ parser = argparse.ArgumentParser( Estimates the difficulty of analyzing a given code base""" ) parser.add_argument( - "--header-dirs", - "-d", - metavar="DIR", + "paths", nargs="+", - help="directories containing headers (default: .frama-c)", + help="source files and directories. \ +If a directory <dir> is specified, it is recursively explored, as if '<dir>/**/*.[ci]' \ +had been specified.", + type=Path, ) -parser.add_argument("files", nargs="+", help="source files") -args = vars(parser.parse_args()) - -header_dirs = args["header_dirs"] -if not header_dirs: - header_dirs = [] -files = args["files"] +parser.add_argument( + "--debug", + metavar="FILE", + help="enable debug mode and redirect output to the specified file", +) +parser.add_argument( + "--verbose", + action="store_true", + help="enable verbose output; if --debug is set, output is redirected to the same file.", +) +parser.add_argument( + "--no-cloc", + action="store_true", + help="disable usage of external tool 'cloc', even if available.", +) +args = parser.parse_args() +paths = args.paths +debug = args.debug +no_cloc = args.no_cloc +verbose = args.verbose under_test = os.getenv("PTESTS_TESTING") -# gather information from several sources +fclog.init(debug, verbose) + +### Auxiliary functions ####################################################### + + +def get_dir(path): + """Similar to dirname, but returns the path itself if it refers to a directory""" + if path.is_dir(): + return path + else: + return path.parent + + +def collect_files_and_local_dirs(paths) -> tuple[set[Path], set[Path]]: + """Returns the list of directories (and their subdirectories) containing + the specified paths. Note that this also includes subdirectories which do not + themselves contain any .c files, but which may contain .h files.""" + dirs: set[Path] = set() + files: set[Path] = set() + for p in paths: + if p.is_dir(): + files = files.union([Path(p) for p in glob.glob(f"{p}/**/*.[chi]", recursive=True)]) + dirs.add(p) + else: + files.add(p) + dirs.add(p.parent) + local_dirs = {Path(s[0]) for d in dirs for s in os.walk(d)} + if not files: + sys.exit( + "error: no source files (.c/.i) found in provided paths: " + + " ".join([str(p) for p in paths]) + ) + return files, local_dirs def extract_keys(l): return [list(key.keys())[0] for key in l] -def get_framac_libc_function_statuses(framac, framac_share): +def get_framac_libc_function_statuses( + framac: Path | None, framac_share: Path +) -> tuple[list[str], list[str]]: if framac: (_handler, metrics_tmpfile) = tempfile.mkstemp(prefix="fc_script_est_diff", suffix=".json") - if debug: - print("metrics_tmpfile: %s", metrics_tmpfile) + logging.debug("metrics_tmpfile: %s", metrics_tmpfile) fc_runtime = framac_share / "libc" / "__fc_runtime.c" fc_libc_headers = framac_share / "libc" / "__fc_libc.h" subprocess.run( @@ -105,10 +156,8 @@ def get_framac_libc_function_statuses(framac, framac_share): return (defined, spec_only) -re_include = re.compile(r'\s*#\s*include\s*("|<)([^">]+)("|>)') - - -def grep_includes_in_file(filename): +def grep_includes_in_file(filename: Path): + re_include = re.compile(r'\s*#\s*include\s*("|<)([^">]+)("|>)') file_content = source_filter.open_and_filter(filename, not under_test) i = 0 for line in file_content.splitlines(): @@ -120,9 +169,9 @@ def grep_includes_in_file(filename): yield (i, kind, header) -def get_includes(files): - quote_includes = {} - chevron_includes = {} +def get_includes(files: set[Path]): + quote_includes: dict[Path, list[tuple[Path, int]]] = {} + chevron_includes: dict[Path, list[tuple[Path, int]]] = {} for filename in files: for line, kind, header in grep_includes_in_file(filename): if kind == "<": @@ -137,13 +186,64 @@ def get_includes(files): return chevron_includes, quote_includes +def is_local_header(local_dirs, header): + for d in local_dirs: + path = Path(d) + if Path(path / header).exists(): + return True + return False + + +def grep_keywords(keywords: Iterable[str], filename: Path) -> dict[str, int]: + with open(filename, "r") as f: + found: dict[str, int] = {} + for line in f: + if any(x in line for x in keywords): + # found one or more keywords; count them + for kw in keywords: + if kw in line: + if kw in found: + found[kw] += 1 + else: + found[kw] = 1 + return found + + +def pretty_unsupported_keywords( + file: Path, unsupported_keywords: dict[str, str], found: dict[str, int] +) -> str: + res = f"unsupported keyword(s) in {file}: " + descriptions: list[str] = [] + for kw, count in sorted(found.items()): + if descriptions: # not first occurrence + res += ", " + res += f" {kw} ({count} line{'s' if count > 1 else ''})" + descriptions.append(f"{kw} is a {unsupported_keywords[kw]}") + res += "\n - " + "\n - ".join(descriptions) + return res + + +### End of auxiliary functions ################################################ + debug = os.getenv("DEBUG") verbose = False +files, local_dirs = collect_files_and_local_dirs(paths) + +score = { + "recursion": 0, + "libc": 0, + "includes": 0, + "malloc": 0, + "keywords": 0, + "asm": 0, +} + framac_bin = os.getenv("FRAMAC_BIN") if not framac_bin: - print( - "Running script in no-Frama-C mode (set FRAMAC_BIN to the directory containing frama-c if you want to use it)." + logging.info( + "Running script in no-Frama-C mode (set FRAMAC_BIN to the directory" + + " containing frama-c if you want to use it)." ) framac = None script_dir = os.path.dirname(os.path.realpath(__file__)) @@ -154,10 +254,27 @@ else: subprocess.check_output([framac, "-no-autoload-plugins", "-print-share-path"]).decode() ) -print("Building callgraph...") + +if not no_cloc: + cloc = external_tool.get_command("cloc", "CLOC") + if cloc: + data = external_tool.run_and_check( + [cloc, "--hide-rate", "--progress-rate=0", "--csv"] + list(str(f) for f in files), "" + ) + data = data.splitlines() + [nfiles, _sum, nblank, ncomment, ncode] = data[-1].split(",") + nlines = int(nblank) + int(ncomment) + int(ncode) + logging.info( + "Processing %d file(s), approx. %d lines of code (out of %d lines)", + int(nfiles), + int(ncode), + nlines, + ) + +logging.info("Building callgraph...") cg = build_callgraph.compute(files) -print("Computing data about libc/POSIX functions...") +logging.info("Computing data about libc/POSIX functions...") libc_defined_functions, libc_specified_functions = get_framac_libc_function_statuses( framac, framac_share ) @@ -186,16 +303,23 @@ for cycle_start_loc, cycle in recursive_cycles: continue reported_recursive_pairs.add(cycle[-1]) (filename, line) = cycle_start_loc - (x, y) = cycle[0] - pretty_cycle = f"{x} -> {y}" - for x, y in cycle[1:]: - pretty_cycle += f" -> {y}" - print(f"[recursion] found recursive cycle near {filename}:{line}: {pretty_cycle}") + + def pretty_cycle(cycle): + (x, y) = cycle[0] + res = f"{x} -> {y}" + for x, y in cycle[1:]: + res += f" -> {y}" + return res + + logging.info( + "[recursion] found recursive cycle near %s:%d: %s", filename, line, pretty_cycle(cycle) + ) + score["recursion"] += 1 callees_list = [callee for (_, callee) in list(cg.edges.keys())] callees = set(callees_list) used_headers = set() -print(f"Estimating difficulty for {len(callees)} function calls...") +logging.info("Estimating difficulty for %d function calls...", len(callees)) warnings = 0 for callee in sorted(callees): @@ -204,8 +328,10 @@ for callee in sorted(callees): global warnings if status == "warning": warnings += 1 - if verbose or debug or status == "warning": - print(f"- {status}: {callee} ({standard}) {reason}") + if status == "warning": + logging.warning("%s (%s) %s", callee, standard, reason) + else: + logging.log(fclog.VERBOSE, "%s: %s (%s) %s", status, callee, standard, reason) try: is_problematic = posix_identifiers[callee]["notes"]["fc-support"] == "problematic" @@ -259,84 +385,83 @@ for callee in sorted(callees): if not status_emitted: callee_status("warning", standard, "has neither code nor spec in Frama-C's libc") -print(f"Function-related warnings: {warnings}") +logging.info("Function-related warnings: %d", warnings) +score["libc"] = warnings -if (verbose or debug) and used_headers: - print("used headers:") - for header in sorted(used_headers): - print(f" <{header}>") +logging.log( + fclog.VERBOSE, + "Used POSIX headers:\n%s", + "\n".join([f" <{header}>" for header in sorted(used_headers)]), +) (chevron_includes, quote_includes) = get_includes(files) - -def is_local_header(header_dirs, header): - for d in header_dirs: - path = Path(d) - if Path(path / header).exists(): - return True - return False - - -print(f"Estimating difficulty for {len(chevron_includes)} '#include <header>' directives...") +logging.info( + "Estimating difficulty for %d '#include <header>' directives...", len(chevron_includes) +) non_posix_headers = [] header_warnings = 0 for header in sorted(chevron_includes, key=str.casefold): + if not header.lower().endswith(".h"): + continue # ignore included non-header files if header in posix_headers: fc_support = posix_headers[header]["fc-support"] if fc_support == "unsupported": header_warnings += 1 - print(f"- WARNING: included header <{header}> is explicitly unsupported by Frama-C") + logging.warning("included header <%s> is explicitly unsupported by Frama-C", header) else: - if verbose or debug: - c11_or_posix = "C11" if header in c11_headers else "POSIX" - print(f"- note: included {c11_or_posix} header ") + logging.log( + fclog.VERBOSE, + "included %s header %s", + "C11" if header in c11_headers else "POSIX", + header, + ) else: - if is_local_header(header_dirs, header): - if verbose or debug: - print(f"- ok: included header <{header}> seems to be available locally") + if is_local_header(local_dirs, header): + logging.log( + fclog.VERBOSE, "ok: included header <%s> seems to be available locally", header + ) else: non_posix_headers.append(header) header_warnings += 1 - print(f"- warning: included non-POSIX header <{header}>") -print(f"Header-related warnings: {header_warnings}") - + logging.warning("included non-POSIX header <%s>", header) +logging.info("Header-related warnings: %d", header_warnings) +score["includes"] = header_warnings # dynamic allocation dynalloc_functions = set(["malloc", "calloc", "free", "realloc", "alloca", "mmap"]) dyncallees = dynalloc_functions.intersection(callees) if dyncallees: - print(f"- note: calls to dynamic allocation functions: {', '.join(sorted(dyncallees))}") - + logging.info("Calls to dynamic allocation functions: %s", ", ".join(sorted(dyncallees))) + score["malloc"] = len(dyncallees) # unsupported C11 or non-standard specific features -unsupported_keywords = [ - ("_Alignas", "C11 construct"), - ("_Alignof", "C11 construct"), - ("_Complex", "C11 construct"), - ("_Imaginary", "C11 construct"), - ("alignas", "C11 construct"), - ("alignof", "C11 construct"), # stdalign.h may use these symbols - ("__int128", "non-standard construct (GNU extension)"), - ("__uint128_t", "non-standard construct (GNU extension)"), -] - -for keyword, message in unsupported_keywords: - out = subprocess.Popen( - ["grep", "-n", "\\b" + keyword + "\\b"] + files + ["/dev/null"], - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, - ) - lines = out.communicate()[0].decode("utf-8").splitlines() - if lines: - n = len(lines) - print( - f"- warning: found {n} line{'s' if n > 1 else ''} with occurrences of " - f"unsupported {message} '{keyword}'" - ) +unsupported_keywords = { + "_Alignas": "C11 construct", + "_Alignof": "C11 construct", + "_Complex": "C11 construct", + "_Imaginary": "C11 construct", + "alignas": "C11 construct", + "alignof": "C11 construct", # stdalign.h may use these symbols + "__int128": "non-standard construct (GNU extension)", + "__uint128_t": "non-standard construct (GNU extension)", +} + +for ff in files: + found = grep_keywords(unsupported_keywords.keys(), ff) + if found: + logging.warning(pretty_unsupported_keywords(ff, unsupported_keywords, found)) + score["keywords"] += len(found) # assembly code if "asm" in callees or "__asm" in callees or "__asm__" in callees: - print("- warning: code seems to contain inline assembly ('asm(...)')") + logging.warning("code seems to contain inline assembly ('asm(...)')") + score["asm"] = 1 + +logging.info( + "Overall difficulty score:\n%s", + "\n".join([k + ": " + str(v) for (k, v) in sorted(score.items())]), +) diff --git a/share/analysis-scripts/external_tool.py b/share/analysis-scripts/external_tool.py new file mode 100644 index 00000000000..d1a1b3584e0 --- /dev/null +++ b/share/analysis-scripts/external_tool.py @@ -0,0 +1,81 @@ +# -*- coding: utf-8 -*- +########################################################################## +# # +# This file is part of Frama-C. # +# # +# Copyright (C) 2007-2024 # +# CEA (Commissariat à l'énergie atomique et aux énergies # +# alternatives) # +# # +# you can redistribute it and/or modify it under the terms of the GNU # +# Lesser General Public License as published by the Free Software # +# Foundation, version 2.1. # +# # +# It is distributed in the hope that it will be useful, # +# but WITHOUT ANY WARRANTY; without even the implied warranty of # +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # +# GNU Lesser General Public License for more details. # +# # +# See the GNU Lesser General Public License version 2.1 # +# for more details (enclosed in the file licenses/LGPLv2.1). # +# # +########################################################################## + +"""This file provides utility functions to use external tools, either +available in the PATH or via PyInstaller.""" + +import os +from pathlib import Path +import shutil +import subprocess +import sys + +# warnings about missing commands are disabled during testing +emit_warns = os.getenv("PTESTS_TESTING") is None + +# Cache for get_command +cached_commands: dict[str, Path | None] = {} + + +def resource_path(relative_path): + """Get absolute path to resource; only used by the pyinstaller standalone distribution""" + base_path = getattr(sys, "_MEIPASS", os.path.dirname(os.path.abspath(__file__))) + return os.path.join(base_path, relative_path) + + +def get_command(command, env_var_name): + """Returns a Path to the command; priority goes to the environment variable, + then in the PATH, then in the resource directory (for a pyinstaller binary).""" + if command in cached_commands: + return cached_commands[command] + p = os.getenv(env_var_name) + if p: + p = Path(p) + else: + p = shutil.which(command) + if p: + p = Path(p) + else: + p = Path(resource_path(command)) + if not p.exists(): + if emit_warns: + print( + f"info: optional external command '{command}' not found in PATH;", + f"consider installing it or setting environment variable {env_var_name}", + ) + p = None + cached_commands[command] = p + return p + + +def run_and_check(command_and_args, input_data): + try: + return subprocess.check_output( + command_and_args, + input=input_data, + stderr=None, + encoding="ascii", + errors="ignore", + ) + except subprocess.CalledProcessError as e: + sys.exit(f"error running command: {command_and_args}\n{e}") diff --git a/share/analysis-scripts/fc-estimate-difficulty.mk b/share/analysis-scripts/fc-estimate-difficulty.mk new file mode 100644 index 00000000000..f2648da8f29 --- /dev/null +++ b/share/analysis-scripts/fc-estimate-difficulty.mk @@ -0,0 +1,159 @@ +# Produces a self-contained binary for 'frama-c-script estimate-difficulty' +# on a machine without Frama-C. + +# Example usage: +# export FRAMAC=$(which frama-c) && make -f fc-estimate-difficulty.mk fc-estimate-difficulty +# export FRAMAC=$(which frama-c) && make -f fc-estimate-difficulty.mk fc-estimate-difficulty.exe + +# Works on Linux and macOS. The Windows binary is compiled and tested using +# a Docker image with wine and pyinstaller (usage of Docker avoids issues with +# staging versions in e.g. Fedora, which may cause weird error messages when +# running in Windows). The Linux binary is tested on a bare-bones Alpine Linux, +# to ensure it does not depend on dynamic libraries. +# The macOS version needs to be tested by hand. + +# Notes: +# pip install "staticx==0.14.1" +# pip install "patchelf==0.17.0.0" # seems to work; version 0.17.2.1 did not work when tried + +help: + @echo "targets:" + @echo "fc-estimate-difficulty : Linux/macOS version (depending on host system)" + @echo "fc-estimate-difficulty.exe : Windows cross-compiled version (from Linux)" + @echo "clean : Erase working directory" + @echo "test-fc-estimate-difficulty : Test Linux/macOS version" + @echo "test-fc-estimate-difficulty.exe : Test Windows version" + +os := $(shell uname -s) + +workdir = fced-dist-prepare +distdir = fced-dist + +# if 'podman' is installed, we assume it is preferred over Docker; +# otherwise, try using 'docker' +ifneq (, $(shell which podman)) +DOCKER:=podman +else +DOCKER:=docker +endif + +libc_metrics.json: ../libc/__fc_libc.h ../libc/__fc_runtime.c +ifeq ($(wildcard $(FRAMAC)),) + $(error FRAMAC must be set to the path of the 'frama-c' binary) +endif + FRAMAC_SHARE="$(shell $(FRAMAC) -no-autoload-plugins -print-share-path)" + rm -f $@ + $(FRAMAC) \ + $^ \ + -no-autoload-plugins -load-module metrics \ + -metrics -metrics-libc -metrics-output $@ + chmod -w $@ # generated file: prevent accidental overwriting + +PY_DEPS := \ + build_callgraph.py \ + estimate_difficulty.py \ + external_tool.py \ + fclog.py \ + function_finder.py \ + source_filter.py \ + +COMPLIANCE := $(wildcard ../compliance/*.json) +TOOLS := $(workdir)/scc $(workdir)/astyle +COMMON_DEPS := $(PY_DEPS) $(COMPLIANCE) libc_metrics.json + +fc-estimate-difficulty: EXE= +fc-estimate-difficulty.exe: EXE=".exe" + +fc-estimate-difficulty: WINE= +fc-estimate-difficulty.exe: WINE="/home/andr/git/wine/wine" + +fc-estimate-difficulty: PSEP=: +fc-estimate-difficulty.exe: PSEP=; + +fc-estimate-difficulty: $(COMMON_DEPS) $(TOOLS) +fc-estimate-difficulty.exe: \ + $(COMMON_DEPS) $(addsuffix .exe,$(TOOLS)) fced-win.Dockerfile + +fc-estimate-difficulty: + pyinstaller estimate_difficulty.py \ + -F \ + -n $@ \ + --distpath $(distdir) \ + --noconfirm \ + --add-data "libc_metrics.json:share" \ + --add-data "../compliance/*.json:share/compliance" \ + --add-binary "$(workdir)/scc:." \ + --add-binary "$(workdir)/astyle:." +ifeq ($(os),Linux) + @echo "Linux: running staticx" + staticx "$(distdir)/$@" $@ +else + @echo "NOT running staticx (macOS?)" + mv "$(distdir)/$@" $@ +endif + +fc-estimate-difficulty.exe: + mkdir -p $(workdir)/compliance + cp ../compliance/*.json $(workdir)/compliance + $(DOCKER) build . -f fced-win.Dockerfile -t fced-win + $(DOCKER) cp $$($(DOCKER) create --rm fced-win):/fced/$@ $@ + +$(workdir)/scc: $(workdir)/scc-snapshots-fc-1.1.0 + make -C $< clean + make -C $< scc $(if $(findstring Linux,$(os)),CFLAGS="-static") -j + cp $(workdir)/scc-snapshots-fc-1.1.0/scc $@ + +$(workdir)/scc.exe: $(workdir)/scc-snapshots-fc-1.1.0 + make -C $< clean + make -C $< scc CC=x86_64-w64-mingw32-gcc -j + cp $(workdir)/scc-snapshots-fc-1.1.0/scc.exe $@ + +$(workdir)/scc-snapshots-fc-1.1.0: + mkdir -p $(workdir) + wget 'https://github.com/Frama-C/scc-snapshots/archive/refs/tags/fc-1.1.0.tar.gz' -O $(workdir)/fc-1.1.0.tar.gz + cd $(workdir) && tar xvf fc-1.1.0.tar.gz + touch $@ + +$(workdir)/astyle: $(workdir)/astyle_3.1 + make -C $(workdir)/astyle_3.1/build/gcc clean + make -C $(workdir)/astyle_3.1/build/gcc \ + $(if $(findstring Linux,$(os)),LDFLAGS="-static" CFLAGS="-static -static-libgcc -static-libstdc++") -j + cp $(workdir)/astyle_3.1/build/gcc/bin/astyle $@ + +$(workdir)/astyle.exe: $(workdir)/astyle_3.1 + make -C $(workdir)/astyle_3.1/build/gcc clean + make -C $(workdir)/astyle_3.1/build/gcc LDFLAGS="-static" CFLAGS="-static -static-libgcc -static-libstdc++" CXX=x86_64-w64-mingw32-g++ -j + cp $(workdir)/astyle_3.1/build/gcc/bin/astyle.exe $@ + +$(workdir)/astyle_3.1: + mkdir -p $(workdir) + wget 'https://downloads.sourceforge.net/project/astyle/astyle/astyle%203.1/astyle_3.1_linux.tar.gz' -O $(workdir)/astyle_3.1_linux.tar.gz + cd $(workdir) && tar xvf astyle_3.1_linux.tar.gz + rm -rf $(workdir)/astyle_3.1 + mv $(workdir)/astyle $(workdir)/astyle_3.1 + sed -i.bak 's/^CXX = g++/CXX ?= g++/' $(workdir)/astyle_3.1/build/gcc/Makefile + sed -i.bak 's/^CXX = g++/CXX ?= g++/' $(workdir)/astyle_3.1/build/gcc/Makefile + touch $@ + +clean: + rm -rf $(workdir) $(distdir) fc-estimate-difficulty.spec fc-estimate-difficulty.exe.spec + +distclean: clean + rm -f fc-estimate-difficulty fc-estimate-difficulty.exe libc_metrics.json + +test-fc-estimate-difficulty: fc-estimate-difficulty +ifeq ($(os),Linux) + $(DOCKER) build . -f fced-lin.Dockerfile -t fced-lin +else + ./$< fced-test | \ + grep -A9 "Overall difficulty score" | \ + grep -v 0 | \ + grep -q ': ' +endif + @echo "Done testing $^." + +test-fc-estimate-difficulty.exe: fc-estimate-difficulty.exe + @file $< | grep -q "MS Windows" + @echo "Done testing $< (actual testing done inside Docker image)" + +.PHONY: clean distclean test-fc-estimate-difficulty test-fc-estimate-difficulty.exe diff --git a/share/analysis-scripts/fced-lin.Dockerfile b/share/analysis-scripts/fced-lin.Dockerfile new file mode 100644 index 00000000000..9bf2aeed8b4 --- /dev/null +++ b/share/analysis-scripts/fced-lin.Dockerfile @@ -0,0 +1,8 @@ +FROM alpine:3.15 +COPY fc-estimate-difficulty / +RUN mkdir /fced-test +COPY fced-test fced-test/ +RUN ./fc-estimate-difficulty fced-test | \ + grep -A9 "Overall difficulty score" | \ + grep -v 0 | \ + grep -q ': ' diff --git a/share/analysis-scripts/fced-test/a.c b/share/analysis-scripts/fced-test/a.c new file mode 100644 index 00000000000..9e3b8f5980c --- /dev/null +++ b/share/analysis-scripts/fced-test/a.c @@ -0,0 +1,11 @@ +#include <stdlib.h> +#include <X> + +volatile int v; + +int main() { + asm("mov ax, ax"); + if (v) return main(); + malloc(42); + return 0; +} diff --git a/share/analysis-scripts/fced-test/a.h b/share/analysis-scripts/fced-test/a.h new file mode 100644 index 00000000000..5e1d66f91d2 --- /dev/null +++ b/share/analysis-scripts/fced-test/a.h @@ -0,0 +1 @@ +#include <a.h> diff --git a/share/analysis-scripts/fced-win.Dockerfile b/share/analysis-scripts/fced-win.Dockerfile new file mode 100644 index 00000000000..1ae0faf9d29 --- /dev/null +++ b/share/analysis-scripts/fced-win.Dockerfile @@ -0,0 +1,21 @@ +FROM tobix/pywine:3.10 +RUN mkdir /fced +COPY *.py /fced/ +RUN mkdir /fced/dist-prepare +COPY fced-dist-prepare /fced/dist-prepare/ +COPY fced-test /fced/fced-test/ +COPY libc_metrics.json /fced/ +WORKDIR /fced +RUN wine pyinstaller estimate_difficulty.py \ + -F \ + -n fc-estimate-difficulty.exe \ + --distpath . \ + --noconfirm \ + --add-data "libc_metrics.json;share" \ + --add-data "dist-prepare/compliance/*.json;share/compliance" \ + --add-binary "dist-prepare/scc.exe;." \ + --add-binary "dist-prepare/astyle.exe;." +RUN wine fc-estimate-difficulty.exe fced-test | \ + grep -A9 "Overall difficulty score" | \ + grep -v 0 | \ + grep -q ': ' diff --git a/share/analysis-scripts/fclog.py b/share/analysis-scripts/fclog.py new file mode 100644 index 00000000000..909c9392c2e --- /dev/null +++ b/share/analysis-scripts/fclog.py @@ -0,0 +1,77 @@ +# -*- coding: utf-8 -*- +########################################################################## +# # +# This file is part of Frama-C. # +# # +# Copyright (C) 2007-2024 # +# CEA (Commissariat à l'énergie atomique et aux énergies # +# alternatives) # +# # +# you can redistribute it and/or modify it under the terms of the GNU # +# Lesser General Public License as published by the Free Software # +# Foundation, version 2.1. # +# # +# It is distributed in the hope that it will be useful, # +# but WITHOUT ANY WARRANTY; without even the implied warranty of # +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # +# GNU Lesser General Public License for more details. # +# # +# See the GNU Lesser General Public License version 2.1 # +# for more details (enclosed in the file licenses/LGPLv2.1). # +# # +########################################################################## + +"""Contains functions to perform logging-related configuration common to + several scripts.""" + +import logging +import sys + +# extra log level used in messages +VERBOSE = 15 + + +def init(debug, verbose): + """Initializes the logging mechanism. 'debug' is the filename to redirect to + (or special values 'stdout'/'stderr'). If None, disable debug. + If 'verbose' is true, output VERBOSE messages. If both are set, the level + is 'debug'.""" + + logging.addLevelName(VERBOSE, "") + logging.addLevelName(logging.ERROR, "ERROR: ") + logging.addLevelName(logging.WARN, "WARNING: ") + logging.addLevelName(logging.INFO, "") + log_level = logging.DEBUG if debug else VERBOSE if verbose else logging.INFO + log_format = "%(levelname)s%(message)s" + + # special values for debug filename + if debug == "stdout" or not debug: + logging.basicConfig(stream=sys.stdout, level=log_level, format=log_format) + # This very ugly hack seems necessary to avoid BrokenPipeErrors when the + # output of the calling script is truncated (via head, grep -q, etc). + # Note that, on Windows, there is an OSError with "22 Invalid argument" + # instead of a BrokenPipeError. + original_flush = sys.stdout.flush + + def new_flush(): + try: + original_flush() + except BrokenPipeError: + sys.stdout = None + sys.exit(0) + except OSError as e: + if sys.platform == "win32" and e.errno == 22: + sys.stdout = None + sys.exit(0) + raise e + + sys.stdout.flush = new_flush + elif debug == "stderr": + logging.basicConfig(stream=sys.stderr, level=log_level, format=log_format) + else: + logging.basicConfig( + filename=debug, + level=log_level, + filemode="w", + format=log_format, + ) diff --git a/share/analysis-scripts/print_callgraph.py b/share/analysis-scripts/print_callgraph.py index d867d597928..e54754174bd 100755 --- a/share/analysis-scripts/print_callgraph.py +++ b/share/analysis-scripts/print_callgraph.py @@ -25,6 +25,7 @@ """This script finds files containing likely declarations and definitions for a given function name, via heuristic syntactic matching.""" +from pathlib import Path import sys import build_callgraph @@ -45,7 +46,7 @@ If --dot is specified, print in DOT (Graphviz) format to file outfile, or to stdout if outfile is '-'.""" ) -cg = build_callgraph.compute(args) +cg = build_callgraph.compute(set([Path(a) for a in args])) if dotfile: if dotfile == "-": out = sys.stdout diff --git a/share/analysis-scripts/source_filter.py b/share/analysis-scripts/source_filter.py index 1c731404ca1..872bf47ac8f 100644 --- a/share/analysis-scripts/source_filter.py +++ b/share/analysis-scripts/source_filter.py @@ -37,84 +37,32 @@ the efficiency of regex-based heuristics.""" # of errors when running the filters. Note that an absent tool # does _not_ lead to an error. -from __future__ import annotations -import os +import external_tool from pathlib import Path -import shutil -import subprocess import sys -from typing import Optional - -# warnings about missing commands are disabled during testing -emit_warns = os.getenv("PTESTS_TESTING") is None - -# Cache for get_command. -cached_commands: dict[str, Optional[Path]] = {} - - -def resource_path(relative_path: str) -> str: - """Get absolute path to resource; only used by the pyinstaller standalone distribution""" - base_path = getattr(sys, "_MEIPASS", os.path.dirname(os.path.abspath(__file__))) - return os.path.join(base_path, relative_path) - - -def get_command(command: str, env_var_name: str) -> Optional[Path]: - """Returns a Path to the command; priority goes to the environment variable, - then in the PATH, then in the resource directory (for a pyinstaller binary).""" - if command in cached_commands: - return cached_commands[command] - cmd = os.getenv(env_var_name) - if cmd: - p = Path(cmd) - else: - cmd = shutil.which(command) - if cmd: - p = Path(cmd) - else: - p = Path(resource_path(command)) - if not p.exists(): - if emit_warns: - print( - f"info: optional external command '{command}' not found in PATH; " - f"consider installing it or setting environment variable {env_var_name}" - ) - p = None - cached_commands[command] = p - return p - - -def run_and_check(command_and_args: list[str], input_data: str) -> str: - try: - return subprocess.check_output( - command_and_args, - input=input_data, - stderr=None, - encoding="ascii", - errors="ignore", - ) - except subprocess.CalledProcessError as e: - sys.exit(f"error running command: {command_and_args}\n{e}") def filter_with_scc(input_data: str) -> str: - scc = get_command("scc", "SCC") + scc_bin = "scc" if sys.platform != "win32" else "scc.exe" + scc = external_tool.get_command(scc_bin, "SCC") if scc: - return run_and_check([str(scc), "-k"], input_data) + return external_tool.run_and_check([scc, "-k", "-b"], input_data) else: return input_data def filter_with_astyle(input_data: str) -> str: - astyle = get_command("astyle", "ASTYLE") + astyle_bin = "astyle" if sys.platform != "win32" else "astyle.exe" + astyle = external_tool.get_command(astyle_bin, "ASTYLE") if astyle: - return run_and_check( - [str(astyle), "--keep-one-line-blocks", "--keep-one-line-statements"], input_data + return external_tool.run_and_check( + [astyle, "--keep-one-line-blocks", "--keep-one-line-statements"], input_data ) else: return input_data -def open_and_filter(filename: str, apply_filters: bool) -> str: +def open_and_filter(filename: Path, apply_filters: bool) -> str: # we ignore encoding errors and use ASCII to avoid issues when # opening files with different encodings (UTF-8, ISO-8859, etc) with open(filename, "r", encoding="ascii", errors="ignore") as f: diff --git a/share/dune b/share/dune index 600546d2181..2ae614fc0c6 100644 --- a/share/dune +++ b/share/dune @@ -289,6 +289,8 @@ (analysis-scripts/creduce.sh as lib/analysis-scripts/creduce.sh) (analysis-scripts/detect_recursion.py as lib/analysis-scripts/detect_recursion.py) (analysis-scripts/estimate_difficulty.py as lib/analysis-scripts/estimate_difficulty.py) +(analysis-scripts/external_tool.py as lib/analysis-scripts/external_tool.py) +(analysis-scripts/fclog.py as lib/analysis-scripts/fclog.py) (analysis-scripts/find_fun.py as lib/analysis-scripts/find_fun.py) (analysis-scripts/flamegraph.pl as lib/analysis-scripts/flamegraph.pl) (analysis-scripts/frama_c_results.py as lib/analysis-scripts/frama_c_results.py) diff --git a/tests/fc_script/estimate-difficulty.t/estimate-difficulty.c b/tests/fc_script/estimate-difficulty.t/estimate-difficulty.c index 4cff9e42340..93e056d98c2 100644 --- a/tests/fc_script/estimate-difficulty.t/estimate-difficulty.c +++ b/tests/fc_script/estimate-difficulty.t/estimate-difficulty.c @@ -1,3 +1,8 @@ +/* run.config + NOFRAMAC: testing frama-c-script, not frama-c itself + EXECNOW: LOG @PTEST_NAME@.res LOG @PTEST_NAME@.err PTESTS_TESTING=1 %{bin:frama-c-script} estimate-difficulty --no-cloc @PTEST_FILE@ > @PTEST_NAME@.res 2> @PTEST_NAME@.err +*/ + // these includes are not actually used by the compiler // (this is a preprocessed file), but analyzed by the script #include <sys/socket.h> @@ -20,7 +25,11 @@ int main() { strlen(); // no warning ccosl(); // warning: neither code nor spec dprintf(); // no warning: neither code nor spec, but handled by Variadic + __asm__("xor ax, ax"); + malloc(2); + alignof(char*); + _Complex cc; } __int128 large_int; -_Complex complex; +_Complex complex2; diff --git a/tests/fc_script/estimate-difficulty.t/run.t b/tests/fc_script/estimate-difficulty.t/run.t index 91b68551038..e021a15bc78 100644 --- a/tests/fc_script/estimate-difficulty.t/run.t +++ b/tests/fc_script/estimate-difficulty.t/run.t @@ -1,13 +1,24 @@ - $ PTESTS_TESTING=1 frama-c-script estimate-difficulty estimate-difficulty.c + $ PTESTS_TESTING=1 frama-c-script estimate-difficulty --no-cloc estimate-difficulty.c Building callgraph... Computing data about libc/POSIX functions... - [recursion] found recursive cycle near estimate-difficulty.c:13: f -> f - Estimating difficulty for 7 function calls... - - warning: ccosl (POSIX) has neither code nor spec in Frama-C's libc - - warning: setjmp (POSIX) is known to be problematic for code analysis + [recursion] found recursive cycle near estimate-difficulty.c:18: f -> f + Estimating difficulty for 10 function calls... + WARNING: ccosl (POSIX) has neither code nor spec in Frama-C's libc + WARNING: setjmp (POSIX) is known to be problematic for code analysis Function-related warnings: 2 Estimating difficulty for 3 '#include <header>' directives... - - WARNING: included header <complex.h> is explicitly unsupported by Frama-C + WARNING: included header <complex.h> is explicitly unsupported by Frama-C Header-related warnings: 1 - - warning: found 1 line with occurrences of unsupported C11 construct '_Complex' - - warning: found 1 line with occurrences of unsupported non-standard construct (GNU extension) '__int128' + Calls to dynamic allocation functions: malloc + WARNING: unsupported keyword(s) in estimate-difficulty.c: _Complex (2 lines), __int128 (1 line), alignof (1 line) + - _Complex is a C11 construct + - __int128 is a non-standard construct (GNU extension) + - alignof is a C11 construct + WARNING: code seems to contain inline assembly ('asm(...)') + Overall difficulty score: + asm: 1 + includes: 1 + keywords: 3 + libc: 2 + malloc: 1 + recursion: 1 -- GitLab