[analysis-scripts] improve and distribute estimate-difficulty executable

Allow estimate-difficulty to be exported (in Linux/macOS/Windows) as independent executable, so that it can be used without having to install Frama-C.

[analysis-scripts] improve and distribute estimate-difficulty executable
ab5db2c6 · Andre Maroneze · 1dc56f25 · ab5db2c6 · ab5db2c6 · ab5db2c6
Commit ab5db2c6 authored 3 years ago by Andre Maroneze
--- a/.gitattributes
+++ b/.gitattributes
@@ -280,6 +280,11 @@ README* header_spec=.ignore

 /doc/aorai/Makefile header_spec=AORAI_LGPL

+/share/analysis-scripts/fc-estimate-difficulty.mk header_spec=.ignore
+/share/analysis-scripts/fced-lin.Dockerfile header_spec=.ignore
+/share/analysis-scripts/fced-win.Dockerfile header_spec=.ignore
+/share/analysis-scripts/fced-test/a.c header_spec=.ignore
+/share/analysis-scripts/fced-test/a.h header_spec=.ignore
 /share/analysis-scripts/flamegraph.pl header_spec=CDDL
 /share/emacs/acsl.el header_spec=CEA_PR_LGPL
 /share/libc/argz.h header_spec=CEA_FSF_LGPL

--- a/.gitignore
+++ b/.gitignore
@@ -186,6 +186,15 @@ Makefile.plugin.generated
 # WP/Coq Generated file
 .lia.cache

+# analysis-scripts
+share/analysis-scripts/build
+share/analysis-scripts/fced-dist
+share/analysis-scripts/fced-dist-prepare*
+share/analysis-scripts/fc-estimate-difficulty.*spec
+share/analysis-scripts/fc-estimate-difficulty
+share/analysis-scripts/fc-estimate-difficulty.exe
+share/analysis-scripts/libc_metrics.json
+
 # generated ML files

 /src/libraries/utils/json.ml

--- a/share/analysis-scripts/build.py
+++ b/share/analysis-scripts/build.py
@@ -52,6 +52,13 @@ parser = argparse.ArgumentParser(
 for analysis with Frama-C. Tries to use a build_commands.json file if
 available."""
 )
+parser.add_argument(
+    "--base",
+    metavar="DIR",
+    default=".",
+    help="base directory used for pretty-printing relative paths (default: PWD)",
+    type=Path,
+)
 parser.add_argument(
    "--debug",
    metavar="FILE",
@@ -91,11 +98,12 @@ parser.add_argument(
    "--targets",
    metavar="FILE",
    nargs="+",
-    help="targets to build. When using --sources, " + "only a single target is allowed.",
+    help="targets to build. When using --sources, only a single target is allowed.",
    type=Path,
 )

 args = parser.parse_args()
+base = args.base
 force = args.force
 jbdb_path = args.jbdb
 machdep = args.machdep
@@ -125,21 +133,20 @@ dot_framac_dir = Path(".frama-c")

 # Check required environment variables and commands in the PATH ###############

-framac_bin_str = os.getenv("FRAMAC_BIN")
-if not framac_bin_str:
-    sys.exit("error: FRAMAC_BIN not in environment (set by frama-c-script)")
-framac_bin = Path(framac_bin_str)
+framac_bin = Path(
+    os.getenv("FRAMAC_BIN")
+    or sys.exit("error: FRAMAC_BIN not in environment (set by frama-c-script)")
+)

 under_test = os.getenv("PTESTS_TESTING")

 # Prepare blug-related variables and functions ################################

-blug_str = os.getenv("BLUG")
-if not blug_str:
-    blug_str = shutil.which("blug")
-    if not blug_str:
-        sys.exit("error: path to 'blug' binary must be in PATH or variable BLUG")
-blug = Path(blug_str)
+blug = Path(
+    os.getenv("BLUG")
+    or shutil.which("blug")
+    or sys.exit("error: path to 'blug' binary must be in PATH or variable BLUG")
+)
 blug_dir = blug.resolve().parent
 # to import blug_jbdb
 sys.path.insert(0, blug_dir.as_posix())
@@ -202,20 +209,18 @@ def make_target_name(target: Path) -> str:
    return prettify(str(target)).replace("/", "_").replace(".", "_")


-# sources are pretty-printed relatively to the .frama-c directory, where the
-# GNUmakefile will reside
-def rel_prefix(path: Path) -> str:
-    # heuristics: try a relative path, but if too many ".."'s, then give up
-    # and use an absolute one.
-    relp = os.path.relpath(path, start=dot_framac_dir)
-    if relp.startswith(os.path.join("..", "..")):
-        return str(path)
-    else:
-        return relp
+def rel_path(path: Path, base: Path) -> str:
+    """Return a relative path to the .frama-c directory, if path is relative to base.
+    Otherwise, return an absolute path. Typically, base is the parent of the .frama-c directory."""
+    try:
+        path.resolve().relative_to(base.resolve())  # Fails if path is not inside base
+        return os.path.relpath(path, start=dot_framac_dir)
+    except ValueError:
+        return str(path.resolve())


-def pretty_sources(sources: list[Path]) -> list[str]:
-    return [f"  {rel_prefix(source)} \\" for source in sorted(sources)]
+def pretty_sources(sources: list[Path], base: Path) -> list[str]:
+    return [f"  {rel_path(source, base)} \\" for source in sorted(sources)]


 def lines_of_file(path: Path) -> list[str]:
@@ -248,7 +253,9 @@ def copy_fc_stubs() -> Path:
 # [funcname] in [filename].
 # [has_args] is used to distinguish between main(void) and main(int, char**).
 def find_definitions(funcname: str, filename: str) -> list[tuple[str, bool]]:
-    file_content = source_filter.open_and_filter(filename, not under_test and do_filter_source)
+    file_content = source_filter.open_and_filter(
+        Path(filename), not under_test and do_filter_source
+    )
    file_lines = file_content.splitlines(keepends=True)
    newlines = function_finder.compute_newline_offsets(file_lines)
    defs = function_finder.find_definitions_and_declarations(
@@ -425,7 +432,7 @@ if jbdb_path:
    insert_lines_after(
        template,
        "^FCFLAGS",
-        [f"  -json-compilation-database {rel_prefix(jbdb_path)} \\"],
+        [f"  -json-compilation-database {rel_path(jbdb_path, base)} \\"],
    )

 targets_eva = [f"  {make_target_name(target)}.eva \\" for target in sorted(targets)]
@@ -436,7 +443,7 @@ delete_line(template, r"^main.parse: \\")
 delete_line(template, r"^  main.c \\")
 for target, sources in sorted(sources_map.items(), reverse=True):
    pp_target = make_target_name(target)
-    new_lines = [f"{pp_target}.parse: \\"] + pretty_sources(sources) + [""]
+    new_lines = [f"{pp_target}.parse: \\"] + pretty_sources(sources, base) + [""]
    if any(d[2] for d in main_definitions[target]):
        logging.debug(
            "target %s has main with args, adding -main eva_main to its FCFLAGS",

--- a/share/analysis-scripts/build_callgraph.py
+++ b/share/analysis-scripts/build_callgraph.py
@@ -24,8 +24,8 @@
 """This script finds files containing likely declarations and definitions
 for a given function name, via heuristic syntactic matching."""

-from __future__ import annotations
 import os
+from pathlib import Path
 import sys

 import function_finder
@@ -70,7 +70,7 @@ class Callgraph:
        return f"Callgraph({self.succs}, {self.edges})"


-def compute(files):
+def compute(files: set[Path]):
    cg = Callgraph()
    for f in files:
        file_content = source_filter.open_and_filter(f, not under_test)

--- a/share/analysis-scripts/estimate_difficulty.py
+++ b/share/analysis-scripts/estimate_difficulty.py
@@ -25,20 +25,24 @@
 """This script uses several heuristics to try and estimate the difficulty
 of analyzing a new code base with Frama-C."""

-from __future__ import annotations
 import argparse
+import glob
 import json
+import logging
 import os
 from pathlib import Path
 import re
 import subprocess
+import sys
 import tempfile
+from typing import Iterable

 import build_callgraph
+import external_tool
+import fclog
 import source_filter

 # TODO : avoid relativizing paths when introducing too many ".." ;
-# TODO : accept directory as argument (--full-tree), and then do glob **/*.{c,i} inside
 # TODO : try to check the presence of compiler builtins
 # TODO : try to check for pragmas
 # TODO : detect absence of 'main' function (library)
@@ -48,34 +52,81 @@ parser = argparse.ArgumentParser(
 Estimates the difficulty of analyzing a given code base"""
 )
 parser.add_argument(
-    "--header-dirs",
-    "-d",
-    metavar="DIR",
+    "paths",
    nargs="+",
-    help="directories containing headers (default: .frama-c)",
+    help="source files and directories. \
+If a directory <dir> is specified, it is recursively explored, as if '<dir>/**/*.[ci]' \
+had been specified.",
+    type=Path,
 )
-parser.add_argument("files", nargs="+", help="source files")
-args = vars(parser.parse_args())
-
-header_dirs = args["header_dirs"]
-if not header_dirs:
-    header_dirs = []
-files = args["files"]
+parser.add_argument(
+    "--debug",
+    metavar="FILE",
+    help="enable debug mode and redirect output to the specified file",
+)
+parser.add_argument(
+    "--verbose",
+    action="store_true",
+    help="enable verbose output; if --debug is set, output is redirected to the same file.",
+)
+parser.add_argument(
+    "--no-cloc",
+    action="store_true",
+    help="disable usage of external tool 'cloc', even if available.",
+)
+args = parser.parse_args()
+paths = args.paths
+debug = args.debug
+no_cloc = args.no_cloc
+verbose = args.verbose

 under_test = os.getenv("PTESTS_TESTING")

-# gather information from several sources
+fclog.init(debug, verbose)
+
+### Auxiliary functions #######################################################
+
+
+def get_dir(path):
+    """Similar to dirname, but returns the path itself if it refers to a directory"""
+    if path.is_dir():
+        return path
+    else:
+        return path.parent
+
+
+def collect_files_and_local_dirs(paths) -> tuple[set[Path], set[Path]]:
+    """Returns the list of directories (and their subdirectories) containing
+    the specified paths. Note that this also includes subdirectories which do not
+    themselves contain any .c files, but which may contain .h files."""
+    dirs: set[Path] = set()
+    files: set[Path] = set()
+    for p in paths:
+        if p.is_dir():
+            files = files.union([Path(p) for p in glob.glob(f"{p}/**/*.[chi]", recursive=True)])
+            dirs.add(p)
+        else:
+            files.add(p)
+            dirs.add(p.parent)
+    local_dirs = {Path(s[0]) for d in dirs for s in os.walk(d)}
+    if not files:
+        sys.exit(
+            "error: no source files (.c/.i) found in provided paths: "
+            + " ".join([str(p) for p in paths])
+        )
+    return files, local_dirs


 def extract_keys(l):
    return [list(key.keys())[0] for key in l]


-def get_framac_libc_function_statuses(framac, framac_share):
+def get_framac_libc_function_statuses(
+    framac: Path | None, framac_share: Path
+) -> tuple[list[str], list[str]]:
    if framac:
        (_handler, metrics_tmpfile) = tempfile.mkstemp(prefix="fc_script_est_diff", suffix=".json")
-        if debug:
-            print("metrics_tmpfile: %s", metrics_tmpfile)
+        logging.debug("metrics_tmpfile: %s", metrics_tmpfile)
        fc_runtime = framac_share / "libc" / "__fc_runtime.c"
        fc_libc_headers = framac_share / "libc" / "__fc_libc.h"
        subprocess.run(
@@ -105,10 +156,8 @@ def get_framac_libc_function_statuses(framac, framac_share):
    return (defined, spec_only)


-re_include = re.compile(r'\s*#\s*include\s*("|<)([^">]+)("|>)')
-
-
-def grep_includes_in_file(filename):
+def grep_includes_in_file(filename: Path):
+    re_include = re.compile(r'\s*#\s*include\s*("|<)([^">]+)("|>)')
    file_content = source_filter.open_and_filter(filename, not under_test)
    i = 0
    for line in file_content.splitlines():
@@ -120,9 +169,9 @@ def grep_includes_in_file(filename):
            yield (i, kind, header)


-def get_includes(files):
-    quote_includes = {}
-    chevron_includes = {}
+def get_includes(files: set[Path]):
+    quote_includes: dict[Path, list[tuple[Path, int]]] = {}
+    chevron_includes: dict[Path, list[tuple[Path, int]]] = {}
    for filename in files:
        for line, kind, header in grep_includes_in_file(filename):
            if kind == "<":
@@ -137,13 +186,64 @@ def get_includes(files):
    return chevron_includes, quote_includes


+def is_local_header(local_dirs, header):
+    for d in local_dirs:
+        path = Path(d)
+        if Path(path / header).exists():
+            return True
+    return False
+
+
+def grep_keywords(keywords: Iterable[str], filename: Path) -> dict[str, int]:
+    with open(filename, "r") as f:
+        found: dict[str, int] = {}
+        for line in f:
+            if any(x in line for x in keywords):
+                # found one or more keywords; count them
+                for kw in keywords:
+                    if kw in line:
+                        if kw in found:
+                            found[kw] += 1
+                        else:
+                            found[kw] = 1
+        return found
+
+
+def pretty_unsupported_keywords(
+    file: Path, unsupported_keywords: dict[str, str], found: dict[str, int]
+) -> str:
+    res = f"unsupported keyword(s) in {file}: "
+    descriptions: list[str] = []
+    for kw, count in sorted(found.items()):
+        if descriptions:  # not first occurrence
+            res += ", "
+        res += f" {kw} ({count} line{'s' if count > 1 else ''})"
+        descriptions.append(f"{kw} is a {unsupported_keywords[kw]}")
+    res += "\n - " + "\n - ".join(descriptions)
+    return res
+
+
+### End of auxiliary functions ################################################
+
 debug = os.getenv("DEBUG")
 verbose = False

+files, local_dirs = collect_files_and_local_dirs(paths)
+
+score = {
+    "recursion": 0,
+    "libc": 0,
+    "includes": 0,
+    "malloc": 0,
+    "keywords": 0,
+    "asm": 0,
+}
+
 framac_bin = os.getenv("FRAMAC_BIN")
 if not framac_bin:
-    print(
-        "Running script in no-Frama-C mode (set FRAMAC_BIN to the directory containing frama-c if you want to use it)."
+    logging.info(
+        "Running script in no-Frama-C mode (set FRAMAC_BIN to the directory"
+        + " containing frama-c if you want to use it)."
    )
    framac = None
    script_dir = os.path.dirname(os.path.realpath(__file__))
@@ -154,10 +254,27 @@ else:
        subprocess.check_output([framac, "-no-autoload-plugins", "-print-share-path"]).decode()
    )

-print("Building callgraph...")
+
+if not no_cloc:
+    cloc = external_tool.get_command("cloc", "CLOC")
+    if cloc:
+        data = external_tool.run_and_check(
+            [cloc, "--hide-rate", "--progress-rate=0", "--csv"] + list(str(f) for f in files), ""
+        )
+        data = data.splitlines()
+        [nfiles, _sum, nblank, ncomment, ncode] = data[-1].split(",")
+        nlines = int(nblank) + int(ncomment) + int(ncode)
+        logging.info(
+            "Processing %d file(s), approx. %d lines of code (out of %d lines)",
+            int(nfiles),
+            int(ncode),
+            nlines,
+        )
+
+logging.info("Building callgraph...")
 cg = build_callgraph.compute(files)

-print("Computing data about libc/POSIX functions...")
+logging.info("Computing data about libc/POSIX functions...")
 libc_defined_functions, libc_specified_functions = get_framac_libc_function_statuses(
    framac, framac_share
 )
@@ -186,16 +303,23 @@ for cycle_start_loc, cycle in recursive_cycles:
        continue
    reported_recursive_pairs.add(cycle[-1])
    (filename, line) = cycle_start_loc
-    (x, y) = cycle[0]
-    pretty_cycle = f"{x} -> {y}"
-    for x, y in cycle[1:]:
-        pretty_cycle += f" -> {y}"
-    print(f"[recursion] found recursive cycle near {filename}:{line}: {pretty_cycle}")
+
+    def pretty_cycle(cycle):
+        (x, y) = cycle[0]
+        res = f"{x} -> {y}"
+        for x, y in cycle[1:]:
+            res += f" -> {y}"
+        return res
+
+    logging.info(
+        "[recursion] found recursive cycle near %s:%d: %s", filename, line, pretty_cycle(cycle)
+    )
+    score["recursion"] += 1

 callees_list = [callee for (_, callee) in list(cg.edges.keys())]
 callees = set(callees_list)
 used_headers = set()
-print(f"Estimating difficulty for {len(callees)} function calls...")
+logging.info("Estimating difficulty for %d function calls...", len(callees))
 warnings = 0

 for callee in sorted(callees):
@@ -204,8 +328,10 @@ for callee in sorted(callees):
        global warnings
        if status == "warning":
            warnings += 1
-        if verbose or debug or status == "warning":
-            print(f"- {status}: {callee} ({standard}) {reason}")
+        if status == "warning":
+            logging.warning("%s (%s) %s", callee, standard, reason)
+        else:
+            logging.log(fclog.VERBOSE, "%s: %s (%s) %s", status, callee, standard, reason)

    try:
        is_problematic = posix_identifiers[callee]["notes"]["fc-support"] == "problematic"
@@ -259,84 +385,83 @@ for callee in sorted(callees):
            if not status_emitted:
                callee_status("warning", standard, "has neither code nor spec in Frama-C's libc")

-print(f"Function-related warnings: {warnings}")
+logging.info("Function-related warnings: %d", warnings)
+score["libc"] = warnings

-if (verbose or debug) and used_headers:
-    print("used headers:")
-    for header in sorted(used_headers):
-        print(f"  <{header}>")
+logging.log(
+    fclog.VERBOSE,
+    "Used POSIX headers:\n%s",
+    "\n".join([f"  <{header}>" for header in sorted(used_headers)]),
+)

 (chevron_includes, quote_includes) = get_includes(files)

-
-def is_local_header(header_dirs, header):
-    for d in header_dirs:
-        path = Path(d)
-        if Path(path / header).exists():
-            return True
-    return False
-
-
-print(f"Estimating difficulty for {len(chevron_includes)} '#include <header>' directives...")
+logging.info(
+    "Estimating difficulty for %d '#include <header>' directives...", len(chevron_includes)
+)
 non_posix_headers = []
 header_warnings = 0
 for header in sorted(chevron_includes, key=str.casefold):
+    if not header.lower().endswith(".h"):
+        continue  # ignore included non-header files
    if header in posix_headers:
        fc_support = posix_headers[header]["fc-support"]
        if fc_support == "unsupported":
            header_warnings += 1
-            print(f"- WARNING: included header <{header}> is explicitly unsupported by Frama-C")
+            logging.warning("included header <%s> is explicitly unsupported by Frama-C", header)
        else:
-            if verbose or debug:
-                c11_or_posix = "C11" if header in c11_headers else "POSIX"
-                print(f"- note: included {c11_or_posix} header ")
+            logging.log(
+                fclog.VERBOSE,
+                "included %s header %s",
+                "C11" if header in c11_headers else "POSIX",
+                header,
+            )
    else:
-        if is_local_header(header_dirs, header):
-            if verbose or debug:
-                print(f"- ok: included header <{header}> seems to be available locally")
+        if is_local_header(local_dirs, header):
+            logging.log(
+                fclog.VERBOSE, "ok: included header <%s> seems to be available locally", header
+            )
        else:
            non_posix_headers.append(header)
            header_warnings += 1
-            print(f"- warning: included non-POSIX header <{header}>")
-print(f"Header-related warnings: {header_warnings}")
-
+            logging.warning("included non-POSIX header <%s>", header)
+logging.info("Header-related warnings: %d", header_warnings)
+score["includes"] = header_warnings

 # dynamic allocation

 dynalloc_functions = set(["malloc", "calloc", "free", "realloc", "alloca", "mmap"])
 dyncallees = dynalloc_functions.intersection(callees)
 if dyncallees:
-    print(f"- note: calls to dynamic allocation functions: {', '.join(sorted(dyncallees))}")
-
+    logging.info("Calls to dynamic allocation functions: %s", ", ".join(sorted(dyncallees)))
+    score["malloc"] = len(dyncallees)

 # unsupported C11 or non-standard specific features

-unsupported_keywords = [
-    ("_Alignas", "C11 construct"),
-    ("_Alignof", "C11 construct"),
-    ("_Complex", "C11 construct"),
-    ("_Imaginary", "C11 construct"),
-    ("alignas", "C11 construct"),
-    ("alignof", "C11 construct"),  # stdalign.h may use these symbols
-    ("__int128", "non-standard construct (GNU extension)"),
-    ("__uint128_t", "non-standard construct (GNU extension)"),
-]
-
-for keyword, message in unsupported_keywords:
-    out = subprocess.Popen(
-        ["grep", "-n", "\\b" + keyword + "\\b"] + files + ["/dev/null"],
-        stdout=subprocess.PIPE,
-        stderr=subprocess.STDOUT,
-    )
-    lines = out.communicate()[0].decode("utf-8").splitlines()
-    if lines:
-        n = len(lines)
-        print(
-            f"- warning: found {n} line{'s' if n > 1 else ''} with occurrences of "
-            f"unsupported {message} '{keyword}'"
-        )
+unsupported_keywords = {
+    "_Alignas": "C11 construct",
+    "_Alignof": "C11 construct",
+    "_Complex": "C11 construct",
+    "_Imaginary": "C11 construct",
+    "alignas": "C11 construct",
+    "alignof": "C11 construct",  # stdalign.h may use these symbols
+    "__int128": "non-standard construct (GNU extension)",
+    "__uint128_t": "non-standard construct (GNU extension)",
+}
+
+for ff in files:
+    found = grep_keywords(unsupported_keywords.keys(), ff)
+    if found:
+        logging.warning(pretty_unsupported_keywords(ff, unsupported_keywords, found))
+    score["keywords"] += len(found)

 # assembly code

 if "asm" in callees or "__asm" in callees or "__asm__" in callees:
-    print("- warning: code seems to contain inline assembly ('asm(...)')")
+    logging.warning("code seems to contain inline assembly ('asm(...)')")
+    score["asm"] = 1
+
+logging.info(
+    "Overall difficulty score:\n%s",
+    "\n".join([k + ": " + str(v) for (k, v) in sorted(score.items())]),
+)
--- a/share/analysis-scripts/external_tool.py
+++ b/share/analysis-scripts/external_tool.py
+# -*- coding: utf-8 -*-
+##########################################################################
+#                                                                        #
+#  This file is part of Frama-C.                                         #
+#                                                                        #
+#  Copyright (C) 2007-2024                                               #
+#    CEA (Commissariat à l'énergie atomique et aux énergies              #
+#         alternatives)                                                  #
+#                                                                        #
+#  you can redistribute it and/or modify it under the terms of the GNU   #
+#  Lesser General Public License as published by the Free Software       #
+#  Foundation, version 2.1.                                              #
+#                                                                        #
+#  It is distributed in the hope that it will be useful,                 #
+#  but WITHOUT ANY WARRANTY; without even the implied warranty of        #
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         #
+#  GNU Lesser General Public License for more details.                   #
+#                                                                        #
+#  See the GNU Lesser General Public License version 2.1                 #
+#  for more details (enclosed in the file licenses/LGPLv2.1).            #
+#                                                                        #
+##########################################################################
+
+"""This file provides utility functions to use external tools, either
+available in the PATH or via PyInstaller."""
+
+import os
+from pathlib import Path
+import shutil
+import subprocess
+import sys
+
+# warnings about missing commands are disabled during testing
+emit_warns = os.getenv("PTESTS_TESTING") is None
+
+# Cache for get_command
+cached_commands: dict[str, Path | None] = {}
+
+
+def resource_path(relative_path):
+    """Get absolute path to resource; only used by the pyinstaller standalone distribution"""
+    base_path = getattr(sys, "_MEIPASS", os.path.dirname(os.path.abspath(__file__)))
+    return os.path.join(base_path, relative_path)
+
+
+def get_command(command, env_var_name):
+    """Returns a Path to the command; priority goes to the environment variable,
+    then in the PATH, then in the resource directory (for a pyinstaller binary)."""
+    if command in cached_commands:
+        return cached_commands[command]
+    p = os.getenv(env_var_name)
+    if p:
+        p = Path(p)
+    else:
+        p = shutil.which(command)
+        if p:
+            p = Path(p)
+        else:
+            p = Path(resource_path(command))
+            if not p.exists():
+                if emit_warns:
+                    print(
+                        f"info: optional external command '{command}' not found in PATH;",
+                        f"consider installing it or setting environment variable {env_var_name}",
+                    )
+                p = None
+    cached_commands[command] = p
+    return p
+
+
+def run_and_check(command_and_args, input_data):
+    try:
+        return subprocess.check_output(
+            command_and_args,
+            input=input_data,
+            stderr=None,
+            encoding="ascii",
+            errors="ignore",
+        )
+    except subprocess.CalledProcessError as e:
+        sys.exit(f"error running command: {command_and_args}\n{e}")
--- a/share/analysis-scripts/fc-estimate-difficulty.mk
+++ b/share/analysis-scripts/fc-estimate-difficulty.mk
+# Produces a self-contained binary for 'frama-c-script estimate-difficulty'
+# on a machine without Frama-C.
+
+# Example usage:
+# export FRAMAC=$(which frama-c) && make -f fc-estimate-difficulty.mk fc-estimate-difficulty
+# export FRAMAC=$(which frama-c) && make -f fc-estimate-difficulty.mk fc-estimate-difficulty.exe
+
+# Works on Linux and macOS. The Windows binary is compiled and tested using
+# a Docker image with wine and pyinstaller (usage of Docker avoids issues with
+# staging versions in e.g. Fedora, which may cause weird error messages when
+# running in Windows). The Linux binary is tested on a bare-bones Alpine Linux,
+# to ensure it does not depend on dynamic libraries.
+# The macOS version needs to be tested by hand.
+
+# Notes:
+# pip install "staticx==0.14.1"
+# pip install "patchelf==0.17.0.0" # seems to work; version 0.17.2.1 did not work when tried
+
+help:
+	@echo "targets:"
+	@echo "fc-estimate-difficulty           : Linux/macOS version (depending on host system)"
+	@echo "fc-estimate-difficulty.exe       : Windows cross-compiled version (from Linux)"
+	@echo "clean                            : Erase working directory"
+	@echo "test-fc-estimate-difficulty      : Test Linux/macOS version"
+	@echo "test-fc-estimate-difficulty.exe  : Test Windows version"
+
+os := $(shell uname -s)
+
+workdir = fced-dist-prepare
+distdir = fced-dist
+
+# if 'podman' is installed, we assume it is preferred over Docker;
+# otherwise, try using 'docker'
+ifneq (, $(shell which podman))
+DOCKER:=podman
+else
+DOCKER:=docker
+endif
+
+libc_metrics.json: ../libc/__fc_libc.h ../libc/__fc_runtime.c
+ifeq ($(wildcard $(FRAMAC)),)
+	$(error FRAMAC must be set to the path of the 'frama-c' binary)
+endif
+	FRAMAC_SHARE="$(shell $(FRAMAC) -no-autoload-plugins -print-share-path)"
+	rm -f $@
+	$(FRAMAC) \
+	  $^ \
+	  -no-autoload-plugins -load-module metrics \
+	  -metrics -metrics-libc -metrics-output $@
+	chmod -w $@ # generated file: prevent accidental overwriting
+
+PY_DEPS := \
+  build_callgraph.py \
+  estimate_difficulty.py \
+  external_tool.py \
+  fclog.py \
+  function_finder.py \
+  source_filter.py \
+
+COMPLIANCE := $(wildcard ../compliance/*.json)
+TOOLS := $(workdir)/scc $(workdir)/astyle
+COMMON_DEPS := $(PY_DEPS) $(COMPLIANCE) libc_metrics.json
+
+fc-estimate-difficulty: EXE=
+fc-estimate-difficulty.exe: EXE=".exe"
+
+fc-estimate-difficulty: WINE=
+fc-estimate-difficulty.exe: WINE="/home/andr/git/wine/wine"
+
+fc-estimate-difficulty: PSEP=:
+fc-estimate-difficulty.exe: PSEP=;
+
+fc-estimate-difficulty: $(COMMON_DEPS) $(TOOLS)
+fc-estimate-difficulty.exe: \
+  $(COMMON_DEPS) $(addsuffix .exe,$(TOOLS)) fced-win.Dockerfile
+
+fc-estimate-difficulty:
+	pyinstaller estimate_difficulty.py \
+	  -F \
+	  -n $@ \
+	  --distpath $(distdir) \
+	  --noconfirm \
+	  --add-data "libc_metrics.json:share" \
+	  --add-data "../compliance/*.json:share/compliance" \
+	  --add-binary "$(workdir)/scc:." \
+	  --add-binary "$(workdir)/astyle:."
+ifeq ($(os),Linux)
+	@echo "Linux: running staticx"
+	staticx "$(distdir)/$@" $@
+else
+	@echo "NOT running staticx (macOS?)"
+	mv "$(distdir)/$@" $@
+endif
+
+fc-estimate-difficulty.exe:
+	mkdir -p $(workdir)/compliance
+	cp ../compliance/*.json $(workdir)/compliance
+	$(DOCKER) build . -f fced-win.Dockerfile -t fced-win
+	$(DOCKER) cp $$($(DOCKER) create --rm fced-win):/fced/$@ $@
+
+$(workdir)/scc: $(workdir)/scc-snapshots-fc-1.1.0
+	make -C $< clean
+	make -C $< scc $(if $(findstring Linux,$(os)),CFLAGS="-static") -j
+	cp $(workdir)/scc-snapshots-fc-1.1.0/scc $@
+
+$(workdir)/scc.exe: $(workdir)/scc-snapshots-fc-1.1.0
+	make -C $< clean
+	make -C $< scc CC=x86_64-w64-mingw32-gcc -j
+	cp $(workdir)/scc-snapshots-fc-1.1.0/scc.exe $@
+
+$(workdir)/scc-snapshots-fc-1.1.0:
+	mkdir -p $(workdir)
+	wget 'https://github.com/Frama-C/scc-snapshots/archive/refs/tags/fc-1.1.0.tar.gz' -O $(workdir)/fc-1.1.0.tar.gz
+	cd $(workdir) && tar xvf fc-1.1.0.tar.gz
+	touch $@
+
+$(workdir)/astyle: $(workdir)/astyle_3.1
+	make -C $(workdir)/astyle_3.1/build/gcc clean
+	make -C $(workdir)/astyle_3.1/build/gcc \
+	  $(if $(findstring Linux,$(os)),LDFLAGS="-static" CFLAGS="-static -static-libgcc -static-libstdc++") -j
+	cp $(workdir)/astyle_3.1/build/gcc/bin/astyle $@
+
+$(workdir)/astyle.exe: $(workdir)/astyle_3.1
+	make -C $(workdir)/astyle_3.1/build/gcc clean
+	make -C $(workdir)/astyle_3.1/build/gcc LDFLAGS="-static" CFLAGS="-static -static-libgcc -static-libstdc++" CXX=x86_64-w64-mingw32-g++ -j
+	cp $(workdir)/astyle_3.1/build/gcc/bin/astyle.exe $@
+
+$(workdir)/astyle_3.1:
+	mkdir -p $(workdir)
+	wget 'https://downloads.sourceforge.net/project/astyle/astyle/astyle%203.1/astyle_3.1_linux.tar.gz' -O $(workdir)/astyle_3.1_linux.tar.gz
+	cd $(workdir) && tar xvf astyle_3.1_linux.tar.gz
+	rm -rf $(workdir)/astyle_3.1
+	mv $(workdir)/astyle $(workdir)/astyle_3.1
+	sed -i.bak 's/^CXX = g++/CXX ?= g++/' $(workdir)/astyle_3.1/build/gcc/Makefile
+	sed -i.bak 's/^CXX = g++/CXX ?= g++/' $(workdir)/astyle_3.1/build/gcc/Makefile
+	touch $@
+
+clean:
+	rm -rf $(workdir) $(distdir) fc-estimate-difficulty.spec fc-estimate-difficulty.exe.spec
+
+distclean: clean
+	rm -f fc-estimate-difficulty fc-estimate-difficulty.exe libc_metrics.json
+
+test-fc-estimate-difficulty: fc-estimate-difficulty
+ifeq ($(os),Linux)
+	$(DOCKER) build . -f fced-lin.Dockerfile -t fced-lin
+else
+	./$< fced-test | \
+	  grep -A9 "Overall difficulty score" | \
+	  grep -v 0 | \
+	  grep -q ': '
+endif
+	@echo "Done testing $^."
+
+test-fc-estimate-difficulty.exe: fc-estimate-difficulty.exe
+	@file $< | grep -q "MS Windows"
+	@echo "Done testing $< (actual testing done inside Docker image)"
+
+.PHONY: clean distclean test-fc-estimate-difficulty test-fc-estimate-difficulty.exe
--- a/share/analysis-scripts/fced-lin.Dockerfile
+++ b/share/analysis-scripts/fced-lin.Dockerfile
+FROM alpine:3.15
+COPY fc-estimate-difficulty /
+RUN mkdir /fced-test
+COPY fced-test fced-test/
+RUN ./fc-estimate-difficulty fced-test | \
+    grep -A9 "Overall difficulty score" | \
+    grep -v 0 | \
+    grep -q ': '
--- a/share/analysis-scripts/fced-test/a.c
+++ b/share/analysis-scripts/fced-test/a.c
+#include <stdlib.h>
+#include <X>
+
+volatile int v;
+
+int main() {
+  asm("mov ax, ax");
+  if (v) return main();
+  malloc(42);
+  return 0;
+}
--- a/share/analysis-scripts/fced-test/a.h
+++ b/share/analysis-scripts/fced-test/a.h
+#include <a.h>
--- a/share/analysis-scripts/fced-win.Dockerfile
+++ b/share/analysis-scripts/fced-win.Dockerfile
+FROM tobix/pywine:3.10
+RUN mkdir /fced
+COPY *.py /fced/
+RUN mkdir /fced/dist-prepare
+COPY fced-dist-prepare /fced/dist-prepare/
+COPY fced-test /fced/fced-test/
+COPY libc_metrics.json /fced/
+WORKDIR /fced
+RUN wine pyinstaller estimate_difficulty.py \
+	  -F \
+	  -n fc-estimate-difficulty.exe \
+	  --distpath . \
+	  --noconfirm \
+	  --add-data "libc_metrics.json;share" \
+	  --add-data "dist-prepare/compliance/*.json;share/compliance" \
+	  --add-binary "dist-prepare/scc.exe;." \
+	  --add-binary "dist-prepare/astyle.exe;."
+RUN wine fc-estimate-difficulty.exe fced-test | \
+    grep -A9 "Overall difficulty score" | \
+    grep -v 0 | \
+    grep -q ': '
--- a/share/analysis-scripts/fclog.py
+++ b/share/analysis-scripts/fclog.py
+# -*- coding: utf-8 -*-
+##########################################################################
+#                                                                        #
+#  This file is part of Frama-C.                                         #
+#                                                                        #
+#  Copyright (C) 2007-2024                                               #
+#    CEA (Commissariat à l'énergie atomique et aux énergies              #
+#         alternatives)                                                  #
+#                                                                        #
+#  you can redistribute it and/or modify it under the terms of the GNU   #
+#  Lesser General Public License as published by the Free Software       #
+#  Foundation, version 2.1.                                              #
+#                                                                        #
+#  It is distributed in the hope that it will be useful,                 #
+#  but WITHOUT ANY WARRANTY; without even the implied warranty of        #
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         #
+#  GNU Lesser General Public License for more details.                   #
+#                                                                        #
+#  See the GNU Lesser General Public License version 2.1                 #
+#  for more details (enclosed in the file licenses/LGPLv2.1).            #
+#                                                                        #
+##########################################################################
+
+"""Contains functions to perform logging-related configuration common to
+   several scripts."""
+
+import logging
+import sys
+
+# extra log level used in messages
+VERBOSE = 15
+
+
+def init(debug, verbose):
+    """Initializes the logging mechanism. 'debug' is the filename to redirect to
+    (or special values 'stdout'/'stderr'). If None, disable debug.
+    If 'verbose' is true, output VERBOSE messages. If both are set, the level
+    is 'debug'."""
+
+    logging.addLevelName(VERBOSE, "")
+    logging.addLevelName(logging.ERROR, "ERROR: ")
+    logging.addLevelName(logging.WARN, "WARNING: ")
+    logging.addLevelName(logging.INFO, "")
+    log_level = logging.DEBUG if debug else VERBOSE if verbose else logging.INFO
+    log_format = "%(levelname)s%(message)s"
+
+    # special values for debug filename
+    if debug == "stdout" or not debug:
+        logging.basicConfig(stream=sys.stdout, level=log_level, format=log_format)
+        # This very ugly hack seems necessary to avoid BrokenPipeErrors when the
+        # output of the calling script is truncated (via head, grep -q, etc).
+        # Note that, on Windows, there is an OSError with "22 Invalid argument"
+        # instead of a BrokenPipeError.
+        original_flush = sys.stdout.flush
+
+        def new_flush():
+            try:
+                original_flush()
+            except BrokenPipeError:
+                sys.stdout = None
+                sys.exit(0)
+            except OSError as e:
+                if sys.platform == "win32" and e.errno == 22:
+                    sys.stdout = None
+                    sys.exit(0)
+                raise e
+
+        sys.stdout.flush = new_flush
+    elif debug == "stderr":
+        logging.basicConfig(stream=sys.stderr, level=log_level, format=log_format)
+    else:
+        logging.basicConfig(
+            filename=debug,
+            level=log_level,
+            filemode="w",
+            format=log_format,
+        )
--- a/share/analysis-scripts/print_callgraph.py
+++ b/share/analysis-scripts/print_callgraph.py
@@ -25,6 +25,7 @@
 """This script finds files containing likely declarations and definitions
 for a given function name, via heuristic syntactic matching."""

+from pathlib import Path
 import sys
 import build_callgraph

@@ -45,7 +46,7 @@ If --dot is specified, print in DOT (Graphviz) format
 to file outfile, or to stdout if outfile is '-'."""
    )

-cg = build_callgraph.compute(args)
+cg = build_callgraph.compute(set([Path(a) for a in args]))
 if dotfile:
    if dotfile == "-":
        out = sys.stdout

--- a/share/analysis-scripts/source_filter.py
+++ b/share/analysis-scripts/source_filter.py
@@ -37,84 +37,32 @@ the efficiency of regex-based heuristics."""
 # of errors when running the filters. Note that an absent tool
 # does _not_ lead to an error.

-from __future__ import annotations
-import os
+import external_tool
 from pathlib import Path
-import shutil
-import subprocess
 import sys
-from typing import Optional
-
-# warnings about missing commands are disabled during testing
-emit_warns = os.getenv("PTESTS_TESTING") is None
-
-# Cache for get_command.
-cached_commands: dict[str, Optional[Path]] = {}
-
-
-def resource_path(relative_path: str) -> str:
-    """Get absolute path to resource; only used by the pyinstaller standalone distribution"""
-    base_path = getattr(sys, "_MEIPASS", os.path.dirname(os.path.abspath(__file__)))
-    return os.path.join(base_path, relative_path)
-
-
-def get_command(command: str, env_var_name: str) -> Optional[Path]:
-    """Returns a Path to the command; priority goes to the environment variable,
-    then in the PATH, then in the resource directory (for a pyinstaller binary)."""
-    if command in cached_commands:
-        return cached_commands[command]
-    cmd = os.getenv(env_var_name)
-    if cmd:
-        p = Path(cmd)
-    else:
-        cmd = shutil.which(command)
-        if cmd:
-            p = Path(cmd)
-        else:
-            p = Path(resource_path(command))
-            if not p.exists():
-                if emit_warns:
-                    print(
-                        f"info: optional external command '{command}' not found in PATH; "
-                        f"consider installing it or setting environment variable {env_var_name}"
-                    )
-                p = None
-    cached_commands[command] = p
-    return p
-
-
-def run_and_check(command_and_args: list[str], input_data: str) -> str:
-    try:
-        return subprocess.check_output(
-            command_and_args,
-            input=input_data,
-            stderr=None,
-            encoding="ascii",
-            errors="ignore",
-        )
-    except subprocess.CalledProcessError as e:
-        sys.exit(f"error running command: {command_and_args}\n{e}")


 def filter_with_scc(input_data: str) -> str:
-    scc = get_command("scc", "SCC")
+    scc_bin = "scc" if sys.platform != "win32" else "scc.exe"
+    scc = external_tool.get_command(scc_bin, "SCC")
    if scc:
-        return run_and_check([str(scc), "-k"], input_data)
+        return external_tool.run_and_check([scc, "-k", "-b"], input_data)
    else:
        return input_data


 def filter_with_astyle(input_data: str) -> str:
-    astyle = get_command("astyle", "ASTYLE")
+    astyle_bin = "astyle" if sys.platform != "win32" else "astyle.exe"
+    astyle = external_tool.get_command(astyle_bin, "ASTYLE")
    if astyle:
-        return run_and_check(
-            [str(astyle), "--keep-one-line-blocks", "--keep-one-line-statements"], input_data
+        return external_tool.run_and_check(
+            [astyle, "--keep-one-line-blocks", "--keep-one-line-statements"], input_data
        )
    else:
        return input_data


-def open_and_filter(filename: str, apply_filters: bool) -> str:
+def open_and_filter(filename: Path, apply_filters: bool) -> str:
    # we ignore encoding errors and use ASCII to avoid issues when
    # opening files with different encodings (UTF-8, ISO-8859, etc)
    with open(filename, "r", encoding="ascii", errors="ignore") as f:

--- a/share/dune
+++ b/share/dune
@@ -289,6 +289,8 @@
 (analysis-scripts/creduce.sh as lib/analysis-scripts/creduce.sh)
 (analysis-scripts/detect_recursion.py as lib/analysis-scripts/detect_recursion.py)
 (analysis-scripts/estimate_difficulty.py as lib/analysis-scripts/estimate_difficulty.py)
+(analysis-scripts/external_tool.py as lib/analysis-scripts/external_tool.py)
+(analysis-scripts/fclog.py as lib/analysis-scripts/fclog.py)
 (analysis-scripts/find_fun.py as lib/analysis-scripts/find_fun.py)
 (analysis-scripts/flamegraph.pl as lib/analysis-scripts/flamegraph.pl)
 (analysis-scripts/frama_c_results.py as lib/analysis-scripts/frama_c_results.py)

--- a/tests/fc_script/estimate-difficulty.t/estimate-difficulty.c
+++ b/tests/fc_script/estimate-difficulty.t/estimate-difficulty.c
+/* run.config
+   NOFRAMAC: testing frama-c-script, not frama-c itself
+   EXECNOW: LOG @PTEST_NAME@.res LOG @PTEST_NAME@.err PTESTS_TESTING=1 %{bin:frama-c-script} estimate-difficulty --no-cloc @PTEST_FILE@ > @PTEST_NAME@.res 2> @PTEST_NAME@.err
+*/
+
 // these includes are not actually used by the compiler
 // (this is a preprocessed file), but analyzed by the script
 #include <sys/socket.h>
@@ -20,7 +25,11 @@ int main() {
  strlen(); // no warning
  ccosl(); // warning: neither code nor spec
  dprintf(); // no warning: neither code nor spec, but handled by Variadic
+  __asm__("xor ax, ax");
+  malloc(2);
+  alignof(char*);
+  _Complex cc;
 }

 __int128 large_int;
-_Complex complex;
+_Complex complex2;
--- a/tests/fc_script/estimate-difficulty.t/run.t
+++ b/tests/fc_script/estimate-difficulty.t/run.t
-  $ PTESTS_TESTING=1 frama-c-script estimate-difficulty estimate-difficulty.c
+  $ PTESTS_TESTING=1 frama-c-script estimate-difficulty --no-cloc estimate-difficulty.c
  Building callgraph...
  Computing data about libc/POSIX functions...
-  [recursion] found recursive cycle near estimate-difficulty.c:13: f -> f
-  Estimating difficulty for 7 function calls...
-  - warning: ccosl (POSIX) has neither code nor spec in Frama-C's libc
-  - warning: setjmp (POSIX) is known to be problematic for code analysis
+  [recursion] found recursive cycle near estimate-difficulty.c:18: f -> f
+  Estimating difficulty for 10 function calls...
+  WARNING: ccosl (POSIX) has neither code nor spec in Frama-C's libc
+  WARNING: setjmp (POSIX) is known to be problematic for code analysis
  Function-related warnings: 2
  Estimating difficulty for 3 '#include <header>' directives...
-  - WARNING: included header <complex.h> is explicitly unsupported by Frama-C
+  WARNING: included header <complex.h> is explicitly unsupported by Frama-C
  Header-related warnings: 1
-  - warning: found 1 line with occurrences of unsupported C11 construct '_Complex'
-  - warning: found 1 line with occurrences of unsupported non-standard construct (GNU extension) '__int128'
+  Calls to dynamic allocation functions: malloc
+  WARNING: unsupported keyword(s) in estimate-difficulty.c:  _Complex (2 lines),  __int128 (1 line),  alignof (1 line)
+   - _Complex is a C11 construct
+   - __int128 is a non-standard construct (GNU extension)
+   - alignof is a C11 construct
+  WARNING: code seems to contain inline assembly ('asm(...)')
+  Overall difficulty score:
+  asm: 1
+  includes: 1
+  keywords: 3
+  libc: 2
+  malloc: 1
+  recursion: 1