From cbcc7d614acad80db3920841e8b3b5719090fa14 Mon Sep 17 00:00:00 2001
From: Andre Maroneze <andre.maroneze@cea.fr>
Date: Tue, 13 Oct 2020 21:51:58 +0200
Subject: [PATCH] [analysis-scripts] add heuristic difficulty estimation

---
 bin/frama-c-script                            |   8 +
 share/analysis-scripts/build_callgraph.py     |  16 ++
 share/analysis-scripts/estimate_difficulty.py | 221 ++++++++++++++++++
 3 files changed, 245 insertions(+)
 create mode 100755 share/analysis-scripts/estimate_difficulty.py

diff --git a/bin/frama-c-script b/bin/frama-c-script
index 1843c1da669..0fb86734d9b 100755
--- a/bin/frama-c-script
+++ b/bin/frama-c-script
@@ -86,6 +86,10 @@ usage() {
    echo "      If [want_decls] is true, lists declarations."
    echo "      Results are guaranteed neither correct nor complete."
    echo ""
+   echo "  - estimate-difficulty file..."
+   echo "      Applies several heuristics to try and estimate the difficulty"
+   echo "      of analyzing the specified files with Frama-C."
+   echo ""
    echo "  - make-wrapper target arg..."
    echo "      Runs 'make target arg...', parsing the output to suggest"
    echo "      useful commands in case of failure."
@@ -243,6 +247,10 @@ case "$command" in
         shift;
         ${FRAMAC_SHARE}/analysis-scripts/heuristic_list_functions.py "$@";
         ;;
+    "estimate-difficulty")
+        shift;
+        ${FRAMAC_SHARE}/analysis-scripts/estimate_difficulty.py "$@";
+        ;;
     "make-wrapper")
         shift;
         "${FRAMAC_SHARE}"/analysis-scripts/make_wrapper.py "$0" "$@";
diff --git a/share/analysis-scripts/build_callgraph.py b/share/analysis-scripts/build_callgraph.py
index 12ac0db3365..c62294d1b8e 100755
--- a/share/analysis-scripts/build_callgraph.py
+++ b/share/analysis-scripts/build_callgraph.py
@@ -136,6 +136,22 @@ def cycle_dfs(cg, visited, just_visited, n):
                 return []
     return []
 
+def compute_recursive_cycles(cg, acc):
+    to_visit = set(cg.nodes())
+    if not to_visit: # empty graph -> no recursion
+        return
+    visited = set()
+    while to_visit:
+        just_visited = set()
+        n = sorted(list(to_visit))[0]
+        cycle = cycle_dfs(cg, visited, just_visited, n)
+        visited = visited.union(just_visited)
+        if cycle:
+            (fst, snd) = cycle[0]
+            cycle_start_loc = cg.edges[(fst, snd)][0]
+            acc.append((cycle_start_loc, cycle))
+        to_visit -= visited
+
 def detect_recursion(cg):
     #print(f"Detecting recursive calls...")
     to_visit = set(cg.nodes())
diff --git a/share/analysis-scripts/estimate_difficulty.py b/share/analysis-scripts/estimate_difficulty.py
new file mode 100755
index 00000000000..02de99f5a0b
--- /dev/null
+++ b/share/analysis-scripts/estimate_difficulty.py
@@ -0,0 +1,221 @@
+#!/usr/bin/env python3
+#-*- coding: utf-8 -*-
+##########################################################################
+#                                                                        #
+#  This file is part of Frama-C.                                         #
+#                                                                        #
+#  Copyright (C) 2007-2020                                               #
+#    CEA (Commissariat à l'énergie atomique et aux énergies              #
+#         alternatives)                                                  #
+#                                                                        #
+#  you can redistribute it and/or modify it under the terms of the GNU   #
+#  Lesser General Public License as published by the Free Software       #
+#  Foundation, version 2.1.                                              #
+#                                                                        #
+#  It is distributed in the hope that it will be useful,                 #
+#  but WITHOUT ANY WARRANTY; without even the implied warranty of        #
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         #
+#  GNU Lesser General Public License for more details.                   #
+#                                                                        #
+#  See the GNU Lesser General Public License version 2.1                 #
+#  for more details (enclosed in the file licenses/LGPLv2.1).            #
+#                                                                        #
+##########################################################################
+
+# This script uses several heuristics to try and estimate the difficulty
+# of analyzing a new code base with Frama-C.
+
+import argparse
+import build_callgraph
+import function_finder
+import json
+import os
+from pathlib import Path
+import re
+import subprocess
+import sys
+import tempfile
+
+MIN_PYTHON = (3, 5)
+if sys.version_info < MIN_PYTHON:
+    sys.exit("Python %s.%s or later is required.\n" % MIN_PYTHON)
+
+parser = argparse.ArgumentParser(description="""
+Estimates the difficulty of analyzing a given code base""")
+parser.add_argument("--header-dirs", "-d", metavar='DIR', nargs='+',
+                    help='directories containing headers (default: .frama-c)')
+parser.add_argument("files", nargs='+', help='source files')
+args = vars(parser.parse_args())
+
+header_dirs = args["header_dirs"]
+if not header_dirs:
+    header_dirs = []
+files = args["files"]
+
+# gather information from several sources
+
+def extract_keys(l):
+    return [list(key.keys())[0] for key in l]
+
+def get_framac_libc_function_statuses(framac, framac_share):
+    (_metrics_handler, metrics_tmpfile) = tempfile.mkstemp(prefix="fc_script_est_diff", suffix=".json")
+    if debug:
+        print(f"metrics_tmpfile: {metrics_tmpfile}")
+    fc_runtime = framac_share / "libc" / "__fc_runtime.c"
+    fc_libc_headers = framac_share / "libc" / "__fc_libc.h"
+    subprocess.run([framac, "-no-autoload-plugins", fc_runtime, fc_libc_headers,
+                    "-load-module", "metrics", "-metrics", "-metrics-libc",
+                    "-metrics-output", metrics_tmpfile], stdout=subprocess.DEVNULL, check=True)
+    with open(metrics_tmpfile) as f:
+        metrics_json = json.load(f)
+    os.remove(metrics_tmpfile)
+    defined = extract_keys(metrics_json["defined-functions"])
+    spec_only = extract_keys(metrics_json["specified-only-functions"])
+    return (defined, spec_only)
+
+include_exp_for_grep = r'\s*#\s*include\s*\("\|<\)\([^">]\+\)\("\|>\)'
+include_exp_for_py = r'\s*#\s*include\s*("|<)([^">]+)("|>)'
+re_include = re.compile(r'^(.*):(.*):' + include_exp_for_py)
+def get_includes(files):
+    quote_includes = {}
+    chevron_includes = {}
+    # adding /dev/null to the list of files ensures 'grep' will display the
+    # file name for each match, even when a single file is given
+    out = subprocess.Popen(["grep", "-n", "^" + include_exp_for_grep] + files + ["/dev/null"],
+                           stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
+    lines = out.communicate()[0].decode('utf-8').splitlines()
+    for line in lines:
+        m = re_include.match(line)
+        assert m, f"grep found include but not Python: {line}"
+        filename = m.group(1)
+        line = m.group(2)
+        kind = m.group(3)
+        header = m.group(4)
+        if kind == '<':
+            includes = chevron_includes[header] if header in chevron_includes else []
+        else:
+            includes = quote_includes[header] if header in quote_includes else []
+        includes.append((filename, line))
+        if kind == '<':
+            chevron_includes[header] = includes
+        else:
+            chevron_includes[header] = includes
+    return chevron_includes, quote_includes
+
+debug = os.getenv("DEBUG")
+verbose = False
+
+print("Building callgraph...")
+cg = build_callgraph.compute(files)
+
+framac_bin = os.getenv('FRAMAC_BIN')
+if not framac_bin:
+    sys.exit("error: FRAMAC_BIN not in environment")
+framac = Path(framac_bin) / "frama-c"
+
+framac_share = Path(subprocess.check_output([framac, '-no-autoload-plugins', '-print-share-path']).decode())
+
+print("Computing data about libc/POSIX functions...")
+libc_defined_functions, libc_specified_functions = get_framac_libc_function_statuses(framac, framac_share)
+
+with open(framac_share / "compliance" / "c11_functions.json") as f:
+    c11_functions = json.load(f)["data"]
+
+with open(framac_share / "compliance" / "c11_headers.json") as f:
+    c11_headers = json.load(f)["data"]
+
+with open(framac_share / "compliance" / "posix_identifiers.json") as f:
+    all_data = json.load(f)
+    posix_identifiers = all_data["data"]
+    posix_headers = all_data["headers"]
+
+recursive_cycles = []
+build_callgraph.compute_recursive_cycles(cg, recursive_cycles)
+for (cycle_start_loc, cycle) in recursive_cycles:
+    (filename, line) = cycle_start_loc
+    (x, y) = cycle[0]
+    pretty_cycle = f"{x} -> {y}"
+    for (x, y) in cycle[1:]:
+        pretty_cycle += f" -> {y}"
+    print(f"[recursion] found recursive cycle near {filename}:{line}: {pretty_cycle}")
+
+callees = [callee for (_, callee) in list(cg.edges.keys())]
+callees = set(callees)
+used_headers = set()
+print(f"Estimating difficulty for {len(callees)} function calls...")
+warnings = 0
+for callee in sorted(callees):
+    #print(f"callee: {callee}")
+    if callee in posix_identifiers:
+        used_headers.add(posix_identifiers[callee]["header"])
+    if callee in c11_functions:
+        # check that the callee is not a macro or type (e.g. va_arg)
+        if posix_identifiers[callee]["id_type"] != "function":
+            continue
+        #print(f"C11 function: {callee}")
+        if callee in libc_specified_functions:
+            if verbose or debug:
+                print(f"- good: {callee} (C11) is specified in Frama-C's libc")
+        elif callee in libc_defined_functions:
+            if verbose or debug:
+                print(f"- ok: {callee} (C11) is defined in Frama-C's libc")
+        else:
+            # Some functions without specification are actually variadic
+            # (and possibly handled by the Variadic plug-in)
+            if "notes" in posix_identifiers[callee] and "variadic-plugin" in posix_identifiers[callee]["notes"]:
+                if verbose or debug:
+                    print(f"- ok: {callee} (C11) is handled by the Variadic plug-in")
+            else:
+                warnings += 1
+                print(f"- warning: {callee} (C11) has neither code nor spec in Frama-C's libc")
+    elif callee in posix_identifiers:
+        # check that the callee is not a macro or type (e.g. va_arg)
+        if posix_identifiers[callee]["id_type"] != "function":
+            continue
+        #print(f"Non-C11, POSIX function: {callee}")
+        if callee in libc_specified_functions:
+            if verbose or debug:
+                print(f"- good: {callee} (POSIX) specified in Frama-C's libc")
+        elif callee in libc_defined_functions:
+            if verbose or debug:
+                print(f"- ok: {callee} (POSIX) defined in Frama-C's libc")
+        else:
+            warnings += 1
+            print(f"- warning: {callee} (POSIX) has neither code nor spec in Frama-C's libc")
+print(f"Function-related warnings: {warnings}")
+
+if (verbose or debug) and used_headers:
+    print(f"used headers:")
+    for header in sorted(used_headers):
+        print(f"  <{header}>")
+
+(chevron_includes, quote_includes) = get_includes(files)
+
+def is_local_header(header_dirs, header):
+    for d in header_dirs:
+        path = Path(d)
+        if Path(path / header).exists():
+            return True
+    return False
+
+print(f"Estimating difficulty for {len(chevron_includes)} '#include <header>' directives...")
+non_posix_headers = []
+for header in chevron_includes:
+    if header in posix_headers:
+        fc_support = posix_headers[header]["fc-support"]
+        if fc_support == "unsupported":
+            print(f"- WARNING: included header <{header}> is explicitly unsupported by Frama-C")
+        elif fc_support == "none":
+            print(f"- warning: included header <{header}> not currently included in Frama-C's libc")
+        else:
+            if verbose or debug:
+                c11_or_posix = "C11" if header in c11_headers else "POSIX"
+                print(f"- note: included {c11_or_posix} header ")
+    else:
+        if is_local_header(header_dirs, header):
+            if verbose or debug:
+                print(f"- ok: included header <{header}> seems to be available locally")
+        else:
+            non_posix_headers.append(header)
+            print(f"- warning: included non-POSIX header <{header}>")
+print(f"Header-related warnings: {len(non_posix_headers)}")
-- 
GitLab