Skip to content
Snippets Groups Projects
estimate_difficulty.py 11.10 KiB
#!/usr/bin/env python3
#-*- coding: utf-8 -*-
##########################################################################
#                                                                        #
#  This file is part of Frama-C.                                         #
#                                                                        #
#  Copyright (C) 2007-2021                                               #
#    CEA (Commissariat à l'énergie atomique et aux énergies              #
#         alternatives)                                                  #
#                                                                        #
#  you can redistribute it and/or modify it under the terms of the GNU   #
#  Lesser General Public License as published by the Free Software       #
#  Foundation, version 2.1.                                              #
#                                                                        #
#  It is distributed in the hope that it will be useful,                 #
#  but WITHOUT ANY WARRANTY; without even the implied warranty of        #
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         #
#  GNU Lesser General Public License for more details.                   #
#                                                                        #
#  See the GNU Lesser General Public License version 2.1                 #
#  for more details (enclosed in the file licenses/LGPLv2.1).            #
#                                                                        #
##########################################################################

# This script uses several heuristics to try and estimate the difficulty
# of analyzing a new code base with Frama-C.

import argparse
import json
import os
from   pathlib import Path
import re
import subprocess
import sys
import tempfile

import build_callgraph
import function_finder
import source_filter

#TODO : avoid relativizing paths when introducing too many ".." ;
#TODO : accept directory as argument (--full-tree), and then do glob **/*.{c,i} inside
#TODO : try to check the presence of compiler builtins
#TODO : try to check for pragmas

parser = argparse.ArgumentParser(description="""
Estimates the difficulty of analyzing a given code base""")
parser.add_argument("--header-dirs", "-d", metavar='DIR', nargs='+',
                    help='directories containing headers (default: .frama-c)')
parser.add_argument("files", nargs='+', help='source files')
args = vars(parser.parse_args())

header_dirs = args["header_dirs"]
if not header_dirs:
    header_dirs = []
files = args["files"]

under_test = os.getenv("PTESTS_TESTING")

# gather information from several sources

def extract_keys(l):
    return [list(key.keys())[0] for key in l]

def get_framac_libc_function_statuses(framac, framac_share):
    (_metrics_handler, metrics_tmpfile) = tempfile.mkstemp(prefix="fc_script_est_diff", suffix=".json")
    if debug:
        print(f"metrics_tmpfile: {metrics_tmpfile}")
    fc_runtime = framac_share / "libc" / "__fc_runtime.c"
    fc_libc_headers = framac_share / "libc" / "__fc_libc.h"
    subprocess.run([framac, "-no-autoload-plugins", fc_runtime, fc_libc_headers,
                    "-load-module", "metrics", "-metrics", "-metrics-libc",
                    "-metrics-output", metrics_tmpfile], stdout=subprocess.DEVNULL, check=True)
    with open(metrics_tmpfile) as f:
        metrics_json = json.load(f)
    os.remove(metrics_tmpfile)
    defined = extract_keys(metrics_json["defined-functions"])
    spec_only = extract_keys(metrics_json["specified-only-functions"])
    return (defined, spec_only)

re_include = re.compile(r'\s*#\s*include\s*("|<)([^">]+)("|>)')
def grep_includes_in_file(filename):
    file_content = source_filter.open_and_filter(filename, not under_test)
    i = 0
    for line in f.readlines():
        i += 1
        m = re_include.match(line)
        if m:
            kind = m.group(1)
            header = m.group(2)
            yield((i,kind,header))

def get_includes(files):
    quote_includes = {}
    chevron_includes = {}
    for filename in files:
        for line, kind, header in grep_includes_in_file(filename):
            if kind == '<':
                includes = chevron_includes[header] if header in chevron_includes else []
            else:
                includes = quote_includes[header] if header in quote_includes else []
            includes.append((filename, line))
            if kind == '<':
                chevron_includes[header] = includes
            else:
                quote_includes[header] = includes
    return chevron_includes, quote_includes

debug = os.getenv("DEBUG")
verbose = False

print("Building callgraph...")
cg = build_callgraph.compute(files)

framac_bin = os.getenv('FRAMAC_BIN')
if not framac_bin:
    sys.exit("error: FRAMAC_BIN not in environment")
framac = Path(framac_bin) / "frama-c"

framac_share = Path(subprocess.check_output([framac, '-no-autoload-plugins', '-print-share-path']).decode())

print("Computing data about libc/POSIX functions...")
libc_defined_functions, libc_specified_functions = get_framac_libc_function_statuses(framac, framac_share)

with open(framac_share / "compliance" / "c11_functions.json") as f:
    c11_functions = json.load(f)["data"]

with open(framac_share / "compliance" / "c11_headers.json") as f:
    c11_headers = json.load(f)["data"]

with open(framac_share / "compliance" / "posix_identifiers.json") as f:
    all_data = json.load(f)
    posix_identifiers = all_data["data"]
    posix_headers = all_data["headers"]

recursive_cycles = []
reported_recursive_pairs = set()
build_callgraph.compute_recursive_cycles(cg, recursive_cycles)
for (cycle_start_loc, cycle) in recursive_cycles:
    # Note: in larger code bases, many cycles are reported for the same final
    # function (e.g. for the calls 'g -> g', we may have 'f -> g -> g',
    # 'h -> g -> g', etc; to minimize this, we print just the first one.
    # This does not prevent 3-cycle repetitions, such as 'f -> g -> f',
    # but these are less common.
    if cycle[-1] in reported_recursive_pairs:
        continue
    reported_recursive_pairs.add(cycle[-1])
    (filename, line) = cycle_start_loc
    (x, y) = cycle[0]
    pretty_cycle = f"{x} -> {y}"
    for (x, y) in cycle[1:]:
        pretty_cycle += f" -> {y}"
    print(f"[recursion] found recursive cycle near {filename}:{line}: {pretty_cycle}")

callees = [callee for (_, callee) in list(cg.edges.keys())]
callees = set(callees)
used_headers = set()
print(f"Estimating difficulty for {len(callees)} function calls...")
warnings = 0

for callee in sorted(callees):
    def callee_status(status, standard, reason):
        global warnings
        if status == "warning":
            warnings += 1
        if verbose or debug or status == "warning":
            print(f"- {status}: {callee} ({standard}) {reason}")
    #print(f"callee: {callee}")
    if callee in posix_identifiers:
        used_headers.add(posix_identifiers[callee]["header"])
    if callee in c11_functions:
        standard = "C11"
        # check that the callee is not a macro or type (e.g. va_arg);
        # a few functions, such as strcpy_s, are in C11 but not in POSIX,
        # so we must test membership before checking the POSIX type
        if callee in posix_identifiers and posix_identifiers[callee]["id_type"] != "function":
            continue
        #print(f"C11 function: {callee}")
        if callee in libc_specified_functions:
            callee_status("good", standard, "is specified in Frama-C's libc")
        elif callee in libc_defined_functions:
            callee_status("ok", standard, "is defined in Frama-C's libc")
        else:
            # Some functions without specification are actually variadic
            # (and possibly handled by the Variadic plug-in)
            if callee in posix_identifiers and "notes" in posix_identifiers[callee] and "variadic-plugin" in posix_identifiers[callee]["notes"]:
                callee_status("ok", standard, "is handled by the Variadic plug-in")
            else:
                callee_status("warning", standard, "has neither code nor spec in Frama-C's libc")
    elif callee in posix_identifiers:
        standard = "POSIX"
        # check that the callee is not a macro or type (e.g. va_arg)
        if posix_identifiers[callee]["id_type"] != "function":
            continue
        #print(f"Non-C11, POSIX function: {callee}")
        if callee in libc_specified_functions:
            callee_status("good", standard, "specified in Frama-C's libc")
        elif callee in libc_defined_functions:
            callee_status("ok", standard, "defined in Frama-C's libc")
        else:
            callee_status("warning", standard, "has neither code nor spec in Frama-C's libc")
print(f"Function-related warnings: {warnings}")

if (verbose or debug) and used_headers:
    print(f"used headers:")
    for header in sorted(used_headers):
        print(f"  <{header}>")

(chevron_includes, quote_includes) = get_includes(files)

def is_local_header(header_dirs, header):
    for d in header_dirs:
        path = Path(d)
        if Path(path / header).exists():
            return True
    return False

print(f"Estimating difficulty for {len(chevron_includes)} '#include <header>' directives...")
non_posix_headers = []
for header in sorted(chevron_includes, key=str.casefold):
    if header in posix_headers:
        fc_support = posix_headers[header]["fc-support"]
        if fc_support == "unsupported":
            print(f"- WARNING: included header <{header}> is explicitly unsupported by Frama-C")
        elif fc_support == "none":
            print(f"- warning: included header <{header}> not currently included in Frama-C's libc")
        else:
            if verbose or debug:
                c11_or_posix = "C11" if header in c11_headers else "POSIX"
                print(f"- note: included {c11_or_posix} header ")
    else:
        if is_local_header(header_dirs, header):
            if verbose or debug:
                print(f"- ok: included header <{header}> seems to be available locally")
        else:
            non_posix_headers.append(header)
            print(f"- warning: included non-POSIX header <{header}>")
print(f"Header-related warnings: {len(non_posix_headers)}")


# dynamic allocation

dynalloc_functions = set(["malloc", "calloc", "free", "realloc", "alloca", "mmap"])
dyncallees = dynalloc_functions.intersection(callees)
if dyncallees:
    print(f"- note: calls to dynamic allocation functions: {', '.join(sorted(dyncallees))}")


# unsupported C11-specific features

c11_unsupported = ["_Alignas", "_Alignof", "_Complex", "_Generic", "_Imaginary"]

for keyword in c11_unsupported:
    out = subprocess.Popen(["grep", "-n", '\\b' + keyword + '\\b'] + files + ["/dev/null"],
                           stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    lines = out.communicate()[0].decode('utf-8').splitlines()
    if lines:
        n = len(lines)
        print(f"- warning: found {n} line{'s' if n > 1 else ''} with occurrences of unsupported C11 construct '{keyword}'")

# assembly code

if "asm" in callees or "__asm" in callees or "__asm__" in callees:
    print(f"- warning: code seems to contain inline assembly ('asm(...)')")

# TODO:
# - detect absence of 'main' function (library)