-
Andre Maroneze authoredAndre Maroneze authored
estimate_difficulty.py 11.10 KiB
#!/usr/bin/env python3
#-*- coding: utf-8 -*-
##########################################################################
# #
# This file is part of Frama-C. #
# #
# Copyright (C) 2007-2021 #
# CEA (Commissariat à l'énergie atomique et aux énergies #
# alternatives) #
# #
# you can redistribute it and/or modify it under the terms of the GNU #
# Lesser General Public License as published by the Free Software #
# Foundation, version 2.1. #
# #
# It is distributed in the hope that it will be useful, #
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
# GNU Lesser General Public License for more details. #
# #
# See the GNU Lesser General Public License version 2.1 #
# for more details (enclosed in the file licenses/LGPLv2.1). #
# #
##########################################################################
# This script uses several heuristics to try and estimate the difficulty
# of analyzing a new code base with Frama-C.
import argparse
import json
import os
from pathlib import Path
import re
import subprocess
import sys
import tempfile
import build_callgraph
import function_finder
import source_filter
#TODO : avoid relativizing paths when introducing too many ".." ;
#TODO : accept directory as argument (--full-tree), and then do glob **/*.{c,i} inside
#TODO : try to check the presence of compiler builtins
#TODO : try to check for pragmas
parser = argparse.ArgumentParser(description="""
Estimates the difficulty of analyzing a given code base""")
parser.add_argument("--header-dirs", "-d", metavar='DIR', nargs='+',
help='directories containing headers (default: .frama-c)')
parser.add_argument("files", nargs='+', help='source files')
args = vars(parser.parse_args())
header_dirs = args["header_dirs"]
if not header_dirs:
header_dirs = []
files = args["files"]
under_test = os.getenv("PTESTS_TESTING")
# gather information from several sources
def extract_keys(l):
return [list(key.keys())[0] for key in l]
def get_framac_libc_function_statuses(framac, framac_share):
(_metrics_handler, metrics_tmpfile) = tempfile.mkstemp(prefix="fc_script_est_diff", suffix=".json")
if debug:
print(f"metrics_tmpfile: {metrics_tmpfile}")
fc_runtime = framac_share / "libc" / "__fc_runtime.c"
fc_libc_headers = framac_share / "libc" / "__fc_libc.h"
subprocess.run([framac, "-no-autoload-plugins", fc_runtime, fc_libc_headers,
"-load-module", "metrics", "-metrics", "-metrics-libc",
"-metrics-output", metrics_tmpfile], stdout=subprocess.DEVNULL, check=True)
with open(metrics_tmpfile) as f:
metrics_json = json.load(f)
os.remove(metrics_tmpfile)
defined = extract_keys(metrics_json["defined-functions"])
spec_only = extract_keys(metrics_json["specified-only-functions"])
return (defined, spec_only)
re_include = re.compile(r'\s*#\s*include\s*("|<)([^">]+)("|>)')
def grep_includes_in_file(filename):
file_content = source_filter.open_and_filter(filename, not under_test)
i = 0
for line in f.readlines():
i += 1
m = re_include.match(line)
if m:
kind = m.group(1)
header = m.group(2)
yield((i,kind,header))
def get_includes(files):
quote_includes = {}
chevron_includes = {}
for filename in files:
for line, kind, header in grep_includes_in_file(filename):
if kind == '<':
includes = chevron_includes[header] if header in chevron_includes else []
else:
includes = quote_includes[header] if header in quote_includes else []
includes.append((filename, line))
if kind == '<':
chevron_includes[header] = includes
else:
quote_includes[header] = includes
return chevron_includes, quote_includes
debug = os.getenv("DEBUG")
verbose = False
print("Building callgraph...")
cg = build_callgraph.compute(files)
framac_bin = os.getenv('FRAMAC_BIN')
if not framac_bin:
sys.exit("error: FRAMAC_BIN not in environment")
framac = Path(framac_bin) / "frama-c"
framac_share = Path(subprocess.check_output([framac, '-no-autoload-plugins', '-print-share-path']).decode())
print("Computing data about libc/POSIX functions...")
libc_defined_functions, libc_specified_functions = get_framac_libc_function_statuses(framac, framac_share)
with open(framac_share / "compliance" / "c11_functions.json") as f:
c11_functions = json.load(f)["data"]
with open(framac_share / "compliance" / "c11_headers.json") as f:
c11_headers = json.load(f)["data"]
with open(framac_share / "compliance" / "posix_identifiers.json") as f:
all_data = json.load(f)
posix_identifiers = all_data["data"]
posix_headers = all_data["headers"]
recursive_cycles = []
reported_recursive_pairs = set()
build_callgraph.compute_recursive_cycles(cg, recursive_cycles)
for (cycle_start_loc, cycle) in recursive_cycles:
# Note: in larger code bases, many cycles are reported for the same final
# function (e.g. for the calls 'g -> g', we may have 'f -> g -> g',
# 'h -> g -> g', etc; to minimize this, we print just the first one.
# This does not prevent 3-cycle repetitions, such as 'f -> g -> f',
# but these are less common.
if cycle[-1] in reported_recursive_pairs:
continue
reported_recursive_pairs.add(cycle[-1])
(filename, line) = cycle_start_loc
(x, y) = cycle[0]
pretty_cycle = f"{x} -> {y}"
for (x, y) in cycle[1:]:
pretty_cycle += f" -> {y}"
print(f"[recursion] found recursive cycle near {filename}:{line}: {pretty_cycle}")
callees = [callee for (_, callee) in list(cg.edges.keys())]
callees = set(callees)
used_headers = set()
print(f"Estimating difficulty for {len(callees)} function calls...")
warnings = 0
for callee in sorted(callees):
def callee_status(status, standard, reason):
global warnings
if status == "warning":
warnings += 1
if verbose or debug or status == "warning":
print(f"- {status}: {callee} ({standard}) {reason}")
#print(f"callee: {callee}")
if callee in posix_identifiers:
used_headers.add(posix_identifiers[callee]["header"])
if callee in c11_functions:
standard = "C11"
# check that the callee is not a macro or type (e.g. va_arg);
# a few functions, such as strcpy_s, are in C11 but not in POSIX,
# so we must test membership before checking the POSIX type
if callee in posix_identifiers and posix_identifiers[callee]["id_type"] != "function":
continue
#print(f"C11 function: {callee}")
if callee in libc_specified_functions:
callee_status("good", standard, "is specified in Frama-C's libc")
elif callee in libc_defined_functions:
callee_status("ok", standard, "is defined in Frama-C's libc")
else:
# Some functions without specification are actually variadic
# (and possibly handled by the Variadic plug-in)
if callee in posix_identifiers and "notes" in posix_identifiers[callee] and "variadic-plugin" in posix_identifiers[callee]["notes"]:
callee_status("ok", standard, "is handled by the Variadic plug-in")
else:
callee_status("warning", standard, "has neither code nor spec in Frama-C's libc")
elif callee in posix_identifiers:
standard = "POSIX"
# check that the callee is not a macro or type (e.g. va_arg)
if posix_identifiers[callee]["id_type"] != "function":
continue
#print(f"Non-C11, POSIX function: {callee}")
if callee in libc_specified_functions:
callee_status("good", standard, "specified in Frama-C's libc")
elif callee in libc_defined_functions:
callee_status("ok", standard, "defined in Frama-C's libc")
else:
callee_status("warning", standard, "has neither code nor spec in Frama-C's libc")
print(f"Function-related warnings: {warnings}")
if (verbose or debug) and used_headers:
print(f"used headers:")
for header in sorted(used_headers):
print(f" <{header}>")
(chevron_includes, quote_includes) = get_includes(files)
def is_local_header(header_dirs, header):
for d in header_dirs:
path = Path(d)
if Path(path / header).exists():
return True
return False
print(f"Estimating difficulty for {len(chevron_includes)} '#include <header>' directives...")
non_posix_headers = []
for header in sorted(chevron_includes, key=str.casefold):
if header in posix_headers:
fc_support = posix_headers[header]["fc-support"]
if fc_support == "unsupported":
print(f"- WARNING: included header <{header}> is explicitly unsupported by Frama-C")
elif fc_support == "none":
print(f"- warning: included header <{header}> not currently included in Frama-C's libc")
else:
if verbose or debug:
c11_or_posix = "C11" if header in c11_headers else "POSIX"
print(f"- note: included {c11_or_posix} header ")
else:
if is_local_header(header_dirs, header):
if verbose or debug:
print(f"- ok: included header <{header}> seems to be available locally")
else:
non_posix_headers.append(header)
print(f"- warning: included non-POSIX header <{header}>")
print(f"Header-related warnings: {len(non_posix_headers)}")
# dynamic allocation
dynalloc_functions = set(["malloc", "calloc", "free", "realloc", "alloca", "mmap"])
dyncallees = dynalloc_functions.intersection(callees)
if dyncallees:
print(f"- note: calls to dynamic allocation functions: {', '.join(sorted(dyncallees))}")
# unsupported C11-specific features
c11_unsupported = ["_Alignas", "_Alignof", "_Complex", "_Generic", "_Imaginary"]
for keyword in c11_unsupported:
out = subprocess.Popen(["grep", "-n", '\\b' + keyword + '\\b'] + files + ["/dev/null"],
stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
lines = out.communicate()[0].decode('utf-8').splitlines()
if lines:
n = len(lines)
print(f"- warning: found {n} line{'s' if n > 1 else ''} with occurrences of unsupported C11 construct '{keyword}'")
# assembly code
if "asm" in callees or "__asm" in callees or "__asm__" in callees:
print(f"- warning: code seems to contain inline assembly ('asm(...)')")
# TODO:
# - detect absence of 'main' function (library)