Skip to content
Snippets Groups Projects
  • Virgile Prevosto's avatar
    9c3171bc
    [machdep] more robust generator · 9c3171bc
    Virgile Prevosto authored
    - --compiler-flags="-c -I/path" works (i.e. there's no need to provide as many
      --compiler-flags options as you have flags to pass to the compiler itself).
      Ditto for --cpp-arch-flags, of course.
    - we start by checking that a minimal C file does not trigger a compiler error
      with the given option and we abort otherwise. This ensures that subsequent
      tests will not mistakenly take a configuration error for a genuine result.
    9c3171bc
    History
    [machdep] more robust generator
    Virgile Prevosto authored
    - --compiler-flags="-c -I/path" works (i.e. there's no need to provide as many
      --compiler-flags options as you have flags to pass to the compiler itself).
      Ditto for --cpp-arch-flags, of course.
    - we start by checking that a minimal C file does not trigger a compiler error
      with the given option and we abort otherwise. This ensures that subsequent
      tests will not mistakenly take a configuration error for a genuine result.
make_machdep.py 14.13 KiB
#!/usr/bin/env python
##########################################################################
#                                                                        #
#  This file is part of Frama-C.                                         #
#                                                                        #
#  Copyright (C) 2007-2023                                               #
#    CEA (Commissariat à l'énergie atomique et aux énergies              #
#         alternatives)                                                  #
#                                                                        #
#  you can redistribute it and/or modify it under the terms of the GNU   #
#  Lesser General Public License as published by the Free Software       #
#  Foundation, version 2.1.                                              #
#                                                                        #
#  It is distributed in the hope that it will be useful,                 #
#  but WITHOUT ANY WARRANTY; without even the implied warranty of        #
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         #
#  GNU Lesser General Public License for more details.                   #
#                                                                        #
#  See the GNU Lesser General Public License version 2.1                 #
#  for more details (enclosed in the file licenses/LGPLv2.1).            #
#                                                                        #
##########################################################################

"""
Produces a machdep.yaml file for a given architecture.

Prerequisites:

- A C11-compatible (cross-)compiler (with support for _Generic),
- A (cross-)compiler supporting _Static_assert
- A (cross-)compiler supporting _Alignof or alignof

This script tries to compile several source files to extract the
information we need in terms of sizeof, alignof and representation
of the various types defined by the standard (e.g. size_t, wchar_t, ...)

In case some values are not identified, the YAML format can be edited
by hand afterwards.
"""

import argparse
from pathlib import Path
import re
import subprocess
import sys
import logging
import yaml
from yaml.representer import Representer

# Directory of the script as it was invoked (taken from sys.argv[0]).
# The C probe files are looked up in this directory and the machdep
# schema in its parent.
my_path = Path(sys.argv[0]).parent

# Prefix every log record with its severity level.
logging.basicConfig(format="%(levelname)s: %(message)s")

# Command-line interface of the generator.
parser = argparse.ArgumentParser(prog="make_machdep")
parser.add_argument("-v", "--verbose", action="store_true")
parser.add_argument("-o", type=argparse.FileType("w"), dest="dest_file")
parser.add_argument("--compiler", default="cc", help="which compiler to use; default is 'cc'")
parser.add_argument(
    "--compiler-version",
    default="--version",
    help="option to pass to the compiler to obtain its version; default is --version",
)

parser.add_argument(
    "--from-file",
    help="reads compiler and arch flags from existing yaml file. Use -i to update it in place",
)
parser.add_argument(
    "-i",
    "--in-place",
    action="store_true",
    help="when reading compiler config from yaml, update the file in place. unused otherwise",
)

parser.add_argument(
    "--cpp-arch-flags",
    nargs="+",
    action="extend",
    help="architecture-specific flags needed for preprocessing, e.g. '-m32'",
)
parser.add_argument(
    "--compiler-flags",
    nargs="+",
    action="extend",
    type=str,
    help="flags to be given to the compiler (other than those set by --cpp-arch-flags); default is '-c'",
)
parser.add_argument(
    "--check",
    action="store_true",
    help="checks that the generated machdep is conforming to the schema",
)
parser.add_argument(
    "--check-only",
    action="store_true",
    help="must be used in conjunction with --from-file to check that the provided input file is conforming to the schema",
)

# Unknown options are tolerated rather than rejected, so other_args
# collects whatever the parser did not recognize.
args, other_args = parser.parse_known_args()

# Do not drop unknown options silently: a misspelled flag would otherwise
# be ignored and the machdep generated with an unintended configuration.
if other_args:
    logging.warning(f"ignoring unrecognized arguments: {' '.join(other_args)}")

# "-c" is the documented default for --compiler-flags.
if not args.compiler_flags:
    args.compiler_flags = ["-c"]

# Normalize an absent --cpp-arch-flags to an empty list so that it can be
# concatenated with the compiler flags later on.
if not args.cpp_arch_flags:
    args.cpp_arch_flags = []


def make_schema():
    """Load and return the machdep schema.

    The schema is read from ``machdep-schema.yaml`` in the parent directory
    of the script and parsed with ``yaml.safe_load``.
    """
    schema_filename = my_path.parent / "machdep-schema.yaml"
    # Decode explicitly as UTF-8 so that parsing does not depend on the
    # locale's default encoding.
    with open(schema_filename, "r", encoding="utf-8") as schema:
        return yaml.safe_load(schema)


# Schema loaded once at start-up. It is used to validate machdep objects,
# to enumerate the expected fields, and to look up the type of a field
# when a default value is needed.
schema = make_schema()


def check_machdep(machdep):
    """Validate a machdep object against the machdep schema.

    Returns True when the object conforms to the schema, or when the
    jsonschema package is not installed (no validation can be performed
    in that case and a warning is logged). Returns False when validation
    fails; the reason reported by jsonschema is logged as well.
    """
    # jsonschema is an optional dependency: import it lazily and keep the
    # try body limited to the import itself.
    try:
        from jsonschema import validate, ValidationError
    except ImportError:
        logging.warning("jsonschema is not available: no validation will be performed")
        return True

    try:
        validate(machdep, schema)
    except ValidationError as err:
        logging.warning("machdep object is not conforming to machdep schema")
        # Tell the user which constraint is violated instead of
        # discarding the diagnostic.
        logging.warning(f"validation error: {err.message}")
        return False
    return True


if args.from_file:
    # Take compiler and architecture flags from an existing machdep file.
    # The context manager closes the file even if YAML parsing fails, and
    # the explicit encoding makes decoding independent of the locale.
    with open(args.from_file, "r", encoding="utf-8") as orig_file:
        orig_machdep = yaml.safe_load(orig_file)
    if args.check_only:
        # Only report whether the given file conforms to the schema.
        sys.exit(0 if check_machdep(orig_machdep) else 1)
    # An empty YAML document is loaded as None: report it like any other
    # file lacking the required fields.
    if (
        not isinstance(orig_machdep, dict)
        or "compiler" not in orig_machdep
        or "cpp_arch_flags" not in orig_machdep
    ):
        raise Exception("Missing fields in yaml file")
    args.compiler = orig_machdep["compiler"]
    if isinstance(orig_machdep["cpp_arch_flags"], list):
        args.cpp_arch_flags = orig_machdep["cpp_arch_flags"]
    else:  # old version of the schema used a single string
        args.cpp_arch_flags = orig_machdep["cpp_arch_flags"].split()


def print_machdep(machdep):
    """Dump the machdep as YAML to its destination.

    With --from-file and --in-place, the input file is overwritten.
    Otherwise the output goes to the file given with -o, or to standard
    output when no destination was provided.
    """
    if args.from_file and args.in_place:
        # Close the rewritten file explicitly so that its content is
        # flushed as soon as the dump is done.
        with open(args.from_file, "w", encoding="utf-8") as in_place_file:
            yaml.dump(machdep, in_place_file, indent=4, sort_keys=True)
        return
    if args.dest_file is None:
        args.dest_file = sys.stdout
    yaml.dump(machdep, args.dest_file, indent=4, sort_keys=True)


def default_value(typ):
    """Return the placeholder value for a field of schema type ``typ``.

    Known types are "integer" (-1), "boolean" (False), "string" ("") and
    "list" (an empty dict). Any other type triggers a warning and yields
    None.
    """
    # The table is rebuilt on each call, so the dict returned for "list"
    # is always a fresh object.
    fallbacks = (
        ("integer", -1),
        ("boolean", False),
        ("string", ""),
        ("list", {}),
    )
    for schema_type, fallback in fallbacks:
        if typ == schema_type:
            return fallback
    logging.warning(f"Unexpected type {typ} in YAML schema")
    return None


def make_machdep():
    """Return a new machdep dict with one entry per schema key, all set to None."""
    # None marks a field whose value has not been determined yet.
    return dict.fromkeys(schema)


# Machdep under construction; fields still set to None have not been found yet.
machdep = make_machdep()

# Base command used for every probe: compiler followed by all flags.
compilation_command = [args.compiler]

for flag in args.cpp_arch_flags + args.compiler_flags:
    # A single option value may carry several flags (e.g. "-c -I/path").
    # Split on any run of whitespace: splitting on a single space would
    # produce empty arguments for repeated, leading or trailing spaces.
    compilation_command = compilation_command + flag.split()

# Probe files, grouped by the way their result is interpreted.
# sanity_check is just here to ensure that the given compiler and flags
# are coherent. It must be kept at the top of the list.
source_files = [
    (f"{stem}.c", kind)
    for kind, stems in (
        ("none", ("sanity_check",)),
        (
            "number",
            (
                "sizeof_short",
                "sizeof_int",
                "sizeof_long",
                "sizeof_longlong",
                "sizeof_ptr",
                "sizeof_float",
                "sizeof_double",
                "sizeof_longdouble",
                "sizeof_void",
                "sizeof_fun",
                "alignof_short",
                "alignof_int",
                "alignof_long",
                "alignof_longlong",
                "alignof_ptr",
                "alignof_float",
                "alignof_double",
                "alignof_longdouble",
                "alignof_fun",
                "alignof_str",
                "alignof_aligned",
            ),
        ),
        (
            "type",
            (
                "size_t",
                "ssize_t",
                "wchar_t",
                "ptrdiff_t",
                "intptr_t",
                "uintptr_t",
                "int_fast8_t",
                "int_fast16_t",
                "int_fast32_t",
                "int_fast64_t",
                "uint_fast8_t",
                "uint_fast16_t",
                "uint_fast32_t",
                "uint_fast64_t",
                "wint_t",
                "sig_atomic_t",
                "time_t",
            ),
        ),
        ("bool", ("char_is_unsigned", "little_endian")),
        ("has__builtin_va_list", ("has__builtin_va_list",)),
        (
            "macro",
            (
                "weof",
                "wordsize",
                "posix_version",
                "limits_macros",
                "stdio_macros",
                "stdlib_macros",
                "nsig",
            ),
        ),
        ("macrolist", ("errno",)),
    )
    for stem in stems
]


def find_value(name, typ, output):
    """Extract the value of field ``name`` from compiler output and store it.

    ``output`` is searched for the text "<name> is <value>", where the shape
    of <value> depends on ``typ``: "True"/"False" for "bool", a run of
    digits for "number", a backquoted string for "type". On a match the
    converted value is written to machdep[name]; otherwise a warning is
    logged and a type-specific default is stored instead. Unknown types and
    names absent from machdep are reported and left untouched.
    """
    # For each kind: regex for the value, converter, default on failure.
    kinds = {
        "bool": ("(True|False)", lambda text: text == "True", False),
        "number": ("([0-9]+)", int, -1),
        "type": ("`([^`]+)`", lambda text: text, ""),
    }
    if typ not in kinds:
        logging.warning(f"unexpected type '{typ}' for field '{name}', skipping")
        return
    expected, conversion, default = kinds[typ]

    if name not in machdep:
        logging.warning(f"unexpected symbol '{name}', ignoring")
        return

    res = re.search(name + " is " + expected, output)
    if res is None:
        logging.warning(
            f"cannot find value of field '{name}', using default value: '{default}'"
        )
        machdep[name] = default
        if args.verbose:
            print(f"compiler output is:{output}")
        return

    value = conversion(res.group(1))
    if args.verbose:
        print(f"[INFO] setting {name} to {value}")
    machdep[name] = value


def cleanup_cpp(output):
    """Flatten preprocessor output into a single line.

    Empty lines and lines whose first character is '#' are dropped; the
    remaining lines are stripped of surrounding whitespace and joined with
    single spaces.
    """
    return " ".join(
        line.strip()
        for line in output.splitlines()
        if line and not line.startswith("#")
    )


def find_macros_value(output, is_list=False, entry=None):
    """Record every "<name>_is = <value>;" occurrence found in ``output``.

    When ``is_list`` is true, machdep[entry] is reset to an empty dict and
    all name/value pairs are collected in it (``entry`` must be provided).
    Otherwise each value is stored directly in machdep[name], provided the
    name is already a key of machdep; other names trigger a warning.
    In verbose mode the processed output is printed at the end.
    """
    if is_list:
        assert entry
        machdep[entry] = {}
    for found in re.finditer(r"(\w+)_is = ([^;]+);", output):
        name = found.group(1)
        value = found.group(2).strip()
        if is_list:
            machdep[entry][name] = value
            continue
        if name not in machdep:
            logging.warning(f"unexpected symbol '{name}', ignoring")
            continue
        if args.verbose:
            print(f"[INFO] setting {name} to {value}")
        machdep[name] = value
    if args.verbose:
        print(f"compiler output is:{output}")


# Run every probe file through the compiler and interpret the outcome
# according to the kind associated with the file.
for f, typ in source_files:
    p = my_path / f
    cmd = compilation_command + [str(p)]
    if typ in ("macro", "macrolist"):
        # We're just interested in expanding a macro,
        # treatment is a bit different than the rest.
        cmd = cmd + ["-E"]
    if args.verbose:
        print(f"[INFO] running command: {' '.join(cmd)}")
    try:
        proc = subprocess.run(cmd, capture_output=True)
    except OSError as err:
        # The compiler cannot be started at all (e.g. it is not installed):
        # abort with a clear message rather than a traceback.
        logging.critical(f"cannot run compiler '{args.compiler}': {err}")
        sys.exit(1)
    # Remove the object file that a successful compilation may have left
    # in the current directory.
    Path(f).with_suffix(".o").unlink(missing_ok=True)
    # Compiler output is not guaranteed to be valid UTF-8; never fail on
    # decoding, the values we look for are plain ASCII anyway.
    compiler_stdout = proc.stdout.decode(errors="replace")
    compiler_stderr = proc.stderr.decode(errors="replace")
    if typ == "none":
        if proc.returncode != 0:
            logging.critical("cannot compile sample C file with provided compiler and flags.")
            # Logged at error level: INFO records are filtered out by the
            # default logging configuration, which would hide the reason
            # of the failure from the user.
            logging.error(f"compiler output is:{compiler_stderr}")
            sys.exit(1)
        continue
    if typ == "macro":
        if proc.returncode != 0:
            logging.warning(f"error in preprocessing value '{p}', some values won't be filled")
            if args.verbose:
                print(f"compiler output is:{compiler_stderr}")
            name = p.stem
            if name in machdep:
                machdep[name] = ""
            continue
        find_macros_value(cleanup_cpp(compiler_stdout))
        continue
    if typ == "macrolist":
        name = p.stem
        if proc.returncode != 0:
            logging.warning(f"error in preprocessing value '{p}', some value might not be filled")
            if args.verbose:
                print(f"compiler output is:{compiler_stderr}")
            if name in machdep:
                machdep[name] = {}
            continue
        find_macros_value(cleanup_cpp(compiler_stdout), is_list=True, entry=name)
        continue
    if typ == "has__builtin_va_list":
        # Special case: compilation success determines presence or absence
        machdep["has__builtin_va_list"] = proc.returncode == 0
        continue
    if proc.returncode == 0:
        # all tests should fail on an appropriate _Static_assert
        # if compilation succeeds, we have a problem
        # (the log format already prefixes the message with its level)
        logging.warning(f"could not identify value of '{p.stem}', skipping")
        continue
    find_value(p.stem, typ, compiler_stderr)

# Ask the compiler for its version and keep the first line of its answer.
version_output = subprocess.run(
    [args.compiler, args.compiler_version],
    capture_output=True,
    text=True,
    errors="replace",
)
# Some compilers/options report their version on stderr; use it when
# stdout is empty instead of failing on an empty list of lines.
version_lines = version_output.stdout.splitlines() or version_output.stderr.splitlines()
if version_lines:
    version = version_lines[0]
else:
    logging.warning("could not determine compiler version")
    version = ""

machdep["compiler"] = args.compiler
machdep["cpp_arch_flags"] = args.cpp_arch_flags
machdep["version"] = version

# Default value, kept if the predefined macros cannot be extracted.
machdep["custom_defs"] = ""

# Extract predefined macros; we're assuming a gcc-like compiler here.
# Leave custom_defs empty if this fails.

# In case we have all the predefined macros, custom_defs will be very long.
# We thus want to output it as a literal block, not a simple string.
# For that, use a custom object and tell PyYAML to output it in a particular way.
# This approach is based on a Stack Overflow answer.


class custom_defs(str):
    """Marker subclass of str with no added behavior.

    Its only purpose is to give the YAML dumper a distinct type to attach
    a dedicated representer to.
    """


def change_style(style, representer):
    """Wrap ``representer`` so that the node it returns uses ``style``.

    The returned callable has the same (dumper, data) signature as
    ``representer``: it delegates to it, overwrites the ``style`` attribute
    of the result and returns that result.
    """

    def styled_representer(dumper, data):
        node = representer(dumper, data)
        node.style = style
        return node

    return styled_representer


# Variant of PyYAML's string representer whose result is forced to the
# "|" scalar style, i.e. the literal block mentioned above.
custom_defs_representer = change_style("|", Representer.represent_str)

# Register it so that custom_defs instances are dumped with this representer.
yaml.add_representer(custom_defs, custom_defs_representer)

# Ask the preprocessor to dump its predefined macros (empty input on stdin).
cmd = compilation_command + ["-dM", "-E", "-"]
if args.verbose:
    print(f"[INFO] running command: {' '.join(cmd)}")
proc = subprocess.run(cmd, stdin=subprocess.DEVNULL, capture_output=True, text=True)
if proc.returncode != 0:
    logging.warning("could not determine predefined macros")
    if args.verbose:
        print(f"compiler output is:{proc.stderr}")
else:
    definitions = []
    for line in proc.stdout.splitlines():
        # Preprocessor emits a warning if we're trying to #undef
        # standard macros. Leave them alone.
        if re.match(r"#define *__STDC", line):
            continue
        macro = re.match(r"#define *(\w+)", line)
        if macro:
            # Each definition is preceded by an #undef of the same macro.
            definitions.append(f"#undef {macro.group(1)}\n")
        definitions.append(f"{line.strip()}\n")
    machdep["custom_defs"] = custom_defs("".join(definitions))

# Name the machdep after the file it is written to, when there is one.
if args.from_file and args.in_place:
    machdep["machdep_name"] = Path(args.from_file).stem
elif args.dest_file and args.dest_file is not sys.stdout:
    # "-o -" makes argparse hand back sys.stdout, whose name ("<stdout>")
    # is not a meaningful machdep name: treat it as having no output file.
    machdep["machdep_name"] = Path(args.dest_file.name).stem
else:
    machdep["machdep_name"] = "anonymous_machdep"

# Fields still set to None were not determined by any probe.
missing_fields = [field for field, value in machdep.items() if value is None]

if missing_fields:
    msg = ", ".join(missing_fields)
    logging.warning(f"the following fields are missing from the machdep definition: {msg}")
    # Fill them with a type-appropriate placeholder so that the output is
    # complete and can be edited by hand afterwards.
    for field in missing_fields:
        machdep[field] = default_value(schema[field]["type"])

# Honour --check: validate the generated machdep against the schema.
# The result is written out even when validation fails, so that it can be
# fixed by hand, but the failure is reflected in the exit status (as it is
# for --check-only).
machdep_is_valid = check_machdep(machdep) if args.check else True

print_machdep(machdep)

if not machdep_is_valid:
    sys.exit(1)