From 5c72dbce637517fc959630fb5e69659c50e40e22 Mon Sep 17 00:00:00 2001
From: Andre Maroneze <andre.maroneze@cea.fr>
Date: Fri, 9 Dec 2022 10:12:24 +0100
Subject: [PATCH] [kernel] WIP: first attempt at machdep generator

---
 make_machdep/.gitignore                |   1 +
 make_machdep/alignof_aligned.c         |   5 +
 make_machdep/alignof_fun.c             |   5 +
 make_machdep/alignof_longdouble.c      |   3 +
 make_machdep/alignof_str.c             |   3 +
 make_machdep/char_is_unsigned.c        |   4 +
 make_machdep/const_string_literals.c   |   1 +
 make_machdep/has__builtin_va_list.c    |   2 +
 make_machdep/little_endian.c           |  13 +
 make_machdep/make_machdep.py           | 319 +++++++++++++++++++++++++
 make_machdep/make_machdep_common.h     |  24 ++
 make_machdep/ptrdiff_t.c               |   6 +
 make_machdep/size_t.c                  |   6 +
 make_machdep/sizeof_alignof_standard.c |  29 +++
 make_machdep/sizeof_fun.c              |   5 +
 make_machdep/sizeof_longdouble.c       |   3 +
 make_machdep/sizeof_void.c             |   3 +
 make_machdep/wchar_t.c                 |   9 +
 18 files changed, 441 insertions(+)
 create mode 100644 make_machdep/.gitignore
 create mode 100644 make_machdep/alignof_aligned.c
 create mode 100644 make_machdep/alignof_fun.c
 create mode 100644 make_machdep/alignof_longdouble.c
 create mode 100644 make_machdep/alignof_str.c
 create mode 100644 make_machdep/char_is_unsigned.c
 create mode 100644 make_machdep/const_string_literals.c
 create mode 100644 make_machdep/has__builtin_va_list.c
 create mode 100644 make_machdep/little_endian.c
 create mode 100755 make_machdep/make_machdep.py
 create mode 100644 make_machdep/make_machdep_common.h
 create mode 100644 make_machdep/ptrdiff_t.c
 create mode 100644 make_machdep/size_t.c
 create mode 100644 make_machdep/sizeof_alignof_standard.c
 create mode 100644 make_machdep/sizeof_fun.c
 create mode 100644 make_machdep/sizeof_longdouble.c
 create mode 100644 make_machdep/sizeof_void.c
 create mode 100644 make_machdep/wchar_t.c

diff --git a/make_machdep/.gitignore b/make_machdep/.gitignore
new file mode 100644
index 00000000000..5761abcfdf0
--- /dev/null
+++ b/make_machdep/.gitignore
@@ -0,0 +1 @@
+*.o
diff --git a/make_machdep/alignof_aligned.c b/make_machdep/alignof_aligned.c
new file mode 100644
index 00000000000..4e3406707b8
--- /dev/null
+++ b/make_machdep/alignof_aligned.c
@@ -0,0 +1,5 @@
+#include "make_machdep_common.h"
+
+char array[1] __attribute__((aligned));
+
+unsigned char alignof_aligned = ALIGNOF(array);
diff --git a/make_machdep/alignof_fun.c b/make_machdep/alignof_fun.c
new file mode 100644
index 00000000000..ee33251515f
--- /dev/null
+++ b/make_machdep/alignof_fun.c
@@ -0,0 +1,5 @@
+#include "make_machdep_common.h"
+
+int main(void);
+
+unsigned char alignof_fun = ALIGNOF(main);
diff --git a/make_machdep/alignof_longdouble.c b/make_machdep/alignof_longdouble.c
new file mode 100644
index 00000000000..77a05993157
--- /dev/null
+++ b/make_machdep/alignof_longdouble.c
@@ -0,0 +1,3 @@
+#include "make_machdep_common.h"
+
+unsigned char alignof_longdouble = ALIGNOF(long double);
diff --git a/make_machdep/alignof_str.c b/make_machdep/alignof_str.c
new file mode 100644
index 00000000000..77c8ea19c76
--- /dev/null
+++ b/make_machdep/alignof_str.c
@@ -0,0 +1,3 @@
+#include "make_machdep_common.h"
+
+unsigned char alignof_str = ALIGNOF("literal string");
diff --git a/make_machdep/char_is_unsigned.c b/make_machdep/char_is_unsigned.c
new file mode 100644
index 00000000000..dd4f61b9e48
--- /dev/null
+++ b/make_machdep/char_is_unsigned.c
@@ -0,0 +1,4 @@
+#include "make_machdep_common.h"
+
+__attribute__((section(".data")))
+unsigned char char_is_unsigned = (char)-1 >= 0 ? 0x15 : 0xf4; /* 0x15: true, 0xf4: false (as decoded by make_machdep.py) */
diff --git a/make_machdep/const_string_literals.c b/make_machdep/const_string_literals.c
new file mode 100644
index 00000000000..2e51bff8891
--- /dev/null
+++ b/make_machdep/const_string_literals.c
@@ -0,0 +1 @@
+char *const_string_literals = "%$#!"; /* make_machdep.py looks for the literal's first byte, '%' (0x25) */
diff --git a/make_machdep/has__builtin_va_list.c b/make_machdep/has__builtin_va_list.c
new file mode 100644
index 00000000000..77d2b17a407
--- /dev/null
+++ b/make_machdep/has__builtin_va_list.c
@@ -0,0 +1,2 @@
+/* If this compiles, we assume the compiler has __builtin_va_list. */
+__builtin_va_list l = {0};
diff --git a/make_machdep/little_endian.c b/make_machdep/little_endian.c
new file mode 100644
index 00000000000..2632a464e02
--- /dev/null
+++ b/make_machdep/little_endian.c
@@ -0,0 +1,13 @@
+#if defined(__BYTE_ORDER__)
+# if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+__attribute__((section(".data")))
+unsigned char little_endian = 0xf4; /* big endian: 0xf4 is decoded as 'false' by make_machdep.py */
+# elif __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+__attribute__((section(".data")))
+unsigned char little_endian = 0x15; /* little endian: 0x15 is decoded as 'true' by make_machdep.py */
+# else
+# error Unexpected __BYTE_ORDER__
+# endif
+#else
+# error __BYTE_ORDER__ undefined
+#endif
diff --git a/make_machdep/make_machdep.py b/make_machdep/make_machdep.py
new file mode 100755
index 00000000000..bf68467450a
--- /dev/null
+++ b/make_machdep/make_machdep.py
@@ -0,0 +1,319 @@
+#!/usr/bin/env python3
+
+"""
+Produces a machdep.ml file for a given architecture.
+
+Prerequisites:
+
+- A C11-compatible (cross-)compiler (with support for _Generic),
+  or a (cross-)compiler having __builtin_types_compatible_p
+
+- A (cross-)compiler supporting _Alignof or alignof
+
+- objdump
+
+This script tries to compile several source files into object files,
+then uses objdump to extract information from the compilation.
+
+We want to obtain values produced by the compiler.
+In an ideal scenario, we are able to execute the binary, so we can just use
+printf(). However, when cross-compiling, we may be unable to run the program.
+Even worse, we may lack a proper runtime, and thus simply obtaining an
+executable may be impossible.
+However, we don't really need it: having an object file (with symbols) is
+usually enough.
+
+Compilation is split in several files because, for non-standard constructions,
+some compilers (e.g. CompCert) may fail to parse them. We must detect these
+cases and output warnings, but without preventing compilation of the rest.
+"""
+
+import argparse
+from pathlib import Path
+import re
+import subprocess
+import sys
+
+re_symbol_name = re.compile("^[0-9a-fA-F]+ <([^>]+)>: *$")
+
+# Parsing objdump's format is not trivial: some versions print results as:
+#   <offset>: 01 02 03 04           <assembly>
+# That is, bytes separated by single spaces, then several spaces, then assembly;
+# while other versions (e.g. for mips) print several bytes together:
+#   <offset>: 01020304           <assembly>
+# So we simply take all hexadecimal characters until the end of the line,
+# and then split as soon as 2 consecutive spaces are found.
+# Otherwise, we might end up considering instructions such as 'add' as part
+# of the data.
+# Unfortunately, objdump does not contain an option to display the data bytes
+# themselves _without_ the disassembled data.
+re_symbol_data = re.compile("^ *[0-9a-fA-F]+:[ \t]+([0-9a-fA-F ]+)")
+
+parser = argparse.ArgumentParser(prog="make_machdep")
+parser.add_argument("-v", "--verbose", action="store_true")
+parser.add_argument("--compiler")
+parser.add_argument("--compiler-version")
+parser.add_argument("--cpp-arch-flags", nargs="+", default=[], help="architecture-specific flags needed for preprocessing, e.g. '-m32'")
+parser.add_argument("--compiler-flags", nargs="+", default=["-c"], help="flags to be given to the compiler (other than those set by --cpp-arch-flags); by default, '-c'")
+parser.add_argument("--objdump", action="store", help="objdump command to use", default="objdump")
+args, other_args = parser.parse_known_args()
+
+def print_machdep(machdep):
+    """Print dict 'machdep' on stdout as an OCaml record of type 'mach': str values
+    are quoted, bools become true/false, lists become string lists, others as is."""
+    print("open Cil_types\n")
+    print("let machdep : mach = {")
+    for f, v in machdep.items():
+        if isinstance(v, str):
+            print(f"  {f} = \"{v}\";")
+        elif isinstance(v, bool):
+            print(f"  {f} = {'true' if v else 'false'};")
+        elif isinstance(v, list):
+            l = "; ".join([f'"{e}"' for e in v])  # OCaml lists use ';' (',' builds a tuple)
+            print(f"  {f} = [{l}];")
+        else:
+            print(f"  {f} = {v};")
+    print("}")
+
+def decode_object_file(objfile, section=".data"):
+    """Run objdump on 'section' of 'objfile'. Return (symbols, underscore_name):
+    a dict from symbol name (underscores stripped) to its first data byte, and
+    whether the last decoded symbol began with '_' (None if nothing decoded)."""
+    command = [args.objdump, "-j" + section, "-d", str(objfile)]
+    if args.verbose:
+        print(f"[INFO] running command: {' '.join(command)}")
+    proc = subprocess.run(command, capture_output=True)
+    if proc.returncode != 0:
+        # Special case where objdump _may_ fail: section other than '.data'
+        if section != ".data":
+            return {}, None  # same (dict, flag) shape as the normal return
+        print(f"error: command returned non-zero ({proc.returncode}): {' '.join(command)}")
+        if args.verbose:
+            print(proc.stderr.decode("utf-8"))
+        sys.exit(1)
+    symbols = {}
+    cur_symbol = None
+    underscore_name = None
+    for line in proc.stdout.decode("utf-8").split("\n"):
+        m = re_symbol_name.match(line)
+        if m:
+            cur_symbol = m.group(1)
+            continue
+        m = re_symbol_data.match(line)
+        if m:
+            if not cur_symbol:
+                # This can happen when objdump decides to print more than one
+                # line from the starting offset
+                continue
+            octet_string = m.group(1)
+            if "  " in octet_string:
+                [octet_string, _rest] = octet_string.split("  ", maxsplit=1)
+            octet_string = octet_string.replace(" ", "")
+            octets = [int(octet_string[i:i+2], 16) for i in range(0, len(octet_string) - 1, 2)]
+            if not octets:
+                continue  # fewer than 2 hex digits: no byte to record on this line
+            # We assume all values fit in 1 byte (sizeof and alignof);
+            # for the literal string, the first byte is enough.
+            # We profit from having the symbol name to fill a special machdep field.
+            underscore_name = cur_symbol.startswith("_")
+            s = cur_symbol.strip("_") # Normalize symbol names
+            symbols[s] = octets[0]
+            cur_symbol = None
+            continue
+    return symbols, underscore_name
+
+# This must remain synchronized with cil_types.ml's 'mach' type
+machdep = {
+    "sizeof_short": None,
+    "sizeof_int": None,
+    "sizeof_long": None,
+    "sizeof_longlong": None,
+    "sizeof_ptr": None,
+    "sizeof_float": None,
+    "sizeof_double": None,
+    "sizeof_longdouble": None,
+    "sizeof_void": None,
+    "sizeof_fun": None,
+    "size_t": None,
+    "wchar_t": None,
+    "ptrdiff_t": None,
+    "alignof_short": None,
+    "alignof_int": None,
+    "alignof_long": None,
+    "alignof_longlong": None,
+    "alignof_ptr": None,
+    "alignof_float": None,
+    "alignof_double": None,
+    "alignof_longdouble": None,
+    "alignof_str": None,
+    "alignof_fun": None,
+    "char_is_unsigned": None,
+    "underscore_name": None,
+    "const_string_literals": None,
+    "little_endian": None,
+    "alignof_aligned": None,
+    "has__builtin_va_list": None,
+    "compiler": None,
+    "cpp_arch_flags": None,
+    "version": None,
+}
+
+compilation_command = other_args + args.compiler_flags
+
+source_files = [
+    ("sizeof_alignof_standard.c", "number"),
+    ("sizeof_void.c", "number"),
+    ("sizeof_fun.c", "number"),
+    ("sizeof_longdouble.c", "number"),
+    ("alignof_longdouble.c", "number"),
+    ("alignof_fun.c", "number"),
+    ("alignof_str.c", "number"),
+    ("alignof_aligned.c", "number"),
+    ("size_t.c", "type"),
+    ("wchar_t.c", "type"),
+    ("ptrdiff_t.c", "type"),
+    ("char_is_unsigned.c", "bool"),
+    ("little_endian.c", "bool"),
+    ("const_string_literals.c", "const_string_literals"),
+    ("has__builtin_va_list.c", "has__builtin_va_list"),
+]
+
+# Compile each probe file, then decode the resulting object file according to
+# its kind: "number" and "bool" symbols map directly to machdep fields, "type"
+# symbols are named <field>_IS_<type>; the other two kinds are special cases.
+for (f, typ) in source_files:
+    p = Path(f)
+    cmd = compilation_command + [str(p)]
+    if args.verbose:
+        print(f"[INFO] running command: {' '.join(cmd)}")
+    proc = subprocess.run(cmd, capture_output=True)
+    if typ == "has__builtin_va_list":
+        # Special case: compilation success determines presence or absence
+        machdep["has__builtin_va_list"] = proc.returncode == 0
+        continue
+    if proc.returncode != 0:
+        print(f"WARNING: error during compilation of '{p}', skipping")
+        if args.verbose:
+            print(proc.stderr.decode("utf-8"))
+        continue
+    objfile = p.with_suffix(".o")
+    if not objfile.exists():
+        print(f"WARNING: could not find expected '{objfile}', skipping")
+        continue
+    if typ == "const_string_literals":
+        # Special case: try decoding different sections to find read-only object
+        # Try ".rodata" section (ELF)
+        symbols, _underscore_name = decode_object_file(objfile, section=".rodata")
+        if ".rodata" in symbols and symbols[".rodata"] == 0x25:  # 0x25 is '%', first byte of the probe literal
+            if args.verbose:
+                print(f"[INFO] setting const_string_literals to true")
+            machdep["const_string_literals"] = True
+        else:
+            # Try ".rdata" section (COFF)
+            symbols, _underscore_name = decode_object_file(objfile, section=".rdata")
+            if ".rdata" in symbols and symbols[".rdata"] == 0x25:
+                if args.verbose:
+                    print(f"[INFO] setting const_string_literals to true")
+                machdep["const_string_literals"] = True
+            else:
+                symbols, _underscore_name = decode_object_file(objfile)
+                if "const_string_literals" in symbols and symbols["const_string_literals"] == 0x25:
+                    # Found symbol in .data section => not const. NOTE(review): the symbol is a char* (see const_string_literals.c); confirm its first byte can be '%'
+                    if args.verbose:
+                        print(f"[INFO] setting const_string_literals to false")
+                    machdep["const_string_literals"] = False
+                else:
+                    print(f"WARNING: could not find const_string_literals in any of the expected sections, skipping")
+        continue
+    symbols, underscore_name = decode_object_file(objfile)
+    if machdep["underscore_name"] is None:
+        machdep["underscore_name"] = underscore_name
+    if not symbols:
+        print(f"WARNING: no symbols found in {objfile}")
+        continue
+    if typ == "number":
+        for name, value in symbols.items():
+            if name in machdep:
+                if args.verbose:
+                    print(f"[INFO] setting {name} to {value}")
+                machdep[name] = value
+            else:
+                print(f"WARNING: unexpected symbol '{name}' in '{objfile}', ignoring")
+    elif typ == "bool":
+        for name, value in symbols.items():
+            if name in machdep:
+                if value == 0x15:
+                    bvalue = True
+                elif value == 0xf4:
+                    bvalue = False
+                else:
+                    print(f"WARNING: unexpected value '{value}' for boolean '{name}' in '{objfile}', ignoring")
+                    continue
+                if args.verbose:
+                    print(f"[INFO] setting {name} to {bvalue}")
+                machdep[name] = bvalue
+            else:
+                print(f"WARNING: unexpected symbol '{name}' in '{objfile}', ignoring")
+    elif typ == "type":
+        for name, value in symbols.items():
+            if not ("_IS_" in name):
+                print(f"WARNING: unexpected symbol '{name}' in '{objfile}', ignoring")
+                continue
+            if value == 0xf4:
+                # Symbol found with 'false' => incompatible type, ignore
+                continue
+            elif value != 0x15:
+                print(f"WARNING: unexpected value '{value}' for symbol '{name}' in '{objfile}', ignoring")
+                continue
+            [name, original_type] = name.split("_IS_")
+            original_type = original_type.replace("_", " ")
+            if name in machdep:
+                if args.verbose:
+                    print(f"[INFO] setting {name} to {original_type}")
+                machdep[name] = original_type
+            else:
+                print(f"WARNING: unexpected symbol '{name}' (expected '{name}' in machdep) in '{objfile}', ignoring")
+    else:
+        sys.exit(f"AssertionError: f {f} typ {typ}")
+
+# Special fields
+
+machdep["cpp_arch_flags"] = args.cpp_arch_flags
+
+# Explicit options always win; '--version' output only fills in what is missing
+if args.compiler:
+    machdep["compiler"] = args.compiler.lower()
+if args.compiler_version:
+    machdep["version"] = args.compiler_version
+if not (args.compiler and args.compiler_version):
+    # Try to obtain version number from option '--version'
+    compiler_version_command = compilation_command + ["--version"]
+    proc = subprocess.run(compiler_version_command, capture_output=True)
+    if proc.returncode != 0:
+        print(f"WARNING: option '--version' unsupported by compiler; re-run this script with --compiler and --compiler-version")
+        if args.verbose:
+            print(proc.stderr.decode("utf-8"))
+    else:
+        # Only the first line of the compiler's output is used
+        version_line = proc.stdout.decode("utf-8").split("\n")[0]
+        if not args.compiler:
+            if "gcc" in version_line.lower():
+                machdep["compiler"] = "gcc"
+            elif "clang" in version_line.lower():
+                print(f"Note: clang is considered as a 'gcc'-type compiler for machdep purposes")
+                machdep["compiler"] = "gcc"
+            elif "msvc" in version_line.lower():
+                machdep["compiler"] = "msvc"
+            else:
+                # Unrecognized compiler: fall back to the first word of the command
+                machdep["compiler"] = compilation_command[0]
+        if not args.compiler_version:
+            machdep["version"] = version_line
+
+missing_fields = [f for [f, v] in machdep.items() if v is None]
+
+if missing_fields:
+    print("WARNING: the following fields are missing from the machdep definition:")
+    print(", ".join(missing_fields))
+
+print_machdep(machdep)
diff --git a/make_machdep/make_machdep_common.h b/make_machdep/make_machdep_common.h
new file mode 100644
index 00000000000..aecf880bf9b
--- /dev/null
+++ b/make_machdep/make_machdep_common.h
@@ -0,0 +1,24 @@
+#if __STDC_VERSION__ < 201112L && !defined(__COMPCERT__)
+/* Pre-C11: no _Alignof, and 'alignof' is not a C keyword before C23; use the GNU-style builtin */
+#define ALIGNOF __alignof__
+#else
+#define ALIGNOF _Alignof
+#endif
+
+#if __STDC_VERSION__ >= 201112L || defined(__COMPCERT__)
+/* Assume _Generic() is supported; yields 0x15 if T1 matches T2, 0xf4 otherwise */
+# define COMPATIBLE(T1, T2) _Generic(((T1){0}),  \
+                                     T2: 0x15,      \
+                                     default: 0xf4  \
+                                     )
+#else
+/* Expect that __builtin_types_compatible_p exists */
+# define COMPATIBLE(T1, T2) (__builtin_types_compatible_p(T1, T2) ? 0x15 : 0xf4)
+#endif
+/* The including file must #define TEST_TYPE before using the macros below. */
+#define TEST_TYPE_IS_HELPER1(test_type, type) test_type ## _IS_ ## type
+#define TEST_TYPE_IS_HELPER2(test_type, type) TEST_TYPE_IS_HELPER1(test_type, type)
+#define TEST_TYPE_IS(type) TEST_TYPE_IS_HELPER2(TEST_TYPE, type)
+/* Define unsigned char <TEST_TYPE>_IS_<type> (or ..._IS_<type_>) holding COMPATIBLE's result */
+#define TEST_TYPE_MAYBE(type) unsigned char TEST_TYPE_IS(type) = COMPATIBLE(TEST_TYPE, type)
+#define TEST_TYPE_MAYBE_(type, type_) unsigned char TEST_TYPE_IS(type_) = COMPATIBLE(TEST_TYPE, type)
diff --git a/make_machdep/ptrdiff_t.c b/make_machdep/ptrdiff_t.c
new file mode 100644
index 00000000000..13c895bdcf5
--- /dev/null
+++ b/make_machdep/ptrdiff_t.c
@@ -0,0 +1,6 @@
+#include "make_machdep_common.h"
+#include <stddef.h>
+#define TEST_TYPE ptrdiff_t
+
+TEST_TYPE_MAYBE(int);
+TEST_TYPE_MAYBE(long);
diff --git a/make_machdep/size_t.c b/make_machdep/size_t.c
new file mode 100644
index 00000000000..586f9b6ffae
--- /dev/null
+++ b/make_machdep/size_t.c
@@ -0,0 +1,6 @@
+#include "make_machdep_common.h"
+#include <stddef.h>
+#define TEST_TYPE size_t
+
+TEST_TYPE_MAYBE_(unsigned int, unsigned_int);
+TEST_TYPE_MAYBE_(unsigned long, unsigned_long);
diff --git a/make_machdep/sizeof_alignof_standard.c b/make_machdep/sizeof_alignof_standard.c
new file mode 100644
index 00000000000..8aaa2d80418
--- /dev/null
+++ b/make_machdep/sizeof_alignof_standard.c
@@ -0,0 +1,29 @@
+#include "make_machdep_common.h"
+
+/* We want to obtain values produced by the compiler.
+   In an ideal scenario, we are able to execute the binary, so we can just use
+   printf(). However, when cross-compiling, we may be unable to run the program.
+   Even worse, we may lack a proper runtime, and thus simply obtaining an
+   executable may be impossible.
+   However, we don't really need it: having an object file (with symbols) is
+   usually enough.
+
+   We store the values in global variables, since at the very least we can
+   examine the object file to retrieve the data.
+*/
+
+unsigned char sizeof_short = sizeof(short);
+unsigned char sizeof_int = sizeof(int);
+unsigned char sizeof_long = sizeof(long);
+unsigned char sizeof_longlong = sizeof(long long);
+unsigned char sizeof_ptr = sizeof(void*);
+unsigned char sizeof_float = sizeof(float);
+unsigned char sizeof_double = sizeof(double);
+
+unsigned char alignof_short = ALIGNOF(short);
+unsigned char alignof_int = ALIGNOF(int);
+unsigned char alignof_long = ALIGNOF(long);
+unsigned char alignof_longlong = ALIGNOF(long long);
+unsigned char alignof_ptr = ALIGNOF(void*);
+unsigned char alignof_float = ALIGNOF(float);
+unsigned char alignof_double = ALIGNOF(double);
diff --git a/make_machdep/sizeof_fun.c b/make_machdep/sizeof_fun.c
new file mode 100644
index 00000000000..58427c9b93e
--- /dev/null
+++ b/make_machdep/sizeof_fun.c
@@ -0,0 +1,5 @@
+#include "make_machdep_common.h"
+
+int main(void);
+
+unsigned char sizeof_fun = sizeof(main);
diff --git a/make_machdep/sizeof_longdouble.c b/make_machdep/sizeof_longdouble.c
new file mode 100644
index 00000000000..1d83113026e
--- /dev/null
+++ b/make_machdep/sizeof_longdouble.c
@@ -0,0 +1,3 @@
+#include "make_machdep_common.h"
+
+unsigned char sizeof_longdouble = sizeof(long double);
diff --git a/make_machdep/sizeof_void.c b/make_machdep/sizeof_void.c
new file mode 100644
index 00000000000..3f3e370d200
--- /dev/null
+++ b/make_machdep/sizeof_void.c
@@ -0,0 +1,3 @@
+#include "make_machdep_common.h"
+
+unsigned char sizeof_void = sizeof(void);
diff --git a/make_machdep/wchar_t.c b/make_machdep/wchar_t.c
new file mode 100644
index 00000000000..40825fdf07a
--- /dev/null
+++ b/make_machdep/wchar_t.c
@@ -0,0 +1,9 @@
+#include "make_machdep_common.h"
+#include <stddef.h>
+#define TEST_TYPE wchar_t
+
+TEST_TYPE_MAYBE_(unsigned short, unsigned_short);
+TEST_TYPE_MAYBE(short);
+TEST_TYPE_MAYBE_(unsigned int, unsigned_int);
+TEST_TYPE_MAYBE(int);
+TEST_TYPE_MAYBE(long);
-- 
GitLab