From 44664ea21a06a0ebf2ffea4a2b4b0f5e79cca78b Mon Sep 17 00:00:00 2001
From: Virgile Prevosto <virgile.prevosto@m4x.org>
Date: Wed, 22 Mar 2023 16:40:54 +0100
Subject: [PATCH] [devman] Modify Machdep section in devman according to new
 management mechanism

---
 doc/developer/advance.tex | 261 +++++++++++++++++++++-----------------
 doc/developer/changes.tex |   9 +-
 2 files changed, 147 insertions(+), 123 deletions(-)

diff --git a/doc/developer/advance.tex b/doc/developer/advance.tex
index 5a13d511697..af30488ed5b 100644
--- a/doc/developer/advance.tex
+++ b/doc/developer/advance.tex
@@ -3213,114 +3213,51 @@ type-checked. Transformation hooks are registered through
   None.
 \end{prereq}
 
+\subsection{Generating a custom model}\label{sec:gener-cust-model}
 Several aspects of the C standard that are implementation-defined, such as
 the width of standard integer types, endianness, signedness of the
 \texttt{char} type, etc., as well as a few compiler and architecture specific
 features, can be customized using a \texttt{machdep} configuration,
 defining a new machine model.
 
-To create a new machine model, define an instance of \verb+Cil_types.mach+%
-\scodeidxdef{Cil\_types}{mach}. You can base it on the examples available in
-\verb+tests/misc/custom_machdep/custom_machdep.ml+ and
-\verb+src/kernel_internals/runtime/machdeps.ml+.
-The new definition can be added to \framac's database using
-\verb+File.new_machdep+\scodeidxdef{File}{new\_machdep}.
-
-\begin{example}
-  A custom machine description may be implemented as follows
-  (the meaning of each field is presented later in this section):
-\begin{ocamlcode}
-let my_machine =
-{
-  version          = "generic C compiler for my machine";
-  compiler         = "generic";
-  cpp_arch_flags   = [];
-  sizeof_short     = 2;
-  sizeof_int       = 4;
-  sizeof_long      = 4;
-  sizeof_longlong  = 8;
-  sizeof_ptr       = 4;
-  sizeof_float     = 4;
-  sizeof_double    = 8;
-  sizeof_longdouble  = 12;
-  sizeof_void      = 1;
-  sizeof_fun       = 1;
-  size_t = "unsigned long";
-  wchar_t = "int";
-  ptrdiff_t = "int";
-  alignof_short = 2;
-  alignof_int = 4;
-  alignof_long = 4;
-  alignof_longlong = 4;
-  alignof_ptr = 4;
-  alignof_float = 4;
-  alignof_double = 4;
-  alignof_longdouble = 4;
-  alignof_str = 1;
-  alignof_fun = 1;
-  alignof_aligned = 16;
-  char_is_unsigned = false;
-  const_string_literals = true;
-  little_endian = true;
-  underscore_name = false ;
-  has__builtin_va_list = true;
-}
-
-let () = File.new_machdep "my_machine" my_machine
-\end{ocamlcode}
-\end{example}
-
-\index{Command Line!-machdep@\texttt{-machdep}}%
-After this code is loaded, \framac can be instructed to use the new machine
-model using the \texttt{-machdep} command line option.
-
-If you intend to use \framac's standard library headers, you must also do the
-following:
-
+Machine models are described as \texttt{YAML} files, following
+\texttt{machdeps/machdep-schema.yaml} schema in \texttt{FRAMAC\_SHARE}.
+Predefined machdeps are also located in the \texttt{machdeps} directory
+of \framac's \texttt{SHARE} directory and can be used as reference for
+defining new \texttt{machdep} by hand. It is also possible to automatically
+generate a \texttt{YAML} file with the \texttt{make\_machdep.py} script in
+the \texttt{machdeps/make\_machdep} directory. This script requires a
+C11-compliant cross-compiler for the architecture you want to describe.
+Its main options are:
 \begin{itemize}
-\item define constant \verb+__FC_MACHDEP_<CUSTOM>+, replacing \verb+<CUSTOM>+
-  with the name (in uppercase letters) of your created machdep;
-  this can be done via \verb+-cpp-extra-args="-D__FC_MACHDEP_<CUSTOM>"+;
-\item provide a header file with macro definitions corresponding to your \caml
-  definitions. For the most part, these are macros prefixed by \verb+__FC_+,
-  corresponding to standard C macro definitions, {\it e.g.},
-  \verb+__FC_UCHAR_MAX+. These definitions are used by \framac's
-  \verb+<limits.h>+ and other headers to provide the standard C definitions.
-  The test file \verb+tests/misc/custom_machdep/__fc_machdep_custom.h+
-  (reproduced below)
-  contains a complete example of the required definitions. Other examples can
-  be found in \verb+share/libc/__fc_machdep.h+.
+\item \texttt{-compiler <c>}: the cross-compiler to be used for generating
+  the machdep
+\item \texttt{-cpp-arch-flags <flag>}: an option given to the compiler for
+selecting the desired architecture. Multiple occurrences of this option can
+occur if you want to pass several options
+\item \texttt{-o <file>}: put the generated YAML into the given file (default
+is to use standard output).
+\item \texttt{--help}: outputs the list of all options of the script
 \end{itemize}
-Make sure that your custom header defines the \verb+__FC_MACHDEP+
-include guard, and that the program you are analyzing includes this header
-before all other headers. One way to ensure this without having to modify any
-source files is to use an option such as \verb+-include+ in GCC.
-
-\begin{example}
-  Contents of \verb+tests/misc/custom_machdep/__fc_machdep_custom.h+, used as
-  example for creating custom machdeps. Notice the unusual size for \verb+int+
-  (3 bytes), selected for testing purposes only, and inconsistent with the
-  chosen values for \verb+INT_MIN+ and \verb+INT_MAX+, which do not fit
-  in 3 bytes.
-\lstinputlisting{../../tests/misc/custom_machdep/__fc_machdep_custom.h}
-\end{example}
-
-An example of the complete command-line is presented below, for a custom
-machdep called \texttt{myarch}, defined in file \verb+my_machdep.ml+ and
-with stdlib constants defined in \verb+machdep_myarch.h+:
 
-\begin{listing-nonumber}
-  frama-c -load-script my_machdep.ml -machdep myarch \
-    -cpp-extra-args="-D__FC_MACHDEP_MYARCH -include machdep_myarch.h"
-\end{listing-nonumber}
+In order to communicate machine-related information to the preprocessor (notably
+the value of standard macros), \framac generates a specific header,
+\verb+__fc_machdep.h+, that is automatically included by the standard headers from
+\framac's standard C library. Field \verb+custom_defs+ of the YAML file allows
+customizing this header (see next section for more information)
 
-\section{Machdep record fields}\label{sec:machdep-fields}
+\subsection{Machdep record fields}\label{sec:machdep-fields}
 
-Each field in the machdep record is succintly described in the \verb+Cil_types+
-module. We present below a thorough description of each field.
+Each field of the machdep is succintly described in the
+\verb+machdep-schema.yaml+ file.
+We present below a thorough description of each field.
 
+\paragraph{Meta-data}
 \begin{description}
 \item[\texttt{version}]: human-readable textual description of the machdep.
+\item[\texttt{machdep\_name}]: name of the machdep, must only contain alphanumeric
+  characters. If it is e.g. \verb+custom_name+, the generated header will
+  define a macro of the form \verb+__FC_MACHDEP_CUSTOM_NAME+.
 \item[\texttt{compiler}]: defines whether special compiler-specific extensions
   will be enabled. It should be one of the strings below:
   \begin{itemize}
@@ -3341,6 +3278,9 @@ module. We present below a thorough description of each field.
   64-bit multiarch GCC.
   Note that, in practice, very few programs rely on such predefined macros,
   such as \verb+__x86_64+ and \verb+__i386+.
+\end{description}
+\paragraph{Standard sizes and alignment constraints}
+\begin{description}
   \item[\texttt{sizeof\_short}]: size (in bytes) of the \verb+short+ type.
   \item[\texttt{sizeof\_int}]: size (in bytes) of the \verb+int+ type.
   \item[\texttt{sizeof\_long}]: size (in bytes) of the \verb+long+ type.
@@ -3363,16 +3303,10 @@ module. We present below a thorough description of each field.
     \framac plugins,  but this field exists for future expansion, and
     to compute \verb+sizeof+ of aggregates properly.
   \item[\texttt{sizeof\_void}]: the result of evaluating \verb+sizeof(void)+
-    by the compiler (or 0 if unsupported).
+    by the compiler (or negative if unsupported).
   \item[\texttt{sizeof\_fun}]: the result of evaluating \verb+sizeof(f)+, where
     \verb+f+ is a function ({\em not} a function pointer) by the compiler
     (or negative if unsupported).
-  \item[\texttt{size\_t}]: a string containing the actual type that
-    \verb+size_t+ expands to, e.g. \verb+"unsigned long"+.
-  \item[\texttt{wchar\_t}]: a string containing the actual type that
-    \verb+wchar_t+ expands to. If unsupported, you can use \verb+int+.
-  \item[\texttt{ptrdiff\_t}]: a string containing the actual type that
-    \verb+ptrdiff_t+ expands to. If unsupported, you can use \verb+int+.
   \item[\texttt{alignof\_short}]: the result of evaluating
     \verb+_Alignof(short)+.
   \item[\texttt{alignof\_int}]: the result of evaluating \verb+_Alignof(int)+.
@@ -3395,31 +3329,120 @@ module. We present below a thorough description of each field.
     \verb+aligned+ attribute (or 1 if unsupported). This corresponds to the
     default alignment when using \verb+#pragma packed()+ without a numeric
     argument.
+\end{description}
+
+\paragraph{Standard types}
+
+\begin{description}
+\item[\texttt{int\_fast8\_t}]: a string containing the actual type that
+  \verb+int_fast8_t+ expands to. Usually \verb+signed char+.
+\item[\texttt{int\_fast16\_t}]: a string containing the actual type that
+  \verb+int_fast16_t+ expands to. Usually \verb+int+ or \verb+long+.
+\item[\texttt{int\_fast32\_t}]: a string containing the actual type that
+  \verb+int_fast_32_t+ expands to. Usually \verb+int+ or \verb+long+.
+\item[\texttt{int\_fast64\_t}]: a string containing the actual type that
+  \verb+int_fast64_t+ expands to. Usually \verb+long+ or \verb+long long+.
+\item[\texttt{uint\_fast8\_t}]: a string containing the actual type that
+  \verb+uint_fast8_t+ expands to. Usually \verb+unsigned char+.
+\item[\texttt{uint\_fast16\_t}]: a string containing the actual type that
+  \verb+uint_fast16_t+ expands to. Usually \verb+unsigned+ or \verb+unsigned long+.
+\item[\texttt{uint\_fast32\_t}]: a string containing the actual type that
+  \verb+uint_fast_32_t+ expands to. Usually \verb+unsigned int+ or \verb+unsigned long+.
+\item[\texttt{uint\_fast64\_t}]: a string containing the actual type that
+  \verb+uint_fast64_t+ expands to. Usually \verb+unsigned long+ or
+  \verb+unsigned long long+.
+\item[\texttt{int\_ptr\_t}]: a string containing the actual type that
+  \verb+int_ptr_t+ expands to, e.g. \verb+long+
+\item[\texttt{uint\_ptr\_t}]: a string containing the actual type that
+  \verb+uint_ptr_t+ expands to, e.g. \verb+unsigned long+
+\item[\texttt{size\_t}]: a string containing the actual type that
+  \verb+size_t+ expands to, e.g. \verb+unsigned long+.
+\item[\texttt{ssize\_t}]: a string containing the actual type that
+  \verb+ssize_t+ expands to, e.g. \verb+long+
+\item[\texttt{wchar\_t}]: a string containing the actual type that
+  \verb+wchar_t+ expands to. If unsupported, you can use \verb+int+.
+\item[\texttt{ptrdiff\_t}]: a string containing the actual type that
+  \verb+ptrdiff_t+ expands to. If unsupported, you can use \verb+int+.
+\item[\texttt{sig\_atomic\_t}]: a string containing the actual type that
+  \verb+sig_atomic_t+ expands to (i.e. an integer type that can be
+  accessed atomically even in presence of interrupts).
+\item[\texttt{time\_t}]: a string containing the actual type that
+  \verb+time_t+ expands to (i.e. an integer type that can hold time
+  values in seconds).
+\item[\texttt{wint\_t}]: a string containing the actual type that
+  \verb+wint_t+ expands to (i.e. an integer type capable of holding any
+  \verb+wchar_t+ and \verb+WEOF+)
+\end{description}
+
+\paragraph{Standard macros}
+
+Note that all fields described in this paragraph have \texttt{string} values,
+even if they denote numeric constants. In order to avoid errors when loading the
+YAML file, you can force them to be considered as strings by enclosing them between
+single or double quotes, as in \verb+eof: '(-1)'+ (as a sidenote, negative values
+should be enclosed in parentheses, in order to ensure safe macro expansions).
+
+\begin{description}
+  \item[\texttt{bufsiz}]: value of the \texttt{BUFSIZ} macro, i.e. the size
+    of buffers used by I/O functions in \texttt{stdio.h}
+  \item[\texttt{eof}]: value of the \texttt{EOF} macro, the value returned by input
+    functions in \texttt{stdio.h} to indicate the end of the stream.
+  \item[\texttt{errno}]: list of possible errors with their numeric value. In order
+    to be easier to read, the content of this field is in fact written itself as an
+    object whose fields are the name of the errors. For instance, for a machdep defining
+    only the three errors mandated by the C standard, we would have:
+    \begin{verbatim}
+    errno:
+        edom: 1
+        eilseq: 2
+        erange: 3
+    \end{verbatim}
+  \item[\texttt{filename\_max}]: value of the \texttt{FILENAME\_MAX} macro, which
+    denotes the longest name that is guaranteed to be accepted by \texttt{fopen}.
+  \item[\texttt{fopen\_max}]: value of the \texttt{FOPEN\_MAX} macro, which denotes the
+    greatest number of streams that can be simultaneously opened by the program.
+  \item[\texttt{host\_name\_max}]: value of the \texttt{HOST\_NAME\_MAX} macro, which
+    denotes the maximum length of a hostname (without terminating null byte).
+  \item[\texttt{l\_tmpnam}]: value of the \texttt{L\_tmpnam} macro, which denotes
+    the maximum size of a temporary filename as returned by \texttt{tmpnam}.
+  \item[\texttt{mb\_cur\_max}]: value of the \texttt{MB\_CUR\_MAX} macro, which denotes
+    the maximum number of bytes for a character in the current locale (usually
+    \texttt{'1'})
+  \item[\texttt{nsig}]: number of possible signals (non-standard macro, can be left
+    empty if undefined for the current machdep)
+  \item[\texttt{path\_max}]: value of the \texttt{PATH\_MAX} macro, which denotes
+    the maximum size (including terminating null) that can be stored in a buffer
+    when returning a pathname
+  \item[\texttt{posix\_version}]: value of the \texttt{\_POSIX\_VERSION} macro.
+    Leave empty on non-POSIX machdeps.
+  \item[\texttt{rand\_max}]: value of the \texttt{RAND\_MAX} macro, which denotes
+    the maximum value returned by \texttt{rand}
+  \item[\texttt{tmp\_max}]: value of the \texttt{TMP\_MAX} macro, which denotes
+    the minimum number of temporary filenames returned by \texttt{tmpnam} that
+    are guaranteed to be distinct.
+  \item[\texttt{tty\_name\_max}]: value of the \texttt{TTY\_NAME\_MAX} macro, which
+    denotes the maximum length of a terminal device name (including terminating null byte).
+  \item[\texttt{weof}]: value of the \texttt{WEOF} macro, similar to \texttt{EOF},
+    but for wide chars.
+  \item[\texttt{wordsize}]: value of the \texttt{\_\_WORDSIZE} macro, which denotes
+    the length of a word on the current architecture.
+\end{description}
+
+\paragraph{Other features}
+\begin{description}
   \item[\texttt{char\_is\_unsigned}]: whether type \verb+char+ is unsigned.
-  \item[\texttt{const\_string\_literals}]: whether string literals have const
-    chars, or are writable. If \verb+true+, the following code has undefined
-    behavior, otherwise it is defined: \verb+char *s = "no"; s[0] = 'g';+.
-  \item[\texttt{little\_endian}]: whether the machine is little endian.
-  \item[\texttt{underscore\_name}]: whether the compiler generates assembly
-    labels by prepending \verb+_+ to the identifier. That is, will function
-    \verb+foo()+ have the label \verb+foo+, or \verb+_foo+?
+  \item[\texttt{little\_endian}]: whether the machine is little endian or big
+    endian\footnote{More exotic endianness such as mixed-endian are currently unsupported}
   \item[\texttt{has\_\_builtin\_va\_list}]: whether \verb+__builtin_va_list+
     is a (built-in) type known by the preprocessor.
   \item[\texttt{\_\_thread\_is\_keyword}]: whether \verb+__thread+ is a
     keyword (otherwise, it can be used as a standard identifier).
+  \item[\texttt{custom\_defs}]: arbitrary text that will be appended
+    verbatim at the
+    end of the generated header file (this is empty in machdeps that are
+    generated by the \texttt{make\_machdep.py} script).
 \end{description}
 
-\paragraph{Writing a new machdep}
-
-Writing a machdep for a new architecture is not trivial, due to the fact
-that some steps are hard to automate. If you have a working compiler for the
-target architecture, you can use it to produce an executable that will print
-the contents of expressions such as \verb+sizeof(long)+, \verb+_Alignof(int)+,
-etc. You can also use the compiler to test for unsupported features.
-In case \verb+printf+ is not available, you can use the exit code of the
-program (return code of \verb+main+). In case you can only preprocess, but not
-compile and run the program, the assembly code may provide some useful data.
-
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 
 \section{Visitors}\label{adv:visitors}\index{Visitor|bfit}
diff --git a/doc/developer/changes.tex b/doc/developer/changes.tex
index bf81337e8f7..04a799989f9 100644
--- a/doc/developer/changes.tex
+++ b/doc/developer/changes.tex
@@ -5,10 +5,11 @@
 This chapter summarizes the major changes in this documentation between each
 \framac release, from newest to oldest.
 
-%\section*{Frama-C+dev}
-%\begin{itemize}
-%\item …
-%\end{itemize}
+\section*{Frama-C+dev}
+\begin{itemize}
+\item \textbf{Customizing the machine model}: Rewrite section according to new
+  machdep management mechanism
+\end{itemize}
 
 \section*{26.0 Iron}
 \begin{itemize}
-- 
GitLab