From 2c5f249a1b38d3ac08294414289580bd80d755a2 Mon Sep 17 00:00:00 2001
From: davidcok <davidcok@github.com>
Date: Mon, 29 Jul 2019 15:27:29 +0200
Subject: [PATCH] Vartiuos edits

---
 doc/userman/description.tex   | 53 +++++++++++++++++++++++++++--------
 doc/userman/grammar.tex       |  8 +++---
 doc/userman/introduction.tex  |  2 ++
 doc/userman/preprocessing.tex | 27 +++++++++---------
 4 files changed, 61 insertions(+), 29 deletions(-)

diff --git a/doc/userman/description.tex b/doc/userman/description.tex
index 232eac91..52f15957 100644
--- a/doc/userman/description.tex
+++ b/doc/userman/description.tex
@@ -4,7 +4,7 @@
 
 \fclang is currently still experimental and not part of regular \framac releases. It must be built from source and added to a \framac installation.
 The instructions for doing so are provided at 
-\url{https://frama-c/colm/frama-clang.html}.
+\url{https://frama-c.com/frama-clang.html}.
 
 \fclang depends on two software packages:
 \begin{itemize}
@@ -16,11 +16,11 @@ which itself is available from \url{http://releases.llvm.org}.
 Building and installing \fclang has two effects:
 \begin{itemize}
 \item The \fclang executable files are installed within the \framac installation
-\item The include files containing \acslpp specifications of C++ library functions
+\item Include files containing \acslpp specifications of C++ library functions
 are copied to \verb|$FRAMAC_SHARE/libc| and 
 \verb|$FRAMAC_SHARE/libc++|, where \verb|$FRAMAC_SHARE| is the path
 given by the command \lstinline|frama-c-config -print-share-path|.
-\end{itemize}.
+\end{itemize}
 These include files are replacements for the standard system include files.
 They should have the same definitions of C and C++ functions and classes, but
 with \acslpp annotations giving their specifications.
@@ -51,7 +51,7 @@ The options controlling \fclang are of two sorts:
 \begin{itemize}
 \item options known to \framac are interpreted and passed on to \fclang when
 \irg is invoked, perhaps with a different option name
-\item options known to \fclang directoy (and not ot \framac) must be 
+\item options known to \fclang directly (and not to \framac) must be 
 included in the internal command that invokes \irg using the \option{-cpp-extra-args} option. These options are described in \S\ref{sec:standalone}.
 \end{itemize}
 
@@ -104,9 +104,9 @@ TODO
 
 In normal use within \framac, the \irg executable is
 invoked automatically. However, it can also be run standalone.
-In this mode it accepts command-line options and a single file;
+In this mode it accepts command-line options and a single input file;
 it produces a C AST representing the translated C++, in a text format 
-of the CIL (C Intermediate language).
+of the Cil (C Intermediate language).
 
 \section{Clang options}
 
@@ -128,23 +128,52 @@ The most significant options for  \irg are these:
 	\item \option{-{-}version} -- print version information
 \end{itemize}
 
+\section{Custom ASTs}
+
+In standard mode, \framac invokes \irg on a file, producing an AST in intermediate format, and the reads that intermediate file into \framac to complete the processing.
+If some manipulation of the AST intermediate is needed, those two steps can be performed separately as follows:
+\begin{itemize}
+\item Produce an intermediate AST \lstinline|f.ast| for a given input file \lstinline|f.cpp| using the command \\
+\centerline{\normalsize{framaCIRGen <options> -o f.ast f.cpp}}
+\item Manipulate \lstinline|f.ast| as desired.
+\item Run \framac on the AST using the command \\
+\centerline{\normalsize{frama-c <options> -cpp-command "cat f.ast" f.cpp}}
+\end{itemize}
+
+If you have multiple files, do the following:
+\begin{itemize}
+\item Create the ast files for a group of files in \$files:\\
+\centerline{\bf \texttt{ for f in \$files; do framaCIRGen <options> -o \${f\%.*}.ast \$f ; done }}
+\item Manipulate the resulting .ast files as needed
+\item Execute a command like \\
+\centerline{\bf \texttt{frama-c -cpp-command "`pwd`/ct" \$files}}
+\end{itemize}
+where \lstinline|ct| is an executable similar to\\
+\centerline{\bf \texttt{do f in \$@ ; cat \$\{f.\%*\}.ast ; done}}
+
 \section{Frama-Clang specific options}
 
 There are also options that are specific to \fcl.
 These are listed below:
 \begin{itemize}
 	\item \option{-{-}info=<n>} -- sets the level of informational messages to \textbf{n}; 0 is completely quiet; increasing values are
-	more verbose. \option{-{-}info} sets the level to 1; 
-	\option{-{-}no-info} sets the level to 0. The \lstinline|frama-c| option \option{-fclang-msg-key=parse} is equivalent to setting a value of 1.
+	more verbose. \\
+	\option{-{-}info} sets the level to 1 \\
+	\option{-{-}no-info} sets the level to 0\\
+	 The \lstinline|frama-c| option \option{-fclang-msg-key=parse} is equivalent to setting a value of 1.
+	
 	\item \option{-{-}warn=<n>} -- sets the level of parser warning messages to \textbf{n}; 0 is completely quiet; increasing values are
-more verbose. \option{-{-}warn} sets the level to 1; 
-\option{-{-}no-warn} sets the level to 0. The \lstinline|frama-c| option \option{-fclang-warn-key=parse} is equivalent to setting a value of 1.
+more verbose. \option{-{-}warn} sets the level to 1\\
+\option{-{-}no-warn} sets the level to 0\\
+The \lstinline|frama-c| option \option{-fclang-warn-key=parse} is equivalent to setting a value of 1.
 
 	\item \option{-{-}debug=<n>} -- sets the level of parser debug messages to \textbf{n}; 0 is completely quiet; increasing values are
-more verbose. \option{-{-}debug} sets the level to 1; 
-\option{-{-}no-debug} sets the level to 0. 
+more verbose\\
+ \option{-{-}debug} sets the level to 1\\
+\option{-{-}no-debug} sets the level to 0\\
 The \lstinline|frama-c| option \option{-fclang-debug=<n>} is equivalent to setting a value of \textbf{n}.
 In particular, a debug value of 1 shows the command-line that invokes \irg.
+
 	\item \option{-{-}stop-annot-error} -- if set, then parsing stops on the first error; default is off
 	\item \option{-{-}gen-impl-method} -- TODO
 	\item \option{-{-}exit-code} -- if set, then the exit code of \irg is 1 if errors occur; this is not the default because then \lstinline|frama-c| would terminate upon
diff --git a/doc/userman/grammar.tex b/doc/userman/grammar.tex
index e17f5cbf..afa166d5 100644
--- a/doc/userman/grammar.tex
+++ b/doc/userman/grammar.tex
@@ -1,8 +1,8 @@
 \newcommand{\lang}{C++}
-\section{Grammar}
+\chapter{Grammar and parser for \acslpp}
 \label{sec:grammar}
 
-This section summarizes some of the considerations in writing a parser for \acslpp within \fclang.
+This section summarizes some of the technical implementation considerations in writing a parser for \acslpp within \fclang. This material may be useful for developers and maintainers of \fclang; it is not needed by users of \fclang.
 
 Recall that \fclang uses clang to parse \lang and a custom parser to parse \acslpp annotations, jointly producing a representation of the \lang+\acslpp source input in the Frama-C intermediate language. The first version of the \acslpp custom parser, swritten during the STANCE project, used a hand-written bison-like parser, but with function pointers to record state and actions rather than using a tool-generated table to drive the parsing. This design proved to be too brittle and difficult to efficiently change as new features were added to \acslpp. Consequently during the VESSEDIA project, the scanner and parser were completely rewritten, largely retaining the previous design's connections to clang, token definitions, and name lookup and type resolution.
 
@@ -15,8 +15,8 @@ Most \acslpp constructs start with a unique keyword (e.g., clauses begin with \l
 \item \textbf{Left recursion}. Expression grammars are typically left recursive. However, it is well-known how to factor out left recursion. The precedence order of operators is largely hard-coded into the grammar implementation; the usual left recursion poses no particular challenge.
 \item {terms vs. predicates}. \acslpp distinguishes terms and predicates, following the distinction between propositional and predicate logic. However, terms and predicates have very similar grammars. Furthermore, \acslpp allows boolean-value terms to be implicitly converted to predicates and allows predicates to be used within terms (such as for the conditional sub-expression in a ternary expression). This makes it not possible to distinguish terms and predicates inn top-down parsing. However, Frama-C has different AST nodes for the two, so it would require a very significant refactoring of Frama-C and all its plugins to unify terms and predicates (as other specification languages have done). Note that this problem is a challenge for any parser design. The previous and current parser designs adopted the same solution; maintain two parallel parses of expressions --- one as a term and one as a predicate. Error messages are emitted only when both parses fail or when a particular grammar production call for a particular type (term or predicate) and that one is not available.
 \item{terms vs. tsets}. TODO
-\item \textbf{cast vs. parenthesized expression} To determine whether an input like \lstinline|(T)+x| is (a) the sum of the parenthesized expression \lstinline|(T)| and \lstinline|x| or (b) a cast of \lstinline|x| to the type \lstinline|T|, one must know whether \lstinline|T| is a variable or type. This is a classic problem in parsing \lang; it reqwuires that identifiers be known to be either type names or variable names in the scanner. In addition, \lstinline|T| here can be an arbitrary type expression. For example, in \lang, a type expression can contain pointer operators that can look, at least initially like multiplications and they can contain template instantiations that look initially like comparisons. \fclang handles this situation by allowing a backtrackable parse. When a left parenthesis is parsed in an expression context, the parser proceeds by attempting a parse of a cast expression. If the contents of the parenthesis pair is successfully parsed as a type expression, it is assumed to be a cast expression. If such a parse fails, no error messages are emitted; rather the parse is rewound and proceeds again assuming the token sequence to be a parenthesized expression.
-\item \textbf{set comprehension}. The set comprehension expression follows traditional mathematics: \lstinline| { \textit{expr} | \textit{binders} ; \textit{predicate} . This structure poses two difficulties for parsers. First, the expression may contain bitwise-or operators, so it is not known to the parser whether an occurence of \verb:|: is the beginning of the binders or is just a bitwise-or operator. Second, the expression will use the variables declared in the binders section. However, the binders are not seen until after the expression is scanned. Note that these problems are not unique to a recursive descent design; they would challenge a LALR parser just as much. \textit{This particular feature is not yet implemented in \fclang, nor in Frama-C and so is not yet resolved in the parser implementation.}
+\item \textbf{cast vs. parenthesized expression} To determine whether an input like \lstinline|(T)+x| is (a) the sum of the parenthesized expression \lstinline|(T)| and \lstinline|x| or (b) a cast of \lstinline|x| to the type \lstinline|T|, one must know whether \lstinline|T| is a variable or type. This is a classic problem in parsing \lang; it requires that identifiers be known to be either type names or variable names in the scanner. In addition, \lstinline|T| here can be an arbitrary type expression. For example, in \lang, a type expression can contain pointer operators that can look, at least initially like multiplications and they can contain template instantiations that look initially like comparisons. \fclang handles this situation by allowing a backtrackable parse. When a left parenthesis is parsed in an expression context, the parser proceeds by attempting a parse of a cast expression. If the contents of the parenthesis pair is successfully parsed as a type expression, it is assumed to be a cast expression. If such a parse fails, no error messages are emitted; rather the parse is rewound and proceeds again assuming the token sequence to be a parenthesized expression.
+\item \textbf{set comprehension}. The set comprehension expression follows traditional mathematics: \lstinline: { $expr$ | $binders$ ; $predicate$ }:. This structure poses two difficulties for parsers. First, the expression may contain bitwise-or operators, so it is not known to the parser whether an occurence of \verb:|: is the beginning of the binders or is just a bitwise-or operator. Second, the expression will use the variables declared in the binders section. However, the binders are not seen until after the expression is scanned. Note that these problems are not unique to a recursive descent design; they would challenge a LALR parser just as much. \textit{This particular feature is not yet implemented in \fclang, nor in Frama-C and so is not yet resolved in the parser implementation.}
 \item \textbf{labeled expressions}. \acslpp allows expressions to have labels, designated by a \lstinline|id : | prefix. Thus the parser cannot know whether an initial \lstinline|id| is a variable or just a label until the colon is parsed. Thus this situation requires a lookahead of 2 tokens.
 
 \end{itemize}
diff --git a/doc/userman/introduction.tex b/doc/userman/introduction.tex
index 162dd60e..9ec919c7 100644
--- a/doc/userman/introduction.tex
+++ b/doc/userman/introduction.tex
@@ -20,3 +20,5 @@ The following sections describe the current status and limitations of the curren
     However, \fclang may not support all of the features of C++ within annotations.
 \end{itemize}
 
+The source material for this document is in Frama-C's gitlab repository: \url{git@git.frama-c.com:frama-c/frama-clang.git}, in directory \lstinline|doc/userman| .
+
diff --git a/doc/userman/preprocessing.tex b/doc/userman/preprocessing.tex
index 05de59fb..d414fe56 100644
--- a/doc/userman/preprocessing.tex
+++ b/doc/userman/preprocessing.tex
@@ -1,20 +1,21 @@
 \chapter{Preprocessing}
+\label{sec:preprocessing}
 
 This section describes the implementation of the C++ preprocessor for \acslpp
-annotations. Recall that thte C++ preprocessor replaces comments (including
+annotations. Recall that the C++ preprocessor replaces comments (including
 \acslpp comments) by white space, without operations such as handling
 preprocessor directives. Accordingly, \fclang must handle standard
 preprocessing within \acslpp annotations itself.
 
 As a refresher, the C/C++ preprocessor (CPP) (cf. \url{https://gcc.gnu.org/onlinedocs/cpp/}) conceptually implements the following operations on a C++ source file:
 \begin{itemize}
-	\item The input is read and broken it into a sequence of physical lines according to the line terminators (ASCII character sequences \texttt{\\r}, \texttt{\\n}, or \texttt{\\r\\n}).
+	\item The input is read and broken it into a sequence of physical lines according to the line terminators (ASCII character sequences \verb|\\r|, \verb|\\n|, or \verb|\\r\\n|).
 	\item Each C trigraph is replaced by its corresponding character.
 	\item Any backslash-whitespace-line-terminator sequence is removed and the line that it ends is combined with the following line.
 	\item Comments are replaced by single spaces. This requires tokenizing the input to avoid recognizing comment markers within strings as indicating a comment. Note that this allows block comments to hide line terminations.
 	\item The input text is divided into preprocessing tokens grouped in logical lines. Each preprocessor token becomes a compiler token (except where \#\# concatenation occurs). However, \acslb tokens are slightly different, as described below.
 	\item The source text is transformed according to any preprocessing directives contained within it. Each preprocessing directive must be contained within one logical line
-    \item adjacent string literals (separatedonly by white-space or line-endings) are concatenated into a single string literal.
+    \item adjacent string literals (separated only by white-space or line-endings) are concatenated into a single string literal.
 	
 \end{itemize}
 The result is a sequence of preprocessing tokens that is passed on to the 
@@ -43,7 +44,7 @@ Preprocessor tokens (per CPP) belong to one of five categories. White space (spa
 	\item other tokens: \verb|@|, \verb|#|, \verb|`| and all non-ASCII single characters.
 \end{itemize}
 The above token definitions imply that arbitrary text can always be broken into
-legitimate tokens, with the exception of a few characters and of badly formed unicode sequences.
+legitimate preprocessor tokens, with the exception of a few characters and of badly formed unicode sequences.
 
 TODO - unicode characters
 
@@ -57,13 +58,13 @@ Note that not all preprocessor tokens are valid C/C++ parser tokens. Tokens in t
 	\begin{itemize}
         \item[] all \acslb keywords that begin with a backslash, such as
          \verb|\result|.
-		\item[] $==>$ (logical implies)
-		\item[] $-->$ (bit-wise implies)
-		\item[] $<==>$ (logical equality)
-		\item[] $<-->$ (bit-wise equality)
-		\item[] \verb|^^| (logical inequality)
-		\item[] \verb|^*| (list repetition)
-		\item[] \verb:[|: and \verb:|]: (list creation)
+		\item \verb|==>| (logical implies)
+		\item \verb|-->| (bit-wise implies)
+		\item \verb|<==>| (logical equality)
+		\item \verb|<-->| (bit-wise equality)
+		\item \verb|^^| (logical inequality)
+		\item \verb|^*| (list repetition)
+		\item \verb:[|: and \verb:|]: (list creation)
 	\end{itemize}
 	These \acslb tokens need to be assembled from multiple CPP tokens (and those CPP tokens must not be separated by white space)
 	\item A CPP numeric token that contains .. will not be a legal C/C++ number, but may be a sequence of
@@ -76,7 +77,7 @@ Note that not all preprocessor tokens are valid C/C++ parser tokens. Tokens in t
 \end{itemize}
 
 \subsection{Preprocessor directives}
-A preprocessing directive consists of a single logical line (after the previous preprocessing phases have been completed) that begins with optional white space, the '\#' character, additional optional white space, and a preprocessor directive identifier.
+A preprocessing directive consists of a single logical line (after the previous preprocessing phases have been completed) that begins with optional white space, the \verb|#| character, additional optional white space, and a preprocessor directive identifier.
 The preprocessing language contains a fixed set of preprocessing directive identifiers:
 \begin{itemize}
 	\item \texttt{define}, \texttt{undef}
@@ -93,7 +94,7 @@ Consequently, any directives contained in the annotation are not seen when the s
 Accordingly, \acslpp imposes constraints on the directives that may be present within annotations:
 \begin{itemize}
 	\item \texttt{define} and \texttt{undef} are not permitted in an annotation
-	\item \texttt{if}, \texttt{ifdef}, \texttt{ifndef}, \texttt{elif}, \texttt{else}, \texttt{endif} are permitted but must be completely nested within the annotation in which they appear (an \texttt{endif} and its corresponding \texttt{if/ifdef/ifndef} must be in the same annotation comment.)
+	\item \texttt{if}, \texttt{ifdef}, \texttt{ifndef}, \texttt{elif}, \texttt{else}, \texttt{endif} are permitted but must be completely nested within the annotation in which they appear (an \texttt{endif} and its corresponding \texttt{if}, \texttt{ifdef}, or \texttt{ifndef} directive must be in the same annotation comment.)
 	\item \texttt{warning} and \texttt{error} are permitted
 	\item \texttt{include} is permitted, but will cause errors if it contains, as is almost always the case, either \texttt{define} or \texttt{pragma} directives
 	\item \texttt{line} - TBD
-- 
GitLab