-
Virgile Prevosto authoredVirgile Prevosto authored
ACSLLexer.cpp 118.44 KiB
/**************************************************************************/
/* */
/* This file is part of Frama-Clang */
/* */
/* Copyright (C) 2012-2021 */
/* CEA (Commissariat à l'énergie atomique et aux énergies */
/* alternatives) */
/* */
/* you can redistribute it and/or modify it under the terms of the GNU */
/* Lesser General Public License as published by the Free Software */
/* Foundation, version 2.1. */
/* */
/* It is distributed in the hope that it will be useful, */
/* but WITHOUT ANY WARRANTY; without even the implied warranty of */
/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */
/* GNU Lesser General Public License for more details. */
/* */
/* See the GNU Lesser General Public License version 2.1 */
/* for more details (enclosed in the file LICENSE). */
/* */
/**************************************************************************/
//
// Description:
// Implementation of the ACSL++ lexer.
//
#ifdef __GNUC__
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wclass-memaccess"
#endif
#include <sstream>
#include <climits>
#include <string>
#include "clang/Basic/Version.h"
#include "clang/Basic/Diagnostic.h"
#include "llvm/ADT/SmallString.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/DeclTemplate.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Sema/Sema.h"
#include "clang/Sema/Lookup.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/Lex/MacroArgs.h"
#ifdef __GNUC__
#pragma GCC diagnostic pop
#endif
#include "ACSLLexer.h"
#include "intermediate_format.h"
namespace Acsl {
using namespace DLexer;
Lexer::Error::Error(location position, const std::string& msg)
: filePos(position->filename1), linePos(position->linenum1),
columnPos(position->charnum1), message(msg) {}
std::string
Lexer::Error::str() const {
std::ostringstream s;
s << filePos << ":" << linePos << ":" << columnPos << ": [lexer error] " << message;
return s.str();
}
Lexer::Lexer(const clang::Sema* sema):
_hasFinished(false),
_tokenLocation(NULL),
_hasNewlineToken(false),
_context('\0'),
_extension(0),
_charLitKind(DLexer::CharacterLiteralToken::TUndefined),
_stateLexer(SLStandard),
_clangSema(sema),
_AcceptedUtf8Symbols(),
_clangTokensSet(false),
_ppCheckOnly(true)
{
// TODO - can we avoid creating these everytime we create a lexer
lexerWarning = _clangSema->Diags.getCustomDiagID(clang::DiagnosticsEngine::Warning,"[lexer] %0");
lexerError = _clangSema->Diags.getCustomDiagID(clang::DiagnosticsEngine::Error,"[lexer] %0");
ppWarning = _clangSema->Diags.getCustomDiagID(clang::DiagnosticsEngine::Warning,"[pp] %0");
ppError = _clangSema->Diags.getCustomDiagID(clang::DiagnosticsEngine::Error,"[pp] %0");
}
Lexer::ReadResult
Lexer::readEndComment(const std::string& buffer, size_t& position, location loc) {
size_t pos;
if (_context == '*')
pos = buffer.find("*/", position);
else
pos = buffer.find('\n', position);
unsigned lineEnd = loc->linenum2;
unsigned charEnd = loc->charnum2;
if (_context == '*') {
unsigned previousCharEnd = charEnd;
size_t subpos = buffer.find('\n', position);
if (subpos != std::string::npos
&& (pos == std::string::npos || subpos < pos)) {
++lineEnd;
previousCharEnd = (subpos-position)+charEnd;
charEnd = 1;
size_t subsubpos;
bool doesContinue;
do {
subsubpos = buffer.find('\n', subpos);
doesContinue = (subsubpos != std::string::npos
&& (pos == std::string::npos || subsubpos < pos));
if (doesContinue) {
++lineEnd;
previousCharEnd = (subsubpos-subpos)+1;
subpos = subsubpos;
};
} while (doesContinue);
charEnd = (pos != std::string::npos) ? (pos-subpos)+2
: (buffer.length()-subpos-1);
if (charEnd == 0) {
charEnd = previousCharEnd;
if (lineEnd > loc->linenum2)
--lineEnd;
};
}
else
charEnd += (pos != std::string::npos)
? (pos-position)+1 : (buffer.length()-position-1);
}
else
charEnd += (pos != std::string::npos)
? (pos-position) : (buffer.length()-position-1);
if (pos != std::string::npos) {
_currentToken.readFrom(buffer, position, pos-position); // FIXME - won't translate characters
position = pos;
if (_context == '*')
position += 2;
else {
position++;
_hasNewlineToken = true;
};
CommentToken* result = new CommentToken;
_token = Token(result);
_currentToken >> result->content();
loc->linenum2 = lineEnd;
loc->charnum2 = charEnd;
return RRHasToken;
};
loc->linenum2 = lineEnd;
loc->charnum2 = charEnd+1;
if (buffer.length() > 0 && buffer[buffer.length()-1] == '\n') {
++loc->linenum2;
loc->charnum2 = 1;
};
if (_context == '*') {
_errorList.push_back(Error(loc, "comment not terminated"));
return RRFinished;
};
_currentToken.readFrom(buffer, position, buffer.size()-position);
position = buffer.size();
CommentToken* result = new CommentToken;
_token = Token(result);
_currentToken >> result->content();
loc->linenum2 = lineEnd;
loc->charnum2 = charEnd;
return RRHasToken;
}
Lexer::ReadResult
Lexer::readProtectedToken(const std::string& buffer, size_t& position, location loc) {
char ch = buffer[position];
while (isalnum(ch) || ch == '_') {
ch = advanceChar2(buffer,position,loc);
};
std::string result;
_currentToken >> result;
_token = protectedKeywordOrIdentifier(result);
return RRHasToken;
}
Token Lexer::protectedKeywordOrIdentifier(const std::string& textNoBS) {
KeywordToken::Map::const_iterator
found = KeywordToken::_protectedKeywords.find(textNoBS);
if (found != KeywordToken::_protectedKeywords.end()) {
KeywordToken result(found->second);
return Token(result);
}
else {
// treat it as a (built-in) identifier
IdentifierToken* ident = new IdentifierToken("\\" + textNoBS);
return Token(AbstractToken::TIdentifier,ident);
};
}
Lexer::ReadResult
Lexer::readIdentifierToken(const std::string& buffer, size_t& position, location loc) {
char ch = buffer[position];
while (isalnum(ch) || ch == '_') { // FIXME - $$ in identifier?
ch = advanceChar2(buffer,position,loc);
};
std::string ident;
_currentToken >> ident;
KeywordToken::Map::const_iterator found =
KeywordToken::_unprotectedKeywords.find(ident);
if (found != KeywordToken::_unprotectedKeywords.end()) {
KeywordToken* result = new KeywordToken;
result->setType(found->second);
_token = Token(result);
}
else {
IdentifierToken* result = new IdentifierToken(ident);
_token = Token(AbstractToken::TIdentifier, result);
}
return RRHasToken;
}
Lexer::ReadResult
Lexer::readNumberToken(const std::string& buffer, size_t& position,
location loc) {
while(true) {
char ch = buffer[position];
switch (_litState) {
case LeadZero: {
if ( ch == 'x' || ch == 'X' ) {
_digits = Hex;
_litState = Digits;
advanceChar2(buffer,position,loc);
continue;
}
if (ch == 'b' || ch == 'B' ) {
_digits = Bin;
_litState = Digits;
advanceChar2(buffer,position,loc);
continue;
}
if (ch == '.') {
if (buffer[position+1] == '.') {
// special case: 0..xxx is a range of integers.
// Just return the lexeme 0 as an int constant.
IntegerLiteralToken* zero = new IntegerLiteralToken;
zero->setType(IntegerLiteralToken::TOctal);
_currentToken >> zero->value();
_token = Token(zero);
return RRHasToken;
}
_digits = Dec;
_litState = FracPart;
advanceChar2(buffer,position,loc);
continue;
}
_digits = Oct;
_litState = Digits;
continue;
}
case Digits: {
while (isDigit(ch)) {
advanceChar2(buffer,position,loc);
ch = buffer[position]; // = continue;
}
// We only support decimal or hexadecimal floats
if (ch == '.' && (_digits != Oct)) {
if (buffer[position+1] == '.') {
// special case: xxx..expr is a range of integers.
// Just return the lexeme xxx as an int constant.
IntegerLiteralToken* cst = new IntegerLiteralToken;
setType(cst);
_currentToken >> cst->value();
_token = Token(cst);
return RRHasToken; // FIXME - set secondaryState?
}
_litState = FracPart;
advanceChar2(buffer,position,loc);
continue;
}
if ((ch == 'e' || ch == 'E') && (_digits == Dec || _digits == Bin)) {
_litState = Exponent;
advanceChar2(buffer,position,loc);
continue;
}
if ((ch == 'p' || ch == 'P') && _digits == Hex) {
_litState = Exponent;
advanceChar2(buffer,position,loc);
continue;
}
// We are seeing an integer. It remains to check for a suffix.
IntegerLiteralToken* intToken = new IntegerLiteralToken;
setType(intToken);
_token = Token(intToken);
_litState = ISuf;
continue;
}
case FracPart: {
while (isDigit(ch)) {
advanceChar2(buffer,position,loc);
ch = buffer[position]; // = continue;
}
if ((ch == 'e' || ch == 'E') && _digits == Dec) {
_litState = Exponent;
advanceChar2(buffer,position,loc);
continue;
}
if ((ch == 'p' || ch == 'P') && _digits == Hex) {
_litState = Exponent;
advanceChar2(buffer,position,loc);
continue;
}
FloatingLiteralToken* floatToken = new FloatingLiteralToken;
if (_digits == Dec)
floatToken->setType(FloatingLiteralToken::TDecimal);
if (_digits == Bin)
floatToken->setType(FloatingLiteralToken::TBit);
if (_digits == Hex)
floatToken->setType(FloatingLiteralToken::THexaDecimal);
_token = Token(floatToken);
_litState = FSuf;
continue;
}
case Exponent: {
if (ch == '+' || ch == '-') {
_litState = ExpNext;
advanceChar2(buffer,position,loc);
continue;
}
_litState = ExpNext;
continue;
}
case ExpNext: {
if (isdigit(ch)) {
advanceChar2(buffer,position,loc);
continue;
}
FloatingLiteralToken* floatToken = new FloatingLiteralToken;
if (_digits == Dec)
floatToken->setType(FloatingLiteralToken::TDecimal);
if (_digits == Bin)
floatToken->setType(FloatingLiteralToken::TBit);
if (_digits == Hex)
floatToken->setType(FloatingLiteralToken::THexaDecimal);
_token = Token(floatToken);
_litState = FSuf;
continue;
}
case ISuf: {
IntegerLiteralToken& intToken =
(IntegerLiteralToken &)_token.getSContent();
if (ch == 'l' || ch == 'L') {
advanceChar2(buffer,position,loc);
if (intToken.isInt()) {
intToken.mergeExtension(IntegerLiteralToken::ELong);
continue;
}
if (intToken.isLong()) {
intToken.mergeExtension(IntegerLiteralToken::ELongLong);
continue;
}
std::string constant;
_currentToken >> constant;
std::ostringstream msg;
msg << "Unknown suffix for integer literal constant: "
<< constant << ".";
_errorList.push_back(Error(loc,msg.str()));
// release the token
delete _token.extractContent();
return RRFinished;
}
if (ch == 'u' || ch == 'U') {
advanceChar2(buffer,position,loc);
if (intToken.isUnsigned()) {
std::string constant;
_currentToken >> constant;
std::ostringstream msg;
msg << "Unknown suffix for integer literal constant: "
<< constant << ".";
_errorList.push_back(Error(loc,msg.str()));
// release the token
delete _token.extractContent();
return RRFinished;
}
intToken.mergeExtension(IntegerLiteralToken::EUnsigned);
continue;
}
_currentToken >> intToken.value();
return RRHasToken;
}
case FSuf: {
FloatingLiteralToken& floatToken =
(FloatingLiteralToken &)_token.getSContent();
if (ch == 'l' || ch == 'L') {
advanceChar2(buffer,position,loc);
if (floatToken.isReal()) {
floatToken.mergeSuffix(FloatingLiteralToken::FSLongDouble);
continue;
}
std::string constant;
_currentToken >> constant;
std::ostringstream msg;
msg << "Unknown suffix for Real literal constant: "
<< constant << ".";
_errorList.push_back(Error(loc,msg.str()));
// release the token
delete _token.extractContent();
return RRFinished;
}
if (ch == 'd' || ch == 'D') {
advanceChar2(buffer,position,loc);
if (floatToken.isReal()) {
floatToken.mergeSuffix(FloatingLiteralToken::FSDouble);
continue;
}
std::string constant; // FIXME - set secondaryState?
_currentToken >> constant;
std::ostringstream msg;
msg << "Unknown suffix for Real literal constant: "
<< constant << ".";
_errorList.push_back(Error(loc,msg.str()));
// release the token
delete _token.extractContent();
return RRFinished;
}
if (ch == 'f' || ch == 'F') {
advanceChar2(buffer,position,loc);
if (floatToken.isReal()) {
floatToken.mergeSuffix(FloatingLiteralToken::FSFloat);
continue;
}
std::string constant;
_currentToken >> constant;
std::ostringstream msg;
msg << "Unknown suffix for Real literal constant: "
<< constant << ".";
_errorList.push_back(Error(loc,msg.str()));
// release the token
delete _token.extractContent();
return RRFinished;
}
_currentToken >> floatToken.value();
return RRHasToken;
}
}
}
return RRNeedChars;
}
bool Lexer::isUtf8SymbolStart(char ch) {
_AcceptedUtf8Symbols.clear();
std::string symb;
symb = AbstractToken::utf8_convert(0x2200);
if (symb[0] == ch) {
DLexer::KeywordToken tok;
tok.setType(DLexer::KeywordToken::TForall);
_AcceptedUtf8Symbols.push_front(
std::pair<std::string,AbstractToken>(symb,tok));
}
symb = AbstractToken::utf8_convert(0x2203);
if(symb[0] == ch) {
DLexer::KeywordToken tok;
tok.setType(DLexer::KeywordToken::TExists);
_AcceptedUtf8Symbols.push_front(
std::pair<std::string,AbstractToken>(symb,tok));
}
symb = AbstractToken::utf8_convert(0x2261);
if(symb[0] == ch) {
DLexer::OperatorPunctuatorToken tok;
tok.setType(DLexer::OperatorPunctuatorToken::TEqual);
_AcceptedUtf8Symbols.push_front(
std::pair<std::string,AbstractToken>(symb,tok));
}
symb = AbstractToken::utf8_convert(0x2262);
if (symb[0] == ch) {
DLexer::OperatorPunctuatorToken tok;
tok.setType(DLexer::OperatorPunctuatorToken::TDifferent);
_AcceptedUtf8Symbols.push_front(
std::pair<std::string,AbstractToken>(symb,tok));
}
symb = AbstractToken::utf8_convert(0x2264);
if (symb[0] == ch) {
DLexer::OperatorPunctuatorToken tok;
tok.setType(DLexer::OperatorPunctuatorToken::TLessOrEqual);
_AcceptedUtf8Symbols.push_front(
std::pair<std::string,AbstractToken>(symb,tok));
}
symb = AbstractToken::utf8_convert(0x2265);
if (symb[0] == ch) {
DLexer::OperatorPunctuatorToken tok;
tok.setType(DLexer::OperatorPunctuatorToken::TGreaterOrEqual);
_AcceptedUtf8Symbols.push_front(
std::pair<std::string,AbstractToken>(symb,tok));
}
symb = AbstractToken::utf8_convert(0x2212);
if (symb[0] == ch) {
DLexer::OperatorPunctuatorToken tok;
tok.setType(DLexer::OperatorPunctuatorToken::TMinus);
_AcceptedUtf8Symbols.push_front(
std::pair<std::string,AbstractToken>(symb,tok));
}
symb = AbstractToken::utf8_convert(0x21D2);
if (symb[0] == ch) {
DLexer::OperatorPunctuatorToken tok;
tok.setType(DLexer::OperatorPunctuatorToken::TImplies);
_AcceptedUtf8Symbols.push_front(
std::pair<std::string,AbstractToken>(symb,tok));
}
symb = AbstractToken::utf8_convert(0x21D4);
if (symb[0] == ch) {
DLexer::OperatorPunctuatorToken tok;
tok.setType(DLexer::OperatorPunctuatorToken::TEquiv);
_AcceptedUtf8Symbols.push_front(
std::pair<std::string,AbstractToken>(symb,tok));
}
symb = AbstractToken::utf8_convert(0x2227);
if (symb[0] == ch) {
DLexer::OperatorPunctuatorToken tok;
tok.setType(DLexer::OperatorPunctuatorToken::TLogicalAnd);
_AcceptedUtf8Symbols.push_front(
std::pair<std::string,AbstractToken>(symb,tok));
}
symb = AbstractToken::utf8_convert(0x2228);
if (symb[0] == ch) {
DLexer::OperatorPunctuatorToken tok;
tok.setType(DLexer::OperatorPunctuatorToken::TLogicalOr);
_AcceptedUtf8Symbols.push_front(
std::pair<std::string,AbstractToken>(symb,tok));
}
symb = AbstractToken::utf8_convert(0x00AC);
if (symb[0] == ch) {
DLexer::OperatorPunctuatorToken tok;
tok.setType(DLexer::OperatorPunctuatorToken::TNot);
_AcceptedUtf8Symbols.push_front(
std::pair<std::string,AbstractToken>(symb,tok));
}
symb = AbstractToken::utf8_convert(0x22BB);
if (symb[0] == ch) {
DLexer::OperatorPunctuatorToken tok;
tok.setType(DLexer::OperatorPunctuatorToken::TLogicalXor);
_AcceptedUtf8Symbols.push_front(
std::pair<std::string,AbstractToken>(symb,tok));
}
symb = AbstractToken::utf8_convert(0x2208);
if (symb[0] == ch) {
DLexer::KeywordToken tok;
tok.setType(DLexer::KeywordToken::TSubset);
_AcceptedUtf8Symbols.push_front(
std::pair<std::string,AbstractToken>(symb,tok));
}
symb = AbstractToken::utf8_convert(0x1D539);
if (symb[0] == ch) {
DLexer::KeywordToken tok;
tok.setType(DLexer::KeywordToken::TBoolean);
_AcceptedUtf8Symbols.push_front(
std::pair<std::string,AbstractToken>(symb,tok));
}
symb = AbstractToken::utf8_convert(0x2124);
if (symb[0] == ch) {
DLexer::KeywordToken tok;
tok.setType(DLexer::KeywordToken::TInteger);
_AcceptedUtf8Symbols.push_front(
std::pair<std::string,AbstractToken>(symb,tok));
}
symb = AbstractToken::utf8_convert(0x211D);
if (symb[0] == ch) {
DLexer::KeywordToken tok;
tok.setType(DLexer::KeywordToken::TReal);
_AcceptedUtf8Symbols.push_front(
std::pair<std::string,AbstractToken>(symb,tok));
}
return !_AcceptedUtf8Symbols.empty();
}
Lexer::ReadResult
Lexer::readUtf8Symbol(const std::string& buffer, size_t& position, location loc) {
size_t i = 1;
while(!_AcceptedUtf8Symbols.empty()) {
if (position+i>=buffer.size()) break;
if (_AcceptedUtf8Symbols.size() == 1) {
std::string symb = _AcceptedUtf8Symbols.front().first;
size_t length = symb.size();
while(i<length) {
if (position+i >= buffer.size() || symb[i] != buffer[position+i]) {
//TODO: use buffer[position] to determine the expected length of
// the character in buffer
size_t last = position+i>=buffer.size()?buffer.size()-1:position+i;
std::ostringstream msg;
msg << "unknown character A: "
<< buffer.substr(position, last)
<< ".";
_errorList.push_back(Error(loc,msg.str()));
position=last+1;
return RRContinueLexing;
}
i++;
}
position+=i;
loc->charnum2+=i;
_token = _AcceptedUtf8Symbols.front().second;
return RRHasToken;
} else {
Utf8SymbolSet::iterator it = _AcceptedUtf8Symbols.begin();
Utf8SymbolSet::iterator end = _AcceptedUtf8Symbols.end();
while(it != end) {
if ((*it).first[i] != buffer[position+i]) {
Utf8SymbolSet::iterator tmp = it;
it++;
_AcceptedUtf8Symbols.erase(tmp,it);
} else it++;
}
}
i++;
}
std::ostringstream msg;
msg << "unknown character B: "
<< buffer.substr(position, position+i-1)
<< ".";
_errorList.push_back(Error(loc,msg.str()));
position+=i;
return RRContinueLexing;
}
Lexer::ReadResult
Lexer::readCharLiteral(const std::string& buffer,
size_t& position, location loc) {
DLexer::CharacterLiteralToken* tok = new CharacterLiteralToken;
_token = Token(tok);
tok->setType(_charLitKind);
size_t length = buffer.length();
if (buffer[position] == '\\') {
position++;
loc->charnum2++;
if (position>=length) {
_errorList.push_back(Error(loc, "unterminated escape sequence"));
return RRFinished;
}
switch(buffer[position]) {
case '\'': tok->set_value('\''); break;
case '"': tok->set_value('"'); break;
case '?': tok->set_value('?'); break;
case '\\': tok->set_value('\\'); break;
case 'a': tok->set_value('\a'); break;
case 'b': tok->set_value('\b'); break;
case 'f': tok->set_value('\f'); break;
case 'n': tok->set_value('\n'); break;
case 'r': tok->set_value('\r'); break;
case 't': tok->set_value('\t'); break;
case 'v': tok->set_value('\v'); break;
case 'x': {
unsigned long long res = 0;
while(true) {
position++;
loc->charnum2++;
if (position>=length) {
_errorList.push_back(Error(loc,"unterminated char literal"));
return RRFinished;
}
if ('0'<=buffer[position] && buffer[position]<= '9')
res = 16*res + (buffer[position]-'0');
else if ('a'<=buffer[position]&&buffer[position]<='f')
res = 16*res + 10 + buffer[position]-'a';
else if ('A'<=buffer[position]&&buffer[position]<='F')
res = 16*res + 10 + buffer[position]-'A';
else { loc->charnum2--; position--; break; }
}
if (_charLitKind == DLexer::CharacterLiteralToken::TWideChar) {
if (res > 1ULL << (8*sizeof(wchar_t))) {
_errorList.push_back(
Error(loc,"escape sequence not in wide char range"));
return RRFinished;
}
tok->wchar_value() = res;
} else {
if (res > UCHAR_MAX) {
_errorList.push_back(
Error(loc,"escape sequence not in char range"));
return RRFinished;
}
tok->char_value() = res;
}
break;
}
default:
if ('0'<= buffer[position] && buffer[position] <= '9') {
unsigned res = 0;
for (int i = 0; i<3; i++) {
if(position>=length) {
_errorList.push_back(
Error(loc,"unterminated char literal"));
return RRFinished;
}
if(buffer[position] < '0' || buffer[position] > '7') break;
res = res*8 + (buffer[position]-'0');
position++;
loc->charnum2++;
}
position--;
loc->charnum2--;
if (_charLitKind == DLexer::CharacterLiteralToken::TWideChar) {
// assume wchar_t is wide enough to accomodate the 0-512 range.
tok->wchar_value() = res;
} else {
if (res > UCHAR_MAX) {
_errorList.push_back(
Error(loc,"escape sequence not in char range"));
return RRFinished;
}
tok->char_value() = res;
}
} else
// Just ignore the '\' (\ followed by an unknown character can be
// supported in an implementation-defined manner as per [lex.ccon.3]
tok->set_value(buffer[position]);
break;
}
} else { tok->set_value(buffer[position]); }
bool first_char = true;
while(true) {
position++;
loc->charnum2++;
if (position >= length) {
_errorList.push_back(Error(loc,"unterminated char literal"));
return RRFinished;
}
if (buffer[position] == '\'') {
position++;
return RRHasToken;
}
if(first_char) {
std::cerr <<
loc->filename1 << ":" << loc->linenum1 << ":" <<
loc->charnum1 << "-" << loc->charnum2 <<
" Character literal has more than one char. Ignoring the other ones\n";
first_char = false;
}
if (buffer[position] == '\\') position++;
}
assert(false);
}
const std::string Lexer::getPreprocessorToken(const std::string& buffer, size_t& position, location loc, bool skipNewLines, bool raw) {
bool debug = false;
char ch;
while (true) {
skipSpace(buffer,position,loc,skipNewLines);
ch = buffer[position];
// FIXME - what about continuation lines within or outside of comments
if (ch == '/' && buffer[position+1] == '/') {
do {
ch = advanceChar1NoToken(buffer,position,loc);
} while (ch != '\0' && ch != '\n' && ch != '\r');
continue;
}
break;
}
size_t p = position;
if (ch == '\0') return "";
if (ch == '#') {
// position is just after the returned string
ch = advanceChar1NoToken(buffer,position,loc);
return "#";
}
// FIXME - digraphs, trigraphs
if (ch == '"') {
// Read a quoted string
size_t p = position;
do {
ch = advanceChar1NoToken(buffer,position,loc);
if (ch == '\\') {
ch = advanceChar1NoToken(buffer,position,loc);
ch = advanceChar1NoToken(buffer,position,loc);
}
// FIXME - escaped characters, digraphs, trigraphs
} while (ch != '\0' && ch != '"');
if (ch == '\0') {
// FIXME - unclosed string
} else if (ch == '"') {
ch = advanceChar1NoToken(buffer,position,loc);
return std::string(buffer,p,position-p);
// position is just after the closing quote
// FIXME - return raw token, or actual string
}
}
if (ch == '\'') {
// Read a quoted character string (PP allows multiple characters within single quotes)
size_t p = position;
do {
ch = advanceChar1NoToken(buffer,position,loc);
if (ch == '\\') {
ch = advanceChar1NoToken(buffer,position,loc);
ch = advanceChar1NoToken(buffer,position,loc);
}
// FIXME - escaped characters, digraphs, trigraphs
} while (ch != '\0' && ch != '\'');
if (ch == '\0') {
// FIXME - unclosed string
} else if (ch == '"') {
ch = advanceChar1NoToken(buffer,position,loc);
// FIXME - return raw token, or actual string
}
return std::string(buffer,p,position-p);
// position is just after the closing quote
}
while (isalnum(ch) || ch == '_' ) { // FIXME - also $?
ch = advanceChar1NoToken(buffer,position,loc);
}
if ((size_t) p != position) {
// An identifiers preprocessor token
const std::string id = std::string(buffer,p,position-p);
if (!raw) {
if (isDefinedMacro(id)) {
clang::MacroInfo* macro = getMacro(id);
if (macro->isFunctionLike()) {
if (debug)
std::cout << "Not Expanding " << id
<< " -- is function like" << std::endl;
} else {
//std::cout << "Expanding " << id << " : ";
// for (int i=0; i < macro->getNumTokens(); i++) {
// std::cout << macro->getReplacementToken(i).getName() << " ";
// }
// std::cout << std::endl;
}
}
}
return id;
}
// FIXME - is there a fixed set of operator tokens? or just one characer each? or any sequence
while (!isalnum(ch) && ch != '_' && !isspace(ch) && ch != '@' && ch != '\0') {
ch = advanceChar1NoToken(buffer,position,loc);
}
if (position != (size_t) p) advanceChar1NoToken(buffer,position,loc);
return std::string(buffer,p,position-p);
// FIXME - what about non-printable tokens or illegal tokens
}
DLexer::Token
Lexer::expandIfMacro(const DLexer::Token& token, Parser::Base::ReadResult& res) {
bool debug=false;
if (_rawOnly) return token;
const std::string id = token.text();
if (isDefinedMacro(id)) {
clang::MacroInfo* macro = getMacro(id);
if (macro->isFunctionLike()) {
if (debug)
std::cout << "Not Expanding " << id
<< " -- is function like" << std::endl;
return token;
} else {
//std::cout << "Expanding " << id << std::endl;
int i = macro->getNumTokens();
if (i == 1) {
const clang::Token& clang_token = macro->getReplacementToken(0);
Token t = convertClangTokenToACSLToken(clang_token);
t = expandIfMacro(t, res);
//std::cout << " Expanded to " << t.str() << std::endl;
return t;
} else {
while (--i >= 0) {
const clang::Token& clang_token = macro->getReplacementToken(i);
_token_stack.push(convertClangTokenToACSLToken(clang_token));
//std::cout << "Pushed " << _token_stack.top().str() << std::endl;
}
//std::cout << std::endl;
if (_token_stack.empty()) {
// Macro expanded to nothing
return Token();
} else {
Token t = _token_stack.top();
_token_stack.pop();
//std::cout << "Popped " << t.str() << std::endl;
t = expandIfMacro(t, res);
return t;
}
}
}
}
return token;
}
char
Lexer::skipSpace(const std::string& buffer, size_t& position, location loc, bool skipNewLines) {
char ch = buffer[position];
while ((isspace(ch) || ch == '@' || ch == '/' ) && (skipNewLines || (ch != '\n' && ch != '\r'))) {
if (ch == '@') {
ch = '@';
}
if (ch == '/') {
if (buffer[position+1] != '/') break;
if (buffer[position+2] == '@') {
_revised[position+0] = ' ';
_revised[position+1] = ' ';
_revised[position+2] = ' ';
} else {
removeFromRevision(position);
}
do {
ch = advanceChar1NoToken(buffer,position,loc);
} while (ch != '\0' && ch != '\n' && ch != '\r');
continue;
}
++position;
if (ch == '\n') {
++loc->linenum1;
loc->charnum1 = 1;
}
else
++loc->charnum1;
ch = buffer[position];
};
return ch;
}
Lexer::ReadResult Lexer::readToken() {
bool debug = false;
if (!_acslTokens.empty()) {
_token = _acslTokens.front();
_acslTokens.pop_front();
// FIXME - location
return RRHasToken;
}
if (_clangTokensSet) {
while (!_clangTokens.empty()) {
clang::Token t = _clangTokens.front();
if (debug) {
std::cout << "POPPED CLANG " << str(t) << std::endl;
}
if (t.is(clang::tok::TokenKind::unknown)) {
const clang::SourceLocation& end = t.getEndLoc();
_clangTokens.pop_front();
if (text(t) == "\\") {
clang::Token& tt = _clangTokens.front();
if (debug) std::cout << "POPPED CLANG " << str(tt) << std::endl;
if (tt.isOneOf(clang::tok::identifier,clang::tok::kw_true,clang::tok::kw_false,clang::tok::kw_this)) {
clang::IdentifierInfo* identifierInfo = tt.getIdentifierInfo();
std::string text = std::string(identifierInfo->getNameStart(),
identifierInfo->getLength());
if (!sameLocation(end, tt.getLocation())) {
_clangSema->Diags.Report(tt.getLocation(),lexerWarning)
<< "White space not permitted between the \\ and the identifier";
// Continue on though
}
_token = protectedKeywordOrIdentifier(text);
setLocation(tt.getLocation(),tt.getEndLoc()); // FIXME prefer using t.getLocation() but that is not always valid on an unknown token
_clangTokens.pop_front();
} else {
_clangSema->Diags.Report(tt.getLocation(),lexerError)
<< ( "Expected an identifier after the backslash: " + text(tt));
continue;
}
} else if (text(t) == "@") {
continue; // In ACSL '@' is a space
} else {
_clangSema->Diags.Report(t.getLocation(),lexerWarning)
<< ( "Skipping unknown token: " + text(t));
continue; // Just skip token
}
} else {
_token = convertClangTokenToACSLToken(t);
setLocation(t.getLocation(),t.getEndLoc());
if (!t.getLocation().isValid() && debug)
std::cout << "BEGIN LOC INVALID " + str(t) << std::endl;
if (!t.getEndLoc().isValid() && debug)
std::cout << "END LOC INVALID " + str(t) << std::endl;
size_t p;
if (t.is(clang::tok::TokenKind::identifier)) {
_clangTokens.pop_front();
} else if (t.is(clang::tok::TokenKind::numeric_constant)) {
if ((p=text(t).find("..")) != std::string::npos) {
// .. within a numeric PP token
std::string ts = text(t);
std::string t1 = ts.substr(0,p);
std::string t2 = ts.substr(p+2,std::string::npos);
_clangTokens.pop_front();
if (!t2.empty()) reparseWithClang(t2,t.getLocation().getLocWithOffset(p+2));
reparseWithClang("..",t.getLocation().getLocWithOffset(p));
if (!t1.empty()) reparseWithClang(t1,t.getLocation());
continue; // Now go back to pick off the first token
} else {
_clangTokens.pop_front();
// FIXME - should we check here whether the PP numeric token is a valid lexer numeric token
}
} else if (text(t) == "<=") {
_clangTokens.pop_front();
clang::Token& tt = _clangTokens.front();
clang::Token& ttt = *(++_clangTokens.begin());
if (text(tt) == "=" && text(ttt) == ">") {
_token = OperatorPunctuatorToken().setType(
OperatorPunctuatorToken::TEquiv);
setLocation(t.getLocation(),ttt.getEndLoc());
_clangTokens.pop_front();
_clangTokens.pop_front();
}
} else if (text(t) == "==") {
_clangTokens.pop_front();
clang::Token& tt = _clangTokens.front();
if (text(tt) == ">") {
_token = OperatorPunctuatorToken().setType(
OperatorPunctuatorToken::TImplies);
setLocation(t.getLocation(),tt.getEndLoc());
_clangTokens.pop_front();
}
} else if (text(t) == "..") {
_token = OperatorPunctuatorToken().setType(
OperatorPunctuatorToken::TRange);
setLocation(t.getLocation(),t.getEndLoc());
_clangTokens.pop_front();
} else if (text(t) == ".") {
_clangTokens.pop_front();
clang::Token& tt = _clangTokens.front();
clang::Token& ttt = *(++_clangTokens.begin());
if (text(tt) == ".") {
if (text(ttt) == ".") { // FIXME - Ellipsis within a numeric token? Ellipsis a CLang token of its own?
_token = OperatorPunctuatorToken().setType(
OperatorPunctuatorToken::TEllipsis);
setLocation(t.getLocation(),ttt.getEndLoc());
_clangTokens.pop_front();
_clangTokens.pop_front();
} else {
_token = OperatorPunctuatorToken().setType(
OperatorPunctuatorToken::TRange);
setLocation(t.getLocation(),tt.getEndLoc());
_clangTokens.pop_front();
}
}
} else {
_clangTokens.pop_front();
}
}
if (debug) {
std::cout << " NEW TOKEN " << _token.str() << std::endl;
}
return RRHasToken;
}
_hasFinished = true;
return RRFinished;
}
ReadResult r = readToken(_buffer, _position, _tokenLocation);
if (debug && r == RRHasToken) {
std::cout << " ACSL TOKEN " << _token.str() << std::endl;
}
return r;
}
void
Lexer::reparseWithClang(const std::string& text, clang::SourceLocation clangLoc) {
Lexer lexer2(_clangSema);
lexer2.setBuffer(text,clangLoc,0,true);
auto iter = lexer2._clangTokens.rbegin();
while (iter != lexer2._clangTokens.rend()) {
_clangTokens.push_front(*iter++);
}
}
bool
Lexer::sameLocation(const clang::SourceLocation& begin, const clang::SourceLocation& end) const {
clang::SourceManager& sm = _clangSema->getPreprocessor().getSourceManager();
clang::PresumedLoc PLoc = sm.getPresumedLoc(begin);
clang::PresumedLoc PLoc2 = sm.getPresumedLoc(end);
if (PLoc.getLine() != PLoc2.getLine()) return false;
if (PLoc.getColumn() != PLoc2.getColumn()) return false;
if (strcmp(PLoc.getFilename(),PLoc2.getFilename()) != 0) return false;
return true;
}
void
Lexer::setLocation(const clang::SourceLocation& begin, const clang::SourceLocation& end) {
clang::SourceManager& sm = _clangSema->getPreprocessor().getSourceManager();
clang::PresumedLoc PLoc = sm.getPresumedLoc(begin);
_tokenLocation->filename1 = strdup(PLoc.getFilename());
_tokenLocation->linenum1 = PLoc.getLine();
_tokenLocation->charnum1 = PLoc.getColumn();
PLoc = sm.getPresumedLoc(end);
_tokenLocation->filename2 = strdup(PLoc.getFilename());
_tokenLocation->linenum2 = PLoc.getLine();
_tokenLocation->charnum2 = PLoc.getColumn();
}
Lexer::ReadResult Lexer::readToken(const std::string& buffer, size_t& position, location loc)
{ ReadResult r;
do { r = readMain(buffer, position, loc);
} while ( r == RRContinueLexing);
//if (r == RRHasToken) std::cout << " READ " << _token.str() << std::endl;
return r;
}
std::string
Lexer::str(const clang::Token& t) const {
std::ostringstream s;
s << t.getName() << " " << _clangSema->getPreprocessor().getSpelling(t) << " " << t.getLocation().printToString(_clangSema->getPreprocessor().getSourceManager())
<< " " << t.getEndLoc().printToString(_clangSema->getPreprocessor().getSourceManager());
return s.str();
}
std::string
Lexer::str(const clang::SourceLocation& loc) const {
std::ostringstream s;
s << loc.printToString(_clangSema->getPreprocessor().getSourceManager());
return s.str();
}
std::string
Lexer::text(const clang::Token& t) const {
return _clangSema->getPreprocessor().getSpelling(t);
}
Lexer::ReadResult
Lexer::readMain(const std::string& buffer, size_t& position, location loc) {
if (!_token_stack.empty()) {
ReadResult res = RRHasToken;
_token = _token_stack.top();
_token_stack.pop();
_token = expandIfMacro(_token, res);
return res;
}
loc->linenum1 = loc->linenum2;
loc->charnum1 = loc->charnum2+1;
if (_hasNewlineToken) {
_hasNewlineToken = false;
++loc->linenum1;
loc->charnum1 = 1;
};
char ch = skipSpace(buffer,position,loc);
if (ch == '/' && buffer[position+1] == '*') {
// Embedded block comment - abort
_revised = _revised.substr(0,position);
//_errorList.push_back(Error(loc,"Block comments may not be nested inside specification annotations - aborting processing comment"));
return RRFinished;
}
if (ch == '\0') return RRFinished;
loc->linenum2 = loc->linenum1;
loc->charnum2 = loc->charnum1-1;
switch (ch) {
case '\\':
_context = ch;
advanceChar2NoToken(buffer,position,loc);
return readProtectedToken(buffer, position, loc);
case '<':
if (buffer.length() - position < 4) getMoreCharacters();
if (buffer[position+1] == '=') {
if (buffer[position+2] == '=') {
if (buffer[position+3] == '>') {
position += 4;
loc->charnum2 += 4;
_token = OperatorPunctuatorToken().setType(
OperatorPunctuatorToken::TEquiv);
return RRHasToken;
};
};
position += 2;
loc->charnum2 += 2;
_token = OperatorPunctuatorToken().setType(
OperatorPunctuatorToken::TLessOrEqual);
return RRHasToken;
}
else if (buffer[position+1] == '-') {
if (buffer[position+2] == '-') {
if (buffer[position+3] == '>') {
position += 4;
loc->charnum2 += 4;
_token = OperatorPunctuatorToken().setType(
OperatorPunctuatorToken::TBitEquiv);
return RRHasToken;
};
};
}
else if (buffer[position+1] == '<') {
if (buffer[position+2] == '=') {
position += 3;
loc->charnum2 += 3;
_token = OperatorPunctuatorToken().setType(
OperatorPunctuatorToken::TLeftShiftAssign);
return RRHasToken;
};
position += 2;
loc->charnum2 += 2;
_token = OperatorPunctuatorToken().setType(
OperatorPunctuatorToken::TLeftShift);
return RRHasToken;
}
else if (buffer[position+1] == ':') {
position += 2;
loc->charnum2 += 2;
_token = OperatorPunctuatorToken().setType(
OperatorPunctuatorToken::TLTColon);
return RRHasToken;
};
position += 1;
loc->charnum2 += 1;
_token = OperatorPunctuatorToken().setType(
OperatorPunctuatorToken::TLess);
return RRHasToken;
case '>':
if (buffer.length() - position < 3) getMoreCharacters();
if (buffer[position+1] == '=') {
position += 2;
loc->charnum2 += 2;
_token = OperatorPunctuatorToken().setType(
OperatorPunctuatorToken::TGreaterOrEqual);
return RRHasToken;
}
else if (buffer[position+1] == '>') {
if (buffer[position+2] == '=') {
position += 3;
loc->charnum2 += 3;
_token = OperatorPunctuatorToken().setType(
OperatorPunctuatorToken::TRightShiftAssign);
return RRHasToken;
};
position += 2;
loc->charnum2 += 2;
_token = OperatorPunctuatorToken().setType(
OperatorPunctuatorToken::TRightShift);
return RRHasToken;
};
position += 1;
loc->charnum2 += 1;
_token = OperatorPunctuatorToken().setType(
OperatorPunctuatorToken::TGreater);
return RRHasToken;
case '!':
if (buffer.length() - position < 2) getMoreCharacters();
if (buffer[position+1] == '=') {
position += 2;
loc->charnum2 += 2;
_token = OperatorPunctuatorToken().setType(
OperatorPunctuatorToken::TDifferent);
return RRHasToken;
}
position += 1;
loc->charnum2 += 1;
_token = OperatorPunctuatorToken().setType(OperatorPunctuatorToken::TNot);
return RRHasToken;
case '=':
if (buffer.length() - position < 3) getMoreCharacters();
if (buffer[position+1] == '=') {
if (buffer[position+2] == '>') {
position += 3;
loc->charnum2 += 3;
_token = OperatorPunctuatorToken().setType(
OperatorPunctuatorToken::TImplies);
return RRHasToken;
};
position += 2;
loc->charnum2 += 2;
_token = OperatorPunctuatorToken().setType(
OperatorPunctuatorToken::TEqual);
return RRHasToken;
};
position += 1;
loc->charnum2 += 1;
_token = OperatorPunctuatorToken().setType(
OperatorPunctuatorToken::TAssign);
return RRHasToken;
case '&':
if (buffer.length() - position < 2) getMoreCharacters();
if (buffer[position+1] == '&') {
position += 2;
loc->charnum2 += 2;
_token = OperatorPunctuatorToken().setType(
OperatorPunctuatorToken::TLogicalAnd);
return RRHasToken;
}
else if (buffer[position+1] == '=') {
position += 2;
loc->charnum2 += 2;
_token = OperatorPunctuatorToken().setType(
OperatorPunctuatorToken::TBitAndAssign);
return RRHasToken;
}
position += 1;
loc->charnum2 += 1;
_token = OperatorPunctuatorToken().setType(
OperatorPunctuatorToken::TAmpersand);
return RRHasToken;
case '|':
if (buffer.length() - position < 2) getMoreCharacters();
if (buffer[position+1] == '|') {
position += 2;
loc->charnum2 += 2;
_token = OperatorPunctuatorToken().setType(
OperatorPunctuatorToken::TLogicalOr);
return RRHasToken;
}
else if (buffer[position+1] == '=') {
position += 2;
loc->charnum2 += 2;
_token = OperatorPunctuatorToken().setType(
OperatorPunctuatorToken::TBitOrAssign);
return RRHasToken;
}
position += 1;
loc->charnum2 += 1;
_token = OperatorPunctuatorToken().setType(
OperatorPunctuatorToken::TBitOr);
return RRHasToken;
case '^':
if (buffer.length() - position < 2) getMoreCharacters();
if (buffer[position+1] == '^') {
position += 2;
loc->charnum2 += 2;
_token = OperatorPunctuatorToken().setType(
OperatorPunctuatorToken::TLogicalXor);
return RRHasToken;
}
else if (buffer[position+1] == '=') {
position += 2;
loc->charnum2 += 2;
_token = OperatorPunctuatorToken().setType(
OperatorPunctuatorToken::TBitXorAssign);
return RRHasToken;
}
position += 1;
loc->charnum2 += 1;
_token = OperatorPunctuatorToken().setType(
OperatorPunctuatorToken::TBitXor);
return RRHasToken;
case '-':
if (buffer.length() - position < 3) getMoreCharacters();
if (buffer[position+1] == '-') {
if (buffer[position+2] == '>') {
position += 3;
loc->charnum2 += 3;
_token = OperatorPunctuatorToken().setType(
OperatorPunctuatorToken::TBitImplies);
return RRHasToken;
};
position += 2;
loc->charnum2 += 2;
_token = OperatorPunctuatorToken().setType(
OperatorPunctuatorToken::TMinusMinus);
return RRHasToken;
}
else if (buffer[position+1] == '>') {
if (buffer[position+2] == '*') {
position += 3;
loc->charnum2 += 3;
_token = OperatorPunctuatorToken().setType(
OperatorPunctuatorToken::TArrowStar);
return RRHasToken;
};
position += 2;
loc->charnum2 += 2;
_token = OperatorPunctuatorToken().setType(
OperatorPunctuatorToken::TArrow);
return RRHasToken;
}
else if (buffer[position+1] == '=') {
position += 2;
loc->charnum2 += 2;
_token = OperatorPunctuatorToken().setType(
OperatorPunctuatorToken::TMinusAssign);
return RRHasToken;
};
position += 1;
loc->charnum2 += 1;
_token = OperatorPunctuatorToken().setType(
OperatorPunctuatorToken::TMinus);
return RRHasToken;
case '+':
if (buffer.length() - position < 2) getMoreCharacters();
if (buffer[position+1] == '+') {
position += 2;
loc->charnum2 += 2;
_token = OperatorPunctuatorToken().setType(
OperatorPunctuatorToken::TPlusPlus);
return RRHasToken;
}
else if (buffer[position+1] == '=') {
position += 2;
loc->charnum2 += 2;
_token = OperatorPunctuatorToken().setType(
OperatorPunctuatorToken::TPlusAssign);
return RRHasToken;
};
position += 1;
loc->charnum2 += 1;
_token = OperatorPunctuatorToken().setType(
OperatorPunctuatorToken::TPlus);
return RRHasToken;
case '{':
position += 1;
loc->charnum2 += 1;
_token = OperatorPunctuatorToken().setType(
OperatorPunctuatorToken::TOpenBrace);
return RRHasToken;
case '}':
position += 1;
loc->charnum2 += 1;
_token = OperatorPunctuatorToken().setType(
OperatorPunctuatorToken::TCloseBrace);
return RRHasToken;
case '[':
position += 1;
loc->charnum2 += 1;
_token = OperatorPunctuatorToken().setType(
OperatorPunctuatorToken::TOpenBracket);
return RRHasToken;
case ']':
position += 1;
loc->charnum2 += 1;
_token = OperatorPunctuatorToken().setType(
OperatorPunctuatorToken::TCloseBracket);
return RRHasToken;
case '(':
position += 1;
loc->charnum2 += 1;
_token = OperatorPunctuatorToken().setType(
OperatorPunctuatorToken::TOpenParen);
return RRHasToken;
case ')':
position += 1;
loc->charnum2 += 1;
_token = OperatorPunctuatorToken().setType(
OperatorPunctuatorToken::TCloseParen);
return RRHasToken;
case ';':
position += 1;
loc->charnum2 += 1;
_token = OperatorPunctuatorToken().setType(
OperatorPunctuatorToken::TSemiColon);
return RRHasToken;
case ':':
if (buffer.length() - position < 2) getMoreCharacters();
if (buffer[position+1] == ':') {
position += 2;
loc->charnum2 += 2;
_token = OperatorPunctuatorToken().setType(
OperatorPunctuatorToken::TColonColon);
return RRHasToken;
}
else if (buffer[position+1] == '>') {
position += 2;
loc->charnum2 += 2;
_token = OperatorPunctuatorToken().setType(
OperatorPunctuatorToken::TColonGT);
return RRHasToken;
};
position += 1;
loc->charnum2 += 1;
_token = OperatorPunctuatorToken().setType(
OperatorPunctuatorToken::TColon);
return RRHasToken;
case '.':
if (buffer.length() - position < 2) getMoreCharacters();
if (buffer[position+1] == '.') {
if (buffer[position+2] == '.') {
position += 3;
loc->charnum2 += 3;
_token = OperatorPunctuatorToken().setType(
OperatorPunctuatorToken::TEllipsis);
return RRHasToken;
};
position += 2;
loc->charnum2 += 2;
_token = OperatorPunctuatorToken().setType(
OperatorPunctuatorToken::TRange);
return RRHasToken;
}
else if (buffer[position+1] == '*') {
position += 2;
loc->charnum2 += 2;
_token = OperatorPunctuatorToken().setType(
OperatorPunctuatorToken::TPunctStar);
return RRHasToken;
}
else if (isdigit(buffer[position+1])) {
advanceChar2(buffer,position,loc);
_digits = Dec;
_litState = FracPart;
return readNumberToken(buffer, position, loc);
}
position += 1;
loc->charnum2 += 1;
_token = OperatorPunctuatorToken().setType(
OperatorPunctuatorToken::TPunct);
return RRHasToken;
case '?':
position += 1;
loc->charnum2 += 1;
_token = OperatorPunctuatorToken().setType(
OperatorPunctuatorToken::TQuery);
return RRHasToken;
case '*':
if (buffer.length() - position < 2) getMoreCharacters();
if (buffer[position+1] == '=') {
position += 2;
loc->charnum2 += 2;
_token = OperatorPunctuatorToken().setType(
OperatorPunctuatorToken::TMultAssign);
return RRHasToken;
}
position += 1;
loc->charnum2 += 1;
_token = OperatorPunctuatorToken().setType(
OperatorPunctuatorToken::TStar);
return RRHasToken;
case '/':
if (buffer.length() - position < 2) getMoreCharacters();
if (buffer[position+1] == '=') {
position += 2;
loc->charnum2 += 2;
_token = OperatorPunctuatorToken().setType(
OperatorPunctuatorToken::TDivideAssign);
return RRHasToken;
}
else if (buffer[position+1] == '/') {
position += 2;
loc->charnum2 += 2;
_context = '/';
return readEndComment(buffer, position, loc);
}
else if (buffer[position+1] == '*') {
position += 2;
loc->charnum2 += 2;
_context = '*';
return readEndComment(buffer, position, loc);
};
position += 1;
loc->charnum2 += 1;
_token = OperatorPunctuatorToken().setType(
OperatorPunctuatorToken::TDivide);
return RRHasToken;
case '%':
if (buffer.length() - position < 2) getMoreCharacters();
if (buffer[position+1] == '=') {
position += 2;
loc->charnum2 += 2;
_token = OperatorPunctuatorToken().setType(
OperatorPunctuatorToken::TModuloAssign);
return RRHasToken;
}
position += 1;
loc->charnum2 += 1;
_token = OperatorPunctuatorToken().setType(
OperatorPunctuatorToken::TProcent);
return RRHasToken;
case '~':
position += 1;
loc->charnum2 += 1;
_token = OperatorPunctuatorToken().setType(
OperatorPunctuatorToken::TTilda);
return RRHasToken;
case ',':
position += 1;
loc->charnum2 += 1;
_token = OperatorPunctuatorToken().setType(
OperatorPunctuatorToken::TComma);
return RRHasToken;
// case '#': // FIXME - fix interactions with checking for PP directives
// position += 1;
// loc->charnum2 += 1;
// _token = OperatorPunctuatorToken().setType(
// OperatorPunctuatorToken::THash);
// return RRHasToken;
case '\'':
position+=1;
loc->charnum2+=1;
_charLitKind = DLexer::CharacterLiteralToken::TChar;
return readCharLiteral(buffer,position,loc);
case 'L':
if (buffer[position+1] == '\'') {
position+=2;
loc->charnum2+=2;
_charLitKind = DLexer::CharacterLiteralToken::TWideChar;
return readCharLiteral(buffer,position,loc);
} else break;
case '0':
advanceChar2(buffer,position,loc);
_litState = LeadZero;
return readNumberToken(buffer,position,loc);
case '"':
size_t i, n;
for (i = 1, n = buffer.length();
i < n && buffer[position+i] != '"' && buffer[position+i-1] != '\\';
i++);
if (position + i >= n) {
std::cerr << "no closing double quotes before EOF" << std::endl;
break;
}
std::string literal(buffer, position, i+1);
DLexer::StringLiteralToken* result =
new DLexer::StringLiteralToken(literal);
_token = DLexer::Token(result);
position += i+1;
return RRHasToken;
};
if (isalpha(ch) || ch == '_') {
ReadResult res = readIdentifierToken(buffer, position, loc);
if (res == RRHasToken) {
_token = expandIfMacro(_token,res);
if (res == RRContinueLexing) return RRContinueLexing;
}
return res;
};
if (isdigit(ch)) {
advanceChar2(buffer,position,loc);
_digits = Dec;
_litState = Digits;
return readNumberToken(buffer, position, loc);
};
if (isUtf8SymbolStart(ch)) {
return readUtf8Symbol(buffer,position,loc);
}
std::ostringstream msg;
if (ch == '#') {
size_t start = position;
advanceChar1NoToken(buffer, position, loc);
handlePPDirectiveInACSL(buffer, position, start, loc);
return RRContinueLexing;
} else {
// illegal character. Just add an error message.
msg << "unknown character C: " << ch << ".";
_errorList.push_back(Error(loc, msg.str()));
++position; // otherwise, we're stuck in an infinite loop
}
return RRContinueLexing;
}
// FIXME - no check for out of place ELSE
const int PP_NO_ANNOTATION = 0; // Not in an if/ifdef/ifndef block
const int PP_IN_IF = 1; // Processing current statements
const int PP_SKIP_TO_ELIF = 2; // Skipping to a future elseif to test it
const int PP_SKIP_TO_END = 3; // Processed a true block, now skipping all elses to an endif
// PP_NO_ANNOTATION >> if/ifdef/ifndef true >> push current state, new state is PP_IN_IF
// PP_NO_ANNOTATION >> if/ifdef/ifndef false >> PP_SKIP_TO_ELIF
// PP_NO_ANNOTATION >> else/elif/endif : ERROR
// PP_IN_IF >> else/elif >> PP_SKIP_TO_END
// PP_IN_IF >> endif >> pop state (an error if state stack is empty)
// PP_IN_IF >> if/ifdef/ifndef >> nesting, push current state, new state is PP_IN_IF
// PP_SKIP_TO_ELIF >> else >> PP_IN_IF
// PP_SKIP_TO_ELIF >> elif true >> PP_IN_IF
// PP_SKIP_TO_ELIF >> elif false >> PP_SKIP_TO_ELIF
// PP_SKIP_TO_ELIF >> endif >> pop state (an error if state stack is empty)
// PP_SKIP_TO_ELIF >> if/ifdef/ifndef >> nesting, push current state, new state is PP_SKIP_TO_END
// PP_SKIP_TO_END >> if/ifdef/ifndef >> nesting, push current state, new state is PP_SKIP_TO_END
// PP_SKIP_TO_END >> else/elif >> PP_SKIP_TO_END
// PP_SKIP_TO_END >> endif >> pop state (an error if state stack is empty)
// FIXME - are unknown directives in skipped blocks errors?
int ppstate = PP_NO_ANNOTATION;
// Constant globals for the pre-defined preprocessor directives
const std::string pp_empty(" ");
const std::string pp_ifdef("ifdef");
const std::string pp_ifndef("ifndef");
const std::string pp_if("if");
const std::string pp_endif("endif");
const std::string pp_else("else");
const std::string pp_elif("elif");
const std::string pp_error("error");
const std::string pp_warning("warning");
const std::string pp_line("line");
const std::string pp_include("include");
const std::string pp_define("define");
const std::string pp_undef("undef");
const std::string pp_pragma("pragma");
std::stack<int> state_stack;
void Lexer::complainAboutExcessMaterial(const std::string& buffer, size_t& position, location loc, bool warn) {
if (warn) {
Error junkerror(loc,"");
const std::string junk = getPreprocessorToken(buffer,position,loc,false,true);
if (!junk.empty()) {
std::ostringstream msg;
msg << "Invalid excess material after preprocessor directive: " << junk;
junkerror.message = msg.str();
_errorList.push_back(junkerror);
}
}
while (!getPreprocessorToken(buffer,position,loc,false,true).empty()) {}
}
// Requires that 'start' is the position of the '#' character
// and that the current token is the identifier following the '#' character
// (perhaps with intervening whitespace)
// and that the current position is just after the end of that identifier.
// Also presumes that the entire ACSL annotation (C++ comment is in buffer).
void Lexer::handlePPDirectiveInACSL(const std::string& buffer, size_t& position, size_t start, location loc) {
const clang::SourceManager& sourceMgr = _clangSema->getPreprocessor().getSourceManager();
const clang::FileID fileID = sourceMgr.getFileID(_clangSourceLocation);
clang::SourceLocation hashloc = sourceMgr.translateLineCol(fileID,loc->linenum1,loc->charnum1);
skipSpace(buffer,position,loc,false);
clang::SourceLocation idloc = sourceMgr.translateLineCol(fileID,loc->linenum1,loc->charnum1);
size_t p = position; // first char of identifier
Lexer::readIdentifierToken(buffer, position, loc);
// We either have a non-empty identifier, or a keyword, or an empty identifier
// (if the next character is not a valid identifier start character, such as punctuation)
std::string id(buffer,p,position-p);
if (position == p) id = " ";
std::ostringstream msg;
bool complain;
while (true) {
complain = true;
if (id.empty()) { // End of ACSL annotation with no directive
if (ppstate != PP_NO_ANNOTATION) {
msg << "ACSL annotation ended with missing #endif directive";
_errorList.push_back(Error(loc, msg.str())); // FIXME - put in the location of the if?
}
ppstate = PP_NO_ANNOTATION;
complain = false;
} else if (id == pp_if) {
state_stack.push(ppstate);
skipUpToEndOfLine(buffer,position,loc);
complain = false;
if (ppstate == PP_NO_ANNOTATION) {
if (!_ppCheckOnly) {
std::string slice(buffer,start,position-start);
msg << "#if is not implemented (Presuming true): " << slice;
_clangSema->Diags.Report(idloc,ppError) << msg.str();
}
ppstate = PP_IN_IF;
} else if (ppstate == PP_IN_IF) {
if (!_ppCheckOnly) {
std::string slice(buffer,start,position-start);
msg << "#if is not implemented (Presuming true): " << slice;
_clangSema->Diags.Report(idloc,ppError) << msg.str();
}
ppstate = PP_IN_IF;
} else {
skipUpToEndOfLine(buffer,position,loc);
std::string slice(buffer,start,position-start);
_errorList.push_back(Error(loc, msg.str()));
ppstate = PP_SKIP_TO_END;
}
} else if (id == pp_elif) {
skipUpToEndOfLine(buffer,position,loc);
complain = false;
if (!_ppCheckOnly) {
std::string slice(buffer,start,position-start);
msg << "#elif is not implemented (Presuming false): " << slice;
_clangSema->Diags.Report(idloc,ppError) << msg.str();
}
ppstate = PP_SKIP_TO_ELIF;
} else if (id == pp_ifdef || id == pp_ifndef) {
state_stack.push(ppstate);
bool negate = id == pp_ifndef;
const std::string idd = getPreprocessorToken(buffer,position,loc,false,true);
if (ppstate == PP_NO_ANNOTATION || ppstate == PP_IN_IF) {
bool useBody = (negate != isDefinedMacro(idd));
if (!useBody) {
ppstate = PP_SKIP_TO_ELIF;
} else {
ppstate = PP_IN_IF;
}
} else {
ppstate = PP_SKIP_TO_END;
}
} else if (id == pp_endif) {
if (ppstate == PP_NO_ANNOTATION) {
msg << "No matching #if for this #endif (ignoring line)";
_clangSema->Diags.Report(idloc,ppError) << msg.str();
removeFromRevision(start);
} else {
ppstate = state_stack.top();
state_stack.pop();
}
} else if (id == pp_else) {
if (ppstate == PP_NO_ANNOTATION) {
msg << "No matching #if for this #else (ignoring line)";
_clangSema->Diags.Report(idloc,ppError) << msg.str();
removeFromRevision(start);
} else if (ppstate == PP_IN_IF) {
ppstate = PP_SKIP_TO_END;
} else if (ppstate == PP_SKIP_TO_ELIF) {
ppstate = PP_IN_IF;
} else if (ppstate == PP_SKIP_TO_END) {
// continue skipping to the next endif
}
} else if (id == pp_error || id == pp_warning) {
// FIXME - does not check that the remainder of the line consists of valid tokens
// Content is NOT macro substituted, per cpp documentation
const std::string rest = skipUpToEndOfLine(buffer,position,loc);
if (!_ppCheckOnly && (ppstate == PP_IN_IF || ppstate == PP_NO_ANNOTATION)) {
_clangSema->Diags.Report(idloc,ppError) << rest;
}
complain = false;
} else if (id == pp_include) {
// FIXME - does not check that the remainder of the line consists of valid tokens
// Content is NOT macro substituted, per cpp documentation
const std::string rest = skipUpToEndOfLine(buffer,position,loc);
// if (!_ppCheckOnly && (ppstate == PP_IN_IF || ppstate == PP_NO_ANNOTATION)) {
// _clangSema->Diags.Report(idloc,ppError) << rest;
// }
complain = false;
} else if (id == pp_line) {
_clangSema->Diags.Report(idloc,ppWarning) << "#line directive not yet implemented";
// Error number_error(loc,"");
// const std::string number = getPreprocessorToken(buffer,position,loc,false,false);
// int line;
// bool failed = true;
// if (number.empty()) {
// msg << "No line number to read (ignoring #line directive)";
// error.message = msg.str();
// _errorList.push_back(error);
// removeFromRevision(start);
// } else if (number.find_first_not_of("0123456789",0,10) != std::string::npos) {
// msg << "The number token in a #line directive must be all digits: " << number;
// number_error.message = msg.str();
// _errorList.push_back(number_error);
// removeFromRevision(start);
// } else try {
// size_t p;
// //line = std::string::stoi(number, &p);
// line = atoi(number.c_str()); // FIXME - does not find errors
// failed = false;
// } catch (std::exception e) {
// msg << "Failed to read line number (ignoring #line directive): " << number;
// number_error.message = msg.str();
// _errorList.push_back(number_error);
// removeFromRevision(start);
// }
// if (failed) {
// skipUpToEndOfLine(buffer,position,loc);
// complain = false;
// } else {
// loc->linenum2 = line - 1; // The -1 is because we have yet to read the line termination at the end of line directive and the number in the line directive is the new number of the **next** line
// Error filename_error(loc,"");
// std::string filename = getPreprocessorToken(buffer,position,loc,false,false);
// if (filename.empty()) {
// // No filename present - OK
// } else if (filename.c_str()[0] == '"') {
// // OK - got a string
// loc->filename2 = strdup(filename.c_str());
// } else {
// msg << "Failed to read filename (ignoring directive): " << std::string(buffer,start,position-start);;
// filename_error.message = msg.str();
// _errorList.push_back(filename_error);
// removeFromRevision(start);
// }
// //skipUpToEndOfLine(buffer,position,loc);
// // FIXME - complain about dangling material
// // FIXME - locations for error messages on line and filename are not correct
// // FIXME - needs macro expansion
// // FIXME - how are comments supposed to be handled
// // FIXME - are line directive used when in text that is being skipped
// // FIXME - check for dangling #if blocks at end of annotation
// }
} else if (id == pp_define || id == pp_undef || id == pp_pragma) {
skipUpToEndOfLine(buffer,position,loc);
complain = false;
std::string slice(buffer,start,position-start);
msg << "Preprocessor directive is not permitted in ACSL++ annotation (ignoring line): " << slice;
_clangSema->Diags.Report(idloc,ppError) << msg.str();
removeFromRevision(start);
} else if (id == " ") { // missing directive
skipUpToEndOfLine(buffer,position,loc);
complain = false;
std::string slice(buffer,start,position-start);
msg << "Missing preprocessor directive (ignoring line): " << slice;
_clangSema->Diags.Report(hashloc,ppError) << msg.str();
removeFromRevision(start);
} else { // unknown directive
skipUpToEndOfLine(buffer,position,loc);
complain = false;
std::string slice(buffer,start,position-start);
msg << "Invalid preprocessor directive [" << id << "] (ignoring line): " << slice;
_clangSema->Diags.Report(idloc,ppError) << msg.str();
removeFromRevision(start);
}
complainAboutExcessMaterial(buffer,position,loc,complain);
if (ppstate == PP_NO_ANNOTATION || ppstate == PP_IN_IF) break;
id = skipToNextPPDirective(buffer,position,loc);
}
}
//! Puts in comment markers to comment out a bad directive (to the end of line);
//! 'start' must be the position in the buffer of the '#' pp directive character
// FIXME - not valid if there is an embedded multi-line comment
// FIXME - not valid if there is a # immediately followed by newline
void
Lexer::removeFromRevision(size_t start) {
if (_revised[start+1] == '\n') {
_revised[start] = ' ';
} else {
_revised[start] = '/';
_revised[start+1] = '/';
}
while (_revised[start] != '\n' && _revised[start] != 0) {
_revised[start] = ' ';
start++;
}
}
// Advances position to the end-of-line character (or end of file), returning
// a string of the skipped over contents and updating 'position'
const std::string Lexer::skipUpToEndOfLine(const std::string& buffer, size_t& position, location loc) {
size_t start = position;
while (true) {
char ch = buffer[position];
if (ch == '\n' || ch == '\r' || ch == '\0') {
return std::string(buffer,start,position-start);
}
advanceChar1NoToken(buffer,position,loc);
}
}
const std::string Lexer::getAndTrimRestOfLine(const std::string& buffer, size_t& position, location loc) {
while (true) {
char ch = buffer[position];
if (!std::isspace(ch)) break;
advanceChar1NoToken(buffer,position,loc);
}
size_t start = position;
while (true) {
char ch = buffer[position];
if (std::isspace(ch)) break;
if ((ch == '\n' || ch == '\r' || ch == '\0')) break;
advanceChar1NoToken(buffer,position,loc);
}
size_t end = position;
while (true) {
char ch = buffer[position];
if ((ch == '\n' || ch == '\r' || ch == '\0')) break;
advanceChar1NoToken(buffer,position,loc);
}
return std::string(buffer, start, end - start);
}
// Advances character position
char Lexer::advanceChar1NoToken(const std::string& buffer, size_t& position, location loc) {
char ch;
while (true) {
position++;
loc->charnum1++;
ch = buffer[position];
if (ch == '\\') {
size_t p = position;
char c;
do {
c = buffer[++p];
} while (c != '\n' && (isspace(c) || c == '@'));
if (c == '\n') {
// we just saw a backslash-whitespace-newline sequence; ignore it.
loc->linenum1++;
loc->charnum1 = 0;
position = p;
continue;
}
}
break;
}
return ch;
}
// Advances character position
char Lexer::advanceChar2NoToken(const std::string& buffer, size_t& position, location loc) {
char ch;
while (true) {
position++;
loc->charnum2++;
ch = buffer[position];
if (ch == '\\') {
size_t p = position;
char c;
do {
c = buffer[++p];
} while (c != '\n' && (isspace(c) || c == '@'));
if (c == '\n') {
// we just saw a backslash-whitespace-newline sequence; ignore it.
loc->linenum1++;
loc->charnum2 = 0;
position = p;
continue;
}
}
break;
}
return ch;
}
bool Lexer::isDefinedMacro(const std::string& id) {
clang::IdentifierInfo& identifierInfo
= _clangSema->getPreprocessor().getIdentifierTable().get(id);
return identifierInfo.hasMacroDefinition();
}
clang::MacroInfo*
Lexer::getMacro(const std::string& name) const {
clang::IdentifierInfo& identifierInfo
= _clangSema->getPreprocessor().getIdentifierTable().get(name);
return _clangSema->getPreprocessor().getMacroInfo(&identifierInfo);
}
const std::string Lexer::skipToNextPPDirective(const std::string& buffer, size_t& position, location loc) {
char ch;
while (true) {
unsigned int n = loc->linenum1;
skipSpace(buffer, position, loc); // Use this routine so lines are properly counted
ch = buffer[position];
if (ch == '\0') return ""; // No token found
bool atLineStart = (size_t) n != loc->linenum1;
if (atLineStart && ch == '#') {
n = loc->linenum1;
advanceChar1NoToken(buffer,position,loc);
skipSpace(buffer,position,loc);
if ((size_t) n != loc->linenum1) {
// FIXME - does not handle lines with only comments after the #
// Empty directive
return std::string(" ");
}
size_t start = position;
while (!isspace(ch) && ch != '\0') {
ch = advanceChar1NoToken(buffer,position,loc);
}
size_t end = position;
return std::string(buffer,start,end-start);
} else {
atLineStart = false;
ch = advanceChar1NoToken(buffer,position,loc); // Must not be a line ending
}
}
}
// FIXME replicates Clang_utils::makeLocation
// FIXME - I think the strdup strings here are never deleted, or else multiply deleted
location
Lexer::makeLocation(clang::SourceLocation source) const {
clang::SourceManager& sm = _clangSema->getPreprocessor().getSourceManager();
clang::PresumedLoc PLoc = sm.getPresumedLoc(source);
if (PLoc.isValid()) {
const char* f = strdup(PLoc.getFilename());
size_t line = PLoc.getLine();
size_t col = PLoc.getColumn();
return cons_location(f,line,col,strdup(f),line,col);
} else {
return cons_location("",0,0,"",0,0);
}
}
DLexer::Token
Lexer::convertClangTokenToACSLToken(const clang::Token& source) const {
switch (source.getKind()) {
case clang::tok::eof: // End of file.
case clang::tok::eod: // End of preprocessing directive (end of line inside
// a directive).
case clang::tok::code_completion: // Code completion marker
assert(false); // unimplemented
case clang::tok::comment:
{ DLexer::CommentToken* result = new DLexer::CommentToken;
result->content() = std::string(source.getLiteralData(),
source.getLength());
return DLexer::Token(result);
};
// C99 6.4.2: Identifiers.
case clang::tok::identifier: // abcde123
{ clang::IdentifierInfo* identifierInfo = source.getIdentifierInfo();
std::string text = std::string(identifierInfo->getNameStart(),
identifierInfo->getLength());
KeywordToken::Map::const_iterator found =
KeywordToken::_unprotectedKeywords.find(text);
if (found != KeywordToken::_unprotectedKeywords.end()) {
KeywordToken* result = new KeywordToken;
result->setType(found->second);
return DLexer::Token(result);
}
found =
KeywordToken::_unicodeKeywords.find(text);
if (found != KeywordToken::_unicodeKeywords.end()) {
KeywordToken* result = new KeywordToken;
result->setType(found->second);
return DLexer::Token(result);
}
OperatorPunctuatorToken::Map::const_iterator foundp =
OperatorPunctuatorToken::_unicodePunctuators.find(text);
if (foundp != OperatorPunctuatorToken::_unicodePunctuators.end()) {
OperatorPunctuatorToken* result = new OperatorPunctuatorToken;
result->setType(foundp->second);
return DLexer::Token(result);
}
{
IdentifierToken* result = new IdentifierToken(text);
return Token(AbstractToken::TIdentifier,result);
}
};
case clang::tok::raw_identifier: // Used only in raw lexing mode.
{ DLexer::IdentifierToken* result = new DLexer::IdentifierToken;
result->content() = source.getRawIdentifier().str();
return DLexer::Token(result);
};
// C99 6.4.4.1: Integer Constants
// C99 6.4.4.2: Floating Constants
case clang::tok::numeric_constant:
// Clang does not distinguish between integer and floating literals:
// we have to lex the constant properly ourselves.
{ llvm::SmallString<128> spellingBuffer;
spellingBuffer.resize(source.getLength() + 1);
bool isInvalid = false;
std::string tokSpelling = _clangSema->getPreprocessor()
.getSpelling(source, spellingBuffer, &isInvalid).str();
if (isInvalid)
return DLexer::Token();
// FIXME - better to use a specialized routine than to make a whole new literal?
Acsl::Lexer lexer(_clangSema);
lexer.setBuffer(tokSpelling, _clangSourceLocation, 0, false);
ReadResult readResult;
do {
readResult = lexer.readToken();
} while (readResult <= Acsl::Lexer::RRContinueLexing);
return lexer.extractToken();
};
// C99 6.4.4: Character Constants
case clang::tok::char_constant: // 'a'
case clang::tok::wide_char_constant: // L'b'
// C++0x Character Constants
case clang::tok::utf16_char_constant: // u'a'
case clang::tok::utf32_char_constant: // U'a'
// see clang::Sema::ActOnCharacterConstant
{ llvm::SmallString<16> charBuffer;
bool isInvalid = false;
std::string thisTok = _clangSema->getPreprocessor().getSpelling(source,
charBuffer, &isInvalid).str();
if (isInvalid)
return DLexer::Token();
ReadResult readResult;
size_t position = 1;
Acsl::Lexer lexer(_clangSema);
lexer.setBuffer(thisTok, _clangSourceLocation, position, false); // FIXME - is this needed
do {
if (thisTok[0] == '\'') {
lexer._charLitKind = CharacterLiteralToken::TChar;
}
else if (thisTok[0] == 'L' && thisTok[1] == '\'') {
lexer._charLitKind = CharacterLiteralToken::TWideChar;
position++;
}
readResult = lexer.readCharLiteral(thisTok, position, lexer.seeTokenLocation());
} while (readResult <= Acsl::Lexer::RRContinueLexing);
// FIXME - free the location
Token tt = lexer.extractToken();
return tt;
};
// C99 6.4.5: String Literals.
case clang::tok::string_literal: // "foo"
case clang::tok::wide_string_literal: // L"foo"
#if CLANG_VERSION_MAJOR < 9
case clang::tok::angle_string_literal:// <foo>
#endif
// C++0x String Literals.
case clang::tok::utf8_string_literal: // u8"foo"
case clang::tok::utf16_string_literal:// u"foo"
case clang::tok::utf32_string_literal:// U"foo"
// see clang::Sema::ActOnStringLiteral
// [TODO] note that multiple string tokens should be converted into
// one string token!
{ DLexer::StringLiteralToken* result = new DLexer::StringLiteralToken(
std::string(source.getLiteralData(), source.getLength()));
return DLexer::Token(result);
};
// C99 6.4.6: Punctuators.
case clang::tok::l_square:
return DLexer::Token(DLexer::OperatorPunctuatorToken()
.setType(DLexer::OperatorPunctuatorToken::TOpenBracket));
case clang::tok::r_square:
return DLexer::Token(DLexer::OperatorPunctuatorToken()
.setType(DLexer::OperatorPunctuatorToken::TCloseBracket));
case clang::tok::l_paren:
return DLexer::Token(DLexer::OperatorPunctuatorToken()
.setType(DLexer::OperatorPunctuatorToken::TOpenParen));
case clang::tok::r_paren:
return DLexer::Token(DLexer::OperatorPunctuatorToken()
.setType(DLexer::OperatorPunctuatorToken::TCloseParen));
case clang::tok::l_brace:
return DLexer::Token(DLexer::OperatorPunctuatorToken()
.setType(DLexer::OperatorPunctuatorToken::TOpenBrace));
case clang::tok::r_brace:
return DLexer::Token(DLexer::OperatorPunctuatorToken()
.setType(DLexer::OperatorPunctuatorToken::TCloseBrace));
case clang::tok::period:
return DLexer::Token(DLexer::OperatorPunctuatorToken()
.setType(DLexer::OperatorPunctuatorToken::TPunct));
case clang::tok::ellipsis:
return DLexer::Token(DLexer::OperatorPunctuatorToken()
.setType(DLexer::OperatorPunctuatorToken::TEllipsis));
case clang::tok::amp:
return DLexer::Token(DLexer::OperatorPunctuatorToken()
.setType(DLexer::OperatorPunctuatorToken::TAmpersand));
case clang::tok::ampamp:
return DLexer::Token(DLexer::OperatorPunctuatorToken()
.setType(DLexer::OperatorPunctuatorToken::TLogicalAnd));
case clang::tok::ampequal:
return DLexer::Token(DLexer::OperatorPunctuatorToken()
.setType(DLexer::OperatorPunctuatorToken::TBitAndAssign));
case clang::tok::star:
return DLexer::Token(DLexer::OperatorPunctuatorToken()
.setType(DLexer::OperatorPunctuatorToken::TStar));
case clang::tok::starequal:
return DLexer::Token(DLexer::OperatorPunctuatorToken()
.setType(DLexer::OperatorPunctuatorToken::TMultAssign));
case clang::tok::plus:
return DLexer::Token(DLexer::OperatorPunctuatorToken()
.setType(DLexer::OperatorPunctuatorToken::TPlus));
case clang::tok::plusplus:
return DLexer::Token(DLexer::OperatorPunctuatorToken()
.setType(DLexer::OperatorPunctuatorToken::TPlusPlus));
case clang::tok::plusequal:
return DLexer::Token(DLexer::OperatorPunctuatorToken()
.setType(DLexer::OperatorPunctuatorToken::TPlusAssign));
case clang::tok::minus:
return DLexer::Token(DLexer::OperatorPunctuatorToken()
.setType(DLexer::OperatorPunctuatorToken::TMinus));
case clang::tok::arrow:
return DLexer::Token(DLexer::OperatorPunctuatorToken()
.setType(DLexer::OperatorPunctuatorToken::TArrow));
case clang::tok::minusminus:
return DLexer::Token(DLexer::OperatorPunctuatorToken()
.setType(DLexer::OperatorPunctuatorToken::TMinusMinus));
case clang::tok::minusequal:
return DLexer::Token(DLexer::OperatorPunctuatorToken()
.setType(DLexer::OperatorPunctuatorToken::TMinusAssign));
case clang::tok::tilde:
return DLexer::Token(DLexer::OperatorPunctuatorToken()
.setType(DLexer::OperatorPunctuatorToken::TTilda));
case clang::tok::exclaim:
return DLexer::Token(DLexer::OperatorPunctuatorToken()
.setType(DLexer::OperatorPunctuatorToken::TNot));
case clang::tok::exclaimequal:
return DLexer::Token(DLexer::OperatorPunctuatorToken()
.setType(DLexer::OperatorPunctuatorToken::TDifferent));
case clang::tok::slash:
return DLexer::Token(DLexer::OperatorPunctuatorToken()
.setType(DLexer::OperatorPunctuatorToken::TDivide));
case clang::tok::slashequal:
return DLexer::Token(DLexer::OperatorPunctuatorToken()
.setType(DLexer::OperatorPunctuatorToken::TDivideAssign));
case clang::tok::percent:
return DLexer::Token(DLexer::OperatorPunctuatorToken()
.setType(DLexer::OperatorPunctuatorToken::TProcent));
case clang::tok::percentequal:
return DLexer::Token(DLexer::OperatorPunctuatorToken()
.setType(DLexer::OperatorPunctuatorToken::TModuloAssign));
case clang::tok::less:
return DLexer::Token(DLexer::OperatorPunctuatorToken()
.setType(DLexer::OperatorPunctuatorToken::TLess));
case clang::tok::lessless:
return DLexer::Token(DLexer::OperatorPunctuatorToken()
.setType(DLexer::OperatorPunctuatorToken::TLeftShift));
case clang::tok::lessequal:
return DLexer::Token(DLexer::OperatorPunctuatorToken()
.setType(DLexer::OperatorPunctuatorToken::TLessOrEqual));
case clang::tok::lesslessequal:
return DLexer::Token(DLexer::OperatorPunctuatorToken()
.setType(DLexer::OperatorPunctuatorToken::TLeftShiftAssign));
case clang::tok::greater:
return DLexer::Token(DLexer::OperatorPunctuatorToken()
.setType(DLexer::OperatorPunctuatorToken::TGreater));
case clang::tok::greatergreater:
return DLexer::Token(DLexer::OperatorPunctuatorToken()
.setType(DLexer::OperatorPunctuatorToken::TRightShift));
case clang::tok::greaterequal:
return DLexer::Token(DLexer::OperatorPunctuatorToken()
.setType(DLexer::OperatorPunctuatorToken::TGreaterOrEqual));
case clang::tok::greatergreaterequal:
return DLexer::Token(DLexer::OperatorPunctuatorToken()
.setType(DLexer::OperatorPunctuatorToken::TRightShiftAssign));
case clang::tok::caret:
return DLexer::Token(DLexer::OperatorPunctuatorToken()
.setType(DLexer::OperatorPunctuatorToken::TBitXor));
case clang::tok::caretequal:
return DLexer::Token(DLexer::OperatorPunctuatorToken()
.setType(DLexer::OperatorPunctuatorToken::TBitXorAssign));
case clang::tok::pipe:
return DLexer::Token(DLexer::OperatorPunctuatorToken()
.setType(DLexer::OperatorPunctuatorToken::TBitOr));
case clang::tok::pipepipe:
return DLexer::Token(DLexer::OperatorPunctuatorToken()
.setType(DLexer::OperatorPunctuatorToken::TLogicalOr));
case clang::tok::pipeequal:
return DLexer::Token(DLexer::OperatorPunctuatorToken()
.setType(DLexer::OperatorPunctuatorToken::TBitOrAssign));
case clang::tok::question:
return DLexer::Token(DLexer::OperatorPunctuatorToken()
.setType(DLexer::OperatorPunctuatorToken::TQuery));
case clang::tok::colon:
return DLexer::Token(DLexer::OperatorPunctuatorToken()
.setType(DLexer::OperatorPunctuatorToken::TColon));
case clang::tok::semi:
return DLexer::Token(DLexer::OperatorPunctuatorToken()
.setType(DLexer::OperatorPunctuatorToken::TSemiColon));
case clang::tok::equal:
return DLexer::Token(DLexer::OperatorPunctuatorToken()
.setType(DLexer::OperatorPunctuatorToken::TAssign));
case clang::tok::equalequal:
return DLexer::Token(DLexer::OperatorPunctuatorToken()
.setType(DLexer::OperatorPunctuatorToken::TEqual));
case clang::tok::comma:
return DLexer::Token(DLexer::OperatorPunctuatorToken()
.setType(DLexer::OperatorPunctuatorToken::TComma));
case clang::tok::hash:
return DLexer::Token(DLexer::OperatorPunctuatorToken()
.setType(DLexer::OperatorPunctuatorToken::THash));
case clang::tok::hashhash:
case clang::tok::hashat:
notImplemented(source);
assert(false);
// C++ Support
case clang::tok::periodstar:
return DLexer::Token(DLexer::OperatorPunctuatorToken()
.setType(DLexer::OperatorPunctuatorToken::TPunctStar));
case clang::tok::arrowstar:
return DLexer::Token(DLexer::OperatorPunctuatorToken()
.setType(DLexer::OperatorPunctuatorToken::TArrowStar));
case clang::tok::coloncolon:
return DLexer::Token(DLexer::OperatorPunctuatorToken()
.setType(DLexer::OperatorPunctuatorToken::TColonColon));
// Objective C support.
case clang::tok::at:
assert(false); // unimplemented
// CUDA support.
case clang::tok::lesslessless:
case clang::tok::greatergreatergreater:
notImplemented(source);
assert(false);
// C99 6.4.1: Keywords. These turn into kw_* tokens.
case clang::tok::kw_auto:
return DLexer::Token(DLexer::KeywordToken()
.setType(DLexer::KeywordToken::TAuto));
case clang::tok::kw_break:
notImplemented(source);
assert(false);
// return DLexer::Token(DLexer::KeywordToken()
// .setType(DLexer::KeywordToken::TBreak));
case clang::tok::kw_case:
return DLexer::Token(DLexer::KeywordToken()
.setType(DLexer::KeywordToken::TCase));
case clang::tok::kw_char:
return DLexer::Token(DLexer::KeywordToken()
.setType(DLexer::KeywordToken::TChar));
case clang::tok::kw_const:
return DLexer::Token(DLexer::KeywordToken()
.setType(DLexer::KeywordToken::TConst));
case clang::tok::kw_continue:
notImplemented(source);
assert(false);
// return DLexer::Token(DLexer::KeywordToken()
// .setType(DLexer::KeywordToken::TContinue));
case clang::tok::kw_default:
return DLexer::Token(new DLexer::IdentifierToken(text(source)));
case clang::tok::kw_do:
assert(false); // unimplemented
// return DLexer::Token(DLexer::KeywordToken()
// .setType(DLexer::KeywordToken::TDo));
case clang::tok::kw_double:
return DLexer::Token(DLexer::KeywordToken()
.setType(DLexer::KeywordToken::TDouble));
case clang::tok::kw_else:
return DLexer::Token(DLexer::KeywordToken()
.setType(DLexer::KeywordToken::TElse));
case clang::tok::kw_enum:
return DLexer::Token(DLexer::KeywordToken()
.setType(DLexer::KeywordToken::TEnum));
case clang::tok::kw_extern:
assert(false); // unimplemented
// return DLexer::Token(DLexer::KeywordToken()
// .setType(DLexer::KeywordToken::TExtern));
case clang::tok::kw_float:
return DLexer::Token(DLexer::KeywordToken()
.setType(DLexer::KeywordToken::TFloat));
case clang::tok::kw_for:
return DLexer::Token(DLexer::KeywordToken()
.setType(DLexer::KeywordToken::TFor));
case clang::tok::kw_goto:
assert(false); // unimplemented
// return DLexer::Token(DLexer::KeywordToken()
// .setType(DLexer::KeywordToken::TGoto));
case clang::tok::kw_if:
return DLexer::Token(DLexer::KeywordToken()
.setType(DLexer::KeywordToken::TIf));
case clang::tok::kw_inline:
assert(false); // unimplemented
// return DLexer::Token(DLexer::KeywordToken()
// .setType(DLexer::KeywordToken::TInline));
case clang::tok::kw_int:
return DLexer::Token(DLexer::KeywordToken()
.setType(DLexer::KeywordToken::TInt));
case clang::tok::kw_long:
return DLexer::Token(DLexer::KeywordToken()
.setType(DLexer::KeywordToken::TLong));
case clang::tok::kw_register:
assert(false); // unimplemented
// return DLexer::Token(DLexer::KeywordToken()
// .setType(DLexer::KeywordToken::TRegister));
case clang::tok::kw_restrict:
assert(false); // unimplemented
// return DLexer::Token(DLexer::KeywordToken()
// .setType(DLexer::KeywordToken::TRestrict));
case clang::tok::kw_return:
assert(false); // unimplemented
// return DLexer::Token(DLexer::KeywordToken()
// .setType(DLexer::KeywordToken::TReturn));
case clang::tok::kw_short:
return DLexer::Token(DLexer::KeywordToken()
.setType(DLexer::KeywordToken::TShort));
case clang::tok::kw_signed:
return DLexer::Token(DLexer::KeywordToken()
.setType(DLexer::KeywordToken::TSigned));
case clang::tok::kw_sizeof:
return DLexer::Token(DLexer::KeywordToken()
.setType(DLexer::KeywordToken::TSizeof));
case clang::tok::kw_static:
return DLexer::Token(DLexer::KeywordToken()
.setType(DLexer::KeywordToken::TStatic));
case clang::tok::kw_struct:
return DLexer::Token(DLexer::KeywordToken()
.setType(DLexer::KeywordToken::TStruct));
case clang::tok::kw_switch:
assert(false); // unimplemented
// return DLexer::Token(DLexer::KeywordToken()
// .setType(DLexer::KeywordToken::TSwitch));
case clang::tok::kw_typedef:
assert(false); // unimplemented
// return DLexer::Token(DLexer::KeywordToken()
// .setType(DLexer::KeywordToken::TTypedef));
case clang::tok::kw_union:
return DLexer::Token(DLexer::KeywordToken()
.setType(DLexer::KeywordToken::TUnion));
case clang::tok::kw_unsigned:
return DLexer::Token(DLexer::KeywordToken()
.setType(DLexer::KeywordToken::TUnsigned));
case clang::tok::kw_void:
return DLexer::Token(DLexer::KeywordToken()
.setType(DLexer::KeywordToken::TVoid));
case clang::tok::kw_volatile:
return DLexer::Token(DLexer::KeywordToken()
.setType(DLexer::KeywordToken::TVolatile));
case clang::tok::kw_while:
return DLexer::Token(DLexer::KeywordToken()
.setType(DLexer::KeywordToken::TWhile));
case clang::tok::kw__Alignas:
case clang::tok::kw__Alignof:
case clang::tok::kw__Atomic:
case clang::tok::kw__Bool:
case clang::tok::kw__Complex:
case clang::tok::kw__Generic:
case clang::tok::kw__Imaginary:
case clang::tok::kw__Static_assert:
case clang::tok::kw___func__:
case clang::tok::kw___objc_yes:
case clang::tok::kw___objc_no:
assert(false); // unimplemented
// C++ 2.11p1: Keywords.
case clang::tok::kw_asm:
assert(false); // unimplemented
// return DLexer::Token(DLexer::KeywordToken()
// .setType(DLexer::KeywordToken::TAsm));
case clang::tok::kw_bool:
return DLexer::Token(DLexer::KeywordToken()
.setType(DLexer::KeywordToken::TBool));
case clang::tok::kw_catch:
assert(false); // unimplemented
// return DLexer::Token(DLexer::KeywordToken()
// .setType(DLexer::KeywordToken::TCatch));
case clang::tok::kw_class:
return DLexer::Token(DLexer::KeywordToken()
.setType(DLexer::KeywordToken::TClass));
case clang::tok::kw_const_cast:
return DLexer::Token(DLexer::KeywordToken()
.setType(DLexer::KeywordToken::TConstCast));
case clang::tok::kw_delete:
assert(false); // unimplemented
// return DLexer::Token(DLexer::KeywordToken()
// .setType(DLexer::KeywordToken::TDelete));
case clang::tok::kw_dynamic_cast:
return DLexer::Token(DLexer::KeywordToken()
.setType(DLexer::KeywordToken::TDynamicCast));
case clang::tok::kw_explicit:
assert(false); // unimplemented
// return DLexer::Token(DLexer::KeywordToken()
// .setType(DLexer::KeywordToken::TExplicit));
case clang::tok::kw_export:
assert(false); // unimplemented
// return DLexer::Token(DLexer::KeywordToken()
// .setType(DLexer::KeywordToken::TExport));
case clang::tok::kw_false:
return DLexer::Token(DLexer::KeywordToken()
.setType(DLexer::KeywordToken::TFalse));
case clang::tok::kw_friend:
notImplemented(source);
assert(false);
// return DLexer::Token(DLexer::KeywordToken()
// .setType(DLexer::KeywordToken::TFriend));
case clang::tok::kw_mutable:
notImplemented(source);
assert(false);
// return DLexer::Token(DLexer::KeywordToken()
// .setType(DLexer::KeywordToken::TMutable));
case clang::tok::kw_namespace:
notImplemented(source);
assert(false);
// return DLexer::Token(DLexer::KeywordToken()
// .setType(DLexer::KeywordToken::TNamespace));
case clang::tok::kw_new:
notImplemented(source);
assert(false);
// return DLexer::Token(DLexer::KeywordToken()
// .setType(DLexer::KeywordToken::TNew));
case clang::tok::kw_operator:
return DLexer::Token(DLexer::KeywordToken()
.setType(DLexer::KeywordToken::TOperator));
case clang::tok::kw_private:
notImplemented(source);
assert(false);
// return DLexer::Token(DLexer::KeywordToken()
// .setType(DLexer::KeywordToken::TPrivate));
case clang::tok::kw_protected:
notImplemented(source);
assert(false);
// return DLexer::Token(DLexer::KeywordToken()
// .setType(DLexer::KeywordToken::TProtected));
case clang::tok::kw_public:
notImplemented(source);
assert(false);
// return DLexer::Token(DLexer::KeywordToken()
// .setType(DLexer::KeywordToken::TPublic));
case clang::tok::kw_reinterpret_cast:
return DLexer::Token(DLexer::KeywordToken()
.setType(DLexer::KeywordToken::TReinterpretCast));
case clang::tok::kw_static_cast:
return DLexer::Token(DLexer::KeywordToken()
.setType(DLexer::KeywordToken::TStaticCast));
case clang::tok::kw_template:
notImplemented(source);
assert(false);
// return DLexer::Token(DLexer::KeywordToken()
// .setType(DLexer::KeywordToken::TTemplate));
case clang::tok::kw_this:
{ DLexer::IdentifierToken* result = new DLexer::IdentifierToken;
result->content() = "this";
return DLexer::Token(result);
}
case clang::tok::kw_throw:
notImplemented(source);
assert(false);
// return DLexer::Token(DLexer::KeywordToken()
// .setType(DLexer::KeywordToken::TThrow));
case clang::tok::kw_true:
return DLexer::Token(DLexer::KeywordToken()
.setType(DLexer::KeywordToken::TTrue));
case clang::tok::kw_try:
notImplemented(source);
assert(false);
// return DLexer::Token(DLexer::KeywordToken()
// .setType(DLexer::KeywordToken::TTry));
case clang::tok::kw_typename:
return DLexer::Token(DLexer::KeywordToken()
.setType(DLexer::KeywordToken::TTypename));
case clang::tok::kw_typeid:
notImplemented(source);
assert(false);
// return DLexer::Token(DLexer::KeywordToken()
// .setType(DLexer::KeywordToken::TTypeid));
case clang::tok::kw_using:
notImplemented(source);
assert(false);
// return DLexer::Token(DLexer::KeywordToken()
// .setType(DLexer::KeywordToken::TUsing));
case clang::tok::kw_virtual:
return DLexer::Token(DLexer::KeywordToken()
.setType(DLexer::KeywordToken::TVirtual));
case clang::tok::kw_wchar_t:
return DLexer::Token(DLexer::KeywordToken()
.setType(DLexer::KeywordToken::TWcharT));
default:
notImplemented(source);
}
return DLexer::Token();
}
void
Lexer::notImplemented(const clang::Token& source) const {
std::cerr << "Not implemented: " << str(source) << std::endl;
assert(false); // unimplemented
}
clang::MacroArgs*
Lexer::readFunctionLikeMacroArgs(const std::string& identifier,
clang::MacroInfo *macro) {
// clang comment: The number of fixed arguments to parse.
#if CLANG_VERSION_MAJOR < 5
unsigned numFixedArgsLeft = macro->getNumArgs();
#else
unsigned numFixedArgsLeft = macro->getNumParams();
#endif
bool isVariadic = macro->isVariadic();
// argTokens - Build up a list of tokens that make up each argument. Each
// argument is separated by an EOF token. Use a SmallVector so we can avoid
// heap allocations in the common case.
llvm::SmallVector<clang::Token, 64> argTokens;
Lexer::ReadResult parseResult = RRHasToken;
unsigned numActuals = 0;
DLexer::AbstractToken readToken = queryToken();
while (parseResult != Lexer::RRFinished
&& (readToken.getType() != DLexer::AbstractToken::TOperatorPunctuator
|| ((const DLexer::OperatorPunctuatorToken&) readToken).getType()
!= DLexer::OperatorPunctuatorToken::TCloseParen)) {
assert(readToken.getType() == DLexer::AbstractToken::TOperatorPunctuator);
DLexer::OperatorPunctuatorToken::Type firstTokenType =
((const DLexer::OperatorPunctuatorToken&) readToken).getType();
assert(firstTokenType == DLexer::OperatorPunctuatorToken::TOpenParen
|| firstTokenType == DLexer::OperatorPunctuatorToken::TComma);
// "l-paren-ness?", "only expect argument separators here"
// C99 6.10.3p11: Keep track of the number of l_parens we have seen. Note
// that we already consumed the first one.
unsigned numParens = 0;
clearToken();
while (1) {
// Read arguments as unexpanded tokens. This avoids issues, e.g., where
// an argument value in a macro could expand to ',' or '(' or ')'.
do {
eatToken(parseResult);
} while (parseResult != Lexer::RRHasToken
&& parseResult != Lexer::RRFinished);
readToken = queryToken();
if (parseResult == Lexer::RRFinished) // "#if f(<eof>" & "#if f(\n"
break;
else if (readToken.getType() == DLexer::AbstractToken::TOperatorPunctuator
&& ((const DLexer::OperatorPunctuatorToken&) readToken).getType()
== DLexer::OperatorPunctuatorToken::TCloseParen) {
// If we found the ) token, the macro arg list is done.
if (numParens-- == 0)
break;
}
else if (readToken.getType() == DLexer::AbstractToken::TOperatorPunctuator
&& ((const DLexer::OperatorPunctuatorToken&) readToken).getType()
== DLexer::OperatorPunctuatorToken::TOpenParen)
++numParens;
else if (numParens == 0
&& readToken.getType() == DLexer::AbstractToken::TOperatorPunctuator
&& ((const DLexer::OperatorPunctuatorToken&) readToken).getType()
== DLexer::OperatorPunctuatorToken::TComma) {
// Comma ends this argument if there are more fixed arguments expected.
// However, if this is a variadic macro, and this is part of the
// variadic part, then the comma is just an argument token.
if (!isVariadic)
break;
if (numFixedArgsLeft > 1)
break;
}
argTokens.push_back(convertToClang(extractToken()));
}
// If this was an empty argument list foo(), don't add this as an empty
// argument.
if (argTokens.empty()
&& (readToken.getType() == DLexer::AbstractToken::TOperatorPunctuator
&& ((const DLexer::OperatorPunctuatorToken&) readToken).getType()
== DLexer::OperatorPunctuatorToken::TCloseParen))
break;
// If this is not a variadic macro, and too many args were specified, emit
// an error.
if (!isVariadic && numFixedArgsLeft == 0)
return 0;
// Add a marker EOF token to the end of the token list for this argument.
clang::Token EOFTok;
EOFTok.startToken();
EOFTok.setKind(clang::tok::eof);
// EOFTok.setLocation(Tok.getLocation());
EOFTok.setLength(0);
argTokens.push_back(EOFTok);
++numActuals;
assert(numFixedArgsLeft != 0 && "Too many arguments parsed");
--numFixedArgsLeft;
}
// Okay, we either found the r_paren. Check to see if we parsed too few
// arguments.
#if CLANG_VERSION_MAJOR < 5
unsigned minArgsExpected = macro->getNumArgs();
#else
unsigned minArgsExpected = macro->getNumParams();
#endif
// See MacroArgs instance var for description of this.
assert(numActuals == minArgsExpected);
return clang::MacroArgs::create(macro, argTokens, false,
_clangSema->getPreprocessor());
}
void
Lexer::eatToken(ReadResult& result) {
if (_stateLexer == SLMacroTokens) {
while (!_macroTokensStack.empty() && _macroTokensStack.back().empty()) {
_macroTokensStack.pop_back();
if (_currentMacroArgumentsStack.back())
_currentMacroArgumentsStack.back()
->destroy(_clangSema->getPreprocessor());
_currentMacroArgumentsStack.pop_back();
}
if (!_macroTokensStack.empty()) {
std::pair<unsigned, DLexer::AbstractToken*>&
token = _macroTokensStack.back().front();
setToken(DLexer::Token(token.first, token.second));
_macroTokensStack.back().pop_front();
result = RRHasToken;
}
else {
_usedMacros.clear();
_stringsForToken.clear();
_stateLexer = SLStandard;
};
}
else
assert(_stateLexer == SLStandard);
if (_stateLexer == SLStandard) {
result = readToken();
// if (hasErrors()) // FIXME - what to do with this now that this code is in the Lexer
// addErrorMessagesFromLexer();
};
if (result == RRHasToken) {
DLexer::AbstractToken::Type type = queryToken().getType();
if (type == DLexer::AbstractToken::TComment)
result = RRContinueLexing;
};
}
// FIXME - do we need this
// Converts an ACSL token to a Clang token
clang::Token
Lexer::convertToClang(DLexer::Token source) const {
DLexer::AbstractToken token = source.getFullToken();
switch (token.getType()) {
case DLexer::AbstractToken::TIdentifier:
{ clang::Token identifierToken;
identifierToken.startToken();
identifierToken.clearFlag(clang::Token::NeedsCleaning);
identifierToken.setIdentifierInfo(0);
const std::string& identifier = ((const DLexer::IdentifierToken&)
source.getContent()).content();
// std::memset(&identifierToken, 0, sizeof(clang::Token));
identifierToken.setLength(identifier.size());
identifierToken.setLocation(_clangSourceLocation /* .getLocWithOffset(...) */);
identifierToken.setKind(clang::tok::raw_identifier);
_stringsForToken.push_back(identifier);
identifierToken.setRawIdentifierData(_stringsForToken.back().c_str());
return identifierToken;
};
case DLexer::AbstractToken::TKeyword:
{ clang::Token result;
// result.setLength(...);
// result.setLocation(...);
switch (((const DLexer::KeywordToken&) token).getType()) {
case DLexer::KeywordToken::TAuto:
result.setKind(clang::tok::kw_auto);
break;
case DLexer::KeywordToken::TBool:
result.setKind(clang::tok::kw_bool);
break;
case DLexer::KeywordToken::TCase:
result.setKind(clang::tok::kw_case);
break;
case DLexer::KeywordToken::TChar:
result.setKind(clang::tok::kw_char);
break;
case DLexer::KeywordToken::TClass:
result.setKind(clang::tok::kw_class);
break;
case DLexer::KeywordToken::TConst:
result.setKind(clang::tok::kw_const);
break;
case DLexer::KeywordToken::TConstCast:
result.setKind(clang::tok::kw_const_cast);
break;
case DLexer::KeywordToken::TDouble:
result.setKind(clang::tok::kw_double);
break;
case DLexer::KeywordToken::TDynamicCast:
result.setKind(clang::tok::kw_dynamic_cast);
break;
case DLexer::KeywordToken::TElse:
result.setKind(clang::tok::kw_else);
break;
case DLexer::KeywordToken::TEnum:
result.setKind(clang::tok::kw_enum);
break;
case DLexer::KeywordToken::TFalse:
result.setKind(clang::tok::kw_false);
break;
case DLexer::KeywordToken::TFloat:
result.setKind(clang::tok::kw_float);
break;
case DLexer::KeywordToken::TFor:
result.setKind(clang::tok::kw_for);
break;
case DLexer::KeywordToken::TIf:
result.setKind(clang::tok::kw_if);
break;
case DLexer::KeywordToken::TInt:
result.setKind(clang::tok::kw_int);
break;
case DLexer::KeywordToken::TLong:
result.setKind(clang::tok::kw_long);
break;
case DLexer::KeywordToken::TOperator:
result.setKind(clang::tok::kw_operator);
break;
case DLexer::KeywordToken::TReinterpretCast:
result.setKind(clang::tok::kw_reinterpret_cast);
break;
case DLexer::KeywordToken::TShort:
result.setKind(clang::tok::kw_short);
break;
case DLexer::KeywordToken::TSigned:
result.setKind(clang::tok::kw_signed);
break;
case DLexer::KeywordToken::TSizeof:
result.setKind(clang::tok::kw_sizeof);
break;
case DLexer::KeywordToken::TStatic:
result.setKind(clang::tok::kw_static);
break;
case DLexer::KeywordToken::TStaticCast:
result.setKind(clang::tok::kw_static_cast);
break;
case DLexer::KeywordToken::TStruct:
result.setKind(clang::tok::kw_struct);
break;
case DLexer::KeywordToken::TTrue:
result.setKind(clang::tok::kw_true);
break;
case DLexer::KeywordToken::TTypeId:
result.setKind(clang::tok::kw_typeid);
break;
case DLexer::KeywordToken::TTypename:
result.setKind(clang::tok::kw_typename);
break;
case DLexer::KeywordToken::TUnion:
result.setKind(clang::tok::kw_union);
break;
case DLexer::KeywordToken::TUnsigned:
result.setKind(clang::tok::kw_unsigned);
break;
case DLexer::KeywordToken::TVirtual:
result.setKind(clang::tok::kw_virtual);
break;
case DLexer::KeywordToken::TVoid:
result.setKind(clang::tok::kw_void);
break;
case DLexer::KeywordToken::TVolatile:
result.setKind(clang::tok::kw_volatile);
break;
case DLexer::KeywordToken::TWcharT:
result.setKind(clang::tok::kw_wchar_t);
break;
case DLexer::KeywordToken::TWhile:
result.setKind(clang::tok::kw_while);
break;
default:
break;
};
return result;
};
case DLexer::AbstractToken::TLiteral:
{ // result.setLength(...);
// result.setLocation(...);
clang::Token result;
result.startToken();
result.clearFlag(clang::Token::NeedsCleaning);
result.setIdentifierInfo(0);
// std::memset(&result, 0, sizeof(clang::Token));
// result.setLocation(getSourceLocation(BufferPtr, TokLen));
switch (((const DLexer::LiteralToken&) token).getType()) {
case DLexer::LiteralToken::TInteger:
result.setKind(clang::tok::numeric_constant); // 0x123
{ std::ostringstream out;
out << ((const DLexer::IntegerLiteralToken&)
source.getContent()).getValue();
_stringsForToken.push_back(out.str());
result.setLength(_stringsForToken.back().size());
result.setLiteralData(_stringsForToken.back().c_str());
};
break;
case DLexer::LiteralToken::TCharacter:
result.setKind(clang::tok::char_constant); // 'a'
{ std::string value;
value += ((const DLexer::CharacterLiteralToken&)
source.getContent()).getChar();
_stringsForToken.push_back(value);
result.setLength(_stringsForToken.back().size());
result.setLiteralData(_stringsForToken.back().c_str());
};
break;
case DLexer::LiteralToken::TFloating:
result.setKind(clang::tok::numeric_constant); // 0x123
{ std::ostringstream out;
out << ((const DLexer::FloatingLiteralToken&)
source.getContent()).getValueAsString();
_stringsForToken.push_back(out.str());
result.setLength(_stringsForToken.back().size());
result.setLiteralData(_stringsForToken.back().c_str());
};
break;
case DLexer::LiteralToken::TString:
result.setKind(clang::tok::string_literal); // "foo"
_stringsForToken.push_back(((const DLexer::StringLiteralToken&)
source.getContent()).content());
result.setLength(_stringsForToken.back().size());
result.setLiteralData(_stringsForToken.back().c_str());
break;
default:
break;
};
return result;
};
case DLexer::AbstractToken::TOperatorPunctuator:
{ clang::Token result;
switch (((const DLexer::OperatorPunctuatorToken&) token).getType()) {
case DLexer::OperatorPunctuatorToken::TOpenBrace:
result.setKind(clang::tok::l_brace);
break;
case DLexer::OperatorPunctuatorToken::TCloseBrace:
result.setKind(clang::tok::r_brace);
break;
case DLexer::OperatorPunctuatorToken::TOpenBracket:
result.setKind(clang::tok::l_square);
break;
case DLexer::OperatorPunctuatorToken::TCloseBracket:
result.setKind(clang::tok::r_square);
break;
case DLexer::OperatorPunctuatorToken::TOpenParen:
result.setKind(clang::tok::l_paren);
break;
case DLexer::OperatorPunctuatorToken::TCloseParen:
result.setKind(clang::tok::r_paren);
break;
case DLexer::OperatorPunctuatorToken::TSemiColon:
result.setKind(clang::tok::semi);
break;
case DLexer::OperatorPunctuatorToken::TColon:
result.setKind(clang::tok::colon);
break;
case DLexer::OperatorPunctuatorToken::TEllipsis:
result.setKind(clang::tok::ellipsis);
break;
case DLexer::OperatorPunctuatorToken::TQuery:
result.setKind(clang::tok::question);
break;
case DLexer::OperatorPunctuatorToken::TColonColon:
result.setKind(clang::tok::coloncolon);
break;
case DLexer::OperatorPunctuatorToken::TPunct:
result.setKind(clang::tok::period);
break;
case DLexer::OperatorPunctuatorToken::TPunctStar:
result.setKind(clang::tok::periodstar);
break;
case DLexer::OperatorPunctuatorToken::TComma:
result.setKind(clang::tok::comma);
break;
case DLexer::OperatorPunctuatorToken::TArrowStar:
result.setKind(clang::tok::arrowstar);
break;
case DLexer::OperatorPunctuatorToken::TArrow:
result.setKind(clang::tok::arrow);
break;
case DLexer::OperatorPunctuatorToken::TPlus:
result.setKind(clang::tok::plus);
break;
case DLexer::OperatorPunctuatorToken::TMinus:
result.setKind(clang::tok::minus);
break;
case DLexer::OperatorPunctuatorToken::TStar:
result.setKind(clang::tok::star);
break;
case DLexer::OperatorPunctuatorToken::TDivide:
result.setKind(clang::tok::slash);
break;
case DLexer::OperatorPunctuatorToken::TProcent:
result.setKind(clang::tok::percent);
break;
case DLexer::OperatorPunctuatorToken::TBitXor:
result.setKind(clang::tok::caret);
break;
case DLexer::OperatorPunctuatorToken::TAmpersand:
result.setKind(clang::tok::amp);
break;
case DLexer::OperatorPunctuatorToken::TBitOr:
result.setKind(clang::tok::pipe);
break;
case DLexer::OperatorPunctuatorToken::TTilda:
result.setKind(clang::tok::tilde);
break;
case DLexer::OperatorPunctuatorToken::TNot:
result.setKind(clang::tok::exclaim);
break;
case DLexer::OperatorPunctuatorToken::TAssign:
result.setKind(clang::tok::equal);
break;
case DLexer::OperatorPunctuatorToken::TLess:
result.setKind(clang::tok::less);
break;
case DLexer::OperatorPunctuatorToken::TGreater:
result.setKind(clang::tok::greater);
break;
case DLexer::OperatorPunctuatorToken::TPlusAssign:
result.setKind(clang::tok::plusequal);
break;
case DLexer::OperatorPunctuatorToken::TMinusAssign:
result.setKind(clang::tok::minusequal);
break;
case DLexer::OperatorPunctuatorToken::TMultAssign:
result.setKind(clang::tok::starequal);
break;
case DLexer::OperatorPunctuatorToken::TDivideAssign:
result.setKind(clang::tok::slashequal);
break;
case DLexer::OperatorPunctuatorToken::TModuloAssign:
result.setKind(clang::tok::percentequal);
break;
case DLexer::OperatorPunctuatorToken::TBitXorAssign:
result.setKind(clang::tok::caretequal);
break;
case DLexer::OperatorPunctuatorToken::TBitAndAssign:
result.setKind(clang::tok::ampequal);
break;
case DLexer::OperatorPunctuatorToken::TBitOrAssign:
result.setKind(clang::tok::pipeequal);
break;
case DLexer::OperatorPunctuatorToken::TLeftShift:
result.setKind(clang::tok::lessless);
break;
case DLexer::OperatorPunctuatorToken::TRightShift:
result.setKind(clang::tok::greatergreater);
break;
case DLexer::OperatorPunctuatorToken::TLeftShiftAssign:
result.setKind(clang::tok::lesslessequal);
break;
case DLexer::OperatorPunctuatorToken::TRightShiftAssign:
result.setKind(clang::tok::greatergreaterequal);
break;
case DLexer::OperatorPunctuatorToken::TEqual:
result.setKind(clang::tok::equalequal);
break;
case DLexer::OperatorPunctuatorToken::TDifferent:
result.setKind(clang::tok::exclaimequal);
break;
case DLexer::OperatorPunctuatorToken::TLessOrEqual:
result.setKind(clang::tok::lessequal);
break;
case DLexer::OperatorPunctuatorToken::TGreaterOrEqual:
result.setKind(clang::tok::greaterequal);
break;
case DLexer::OperatorPunctuatorToken::TLogicalAnd:
result.setKind(clang::tok::ampamp);
break;
case DLexer::OperatorPunctuatorToken::TLogicalOr:
result.setKind(clang::tok::pipepipe);
break;
case DLexer::OperatorPunctuatorToken::TPlusPlus:
result.setKind(clang::tok::plusplus);
break;
case DLexer::OperatorPunctuatorToken::TMinusMinus:
result.setKind(clang::tok::minusminus);
break;
default:
break;
};
return result;
};
case DLexer::AbstractToken::TComment:
{ clang::Token commentToken;
commentToken.startToken();
commentToken.clearFlag(clang::Token::NeedsCleaning);
commentToken.setIdentifierInfo(0);
// std::memset(&commentToken, 0, sizeof(clang::Token));
const std::string& comment = ((const DLexer::CommentToken&)
source.getContent()).content();
commentToken.setLength(comment.size());
// commentToken.setLocation(getSourceLocation(BufferPtr, TokLen));
commentToken.setKind(clang::tok::comment);
_stringsForToken.push_back(comment);
commentToken.setLiteralData(_stringsForToken.back().c_str());
return commentToken;
};
default:
break;
};
return clang::Token();
}
void Lexer::lexUsingClang(const clang::Sema* _sema, const std::string& input, clang::SourceLocation loc, std::list<clang::Token>& clangTokens) {
clang::Preprocessor& pp = _sema->getPreprocessor();
clang::SourceManager& _sourceManager = pp.getSourceManager();
clang::PresumedLoc PLoc = _sourceManager.getPresumedLoc(loc);
if (!PLoc.isValid()) {
std::cout << "Invalid location for " << input << " " << str(loc) << std::endl;
}
const char* f = strdup(PLoc.getFilename());
size_t line = PLoc.getLine();
size_t col = PLoc.getColumn();
clangTokens.clear();
// This is a hack to make error positions come out right. There does not seem to be a way to
// tell clang or a MemoryBuffer where to presume the material starts.
std::ostringstream prefix;
for (size_t i = 1; i<line; i++) prefix << "\n";
for (size_t i = 1; i<col; i++) prefix << " "; // FIXME - original file may have had tabs
// Append to the input exactly one token at the end of the string, so we can recognize when
// the input has been fully lexed.
std::string datastr = prefix.str() + input + " *";
// Create a clang 'File' out of an llvm memory buffer that points to a copy of the ACSL text
clang::FileID newFile = _sourceManager.createFileID(
llvm::MemoryBuffer::getMemBufferCopy(datastr, llvm::Twine(f)));
// Now declare that file as the lexer source
bool res = pp.EnterSourceFile(newFile, pp.GetCurDirLookup(), loc);
if (res) {
// Error occurred
// FIXME make a regular error message
std::cerr << "Error on trying to parse annotation text" << std::endl;
return ;
}
// And lex from it. We have to observe and catch the end of the string; otherwise
// the Lexer, on reaching the end of this input, will pop the include stack and
// continue lexing the next file. In some cases this is the file that contained
// the text we are lexing now, causing an infinite loop (since we have not returned
// from handling the comment, so the comment has not yet been discarded).
clang::Token t;
unsigned endmarker = datastr.size()-1;
unsigned offset;
while (true) {
pp.Lex(t);
// offset is the position in the character buffer
offset = (_sourceManager.getDecomposedExpansionLoc(t.getLocation())).second;
// FIXME - eof does not have the right location
// FIXME - we get eof sometimes but not always? just ending comments?
if (t.is(clang::tok::eof)) break;
if (offset >= endmarker) {
//std::cerr << "Yes we do need the extra eof check" << std::endl;
break;
}
clangTokens.push_back(t);
}
bool debug = false;
if (debug) {
std::cout << clangTokens.size() << " TOKENS" << std::endl;
std::cout << "INPUT: " << input << std::endl;
std::list<clang::Token>::const_iterator iter = clangTokens.begin();
while (iter != clangTokens.end()) {
clang::Token t = *iter;
if (!t.getLocation().isValid()) std::cout << "BEGIN LOC INVALID " + str(t) << std::endl;
if (!t.getEndLoc().isValid()) std::cout << "END LOC INVALID " + str(t) << std::endl;
std::cout << " CLANG-TOKEN " << str(*iter++) << std::endl;
}
}
}
} // end of namespace Acsl