Move shouldWrap / isUnnamedMacro; use Lexer. Prerequisite for improving wrapInput().

This commit is contained in:
Axel Naumann 2016-07-27 01:24:36 +02:00 committed by sftnight
parent 112370dc17
commit 93edfaeec2
7 changed files with 260 additions and 149 deletions

View File

@ -232,15 +232,6 @@ namespace cling {
CompilationResult CodeCompleteInternal(const std::string& input,
unsigned offset);
///\brief Decides whether the input line should be wrapped into a function
/// declaration that can later be executed.
///
///\param[in] input - The input being scanned.
///
///\returns true if the input should be wrapped into a function declaration.
///
bool ShouldWrapInput(const std::string& input);
///\brief Wraps a given input.
///
/// The interpreter must be able to run statements on the fly, which is not

View File

@ -12,6 +12,8 @@
#include "cling/Interpreter/Interpreter.h"
#include "clang/Basic/SourceLocation.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/SmallString.h"
@ -158,14 +160,15 @@ namespace cling {
///\param [in] filename - The file to read.
/// @param[out] result - the cling::Value as result of the
/// execution of the last statement
///\param [in] ignoreOutmostBlock - Whether to ignore enlosing {}.
///\param [in] posOpenCurly - Position of the opening '{' of an enclosing
/// block to ignore; (size_t)(-1) if there is no such block.
///
///\returns result of the compilation.
///
Interpreter::CompilationResult
readInputFromFile(llvm::StringRef filename,
Value* result,
bool ignoreOutmostBlock = false);
size_t posOpenCurly = (size_t)(-1));
///\brief Set the stdout and stderr stream to the appropriate file.
///
///\param [in] file - The file for the redirection.

View File

@ -0,0 +1,53 @@
//--------------------------------------------------------------------*- C++ -*-
// CLING - the C++ LLVM-based InterpreterG :)
// author: Axel Naumann <axel@cern.ch>
//
// This file is dual-licensed: you can choose to license it under the University
// of Illinois Open Source License or the GNU Lesser General Public License. See
// LICENSE.TXT for details.
//------------------------------------------------------------------------------
#ifndef CLING_UTILS_SOURCE_NORMALIZATION_H
#define CLING_UTILS_SOURCE_NORMALIZATION_H
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/StringRef.h"
#include <string>
// Forward declarations of the clang types referenced by the interfaces below;
// avoids pulling the full clang headers into every includer.
namespace clang {
  class LangOptions;
  class SourceLocation;
  class SourceManager;
} // namespace clang
namespace cling {
namespace utils {
///\brief Determine whether the source is an unnamed macro.
///
/// Unnamed macros contain no function definition, but "prompt-style" code
/// surrounded by a set of curly braces.
///
/// \param source - The source code to analyze.
/// \param LangOpts - LangOptions to use for lexing.
/// \return the position of the unnamed macro's opening '{'; or
/// std::string::npos if this is not an unnamed macro.
size_t isUnnamedMacro(llvm::StringRef source,
clang::LangOptions& LangOpts);
///\brief Determine whether the source needs to be moved into a function.
///
/// If so, return the position where the function signature should be
/// inserted.
///
/// NOTE(review): the interface takes source by non-const reference so that
/// possible #includes can be moved out of the future body of the function;
/// confirm against the implementation whether source is actually modified.
///
/// \param source - The source code to analyze; out: the source with
/// re-arranged includes (see note above).
/// \param LangOpts - LangOptions to use for lexing.
/// \return The position where the function signature and '{' should be
/// inserted; std::string::npos if this source should not be wrapped.
size_t getWrapPoint(std::string& source, const clang::LangOptions& LangOpts);
} // namespace utils
} // namespace cling
#endif // CLING_UTILS_SOURCE_NORMALIZATION_H

View File

@ -28,6 +28,7 @@
#include "cling/Interpreter/Transaction.h"
#include "cling/Interpreter/Value.h"
#include "cling/Utils/AST.h"
#include "cling/Utils/SourceNormalization.h"
#include "cling/Interpreter/AutoloadCallback.h"
#include "clang/AST/ASTContext.h"
@ -535,7 +536,12 @@ namespace cling {
Interpreter::CompilationResult
Interpreter::process(const std::string& input, Value* V /* = 0 */,
Transaction** T /* = 0 */) {
if (isRawInputEnabled() || !ShouldWrapInput(input)) {
std::string wrapReadySource = input;
size_t wrapPoint = std::string::npos;
if (!isRawInputEnabled())
wrapPoint = utils::getWrapPoint(wrapReadySource, getCI()->getLangOpts());
if (isRawInputEnabled() || wrapPoint == std::string::npos) {
CompilationOptions CO;
CO.DeclarationExtraction = 0;
CO.ValuePrinting = 0;
@ -553,7 +559,7 @@ namespace cling {
CO.DynamicScoping = isDynamicLookupEnabled();
CO.Debug = isPrintingDebug();
CO.CheckPointerValidity = 1;
if (EvaluateInternal(input, CO, V, T) == Interpreter::kFailure) {
if (EvaluateInternal(wrapReadySource, CO, V, T) == Interpreter::kFailure) {
return Interpreter::kFailure;
}
@ -658,7 +664,9 @@ namespace cling {
std::string wrappedInput = input;
std::string wrapperName;
if (ShouldWrapInput(input))
size_t wrapPos = utils::getWrapPoint(wrappedInput, getCI()->getLangOpts());
if (wrapPos != std::string::npos)
WrapInput(wrappedInput, wrapperName, CO);
StateDebuggerRAII stateDebugger(this);
@ -786,89 +794,6 @@ namespace cling {
return Interpreter::kFailure;
}
///\brief Decide whether input has to be wrapped into a function before it
/// can be executed.
///
/// Only the first token of input (after an optional leading "#line"
/// directive) is inspected:
///  - empty / whitespace-only input and a lone '#' are not wrapped;
///  - input starting with "extern", "namespace", "template" or "#include"
///    is not wrapped;
///  - input starting with "using" is wrapped only if non-whitespace text
///    follows the first ';';
///  - everything else is wrapped.
///
///\param[in] input - The input being scanned.
///\returns true if the input should be wrapped into a function declaration.
bool Interpreter::ShouldWrapInput(const std::string& input) {
// TODO: For future reference.
// Parser* P = const_cast<clang::Parser*>(m_IncrParser->getParser());
// Parser::TentativeParsingAction TA(P);
// TPResult result = P->isCXXDeclarationSpecifier();
// TA.Revert();
// return result == TPResult::True();
// FIXME: can't skipToEndOfLine because we don't want to PragmaLex
// because we don't want to pollute the preprocessor. Without PragmaLex
// there is no "end of line" / eod token. So skip the #line before lexing.
// NOTE(review): isspace() is called with plain char values throughout this
// function; for negative chars (non-ASCII bytes) that is undefined behavior.
size_t posStart = 0;
size_t lenInput = input.length();
// Skip leading whitespace.
while (lenInput > posStart && isspace(input[posStart]))
++posStart;
// Don't wrap empty input
if (posStart == lenInput)
return false;
if (input[posStart] == '#') {
// Skip whitespace between '#' and the directive name.
size_t posDirective = posStart + 1;
while (lenInput > posDirective && isspace(input[posDirective]))
++posDirective;
// A single '#'? Weird... better don't wrap.
if (posDirective == lenInput)
return false;
if (!strncmp(&input[posDirective], "line ", 5)) {
// There is a line directive. It does affect the determination whether
// this input should be wrapped; skip the line.
size_t posEOD = input.find('\n', posDirective + 5);
// Without a newline the #line directive is all there is; posStart stays
// at the '#', which the lexer below then sees as tok::hash.
if (posEOD != std::string::npos)
posStart = posEOD + 1;
}
}
//llvm::OwningPtr<llvm::MemoryBuffer> buf;
//buf.reset(llvm::MemoryBuffer::getMemBuffer(&input[posStart],
// "Cling Preparse Buf"));
// Raw-lex (no preprocessor involved) just the first token at posStart.
Lexer WrapLexer(SourceLocation(), getSema().getLangOpts(),
input.c_str() + posStart,
input.c_str() + posStart,
input.c_str() + input.size());
Token Tok;
WrapLexer.LexFromRawLexer(Tok);
const tok::TokenKind kind = Tok.getKind();
if (kind == tok::raw_identifier && !Tok.needsCleaning()) {
StringRef keyword(Tok.getRawIdentifier());
if (keyword.equals("using")) {
// FIXME: Using definitions and declarations should be decl extracted.
// Until we have that, don't wrap them if they are the only input.
// keyword.data() points into input's buffer, so strchr() scans the rest of
// the (null-terminated) input for the first ';'.
const char* cursor = keyword.data();
cursor = strchr(cursor, ';'); // advance to end of using decl / def.
if (!cursor) {
// Using decl / def without trailing ';' means input consists of only
// that using decl /def: should not wrap.
return false;
}
// Skip whitespace after ';'
do ++cursor;
while (*cursor && isspace(*cursor));
// Nothing but whitespace after the ';': the using decl is the only input.
if (!*cursor)
return false;
// There is "more" - let's assume this input consists of a using
// declaration or definition plus some code that should be wrapped.
return true;
}
if (keyword.equals("extern"))
return false;
if (keyword.equals("namespace"))
return false;
if (keyword.equals("template"))
return false;
}
else if (kind == tok::hash) {
// A directive other than the skipped #line: only #include prevents wrapping.
WrapLexer.LexFromRawLexer(Tok);
if (Tok.is(tok::raw_identifier) && !Tok.needsCleaning()) {
StringRef keyword(Tok.getRawIdentifier());
if (keyword.equals("include"))
return false;
}
}
return true;
}
void Interpreter::WrapInput(std::string& input, std::string& fname,
CompilationOptions &CO) {

View File

@ -189,7 +189,7 @@ namespace cling {
Interpreter::CompilationResult
MetaProcessor::readInputFromFile(llvm::StringRef filename,
Value* result,
bool ignoreOutmostBlock /*=false*/) {
size_t posOpenCurly) {
{
// check that it's not binary:
@ -197,7 +197,9 @@ namespace cling {
char magic[1024] = {0};
in.read(magic, sizeof(magic));
size_t readMagic = in.gcount();
if (readMagic >= 4) {
// Binary files < 300 bytes are rare, and below newlines etc make the
// heuristic unreliable.
if (readMagic >= 300) {
llvm::StringRef magicStr(magic,in.gcount());
llvm::sys::fs::file_magic fileType
= llvm::sys::fs::identify_magic(magicStr);
@ -226,59 +228,50 @@ namespace cling {
in.seekg(0);
in.read(&content[0], size);
if (ignoreOutmostBlock && !content.empty()) {
if (posOpenCurly != (size_t)-1 && !content.empty()) {
assert(content[posOpenCurly] == '{'
&& "No curly at claimed position of opening curly!");
// hide the curly brace:
content[posOpenCurly] = ' ';
// and the matching closing '}'
static const char whitespace[] = " \t\r\n";
std::string::size_type posNonWS = content.find_first_not_of(whitespace);
// Handle comments before leading {
while (posNonWS != std::string::npos
&& content[posNonWS] == '/' && content[posNonWS+1] == '/') {
// Remove the comment line
posNonWS = content.find_first_of('\n', posNonWS+2)+1;
posNonWS = content.find_first_not_of(whitespace, posNonWS);
}
std::string::size_type replaced = posNonWS;
if (posNonWS != std::string::npos && content[posNonWS] == '{') {
// hide the curly brace:
content[posNonWS] = ' ';
// and the matching closing '}'
posNonWS = content.find_last_not_of(whitespace);
if (posNonWS != std::string::npos) {
if (content[posNonWS] == ';' && content[posNonWS-1] == '}') {
content[posNonWS--] = ' '; // replace ';' and enter next if
size_t posCloseCurly = content.find_last_not_of(whitespace);
if (posCloseCurly != std::string::npos) {
if (content[posCloseCurly] == ';' && content[posCloseCurly-1] == '}') {
content[posCloseCurly--] = ' '; // replace ';' and enter next if
}
if (content[posCloseCurly] == '}') {
content[posCloseCurly] = ' '; // replace '}'
} else {
std::string::size_type posBlockClose = content.find_last_of('}');
if (posBlockClose != std::string::npos) {
content[posBlockClose] = ' '; // replace '}'
}
if (content[posNonWS] == '}') {
content[posNonWS] = ' '; // replace '}'
} else {
std::string::size_type posBlockClose = content.find_last_of('}');
if (posBlockClose != std::string::npos) {
content[posBlockClose] = ' '; // replace '}'
std::string::size_type posComment
= content.find_first_not_of(whitespace, posBlockClose);
if (posComment != std::string::npos
&& content[posComment] == '/' && content[posComment+1] == '/') {
// More text (comments) are okay after the last '}', but
// we can not easily find it to remove it (so we need to upgrade
// this code to better handle the case with comments or
// preprocessor code before and after the leading { and
// trailing })
while (posComment <= posCloseCurly) {
content[posComment++] = ' '; // replace '}' and comment
}
std::string::size_type posComment
= content.find_first_not_of(whitespace, posBlockClose);
if (posComment != std::string::npos
&& content[posComment] == '/' && content[posComment+1] == '/') {
// More text (comments) are okay after the last '}', but
// we can not easily find it to remove it (so we need to upgrade
// this code to better handle the case with comments or
// preprocessor code before and after the leading { and
// trailing })
while (posComment <= posNonWS) {
content[posComment++] = ' '; // replace '}' and comment
}
} else {
content[replaced] = '{';
// By putting the '{' back, we keep the code as consistent as
// the user wrote it ... but we should still warn that we not
// goint to treat this file an unamed macro.
llvm::errs()
<< "Warning in cling::MetaProcessor: can not find the closing '}', "
<< llvm::sys::path::filename(filename)
<< " is not handled as an unamed script!\n";
} // did not find "//"
} // remove comments after the trailing '}'
} // find '}'
} // have '{'
} // ignore outmost block
} else {
content[posCloseCurly] = '{';
// By putting the '{' back, we keep the code as consistent as
// the user wrote it ... but we should still warn that we not
// goint to treat this file an unamed macro.
llvm::errs()
<< "Warning in cling::MetaProcessor: can not find the closing '}', "
<< llvm::sys::path::filename(filename)
<< " is not handled as an unamed script!\n";
} // did not find "//"
} // remove comments after the trailing '}'
} // find '}'
} // ignore outermost block
std::string strFilename(filename.str());
m_CurrentlyExecutingFile = strFilename;

View File

@ -11,11 +11,13 @@ set( LLVM_LINK_COMPONENTS
add_cling_library(clingUtils OBJECT
AST.cpp
SourceNormalization.cpp
Validation.cpp
LINK_LIBS
clangSema
clangAST
clangLex
clangBasic
)

View File

@ -0,0 +1,144 @@
//------------------------------------------------------------------------------
// CLING - the C++ LLVM-based InterpreterG :)
// author: Lukasz Janyst <ljanyst@cern.ch>
//
// This file is dual-licensed: you can choose to license it under the University
// of Illinois Open Source License or the GNU Lesser General Public License. See
// LICENSE.TXT for details.
//------------------------------------------------------------------------------
#include "cling/Utils/SourceNormalization.h"
#include "clang/Basic/LangOptions.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Lex/Lexer.h"
#include <utility>
using namespace clang;
namespace {
///\brief Raw-mode Lexer that lets its user recognize preprocessor directives.
///
/// While a directive is being lexed (from its '#' up to, but excluding, the
/// terminating tok::eod) inPPDirective() reports true.
class MinimalPPLexer: public Lexer {
public:
  ///\brief Set up a lexer over source, using LangOpts and a default
  /// constructed SourceLocation as start location.
  MinimalPPLexer(const LangOptions &LangOpts, llvm::StringRef source):
    Lexer(SourceLocation(), LangOpts,
          source.begin(), source.begin(), source.end()) {}

  ///\brief Whether the lexer currently is inside a preprocessor directive.
  bool inPPDirective() const { return ParsingPreprocessorDirective; }

  ///\brief Lex the next token through Lexer::LexFromRawLexer, switching
  /// directive mode on when a tok::hash is seen (so that the base Lexer emits
  /// a tok::eod at the end of the directive) and off again at that tok::eod.
  ///
  /// \param Tok - Receives the lexed token.
  /// \return The value returned by Lexer::LexFromRawLexer.
  bool Lex(Token& Tok) {
    const bool lexResult = LexFromRawLexer(Tok);
    if (!inPPDirective()) {
      // Outside of a directive: a '#' starts one, request the eod token.
      if (Tok.is(tok::hash))
        ParsingPreprocessorDirective = true;
    } else if (Tok.is(tok::eod)) {
      // End of the directive reached; leave PP parsing mode.
      ParsingPreprocessorDirective = false;
    }
    return lexResult;
  }

  ///\brief Keep lexing until a token of the given kind shows up.
  ///
  /// \param Tok - Token to advance.
  /// \param kind - Token kind where to stop lexing.
  /// \return false if a token of that kind was found; true if Lex() returned
  ///   true first. A matching token delivered by the very call for which
  ///   Lex() returns true is not reported as found.
  bool AdvanceTo(Token& Tok, tok::TokenKind kind) {
    for (;;) {
      if (Lex(Tok))
        return true;
      if (Tok.is(kind))
        return false;
    }
  }
};
///\brief Raw encoding of the token's SourceLocation.
///
/// NOTE(review): the lexers in this file start at a default constructed
/// SourceLocation, so this presumably equals the token's offset into the
/// lexed buffer - confirm.
size_t getFileOffset(const Token& Tok) {
  const SourceLocation tokenLoc = Tok.getLocation();
  return tokenLoc.getRawEncoding();
}
}
///\brief Determine whether source is an unnamed macro, i.e. whether its first
/// token outside of preprocessor directives and comments is a '{'.
///
/// \param source - The source code to analyze.
/// \param LangOpts - LangOptions to use for lexing.
/// \return The position of the opening '{' as given by getFileOffset();
///   std::string::npos if the first relevant token is something else, or if
///   the end of the source is reached before a non-final '{' is seen.
size_t
cling::utils::isUnnamedMacro(llvm::StringRef source,
                             clang::LangOptions& LangOpts) {
  MinimalPPLexer Lex(LangOpts, source);
  Token Tok;
  // The loop ends once Lex() signals the end of the buffer; a '{' that is the
  // very last token of the source thus does not make an unnamed macro.
  while (!Lex.Lex(Tok)) {
    // Skip PP directives, including the tok::eod that terminates them:
    // MinimalPPLexer has already left directive mode when it hands out the
    // eod, so checking inPPDirective() alone would mistake the eod for the
    // first "real" token and reject any macro preceded by e.g. an #include.
    if (Lex.inPPDirective() || Tok.is(tok::eod))
      continue;

    if (Tok.is(tok::comment))
      continue; // ignore comments

    // First relevant token: only a '{' makes this an unnamed macro.
    if (Tok.is(tok::l_brace))
      return getFileOffset(Tok);
    return std::string::npos;
  }

  // Empty file, or nothing but directives / comments.
  return std::string::npos;
}
///\brief Determine whether source needs to be wrapped into a function and, if
/// so, where the wrapper should start.
///
/// Preprocessor directives at the beginning of source are skipped. The first
/// token after them decides:
///  - "extern", "namespace", "template": do not wrap;
///  - "using": wrap only if more tokens follow the terminating ';';
///  - any other token (identifier, literal, punctuator, ...): wrap.
///
/// \param source - The source code to analyze. It is not modified here.
/// \param LangOpts - LangOptions to use for lexing.
/// \return std::string::npos if source should not be wrapped (also for empty
///   input and input consisting only of preprocessor directives). Otherwise
///   the getFileOffset() of the first token that needs wrapping; for the
///   "using" case that of the ';' ending the using declaration.
size_t cling::utils::getWrapPoint(std::string& source,
                                  const clang::LangOptions& LangOpts) {
  // TODO: For future reference.
  // Parser* P = const_cast<clang::Parser*>(m_IncrParser->getParser());
  // Parser::TentativeParsingAction TA(P);
  // TPResult result = P->isCXXDeclarationSpecifier();
  // TA.Revert();
  // return result == TPResult::True();

  MinimalPPLexer Lex(LangOpts, source);
  Token Tok;

  for (;;) {
    // Lex() returns true once the end of the buffer is reached, but Tok may
    // still be the last real token of the input (e.g. for the input "i").
    // It must be examined before giving up, so do not use the return value
    // as loop condition.
    const bool atEOF = Lex.Lex(Tok);

    if (Tok.is(tok::eof))
      break; // Nothing (left) that could need wrapping.

    // Skip PP directives, including their terminating tok::eod, which is
    // handed out after MinimalPPLexer has already left directive mode.
    if (Lex.inPPDirective() || Tok.is(tok::eod)) {
      if (atEOF)
        break;
      continue;
    }

    if (Tok.is(tok::raw_identifier) && !Tok.needsCleaning()) {
      StringRef keyword(Tok.getRawIdentifier());
      if (keyword.equals("using")) {
        // FIXME: Using definitions and declarations should be decl extracted.
        // Until we have that, don't wrap them if they are the only input.
        if (Lex.AdvanceTo(Tok, tok::semi)) {
          // EOF while looking for semi. Don't wrap.
          return std::string::npos;
        }
        // There is "more" - let's assume this input consists of a using
        // declaration or definition plus some code that should be wrapped.
        return getFileOffset(Tok);
      }
      if (keyword.equals("extern"))
        return std::string::npos;
      if (keyword.equals("namespace"))
        return std::string::npos;
      if (keyword.equals("template"))
        return std::string::npos;
    }

    // Anything else - be it an identifier, a literal or a punctuator - is
    // code that needs to be wrapped, starting at this token.
    return getFileOffset(Tok);
  }

  // We have only had PP directives (or nothing at all); no need to wrap.
  return std::string::npos;
}