diff --git a/include/cling/Interpreter/Interpreter.h b/include/cling/Interpreter/Interpreter.h index 92edb1a3..91e9e3cd 100644 --- a/include/cling/Interpreter/Interpreter.h +++ b/include/cling/Interpreter/Interpreter.h @@ -232,15 +232,6 @@ namespace cling { CompilationResult CodeCompleteInternal(const std::string& input, unsigned offset); - ///\brief Decides whether the input line should be wrapped into a function - /// declaration that can later be executed. - /// - ///\param[in] input - The input being scanned. - /// - ///\returns true if the input should be wrapped into a function declaration. - /// - bool ShouldWrapInput(const std::string& input); - ///\brief Wraps a given input. /// /// The interpreter must be able to run statements on the fly, which is not diff --git a/include/cling/MetaProcessor/MetaProcessor.h b/include/cling/MetaProcessor/MetaProcessor.h index 64fd4373..90945422 100644 --- a/include/cling/MetaProcessor/MetaProcessor.h +++ b/include/cling/MetaProcessor/MetaProcessor.h @@ -12,6 +12,8 @@ #include "cling/Interpreter/Interpreter.h" +#include "clang/Basic/SourceLocation.h" + #include "llvm/ADT/StringRef.h" #include "llvm/ADT/SmallString.h" @@ -158,14 +160,15 @@ namespace cling { ///\param [in] filename - The file to read. /// @param[out] result - the cling::Value as result of the /// execution of the last statement - ///\param [in] ignoreOutmostBlock - Whether to ignore enlosing {}. + ///\param [in] posOpenCurly - Position of the opening '{' of the enclosing + /// block to ignore; (size_t)(-1) if there is none to ignore. /// ///\returns result of the compilation. /// Interpreter::CompilationResult readInputFromFile(llvm::StringRef filename, Value* result, - bool ignoreOutmostBlock = false); + size_t posOpenCurly = (size_t)(-1)); ///\brief Set the stdout and stderr stream to the appropriate file. /// ///\param [in] file - The file for the redirection. 
diff --git a/include/cling/Utils/SourceNormalization.h b/include/cling/Utils/SourceNormalization.h new file mode 100644 index 00000000..4ea45f21 --- /dev/null +++ b/include/cling/Utils/SourceNormalization.h @@ -0,0 +1,53 @@ +//--------------------------------------------------------------------*- C++ -*- +// CLING - the C++ LLVM-based InterpreterG :) +// author: Axel Naumann +// +// This file is dual-licensed: you can choose to license it under the University +// of Illinois Open Source License or the GNU Lesser General Public License. See +// LICENSE.TXT for details. +//------------------------------------------------------------------------------ + +#ifndef CLING_UTILS_SOURCE_NORMALIZATION_H +#define CLING_UTILS_SOURCE_NORMALIZATION_H + +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/StringRef.h" + +#include <string> + +namespace clang { + class LangOptions; + class SourceLocation; + class SourceManager; +}; + +namespace cling { +namespace utils { + ///\brief Determine whether the source is an unnamed macro. + /// + /// Unnamed macros contain no function definition, but "prompt-style" code + /// surrounded by a set of curly braces. + /// + /// \param source The source code to analyze. + /// \return the position of the unnamed macro's opening '{'; or + /// std::string::npos if this is not an unnamed macro. + size_t isUnnamedMacro(llvm::StringRef source, + clang::LangOptions& LangOpts); + + ///\brief Determine whether the source needs to be moved into a function. + /// + /// If so, move possible #includes out of the future body of the function and + /// return the position where the function signature should be inserted. + /// + /// \param source - The source code to analyze; out: the source with + /// re-arranged includes. + /// \param LangOpts - LangOptions to use for lexing; the source is lexed in + /// raw mode, so no SourceManager is needed. 
+ /// \return The position where the function signature and '{' should be + /// inserted; std::string::npos if this source should not be wrapped. + size_t getWrapPoint(std::string& source, const clang::LangOptions& LangOpts); +} // namespace utils +} // namespace cling + +#endif // CLING_UTILS_SOURCE_NORMALIZATION_H \ No newline at end of file diff --git a/lib/Interpreter/Interpreter.cpp b/lib/Interpreter/Interpreter.cpp index 685702aa..d3b33133 100644 --- a/lib/Interpreter/Interpreter.cpp +++ b/lib/Interpreter/Interpreter.cpp @@ -28,6 +28,7 @@ #include "cling/Interpreter/Transaction.h" #include "cling/Interpreter/Value.h" #include "cling/Utils/AST.h" +#include "cling/Utils/SourceNormalization.h" #include "cling/Interpreter/AutoloadCallback.h" #include "clang/AST/ASTContext.h" @@ -535,7 +536,12 @@ namespace cling { Interpreter::CompilationResult Interpreter::process(const std::string& input, Value* V /* = 0 */, Transaction** T /* = 0 */) { - if (isRawInputEnabled() || !ShouldWrapInput(input)) { + std::string wrapReadySource = input; + size_t wrapPoint = std::string::npos; + if (!isRawInputEnabled()) + wrapPoint = utils::getWrapPoint(wrapReadySource, getCI()->getLangOpts()); + + if (isRawInputEnabled() || wrapPoint == std::string::npos) { CompilationOptions CO; CO.DeclarationExtraction = 0; CO.ValuePrinting = 0; @@ -553,7 +559,7 @@ namespace cling { CO.DynamicScoping = isDynamicLookupEnabled(); CO.Debug = isPrintingDebug(); CO.CheckPointerValidity = 1; - if (EvaluateInternal(input, CO, V, T) == Interpreter::kFailure) { + if (EvaluateInternal(wrapReadySource, CO, V, T) == Interpreter::kFailure) { return Interpreter::kFailure; } @@ -658,7 +664,9 @@ namespace cling { std::string wrappedInput = input; std::string wrapperName; - if (ShouldWrapInput(input)) + size_t wrapPos = utils::getWrapPoint(wrappedInput, getCI()->getLangOpts()); + + if (wrapPos != std::string::npos) WrapInput(wrappedInput, wrapperName, CO); StateDebuggerRAII stateDebugger(this); @@ -786,89 +794,6 @@ 
namespace cling { return Interpreter::kFailure; } - bool Interpreter::ShouldWrapInput(const std::string& input) { - // TODO: For future reference. - // Parser* P = const_cast(m_IncrParser->getParser()); - // Parser::TentativeParsingAction TA(P); - // TPResult result = P->isCXXDeclarationSpecifier(); - // TA.Revert(); - // return result == TPResult::True(); - - // FIXME: can't skipToEndOfLine because we don't want to PragmaLex - // because we don't want to pollute the preprocessor. Without PragmaLex - // there is no "end of line" / eod token. So skip the #line before lexing. - size_t posStart = 0; - size_t lenInput = input.length(); - while (lenInput > posStart && isspace(input[posStart])) - ++posStart; - // Don't wrap empty input - if (posStart == lenInput) - return false; - if (input[posStart] == '#') { - size_t posDirective = posStart + 1; - while (lenInput > posDirective && isspace(input[posDirective])) - ++posDirective; - // A single '#'? Weird... better don't wrap. - if (posDirective == lenInput) - return false; - if (!strncmp(&input[posDirective], "line ", 5)) { - // There is a line directive. It does affect the determination whether - // this input should be wrapped; skip the line. - size_t posEOD = input.find('\n', posDirective + 5); - if (posEOD != std::string::npos) - posStart = posEOD + 1; - } - } - //llvm::OwningPtr buf; - //buf.reset(llvm::MemoryBuffer::getMemBuffer(&input[posStart], - // "Cling Preparse Buf")); - Lexer WrapLexer(SourceLocation(), getSema().getLangOpts(), - input.c_str() + posStart, - input.c_str() + posStart, - input.c_str() + input.size()); - Token Tok; - WrapLexer.LexFromRawLexer(Tok); - const tok::TokenKind kind = Tok.getKind(); - - if (kind == tok::raw_identifier && !Tok.needsCleaning()) { - StringRef keyword(Tok.getRawIdentifier()); - if (keyword.equals("using")) { - // FIXME: Using definitions and declarations should be decl extracted. - // Until we have that, don't wrap them if they are the only input. 
- const char* cursor = keyword.data(); - cursor = strchr(cursor, ';'); // advance to end of using decl / def. - if (!cursor) { - // Using decl / def without trailing ';' means input consists of only - // that using decl /def: should not wrap. - return false; - } - // Skip whitespace after ';' - do ++cursor; - while (*cursor && isspace(*cursor)); - if (!*cursor) - return false; - // There is "more" - let's assume this input consists of a using - // declaration or definition plus some code that should be wrapped. - return true; - } - if (keyword.equals("extern")) - return false; - if (keyword.equals("namespace")) - return false; - if (keyword.equals("template")) - return false; - } - else if (kind == tok::hash) { - WrapLexer.LexFromRawLexer(Tok); - if (Tok.is(tok::raw_identifier) && !Tok.needsCleaning()) { - StringRef keyword(Tok.getRawIdentifier()); - if (keyword.equals("include")) - return false; - } - } - - return true; - } void Interpreter::WrapInput(std::string& input, std::string& fname, CompilationOptions &CO) { diff --git a/lib/MetaProcessor/MetaProcessor.cpp b/lib/MetaProcessor/MetaProcessor.cpp index 1a93009c..4bba723e 100644 --- a/lib/MetaProcessor/MetaProcessor.cpp +++ b/lib/MetaProcessor/MetaProcessor.cpp @@ -189,7 +189,7 @@ namespace cling { Interpreter::CompilationResult MetaProcessor::readInputFromFile(llvm::StringRef filename, Value* result, - bool ignoreOutmostBlock /*=false*/) { + size_t posOpenCurly) { { // check that it's not binary: @@ -197,7 +197,9 @@ namespace cling { char magic[1024] = {0}; in.read(magic, sizeof(magic)); size_t readMagic = in.gcount(); - if (readMagic >= 4) { + // Binary files < 300 bytes are rare, and below newlines etc make the + // heuristic unreliable. 
+ if (readMagic >= 300) { llvm::StringRef magicStr(magic,in.gcount()); llvm::sys::fs::file_magic fileType = llvm::sys::fs::identify_magic(magicStr); @@ -226,59 +228,50 @@ namespace cling { in.seekg(0); in.read(&content[0], size); - if (ignoreOutmostBlock && !content.empty()) { + if (posOpenCurly != (size_t)-1 && !content.empty()) { + assert(content[posOpenCurly] == '{' + && "No curly at claimed position of opening curly!"); + // hide the curly brace: + content[posOpenCurly] = ' '; + // and the matching closing '}' static const char whitespace[] = " \t\r\n"; - std::string::size_type posNonWS = content.find_first_not_of(whitespace); - // Handle comments before leading { - while (posNonWS != std::string::npos - && content[posNonWS] == '/' && content[posNonWS+1] == '/') { - // Remove the comment line - posNonWS = content.find_first_of('\n', posNonWS+2)+1; - posNonWS = content.find_first_not_of(whitespace, posNonWS); - } - std::string::size_type replaced = posNonWS; - if (posNonWS != std::string::npos && content[posNonWS] == '{') { - // hide the curly brace: - content[posNonWS] = ' '; - // and the matching closing '}' - posNonWS = content.find_last_not_of(whitespace); - if (posNonWS != std::string::npos) { - if (content[posNonWS] == ';' && content[posNonWS-1] == '}') { - content[posNonWS--] = ' '; // replace ';' and enter next if + size_t posCloseCurly = content.find_last_not_of(whitespace); + if (posCloseCurly != std::string::npos) { + if (content[posCloseCurly] == ';' && content[posCloseCurly-1] == '}') { + content[posCloseCurly--] = ' '; // replace ';' and enter next if + } + if (content[posCloseCurly] == '}') { + content[posCloseCurly] = ' '; // replace '}' + } else { + std::string::size_type posBlockClose = content.find_last_of('}'); + if (posBlockClose != std::string::npos) { + content[posBlockClose] = ' '; // replace '}' } - if (content[posNonWS] == '}') { - content[posNonWS] = ' '; // replace '}' - } else { - std::string::size_type posBlockClose = 
content.find_last_of('}'); - if (posBlockClose != std::string::npos) { - content[posBlockClose] = ' '; // replace '}' + std::string::size_type posComment + = content.find_first_not_of(whitespace, posBlockClose); + if (posComment != std::string::npos + && content[posComment] == '/' && content[posComment+1] == '/') { + // More text (comments) are okay after the last '}', but + // we can not easily find it to remove it (so we need to upgrade + // this code to better handle the case with comments or + // preprocessor code before and after the leading { and + // trailing }) + while (posComment <= posCloseCurly) { + content[posComment++] = ' '; // replace '}' and comment } - std::string::size_type posComment - = content.find_first_not_of(whitespace, posBlockClose); - if (posComment != std::string::npos - && content[posComment] == '/' && content[posComment+1] == '/') { - // More text (comments) are okay after the last '}', but - // we can not easily find it to remove it (so we need to upgrade - // this code to better handle the case with comments or - // preprocessor code before and after the leading { and - // trailing }) - while (posComment <= posNonWS) { - content[posComment++] = ' '; // replace '}' and comment - } - } else { - content[replaced] = '{'; - // By putting the '{' back, we keep the code as consistent as - // the user wrote it ... but we should still warn that we not - // goint to treat this file an unamed macro. - llvm::errs() - << "Warning in cling::MetaProcessor: can not find the closing '}', " - << llvm::sys::path::filename(filename) - << " is not handled as an unamed script!\n"; - } // did not find "//" - } // remove comments after the trailing '}' - } // find '}' - } // have '{' - } // ignore outmost block + } else { + content[posOpenCurly] = '{'; + // Put the '{' back at its original position to keep the code as + // the user wrote it ... but we should still warn that we are not + // going to treat this file as an unnamed macro. 
+ llvm::errs() + << "Warning in cling::MetaProcessor: can not find the closing '}', " + << llvm::sys::path::filename(filename) + << " is not handled as an unamed script!\n"; + } // did not find "//" + } // remove comments after the trailing '}' + } // find '}' + } // ignore outermost block std::string strFilename(filename.str()); m_CurrentlyExecutingFile = strFilename; diff --git a/lib/Utils/CMakeLists.txt b/lib/Utils/CMakeLists.txt index d537ae76..c385070e 100644 --- a/lib/Utils/CMakeLists.txt +++ b/lib/Utils/CMakeLists.txt @@ -11,11 +11,13 @@ set( LLVM_LINK_COMPONENTS add_cling_library(clingUtils OBJECT AST.cpp + SourceNormalization.cpp Validation.cpp LINK_LIBS clangSema clangAST + clangLex clangBasic ) diff --git a/lib/Utils/SourceNormalization.cpp b/lib/Utils/SourceNormalization.cpp new file mode 100644 index 00000000..3cea2c44 --- /dev/null +++ b/lib/Utils/SourceNormalization.cpp @@ -0,0 +1,144 @@ +//------------------------------------------------------------------------------ +// CLING - the C++ LLVM-based InterpreterG :) +// author: Lukasz Janyst +// +// This file is dual-licensed: you can choose to license it under the University +// of Illinois Open Source License or the GNU Lesser General Public License. See +// LICENSE.TXT for details. +//------------------------------------------------------------------------------ + +#include "cling/Utils/SourceNormalization.h" + +#include "clang/Basic/LangOptions.h" +#include "clang/Basic/SourceManager.h" +#include "clang/Lex/Lexer.h" + +#include + +using namespace clang; + +namespace { +///\brief A Lexer that exposes preprocessor directives. +class MinimalPPLexer: public Lexer { +public: + ///\brief Construct a Lexer from LangOpts and source. 
+ MinimalPPLexer(const LangOptions &LangOpts, llvm::StringRef source): + Lexer(SourceLocation(), LangOpts, + source.begin(), source.begin(), source.end()) {} + + bool inPPDirective() const { return ParsingPreprocessorDirective; } + + ///\brief Lex, forwarding to Lexer::LexFromRawLexer, and keeping track of + /// preprocessor directives to provide a tok::eod corresponding to a + /// tok::hash. + bool Lex(Token& Tok) { + bool ret = LexFromRawLexer(Tok); + if (inPPDirective()) { + // Saw a PP directive; probe for eod to end PP parsing mode. + if (Tok.is(tok::eod)) + ParsingPreprocessorDirective = false; + } else { + if (Tok.is(tok::hash)) { + // Found a PP directive, request tok::eod to be generated. + ParsingPreprocessorDirective = true; + } + } + return ret; + } + + ///\brief Advance to token with given token kind. + /// + /// \param Tok - Token to advance. + /// \param kind - Token kind where to stop lexing. + /// \return - Result of most recent call to Lex(). + bool AdvanceTo(Token& Tok, tok::TokenKind kind) { + while (!Lex(Tok)) { + if (Tok.is(kind)) + return false; + } + return true; + } +}; + +size_t getFileOffset(const Token& Tok) { + return Tok.getLocation().getRawEncoding(); +} + +} + +size_t +cling::utils::isUnnamedMacro(llvm::StringRef source, + clang::LangOptions& LangOpts) { + // Find the first token that is neither part of a PP directive nor a comment. + // If that token is a '{' we have an unnamed macro. + + MinimalPPLexer Lex(LangOpts, source); + Token Tok; + while (!Lex.Lex(Tok)) { + if (Lex.inPPDirective()) + continue; // Skip PP directives. + + const tok::TokenKind kind = Tok.getKind(); + if (kind == tok::comment) continue; // ignore comments + if (kind == tok::l_brace) + return getFileOffset(Tok); + + return std::string::npos; + } + + // Lexing ended without finding such a token (e.g. empty file). + + return std::string::npos; +} + + + +size_t cling::utils::getWrapPoint(std::string& source, + const clang::LangOptions& LangOpts) { + // TODO: For future reference. 
+ // Parser* P = const_cast<Parser*>(m_IncrParser->getParser()); + // Parser::TentativeParsingAction TA(P); + // TPResult result = P->isCXXDeclarationSpecifier(); + // TA.Revert(); + // return result == TPResult::True(); + + MinimalPPLexer Lex(LangOpts, source); + Token Tok; + // Start at 0 so input without leading PP directives wraps from its start. + size_t wrapPoint = 0; + + while (!Lex.Lex(Tok)) { + if (Lex.inPPDirective()) { + wrapPoint = getFileOffset(Tok); + continue; // Skip PP directives; they just move the wrap point. + } + + const tok::TokenKind kind = Tok.getKind(); + + if (kind == tok::raw_identifier && !Tok.needsCleaning()) { + StringRef keyword(Tok.getRawIdentifier()); + if (keyword.equals("using")) { + // FIXME: Using definitions and declarations should be decl extracted. + // Until we have that, don't wrap them if they are the only input. + if (Lex.AdvanceTo(Tok, tok::semi)) { + // EOF while looking for semi. Don't wrap. + return std::string::npos; + } + // There is "more" - let's assume this input consists of a using + // declaration or definition plus some code that should be wrapped. + return getFileOffset(Tok); + } + if (keyword.equals("extern")) + return std::string::npos; + if (keyword.equals("namespace")) + return std::string::npos; + if (keyword.equals("template")) + return std::string::npos; + // Else there is something else here that needs to be wrapped. + return wrapPoint; + } + } + + // We have only had PP directives; no need to wrap. + return std::string::npos; +}