Move shouldWrap / isUnnamedMacro; use Lexer. Prerequisite for improving wrapInput().

2016-07-27 01:24:36 +02:00 · 2016-07-27 01:24:36 +02:00 · 93edfaeec2
commit 93edfaeec2
parent 112370dc17
7 changed files with 260 additions and 149 deletions
--- a/include/cling/Interpreter/Interpreter.h
+++ b/include/cling/Interpreter/Interpreter.h
@ -232,15 +232,6 @@ namespace cling {
    CompilationResult CodeCompleteInternal(const std::string& input,
                                           unsigned offset);

-    ///\brief Decides whether the input line should be wrapped into a function
-    /// declaration that can later be executed.
-    ///
-    ///\param[in] input - The input being scanned.
-    ///
-    ///\returns true if the input should be wrapped into a function declaration.
-    ///
-    bool ShouldWrapInput(const std::string& input);
-
    ///\brief Wraps a given input.
    ///
    /// The interpreter must be able to run statements on the fly, which is not
--- a/include/cling/MetaProcessor/MetaProcessor.h
+++ b/include/cling/MetaProcessor/MetaProcessor.h
@ -12,6 +12,8 @@

 #include "cling/Interpreter/Interpreter.h"

+#include "clang/Basic/SourceLocation.h"
+
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/SmallString.h"

@ -158,14 +160,15 @@ namespace cling {
    ///\param [in] filename - The file to read.
    /// @param[out] result - the cling::Value as result of the
    ///             execution of the last statement
-    ///\param [in] ignoreOutmostBlock - Whether to ignore enlosing {}.
+    ///\param [in] posOpenCurly - Position of the opening '{' of the enclosing
+    ///            {} to ignore; (size_t)(-1) if no enclosing {} is to be ignored.
    ///
    ///\returns result of the compilation.
    ///
    Interpreter::CompilationResult
    readInputFromFile(llvm::StringRef filename,
                      Value* result,
-                      bool ignoreOutmostBlock = false);
+                      size_t posOpenCurly = (size_t)(-1));
    ///\brief Set the stdout and stderr stream to the appropriate file.
    ///
    ///\param [in] file - The file for the redirection.
--- a/include/cling/Utils/SourceNormalization.h
+++ b/include/cling/Utils/SourceNormalization.h
@ -0,0 +1,53 @@
+//--------------------------------------------------------------------*- C++ -*-
+// CLING - the C++ LLVM-based InterpreterG :)
+// author:  Axel Naumann <axel@cern.ch>
+//
+// This file is dual-licensed: you can choose to license it under the University
+// of Illinois Open Source License or the GNU Lesser General Public License. See
+// LICENSE.TXT for details.
+//------------------------------------------------------------------------------
+
+#ifndef CLING_UTILS_SOURCE_NORMALIZATION_H
+#define CLING_UTILS_SOURCE_NORMALIZATION_H
+
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/StringRef.h"
+
+#include <string>
+
+namespace clang {
+  class LangOptions;
+  class SourceLocation;
+  class SourceManager;
+};
+
+namespace cling {
+namespace utils {
+  ///\brief Determine whether the source is an unnamed macro.
+  ///
+  /// Unnamed macros contain no function definition, but "prompt-style" code
+  /// surrounded by a set of curly braces.
+  ///
+  /// \param source - The source code to analyze.
+  /// \param LangOpts - LangOptions to use for lexing.
+  /// \return the position of the unnamed macro's opening '{'; or
+  ///         std::string::npos if this is not an unnamed macro.
+  size_t isUnnamedMacro(llvm::StringRef source,
+                        clang::LangOptions& LangOpts);
+
+  ///\brief Determine whether the source needs to be moved into a function.
+  ///
+  /// If so, move possible #includes out of the future body of the function and
+  /// return the position where the function signature should be inserted.
+  ///
+  /// \param source - The source code to analyze; out: the source with
+  ///        re-arranged includes. NOTE(review): the implementation in
+  ///        lib/Utils/SourceNormalization.cpp does not modify \p source yet.
+  /// \param LangOpts - LangOptions to use for lexing.
+  /// \return The position where the function signature and '{' should be
+  ///     inserted; std::string::npos if this source should not be wrapped.
+  size_t getWrapPoint(std::string& source, const clang::LangOptions& LangOpts);
+} // namespace utils
+} // namespace cling
+
+#endif // CLING_UTILS_SOURCE_NORMALIZATION_H
--- a/lib/Interpreter/Interpreter.cpp
+++ b/lib/Interpreter/Interpreter.cpp
@ -28,6 +28,7 @@
 #include "cling/Interpreter/Transaction.h"
 #include "cling/Interpreter/Value.h"
 #include "cling/Utils/AST.h"
+#include "cling/Utils/SourceNormalization.h"
 #include "cling/Interpreter/AutoloadCallback.h"

 #include "clang/AST/ASTContext.h"
@ -535,7 +536,12 @@ namespace cling {
  Interpreter::CompilationResult
  Interpreter::process(const std::string& input, Value* V /* = 0 */,
                       Transaction** T /* = 0 */) {
-    if (isRawInputEnabled() || !ShouldWrapInput(input)) {
+    std::string wrapReadySource = input;
+    size_t wrapPoint = std::string::npos;
+    if (!isRawInputEnabled())
+      wrapPoint = utils::getWrapPoint(wrapReadySource, getCI()->getLangOpts());
+
+    if (isRawInputEnabled() || wrapPoint == std::string::npos) {
      CompilationOptions CO;
      CO.DeclarationExtraction = 0;
      CO.ValuePrinting = 0;
@ -553,7 +559,7 @@ namespace cling {
    CO.DynamicScoping = isDynamicLookupEnabled();
    CO.Debug = isPrintingDebug();
    CO.CheckPointerValidity = 1;
-    if (EvaluateInternal(input, CO, V, T) == Interpreter::kFailure) {
+    if (EvaluateInternal(wrapReadySource, CO, V, T) == Interpreter::kFailure) {
      return Interpreter::kFailure;
    }

@ -658,7 +664,9 @@ namespace cling {

    std::string wrappedInput = input;
    std::string wrapperName;
-    if (ShouldWrapInput(input))
+    size_t wrapPos = utils::getWrapPoint(wrappedInput, getCI()->getLangOpts());
+
+    if (wrapPos != std::string::npos)
      WrapInput(wrappedInput, wrapperName, CO);

    StateDebuggerRAII stateDebugger(this);
@ -786,89 +794,6 @@ namespace cling {
    return Interpreter::kFailure;
  }

-  bool Interpreter::ShouldWrapInput(const std::string& input) {
-    // TODO: For future reference.
-    // Parser* P = const_cast<clang::Parser*>(m_IncrParser->getParser());
-    // Parser::TentativeParsingAction TA(P);
-    // TPResult result = P->isCXXDeclarationSpecifier();
-    // TA.Revert();
-    // return result == TPResult::True();
-
-    // FIXME: can't skipToEndOfLine because we don't want to PragmaLex
-    // because we don't want to pollute the preprocessor. Without PragmaLex
-    // there is no "end of line" / eod token. So skip the #line before lexing.
-    size_t posStart = 0;
-    size_t lenInput = input.length();
-    while (lenInput > posStart && isspace(input[posStart]))
-      ++posStart;
-    // Don't wrap empty input
-    if (posStart == lenInput)
-      return false;
-    if (input[posStart] == '#') {
-      size_t posDirective = posStart + 1;
-      while (lenInput > posDirective && isspace(input[posDirective]))
-        ++posDirective;
-      // A single '#'? Weird... better don't wrap.
-      if (posDirective == lenInput)
-        return false;
-      if (!strncmp(&input[posDirective], "line ", 5)) {
-        // There is a line directive. It does affect the determination whether
-        // this input should be wrapped; skip the line.
-        size_t posEOD = input.find('\n', posDirective + 5);
-        if (posEOD != std::string::npos)
-          posStart = posEOD + 1;
-      }
-    }
-    //llvm::OwningPtr<llvm::MemoryBuffer> buf;
-    //buf.reset(llvm::MemoryBuffer::getMemBuffer(&input[posStart],
-    //                                           "Cling Preparse Buf"));
-    Lexer WrapLexer(SourceLocation(), getSema().getLangOpts(),
-                    input.c_str() + posStart,
-                    input.c_str() + posStart,
-                    input.c_str() + input.size());
-    Token Tok;
-    WrapLexer.LexFromRawLexer(Tok);
-    const tok::TokenKind kind = Tok.getKind();
-
-    if (kind == tok::raw_identifier && !Tok.needsCleaning()) {
-      StringRef keyword(Tok.getRawIdentifier());
-      if (keyword.equals("using")) {
-        // FIXME: Using definitions and declarations should be decl extracted.
-        // Until we have that, don't wrap them if they are the only input.
-        const char* cursor = keyword.data();
-        cursor = strchr(cursor, ';'); // advance to end of using decl / def.
-        if (!cursor) {
-          // Using decl / def without trailing ';' means input consists of only
-          // that using decl /def: should not wrap.
-          return false;
-        }
-        // Skip whitespace after ';'
-        do ++cursor;
-        while (*cursor && isspace(*cursor));
-        if (!*cursor)
-          return false;
-        // There is "more" - let's assume this input consists of a using
-        // declaration or definition plus some code that should be wrapped.
-        return true;
-      }
-      if (keyword.equals("extern"))
-        return false;
-      if (keyword.equals("namespace"))
-        return false;
-      if (keyword.equals("template"))
-        return false;
-    }
-    else if (kind == tok::hash) {
-      WrapLexer.LexFromRawLexer(Tok);
-      if (Tok.is(tok::raw_identifier) && !Tok.needsCleaning()) {
-        StringRef keyword(Tok.getRawIdentifier());
-        if (keyword.equals("include"))
-          return false;
-      }
-    }
-
-    return true;
-  }

  void Interpreter::WrapInput(std::string& input, std::string& fname,
                              CompilationOptions &CO) {
--- a/lib/MetaProcessor/MetaProcessor.cpp
+++ b/lib/MetaProcessor/MetaProcessor.cpp
@ -189,7 +189,7 @@ namespace cling {
  Interpreter::CompilationResult
  MetaProcessor::readInputFromFile(llvm::StringRef filename,
                                   Value* result,
-                                   bool ignoreOutmostBlock /*=false*/) {
+                                   size_t posOpenCurly) {

    {
      // check that it's not binary:
@ -197,7 +197,9 @@ namespace cling {
      char magic[1024] = {0};
      in.read(magic, sizeof(magic));
      size_t readMagic = in.gcount();
-      if (readMagic >= 4) {
+      // Binary files < 300 bytes are rare, and below that size newlines etc.
+      // make the heuristic unreliable.
+      if (readMagic >= 300) {
        llvm::StringRef magicStr(magic,in.gcount());
        llvm::sys::fs::file_magic fileType
          = llvm::sys::fs::identify_magic(magicStr);
@ -226,59 +228,50 @@ namespace cling {
    in.seekg(0);
    in.read(&content[0], size);

-    if (ignoreOutmostBlock && !content.empty()) {
+    if (posOpenCurly != (size_t)-1 && !content.empty()) {
+      assert(content[posOpenCurly] == '{'
+             && "No curly at claimed position of opening curly!");
+      // hide the curly brace:
+      content[posOpenCurly] = ' ';
+      // and the matching closing '}'
      static const char whitespace[] = " \t\r\n";
-      std::string::size_type posNonWS = content.find_first_not_of(whitespace);
-      // Handle comments before leading {
-      while (posNonWS != std::string::npos
-             && content[posNonWS] == '/' && content[posNonWS+1] == '/') {
-        // Remove the comment line
-        posNonWS = content.find_first_of('\n', posNonWS+2)+1;
-        posNonWS = content.find_first_not_of(whitespace, posNonWS);
-      }
-      std::string::size_type replaced = posNonWS;
-      if (posNonWS != std::string::npos && content[posNonWS] == '{') {
-        // hide the curly brace:
-        content[posNonWS] = ' ';
-        // and the matching closing '}'
-        posNonWS = content.find_last_not_of(whitespace);
-        if (posNonWS != std::string::npos) {
-          if (content[posNonWS] == ';' && content[posNonWS-1] == '}') {
-            content[posNonWS--] = ' '; // replace ';' and enter next if
+      size_t posCloseCurly = content.find_last_not_of(whitespace);
+      if (posCloseCurly != std::string::npos) {
+        if (content[posCloseCurly] == ';' && content[posCloseCurly-1] == '}') {
+          content[posCloseCurly--] = ' '; // replace ';' and enter next if
+        }
+        if (content[posCloseCurly] == '}') {
+          content[posCloseCurly] = ' '; // replace '}'
+        } else {
+          std::string::size_type posBlockClose = content.find_last_of('}');
+          if (posBlockClose != std::string::npos) {
+            content[posBlockClose] = ' '; // replace '}'
          }
-          if (content[posNonWS] == '}') {
-            content[posNonWS] = ' '; // replace '}'
-          } else {
-            std::string::size_type posBlockClose = content.find_last_of('}');
-            if (posBlockClose != std::string::npos) {
-              content[posBlockClose] = ' '; // replace '}'
+          std::string::size_type posComment
+            = content.find_first_not_of(whitespace, posBlockClose);
+          if (posComment != std::string::npos
+              && content[posComment] == '/' && content[posComment+1] == '/') {
+            // More text (comments) are okay after the last '}', but
+            // we can not easily find it to remove it (so we need to upgrade
+            // this code to better handle the case with comments or
+            // preprocessor code before and after the leading { and
+            // trailing })
+            while (posComment <= posCloseCurly) {
+              content[posComment++] = ' '; // replace '}' and comment
            }
-            std::string::size_type posComment
-              = content.find_first_not_of(whitespace, posBlockClose);
-            if (posComment != std::string::npos
-                && content[posComment] == '/' && content[posComment+1] == '/') {
-              // More text (comments) are okay after the last '}', but
-              // we can not easily find it to remove it (so we need to upgrade
-              // this code to better handle the case with comments or
-              // preprocessor code before and after the leading { and
-              // trailing })
-              while (posComment <= posNonWS) {
-                content[posComment++] = ' '; // replace '}' and comment
-              }
-            } else {
-              content[replaced] = '{';
-              // By putting the '{' back, we keep the code as consistent as
-              // the user wrote it ... but we should still warn that we not
-              // goint to treat this file an unamed macro.
-              llvm::errs()
-                << "Warning in cling::MetaProcessor: can not find the closing '}', "
-                << llvm::sys::path::filename(filename)
-                << " is not handled as an unamed script!\n";
-            } // did not find "//"
-          } // remove comments after the trailing '}'
-        } // find '}'
-      } // have '{'
-    } // ignore outmost block
+          } else {
+            // The block is not closed by a trailing '}' (or '}' + comment).
+            // Restore the opening '{' to keep the code as the user wrote it,
+            // and warn that this file is not treated as an unnamed macro.
+            content[posOpenCurly] = '{';
+            llvm::errs()
+              << "Warning in cling::MetaProcessor: can not find the closing '}', "
+              << llvm::sys::path::filename(filename)
+              << " is not handled as an unamed script!\n";
+          } // did not find "//"
+        } // remove comments after the trailing '}'
+      } // find '}'
+    } // ignore outermost block

    std::string strFilename(filename.str());
    m_CurrentlyExecutingFile = strFilename;
--- a/lib/Utils/CMakeLists.txt
+++ b/lib/Utils/CMakeLists.txt
@ -11,11 +11,13 @@ set( LLVM_LINK_COMPONENTS

 add_cling_library(clingUtils OBJECT
  AST.cpp
+  SourceNormalization.cpp
  Validation.cpp

  LINK_LIBS
  clangSema
  clangAST
+  clangLex
  clangBasic
 )

--- a/lib/Utils/SourceNormalization.cpp
+++ b/lib/Utils/SourceNormalization.cpp
@ -0,0 +1,144 @@
+//------------------------------------------------------------------------------
+// CLING - the C++ LLVM-based InterpreterG :)
+// author:  Lukasz Janyst <ljanyst@cern.ch>
+//
+// This file is dual-licensed: you can choose to license it under the University
+// of Illinois Open Source License or the GNU Lesser General Public License. See
+// LICENSE.TXT for details.
+//------------------------------------------------------------------------------
+
+#include "cling/Utils/SourceNormalization.h"
+
+#include "clang/Basic/LangOptions.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Lex/Lexer.h"
+
+#include <utility>
+
+using namespace clang;
+
+namespace {
+///\brief A Lexer that exposes preprocessor directives.
+class MinimalPPLexer: public Lexer {
+public:
+  ///\brief Construct a Lexer from LangOpts over the characters of source.
+  MinimalPPLexer(const LangOptions &LangOpts, llvm::StringRef source):
+    Lexer(SourceLocation(), LangOpts,
+          source.begin(), source.begin(), source.end()) {}
+
+  bool inPPDirective() const { return ParsingPreprocessorDirective; }
+
+  ///\brief Lex via Lexer::LexFromRawLexer and return its result. A tok::hash
+  /// enters PP directive mode (requesting a terminating tok::eod to be
+  /// generated); that tok::eod leaves PP directive mode again.
+  bool Lex(Token& Tok) {
+    bool ret = LexFromRawLexer(Tok);
+    if (inPPDirective()) {
+      // Saw a PP directive; probe for eod to end PP parsing mode.
+      if (Tok.is(tok::eod))
+        ParsingPreprocessorDirective = false;
+    } else {
+      if (Tok.is(tok::hash)) {
+        // Found a PP directive, request tok::eod to be generated.
+        ParsingPreprocessorDirective = true;
+      }
+    }
+    return ret;
+  }
+
+  ///\brief Advance to token with given token kind.
+  ///
+  /// \param Tok - Token to advance.
+  /// \param kind - Token kind where to stop lexing.
+  /// \return - false if \p kind was found; true if Lex() ended without it.
+  bool AdvanceTo(Token& Tok, tok::TokenKind kind) {
+    while (!Lex(Tok)) {
+      if (Tok.is(kind))
+        return false;
+    }
+    return true;
+  }
+};
+
+///\brief Return the raw encoding of Tok's SourceLocation; callers use it as
+/// the token's position in the lexed source. NOTE(review): presumably valid
+/// because MinimalPPLexer lexes from a default SourceLocation() - confirm.
+size_t getFileOffset(const Token& Tok) {
+  return Tok.getLocation().getRawEncoding();
+}
+
+}
+
+size_t
+cling::utils::isUnnamedMacro(llvm::StringRef source,
+                             clang::LangOptions& LangOpts) {
+  // Find the first token that is neither part of a preprocessor directive
+  // nor a comment. If that token is a '{' we have an unnamed macro.
+
+  MinimalPPLexer Lex(LangOpts, source);
+  Token Tok;
+  while (!Lex.Lex(Tok)) {
+    // Skip PP directives and the tok::eod that terminates each of them; PP
+    // mode has already been left by the time the eod is handed out, so it has
+    // to be tested for explicitly.
+    if (Lex.inPPDirective() || Tok.is(tok::eod))
+      continue;
+
+    const tok::TokenKind kind = Tok.getKind();
+    if (kind == tok::comment) continue; // ignore comments
+    if (kind == tok::l_brace)
+      return getFileOffset(Tok);
+
+    // Any other leading token: this is not an unnamed macro.
+    return std::string::npos;
+  }
+
+  // Empty file, or nothing but PP directives and comments.
+  return std::string::npos;
+}
+
+
+
+size_t cling::utils::getWrapPoint(std::string& source,
+                                  const clang::LangOptions& LangOpts) {
+  // TODO: For future reference.
+  // Parser* P = const_cast<clang::Parser*>(m_IncrParser->getParser());
+  // Parser::TentativeParsingAction TA(P);
+  // TPResult result = P->isCXXDeclarationSpecifier();
+  // TA.Revert();
+  // return result == TPResult::True();
+
+  // The decision is taken on the first token that is not part of a
+  // preprocessor directive: "extern", "namespace", "template" and a lone
+  // "using ..." are not wrapped; everything else is wrapped starting at that
+  // token. source is only read here, it is not modified.
+  MinimalPPLexer Lex(LangOpts, source);
+  Token Tok;
+
+  // Lex() returns true together with the last token of the buffer, so look at
+  // the token first and only stop afterwards.
+  for (bool atEOF = false; !atEOF; ) {
+    atEOF = Lex.Lex(Tok);
+    if (Lex.inPPDirective() || Tok.is(tok::eod))
+      continue; // Skip PP directives, including their terminating eod.
+    if (Tok.is(tok::eof))
+      break; // Nothing but whitespace / PP directives: nothing to wrap.
+
+    if (Tok.is(tok::raw_identifier) && !Tok.needsCleaning()) {
+      StringRef keyword(Tok.getRawIdentifier());
+      if (keyword.equals("using")) {
+        // FIXME: Using definitions and declarations should be decl extracted.
+        // Until we have that, don't wrap them if they are the only input.
+        if (Lex.AdvanceTo(Tok, tok::semi)) {
+          // EOF while looking for semi. Don't wrap.
+          return std::string::npos;
+        }
+        // There is "more" - let's assume this input consists of a using
+        // declaration or definition plus some code that should be wrapped.
+        return getFileOffset(Tok);
+      }
+      if (keyword.equals("extern"))
+        return std::string::npos;
+      if (keyword.equals("namespace"))
+        return std::string::npos;
+      if (keyword.equals("template"))
+        return std::string::npos;
+    }
+    // Anything else (identifier, literal, punctuation, ...) must be wrapped,
+    // starting at this first non-preprocessor token.
+    return getFileOffset(Tok);
+  }
+
+  // We have only had PP directives; no need to wrap.
+  return std::string::npos;
+}