Handle ticks and quotes properly.

I know that we are accumulating too many lexers; soon I will merge both implementations
into one full-blown lexer.
Reduces the test failures a lot.


git-svn-id: http://root.cern.ch/svn/root/trunk@47652 27541ba8-7e3a-0410-8455-c3a389f83636
This commit is contained in:
Vassil Vassilev 2012-11-27 15:41:08 +00:00
parent 1015181bd2
commit b66d4750fd
5 changed files with 176 additions and 46 deletions

View File

@ -6,58 +6,73 @@
#include "InputValidator.h"
#include "clang/Lex/Preprocessor.h"
#include "PunctuationLexer.h"
using namespace clang;
//#include "clang/Lex/Preprocessor.h"
namespace cling {
// Validates one more line of input: brackets (and quote characters) are
// tracked on m_ParenStack, the line is appended to m_Input, and the result is
// kComplete, kIncomplete or kMismatch.
//
// NOTE(review): this span shows removed and added lines of the commit
// interleaved without diff markers. It therefore contains two signatures
// (with and without the LangOptions parameter), two lexing loops and two
// trailing return statements; it is not a single compilable function as shown.
InputValidator::ValidationResult
InputValidator::validate(llvm::StringRef line, const LangOptions& LO) {
if (!m_Input.empty())
m_Input.append("\\n");
InputValidator::validate(llvm::StringRef line) {
// Stays kComplete unless a mismatch is detected or the stack is left non-empty.
ValidationResult Res = kComplete;
// FIXME: Do it properly for the comments too
// Lines that start with a comment marker are not lexed at all.
if (!line.startswith("//")
&& !line.startswith("/*") && !line.startswith("*/")) {
PunctuationLexer PL(line);
Token Tok;
do {
PL.LexPunctuator(Tok);
int kind = (int)Tok.getKind();
// If there is " or ' we don't need to look for balancing until we
// encounter matching " or '
// NOTE(review): there are no braces here, so both else branches bind to the
// inner if. The continue is reached only for a quote/apostrophe token when
// the stack is non-empty and its top is a different kind; in that case the
// quote is not pushed. Bracket tokens lexed while a quote is on top of the
// stack are still processed by the bracket check below.
if (kind >= (int)tok::quote && kind <= (int)tok::apostrophe)
if (m_ParenStack.empty())
m_ParenStack.push(kind);
else if (m_ParenStack.top() == kind)
m_ParenStack.pop();
else
continue;
// In case when we need closing brace.
if (kind >= (int)tok::l_square && kind <= (int)tok::r_brace) {
// The closing paren kind is open paren kind + 1 (i.e odd number)
if (kind % 2) {
// closing the right one?
if (m_ParenStack.empty()) {
Res = kMismatch;
break;
}
int prev = m_ParenStack.top();
if (prev != kind - 1) {
Res = kMismatch;
break;
}
m_ParenStack.pop();
}
else
// Opening bracket: remember it until its closing counterpart shows up.
m_ParenStack.push(kind);
}
}
while (Tok.isNot(tok::eof));
}
// Anything still on the stack means more input is needed, unless a mismatch
// was already reported.
if (!m_ParenStack.empty() && Res != kMismatch)
Res = kIncomplete;
// Join the new line to the input collected so far. When the stack top is an
// open quote or apostrophe the separator is the two characters backslash and
// n; otherwise it is a real newline.
if (!m_Input.empty()) {
if (!m_ParenStack.empty() && (m_ParenStack.top() == tok::quote
|| m_ParenStack.top() == tok::apostrophe))
m_Input.append("\\n");
else
m_Input.append("\n");
}
else
m_Input = "";
m_Input.append(line);
// Unbalanced ' or " drives our lexer nuts.
// An odd count of either character in the collected input reports
// kIncomplete before any raw lexing is attempted.
llvm::StringRef fullInput(m_Input);
if (fullInput.count('\'') % 2 || fullInput.count('"') % 2) {
return kIncomplete;
}
// Raw-lex only the current line (not the whole collected input).
llvm::OwningPtr<llvm::MemoryBuffer> MB;
MB.reset(llvm::MemoryBuffer::getMemBuffer(line));
Lexer RawLexer(SourceLocation(), LO, MB->getBufferStart(),
MB->getBufferStart(), MB->getBufferEnd());
Token Tok;
do {
RawLexer.LexFromRawLexer(Tok);
int kind = (int)Tok.getKind();
if (kind >= (int)tok::l_square
&& kind <= (int)tok::r_brace) {
// Rebase so that l_square becomes 0; odd values are then tested as the
// closing kinds below.
kind -= (int)tok::l_square;
if (kind % 2) {
// closing the right one?
if (m_ParenStack.empty())
return kMismatch;
int prev = m_ParenStack.top();
if (prev != kind - 1)
return kMismatch;
m_ParenStack.pop();
} else {
m_ParenStack.push(kind);
}
}
}
while (Tok.isNot(tok::eof));
if (!m_ParenStack.empty())
return kIncomplete;
return kComplete;
return Res;
}
void InputValidator::reset() {

View File

@ -7,6 +7,8 @@
#ifndef CLING_INPUT_VALIDATOR_H
#define CLING_INPUT_VALIDATOR_H
#include "clang/Basic/TokenKinds.h"
#include "llvm/ADT/StringRef.h"
#include <stack>
@ -22,6 +24,10 @@ namespace cling {
///
class InputValidator {
private:
///\brief Extra kinds numbered directly after clang::tok::r_brace
/// (kTick is r_brace + 1, kQuote is r_brace + 2).
/// NOTE(review): presumably these stand for the tick and quote characters;
/// their use is not visible in this hunk - confirm.
enum {
kTick = clang::tok::r_brace + 1,
kQuote
};
///\brief The input being collected.
///
@ -49,8 +55,7 @@ namespace cling {
///\param[in] LO - Language options to validate against.
///\returns Information about the outcome of the validation.
///
ValidationResult validate(llvm::StringRef line,
const clang::LangOptions& LO);
ValidationResult validate(llvm::StringRef line);
///\returns Reference to the collected input.
///

View File

@ -226,7 +226,7 @@ namespace cling {
// Check if the current statement is now complete. If not, return to
// prompt for more.
if (m_InputValidator->validate(input_line, m_Interp.getCI()->getLangOpts())
if (m_InputValidator->validate(input_line)
== InputValidator::kIncomplete) {
if (compRes) *compRes = Interpreter::kMoreInputExpected;
return m_InputValidator->getExpectedIndent();

View File

@ -0,0 +1,42 @@
//------------------------------------------------------------------------------
// CLING - the C++ LLVM-based InterpreterG :)
// version: $Id: DeclCollector.cpp 47416 2012-11-18 22:44:58Z vvassilev $
// author: Vassil Vassilev <vasil.georgiev.vasilev@cern.ch>
//------------------------------------------------------------------------------
#include "PunctuationLexer.h"
#include "llvm/ADT/StringRef.h"
namespace cling {
///\brief Creates a lexer positioned at the first character of \p line.
///
/// All three buffer pointers are initialized; previously bufferEnd was left
/// indeterminate.
///
/// NOTE(review): LexPunctuator stops only on a '\0' character, so \p line is
/// expected to refer to null-terminated storage - confirm at the call sites.
///
///\param[in] line - The text to scan for punctuators.
PunctuationLexer::PunctuationLexer(llvm::StringRef line)
  : bufferStart(line.data()), bufferEnd(line.data() + line.size()),
    curPos(line.data())
{ }
///\brief Scans forward to the next punctuator of interest and stores it in
/// \p Tok.
///
/// Every character other than [ ] ( ) { } " ' and , is skipped. The scan ends
/// at the '\0' terminator of the buffer.
///
///\param[out] Tok - Receives the kind and the [bufStart, bufEnd) range of the
///                  punctuator, or an eof token once the terminator is reached.
///\returns true if a punctuator was lexed, false at the end of the buffer.
bool PunctuationLexer::LexPunctuator(Token& Tok) {
  Tok.startToken();
  while (true) {
    // Stop *on* the terminator instead of stepping over it. Advancing first
    // made LexEndOfFile inspect the byte after the terminator, so the eof
    // token was produced only if that byte happened to be zero, and callers
    // looping until eof could keep lexing past the end of the buffer.
    if (*curPos == '\0') {
      LexEndOfFile(Tok);
      // LexEndOfFile resets the token; give it an empty range at the end.
      Tok.bufStart = Tok.bufEnd = curPos;
      return false;
    }
    Tok.bufStart = curPos;
    switch (*curPos++) {
      case '['  : Tok.kind = tok::l_square;   break;
      case ']'  : Tok.kind = tok::r_square;   break;
      case '('  : Tok.kind = tok::l_paren;    break;
      case ')'  : Tok.kind = tok::r_paren;    break;
      case '{'  : Tok.kind = tok::l_brace;    break;
      case '}'  : Tok.kind = tok::r_brace;    break;
      case '"'  : Tok.kind = tok::quote;      break;
      case '\'' : Tok.kind = tok::apostrophe; break;
      case ','  : Tok.kind = tok::comma;      break;
      default   : continue; // Not a punctuator; keep scanning.
    }
    Tok.bufEnd = curPos;
    return true;
  }
}
///\brief Resets \p Tok and marks it as eof when the cursor is on '\0'.
///
///\param[out] Tok - Reset token; its kind becomes tok::eof at the terminator
///                  and stays tok::unknown otherwise.
///\returns true if the eof token was produced.
bool PunctuationLexer::LexEndOfFile(Token& Tok) {
  Tok.startToken();
  const bool atTerminator = (*curPos == '\0');
  if (atTerminator) {
    Tok.kind = tok::eof;
  }
  // startToken() left the kind at tok::unknown, so the kind differs from
  // unknown exactly when the terminator was seen.
  return atTerminator;
}
} // end namespace cling

View File

@ -0,0 +1,68 @@
//--------------------------------------------------------------------*- C++ -*-
// CLING - the C++ LLVM-based InterpreterG :)
// version: $Id: DeclCollector.h 46525 2012-10-13 15:04:49Z vvassilev $
// author: Vassil Vassilev <vasil.georgiev.vasilev@cern.ch>
//------------------------------------------------------------------------------
#ifndef CLING_PUNCTUATION_LEXER_H
#define CLING_PUNCTUATION_LEXER_H
namespace llvm {
class StringRef;
}
namespace cling {
///\brief Kinds of tokens produced by the punctuation lexer.
///
/// The bracket kinds come in pairs: every closing kind is its opening kind
/// plus one, so opening kinds are even and closing kinds are odd. The input
/// validator relies on this numbering.
namespace tok {
  enum TokenKind {
    l_square   = 0,  // [
    r_square   = 1,  // ]
    l_paren    = 2,  // (
    r_paren    = 3,  // )
    l_brace    = 4,  // {
    r_brace    = 5,  // }
    quote      = 6,  // double quote
    apostrophe = 7,  // single quote
    comma      = 8,  // ,
    eof        = 9,  // end of the buffer
    unknown    = 10  // no token lexed (yet)
  };
}
class Token {
private:
tok::TokenKind kind;
const char* bufStart;
const char* bufEnd;
void startToken() {
bufStart = 0;
kind = tok::unknown;
}
public:
tok::TokenKind getKind() const { return kind; }
unsigned getLength() const { return bufEnd - bufStart; }
const char* getBufStart() const { return bufStart; }
bool isNot(tok::TokenKind K) const { return kind != (unsigned) K; }
friend class PunctuationLexer;
};
///\brief Minimal lexer which reports only brackets, quote characters and
/// commas, skipping every other character of a '\0'-terminated buffer.
class PunctuationLexer {
protected:
  const char* bufferStart; ///< First character of the buffer.
  const char* bufferEnd;   ///< The terminating '\0' of the buffer.
  const char* curPos;      ///< Next character to be examined.
public:
  ///\brief Creates a lexer over a null-terminated string.
  ///
  /// bufferEnd is set to the terminator of \p bufStart; previously it was
  /// left uninitialized. A null \p bufStart is stored as is.
  ///
  ///\param[in] bufStart - The null-terminated text to scan.
  PunctuationLexer(const char* bufStart)
    : bufferStart(bufStart), bufferEnd(bufStart), curPos(bufStart)
  {
    if (bufferEnd)
      while (*bufferEnd)
        ++bufferEnd;
  }

  ///\brief Creates a lexer over \p input (defined out of line).
  PunctuationLexer(llvm::StringRef input);

  ///\brief Lexes the next punctuator into \p Result.
  ///\returns true if a punctuator was lexed, false at the end of the buffer.
  bool LexPunctuator(Token& Result);

  ///\brief Resets \p Result and marks it as eof if the cursor is on the
  /// terminator.
  ///\returns true if the eof token was produced.
  bool LexEndOfFile(Token& Result);
};
} //end namespace cling
#endif