summaryrefslogtreecommitdiff
path: root/deps/v8/src/scanner.h
diff options
context:
space:
mode:
Diffstat (limited to 'deps/v8/src/scanner.h')
-rw-r--r--deps/v8/src/scanner.h160
1 files changed, 147 insertions, 13 deletions
diff --git a/deps/v8/src/scanner.h b/deps/v8/src/scanner.h
index a201d0e976..9d7b34e7ca 100644
--- a/deps/v8/src/scanner.h
+++ b/deps/v8/src/scanner.h
@@ -41,6 +41,7 @@ class UTF8Buffer {
~UTF8Buffer();
void AddChar(uc32 c) {
+ ASSERT_NOT_NULL(data_);
if (cursor_ <= limit_ &&
static_cast<unsigned>(c) <= unibrow::Utf8::kMaxOneByteChar) {
*cursor_++ = static_cast<char>(c);
@@ -49,17 +50,30 @@ class UTF8Buffer {
}
}
- void Reset() { cursor_ = data_; }
- int pos() const { return cursor_ - data_; }
+ void Reset() {
+ if (data_ == NULL) {
+ data_ = NewArray<char>(kInitialCapacity);
+ limit_ = ComputeLimit(data_, kInitialCapacity);
+ }
+ cursor_ = data_;
+ }
+
+ int pos() const {
+ ASSERT_NOT_NULL(data_);
+ return static_cast<int>(cursor_ - data_);
+ }
+
char* data() const { return data_; }
private:
+ static const int kInitialCapacity = 256;
char* data_;
char* cursor_;
char* limit_;
int Capacity() const {
- return (limit_ - data_) + unibrow::Utf8::kMaxEncodedSize;
+ ASSERT_NOT_NULL(data_);
+ return static_cast<int>(limit_ - data_) + unibrow::Utf8::kMaxEncodedSize;
}
static char* ComputeLimit(char* data, int capacity) {
@@ -123,6 +137,121 @@ class TwoByteStringUTF16Buffer: public UTF16Buffer {
};
+class KeywordMatcher {
+// Incrementally recognize keywords.
+//
+// Recognized keywords:
+// break case catch const* continue debugger* default delete do else
+// finally false for function if in instanceof native* new null
+// return switch this throw true try typeof var void while with
+//
+// *: Actually "future reserved keywords". These are the only ones we
+// recognized, the remaining are allowed as identifiers.
+ public:
+ KeywordMatcher() : state_(INITIAL), token_(Token::IDENTIFIER) {}
+
+ Token::Value token() { return token_; }
+
+ inline void AddChar(uc32 input) {
+ if (state_ != UNMATCHABLE) {
+ Step(input);
+ }
+ }
+
+ void Fail() {
+ token_ = Token::IDENTIFIER;
+ state_ = UNMATCHABLE;
+ }
+
+ private:
+ enum State {
+ UNMATCHABLE,
+ INITIAL,
+ KEYWORD_PREFIX,
+ KEYWORD_MATCHED,
+ C,
+ CA,
+ CO,
+ CON,
+ D,
+ DE,
+ F,
+ I,
+ IN,
+ N,
+ T,
+ TH,
+ TR,
+ V,
+ W
+ };
+
+ struct FirstState {
+ const char* keyword;
+ State state;
+ Token::Value token;
+ };
+
+ // Range of possible first characters of a keyword.
+ static const unsigned int kFirstCharRangeMin = 'b';
+ static const unsigned int kFirstCharRangeMax = 'w';
+ static const unsigned int kFirstCharRangeLength =
+ kFirstCharRangeMax - kFirstCharRangeMin + 1;
+ // State map for first keyword character range.
+ static FirstState first_states_[kFirstCharRangeLength];
+
+ // Current state.
+ State state_;
+ // Token for currently added characters.
+ Token::Value token_;
+
+ // Matching a specific keyword string (there is only one possible valid
+ // keyword with the current prefix).
+ const char* keyword_;
+ int counter_;
+ Token::Value keyword_token_;
+
+ // If input equals keyword's character at position, continue matching keyword
+ // from that position.
+ inline bool MatchKeywordStart(uc32 input,
+ const char* keyword,
+ int position,
+ Token::Value token_if_match) {
+ if (input == keyword[position]) {
+ state_ = KEYWORD_PREFIX;
+ this->keyword_ = keyword;
+ this->counter_ = position + 1;
+ this->keyword_token_ = token_if_match;
+ return true;
+ }
+ return false;
+ }
+
+ // If input equals match character, transition to new state and return true.
+ inline bool MatchState(uc32 input, char match, State new_state) {
+ if (input == match) {
+ state_ = new_state;
+ return true;
+ }
+ return false;
+ }
+
+ inline bool MatchKeyword(uc32 input,
+ char match,
+ State new_state,
+ Token::Value keyword_token) {
+ if (input == match) { // Matched "do".
+ state_ = new_state;
+ token_ = keyword_token;
+ return true;
+ }
+ return false;
+ }
+
+ void Step(uc32 input);
+};
+
+
class Scanner {
public:
@@ -163,26 +292,30 @@ class Scanner {
// token returned by Next()). The string is 0-terminated and in
// UTF-8 format; they may contain 0-characters. Literal strings are
// collected for identifiers, strings, and numbers.
+ // These functions only give the correct result if the literal
+ // was scanned between calls to StartLiteral() and TerminateLiteral().
const char* literal_string() const {
- return &literals_.data()[current_.literal_pos];
+ return current_.literal_buffer->data();
}
int literal_length() const {
- return current_.literal_end - current_.literal_pos;
- }
-
- Vector<const char> next_literal() const {
- return Vector<const char>(next_literal_string(), next_literal_length());
+ // Excluding terminal '\0' added by TerminateLiteral().
+ return current_.literal_buffer->pos() - 1;
}
// Returns the literal string for the next token (the token that
// would be returned if Next() were called).
const char* next_literal_string() const {
- return &literals_.data()[next_.literal_pos];
+ return next_.literal_buffer->data();
}
// Returns the length of the next token (that would be returned if
// Next() were called).
int next_literal_length() const {
- return next_.literal_end - next_.literal_pos;
+ return next_.literal_buffer->pos() - 1;
+ }
+
+ Vector<const char> next_literal() const {
+ return Vector<const char>(next_literal_string(),
+ next_literal_length());
}
// Scans the input as a regular expression pattern, previous
@@ -224,7 +357,8 @@ class Scanner {
// Buffer to hold literal values (identifiers, strings, numbers)
// using 0-terminated UTF-8 encoding.
- UTF8Buffer literals_;
+ UTF8Buffer literal_buffer_1_;
+ UTF8Buffer literal_buffer_2_;
bool stack_overflow_;
static StaticResource<Utf8Decoder> utf8_decoder_;
@@ -236,7 +370,7 @@ class Scanner {
struct TokenDesc {
Token::Value token;
Location location;
- int literal_pos, literal_end;
+ UTF8Buffer* literal_buffer;
};
TokenDesc current_; // desc for current token (as returned by Next())