diff options
Diffstat (limited to 'deps/v8/src/scanner.h')
-rw-r--r-- | deps/v8/src/scanner.h | 160 |
1 files changed, 147 insertions, 13 deletions
diff --git a/deps/v8/src/scanner.h b/deps/v8/src/scanner.h index a201d0e976..9d7b34e7ca 100644 --- a/deps/v8/src/scanner.h +++ b/deps/v8/src/scanner.h @@ -41,6 +41,7 @@ class UTF8Buffer { ~UTF8Buffer(); void AddChar(uc32 c) { + ASSERT_NOT_NULL(data_); if (cursor_ <= limit_ && static_cast<unsigned>(c) <= unibrow::Utf8::kMaxOneByteChar) { *cursor_++ = static_cast<char>(c); @@ -49,17 +50,30 @@ class UTF8Buffer { } } - void Reset() { cursor_ = data_; } - int pos() const { return cursor_ - data_; } + void Reset() { + if (data_ == NULL) { + data_ = NewArray<char>(kInitialCapacity); + limit_ = ComputeLimit(data_, kInitialCapacity); + } + cursor_ = data_; + } + + int pos() const { + ASSERT_NOT_NULL(data_); + return static_cast<int>(cursor_ - data_); + } + char* data() const { return data_; } private: + static const int kInitialCapacity = 256; char* data_; char* cursor_; char* limit_; int Capacity() const { - return (limit_ - data_) + unibrow::Utf8::kMaxEncodedSize; + ASSERT_NOT_NULL(data_); + return static_cast<int>(limit_ - data_) + unibrow::Utf8::kMaxEncodedSize; } static char* ComputeLimit(char* data, int capacity) { @@ -123,6 +137,121 @@ class TwoByteStringUTF16Buffer: public UTF16Buffer { }; +class KeywordMatcher { +// Incrementally recognize keywords. +// +// Recognized keywords: +// break case catch const* continue debugger* default delete do else +// finally false for function if in instanceof native* new null +// return switch this throw true try typeof var void while with +// +// *: Actually "future reserved keywords". These are the only ones we +// recognized, the remaining are allowed as identifiers. + public: + KeywordMatcher() : state_(INITIAL), token_(Token::IDENTIFIER) {} + + Token::Value token() { return token_; } + + inline void AddChar(uc32 input) { + if (state_ != UNMATCHABLE) { + Step(input); + } + } + + void Fail() { + token_ = Token::IDENTIFIER; + state_ = UNMATCHABLE; + } + + private: + enum State { + UNMATCHABLE, + INITIAL, + KEYWORD_PREFIX, + KEYWORD_MATCHED, + C, + CA, + CO, + CON, + D, + DE, + F, + I, + IN, + N, + T, + TH, + TR, + V, + W + }; + + struct FirstState { + const char* keyword; + State state; + Token::Value token; + }; + + // Range of possible first characters of a keyword. + static const unsigned int kFirstCharRangeMin = 'b'; + static const unsigned int kFirstCharRangeMax = 'w'; + static const unsigned int kFirstCharRangeLength = + kFirstCharRangeMax - kFirstCharRangeMin + 1; + // State map for first keyword character range. + static FirstState first_states_[kFirstCharRangeLength]; + + // Current state. + State state_; + // Token for currently added characters. + Token::Value token_; + + // Matching a specific keyword string (there is only one possible valid + // keyword with the current prefix). + const char* keyword_; + int counter_; + Token::Value keyword_token_; + + // If input equals keyword's character at position, continue matching keyword + // from that position. + inline bool MatchKeywordStart(uc32 input, + const char* keyword, + int position, + Token::Value token_if_match) { + if (input == keyword[position]) { + state_ = KEYWORD_PREFIX; + this->keyword_ = keyword; + this->counter_ = position + 1; + this->keyword_token_ = token_if_match; + return true; + } + return false; + } + + // If input equals match character, transition to new state and return true. + inline bool MatchState(uc32 input, char match, State new_state) { + if (input == match) { + state_ = new_state; + return true; + } + return false; + } + + inline bool MatchKeyword(uc32 input, + char match, + State new_state, + Token::Value keyword_token) { + if (input == match) { // Matched "do". + state_ = new_state; + token_ = keyword_token; + return true; + } + return false; + } + + void Step(uc32 input); +}; + + class Scanner { public: @@ -163,26 +292,30 @@ class Scanner { // token returned by Next()). The string is 0-terminated and in // UTF-8 format; they may contain 0-characters. Literal strings are // collected for identifiers, strings, and numbers. + // These functions only give the correct result if the literal + // was scanned between calls to StartLiteral() and TerminateLiteral(). const char* literal_string() const { - return &literals_.data()[current_.literal_pos]; + return current_.literal_buffer->data(); } int literal_length() const { - return current_.literal_end - current_.literal_pos; - } - - Vector<const char> next_literal() const { - return Vector<const char>(next_literal_string(), next_literal_length()); + // Excluding terminal '\0' added by TerminateLiteral(). + return current_.literal_buffer->pos() - 1; } // Returns the literal string for the next token (the token that // would be returned if Next() were called). const char* next_literal_string() const { - return &literals_.data()[next_.literal_pos]; + return next_.literal_buffer->data(); } // Returns the length of the next token (that would be returned if // Next() were called). int next_literal_length() const { - return next_.literal_end - next_.literal_pos; + return next_.literal_buffer->pos() - 1; + } + + Vector<const char> next_literal() const { + return Vector<const char>(next_literal_string(), + next_literal_length()); } // Scans the input as a regular expression pattern, previous @@ -224,7 +357,8 @@ class Scanner { // Buffer to hold literal values (identifiers, strings, numbers) // using 0-terminated UTF-8 encoding. - UTF8Buffer literals_; + UTF8Buffer literal_buffer_1_; + UTF8Buffer literal_buffer_2_; bool stack_overflow_; static StaticResource<Utf8Decoder> utf8_decoder_; @@ -236,7 +370,7 @@ class Scanner { struct TokenDesc { Token::Value token; Location location; - int literal_pos, literal_end; + UTF8Buffer* literal_buffer; }; TokenDesc current_; // desc for current token (as returned by Next()) |