diff options
Diffstat (limited to 'deps/v8/src/scanner.cc')
-rw-r--r-- | deps/v8/src/scanner.cc | 189 |
1 files changed, 156 insertions, 33 deletions
diff --git a/deps/v8/src/scanner.cc b/deps/v8/src/scanner.cc index 3dae414f9d..0d3b789f9b 100644 --- a/deps/v8/src/scanner.cc +++ b/deps/v8/src/scanner.cc @@ -49,17 +49,11 @@ StaticResource<Scanner::Utf8Decoder> Scanner::utf8_decoder_; // ---------------------------------------------------------------------------- // UTF8Buffer -UTF8Buffer::UTF8Buffer() { - static const int kInitialCapacity = 1 * KB; - data_ = NewArray<char>(kInitialCapacity); - limit_ = ComputeLimit(data_, kInitialCapacity); - Reset(); - ASSERT(Capacity() == kInitialCapacity && pos() == 0); -} +UTF8Buffer::UTF8Buffer() : data_(NULL), limit_(NULL) { } UTF8Buffer::~UTF8Buffer() { - DeleteArray(data_); + if (data_ != NULL) DeleteArray(data_); } @@ -69,7 +63,7 @@ void UTF8Buffer::AddCharSlow(uc32 c) { int old_capacity = Capacity(); int old_position = pos(); int new_capacity = - Min(old_capacity * 2, old_capacity + kCapacityGrowthLimit); + Min(old_capacity * 3, old_capacity + kCapacityGrowthLimit); char* new_data = NewArray<char>(new_capacity); memcpy(new_data, data_, old_position); DeleteArray(data_); @@ -194,11 +188,142 @@ void TwoByteStringUTF16Buffer::SeekForward(int pos) { // ---------------------------------------------------------------------------- +// Keyword Matcher +KeywordMatcher::FirstState KeywordMatcher::first_states_[] = { + { "break", KEYWORD_PREFIX, Token::BREAK }, + { NULL, C, Token::ILLEGAL }, + { NULL, D, Token::ILLEGAL }, + { "else", KEYWORD_PREFIX, Token::ELSE }, + { NULL, F, Token::ILLEGAL }, + { NULL, UNMATCHABLE, Token::ILLEGAL }, + { NULL, UNMATCHABLE, Token::ILLEGAL }, + { NULL, I, Token::ILLEGAL }, + { NULL, UNMATCHABLE, Token::ILLEGAL }, + { NULL, UNMATCHABLE, Token::ILLEGAL }, + { NULL, UNMATCHABLE, Token::ILLEGAL }, + { NULL, UNMATCHABLE, Token::ILLEGAL }, + { NULL, N, Token::ILLEGAL }, + { NULL, UNMATCHABLE, Token::ILLEGAL }, + { NULL, UNMATCHABLE, Token::ILLEGAL }, + { NULL, UNMATCHABLE, Token::ILLEGAL }, + { "return", KEYWORD_PREFIX, Token::RETURN }, + { "switch", KEYWORD_PREFIX, Token::SWITCH }, + { NULL, T, Token::ILLEGAL }, + { NULL, UNMATCHABLE, Token::ILLEGAL }, + { NULL, V, Token::ILLEGAL }, + { NULL, W, Token::ILLEGAL } +}; + + +void KeywordMatcher::Step(uc32 input) { + switch (state_) { + case INITIAL: { + // matching the first character is the only state with significant fanout. + // Match only lower-case letters in range 'b'..'w'. + unsigned int offset = input - kFirstCharRangeMin; + if (offset < kFirstCharRangeLength) { + state_ = first_states_[offset].state; + if (state_ == KEYWORD_PREFIX) { + keyword_ = first_states_[offset].keyword; + counter_ = 1; + keyword_token_ = first_states_[offset].token; + } + return; + } + break; + } + case KEYWORD_PREFIX: + if (keyword_[counter_] == input) { + ASSERT_NE(input, '\0'); + counter_++; + if (keyword_[counter_] == '\0') { + state_ = KEYWORD_MATCHED; + token_ = keyword_token_; + } + return; + } + break; + case KEYWORD_MATCHED: + token_ = Token::IDENTIFIER; + break; + case C: + if (MatchState(input, 'a', CA)) return; + if (MatchState(input, 'o', CO)) return; + break; + case CA: + if (MatchKeywordStart(input, "case", 2, Token::CASE)) return; + if (MatchKeywordStart(input, "catch", 2, Token::CATCH)) return; + break; + case CO: + if (MatchState(input, 'n', CON)) return; + break; + case CON: + if (MatchKeywordStart(input, "const", 3, Token::CONST)) return; + if (MatchKeywordStart(input, "continue", 3, Token::CONTINUE)) return; + break; + case D: + if (MatchState(input, 'e', DE)) return; + if (MatchKeyword(input, 'o', KEYWORD_MATCHED, Token::DO)) return; + break; + case DE: + if (MatchKeywordStart(input, "debugger", 2, Token::DEBUGGER)) return; + if (MatchKeywordStart(input, "default", 2, Token::DEFAULT)) return; + if (MatchKeywordStart(input, "delete", 2, Token::DELETE)) return; + break; + case F: + if (MatchKeywordStart(input, "false", 1, Token::FALSE_LITERAL)) return; + if (MatchKeywordStart(input, "finally", 1, Token::FINALLY)) return; + if (MatchKeywordStart(input, "for", 1, Token::FOR)) return; + if (MatchKeywordStart(input, "function", 1, Token::FUNCTION)) return; + break; + case I: + if (MatchKeyword(input, 'f', KEYWORD_MATCHED, Token::IF)) return; + if (MatchKeyword(input, 'n', IN, Token::IN)) return; + break; + case IN: + token_ = Token::IDENTIFIER; + if (MatchKeywordStart(input, "instanceof", 2, Token::INSTANCEOF)) { + return; + } + break; + case N: + if (MatchKeywordStart(input, "native", 1, Token::NATIVE)) return; + if (MatchKeywordStart(input, "new", 1, Token::NEW)) return; + if (MatchKeywordStart(input, "null", 1, Token::NULL_LITERAL)) return; + break; + case T: + if (MatchState(input, 'h', TH)) return; + if (MatchState(input, 'r', TR)) return; + if (MatchKeywordStart(input, "typeof", 1, Token::TYPEOF)) return; + break; + case TH: + if (MatchKeywordStart(input, "this", 2, Token::THIS)) return; + if (MatchKeywordStart(input, "throw", 2, Token::THROW)) return; + break; + case TR: + if (MatchKeywordStart(input, "true", 2, Token::TRUE_LITERAL)) return; + if (MatchKeyword(input, 'y', KEYWORD_MATCHED, Token::TRY)) return; + break; + case V: + if (MatchKeywordStart(input, "var", 1, Token::VAR)) return; + if (MatchKeywordStart(input, "void", 1, Token::VOID)) return; + break; + case W: + if (MatchKeywordStart(input, "while", 1, Token::WHILE)) return; + if (MatchKeywordStart(input, "with", 1, Token::WITH)) return; + break; + default: + UNREACHABLE(); + } + // On fallthrough, it's a failure. + state_ = UNMATCHABLE; +} + + +// ---------------------------------------------------------------------------- // Scanner -Scanner::Scanner(bool pre) : stack_overflow_(false), is_pre_parsing_(pre) { - Token::Initialize(); -} +Scanner::Scanner(bool pre) : stack_overflow_(false), is_pre_parsing_(pre) { } void Scanner::Init(Handle<String> source, unibrow::CharacterStream* stream, @@ -215,12 +340,11 @@ void Scanner::Init(Handle<String> source, unibrow::CharacterStream* stream, position_ = position; - // Reset literals buffer - literals_.Reset(); - // Set c0_ (one character ahead) ASSERT(kCharacterLookaheadBufferSize == 1); Advance(); + // Initializer current_ to not refer to a literal buffer. + current_.literal_buffer = NULL; // Skip initial whitespace allowing HTML comment ends just like // after a newline and scan first token. @@ -253,17 +377,23 @@ Token::Value Scanner::Next() { void Scanner::StartLiteral() { - next_.literal_pos = literals_.pos(); + // Use the first buffer unless it's currently in use by the current_ token. + // In most cases we won't have two literals/identifiers in a row, so + // the second buffer won't be used very often and is unlikely to grow much. + UTF8Buffer* free_buffer = + (current_.literal_buffer != &literal_buffer_1_) ? &literal_buffer_1_ + : &literal_buffer_2_; + next_.literal_buffer = free_buffer; + free_buffer->Reset(); } void Scanner::AddChar(uc32 c) { - literals_.AddChar(c); + next_.literal_buffer->AddChar(c); } void Scanner::TerminateLiteral() { - next_.literal_end = literals_.pos(); AddChar(0); } @@ -383,6 +513,7 @@ Token::Value Scanner::ScanHtmlComment() { void Scanner::Scan() { + next_.literal_buffer = NULL; Token::Value token; has_line_terminator_before_next_ = false; do { @@ -855,48 +986,40 @@ uc32 Scanner::ScanIdentifierUnicodeEscape() { Token::Value Scanner::ScanIdentifier() { ASSERT(kIsIdentifierStart.get(c0_)); - bool has_escapes = false; StartLiteral(); + KeywordMatcher keyword_match; + // Scan identifier start character. if (c0_ == '\\') { - has_escapes = true; uc32 c = ScanIdentifierUnicodeEscape(); // Only allow legal identifier start characters. if (!kIsIdentifierStart.get(c)) return Token::ILLEGAL; AddChar(c); + keyword_match.Fail(); } else { AddChar(c0_); + keyword_match.AddChar(c0_); Advance(); } // Scan the rest of the identifier characters. while (kIsIdentifierPart.get(c0_)) { if (c0_ == '\\') { - has_escapes = true; uc32 c = ScanIdentifierUnicodeEscape(); // Only allow legal identifier part characters. if (!kIsIdentifierPart.get(c)) return Token::ILLEGAL; AddChar(c); + keyword_match.Fail(); } else { AddChar(c0_); + keyword_match.AddChar(c0_); Advance(); } } TerminateLiteral(); - // We don't have any 1-letter keywords (this is probably a common case). - if ((next_.literal_end - next_.literal_pos) == 1) { - return Token::IDENTIFIER; - } - - // If the identifier contains unicode escapes, it must not be - // resolved to a keyword. - if (has_escapes) { - return Token::IDENTIFIER; - } - - return Token::Lookup(&literals_.data()[next_.literal_pos]); + return keyword_match.token(); } |