diff options
Diffstat (limited to 'deps/v8/src/parsing/scanner.cc')
-rw-r--r-- | deps/v8/src/parsing/scanner.cc | 402 |
1 files changed, 160 insertions, 242 deletions
diff --git a/deps/v8/src/parsing/scanner.cc b/deps/v8/src/parsing/scanner.cc index 852b5e400b..781832c2e6 100644 --- a/deps/v8/src/parsing/scanner.cc +++ b/deps/v8/src/parsing/scanner.cc @@ -15,7 +15,7 @@ #include "src/conversions-inl.h" #include "src/objects/bigint.h" #include "src/parsing/duplicate-finder.h" // For Scanner::FindSymbol -#include "src/unicode-cache-inl.h" +#include "src/parsing/scanner-inl.h" namespace v8 { namespace internal { @@ -60,6 +60,7 @@ class Scanner::ErrorState { // Scanner::LiteralBuffer Handle<String> Scanner::LiteralBuffer::Internalize(Isolate* isolate) const { + DCHECK(is_used_); if (is_one_byte()) { return isolate->factory()->InternalizeOneByteString(one_byte_literal()); } @@ -103,16 +104,9 @@ void Scanner::LiteralBuffer::ConvertToTwoByte() { is_one_byte_ = false; } -void Scanner::LiteralBuffer::AddCharSlow(uc32 code_unit) { +void Scanner::LiteralBuffer::AddTwoByteChar(uc32 code_unit) { + DCHECK(!is_one_byte_); if (position_ >= backing_store_.length()) ExpandBuffer(); - if (is_one_byte_) { - if (code_unit <= static_cast<uc32>(unibrow::Latin1::kMaxChar)) { - backing_store_[position_] = static_cast<byte>(code_unit); - position_ += kOneByteSize; - return; - } - ConvertToTwoByte(); - } if (code_unit <= static_cast<uc32>(unibrow::Utf16::kMaxNonSurrogateCharCode)) { *reinterpret_cast<uint16_t*>(&backing_store_[position_]) = code_unit; @@ -140,16 +134,16 @@ const size_t Scanner::BookmarkScope::kBookmarkWasApplied = void Scanner::BookmarkScope::Set() { DCHECK_EQ(bookmark_, kNoBookmark); - DCHECK_EQ(scanner_->next_next_.token, Token::UNINITIALIZED); + DCHECK_EQ(scanner_->next_next().token, Token::UNINITIALIZED); // The first token is a bit special, since current_ will still be // uninitialized. In this case, store kBookmarkAtFirstPos and special-case it // when // applying the bookmark. - DCHECK_IMPLIES( - scanner_->current_.token == Token::UNINITIALIZED, - scanner_->current_.location.beg_pos == scanner_->next_.location.beg_pos); - bookmark_ = (scanner_->current_.token == Token::UNINITIALIZED) + DCHECK_IMPLIES(scanner_->current().token == Token::UNINITIALIZED, + scanner_->current().location.beg_pos == + scanner_->next().location.beg_pos); + bookmark_ = (scanner_->current().token == Token::UNINITIALIZED) ? kBookmarkAtFirstPos : scanner_->location().beg_pos; } @@ -177,22 +171,24 @@ bool Scanner::BookmarkScope::HasBeenApplied() { // ---------------------------------------------------------------------------- // Scanner -Scanner::Scanner(UnicodeCache* unicode_cache) +Scanner::Scanner(UnicodeCache* unicode_cache, Utf16CharacterStream* source, + bool is_module) : unicode_cache_(unicode_cache), + source_(source), octal_pos_(Location::invalid()), octal_message_(MessageTemplate::kNone), found_html_comment_(false), allow_harmony_bigint_(false), - allow_harmony_numeric_separator_(false) {} - -void Scanner::Initialize(Utf16CharacterStream* source, bool is_module) { + allow_harmony_numeric_separator_(false), + is_module_(is_module) { DCHECK_NOT_NULL(source); - source_ = source; - is_module_ = is_module; +} + +void Scanner::Initialize() { // Need to capture identifiers in order to recognize "get" and "set" // in object literals. Init(); - has_line_terminator_before_next_ = true; + next().after_line_terminator = true; Scan(); } @@ -377,96 +373,43 @@ static const byte one_char_tokens[] = { // clang-format on Token::Value Scanner::Next() { - if (next_.token == Token::EOS) { - next_.location.beg_pos = current_.location.beg_pos; - next_.location.end_pos = current_.location.end_pos; - } + if (next().token == Token::EOS) next().location = current().location; + // Rotate through tokens. + TokenDesc* previous = current_; current_ = next_; - if (V8_UNLIKELY(next_next_.token != Token::UNINITIALIZED)) { + // Either we already have the next token lined up, in which case next_next_ + // simply becomes next_. In that case we use current_ as new next_next_ and + // clear its token to indicate that it wasn't scanned yet. Otherwise we use + // current_ as next_ and scan into it, leaving next_next_ uninitialized. + if (V8_LIKELY(next_next().token == Token::UNINITIALIZED)) { + next_ = previous; + next().after_line_terminator = false; + Scan(); + } else { next_ = next_next_; - next_next_.token = Token::UNINITIALIZED; - next_next_.contextual_token = Token::UNINITIALIZED; - has_line_terminator_before_next_ = has_line_terminator_after_next_; - return current_.token; + next_next_ = previous; + previous->token = Token::UNINITIALIZED; + previous->contextual_token = Token::UNINITIALIZED; + DCHECK_NE(Token::UNINITIALIZED, current().token); } - has_line_terminator_before_next_ = false; - has_multiline_comment_before_next_ = false; - Scan(); - return current_.token; + return current().token; } Token::Value Scanner::PeekAhead() { - DCHECK(next_.token != Token::DIV); - DCHECK(next_.token != Token::ASSIGN_DIV); - - if (next_next_.token != Token::UNINITIALIZED) { - return next_next_.token; - } - TokenDesc prev = current_; - bool has_line_terminator_before_next = - has_line_terminator_before_next_ || has_multiline_comment_before_next_; - Next(); - has_line_terminator_after_next_ = - has_line_terminator_before_next_ || has_multiline_comment_before_next_; - has_line_terminator_before_next_ = has_line_terminator_before_next; - Token::Value ret = next_.token; - next_next_ = next_; - next_ = current_; - current_ = prev; - return ret; -} - - -Token::Value Scanner::SkipWhiteSpace() { - int start_position = source_pos(); - - while (true) { - while (true) { - // We won't skip behind the end of input. - DCHECK(!unicode_cache_->IsWhiteSpace(kEndOfInput)); - - // Advance as long as character is a WhiteSpace or LineTerminator. - // Remember if the latter is the case. - if (unibrow::IsLineTerminator(c0_)) { - has_line_terminator_before_next_ = true; - } else if (!unicode_cache_->IsWhiteSpace(c0_)) { - break; - } - Advance(); - } + DCHECK(next().token != Token::DIV); + DCHECK(next().token != Token::ASSIGN_DIV); - // If there is an HTML comment end '-->' at the beginning of a - // line (with only whitespace in front of it), we treat the rest - // of the line as a comment. This is in line with the way - // SpiderMonkey handles it. - if (c0_ != '-' || !has_line_terminator_before_next_) break; - - Advance(); - if (c0_ != '-') { - PushBack('-'); // undo Advance() - break; - } - - Advance(); - if (c0_ != '>') { - PushBack2('-', '-'); // undo 2x Advance(); - break; - } - - // Treat the rest of the line as a comment. - Token::Value token = SkipSingleHTMLComment(); - if (token == Token::ILLEGAL) { - return token; - } + if (next_next().token != Token::UNINITIALIZED) { + return next_next().token; } - - // Return whether or not we skipped any characters. - if (source_pos() == start_position) { - return Token::ILLEGAL; - } - - return Token::WHITESPACE; + TokenDesc* temp = next_; + next_ = next_next_; + next().after_line_terminator = false; + Scan(); + next_next_ = next_; + next_ = temp; + return next_next().token; } Token::Value Scanner::SkipSingleHTMLComment() { @@ -478,21 +421,16 @@ Token::Value Scanner::SkipSingleHTMLComment() { } Token::Value Scanner::SkipSingleLineComment() { - Advance(); - // The line terminator at the end of the line is not considered // to be part of the single-line comment; it is recognized // separately by the lexical grammar and becomes part of the // stream of input elements for the syntactic grammar (see // ECMA-262, section 7.4). - while (c0_ != kEndOfInput && !unibrow::IsLineTerminator(c0_)) { - Advance(); - } + AdvanceUntil([](uc32 c0_) { return unibrow::IsLineTerminator(c0_); }); return Token::WHITESPACE; } - Token::Value Scanner::SkipSourceURLComment() { TryToParseSourceURLComment(); while (c0_ != kEndOfInput && !unibrow::IsLineTerminator(c0_)) { @@ -502,7 +440,6 @@ Token::Value Scanner::SkipSourceURLComment() { return Token::WHITESPACE; } - void Scanner::TryToParseSourceURLComment() { // Magic comments are of the form: //[#@]\s<name>=\s*<value>\s*.* and this // function will just return if it cannot parse a magic comment. @@ -510,6 +447,7 @@ void Scanner::TryToParseSourceURLComment() { if (!unicode_cache_->IsWhiteSpace(c0_)) return; Advance(); LiteralBuffer name; + name.Start(); while (c0_ != kEndOfInput && !unicode_cache_->IsWhiteSpaceOrLineTerminator(c0_) && c0_ != '=') { @@ -528,15 +466,16 @@ void Scanner::TryToParseSourceURLComment() { } if (c0_ != '=') return; + value->Drop(); + value->Start(); Advance(); - value->Reset(); while (unicode_cache_->IsWhiteSpace(c0_)) { Advance(); } while (c0_ != kEndOfInput && !unibrow::IsLineTerminator(c0_)) { // Disallowed characters. if (c0_ == '"' || c0_ == '\'') { - value->Reset(); + value->Drop(); return; } if (unicode_cache_->IsWhiteSpace(c0_)) { @@ -548,34 +487,33 @@ void Scanner::TryToParseSourceURLComment() { // Allow whitespace at the end. while (c0_ != kEndOfInput && !unibrow::IsLineTerminator(c0_)) { if (!unicode_cache_->IsWhiteSpace(c0_)) { - value->Reset(); + value->Drop(); break; } Advance(); } } - Token::Value Scanner::SkipMultiLineComment() { DCHECK_EQ(c0_, '*'); Advance(); while (c0_ != kEndOfInput) { - uc32 ch = c0_; - Advance(); DCHECK(!unibrow::IsLineTerminator(kEndOfInput)); - if (unibrow::IsLineTerminator(ch)) { + if (!HasLineTerminatorBeforeNext() && unibrow::IsLineTerminator(c0_)) { // Following ECMA-262, section 7.4, a comment containing // a newline will make the comment count as a line-terminator. - has_multiline_comment_before_next_ = true; + next().after_line_terminator = true; } - // If we have reached the end of the multi-line comment, we - // consume the '/' and insert a whitespace. This way all - // multi-line comments are treated as whitespace. - if (ch == '*' && c0_ == '/') { - c0_ = ' '; - return Token::WHITESPACE; + + while (V8_UNLIKELY(c0_ == '*')) { + Advance(); + if (c0_ == '/') { + Advance(); + return Token::WHITESPACE; + } } + Advance(); } // Unterminated multi-line comment. @@ -586,25 +524,20 @@ Token::Value Scanner::ScanHtmlComment() { // Check for <!-- comments. DCHECK_EQ(c0_, '!'); Advance(); - if (c0_ != '-') { + if (c0_ != '-' || Peek() != '-') { PushBack('!'); // undo Advance() return Token::LT; } - Advance(); - if (c0_ != '-') { - PushBack2('-', '!'); // undo 2x Advance() - return Token::LT; - } found_html_comment_ = true; return SkipSingleHTMLComment(); } void Scanner::Scan() { - next_.literal_chars = nullptr; - next_.raw_literal_chars = nullptr; - next_.invalid_template_escape_message = MessageTemplate::kNone; + next().literal_chars.Drop(); + next().raw_literal_chars.Drop(); + next().invalid_template_escape_message = MessageTemplate::kNone; Token::Value token; do { @@ -612,17 +545,17 @@ void Scanner::Scan() { Token::Value token = static_cast<Token::Value>(one_char_tokens[c0_]); if (token != Token::ILLEGAL) { int pos = source_pos(); - next_.token = token; - next_.contextual_token = Token::UNINITIALIZED; - next_.location.beg_pos = pos; - next_.location.end_pos = pos + 1; + next().token = token; + next().contextual_token = Token::UNINITIALIZED; + next().location.beg_pos = pos; + next().location.end_pos = pos + 1; Advance(); return; } } // Remember the position of the next token - next_.location.beg_pos = source_pos(); + next().location.beg_pos = source_pos(); switch (c0_) { case '"': @@ -703,7 +636,7 @@ void Scanner::Scan() { Advance(); if (c0_ == '-') { Advance(); - if (c0_ == '>' && HasAnyLineTerminatorBeforeNext()) { + if (c0_ == '>' && HasLineTerminatorBeforeNext()) { // For compatibility with SpiderMonkey, we skip lines that // start with an HTML comment end '-->'. token = SkipSingleHTMLComment(); @@ -738,12 +671,12 @@ void Scanner::Scan() { // / // /* /= Advance(); if (c0_ == '/') { - Advance(); - if (c0_ == '#' || c0_ == '@') { + uc32 c = Peek(); + if (c == '#' || c == '@') { + Advance(); Advance(); token = SkipSourceURLComment(); } else { - PushBack(c0_); token = SkipSingleLineComment(); } } else if (c0_ == '*') { @@ -792,12 +725,10 @@ void Scanner::Scan() { } else { token = Token::PERIOD; if (c0_ == '.') { - Advance(); - if (c0_ == '.') { + if (Peek() == '.') { + Advance(); Advance(); token = Token::ELLIPSIS; - } else { - PushBack('.'); } } } @@ -831,19 +762,19 @@ void Scanner::Scan() { // whitespace. } while (token == Token::WHITESPACE); - next_.location.end_pos = source_pos(); + next().location.end_pos = source_pos(); if (Token::IsContextualKeyword(token)) { - next_.token = Token::IDENTIFIER; - next_.contextual_token = token; + next().token = Token::IDENTIFIER; + next().contextual_token = token; } else { - next_.token = token; - next_.contextual_token = Token::UNINITIALIZED; + next().token = token; + next().contextual_token = Token::UNINITIALIZED; } #ifdef DEBUG - SanityCheckTokenDesc(current_); - SanityCheckTokenDesc(next_); - SanityCheckTokenDesc(next_next_); + SanityCheckTokenDesc(current()); + SanityCheckTokenDesc(next()); + SanityCheckTokenDesc(next_next()); #endif } @@ -864,8 +795,8 @@ void Scanner::SanityCheckTokenDesc(const TokenDesc& token) const { break; case Token::TEMPLATE_SPAN: case Token::TEMPLATE_TAIL: - DCHECK_NOT_NULL(token.raw_literal_chars); - DCHECK_NOT_NULL(token.literal_chars); + DCHECK(token.raw_literal_chars.is_used()); + DCHECK(token.literal_chars.is_used()); break; case Token::ESCAPED_KEYWORD: case Token::ESCAPED_STRICT_RESERVED_WORD: @@ -877,13 +808,13 @@ void Scanner::SanityCheckTokenDesc(const TokenDesc& token) const { case Token::SMI: case Token::STRING: case Token::PRIVATE_NAME: - DCHECK_NOT_NULL(token.literal_chars); - DCHECK_NULL(token.raw_literal_chars); + DCHECK(token.literal_chars.is_used()); + DCHECK(!token.raw_literal_chars.is_used()); DCHECK_EQ(token.invalid_template_escape_message, MessageTemplate::kNone); break; default: - DCHECK_NULL(token.literal_chars); - DCHECK_NULL(token.raw_literal_chars); + DCHECK(!token.literal_chars.is_used()); + DCHECK(!token.raw_literal_chars.is_used()); DCHECK_EQ(token.invalid_template_escape_message, MessageTemplate::kNone); break; } @@ -900,9 +831,9 @@ void Scanner::SanityCheckTokenDesc(const TokenDesc& token) const { void Scanner::SeekForward(int pos) { // After this call, we will have the token at the given position as // the "next" token. The "current" token will be invalid. - if (pos == next_.location.beg_pos) return; + if (pos == next().location.beg_pos) return; int current_pos = source_pos(); - DCHECK_EQ(next_.location.end_pos, current_pos); + DCHECK_EQ(next().location.end_pos, current_pos); // Positions inside the lookahead token aren't supported. DCHECK(pos >= current_pos); if (pos != current_pos) { @@ -911,23 +842,21 @@ void Scanner::SeekForward(int pos) { // This function is only called to seek to the location // of the end of a function (at the "}" token). It doesn't matter // whether there was a line terminator in the part we skip. - has_line_terminator_before_next_ = false; - has_multiline_comment_before_next_ = false; + next().after_line_terminator = false; } Scan(); } - -template <bool capture_raw, bool in_template_literal> +template <bool capture_raw> bool Scanner::ScanEscape() { uc32 c = c0_; Advance<capture_raw>(); // Skip escaped newlines. DCHECK(!unibrow::IsLineTerminator(kEndOfInput)); - if (!in_template_literal && unibrow::IsLineTerminator(c)) { + if (!capture_raw && unibrow::IsLineTerminator(c)) { // Allow escaped CR+LF newlines in multiline string literals. - if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance<capture_raw>(); + if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance(); return true; } @@ -961,7 +890,7 @@ bool Scanner::ScanEscape() { case '5': // fall through case '6': // fall through case '7': - c = ScanOctalEscape<capture_raw>(c, 2, in_template_literal); + c = ScanOctalEscape<capture_raw>(c, 2); break; } @@ -971,7 +900,7 @@ bool Scanner::ScanEscape() { } template <bool capture_raw> -uc32 Scanner::ScanOctalEscape(uc32 c, int length, bool in_template_literal) { +uc32 Scanner::ScanOctalEscape(uc32 c, int length) { uc32 x = c - '0'; int i = 0; for (; i < length; i++) { @@ -989,14 +918,12 @@ uc32 Scanner::ScanOctalEscape(uc32 c, int length, bool in_template_literal) { // occur before the "use strict" directive. if (c != '0' || i > 0 || c0_ == '8' || c0_ == '9') { octal_pos_ = Location(source_pos() - i - 1, source_pos() - 1); - octal_message_ = in_template_literal - ? MessageTemplate::kTemplateOctalLiteral - : MessageTemplate::kStrictOctalEscape; + octal_message_ = capture_raw ? MessageTemplate::kTemplateOctalLiteral + : MessageTemplate::kStrictOctalEscape; } return x; } - Token::Value Scanner::ScanString() { uc32 quote = c0_; Advance(); // consume quote @@ -1014,7 +941,7 @@ Token::Value Scanner::ScanString() { if (c0_ == '\\') { Advance(); // TODO(verwaest): Check whether we can remove the additional check. - if (c0_ == kEndOfInput || !ScanEscape<false, false>()) { + if (c0_ == kEndOfInput || !ScanEscape<false>()) { return Token::ILLEGAL; } continue; @@ -1032,15 +959,14 @@ Token::Value Scanner::ScanPrivateName() { LiteralScope literal(this); DCHECK_EQ(c0_, '#'); - AddLiteralCharAdvance(); DCHECK(!unicode_cache_->IsIdentifierStart(kEndOfInput)); - if (!unicode_cache_->IsIdentifierStart(c0_)) { - PushBack(c0_); + if (!unicode_cache_->IsIdentifierStart(Peek())) { ReportScannerError(source_pos(), MessageTemplate::kInvalidOrUnexpectedToken); return Token::ILLEGAL; } + AddLiteralCharAdvance(); Token::Value token = ScanIdentifierOrKeywordInner(&literal); return token == Token::ILLEGAL ? Token::ILLEGAL : Token::PRIVATE_NAME; } @@ -1069,89 +995,87 @@ Token::Value Scanner::ScanTemplateSpan() { LiteralScope literal(this); StartRawLiteral(); const bool capture_raw = true; - const bool in_template_literal = true; while (true) { uc32 c = c0_; - Advance<capture_raw>(); if (c == '`') { + Advance(); // Consume '`' result = Token::TEMPLATE_TAIL; - ReduceRawLiteralLength(1); break; - } else if (c == '$' && c0_ == '{') { - Advance<capture_raw>(); // Consume '{' - ReduceRawLiteralLength(2); + } else if (c == '$' && Peek() == '{') { + Advance(); // Consume '$' + Advance(); // Consume '{' break; } else if (c == '\\') { + Advance(); // Consume '\\' DCHECK(!unibrow::IsLineTerminator(kEndOfInput)); + if (capture_raw) AddRawLiteralChar('\\'); if (unibrow::IsLineTerminator(c0_)) { // The TV of LineContinuation :: \ LineTerminatorSequence is the empty // code unit sequence. uc32 lastChar = c0_; - Advance<capture_raw>(); + Advance(); if (lastChar == '\r') { - ReduceRawLiteralLength(1); // Remove \r - if (c0_ == '\n') { - Advance<capture_raw>(); // Adds \n - } else { - AddRawLiteralChar('\n'); - } + // Also skip \n. + if (c0_ == '\n') Advance(); + lastChar = '\n'; } + if (capture_raw) AddRawLiteralChar(lastChar); } else { - bool success = ScanEscape<capture_raw, in_template_literal>(); + bool success = ScanEscape<capture_raw>(); USE(success); DCHECK_EQ(!success, has_error()); // For templates, invalid escape sequence checking is handled in the // parser. - scanner_error_state.MoveErrorTo(&next_); - octal_error_state.MoveErrorTo(&next_); + scanner_error_state.MoveErrorTo(next_); + octal_error_state.MoveErrorTo(next_); } } else if (c < 0) { // Unterminated template literal - PushBack(c); break; } else { + Advance(); // Consume c. // The TRV of LineTerminatorSequence :: <CR> is the CV 0x000A. // The TRV of LineTerminatorSequence :: <CR><LF> is the sequence // consisting of the CV 0x000A. if (c == '\r') { - ReduceRawLiteralLength(1); // Remove \r - if (c0_ == '\n') { - Advance<capture_raw>(); // Adds \n - } else { - AddRawLiteralChar('\n'); - } + if (c0_ == '\n') Advance(); // Consume '\n' c = '\n'; } + if (capture_raw) AddRawLiteralChar(c); AddLiteralChar(c); } } literal.Complete(); - next_.location.end_pos = source_pos(); - next_.token = result; - next_.contextual_token = Token::UNINITIALIZED; + next().location.end_pos = source_pos(); + next().token = result; + next().contextual_token = Token::UNINITIALIZED; return result; } - Token::Value Scanner::ScanTemplateStart() { - DCHECK_EQ(next_next_.token, Token::UNINITIALIZED); + DCHECK_EQ(next_next().token, Token::UNINITIALIZED); DCHECK_EQ(c0_, '`'); - next_.location.beg_pos = source_pos(); + next().location.beg_pos = source_pos(); Advance(); // Consume ` return ScanTemplateSpan(); } Handle<String> Scanner::SourceUrl(Isolate* isolate) const { Handle<String> tmp; - if (source_url_.length() > 0) tmp = source_url_.Internalize(isolate); + if (source_url_.length() > 0) { + DCHECK(source_url_.is_used()); + tmp = source_url_.Internalize(isolate); + } return tmp; } Handle<String> Scanner::SourceMappingUrl(Isolate* isolate) const { Handle<String> tmp; - if (source_mapping_url_.length() > 0) + if (source_mapping_url_.length() > 0) { + DCHECK(source_mapping_url_.is_used()); tmp = source_mapping_url_.Internalize(isolate); + } return tmp; } @@ -1375,10 +1299,10 @@ Token::Value Scanner::ScanNumber(bool seen_period) { return Token::ILLEGAL; } - if (next_.literal_chars->one_byte_literal().length() <= 10 && + if (next().literal_chars.one_byte_literal().length() <= 10 && value <= Smi::kMaxValue && c0_ != '.' && !unicode_cache_->IsIdentifierStart(c0_)) { - next_.smi_value_ = static_cast<uint32_t>(value); + next().smi_value_ = static_cast<uint32_t>(value); literal.Complete(); if (kind == DECIMAL_WITH_LEADING_ZERO) { @@ -1448,7 +1372,6 @@ Token::Value Scanner::ScanNumber(bool seen_period) { return is_bigint ? Token::BIGINT : Token::NUMBER; } - uc32 Scanner::ScanIdentifierUnicodeEscape() { Advance(); if (c0_ != 'u') return -1; @@ -1456,7 +1379,6 @@ uc32 Scanner::ScanIdentifierUnicodeEscape() { return ScanUnicodeEscape<false>(); } - template <bool capture_raw> uc32 Scanner::ScanUnicodeEscape() { // Accept both \uxxxx and \u{xxxxxx}. In the latter case, the number of @@ -1622,13 +1544,15 @@ Token::Value Scanner::ScanIdentifierOrKeywordInner(LiteralScope* literal) { bool escaped = false; if (IsInRange(c0_, 'a', 'z') || c0_ == '_') { do { - AddLiteralCharAdvance(); + AddLiteralChar(static_cast<char>(c0_)); + Advance(); } while (IsInRange(c0_, 'a', 'z') || c0_ == '_'); if (IsDecimalDigit(c0_) || IsInRange(c0_, 'A', 'Z') || c0_ == '$') { // Identifier starting with lowercase or _. do { - AddLiteralCharAdvance(); + AddLiteralChar(static_cast<char>(c0_)); + Advance(); } while (IsAsciiIdentifier(c0_)); if (c0_ <= kMaxAscii && c0_ != '\\') { @@ -1637,7 +1561,7 @@ Token::Value Scanner::ScanIdentifierOrKeywordInner(LiteralScope* literal) { } } else if (c0_ <= kMaxAscii && c0_ != '\\') { // Only a-z+ or _: could be a keyword or identifier. - Vector<const uint8_t> chars = next_.literal_chars->one_byte_literal(); + Vector<const uint8_t> chars = next().literal_chars.one_byte_literal(); Token::Value token = KeywordOrIdentifierToken(chars.start(), chars.length()); if (token == Token::IDENTIFIER || @@ -1648,7 +1572,8 @@ Token::Value Scanner::ScanIdentifierOrKeywordInner(LiteralScope* literal) { } } else if (IsInRange(c0_, 'A', 'Z') || c0_ == '$') { do { - AddLiteralCharAdvance(); + AddLiteralChar(static_cast<char>(c0_)); + Advance(); } while (IsAsciiIdentifier(c0_)); if (c0_ <= kMaxAscii && c0_ != '\\') { @@ -1686,8 +1611,8 @@ Token::Value Scanner::ScanIdentifierOrKeywordInner(LiteralScope* literal) { } } - if (next_.literal_chars->is_one_byte()) { - Vector<const uint8_t> chars = next_.literal_chars->one_byte_literal(); + if (next().literal_chars.is_one_byte()) { + Vector<const uint8_t> chars = next().literal_chars.one_byte_literal(); Token::Value token = KeywordOrIdentifierToken(chars.start(), chars.length()); /* TODO(adamk): YIELD should be handled specially. */ @@ -1715,17 +1640,17 @@ Token::Value Scanner::ScanIdentifierOrKeywordInner(LiteralScope* literal) { } bool Scanner::ScanRegExpPattern() { - DCHECK(next_next_.token == Token::UNINITIALIZED); - DCHECK(next_.token == Token::DIV || next_.token == Token::ASSIGN_DIV); + DCHECK_EQ(Token::UNINITIALIZED, next_next().token); + DCHECK(next().token == Token::DIV || next().token == Token::ASSIGN_DIV); // Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags bool in_character_class = false; - bool seen_equal = (next_.token == Token::ASSIGN_DIV); + bool seen_equal = (next().token == Token::ASSIGN_DIV); // Previous token is either '/' or '/=', in the second case, the // pattern starts at =. - next_.location.beg_pos = source_pos() - (seen_equal ? 2 : 1); - next_.location.end_pos = source_pos() - (seen_equal ? 1 : 0); + next().location.beg_pos = source_pos() - (seen_equal ? 2 : 1); + next().location.end_pos = source_pos() - (seen_equal ? 1 : 0); // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5, // the scanner should pass uninterpreted bodies to the RegExp @@ -1764,14 +1689,14 @@ bool Scanner::ScanRegExpPattern() { Advance(); // consume '/' literal.Complete(); - next_.token = Token::REGEXP_LITERAL; - next_.contextual_token = Token::UNINITIALIZED; + next().token = Token::REGEXP_LITERAL; + next().contextual_token = Token::UNINITIALIZED; return true; } Maybe<RegExp::Flags> Scanner::ScanRegExpFlags() { - DCHECK(next_.token == Token::REGEXP_LITERAL); + DCHECK_EQ(Token::REGEXP_LITERAL, next().token); // Scan regular expression flags. int flags = 0; @@ -1806,7 +1731,7 @@ Maybe<RegExp::Flags> Scanner::ScanRegExpFlags() { flags |= flag; } - next_.location.end_pos = source_pos(); + next().location.end_pos = source_pos(); return Just(RegExp::Flags(flags)); } @@ -1869,24 +1794,17 @@ void Scanner::SeekNext(size_t position) { // 1, Reset the current_, next_ and next_next_ tokens // (next_ + next_next_ will be overwrittem by Next(), // current_ will remain unchanged, so overwrite it fully.) - current_ = {{0, 0}, - nullptr, - nullptr, - 0, - Token::UNINITIALIZED, - MessageTemplate::kNone, - {0, 0}, - Token::UNINITIALIZED}; - next_.token = Token::UNINITIALIZED; - next_.contextual_token = Token::UNINITIALIZED; - next_next_.token = Token::UNINITIALIZED; - next_next_.contextual_token = Token::UNINITIALIZED; + for (TokenDesc& token : token_storage_) { + token.token = Token::UNINITIALIZED; + token.contextual_token = Token::UNINITIALIZED; + } // 2, reset the source to the desired position, source_->Seek(position); // 3, re-scan, by scanning the look-ahead char + 1 token (next_). c0_ = source_->Advance(); - Next(); - DCHECK_EQ(next_.location.beg_pos, static_cast<int>(position)); + next().after_line_terminator = false; + Scan(); + DCHECK_EQ(next().location.beg_pos, static_cast<int>(position)); } } // namespace internal |