diff options
author | Ryan Dahl <ry@tinyclouds.org> | 2010-11-24 01:03:06 -0800 |
---|---|---|
committer | Ryan Dahl <ry@tinyclouds.org> | 2010-11-24 01:03:06 -0800 |
commit | 73318fa09d0b67a67c1033bf0bfcc0e78883f257 (patch) | |
tree | ecdf0c18b14e3158cfbdff95d012f810b216f43d /deps/v8/src/scanner.cc | |
parent | fa8ffaf9b2375f98ac86f887bf76f3aa81fa5aa4 (diff) | |
download | node-new-73318fa09d0b67a67c1033bf0bfcc0e78883f257.tar.gz |
Upgrade V8 to 2.5.8
Diffstat (limited to 'deps/v8/src/scanner.cc')
-rwxr-xr-x | deps/v8/src/scanner.cc | 868 |
1 files changed, 82 insertions, 786 deletions
diff --git a/deps/v8/src/scanner.cc b/deps/v8/src/scanner.cc index 6b2fcb4c5e..63b2fd807d 100755 --- a/deps/v8/src/scanner.cc +++ b/deps/v8/src/scanner.cc @@ -36,35 +36,8 @@ namespace v8 { namespace internal { // ---------------------------------------------------------------------------- -// UTF8Buffer - -UTF8Buffer::UTF8Buffer() : buffer_(kInitialCapacity), recording_(false) { } - - -UTF8Buffer::~UTF8Buffer() {} - - -void UTF8Buffer::AddCharSlow(uc32 c) { - ASSERT(static_cast<unsigned>(c) > unibrow::Utf8::kMaxOneByteChar); - int length = unibrow::Utf8::Length(c); - Vector<char> block = buffer_.AddBlock(length, '\0'); -#ifdef DEBUG - int written_length = unibrow::Utf8::Encode(block.start(), c); - CHECK_EQ(length, written_length); -#else - unibrow::Utf8::Encode(block.start(), c); -#endif -} - - -// ---------------------------------------------------------------------------- // UTF16Buffer - -UTF16Buffer::UTF16Buffer() - : pos_(0), end_(Scanner::kNoEndPosition) { } - - // CharacterStreamUTF16Buffer CharacterStreamUTF16Buffer::CharacterStreamUTF16Buffer() : pushback_buffer_(0), last_(0), stream_(NULL) { } @@ -78,7 +51,7 @@ void CharacterStreamUTF16Buffer::Initialize(Handle<String> data, if (start_position > 0) { SeekForward(start_position); } - end_ = end_position != Scanner::kNoEndPosition ? end_position : kMaxInt; + end_ = end_position != kNoEndPosition ? end_position : kMaxInt; } @@ -90,7 +63,7 @@ void CharacterStreamUTF16Buffer::PushBack(uc32 ch) { uc32 CharacterStreamUTF16Buffer::Advance() { - ASSERT(end_ != Scanner::kNoEndPosition); + ASSERT(end_ != kNoEndPosition); ASSERT(end_ >= 0); // NOTE: It is of importance to Persian / Farsi resources that we do // *not* strip format control characters in the scanner; see @@ -143,41 +116,74 @@ void Scanner::LiteralScope::Complete() { } // ---------------------------------------------------------------------------- -// Scanner - -Scanner::Scanner() - : has_line_terminator_before_next_(false), - is_parsing_json_(false), - source_(NULL), - stack_overflow_(false) {} +// V8JavaScriptScanner + +void V8JavaScriptScanner::Initialize(Handle<String> source, + int literal_flags) { + source_ = stream_initializer_.Init(source, NULL, 0, source->length()); + // Need to capture identifiers in order to recognize "get" and "set" + // in object literals. + literal_flags_ = literal_flags | kLiteralIdentifier; + Init(); + // Skip initial whitespace allowing HTML comment ends just like + // after a newline and scan first token. + has_line_terminator_before_next_ = true; + SkipWhiteSpace(); + Scan(); +} -void Scanner::Initialize(Handle<String> source, - ParserLanguage language) { - Init(source, NULL, 0, source->length(), language); +void V8JavaScriptScanner::Initialize(Handle<String> source, + unibrow::CharacterStream* stream, + int literal_flags) { + source_ = stream_initializer_.Init(source, stream, + 0, UTF16Buffer::kNoEndPosition); + literal_flags_ = literal_flags | kLiteralIdentifier; + Init(); + // Skip initial whitespace allowing HTML comment ends just like + // after a newline and scan first token. + has_line_terminator_before_next_ = true; + SkipWhiteSpace(); + Scan(); } -void Scanner::Initialize(Handle<String> source, - unibrow::CharacterStream* stream, - ParserLanguage language) { - Init(source, stream, 0, kNoEndPosition, language); +void V8JavaScriptScanner::Initialize(Handle<String> source, + int start_position, + int end_position, + int literal_flags) { + source_ = stream_initializer_.Init(source, NULL, + start_position, end_position); + literal_flags_ = literal_flags | kLiteralIdentifier; + Init(); + // Skip initial whitespace allowing HTML comment ends just like + // after a newline and scan first token. + has_line_terminator_before_next_ = true; + SkipWhiteSpace(); + Scan(); } -void Scanner::Initialize(Handle<String> source, - int start_position, - int end_position, - ParserLanguage language) { - Init(source, NULL, start_position, end_position, language); +Token::Value V8JavaScriptScanner::NextCheckStack() { + // BUG 1215673: Find a thread safe way to set a stack limit in + // pre-parse mode. Otherwise, we cannot safely pre-parse from other + // threads. + StackLimitCheck check; + if (check.HasOverflowed()) { + stack_overflow_ = true; + current_ = next_; + next_.token = Token::ILLEGAL; + return current_.token; + } else { + return Next(); + } } -void Scanner::Init(Handle<String> source, - unibrow::CharacterStream* stream, - int start_position, - int end_position, - ParserLanguage language) { +UTF16Buffer* StreamInitializer::Init(Handle<String> source, + unibrow::CharacterStream* stream, + int start_position, + int end_position) { // Either initialize the scanner from a character stream or from a // string. ASSERT(source.is_null() || stream == NULL); @@ -188,13 +194,13 @@ void Scanner::Init(Handle<String> source, Handle<ExternalTwoByteString>::cast(source), start_position, end_position); - source_ = &two_byte_string_buffer_; + return &two_byte_string_buffer_; } else if (!source.is_null() && StringShape(*source).IsExternalAscii()) { ascii_string_buffer_.Initialize( Handle<ExternalAsciiString>::cast(source), start_position, end_position); - source_ = &ascii_string_buffer_; + return &ascii_string_buffer_; } else { if (!source.is_null()) { safe_string_input_buffer_.Reset(source.location()); @@ -204,28 +210,27 @@ void Scanner::Init(Handle<String> source, stream, start_position, end_position); - source_ = &char_stream_buffer_; + return &char_stream_buffer_; } +} - is_parsing_json_ = (language == JSON); +// ---------------------------------------------------------------------------- +// JsonScanner - // Set c0_ (one character ahead) - ASSERT(kCharacterLookaheadBufferSize == 1); - Advance(); - // Initialize current_ to not refer to a literal. - current_.literal_chars = Vector<const char>(); - // Reset literal buffer. - literal_buffer_.Reset(); +JsonScanner::JsonScanner() {} - // Skip initial whitespace allowing HTML comment ends just like - // after a newline and scan first token. - has_line_terminator_before_next_ = true; - SkipWhiteSpace(); - Scan(); + +void JsonScanner::Initialize(Handle<String> source) { + source_ = stream_initializer_.Init(source, NULL, 0, source->length()); + Init(); + // Skip initial whitespace. + SkipJsonWhiteSpace(); + // Preload first token as look-ahead. + ScanJson(); } -Token::Value Scanner::Next() { +Token::Value JsonScanner::Next() { // BUG 1215673: Find a thread safe way to set a stack limit in // pre-parse mode. Otherwise, we cannot safely pre-parse from other // threads. @@ -236,52 +241,13 @@ Token::Value Scanner::Next() { stack_overflow_ = true; next_.token = Token::ILLEGAL; } else { - has_line_terminator_before_next_ = false; - Scan(); + ScanJson(); } return current_.token; } -void Scanner::StartLiteral() { - literal_buffer_.StartLiteral(); -} - - -void Scanner::AddLiteralChar(uc32 c) { - literal_buffer_.AddChar(c); -} - - -void Scanner::TerminateLiteral() { - next_.literal_chars = literal_buffer_.EndLiteral(); -} - - -void Scanner::DropLiteral() { - literal_buffer_.DropLiteral(); -} - - -void Scanner::AddLiteralCharAdvance() { - AddLiteralChar(c0_); - Advance(); -} - - -static inline bool IsByteOrderMark(uc32 c) { - // The Unicode value U+FFFE is guaranteed never to be assigned as a - // Unicode character; this implies that in a Unicode context the - // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF - // character expressed in little-endian byte order (since it could - // not be a U+FFFE character expressed in big-endian byte - // order). Nevertheless, we check for it to be compatible with - // Spidermonkey. - return c == 0xFEFF || c == 0xFFFE; -} - - -bool Scanner::SkipJsonWhiteSpace() { +bool JsonScanner::SkipJsonWhiteSpace() { int start_position = source_pos(); // JSON WhiteSpace is tab, carrige-return, newline and space. while (c0_ == ' ' || c0_ == '\n' || c0_ == '\r' || c0_ == '\t') { @@ -291,107 +257,9 @@ bool Scanner::SkipJsonWhiteSpace() { } -bool Scanner::SkipJavaScriptWhiteSpace() { - int start_position = source_pos(); - - while (true) { - // We treat byte-order marks (BOMs) as whitespace for better - // compatibility with Spidermonkey and other JavaScript engines. - while (ScannerConstants::kIsWhiteSpace.get(c0_) || IsByteOrderMark(c0_)) { - // IsWhiteSpace() includes line terminators! - if (ScannerConstants::kIsLineTerminator.get(c0_)) { - // Ignore line terminators, but remember them. This is necessary - // for automatic semicolon insertion. - has_line_terminator_before_next_ = true; - } - Advance(); - } - - // If there is an HTML comment end '-->' at the beginning of a - // line (with only whitespace in front of it), we treat the rest - // of the line as a comment. This is in line with the way - // SpiderMonkey handles it. - if (c0_ == '-' && has_line_terminator_before_next_) { - Advance(); - if (c0_ == '-') { - Advance(); - if (c0_ == '>') { - // Treat the rest of the line as a comment. - SkipSingleLineComment(); - // Continue skipping white space after the comment. - continue; - } - PushBack('-'); // undo Advance() - } - PushBack('-'); // undo Advance() - } - // Return whether or not we skipped any characters. - return source_pos() != start_position; - } -} - - -Token::Value Scanner::SkipSingleLineComment() { - Advance(); - - // The line terminator at the end of the line is not considered - // to be part of the single-line comment; it is recognized - // separately by the lexical grammar and becomes part of the - // stream of input elements for the syntactic grammar (see - // ECMA-262, section 7.4, page 12). - while (c0_ >= 0 && !ScannerConstants::kIsLineTerminator.get(c0_)) { - Advance(); - } - - return Token::WHITESPACE; -} - - -Token::Value Scanner::SkipMultiLineComment() { - ASSERT(c0_ == '*'); - Advance(); - - while (c0_ >= 0) { - char ch = c0_; - Advance(); - // If we have reached the end of the multi-line comment, we - // consume the '/' and insert a whitespace. This way all - // multi-line comments are treated as whitespace - even the ones - // containing line terminators. This contradicts ECMA-262, section - // 7.4, page 12, that says that multi-line comments containing - // line terminators should be treated as a line terminator, but it - // matches the behaviour of SpiderMonkey and KJS. - if (ch == '*' && c0_ == '/') { - c0_ = ' '; - return Token::WHITESPACE; - } - } - - // Unterminated multi-line comment. - return Token::ILLEGAL; -} - - -Token::Value Scanner::ScanHtmlComment() { - // Check for <!-- comments. - ASSERT(c0_ == '!'); - Advance(); - if (c0_ == '-') { - Advance(); - if (c0_ == '-') return SkipSingleLineComment(); - PushBack('-'); // undo Advance() - } - PushBack('!'); // undo Advance() - ASSERT(c0_ == '!'); - return Token::LT; -} - - - -void Scanner::ScanJson() { +void JsonScanner::ScanJson() { next_.literal_chars = Vector<const char>(); Token::Value token; - has_line_terminator_before_next_ = false; do { // Remember the position of the next token next_.location.beg_pos = source_pos(); @@ -468,7 +336,7 @@ void Scanner::ScanJson() { } -Token::Value Scanner::ScanJsonString() { +Token::Value JsonScanner::ScanJsonString() { ASSERT_EQ('"', c0_); Advance(); LiteralScope literal(this); @@ -528,7 +396,7 @@ Token::Value Scanner::ScanJsonString() { } -Token::Value Scanner::ScanJsonNumber() { +Token::Value JsonScanner::ScanJsonNumber() { LiteralScope literal(this); if (c0_ == '-') AddLiteralCharAdvance(); if (c0_ == '0') { @@ -562,8 +430,8 @@ Token::Value Scanner::ScanJsonNumber() { } -Token::Value Scanner::ScanJsonIdentifier(const char* text, - Token::Value token) { +Token::Value JsonScanner::ScanJsonIdentifier(const char* text, + Token::Value token) { LiteralScope literal(this); while (*text != '\0') { if (c0_ != *text) return Token::ILLEGAL; @@ -576,577 +444,5 @@ Token::Value Scanner::ScanJsonIdentifier(const char* text, } -void Scanner::ScanJavaScript() { - next_.literal_chars = Vector<const char>(); - Token::Value token; - do { - // Remember the position of the next token - next_.location.beg_pos = source_pos(); - - switch (c0_) { - case ' ': - case '\t': - Advance(); - token = Token::WHITESPACE; - break; - - case '\n': - Advance(); - has_line_terminator_before_next_ = true; - token = Token::WHITESPACE; - break; - - case '"': case '\'': - token = ScanString(); - break; - - case '<': - // < <= << <<= <!-- - Advance(); - if (c0_ == '=') { - token = Select(Token::LTE); - } else if (c0_ == '<') { - token = Select('=', Token::ASSIGN_SHL, Token::SHL); - } else if (c0_ == '!') { - token = ScanHtmlComment(); - } else { - token = Token::LT; - } - break; - - case '>': - // > >= >> >>= >>> >>>= - Advance(); - if (c0_ == '=') { - token = Select(Token::GTE); - } else if (c0_ == '>') { - // >> >>= >>> >>>= - Advance(); - if (c0_ == '=') { - token = Select(Token::ASSIGN_SAR); - } else if (c0_ == '>') { - token = Select('=', Token::ASSIGN_SHR, Token::SHR); - } else { - token = Token::SAR; - } - } else { - token = Token::GT; - } - break; - - case '=': - // = == === - Advance(); - if (c0_ == '=') { - token = Select('=', Token::EQ_STRICT, Token::EQ); - } else { - token = Token::ASSIGN; - } - break; - - case '!': - // ! != !== - Advance(); - if (c0_ == '=') { - token = Select('=', Token::NE_STRICT, Token::NE); - } else { - token = Token::NOT; - } - break; - - case '+': - // + ++ += - Advance(); - if (c0_ == '+') { - token = Select(Token::INC); - } else if (c0_ == '=') { - token = Select(Token::ASSIGN_ADD); - } else { - token = Token::ADD; - } - break; - - case '-': - // - -- --> -= - Advance(); - if (c0_ == '-') { - Advance(); - if (c0_ == '>' && has_line_terminator_before_next_) { - // For compatibility with SpiderMonkey, we skip lines that - // start with an HTML comment end '-->'. - token = SkipSingleLineComment(); - } else { - token = Token::DEC; - } - } else if (c0_ == '=') { - token = Select(Token::ASSIGN_SUB); - } else { - token = Token::SUB; - } - break; - - case '*': - // * *= - token = Select('=', Token::ASSIGN_MUL, Token::MUL); - break; - - case '%': - // % %= - token = Select('=', Token::ASSIGN_MOD, Token::MOD); - break; - - case '/': - // / // /* /= - Advance(); - if (c0_ == '/') { - token = SkipSingleLineComment(); - } else if (c0_ == '*') { - token = SkipMultiLineComment(); - } else if (c0_ == '=') { - token = Select(Token::ASSIGN_DIV); - } else { - token = Token::DIV; - } - break; - - case '&': - // & && &= - Advance(); - if (c0_ == '&') { - token = Select(Token::AND); - } else if (c0_ == '=') { - token = Select(Token::ASSIGN_BIT_AND); - } else { - token = Token::BIT_AND; - } - break; - - case '|': - // | || |= - Advance(); - if (c0_ == '|') { - token = Select(Token::OR); - } else if (c0_ == '=') { - token = Select(Token::ASSIGN_BIT_OR); - } else { - token = Token::BIT_OR; - } - break; - - case '^': - // ^ ^= - token = Select('=', Token::ASSIGN_BIT_XOR, Token::BIT_XOR); - break; - - case '.': - // . Number - Advance(); - if (IsDecimalDigit(c0_)) { - token = ScanNumber(true); - } else { - token = Token::PERIOD; - } - break; - - case ':': - token = Select(Token::COLON); - break; - - case ';': - token = Select(Token::SEMICOLON); - break; - - case ',': - token = Select(Token::COMMA); - break; - - case '(': - token = Select(Token::LPAREN); - break; - - case ')': - token = Select(Token::RPAREN); - break; - - case '[': - token = Select(Token::LBRACK); - break; - - case ']': - token = Select(Token::RBRACK); - break; - - case '{': - token = Select(Token::LBRACE); - break; - - case '}': - token = Select(Token::RBRACE); - break; - - case '?': - token = Select(Token::CONDITIONAL); - break; - - case '~': - token = Select(Token::BIT_NOT); - break; - - default: - if (ScannerConstants::kIsIdentifierStart.get(c0_)) { - token = ScanIdentifier(); - } else if (IsDecimalDigit(c0_)) { - token = ScanNumber(false); - } else if (SkipWhiteSpace()) { - token = Token::WHITESPACE; - } else if (c0_ < 0) { - token = Token::EOS; - } else { - token = Select(Token::ILLEGAL); - } - break; - } - - // Continue scanning for tokens as long as we're just skipping - // whitespace. - } while (token == Token::WHITESPACE); - - next_.location.end_pos = source_pos(); - next_.token = token; -} - - -void Scanner::SeekForward(int pos) { - source_->SeekForward(pos - 1); - Advance(); - // This function is only called to seek to the location - // of the end of a function (at the "}" token). It doesn't matter - // whether there was a line terminator in the part we skip. - has_line_terminator_before_next_ = false; - Scan(); -} - - -uc32 Scanner::ScanHexEscape(uc32 c, int length) { - ASSERT(length <= 4); // prevent overflow - - uc32 digits[4]; - uc32 x = 0; - for (int i = 0; i < length; i++) { - digits[i] = c0_; - int d = HexValue(c0_); - if (d < 0) { - // According to ECMA-262, 3rd, 7.8.4, page 18, these hex escapes - // should be illegal, but other JS VMs just return the - // non-escaped version of the original character. - - // Push back digits read, except the last one (in c0_). - for (int j = i-1; j >= 0; j--) { - PushBack(digits[j]); - } - // Notice: No handling of error - treat it as "\u"->"u". - return c; - } - x = x * 16 + d; - Advance(); - } - - return x; -} - - -// Octal escapes of the forms '\0xx' and '\xxx' are not a part of -// ECMA-262. Other JS VMs support them. -uc32 Scanner::ScanOctalEscape(uc32 c, int length) { - uc32 x = c - '0'; - for (int i = 0; i < length; i++) { - int d = c0_ - '0'; - if (d < 0 || d > 7) break; - int nx = x * 8 + d; - if (nx >= 256) break; - x = nx; - Advance(); - } - return x; -} - - -void Scanner::ScanEscape() { - uc32 c = c0_; - Advance(); - - // Skip escaped newlines. - if (ScannerConstants::kIsLineTerminator.get(c)) { - // Allow CR+LF newlines in multiline string literals. - if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance(); - // Allow LF+CR newlines in multiline string literals. - if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance(); - return; - } - - switch (c) { - case '\'': // fall through - case '"' : // fall through - case '\\': break; - case 'b' : c = '\b'; break; - case 'f' : c = '\f'; break; - case 'n' : c = '\n'; break; - case 'r' : c = '\r'; break; - case 't' : c = '\t'; break; - case 'u' : c = ScanHexEscape(c, 4); break; - case 'v' : c = '\v'; break; - case 'x' : c = ScanHexEscape(c, 2); break; - case '0' : // fall through - case '1' : // fall through - case '2' : // fall through - case '3' : // fall through - case '4' : // fall through - case '5' : // fall through - case '6' : // fall through - case '7' : c = ScanOctalEscape(c, 2); break; - } - - // According to ECMA-262, 3rd, 7.8.4 (p 18ff) these - // should be illegal, but they are commonly handled - // as non-escaped characters by JS VMs. - AddLiteralChar(c); -} - - -Token::Value Scanner::ScanString() { - uc32 quote = c0_; - Advance(); // consume quote - - LiteralScope literal(this); - while (c0_ != quote && c0_ >= 0 - && !ScannerConstants::kIsLineTerminator.get(c0_)) { - uc32 c = c0_; - Advance(); - if (c == '\\') { - if (c0_ < 0) return Token::ILLEGAL; - ScanEscape(); - } else { - AddLiteralChar(c); - } - } - if (c0_ != quote) return Token::ILLEGAL; - literal.Complete(); - - Advance(); // consume quote - return Token::STRING; -} - - -Token::Value Scanner::Select(Token::Value tok) { - Advance(); - return tok; -} - - -Token::Value Scanner::Select(uc32 next, Token::Value then, Token::Value else_) { - Advance(); - if (c0_ == next) { - Advance(); - return then; - } else { - return else_; - } -} - - -// Returns true if any decimal digits were scanned, returns false otherwise. -void Scanner::ScanDecimalDigits() { - while (IsDecimalDigit(c0_)) - AddLiteralCharAdvance(); -} - - -Token::Value Scanner::ScanNumber(bool seen_period) { - ASSERT(IsDecimalDigit(c0_)); // the first digit of the number or the fraction - - enum { DECIMAL, HEX, OCTAL } kind = DECIMAL; - - LiteralScope literal(this); - if (seen_period) { - // we have already seen a decimal point of the float - AddLiteralChar('.'); - ScanDecimalDigits(); // we know we have at least one digit - - } else { - // if the first character is '0' we must check for octals and hex - if (c0_ == '0') { - AddLiteralCharAdvance(); - - // either 0, 0exxx, 0Exxx, 0.xxx, an octal number, or a hex number - if (c0_ == 'x' || c0_ == 'X') { - // hex number - kind = HEX; - AddLiteralCharAdvance(); - if (!IsHexDigit(c0_)) { - // we must have at least one hex digit after 'x'/'X' - return Token::ILLEGAL; - } - while (IsHexDigit(c0_)) { - AddLiteralCharAdvance(); - } - } else if ('0' <= c0_ && c0_ <= '7') { - // (possible) octal number - kind = OCTAL; - while (true) { - if (c0_ == '8' || c0_ == '9') { - kind = DECIMAL; - break; - } - if (c0_ < '0' || '7' < c0_) break; - AddLiteralCharAdvance(); - } - } - } - - // Parse decimal digits and allow trailing fractional part. - if (kind == DECIMAL) { - ScanDecimalDigits(); // optional - if (c0_ == '.') { - AddLiteralCharAdvance(); - ScanDecimalDigits(); // optional - } - } - } - - // scan exponent, if any - if (c0_ == 'e' || c0_ == 'E') { - ASSERT(kind != HEX); // 'e'/'E' must be scanned as part of the hex number - if (kind == OCTAL) return Token::ILLEGAL; // no exponent for octals allowed - // scan exponent - AddLiteralCharAdvance(); - if (c0_ == '+' || c0_ == '-') - AddLiteralCharAdvance(); - if (!IsDecimalDigit(c0_)) { - // we must have at least one decimal digit after 'e'/'E' - return Token::ILLEGAL; - } - ScanDecimalDigits(); - } - - // The source character immediately following a numeric literal must - // not be an identifier start or a decimal digit; see ECMA-262 - // section 7.8.3, page 17 (note that we read only one decimal digit - // if the value is 0). - if (IsDecimalDigit(c0_) || ScannerConstants::kIsIdentifierStart.get(c0_)) - return Token::ILLEGAL; - - literal.Complete(); - - return Token::NUMBER; -} - - -uc32 Scanner::ScanIdentifierUnicodeEscape() { - Advance(); - if (c0_ != 'u') return unibrow::Utf8::kBadChar; - Advance(); - uc32 c = ScanHexEscape('u', 4); - // We do not allow a unicode escape sequence to start another - // unicode escape sequence. - if (c == '\\') return unibrow::Utf8::kBadChar; - return c; -} - - -Token::Value Scanner::ScanIdentifier() { - ASSERT(ScannerConstants::kIsIdentifierStart.get(c0_)); - - LiteralScope literal(this); - KeywordMatcher keyword_match; - - // Scan identifier start character. - if (c0_ == '\\') { - uc32 c = ScanIdentifierUnicodeEscape(); - // Only allow legal identifier start characters. - if (!ScannerConstants::kIsIdentifierStart.get(c)) return Token::ILLEGAL; - AddLiteralChar(c); - keyword_match.Fail(); - } else { - AddLiteralChar(c0_); - keyword_match.AddChar(c0_); - Advance(); - } - - // Scan the rest of the identifier characters. - while (ScannerConstants::kIsIdentifierPart.get(c0_)) { - if (c0_ == '\\') { - uc32 c = ScanIdentifierUnicodeEscape(); - // Only allow legal identifier part characters. - if (!ScannerConstants::kIsIdentifierPart.get(c)) return Token::ILLEGAL; - AddLiteralChar(c); - keyword_match.Fail(); - } else { - AddLiteralChar(c0_); - keyword_match.AddChar(c0_); - Advance(); - } - } - literal.Complete(); - - return keyword_match.token(); -} - - - -bool Scanner::ScanRegExpPattern(bool seen_equal) { - // Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags - bool in_character_class = false; - - // Previous token is either '/' or '/=', in the second case, the - // pattern starts at =. - next_.location.beg_pos = source_pos() - (seen_equal ? 2 : 1); - next_.location.end_pos = source_pos() - (seen_equal ? 1 : 0); - - // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5, - // the scanner should pass uninterpreted bodies to the RegExp - // constructor. - LiteralScope literal(this); - if (seen_equal) - AddLiteralChar('='); - - while (c0_ != '/' || in_character_class) { - if (ScannerConstants::kIsLineTerminator.get(c0_) || c0_ < 0) return false; - if (c0_ == '\\') { // escaped character - AddLiteralCharAdvance(); - if (ScannerConstants::kIsLineTerminator.get(c0_) || c0_ < 0) return false; - AddLiteralCharAdvance(); - } else { // unescaped character - if (c0_ == '[') in_character_class = true; - if (c0_ == ']') in_character_class = false; - AddLiteralCharAdvance(); - } - } - Advance(); // consume '/' - - literal.Complete(); - - return true; -} - -bool Scanner::ScanRegExpFlags() { - // Scan regular expression flags. - LiteralScope literal(this); - while (ScannerConstants::kIsIdentifierPart.get(c0_)) { - if (c0_ == '\\') { - uc32 c = ScanIdentifierUnicodeEscape(); - if (c != static_cast<uc32>(unibrow::Utf8::kBadChar)) { - // We allow any escaped character, unlike the restriction on - // IdentifierPart when it is used to build an IdentifierName. - AddLiteralChar(c); - continue; - } - } - AddLiteralCharAdvance(); - } - literal.Complete(); - - next_.location.end_pos = source_pos() - 1; - return true; -} } } // namespace v8::internal |