diff options
author | Ryan Dahl <ry@tinyclouds.org> | 2010-11-24 01:03:06 -0800 |
---|---|---|
committer | Ryan Dahl <ry@tinyclouds.org> | 2010-11-24 01:03:06 -0800 |
commit | 73318fa09d0b67a67c1033bf0bfcc0e78883f257 (patch) | |
tree | ecdf0c18b14e3158cfbdff95d012f810b216f43d /deps/v8/src/scanner-base.h | |
parent | fa8ffaf9b2375f98ac86f887bf76f3aa81fa5aa4 (diff) | |
download | node-new-73318fa09d0b67a67c1033bf0bfcc0e78883f257.tar.gz |
Upgrade V8 to 2.5.8
Diffstat (limited to 'deps/v8/src/scanner-base.h')
-rw-r--r-- | deps/v8/src/scanner-base.h | 373 |
1 files changed, 360 insertions, 13 deletions
diff --git a/deps/v8/src/scanner-base.h b/deps/v8/src/scanner-base.h index 50f30305c4..3714ae2d1b 100644 --- a/deps/v8/src/scanner-base.h +++ b/deps/v8/src/scanner-base.h @@ -37,11 +37,24 @@ #include "unicode-inl.h" #include "char-predicates.h" #include "utils.h" +#include "list-inl.h" namespace v8 { namespace internal { -// Interface through which the scanner reads characters from the input source. +// Returns the value (0 .. 15) of a hexadecimal character c. +// If c is not a legal hexadecimal character, returns a value < 0. +inline int HexValue(uc32 c) { + c -= '0'; + if (static_cast<unsigned>(c) <= 9) return c; + c = (c | 0x20) - ('a' - '0'); // detect 0x11..0x16 and 0x31..0x36. + if (static_cast<unsigned>(c) <= 5) return c + 10; + return -1; +} + +// ---------------------------------------------------------------------------- +// UTF16Buffer - scanner input source with pushback. + class UTF16Buffer { public: UTF16Buffer(); @@ -54,7 +67,11 @@ class UTF16Buffer { int pos() const { return pos_; } + static const int kNoEndPosition = 1; + protected: + // Initial value of end_ before the input stream is initialized. + int pos_; // Current position in the buffer. int end_; // Position where scanning should stop (EOF). }; @@ -79,6 +96,335 @@ class ScannerConstants : AllStatic { static StaticResource<Utf8Decoder> utf8_decoder_; }; +// ---------------------------------------------------------------------------- +// LiteralCollector - Collector of chars of literals. + +class LiteralCollector { + public: + LiteralCollector(); + ~LiteralCollector(); + + inline void AddChar(uc32 c) { + if (recording_) { + if (static_cast<unsigned>(c) <= unibrow::Utf8::kMaxOneByteChar) { + buffer_.Add(static_cast<char>(c)); + } else { + AddCharSlow(c); + } + } + } + + void StartLiteral() { + buffer_.StartSequence(); + recording_ = true; + } + + Vector<const char> EndLiteral() { + if (recording_) { + recording_ = false; + buffer_.Add(kEndMarker); + Vector<char> sequence = buffer_.EndSequence(); + return Vector<const char>(sequence.start(), sequence.length()); + } + return Vector<const char>(); + } + + void DropLiteral() { + if (recording_) { + recording_ = false; + buffer_.DropSequence(); + } + } + + void Reset() { + buffer_.Reset(); + } + + // The end marker added after a parsed literal. + // Using zero allows the usage of strlen and similar functions on + // identifiers and numbers (but not strings, since they may contain zero + // bytes). + static const char kEndMarker = '\x00'; + private: + static const int kInitialCapacity = 256; + SequenceCollector<char, 4> buffer_; + bool recording_; + void AddCharSlow(uc32 c); +}; + +// ---------------------------------------------------------------------------- +// Scanner base-class. + +// Generic functionality used by both JSON and JavaScript scanners. +class Scanner { + public: + typedef unibrow::Utf8InputBuffer<1024> Utf8Decoder; + + class LiteralScope { + public: + explicit LiteralScope(Scanner* self); + ~LiteralScope(); + void Complete(); + + private: + Scanner* scanner_; + bool complete_; + }; + + Scanner(); + + // Returns the current token again. + Token::Value current_token() { return current_.token; } + + // One token look-ahead (past the token returned by Next()). + Token::Value peek() const { return next_.token; } + + struct Location { + Location(int b, int e) : beg_pos(b), end_pos(e) { } + Location() : beg_pos(0), end_pos(0) { } + int beg_pos; + int end_pos; + }; + + // Returns the location information for the current token + // (the token returned by Next()). + Location location() const { return current_.location; } + Location peek_location() const { return next_.location; } + + // Returns the literal string, if any, for the current token (the + // token returned by Next()). The string is 0-terminated and in + // UTF-8 format; they may contain 0-characters. Literal strings are + // collected for identifiers, strings, and numbers. + // These functions only give the correct result if the literal + // was scanned between calls to StartLiteral() and TerminateLiteral(). + const char* literal_string() const { + return current_.literal_chars.start(); + } + + int literal_length() const { + // Excluding terminal '\x00' added by TerminateLiteral(). + return current_.literal_chars.length() - 1; + } + + Vector<const char> literal() const { + return Vector<const char>(literal_string(), literal_length()); + } + + // Returns the literal string for the next token (the token that + // would be returned if Next() were called). + const char* next_literal_string() const { + return next_.literal_chars.start(); + } + + + // Returns the length of the next token (that would be returned if + // Next() were called). + int next_literal_length() const { + // Excluding terminal '\x00' added by TerminateLiteral(). + return next_.literal_chars.length() - 1; + } + + Vector<const char> next_literal() const { + return Vector<const char>(next_literal_string(), next_literal_length()); + } + + bool stack_overflow() { return stack_overflow_; } + + static const int kCharacterLookaheadBufferSize = 1; + + protected: + // The current and look-ahead token. + struct TokenDesc { + Token::Value token; + Location location; + Vector<const char> literal_chars; + }; + + // Call this after setting source_ to the input. + void Init() { + // Set c0_ (one character ahead) + ASSERT(kCharacterLookaheadBufferSize == 1); + Advance(); + // Initialize current_ to not refer to a literal. + current_.literal_chars = Vector<const char>(); + // Reset literal buffer. + literal_buffer_.Reset(); + } + + // Literal buffer support + inline void StartLiteral() { + literal_buffer_.StartLiteral(); + } + + inline void AddLiteralChar(uc32 c) { + literal_buffer_.AddChar(c); + } + + // Complete scanning of a literal. + inline void TerminateLiteral() { + next_.literal_chars = literal_buffer_.EndLiteral(); + } + + // Stops scanning of a literal and drop the collected characters, + // e.g., due to an encountered error. + inline void DropLiteral() { + literal_buffer_.DropLiteral(); + } + + inline void AddLiteralCharAdvance() { + AddLiteralChar(c0_); + Advance(); + } + + // Low-level scanning support. + void Advance() { c0_ = source_->Advance(); } + void PushBack(uc32 ch) { + source_->PushBack(ch); + c0_ = ch; + } + + inline Token::Value Select(Token::Value tok) { + Advance(); + return tok; + } + + inline Token::Value Select(uc32 next, Token::Value then, Token::Value else_) { + Advance(); + if (c0_ == next) { + Advance(); + return then; + } else { + return else_; + } + } + + uc32 ScanHexEscape(uc32 c, int length); + uc32 ScanOctalEscape(uc32 c, int length); + + // Return the current source position. + int source_pos() { + return source_->pos() - kCharacterLookaheadBufferSize; + } + + TokenDesc current_; // desc for current token (as returned by Next()) + TokenDesc next_; // desc for next token (one token look-ahead) + + // Input stream. Must be initialized to an UTF16Buffer. + UTF16Buffer* source_; + + // Buffer to hold literal values (identifiers, strings, numbers) + // using '\x00'-terminated UTF-8 encoding. Handles allocation internally. + LiteralCollector literal_buffer_; + + bool stack_overflow_; + + // One Unicode character look-ahead; c0_ < 0 at the end of the input. + uc32 c0_; +}; + +// ---------------------------------------------------------------------------- +// JavaScriptScanner - base logic for JavaScript scanning. + +class JavaScriptScanner : public Scanner { + public: + + // Bit vector representing set of types of literals. + enum LiteralType { + kNoLiterals = 0, + kLiteralNumber = 1, + kLiteralIdentifier = 2, + kLiteralString = 4, + kLiteralRegExp = 8, + kLiteralRegExpFlags = 16, + kAllLiterals = 31 + }; + + // A LiteralScope that disables recording of some types of JavaScript + // literals. If the scanner is configured to not record the specific + // type of literal, the scope will not call StartLiteral. + class LiteralScope { + public: + LiteralScope(JavaScriptScanner* self, LiteralType type) + : scanner_(self), complete_(false) { + if (scanner_->RecordsLiteral(type)) { + scanner_->StartLiteral(); + } + } + ~LiteralScope() { + if (!complete_) scanner_->DropLiteral(); + } + void Complete() { + scanner_->TerminateLiteral(); + complete_ = true; + } + + private: + JavaScriptScanner* scanner_; + bool complete_; + }; + + JavaScriptScanner(); + + // Returns the next token. + Token::Value Next(); + + // Returns true if there was a line terminator before the peek'ed token. + bool has_line_terminator_before_next() const { + return has_line_terminator_before_next_; + } + + // Scans the input as a regular expression pattern, previous + // character(s) must be /(=). Returns true if a pattern is scanned. + bool ScanRegExpPattern(bool seen_equal); + // Returns true if regexp flags are scanned (always since flags can + // be empty). + bool ScanRegExpFlags(); + + // Tells whether the buffer contains an identifier (no escapes). + // Used for checking if a property name is an identifier. + static bool IsIdentifier(unibrow::CharacterStream* buffer); + + // Seek forward to the given position. This operation does not + // work in general, for instance when there are pushed back + // characters, but works for seeking forward until simple delimiter + // tokens, which is what it is used for. + void SeekForward(int pos); + + // Whether this scanner records the given literal type or not. + bool RecordsLiteral(LiteralType type) { + return (literal_flags_ & type) != 0; + } + + protected: + bool SkipWhiteSpace(); + Token::Value SkipSingleLineComment(); + Token::Value SkipMultiLineComment(); + + // Scans a single JavaScript token. + void Scan(); + + void ScanDecimalDigits(); + Token::Value ScanNumber(bool seen_period); + Token::Value ScanIdentifierOrKeyword(); + Token::Value ScanIdentifierSuffix(LiteralScope* literal); + + void ScanEscape(); + Token::Value ScanString(); + + // Scans a possible HTML comment -- begins with '<!'. + Token::Value ScanHtmlComment(); + + // Decodes a unicode escape-sequence which is part of an identifier. + // If the escape sequence cannot be decoded the result is kBadChar. + uc32 ScanIdentifierUnicodeEscape(); + + int literal_flags_; + bool has_line_terminator_before_next_; +}; + + +// ---------------------------------------------------------------------------- +// Keyword matching state machine. class KeywordMatcher { // Incrementally recognize keywords. @@ -101,10 +447,11 @@ class KeywordMatcher { Token::Value token() { return token_; } - inline void AddChar(unibrow::uchar input) { + inline bool AddChar(unibrow::uchar input) { if (state_ != UNMATCHABLE) { Step(input); } + return state_ != UNMATCHABLE; } void Fail() { @@ -155,23 +502,23 @@ class KeywordMatcher { const char* keyword, int position, Token::Value token_if_match) { - if (input == static_cast<unibrow::uchar>(keyword[position])) { - state_ = KEYWORD_PREFIX; - this->keyword_ = keyword; - this->counter_ = position + 1; - this->keyword_token_ = token_if_match; - return true; + if (input != static_cast<unibrow::uchar>(keyword[position])) { + return false; } - return false; + state_ = KEYWORD_PREFIX; + this->keyword_ = keyword; + this->counter_ = position + 1; + this->keyword_token_ = token_if_match; + return true; } // If input equals match character, transition to new state and return true. inline bool MatchState(unibrow::uchar input, char match, State new_state) { - if (input == static_cast<unibrow::uchar>(match)) { - state_ = new_state; - return true; + if (input != static_cast<unibrow::uchar>(match)) { + return false; } - return false; + state_ = new_state; + return true; } inline bool MatchKeyword(unibrow::uchar input, |