diff options
Diffstat (limited to 'deps/v8/src/scanner.h')
-rw-r--r-- | deps/v8/src/scanner.h | 211 |
1 files changed, 125 insertions, 86 deletions
diff --git a/deps/v8/src/scanner.h b/deps/v8/src/scanner.h index 3cefc833ac..73026ab5a8 100644 --- a/deps/v8/src/scanner.h +++ b/deps/v8/src/scanner.h @@ -44,6 +44,9 @@ namespace v8 { namespace internal { +class ParserRecorder; + + // Returns the value (0 .. 15) of a hexadecimal character c. // If c is not a legal hexadecimal character, returns a value < 0. inline int HexValue(uc32 c) { @@ -117,8 +120,8 @@ class Utf16CharacterStream { virtual bool ReadBlock() = 0; virtual unsigned SlowSeekForward(unsigned code_unit_count) = 0; - const uc16* buffer_cursor_; - const uc16* buffer_end_; + const uint16_t* buffer_cursor_; + const uint16_t* buffer_end_; unsigned pos_; }; @@ -139,12 +142,17 @@ class UnicodeCache { bool IsIdentifierPart(unibrow::uchar c) { return kIsIdentifierPart.get(c); } bool IsLineTerminator(unibrow::uchar c) { return kIsLineTerminator.get(c); } bool IsWhiteSpace(unibrow::uchar c) { return kIsWhiteSpace.get(c); } + bool IsWhiteSpaceOrLineTerminator(unibrow::uchar c) { + return kIsWhiteSpaceOrLineTerminator.get(c); + } private: unibrow::Predicate<IdentifierStart, 128> kIsIdentifierStart; unibrow::Predicate<IdentifierPart, 128> kIsIdentifierPart; unibrow::Predicate<unibrow::LineTerminator, 128> kIsLineTerminator; - unibrow::Predicate<unibrow::WhiteSpace, 128> kIsWhiteSpace; + unibrow::Predicate<WhiteSpace, 128> kIsWhiteSpace; + unibrow::Predicate<WhiteSpaceOrLineTerminator, 128> + kIsWhiteSpaceOrLineTerminator; StaticResource<Utf8Decoder> utf8_decoder_; DISALLOW_COPY_AND_ASSIGN(UnicodeCache); @@ -161,32 +169,32 @@ class DuplicateFinder { backing_store_(16), map_(&Match) { } - int AddAsciiSymbol(Vector<const char> key, int value); - int AddUtf16Symbol(Vector<const uint16_t> key, int value); + int AddOneByteSymbol(Vector<const uint8_t> key, int value); + int AddTwoByteSymbol(Vector<const uint16_t> key, int value); // Add a a number literal by converting it (if necessary) // to the string that ToString(ToNumber(literal)) would generate. // and then adding that string with AddAsciiSymbol. // This string is the actual value used as key in an object literal, // and the one that must be different from the other keys. - int AddNumber(Vector<const char> key, int value); + int AddNumber(Vector<const uint8_t> key, int value); private: - int AddSymbol(Vector<const byte> key, bool is_ascii, int value); + int AddSymbol(Vector<const uint8_t> key, bool is_one_byte, int value); // Backs up the key and its length in the backing store. // The backup is stored with a base 127 encoding of the - // length (plus a bit saying whether the string is ASCII), + // length (plus a bit saying whether the string is one byte), // followed by the bytes of the key. - byte* BackupKey(Vector<const byte> key, bool is_ascii); + uint8_t* BackupKey(Vector<const uint8_t> key, bool is_one_byte); // Compare two encoded keys (both pointing into the backing store) // for having the same base-127 encoded lengths and ASCII-ness, // and then having the same 'length' bytes following. static bool Match(void* first, void* second); // Creates a hash from a sequence of bytes. - static uint32_t Hash(Vector<const byte> key, bool is_ascii); + static uint32_t Hash(Vector<const uint8_t> key, bool is_one_byte); // Checks whether a string containing a JS number is its canonical // form. - static bool IsNumberCanonical(Vector<const char> key); + static bool IsNumberCanonical(Vector<const uint8_t> key); // Size of buffer. Sufficient for using it to call DoubleToCString in // from conversions.h. @@ -206,7 +214,7 @@ class DuplicateFinder { class LiteralBuffer { public: - LiteralBuffer() : is_ascii_(true), position_(0), backing_store_() { } + LiteralBuffer() : is_one_byte_(true), position_(0), backing_store_() { } ~LiteralBuffer() { if (backing_store_.length() > 0) { @@ -216,48 +224,48 @@ class LiteralBuffer { INLINE(void AddChar(uint32_t code_unit)) { if (position_ >= backing_store_.length()) ExpandBuffer(); - if (is_ascii_) { + if (is_one_byte_) { if (code_unit <= unibrow::Latin1::kMaxChar) { backing_store_[position_] = static_cast<byte>(code_unit); position_ += kOneByteSize; return; } - ConvertToUtf16(); + ConvertToTwoByte(); } ASSERT(code_unit < 0x10000u); - *reinterpret_cast<uc16*>(&backing_store_[position_]) = code_unit; + *reinterpret_cast<uint16_t*>(&backing_store_[position_]) = code_unit; position_ += kUC16Size; } - bool is_ascii() { return is_ascii_; } + bool is_one_byte() { return is_one_byte_; } bool is_contextual_keyword(Vector<const char> keyword) { - return is_ascii() && keyword.length() == position_ && + return is_one_byte() && keyword.length() == position_ && (memcmp(keyword.start(), backing_store_.start(), position_) == 0); } - Vector<const uc16> utf16_literal() { - ASSERT(!is_ascii_); + Vector<const uint16_t> two_byte_literal() { + ASSERT(!is_one_byte_); ASSERT((position_ & 0x1) == 0); - return Vector<const uc16>( - reinterpret_cast<const uc16*>(backing_store_.start()), + return Vector<const uint16_t>( + reinterpret_cast<const uint16_t*>(backing_store_.start()), position_ >> 1); } - Vector<const char> ascii_literal() { - ASSERT(is_ascii_); - return Vector<const char>( - reinterpret_cast<const char*>(backing_store_.start()), + Vector<const uint8_t> one_byte_literal() { + ASSERT(is_one_byte_); + return Vector<const uint8_t>( + reinterpret_cast<const uint8_t*>(backing_store_.start()), position_); } int length() { - return is_ascii_ ? position_ : (position_ >> 1); + return is_one_byte_ ? position_ : (position_ >> 1); } void Reset() { position_ = 0; - is_ascii_ = true; + is_one_byte_ = true; } private: @@ -278,8 +286,8 @@ class LiteralBuffer { backing_store_ = new_store; } - void ConvertToUtf16() { - ASSERT(is_ascii_); + void ConvertToTwoByte() { + ASSERT(is_one_byte_); Vector<byte> new_store; int new_content_size = position_ * kUC16Size; if (new_content_size >= backing_store_.length()) { @@ -290,7 +298,7 @@ class LiteralBuffer { new_store = backing_store_; } uint8_t* src = backing_store_.start(); - uc16* dst = reinterpret_cast<uc16*>(new_store.start()); + uint16_t* dst = reinterpret_cast<uint16_t*>(new_store.start()); for (int i = position_ - 1; i >= 0; i--) { dst[i] = src[i]; } @@ -299,10 +307,10 @@ class LiteralBuffer { backing_store_ = new_store; } position_ = new_content_size; - is_ascii_ = false; + is_one_byte_ = false; } - bool is_ascii_; + bool is_one_byte_; int position_; Vector<byte> backing_store_; @@ -365,32 +373,13 @@ class Scanner { // Returns the location information for the current token // (the token last returned by Next()). Location location() const { return current_.location; } - // Returns the literal string, if any, for the current token (the - // token last returned by Next()). The string is 0-terminated. - // Literal strings are collected for identifiers, strings, and - // numbers. - // These functions only give the correct result if the literal - // was scanned between calls to StartLiteral() and TerminateLiteral(). - Vector<const char> literal_ascii_string() { - ASSERT_NOT_NULL(current_.literal_chars); - return current_.literal_chars->ascii_literal(); - } - Vector<const uc16> literal_utf16_string() { - ASSERT_NOT_NULL(current_.literal_chars); - return current_.literal_chars->utf16_literal(); - } - bool is_literal_ascii() { - ASSERT_NOT_NULL(current_.literal_chars); - return current_.literal_chars->is_ascii(); - } - bool is_literal_contextual_keyword(Vector<const char> keyword) { - ASSERT_NOT_NULL(current_.literal_chars); - return current_.literal_chars->is_contextual_keyword(keyword); - } - int literal_length() const { - ASSERT_NOT_NULL(current_.literal_chars); - return current_.literal_chars->length(); - } + + // Similar functions for the upcoming token. + + // One token look-ahead (past the token returned by Next()). + Token::Value peek() const { return next_.token; } + + Location peek_location() const { return next_.location; } bool literal_contains_escapes() const { Location location = current_.location; @@ -401,43 +390,47 @@ class Scanner { } return current_.literal_chars->length() != source_length; } - - // Similar functions for the upcoming token. - - // One token look-ahead (past the token returned by Next()). - Token::Value peek() const { return next_.token; } - - Location peek_location() const { return next_.location; } - - // Returns the literal string for the next token (the token that - // would be returned if Next() were called). - Vector<const char> next_literal_ascii_string() { - ASSERT_NOT_NULL(next_.literal_chars); - return next_.literal_chars->ascii_literal(); - } - Vector<const uc16> next_literal_utf16_string() { - ASSERT_NOT_NULL(next_.literal_chars); - return next_.literal_chars->utf16_literal(); - } - bool is_next_literal_ascii() { - ASSERT_NOT_NULL(next_.literal_chars); - return next_.literal_chars->is_ascii(); + bool is_literal_contextual_keyword(Vector<const char> keyword) { + ASSERT_NOT_NULL(current_.literal_chars); + return current_.literal_chars->is_contextual_keyword(keyword); } bool is_next_contextual_keyword(Vector<const char> keyword) { ASSERT_NOT_NULL(next_.literal_chars); return next_.literal_chars->is_contextual_keyword(keyword); } - int next_literal_length() const { - ASSERT_NOT_NULL(next_.literal_chars); - return next_.literal_chars->length(); + + Handle<String> AllocateNextLiteralString(Isolate* isolate, + PretenureFlag tenured); + Handle<String> AllocateInternalizedString(Isolate* isolate); + + double DoubleValue(); + bool UnescapedLiteralMatches(const char* data, int length) { + if (is_literal_one_byte() && + literal_length() == length && + !literal_contains_escapes()) { + const char* token = + reinterpret_cast<const char*>(literal_one_byte_string().start()); + return !strncmp(token, data, length); + } + return false; + } + void IsGetOrSet(bool* is_get, bool* is_set) { + if (is_literal_one_byte() && + literal_length() == 3 && + !literal_contains_escapes()) { + const char* token = + reinterpret_cast<const char*>(literal_one_byte_string().start()); + *is_get = strncmp(token, "get", 3) == 0; + *is_set = !*is_get && strncmp(token, "set", 3) == 0; + } } - UnicodeCache* unicode_cache() { return unicode_cache_; } + int FindNumber(DuplicateFinder* finder, int value); + int FindSymbol(DuplicateFinder* finder, int value); - static const int kCharacterLookaheadBufferSize = 1; + void LogSymbol(ParserRecorder* log, int position); - // Scans octal escape sequence. Also accepts "\0" decimal escape sequence. - uc32 ScanOctalEscape(uc32 c, int length); + UnicodeCache* unicode_cache() { return unicode_cache_; } // Returns the location of the last seen octal literal. Location octal_position() const { return octal_pos_; } @@ -490,6 +483,11 @@ class Scanner { LiteralBuffer* literal_chars; }; + static const int kCharacterLookaheadBufferSize = 1; + + // Scans octal escape sequence. Also accepts "\0" decimal escape sequence. + uc32 ScanOctalEscape(uc32 c, int length); + // Call this after setting source_ to the input. void Init() { // Set c0_ (one character ahead) @@ -550,6 +548,47 @@ class Scanner { } } + // Returns the literal string, if any, for the current token (the + // token last returned by Next()). The string is 0-terminated. + // Literal strings are collected for identifiers, strings, and + // numbers. + // These functions only give the correct result if the literal + // was scanned between calls to StartLiteral() and TerminateLiteral(). + Vector<const uint8_t> literal_one_byte_string() { + ASSERT_NOT_NULL(current_.literal_chars); + return current_.literal_chars->one_byte_literal(); + } + Vector<const uint16_t> literal_two_byte_string() { + ASSERT_NOT_NULL(current_.literal_chars); + return current_.literal_chars->two_byte_literal(); + } + bool is_literal_one_byte() { + ASSERT_NOT_NULL(current_.literal_chars); + return current_.literal_chars->is_one_byte(); + } + int literal_length() const { + ASSERT_NOT_NULL(current_.literal_chars); + return current_.literal_chars->length(); + } + // Returns the literal string for the next token (the token that + // would be returned if Next() were called). + Vector<const uint8_t> next_literal_one_byte_string() { + ASSERT_NOT_NULL(next_.literal_chars); + return next_.literal_chars->one_byte_literal(); + } + Vector<const uint16_t> next_literal_two_byte_string() { + ASSERT_NOT_NULL(next_.literal_chars); + return next_.literal_chars->two_byte_literal(); + } + bool is_next_literal_one_byte() { + ASSERT_NOT_NULL(next_.literal_chars); + return next_.literal_chars->is_one_byte(); + } + int next_literal_length() const { + ASSERT_NOT_NULL(next_.literal_chars); + return next_.literal_chars->length(); + } + uc32 ScanHexNumber(int expected_length); // Scans a single JavaScript token. |