diff options
Diffstat (limited to 'js/src/jsstr.h')
-rw-r--r-- | js/src/jsstr.h | 1077 |
1 files changed, 1077 insertions, 0 deletions
diff --git a/js/src/jsstr.h b/js/src/jsstr.h new file mode 100644 index 0000000..275f989 --- /dev/null +++ b/js/src/jsstr.h @@ -0,0 +1,1077 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- + * + * ***** BEGIN LICENSE BLOCK ***** + * Version: MPL 1.1/GPL 2.0/LGPL 2.1 + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * The Original Code is Mozilla Communicator client code, released + * March 31, 1998. + * + * The Initial Developer of the Original Code is + * Netscape Communications Corporation. + * Portions created by the Initial Developer are Copyright (C) 1998 + * the Initial Developer. All Rights Reserved. + * + * Contributor(s): + * + * Alternatively, the contents of this file may be used under the terms of + * either of the GNU General Public License Version 2 or later (the "GPL"), + * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. 
+ * + * ***** END LICENSE BLOCK ***** */ + +#ifndef jsstr_h___ +#define jsstr_h___ +/* + * JS string type implementation. + * + * A JS string is a counted array of unicode characters. To support handoff + * of API client memory, the chars are allocated separately from the length, + * necessitating a pointer after the count, to form a separately allocated + * string descriptor. String descriptors are GC'ed, while their chars are + * allocated from the malloc heap. + */ +#include <ctype.h> +#include "jsapi.h" +#include "jsprvtd.h" +#include "jshashtable.h" +#include "jslock.h" +#include "jsobj.h" +#include "jsvalue.h" +#include "jscell.h" + +enum { + UNIT_STRING_LIMIT = 256U, + SMALL_CHAR_LIMIT = 128U, /* Bigger chars cannot be in a length-2 string. */ + NUM_SMALL_CHARS = 64U, + INT_STRING_LIMIT = 256U, + NUM_HUNDRED_STRINGS = 156U +}; + +extern jschar * +js_GetDependentStringChars(JSString *str); + +extern JSString * JS_FASTCALL +js_ConcatStrings(JSContext *cx, JSString *left, JSString *right); + +JS_STATIC_ASSERT(JS_BITS_PER_WORD >= 32); + +struct JSRopeBufferInfo { + /* Number of jschars we can hold, not including null terminator. */ + size_t capacity; +}; + +/* Forward declaration for friending. */ +namespace js { namespace mjit { + class Compiler; +}} + +struct JSLinearString; + +/* + * The GC-thing "string" type. + * + * In FLAT strings, the mChars field points to a flat character array owned by + * its GC-thing descriptor. The array is terminated at index length by a zero + * character and the size of the array in bytes is + * (length + 1) * sizeof(jschar). The terminator is purely a backstop, in case + * the chars pointer flows out to native code that requires \u0000 termination. + * + * A flat string with the ATOMIZED flag means that the string is hashed as + * an atom. This flag is used to avoid re-hashing the already-atomized string. 
+ * + * A flat string with the EXTENSIBLE flag means that the string may change into + * a dependent string as part of an optimization with js_ConcatStrings: + * extending |str1 = "abc"| with the character |str2 = str1 + "d"| will place + * "d" in the extra capacity from |str1|, make that the buffer for |str2|, and + * turn |str1| into a dependent string of |str2|. + * + * Flat strings without the EXTENSIBLE flag can be safely accessed by multiple + * threads. + * + * When the string is DEPENDENT, the string depends on characters of another + * string strongly referenced by the base field. The base member may point to + * another dependent string if chars() has not been called yet. + * + * When a string is a ROPE, it represents the lazy concatenation of other + * strings. In general, the nodes reachable from any rope form a dag. + * + * To allow static type-based checking that a given JSString* always points + * to a flat or non-rope string, the JSFlatString and JSLinearString types may + * be used. Instead of casting, callers should use ensureX() and assertIsX(). + */ +struct JSString +{ + friend class js::TraceRecorder; + friend class js::mjit::Compiler; + + friend JSAtom *js_AtomizeString(JSContext *cx, JSString *str, uintN flags); + + /* + * Not private because we want to be able to use static initializers for + * them. Don't use these directly! FIXME bug 614459. 
+ */ + size_t lengthAndFlags; /* in all strings */ + union { + const jschar *chars; /* in non-rope strings */ + JSString *left; /* in rope strings */ + } u; + union { + jschar inlineStorage[4]; /* in short strings */ + struct { + union { + JSString *right; /* in rope strings */ + JSString *base; /* in dependent strings */ + size_t capacity; /* in extensible flat strings */ + }; + union { + JSString *parent; /* temporarily used during flatten */ + size_t reserved; /* may use for bug 615290 */ + }; + } s; + size_t externalStringType; /* in external strings */ + }; + + /* + * The lengthAndFlags field in string headers has data arranged in the + * following way: + * + * [ length (bits 4-31) ][ flags (bits 2-3) ][ type (bits 0-1) ] + * + * The length is packed in lengthAndFlags, even in string types that don't + * need 3 other fields, to make the length check simpler. + * + * When the string type is FLAT, the flags can contain ATOMIZED or + * EXTENSIBLE. + */ + static const size_t TYPE_FLAGS_MASK = JS_BITMASK(4); + static const size_t LENGTH_SHIFT = 4; + + static const size_t TYPE_MASK = JS_BITMASK(2); + static const size_t FLAT = 0x0; + static const size_t DEPENDENT = 0x1; + static const size_t ROPE = 0x2; + + /* Allow checking 1 bit for dependent/rope strings. */ + static const size_t DEPENDENT_BIT = JS_BIT(0); + static const size_t ROPE_BIT = JS_BIT(1); + + static const size_t ATOMIZED = JS_BIT(2); + static const size_t EXTENSIBLE = JS_BIT(3); + + + /* Pack |length| above the low LENGTH_SHIFT bits and OR in the type/flag bits. */ + size_t buildLengthAndFlags(size_t length, size_t flags) { + return (length << LENGTH_SHIFT) | flags; + } + + inline js::gc::Cell *asCell() { + return reinterpret_cast<js::gc::Cell *>(this); + } + + inline js::gc::FreeCell *asFreeCell() { + return reinterpret_cast<js::gc::FreeCell *>(this); + } + + /* + * Generous but sane length bound; the "-1" is there for compatibility with + * OOM tests. 
+ */ + static const size_t MAX_LENGTH = (1 << 28) - 1; + + JS_ALWAYS_INLINE bool isDependent() const { + return lengthAndFlags & DEPENDENT_BIT; + } + + JS_ALWAYS_INLINE bool isFlat() const { + return (lengthAndFlags & TYPE_MASK) == FLAT; + } + + JS_ALWAYS_INLINE bool isExtensible() const { + JS_ASSERT_IF(lengthAndFlags & EXTENSIBLE, isFlat()); + return lengthAndFlags & EXTENSIBLE; + } + + JS_ALWAYS_INLINE bool isAtomized() const { + JS_ASSERT_IF(lengthAndFlags & ATOMIZED, isFlat()); + return lengthAndFlags & ATOMIZED; + } + + JS_ALWAYS_INLINE bool isRope() const { + return lengthAndFlags & ROPE_BIT; + } + + JS_ALWAYS_INLINE size_t length() const { + return lengthAndFlags >> LENGTH_SHIFT; + } + + /* True when the length field is zero, i.e. nothing is set above the type/flag bits. */ + JS_ALWAYS_INLINE bool empty() const { + return lengthAndFlags <= TYPE_FLAGS_MASK; + } + + /* This can fail by returning null and reporting an error on cx. */ + JS_ALWAYS_INLINE const jschar *getChars(JSContext *cx) { + if (isRope()) + return flatten(cx); + return nonRopeChars(); + } + + /* This can fail by returning null and reporting an error on cx. */ + JS_ALWAYS_INLINE const jschar *getCharsZ(JSContext *cx) { + if (!isFlat()) + return undepend(cx); + return flatChars(); + } + + JS_ALWAYS_INLINE void initFlatNotTerminated(jschar *chars, size_t length) { + JS_ASSERT(length <= MAX_LENGTH); + JS_ASSERT(!isStatic(this)); + lengthAndFlags = buildLengthAndFlags(length, FLAT); + u.chars = chars; + } + + /* Specific flat string initializer and accessor methods. 
*/ + JS_ALWAYS_INLINE void initFlat(jschar *chars, size_t length) { + initFlatNotTerminated(chars, length); + JS_ASSERT(chars[length] == jschar(0)); + } + + JS_ALWAYS_INLINE void initShortString(const jschar *chars, size_t length) { + JS_ASSERT(length <= MAX_LENGTH); + JS_ASSERT(chars >= inlineStorage && chars < (jschar *)(this + 2)); + JS_ASSERT(!isStatic(this)); + lengthAndFlags = buildLengthAndFlags(length, FLAT); + u.chars = chars; + } + + JS_ALWAYS_INLINE void initFlatExtensible(jschar *chars, size_t length, size_t cap) { + JS_ASSERT(length <= MAX_LENGTH); + JS_ASSERT(chars[length] == jschar(0)); + JS_ASSERT(!isStatic(this)); + lengthAndFlags = buildLengthAndFlags(length, FLAT | EXTENSIBLE); + u.chars = chars; + s.capacity = cap; + } + + JS_ALWAYS_INLINE JSFlatString *assertIsFlat() { + JS_ASSERT(isFlat()); + return reinterpret_cast<JSFlatString *>(this); + } + + JS_ALWAYS_INLINE const jschar *flatChars() const { + JS_ASSERT(isFlat()); + return u.chars; + } + + JS_ALWAYS_INLINE size_t flatLength() const { + JS_ASSERT(isFlat()); + return length(); + } + + inline void flatSetAtomized() { + JS_ASSERT(isFlat()); + JS_ASSERT(!isStatic(this)); + lengthAndFlags |= ATOMIZED; + } + + inline void flatClearExtensible() { + /* + * N.B. This may be called on static strings, which may be in read-only + * memory, so we cannot unconditionally apply the mask. + */ + JS_ASSERT(isFlat()); + if (lengthAndFlags & EXTENSIBLE) + lengthAndFlags &= ~EXTENSIBLE; + } + + /* + * The chars pointer should point somewhere inside the buffer owned by base. + * The caller still needs to pass base for GC purposes. 
+ */ + inline void initDependent(JSString *base, const jschar *chars, size_t length) { + JS_ASSERT(!isStatic(this)); + JS_ASSERT(base->isFlat()); + JS_ASSERT(chars >= base->flatChars() && chars < base->flatChars() + base->length()); + JS_ASSERT(length <= base->length() - (chars - base->flatChars())); + lengthAndFlags = buildLengthAndFlags(length, DEPENDENT); + u.chars = chars; + s.base = base; + } + + inline JSLinearString *dependentBase() const { + JS_ASSERT(isDependent()); + return s.base->assertIsLinear(); + } + + /* + * Read-only accessor for the chars pointer of a dependent string. It is + * const-qualified, like dependentBase() and dependentLength(), so it can + * be called through a const JSString. + */ + JS_ALWAYS_INLINE const jschar *dependentChars() const { + JS_ASSERT(isDependent()); + return u.chars; + } + + inline size_t dependentLength() const { + JS_ASSERT(isDependent()); + return length(); + } + + const jschar *undepend(JSContext *cx); + + const jschar *nonRopeChars() const { + JS_ASSERT(!isRope()); + return u.chars; + } + + /* Rope-related initializers and accessors. */ + inline void initRopeNode(JSString *left, JSString *right, size_t length) { + JS_ASSERT(left->length() + right->length() == length); + lengthAndFlags = buildLengthAndFlags(length, ROPE); + u.left = left; + s.right = right; + } + + inline JSString *ropeLeft() const { + JS_ASSERT(isRope()); + return u.left; + } + + inline JSString *ropeRight() const { + JS_ASSERT(isRope()); + return s.right; + } + + /* Retag this string as DEPENDENT on |base|, with length end - u.chars. */ + inline void finishTraversalConversion(JSString *base, const jschar *baseBegin, const jschar *end) { + JS_ASSERT(baseBegin <= u.chars && u.chars <= end); + lengthAndFlags = buildLengthAndFlags(end - u.chars, DEPENDENT); + s.base = base; + } + + const jschar *flatten(JSContext *maybecx); + + /* Flatten first if this is a rope; returns NULL when flatten(cx) returns null. */ + JSLinearString *ensureLinear(JSContext *cx) { + if (isRope() && !flatten(cx)) + return NULL; + return reinterpret_cast<JSLinearString *>(this); + } + + bool isLinear() const { + return !isRope(); + } + + JSLinearString *assertIsLinear() { + JS_ASSERT(isLinear()); + return reinterpret_cast<JSLinearString *>(this); + } + + typedef uint8 SmallChar; + + static inline bool fitsInSmallChar(jschar c) { + return 
c < SMALL_CHAR_LIMIT && toSmallChar[c] != INVALID_SMALL_CHAR; + } + + static inline bool isUnitString(void *ptr) { + jsuword delta = reinterpret_cast<jsuword>(ptr) - + reinterpret_cast<jsuword>(unitStringTable); + if (delta >= UNIT_STRING_LIMIT * sizeof(JSString)) + return false; + + /* If ptr points inside the static array, it must be well-aligned. */ + JS_ASSERT(delta % sizeof(JSString) == 0); + return true; + } + + static inline bool isLength2String(void *ptr) { + jsuword delta = reinterpret_cast<jsuword>(ptr) - + reinterpret_cast<jsuword>(length2StringTable); + if (delta >= NUM_SMALL_CHARS * NUM_SMALL_CHARS * sizeof(JSString)) + return false; + + /* If ptr points inside the static array, it must be well-aligned. */ + JS_ASSERT(delta % sizeof(JSString) == 0); + return true; + } + + static inline bool isHundredString(void *ptr) { + jsuword delta = reinterpret_cast<jsuword>(ptr) - + reinterpret_cast<jsuword>(hundredStringTable); + if (delta >= NUM_HUNDRED_STRINGS * sizeof(JSString)) + return false; + + /* If ptr points inside the static array, it must be well-aligned. */ + JS_ASSERT(delta % sizeof(JSString) == 0); + return true; + } + + static inline bool isStatic(void *ptr) { + return isUnitString(ptr) || isLength2String(ptr) || isHundredString(ptr); + } + +#ifdef __SUNPRO_CC +#pragma align 8 (__1cIJSStringPunitStringTable_, __1cIJSStringSlength2StringTable_, __1cIJSStringShundredStringTable_) +#endif + + static const SmallChar INVALID_SMALL_CHAR = -1; + + static const jschar fromSmallChar[]; + static const SmallChar toSmallChar[]; + static const JSString unitStringTable[]; + static const JSString length2StringTable[]; + static const JSString hundredStringTable[]; + /* + * Since int strings can be unit strings, length-2 strings, or hundred + * strings, we keep a table to map from integer to the correct string. 
+ */ + static const JSString *const intStringTable[]; + + static JSFlatString *unitString(jschar c); + static JSLinearString *getUnitString(JSContext *cx, JSString *str, size_t index); + static JSFlatString *length2String(jschar c1, jschar c2); + static JSFlatString *length2String(uint32 i); + static JSFlatString *intString(jsint i); + + static JSFlatString *lookupStaticString(const jschar *chars, size_t length); + + JS_ALWAYS_INLINE void finalize(JSContext *cx); + + static size_t offsetOfLengthAndFlags() { + return offsetof(JSString, lengthAndFlags); + } + + static size_t offsetOfChars() { + return offsetof(JSString, u.chars); + } + + static void staticAsserts() { + JS_STATIC_ASSERT(((JSString::MAX_LENGTH << JSString::LENGTH_SHIFT) >> + JSString::LENGTH_SHIFT) == JSString::MAX_LENGTH); + } +}; + +/* + * A "linear" string may or may not be null-terminated, but it provides + * infallible access to a linear array of characters. Namely, this means the + * string is not a rope. + */ +struct JSLinearString : JSString +{ + const jschar *chars() const { return JSString::nonRopeChars(); } +}; + +JS_STATIC_ASSERT(sizeof(JSLinearString) == sizeof(JSString)); + +/* + * A linear string where, additionally, chars()[length()] == '\0'. Namely, this + * means the string is not a dependent string or rope. + */ +struct JSFlatString : JSLinearString +{ + const jschar *charsZ() const { return chars(); } +}; + +JS_STATIC_ASSERT(sizeof(JSFlatString) == sizeof(JSString)); + +/* + * A flat string which has been "atomized", i.e., that is a unique string among + * other atomized strings and therefore allows equality via pointer comparison. 
+ */ +struct JSAtom : JSFlatString +{ +}; + +struct JSExternalString : JSString +{ + static const uintN TYPE_LIMIT = 8; + static JSStringFinalizeOp str_finalizers[TYPE_LIMIT]; + + static intN changeFinalizer(JSStringFinalizeOp oldop, + JSStringFinalizeOp newop) { + for (uintN i = 0; i != JS_ARRAY_LENGTH(str_finalizers); i++) { + if (str_finalizers[i] == oldop) { + str_finalizers[i] = newop; + return intN(i); + } + } + return -1; + } + + void finalize(JSContext *cx); + void finalize(); +}; + +JS_STATIC_ASSERT(sizeof(JSString) == sizeof(JSExternalString)); + +/* + * Short strings should be created in cases where it's worthwhile to avoid + * mallocing the string buffer for a small string. We keep 2 string headers' + * worth of space in short strings so that more strings can be stored this way. + */ +class JSShortString : public js::gc::Cell +{ + JSString mHeader; + JSString mDummy; + + public: + /* + * Set the length of the string, and return a buffer for the caller to write + * to. This buffer must be written immediately, and should not be modified + * afterward. 
+ */ + inline jschar *init(size_t length) { + JS_ASSERT(length <= MAX_SHORT_STRING_LENGTH); + mHeader.initShortString(mHeader.inlineStorage, length); + return mHeader.inlineStorage; + } + + inline jschar *getInlineStorageBeforeInit() { + return mHeader.inlineStorage; + } + + inline void initAtOffsetInBuffer(jschar *p, size_t length) { + JS_ASSERT(p >= mHeader.inlineStorage && p < mHeader.inlineStorage + MAX_SHORT_STRING_LENGTH); + mHeader.initShortString(p, length); + } + + /* Re-initialize the header with a new length, keeping the current chars pointer. */ + inline void resetLength(size_t length) { + mHeader.initShortString(mHeader.flatChars(), length); + } + + inline JSString *header() { + return &mHeader; + } + + static const size_t FREE_STRING_WORDS = 2; + + static const size_t MAX_SHORT_STRING_LENGTH = + ((sizeof(JSString) + FREE_STRING_WORDS * sizeof(size_t)) / sizeof(jschar)) - 1; + + static inline bool fitsIntoShortString(size_t length) { + return length <= MAX_SHORT_STRING_LENGTH; + } + + JS_ALWAYS_INLINE void finalize(JSContext *cx); + + static void staticAsserts() { + JS_STATIC_ASSERT(offsetof(JSString, inlineStorage) == + sizeof(JSString) - JSShortString::FREE_STRING_WORDS * sizeof(void *)); + JS_STATIC_ASSERT(offsetof(JSShortString, mDummy) == sizeof(JSString)); + JS_STATIC_ASSERT(offsetof(JSString, inlineStorage) + + sizeof(jschar) * (JSShortString::MAX_SHORT_STRING_LENGTH + 1) == + sizeof(JSShortString)); + } +}; + +namespace js { + +class StringBuffer; + +/* + * When an algorithm does not need a string represented as a single linear + * array of characters, this range utility may be used to traverse the string as a + * sequence of linear arrays of characters. This avoids flattening ropes. + * + * Implemented in jsstrinlines.h. + */ +class StringSegmentRange; +class MutatingRopeSegmentRange; + +/* + * Utility for building a rope (lazy concatenation) of strings. 
+ */ +class RopeBuilder; + +} /* namespace js */ + +extern const jschar * +js_GetStringChars(JSContext *cx, JSString *str); + +extern const jschar * +js_UndependString(JSContext *cx, JSString *str); + +extern JSBool +js_MakeStringImmutable(JSContext *cx, JSString *str); + +extern JSString * JS_FASTCALL +js_toLowerCase(JSContext *cx, JSString *str); + +extern JSString * JS_FASTCALL +js_toUpperCase(JSContext *cx, JSString *str); + +struct JSSubString { + size_t length; + const jschar *chars; +}; + +extern jschar js_empty_ucstr[]; +extern JSSubString js_EmptySubString; + +/* Unicode character attribute lookup tables. */ +extern const uint8 js_X[]; +extern const uint8 js_Y[]; +extern const uint32 js_A[]; + +/* Enumerated Unicode general category types. */ +typedef enum JSCharType { + JSCT_UNASSIGNED = 0, + JSCT_UPPERCASE_LETTER = 1, + JSCT_LOWERCASE_LETTER = 2, + JSCT_TITLECASE_LETTER = 3, + JSCT_MODIFIER_LETTER = 4, + JSCT_OTHER_LETTER = 5, + JSCT_NON_SPACING_MARK = 6, + JSCT_ENCLOSING_MARK = 7, + JSCT_COMBINING_SPACING_MARK = 8, + JSCT_DECIMAL_DIGIT_NUMBER = 9, + JSCT_LETTER_NUMBER = 10, + JSCT_OTHER_NUMBER = 11, + JSCT_SPACE_SEPARATOR = 12, + JSCT_LINE_SEPARATOR = 13, + JSCT_PARAGRAPH_SEPARATOR = 14, + JSCT_CONTROL = 15, + JSCT_FORMAT = 16, + JSCT_PRIVATE_USE = 18, + JSCT_SURROGATE = 19, + JSCT_DASH_PUNCTUATION = 20, + JSCT_START_PUNCTUATION = 21, + JSCT_END_PUNCTUATION = 22, + JSCT_CONNECTOR_PUNCTUATION = 23, + JSCT_OTHER_PUNCTUATION = 24, + JSCT_MATH_SYMBOL = 25, + JSCT_CURRENCY_SYMBOL = 26, + JSCT_MODIFIER_SYMBOL = 27, + JSCT_OTHER_SYMBOL = 28 +} JSCharType; + +/* Character classifying and mapping macros, based on java.lang.Character. 
*/ +#define JS_CCODE(c) (js_A[js_Y[(js_X[(uint16)(c)>>6]<<6)|((c)&0x3F)]]) +#define JS_CTYPE(c) (JS_CCODE(c) & 0x1F) + +#define JS_ISALPHA(c) ((((1 << JSCT_UPPERCASE_LETTER) | \ + (1 << JSCT_LOWERCASE_LETTER) | \ + (1 << JSCT_TITLECASE_LETTER) | \ + (1 << JSCT_MODIFIER_LETTER) | \ + (1 << JSCT_OTHER_LETTER)) \ + >> JS_CTYPE(c)) & 1) + +#define JS_ISALNUM(c) ((((1 << JSCT_UPPERCASE_LETTER) | \ + (1 << JSCT_LOWERCASE_LETTER) | \ + (1 << JSCT_TITLECASE_LETTER) | \ + (1 << JSCT_MODIFIER_LETTER) | \ + (1 << JSCT_OTHER_LETTER) | \ + (1 << JSCT_DECIMAL_DIGIT_NUMBER)) \ + >> JS_CTYPE(c)) & 1) + +/* A unicode letter, suitable for use in an identifier. */ +#define JS_ISLETTER(c) ((((1 << JSCT_UPPERCASE_LETTER) | \ + (1 << JSCT_LOWERCASE_LETTER) | \ + (1 << JSCT_TITLECASE_LETTER) | \ + (1 << JSCT_MODIFIER_LETTER) | \ + (1 << JSCT_OTHER_LETTER) | \ + (1 << JSCT_LETTER_NUMBER)) \ + >> JS_CTYPE(c)) & 1) + +/* + * 'IdentifierPart' from ECMA grammar, is Unicode letter or combining mark or + * digit or connector punctuation. + */ +#define JS_ISIDPART(c) ((((1 << JSCT_UPPERCASE_LETTER) | \ + (1 << JSCT_LOWERCASE_LETTER) | \ + (1 << JSCT_TITLECASE_LETTER) | \ + (1 << JSCT_MODIFIER_LETTER) | \ + (1 << JSCT_OTHER_LETTER) | \ + (1 << JSCT_LETTER_NUMBER) | \ + (1 << JSCT_NON_SPACING_MARK) | \ + (1 << JSCT_COMBINING_SPACING_MARK) | \ + (1 << JSCT_DECIMAL_DIGIT_NUMBER) | \ + (1 << JSCT_CONNECTOR_PUNCTUATION)) \ + >> JS_CTYPE(c)) & 1) + +/* Unicode control-format characters, ignored in input */ +#define JS_ISFORMAT(c) (((1 << JSCT_FORMAT) >> JS_CTYPE(c)) & 1) + +/* + * This table is used in JS_ISWORD. The definition has external linkage to + * allow the raw table data to be used in the regular expression compiler. + */ +extern const bool js_alnum[]; + +/* + * This macro performs testing for the regular expression word class \w, which + * is defined by ECMA-262 15.10.2.6 to be [0-9A-Z_a-z]. 
If we want a + * Unicode-friendlier definition of "word", we should rename this macro to + * something regexp-y. + */ +#define JS_ISWORD(c) ((c) < 128 && js_alnum[(c)]) + +#define JS_ISIDSTART(c) (JS_ISLETTER(c) || (c) == '_' || (c) == '$') +#define JS_ISIDENT(c) (JS_ISIDPART(c) || (c) == '_' || (c) == '$') + +#define JS_ISXMLSPACE(c) ((c) == ' ' || (c) == '\t' || (c) == '\r' || \ + (c) == '\n') +#define JS_ISXMLNSSTART(c) ((JS_CCODE(c) & 0x00000100) || (c) == '_') +#define JS_ISXMLNS(c) ((JS_CCODE(c) & 0x00000080) || (c) == '.' || \ + (c) == '-' || (c) == '_') +#define JS_ISXMLNAMESTART(c) (JS_ISXMLNSSTART(c) || (c) == ':') +#define JS_ISXMLNAME(c) (JS_ISXMLNS(c) || (c) == ':') + +#define JS_ISDIGIT(c) (JS_CTYPE(c) == JSCT_DECIMAL_DIGIT_NUMBER) + +const jschar BYTE_ORDER_MARK = 0xFEFF; +const jschar NO_BREAK_SPACE = 0x00A0; + +static inline bool +JS_ISSPACE(jschar c) +{ + unsigned w = c; + + if (w < 256) + return (w <= ' ' && (w == ' ' || (9 <= w && w <= 0xD))) || w == NO_BREAK_SPACE; + + return w == BYTE_ORDER_MARK || (JS_CCODE(w) & 0x00070000) == 0x00040000; +} + +#define JS_ISPRINT(c) ((c) < 128 && isprint(c)) + +#define JS_ISUPPER(c) (JS_CTYPE(c) == JSCT_UPPERCASE_LETTER) +#define JS_ISLOWER(c) (JS_CTYPE(c) == JSCT_LOWERCASE_LETTER) + +#define JS_TOUPPER(c) ((jschar) ((JS_CCODE(c) & 0x00100000) \ + ? (c) - ((int32)JS_CCODE(c) >> 22) \ + : (c))) +#define JS_TOLOWER(c) ((jschar) ((JS_CCODE(c) & 0x00200000) \ + ? (c) + ((int32)JS_CCODE(c) >> 22) \ + : (c))) + +/* + * Shorthands for ASCII (7-bit) decimal and hex conversion. + * Manually inline isdigit for performance; MSVC doesn't do this for us. + */ +#define JS7_ISDEC(c) ((((unsigned)(c)) - '0') <= 9) +#define JS7_UNDEC(c) ((c) - '0') +#define JS7_ISHEX(c) ((c) < 128 && isxdigit(c)) +#define JS7_UNHEX(c) (uintN)(JS7_ISDEC(c) ? (c) - '0' : 10 + tolower(c) - 'a') +#define JS7_ISLET(c) ((c) < 128 && isalpha(c)) + +/* Initialize the String class, returning its prototype object. 
*/ +extern js::Class js_StringClass; + +inline bool +JSObject::isString() const +{ + return getClass() == &js_StringClass; +} + +extern JSObject * +js_InitStringClass(JSContext *cx, JSObject *obj); + +extern const char js_escape_str[]; +extern const char js_unescape_str[]; +extern const char js_uneval_str[]; +extern const char js_decodeURI_str[]; +extern const char js_encodeURI_str[]; +extern const char js_decodeURIComponent_str[]; +extern const char js_encodeURIComponent_str[]; + +/* GC-allocate a string descriptor for the given malloc-allocated chars. */ +extern JSFlatString * +js_NewString(JSContext *cx, jschar *chars, size_t length); + +extern JSLinearString * +js_NewDependentString(JSContext *cx, JSString *base, size_t start, + size_t length); + +/* Copy a counted string and GC-allocate a descriptor for it. */ +extern JSFlatString * +js_NewStringCopyN(JSContext *cx, const jschar *s, size_t n); + +extern JSFlatString * +js_NewStringCopyN(JSContext *cx, const char *s, size_t n); + +/* Copy a C string and GC-allocate a descriptor for it. */ +extern JSFlatString * +js_NewStringCopyZ(JSContext *cx, const jschar *s); + +extern JSFlatString * +js_NewStringCopyZ(JSContext *cx, const char *s); + +/* + * Convert a value to a printable C string. + */ +extern const char * +js_ValueToPrintable(JSContext *cx, const js::Value &, + JSAutoByteString *bytes, bool asSource = false); + +/* + * Convert a value to a string, returning null after reporting an error, + * otherwise returning a new string reference. + */ +extern JSString * +js_ValueToString(JSContext *cx, const js::Value &v); + +namespace js { + +/* + * Most code that calls js_ValueToString knows the value is (probably) not a + * string, so it does not make sense to put this inline fast path into + * js_ValueToString. 
+ */ +static JS_ALWAYS_INLINE JSString * +ValueToString_TestForStringInline(JSContext *cx, const Value &v) +{ + if (v.isString()) + return v.toString(); + return js_ValueToString(cx, v); +} + +/* + * This function implements E-262-3 section 9.8, toString. Convert the given + * value to a string of jschars appended to the given buffer. On error, the + * passed buffer may have partial results appended. + */ +extern bool +ValueToStringBuffer(JSContext *cx, const Value &v, StringBuffer &sb); + +} /* namespace js */ + +/* + * Convert a value to its source expression, returning null after reporting + * an error, otherwise returning a new string reference. + */ +extern JS_FRIEND_API(JSString *) +js_ValueToSource(JSContext *cx, const js::Value &v); + +/* + * Compute a hash function from str. The caller can call this function even if + * str is not a GC-allocated thing. + */ +inline uint32 +js_HashString(JSLinearString *str) +{ + const jschar *s = str->chars(); + size_t n = str->length(); + uint32 h; + for (h = 0; n; s++, n--) + h = JS_ROTATE_LEFT32(h, 4) ^ *s; + return h; +} + +namespace js { + +/* + * Test if strings are equal. The caller can call the function even if str1 + * or str2 are not GC-allocated things. + */ +extern bool +EqualStrings(JSContext *cx, JSString *str1, JSString *str2, JSBool *result); + +/* EqualStrings is infallible on linear strings. */ +extern bool +EqualStrings(JSLinearString *str1, JSLinearString *str2); + +/* + * Return less than, equal to, or greater than zero depending on whether + * str1 is less than, equal to, or greater than str2. + * + * NOTE(review): the declaration returns bool and takes an int32 out-param, + * so the ordering value is presumably stored in |*result| while the bool + * reports success -- confirm against the definition. + */ +extern bool +CompareStrings(JSContext *cx, JSString *str1, JSString *str2, int32 *result); + +/* + * Return true if the string matches the given sequence of ASCII bytes. + */ +extern bool +StringEqualsAscii(JSLinearString *str, const char *asciiBytes); + +} /* namespace js */ + +/* + * Boyer-Moore-Horspool superlinear search for pat:patlen in text:textlen. 
+ * The patlen argument must be positive and no greater than sBMHPatLenMax. + * + * Return the index of pat in text, or -1 if not found. + */ +static const jsuint sBMHCharSetSize = 256; /* ISO-Latin-1 */ +static const jsuint sBMHPatLenMax = 255; /* skip table element is uint8 */ +static const jsint sBMHBadPattern = -2; /* return value if pat is not ISO-Latin-1 */ + +extern jsint +js_BoyerMooreHorspool(const jschar *text, jsuint textlen, + const jschar *pat, jsuint patlen); + +extern size_t +js_strlen(const jschar *s); + +extern jschar * +js_strchr(const jschar *s, jschar c); + +extern jschar * +js_strchr_limit(const jschar *s, jschar c, const jschar *limit); + +#define js_strncpy(t, s, n) memcpy((t), (s), (n) * sizeof(jschar)) + +inline void +js_short_strncpy(jschar *dest, const jschar *src, size_t num) +{ + /* + * It isn't strictly necessary here for |num| to be small, but this function + * is currently only called on buffers for short strings. + */ + JS_ASSERT(JSShortString::fitsIntoShortString(num)); + for (size_t i = 0; i < num; i++) + dest[i] = src[i]; +} + +/* + * Return s advanced past any Unicode white space characters. + */ +static inline const jschar * +js_SkipWhiteSpace(const jschar *s, const jschar *end) +{ + JS_ASSERT(s <= end); + while (s != end && JS_ISSPACE(*s)) + s++; + return s; +} + +/* + * Inflate bytes to JS chars and vice versa. Report out of memory via cx and + * return null on error, otherwise return the jschar or byte vector that was + * JS_malloc'ed. length is updated to the length of the new string in jschars. + */ +extern jschar * +js_InflateString(JSContext *cx, const char *bytes, size_t *length); + +extern char * +js_DeflateString(JSContext *cx, const jschar *chars, size_t length); + +/* + * Inflate bytes to JS chars into a buffer. 'chars' must be large enough for + * 'length' jschars. The buffer is NOT null-terminated. 
The destination length + * must be initialized with the buffer size and will contain on return the + * number of copied chars. Conversion behavior depends on js_CStringsAreUTF8. + */ +extern JSBool +js_InflateStringToBuffer(JSContext *cx, const char *bytes, size_t length, + jschar *chars, size_t *charsLength); + +/* + * Same as js_InflateStringToBuffer, but always treats 'bytes' as UTF-8. + */ +extern JSBool +js_InflateUTF8StringToBuffer(JSContext *cx, const char *bytes, size_t length, + jschar *chars, size_t *charsLength); + +/* + * Get number of bytes in the deflated sequence of characters. Behavior depends + * on js_CStringsAreUTF8. + */ +extern size_t +js_GetDeflatedStringLength(JSContext *cx, const jschar *chars, + size_t charsLength); + +/* + * Same as js_GetDeflatedStringLength, but always treats the result as UTF-8. + */ +extern size_t +js_GetDeflatedUTF8StringLength(JSContext *cx, const jschar *chars, + size_t charsLength); + +/* + * Deflate JS chars to bytes into a buffer. 'bytes' must be large enough for + * 'length' chars. The buffer is NOT null-terminated. The destination length + * must be initialized with the buffer size and will contain on return the + * number of copied bytes. Conversion behavior depends on js_CStringsAreUTF8. + */ +extern JSBool +js_DeflateStringToBuffer(JSContext *cx, const jschar *chars, + size_t charsLength, char *bytes, size_t *length); + +/* + * Same as js_DeflateStringToBuffer, but always treats 'bytes' as UTF-8. + */ +extern JSBool +js_DeflateStringToUTF8Buffer(JSContext *cx, const jschar *chars, + size_t charsLength, char *bytes, size_t *length); + +/* Export a few natives and a helper to other files in SpiderMonkey. */ +extern JSBool +js_str_escape(JSContext *cx, uintN argc, js::Value *argv, js::Value *rval); + +/* + * The String.prototype.replace fast-native entry point is exported for joined + * function optimization in js{interp,tracer}.cpp. 
+ */ +namespace js { +extern JSBool +str_replace(JSContext *cx, uintN argc, js::Value *vp); +} + +extern JSBool +js_str_toString(JSContext *cx, uintN argc, js::Value *vp); + +extern JSBool +js_str_charAt(JSContext *cx, uintN argc, js::Value *vp); + +extern JSBool +js_str_charCodeAt(JSContext *cx, uintN argc, js::Value *vp); + +/* + * Convert one UCS-4 char and write it into a UTF-8 buffer, which must be at + * least 6 bytes long. Return the number of UTF-8 bytes of data written. + */ +extern int +js_OneUcs4ToUtf8Char(uint8 *utf8Buffer, uint32 ucs4Char); + +namespace js { + +extern size_t +PutEscapedStringImpl(char *buffer, size_t size, FILE *fp, JSLinearString *str, uint32 quote); + +/* + * Write str into buffer escaping any non-printable or non-ASCII character + * using \escapes for JS string literals. + * Guarantees that a NUL is at the end of the buffer unless size is 0. Returns + * the length of the written output, NOT including the NUL. Thus, a return + * value of size or more means that the output was truncated. If buffer + * is null, just returns the length of the output. If quote is not 0, it must + * be a single or double quote character that will quote the output. +*/ +inline size_t +PutEscapedString(char *buffer, size_t size, JSLinearString *str, uint32 quote) +{ + size_t n = PutEscapedStringImpl(buffer, size, NULL, str, quote); + + /* PutEscapedStringImpl can only fail with a file. */ + JS_ASSERT(n != size_t(-1)); + return n; +} + +/* + * Write str into file escaping any non-printable or non-ASCII character. + * If quote is not 0, it must be a single or double quote character that + * will quote the output. +*/ +inline bool +FileEscapedString(FILE *fp, JSLinearString *str, uint32 quote) +{ + return PutEscapedStringImpl(NULL, 0, fp, str, quote) != size_t(-1); +} + +} /* namespace js */ + +extern JSBool +js_String(JSContext *cx, uintN argc, js::Value *vp); + +#endif /* jsstr_h___ */ |