diff options
Diffstat (limited to 'js/src/yarr/yarr/RegexPattern.h')
-rw-r--r-- | js/src/yarr/yarr/RegexPattern.h | 438 |
1 files changed, 438 insertions, 0 deletions
diff --git a/js/src/yarr/yarr/RegexPattern.h b/js/src/yarr/yarr/RegexPattern.h new file mode 100644 index 0000000..9846b05 --- /dev/null +++ b/js/src/yarr/yarr/RegexPattern.h @@ -0,0 +1,438 @@ +/* + * Copyright (C) 2009 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef RegexPattern_h +#define RegexPattern_h + +#include "jsvector.h" +#include "yarr/jswtfbridge.h" +#include "yarr/yarr/RegexCommon.h" + + +namespace JSC { namespace Yarr { + +#define RegexStackSpaceForBackTrackInfoPatternCharacter 1 // Only for !fixed quantifiers. +#define RegexStackSpaceForBackTrackInfoCharacterClass 1 // Only for !fixed quantifiers. +#define RegexStackSpaceForBackTrackInfoBackReference 2 +#define RegexStackSpaceForBackTrackInfoAlternative 1 // One per alternative. +#define RegexStackSpaceForBackTrackInfoParentheticalAssertion 1 +#define RegexStackSpaceForBackTrackInfoParenthesesOnce 1 // Only for !fixed quantifiers. +#define RegexStackSpaceForBackTrackInfoParenthesesTerminal 1 +#define RegexStackSpaceForBackTrackInfoParentheses 4 + +struct PatternDisjunction; + +struct CharacterRange { + UChar begin; + UChar end; + + CharacterRange(UChar begin, UChar end) + : begin(begin) + , end(end) + { + } +}; + +/* + * Wraps a table and indicates inversion. Can be efficiently borrowed + * between character classes, so it's refcounted. + */ +struct CharacterClassTable { + const char* m_table; + bool m_inverted; + jsrefcount m_refcount; + + /* Ownership transferred to caller. */ + static CharacterClassTable *create(const char* table, bool inverted) + { + // FIXME: bug 574459 -- no NULL checks done by any of the callers, all + // of which are in RegExpJitTables.h. + /* We can't (easily) use js_new() here because the constructor is private. */ + void *memory = js_malloc(sizeof(CharacterClassTable)); + return memory ? new(memory) CharacterClassTable(table, inverted) : NULL; + } + + void incref() { JS_ATOMIC_INCREMENT(&m_refcount); } + void decref() { if (JS_ATOMIC_DECREMENT(&m_refcount) == 0) js_delete(this); } + +private: + CharacterClassTable(const char* table, bool inverted) + : m_table(table) + , m_inverted(inverted) + , m_refcount(0) + { + } +}; + +struct CharacterClass { + // All CharacterClass instances have to have the full set of matches and ranges, + // they may have an optional table for faster lookups (which must match the + // specified matches and ranges) + CharacterClass(CharacterClassTable *table) + : m_table(table) + { + if (m_table) + m_table->incref(); + } + ~CharacterClass() + { + if (m_table) + m_table->decref(); + } + typedef js::Vector<UChar, 0, js::SystemAllocPolicy> UChars; + typedef js::Vector<CharacterRange, 0, js::SystemAllocPolicy> CharacterRanges; + UChars m_matches; + CharacterRanges m_ranges; + UChars m_matchesUnicode; + CharacterRanges m_rangesUnicode; + CharacterClassTable *m_table; +}; + +enum QuantifierType { + QuantifierFixedCount, + QuantifierGreedy, + QuantifierNonGreedy +}; + +struct PatternTerm { + enum Type { + TypeAssertionBOL, + TypeAssertionEOL, + TypeAssertionWordBoundary, + TypePatternCharacter, + TypeCharacterClass, + TypeBackReference, + TypeForwardReference, + TypeParenthesesSubpattern, + TypeParentheticalAssertion + } type; + bool invertOrCapture; + union { + UChar patternCharacter; + CharacterClass* characterClass; + unsigned subpatternId; + struct { + PatternDisjunction* disjunction; + unsigned subpatternId; + unsigned lastSubpatternId; + bool isCopy; + bool isTerminal; + } parentheses; + }; + QuantifierType quantityType; + unsigned quantityCount; + int inputPosition; + unsigned frameLocation; + + PatternTerm(UChar ch) + : type(PatternTerm::TypePatternCharacter) + { + patternCharacter = ch; + quantityType = QuantifierFixedCount; + quantityCount = 1; + } + + PatternTerm(CharacterClass* charClass, bool invert) + : type(PatternTerm::TypeCharacterClass) + , invertOrCapture(invert) + { + characterClass = charClass; + quantityType = QuantifierFixedCount; + quantityCount = 1; + } + + PatternTerm(Type type, unsigned subpatternId, PatternDisjunction* disjunction, bool invertOrCapture) + : type(type) + , invertOrCapture(invertOrCapture) + { + parentheses.disjunction = disjunction; + parentheses.subpatternId = subpatternId; + parentheses.isCopy = false; + parentheses.isTerminal = false; + quantityType = QuantifierFixedCount; + quantityCount = 1; + } + + PatternTerm(Type type, bool invert = false) + : type(type) + , invertOrCapture(invert) + { + quantityType = QuantifierFixedCount; + quantityCount = 1; + } + + PatternTerm(unsigned spatternId) + : type(TypeBackReference) + , invertOrCapture(false) + { + subpatternId = spatternId; + quantityType = QuantifierFixedCount; + quantityCount = 1; + } + + static PatternTerm ForwardReference() + { + return PatternTerm(TypeForwardReference); + } + + static PatternTerm BOL() + { + return PatternTerm(TypeAssertionBOL); + } + + static PatternTerm EOL() + { + return PatternTerm(TypeAssertionEOL); + } + + static PatternTerm WordBoundary(bool invert) + { + return PatternTerm(TypeAssertionWordBoundary, invert); + } + + bool invert() + { + return invertOrCapture; + } + + bool capture() + { + return invertOrCapture; + } + + void quantify(unsigned count, QuantifierType type) + { + quantityCount = count; + quantityType = type; + } +}; + +struct PatternAlternative { + PatternAlternative(PatternDisjunction* disjunction) + : m_parent(disjunction) + , m_onceThrough(false) + , m_hasFixedSize(false) + , m_startsWithBOL(false) + , m_containsBOL(false) + { + } + + PatternTerm& lastTerm() + { + JS_ASSERT(m_terms.length()); + return m_terms[m_terms.length() - 1]; + } + + void removeLastTerm() + { + JS_ASSERT(m_terms.length()); + m_terms.popBack(); + } + + void setOnceThrough() + { + m_onceThrough = true; + } + + bool onceThrough() + { + return m_onceThrough; + } + + js::Vector<PatternTerm, 0, js::SystemAllocPolicy> m_terms; + PatternDisjunction* m_parent; + unsigned m_minimumSize; + bool m_onceThrough : 1; + bool m_hasFixedSize : 1; + bool m_startsWithBOL : 1; + bool m_containsBOL : 1; +}; + +template<typename T, size_t N, class AP> +static inline void +deleteAllValues(js::Vector<T*,N,AP> &vector) +{ + for (T** t = vector.begin(); t < vector.end(); ++t) + js_delete(*t); +} + +struct PatternDisjunction { + PatternDisjunction(PatternAlternative* parent = 0) + : m_parent(parent) + , m_hasFixedSize(false) + { + } + + ~PatternDisjunction() + { + deleteAllValues(m_alternatives); + } + + PatternAlternative* addNewAlternative() + { + // FIXME: bug 574459 -- no NULL check + PatternAlternative* alternative = js_new<PatternAlternative>(this); + m_alternatives.append(alternative); + return alternative; + } + + js::Vector<PatternAlternative*, 0, js::SystemAllocPolicy> m_alternatives; + PatternAlternative* m_parent; + unsigned m_minimumSize; + unsigned m_callFrameSize; + bool m_hasFixedSize; +}; + +// You probably don't want to be calling these functions directly +// (please to be calling newlineCharacterClass() et al on your +// friendly neighborhood RegexPattern instance to get nicely +// cached copies). +CharacterClass* newlineCreate(); +CharacterClass* digitsCreate(); +CharacterClass* spacesCreate(); +CharacterClass* wordcharCreate(); +CharacterClass* nondigitsCreate(); +CharacterClass* nonspacesCreate(); +CharacterClass* nonwordcharCreate(); + +struct RegexPattern { + RegexPattern(bool ignoreCase, bool multiline) + : m_ignoreCase(ignoreCase) + , m_multiline(multiline) + , m_containsBackreferences(false) + , m_containsBOL(false) + , m_numSubpatterns(0) + , m_maxBackReference(0) + , newlineCached(0) + , digitsCached(0) + , spacesCached(0) + , wordcharCached(0) + , nondigitsCached(0) + , nonspacesCached(0) + , nonwordcharCached(0) + { + } + + ~RegexPattern() + { + deleteAllValues(m_disjunctions); + deleteAllValues(m_userCharacterClasses); + } + + void reset() + { + m_numSubpatterns = 0; + m_maxBackReference = 0; + + m_containsBackreferences = false; + m_containsBOL = false; + + newlineCached = 0; + digitsCached = 0; + spacesCached = 0; + wordcharCached = 0; + nondigitsCached = 0; + nonspacesCached = 0; + nonwordcharCached = 0; + + deleteAllValues(m_disjunctions); + m_disjunctions.clear(); + deleteAllValues(m_userCharacterClasses); + m_userCharacterClasses.clear(); + } + + bool containsIllegalBackReference() + { + return m_maxBackReference > m_numSubpatterns; + } + + CharacterClass* newlineCharacterClass() + { + if (!newlineCached) + m_userCharacterClasses.append(newlineCached = newlineCreate()); + return newlineCached; + } + CharacterClass* digitsCharacterClass() + { + if (!digitsCached) + m_userCharacterClasses.append(digitsCached = digitsCreate()); + return digitsCached; + } + CharacterClass* spacesCharacterClass() + { + if (!spacesCached) + m_userCharacterClasses.append(spacesCached = spacesCreate()); + return spacesCached; + } + CharacterClass* wordcharCharacterClass() + { + if (!wordcharCached) + m_userCharacterClasses.append(wordcharCached = wordcharCreate()); + return wordcharCached; + } + CharacterClass* nondigitsCharacterClass() + { + if (!nondigitsCached) + m_userCharacterClasses.append(nondigitsCached = nondigitsCreate()); + return nondigitsCached; + } + CharacterClass* nonspacesCharacterClass() + { + if (!nonspacesCached) + m_userCharacterClasses.append(nonspacesCached = nonspacesCreate()); + return nonspacesCached; + } + CharacterClass* nonwordcharCharacterClass() + { + if (!nonwordcharCached) + m_userCharacterClasses.append(nonwordcharCached = nonwordcharCreate()); + return nonwordcharCached; + } + + typedef js::Vector<PatternDisjunction*, 4, js::SystemAllocPolicy> PatternDisjunctions; + typedef js::Vector<CharacterClass*, 0, js::SystemAllocPolicy> CharacterClasses; + bool m_ignoreCase : 1; + bool m_multiline : 1; + bool m_containsBackreferences : 1; + bool m_containsBOL : 1; + unsigned m_numSubpatterns; + unsigned m_maxBackReference; + PatternDisjunction *m_body; + PatternDisjunctions m_disjunctions; + CharacterClasses m_userCharacterClasses; + +private: + CharacterClass* newlineCached; + CharacterClass* digitsCached; + CharacterClass* spacesCached; + CharacterClass* wordcharCached; + CharacterClass* nondigitsCached; + CharacterClass* nonspacesCached; + CharacterClass* nonwordcharCached; +}; + +} } // namespace JSC::Yarr + +#endif // RegexPattern_h |