diff options
Diffstat (limited to 'deps/v8/src/regexp/regexp-macro-assembler.cc')
-rw-r--r-- | deps/v8/src/regexp/regexp-macro-assembler.cc | 112 |
1 files changed, 78 insertions, 34 deletions
diff --git a/deps/v8/src/regexp/regexp-macro-assembler.cc b/deps/v8/src/regexp/regexp-macro-assembler.cc index caf8b51fe5..9bb5073a8b 100644 --- a/deps/v8/src/regexp/regexp-macro-assembler.cc +++ b/deps/v8/src/regexp/regexp-macro-assembler.cc @@ -9,6 +9,10 @@ #include "src/regexp/regexp-stack.h" #include "src/simulator.h" +#ifdef V8_I18N_SUPPORT +#include "unicode/uchar.h" +#endif // V8_I18N_SUPPORT + namespace v8 { namespace internal { @@ -23,6 +27,80 @@ RegExpMacroAssembler::~RegExpMacroAssembler() { } +int RegExpMacroAssembler::CaseInsensitiveCompareUC16(Address byte_offset1, + Address byte_offset2, + size_t byte_length, + Isolate* isolate) { + unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize = + isolate->regexp_macro_assembler_canonicalize(); + // This function is not allowed to cause a garbage collection. + // A GC might move the calling generated code and invalidate the + // return address on the stack. + DCHECK(byte_length % 2 == 0); + uc16* substring1 = reinterpret_cast<uc16*>(byte_offset1); + uc16* substring2 = reinterpret_cast<uc16*>(byte_offset2); + size_t length = byte_length >> 1; + +#ifdef V8_I18N_SUPPORT + if (isolate == nullptr) { + for (size_t i = 0; i < length; i++) { + uc32 c1 = substring1[i]; + uc32 c2 = substring2[i]; + if (unibrow::Utf16::IsLeadSurrogate(c1)) { + // Non-BMP characters do not have case-equivalents in the BMP. + // Both have to be non-BMP for them to be able to match. + if (!unibrow::Utf16::IsLeadSurrogate(c2)) return 0; + if (i + 1 < length) { + uc16 c1t = substring1[i + 1]; + uc16 c2t = substring2[i + 1]; + if (unibrow::Utf16::IsTrailSurrogate(c1t) && + unibrow::Utf16::IsTrailSurrogate(c2t)) { + c1 = unibrow::Utf16::CombineSurrogatePair(c1, c1t); + c2 = unibrow::Utf16::CombineSurrogatePair(c2, c2t); + i++; + } + } + } + c1 = u_foldCase(c1, U_FOLD_CASE_DEFAULT); + c2 = u_foldCase(c2, U_FOLD_CASE_DEFAULT); + if (c1 != c2) return 0; + } + return 1; + } +#endif // V8_I18N_SUPPORT + DCHECK_NOT_NULL(isolate); + for (size_t i = 0; i < length; i++) { + unibrow::uchar c1 = substring1[i]; + unibrow::uchar c2 = substring2[i]; + if (c1 != c2) { + unibrow::uchar s1[1] = {c1}; + canonicalize->get(c1, '\0', s1); + if (s1[0] != c2) { + unibrow::uchar s2[1] = {c2}; + canonicalize->get(c2, '\0', s2); + if (s1[0] != s2[0]) { + return 0; + } + } + } + } + return 1; +} + + +void RegExpMacroAssembler::CheckNotInSurrogatePair(int cp_offset, + Label* on_failure) { + Label ok; + // Check that current character is not a trail surrogate. + LoadCurrentCharacter(cp_offset, &ok); + CheckCharacterNotInRange(kTrailSurrogateStart, kTrailSurrogateEnd, &ok); + // Check that previous character is not a lead surrogate. + LoadCurrentCharacter(cp_offset - 1, &ok); + CheckCharacterInRange(kLeadSurrogateStart, kLeadSurrogateEnd, on_failure); + Bind(&ok); +} + + #ifndef V8_INTERPRETED_REGEXP // Avoid unused code, e.g., on ARM. NativeRegExpMacroAssembler::NativeRegExpMacroAssembler(Isolate* isolate, @@ -245,40 +323,6 @@ const byte NativeRegExpMacroAssembler::word_character_map[] = { }; -int NativeRegExpMacroAssembler::CaseInsensitiveCompareUC16( - Address byte_offset1, - Address byte_offset2, - size_t byte_length, - Isolate* isolate) { - unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize = - isolate->regexp_macro_assembler_canonicalize(); - // This function is not allowed to cause a garbage collection. - // A GC might move the calling generated code and invalidate the - // return address on the stack. - DCHECK(byte_length % 2 == 0); - uc16* substring1 = reinterpret_cast<uc16*>(byte_offset1); - uc16* substring2 = reinterpret_cast<uc16*>(byte_offset2); - size_t length = byte_length >> 1; - - for (size_t i = 0; i < length; i++) { - unibrow::uchar c1 = substring1[i]; - unibrow::uchar c2 = substring2[i]; - if (c1 != c2) { - unibrow::uchar s1[1] = { c1 }; - canonicalize->get(c1, '\0', s1); - if (s1[0] != c2) { - unibrow::uchar s2[1] = { c2 }; - canonicalize->get(c2, '\0', s2); - if (s1[0] != s2[0]) { - return 0; - } - } - } - } - return 1; -} - - Address NativeRegExpMacroAssembler::GrowStack(Address stack_pointer, Address* stack_base, Isolate* isolate) { |