diff options
author | Michaël Zasso <targos@protonmail.com> | 2018-03-07 08:54:53 +0100 |
---|---|---|
committer | Michaël Zasso <targos@protonmail.com> | 2018-03-07 16:48:52 +0100 |
commit | 88786fecff336342a56e6f2e7ff3b286be716e47 (patch) | |
tree | 92e6ba5b8ac8dae1a058988d20c9d27bfa654390 /deps/v8/src/regexp | |
parent | 4e86f9b5ab83cbabf43839385bf383e6a7ef7d19 (diff) | |
download | node-new-88786fecff336342a56e6f2e7ff3b286be716e47.tar.gz |
deps: update V8 to 6.5.254.31
PR-URL: https://github.com/nodejs/node/pull/18453
Reviewed-By: James M Snell <jasnell@gmail.com>
Reviewed-By: Colin Ihrig <cjihrig@gmail.com>
Reviewed-By: Yang Guo <yangguo@chromium.org>
Reviewed-By: Ali Ijaz Sheikh <ofrobots@google.com>
Reviewed-By: Michael Dawson <michael_dawson@ca.ibm.com>
Diffstat (limited to 'deps/v8/src/regexp')
-rw-r--r-- | deps/v8/src/regexp/arm/regexp-macro-assembler-arm.cc | 35 | ||||
-rw-r--r-- | deps/v8/src/regexp/arm64/regexp-macro-assembler-arm64.cc | 36 | ||||
-rw-r--r-- | deps/v8/src/regexp/ia32/regexp-macro-assembler-ia32.cc | 32 | ||||
-rw-r--r-- | deps/v8/src/regexp/jsregexp.cc | 76 | ||||
-rw-r--r-- | deps/v8/src/regexp/mips/regexp-macro-assembler-mips.cc | 37 | ||||
-rw-r--r-- | deps/v8/src/regexp/mips64/regexp-macro-assembler-mips64.cc | 37 | ||||
-rw-r--r-- | deps/v8/src/regexp/ppc/regexp-macro-assembler-ppc.cc | 35 | ||||
-rw-r--r-- | deps/v8/src/regexp/regexp-ast.h | 12 | ||||
-rw-r--r-- | deps/v8/src/regexp/regexp-macro-assembler.cc | 40 | ||||
-rw-r--r-- | deps/v8/src/regexp/regexp-parser.cc | 42 | ||||
-rw-r--r-- | deps/v8/src/regexp/regexp-utils.cc | 2 | ||||
-rw-r--r-- | deps/v8/src/regexp/s390/regexp-macro-assembler-s390.cc | 37 | ||||
-rw-r--r-- | deps/v8/src/regexp/x64/regexp-macro-assembler-x64.cc | 32 |
13 files changed, 240 insertions, 213 deletions
diff --git a/deps/v8/src/regexp/arm/regexp-macro-assembler-arm.cc b/deps/v8/src/regexp/arm/regexp-macro-assembler-arm.cc index 2e6425568b..5f9d3905a3 100644 --- a/deps/v8/src/regexp/arm/regexp-macro-assembler-arm.cc +++ b/deps/v8/src/regexp/arm/regexp-macro-assembler-arm.cc @@ -86,8 +86,7 @@ namespace internal { * bool direct_call = false, * Isolate* isolate); * The call is performed by NativeRegExpMacroAssembler::Execute() - * (in regexp-macro-assembler.cc) via the CALL_GENERATED_REGEXP_CODE macro - * in arm/simulator-arm.h. + * (in regexp-macro-assembler.cc) via the GeneratedCode wrapper. */ #define __ ACCESS_MASM(masm_) @@ -506,12 +505,12 @@ bool RegExpMacroAssemblerARM::CheckSpecialCharacterClass(uc16 type, Label success; __ cmp(current_character(), Operand(' ')); __ b(eq, &success); - // Check range 0x09..0x0d + // Check range 0x09..0x0D __ sub(r0, current_character(), Operand('\t')); __ cmp(r0, Operand('\r' - '\t')); __ b(ls, &success); // \u00a0 (NBSP). - __ cmp(r0, Operand(0x00a0 - '\t')); + __ cmp(r0, Operand(0x00A0 - '\t')); BranchOrBacktrack(ne, on_no_match); __ bind(&success); return true; @@ -533,37 +532,37 @@ bool RegExpMacroAssemblerARM::CheckSpecialCharacterClass(uc16 type, BranchOrBacktrack(ls, on_no_match); return true; case '.': { - // Match non-newlines (not 0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029) + // Match non-newlines (not 0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029) __ eor(r0, current_character(), Operand(0x01)); - // See if current character is '\n'^1 or '\r'^1, i.e., 0x0b or 0x0c - __ sub(r0, r0, Operand(0x0b)); - __ cmp(r0, Operand(0x0c - 0x0b)); + // See if current character is '\n'^1 or '\r'^1, i.e., 0x0B or 0x0C + __ sub(r0, r0, Operand(0x0B)); + __ cmp(r0, Operand(0x0C - 0x0B)); BranchOrBacktrack(ls, on_no_match); if (mode_ == UC16) { // Compare original value to 0x2028 and 0x2029, using the already - // computed (current_char ^ 0x01 - 0x0b). I.e., check for - // 0x201d (0x2028 - 0x0b) or 0x201e. - __ sub(r0, r0, Operand(0x2028 - 0x0b)); + // computed (current_char ^ 0x01 - 0x0B). I.e., check for + // 0x201D (0x2028 - 0x0B) or 0x201E. + __ sub(r0, r0, Operand(0x2028 - 0x0B)); __ cmp(r0, Operand(1)); BranchOrBacktrack(ls, on_no_match); } return true; } case 'n': { - // Match newlines (0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029) + // Match newlines (0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029) __ eor(r0, current_character(), Operand(0x01)); - // See if current character is '\n'^1 or '\r'^1, i.e., 0x0b or 0x0c - __ sub(r0, r0, Operand(0x0b)); - __ cmp(r0, Operand(0x0c - 0x0b)); + // See if current character is '\n'^1 or '\r'^1, i.e., 0x0B or 0x0C + __ sub(r0, r0, Operand(0x0B)); + __ cmp(r0, Operand(0x0C - 0x0B)); if (mode_ == LATIN1) { BranchOrBacktrack(hi, on_no_match); } else { Label done; __ b(ls, &done); // Compare original value to 0x2028 and 0x2029, using the already - // computed (current_char ^ 0x01 - 0x0b). I.e., check for - // 0x201d (0x2028 - 0x0b) or 0x201e. - __ sub(r0, r0, Operand(0x2028 - 0x0b)); + // computed (current_char ^ 0x01 - 0x0B). I.e., check for + // 0x201D (0x2028 - 0x0B) or 0x201E. + __ sub(r0, r0, Operand(0x2028 - 0x0B)); __ cmp(r0, Operand(1)); BranchOrBacktrack(hi, on_no_match); __ bind(&done); diff --git a/deps/v8/src/regexp/arm64/regexp-macro-assembler-arm64.cc b/deps/v8/src/regexp/arm64/regexp-macro-assembler-arm64.cc index 558ee673f1..5f77ff4021 100644 --- a/deps/v8/src/regexp/arm64/regexp-macro-assembler-arm64.cc +++ b/deps/v8/src/regexp/arm64/regexp-macro-assembler-arm64.cc @@ -96,8 +96,7 @@ namespace internal { * bool direct_call = false, * Isolate* isolate); * The call is performed by NativeRegExpMacroAssembler::Execute() - * (in regexp-macro-assembler.cc) via the CALL_GENERATED_REGEXP_CODE macro - * in arm64/simulator-arm64.h. + * (in regexp-macro-assembler.cc) via the GeneratedCode wrapper. */ #define __ ACCESS_MASM(masm_) @@ -116,7 +115,6 @@ RegExpMacroAssemblerARM64::RegExpMacroAssemblerARM64(Isolate* isolate, success_label_(), backtrack_label_(), exit_label_() { - __ SetStackPointer(csp); DCHECK_EQ(0, registers_to_save % 2); // We can cache at most 16 W registers in x0-x7. STATIC_ASSERT(kNumCachedRegisters <= 16); @@ -366,7 +364,7 @@ void RegExpMacroAssemblerARM64::CheckNotBackReferenceIgnoreCase( __ Cmp(current_input_offset().X(), Operand(current_input_offset(), SXTW)); __ Ccmp(current_input_offset(), 0, NoFlag, eq); // The current input offset should be <= 0, and fit in a W register. - __ Check(le, kOffsetOutOfRange); + __ Check(le, AbortReason::kOffsetOutOfRange); } } else { DCHECK(mode_ == UC16); @@ -503,7 +501,7 @@ void RegExpMacroAssemblerARM64::CheckNotBackReference(int start_reg, __ Cmp(current_input_offset().X(), Operand(current_input_offset(), SXTW)); __ Ccmp(current_input_offset(), 0, NoFlag, eq); // The current input offset should be <= 0, and fit in a W register. - __ Check(le, kOffsetOutOfRange); + __ Check(le, AbortReason::kOffsetOutOfRange); } __ Bind(&fallthrough); } @@ -588,11 +586,11 @@ bool RegExpMacroAssemblerARM64::CheckSpecialCharacterClass(uc16 type, if (mode_ == LATIN1) { // One byte space characters are '\t'..'\r', ' ' and \u00a0. Label success; - // Check for ' ' or 0x00a0. + // Check for ' ' or 0x00A0. __ Cmp(current_character(), ' '); - __ Ccmp(current_character(), 0x00a0, ZFlag, ne); + __ Ccmp(current_character(), 0x00A0, ZFlag, ne); __ B(eq, &success); - // Check range 0x09..0x0d. + // Check range 0x09..0x0D. __ Sub(w10, current_character(), '\t'); CompareAndBranchOrBacktrack(w10, '\r' - '\t', hi, on_no_match); __ Bind(&success); @@ -613,12 +611,12 @@ bool RegExpMacroAssemblerARM64::CheckSpecialCharacterClass(uc16 type, CompareAndBranchOrBacktrack(w10, '9' - '0', ls, on_no_match); return true; case '.': { - // Match non-newlines (not 0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029) + // Match non-newlines (not 0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029) // Here we emit the conditional branch only once at the end to make branch // prediction more efficient, even though we could branch out of here // as soon as a character matches. - __ Cmp(current_character(), 0x0a); - __ Ccmp(current_character(), 0x0d, ZFlag, ne); + __ Cmp(current_character(), 0x0A); + __ Ccmp(current_character(), 0x0D, ZFlag, ne); if (mode_ == UC16) { __ Sub(w10, current_character(), 0x2028); // If the Z flag was set we clear the flags to force a branch. @@ -631,11 +629,11 @@ bool RegExpMacroAssemblerARM64::CheckSpecialCharacterClass(uc16 type, return true; } case 'n': { - // Match newlines (0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029) + // Match newlines (0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029) // We have to check all 4 newline characters before emitting // the conditional branch. - __ Cmp(current_character(), 0x0a); - __ Ccmp(current_character(), 0x0d, ZFlag, ne); + __ Cmp(current_character(), 0x0A); + __ Ccmp(current_character(), 0x0D, ZFlag, ne); if (mode_ == UC16) { __ Sub(w10, current_character(), 0x2028); // If the Z flag was set we clear the flags to force a fall-through. @@ -791,7 +789,7 @@ Handle<HeapObject> RegExpMacroAssemblerARM64::GetCode(Handle<String> source) { // Check that the size of the input string chars is in range. __ Neg(x11, x10); __ Cmp(x11, SeqTwoByteString::kMaxCharsSize); - __ Check(ls, kInputStringTooLong); + __ Check(ls, AbortReason::kInputStringTooLong); } __ Mov(current_input_offset(), w10); @@ -855,7 +853,7 @@ Handle<HeapObject> RegExpMacroAssemblerARM64::GetCode(Handle<String> source) { if (masm_->emit_debug_code()) { // Check that the size of the input string chars is in range. __ Cmp(x10, SeqTwoByteString::kMaxCharsSize); - __ Check(ls, kInputStringTooLong); + __ Check(ls, AbortReason::kInputStringTooLong); } // input_start has a start_offset offset on entry. We need to include // it when computing the length of the whole string. @@ -1158,7 +1156,7 @@ void RegExpMacroAssemblerARM64::PushBacktrack(Label* label) { if (masm_->emit_debug_code()) { __ Cmp(x10, kWRegMask); // The code offset has to fit in a W register. - __ Check(ls, kOffsetOutOfRange); + __ Check(ls, AbortReason::kOffsetOutOfRange); } } Push(w10); @@ -1314,7 +1312,7 @@ void RegExpMacroAssemblerARM64::WriteStackPointerToRegister(int reg) { if (masm_->emit_debug_code()) { __ Cmp(x10, Operand(w10, SXTW)); // The stack offset needs to fit in a W register. - __ Check(eq, kOffsetOutOfRange); + __ Check(eq, AbortReason::kOffsetOutOfRange); } StoreRegister(reg, w10); } @@ -1623,7 +1621,7 @@ void RegExpMacroAssemblerARM64::LoadCurrentCharacterUnchecked(int cp_offset, __ Add(x10, x10, Operand(current_input_offset(), SXTW)); __ Cmp(x10, Operand(w10, SXTW)); // The offset needs to fit in a W register. - __ Check(eq, kOffsetOutOfRange); + __ Check(eq, AbortReason::kOffsetOutOfRange); } else { __ Add(w10, current_input_offset(), cp_offset * char_size()); } diff --git a/deps/v8/src/regexp/ia32/regexp-macro-assembler-ia32.cc b/deps/v8/src/regexp/ia32/regexp-macro-assembler-ia32.cc index 99d1466f54..cb240d6c67 100644 --- a/deps/v8/src/regexp/ia32/regexp-macro-assembler-ia32.cc +++ b/deps/v8/src/regexp/ia32/regexp-macro-assembler-ia32.cc @@ -531,12 +531,12 @@ bool RegExpMacroAssemblerIA32::CheckSpecialCharacterClass(uc16 type, Label success; __ cmp(current_character(), ' '); __ j(equal, &success, Label::kNear); - // Check range 0x09..0x0d + // Check range 0x09..0x0D __ lea(eax, Operand(current_character(), -'\t')); __ cmp(eax, '\r' - '\t'); __ j(below_equal, &success, Label::kNear); // \u00a0 (NBSP). - __ cmp(eax, 0x00a0 - '\t'); + __ cmp(eax, 0x00A0 - '\t'); BranchOrBacktrack(not_equal, on_no_match); __ bind(&success); return true; @@ -558,18 +558,18 @@ bool RegExpMacroAssemblerIA32::CheckSpecialCharacterClass(uc16 type, BranchOrBacktrack(below_equal, on_no_match); return true; case '.': { - // Match non-newlines (not 0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029) + // Match non-newlines (not 0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029) __ mov(eax, current_character()); __ xor_(eax, Immediate(0x01)); - // See if current character is '\n'^1 or '\r'^1, i.e., 0x0b or 0x0c - __ sub(eax, Immediate(0x0b)); - __ cmp(eax, 0x0c - 0x0b); + // See if current character is '\n'^1 or '\r'^1, i.e., 0x0B or 0x0C + __ sub(eax, Immediate(0x0B)); + __ cmp(eax, 0x0C - 0x0B); BranchOrBacktrack(below_equal, on_no_match); if (mode_ == UC16) { // Compare original value to 0x2028 and 0x2029, using the already - // computed (current_char ^ 0x01 - 0x0b). I.e., check for - // 0x201d (0x2028 - 0x0b) or 0x201e. - __ sub(eax, Immediate(0x2028 - 0x0b)); + // computed (current_char ^ 0x01 - 0x0B). I.e., check for + // 0x201D (0x2028 - 0x0B) or 0x201E. + __ sub(eax, Immediate(0x2028 - 0x0B)); __ cmp(eax, 0x2029 - 0x2028); BranchOrBacktrack(below_equal, on_no_match); } @@ -610,13 +610,13 @@ bool RegExpMacroAssemblerIA32::CheckSpecialCharacterClass(uc16 type, // Match any character. return true; case 'n': { - // Match newlines (0x0a('\n'), 0x0d('\r'), 0x2028 or 0x2029). + // Match newlines (0x0A('\n'), 0x0D('\r'), 0x2028 or 0x2029). // The opposite of '.'. __ mov(eax, current_character()); __ xor_(eax, Immediate(0x01)); - // See if current character is '\n'^1 or '\r'^1, i.e., 0x0b or 0x0c - __ sub(eax, Immediate(0x0b)); - __ cmp(eax, 0x0c - 0x0b); + // See if current character is '\n'^1 or '\r'^1, i.e., 0x0B or 0x0C + __ sub(eax, Immediate(0x0B)); + __ cmp(eax, 0x0C - 0x0B); if (mode_ == LATIN1) { BranchOrBacktrack(above, on_no_match); } else { @@ -624,9 +624,9 @@ bool RegExpMacroAssemblerIA32::CheckSpecialCharacterClass(uc16 type, BranchOrBacktrack(below_equal, &done); DCHECK_EQ(UC16, mode_); // Compare original value to 0x2028 and 0x2029, using the already - // computed (current_char ^ 0x01 - 0x0b). I.e., check for - // 0x201d (0x2028 - 0x0b) or 0x201e. - __ sub(eax, Immediate(0x2028 - 0x0b)); + // computed (current_char ^ 0x01 - 0x0B). I.e., check for + // 0x201D (0x2028 - 0x0B) or 0x201E. + __ sub(eax, Immediate(0x2028 - 0x0B)); __ cmp(eax, 1); BranchOrBacktrack(above, on_no_match); __ bind(&done); diff --git a/deps/v8/src/regexp/jsregexp.cc b/deps/v8/src/regexp/jsregexp.cc index 9d56e4cfa3..a26a1d77ce 100644 --- a/deps/v8/src/regexp/jsregexp.cc +++ b/deps/v8/src/regexp/jsregexp.cc @@ -98,12 +98,36 @@ ContainedInLattice AddRange(ContainedInLattice containment, return containment; } -// Generic RegExp methods. Dispatches to implementation specific methods. - +// More makes code generation slower, less makes V8 benchmark score lower. +const int kMaxLookaheadForBoyerMoore = 8; // In a 3-character pattern you can maximally step forwards 3 characters // at a time, which is not always enough to pay for the extra logic. const int kPatternTooShortForBoyerMoore = 2; +// Identifies the sort of regexps where the regexp engine is faster +// than the code used for atom matches. +static bool HasFewDifferentCharacters(Handle<String> pattern) { + int length = Min(kMaxLookaheadForBoyerMoore, pattern->length()); + if (length <= kPatternTooShortForBoyerMoore) return false; + const int kMod = 128; + bool character_found[kMod]; + int different = 0; + memset(&character_found[0], 0, sizeof(character_found)); + for (int i = 0; i < length; i++) { + int ch = (pattern->Get(i) & (kMod - 1)); + if (!character_found[ch]) { + character_found[ch] = true; + different++; + // We declare a regexp low-alphabet if it has at least 3 times as many + // characters as it has different characters. + if (different * 3 > length) return false; + } + } + return true; +} + +// Generic RegExp methods. Dispatches to implementation specific methods. + MaybeHandle<Object> RegExpImpl::Compile(Handle<JSRegExp> re, Handle<String> pattern, JSRegExp::Flags flags) { @@ -133,7 +157,7 @@ MaybeHandle<Object> RegExpImpl::Compile(Handle<JSRegExp> re, bool has_been_compiled = false; if (parse_result.simple && !IgnoreCase(flags) && !IsSticky(flags) && - pattern->length() <= kPatternTooShortForBoyerMoore) { + !HasFewDifferentCharacters(pattern)) { // Parse-tree is a single atom that is equal to the pattern. AtomCompile(re, pattern, flags, pattern); has_been_compiled = true; @@ -141,12 +165,11 @@ MaybeHandle<Object> RegExpImpl::Compile(Handle<JSRegExp> re, parse_result.capture_count == 0) { RegExpAtom* atom = parse_result.tree->AsAtom(); Vector<const uc16> atom_pattern = atom->data(); - if (!IgnoreCase(atom->flags()) && - atom_pattern.length() <= kPatternTooShortForBoyerMoore) { - Handle<String> atom_string; - ASSIGN_RETURN_ON_EXCEPTION( - isolate, atom_string, - isolate->factory()->NewStringFromTwoByte(atom_pattern), Object); + Handle<String> atom_string; + ASSIGN_RETURN_ON_EXCEPTION( + isolate, atom_string, + isolate->factory()->NewStringFromTwoByte(atom_pattern), Object); + if (!IgnoreCase(atom->flags()) && !HasFewDifferentCharacters(atom_string)) { AtomCompile(re, pattern, flags, atom_string); has_been_compiled = true; } @@ -2433,8 +2456,8 @@ bool RegExpNode::EmitQuickCheck(RegExpCompiler* compiler, } else { // For 2-character preloads in one-byte mode or 1-character preloads in // two-byte mode we also use a 16 bit load with zero extend. - static const uint32_t kTwoByteMask = 0xffff; - static const uint32_t kFourByteMask = 0xffffffff; + static const uint32_t kTwoByteMask = 0xFFFF; + static const uint32_t kFourByteMask = 0xFFFFFFFF; if (details->characters() == 2 && compiler->one_byte()) { if ((mask & kTwoByteMask) == kTwoByteMask) need_mask = false; } else if (details->characters() == 1 && !compiler->one_byte()) { @@ -2554,6 +2577,7 @@ void TextNode::GetQuickCheckDetails(QuickCheckDetails* details, details->positions(characters_filled_in); RegExpCharacterClass* tree = elm.char_class(); ZoneList<CharacterRange>* ranges = tree->ranges(zone()); + DCHECK(!ranges->is_empty()); if (tree->is_negated()) { // A quick check uses multi-character mask and compare. There is no // useful way to incorporate a negative char class into this scheme @@ -2716,12 +2740,11 @@ RegExpNode* SeqRegExpNode::FilterSuccessor(int depth) { return set_replacement(this); } - -// We need to check for the following characters: 0x39c 0x3bc 0x178. +// We need to check for the following characters: 0x39C 0x3BC 0x178. static inline bool RangeContainsLatin1Equivalents(CharacterRange range) { // TODO(dcarney): this could be a lot more efficient. - return range.Contains(0x39c) || - range.Contains(0x3bc) || range.Contains(0x178); + return range.Contains(0x039C) || range.Contains(0x03BC) || + range.Contains(0x0178); } @@ -2973,7 +2996,7 @@ static void EmitHat(RegExpCompiler* compiler, new_trace.backtrack())) { // Newline means \n, \r, 0x2028 or 0x2029. if (!compiler->one_byte()) { - assembler->CheckCharacterAfterAnd(0x2028, 0xfffe, &ok); + assembler->CheckCharacterAfterAnd(0x2028, 0xFFFE, &ok); } assembler->CheckCharacter('\n', &ok); assembler->CheckNotCharacter('\r', new_trace.backtrack()); @@ -2982,8 +3005,6 @@ static void EmitHat(RegExpCompiler* compiler, on_success->Emit(compiler, &new_trace); } -// More makes code generation slower, less makes V8 benchmark score lower. -const int kMaxLookaheadForBoyerMoore = 8; // Emit the code to handle \b and \B (word-boundary or non-word-boundary). void AssertionNode::EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace) { @@ -3253,9 +3274,9 @@ TextNode* TextNode::CreateForCharacterRanges(Zone* zone, JSRegExp::Flags flags) { DCHECK_NOT_NULL(ranges); ZoneList<TextElement>* elms = new (zone) ZoneList<TextElement>(1, zone); - elms->Add( - TextElement::CharClass(new (zone) RegExpCharacterClass(ranges, flags)), - zone); + elms->Add(TextElement::CharClass( + new (zone) RegExpCharacterClass(zone, ranges, flags)), + zone); return new (zone) TextNode(elms, read_backward, on_success); } @@ -3268,10 +3289,10 @@ TextNode* TextNode::CreateForSurrogatePair(Zone* zone, CharacterRange lead, ZoneList<CharacterRange>* trail_ranges = CharacterRange::List(zone, trail); ZoneList<TextElement>* elms = new (zone) ZoneList<TextElement>(2, zone); elms->Add(TextElement::CharClass( - new (zone) RegExpCharacterClass(lead_ranges, flags)), + new (zone) RegExpCharacterClass(zone, lead_ranges, flags)), zone); elms->Add(TextElement::CharClass( - new (zone) RegExpCharacterClass(trail_ranges, flags)), + new (zone) RegExpCharacterClass(zone, trail_ranges, flags)), zone); return new (zone) TextNode(elms, read_backward, on_success); } @@ -5089,10 +5110,9 @@ RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler, ranges = negated; } if (ranges->length() == 0) { - JSRegExp::Flags default_flags = JSRegExp::Flags(); - ranges->Add(CharacterRange::Everything(), zone); + JSRegExp::Flags default_flags; RegExpCharacterClass* fail = - new (zone) RegExpCharacterClass(ranges, default_flags, NEGATED); + new (zone) RegExpCharacterClass(zone, ranges, default_flags); return new (zone) TextNode(fail, compiler->read_backward(), on_success); } if (standard_type() == '*') { @@ -5346,8 +5366,8 @@ void RegExpDisjunction::FixSingleCharacterDisjunctions( if (IsUnicode(flags) && contains_trail_surrogate) { character_class_flags = RegExpCharacterClass::CONTAINS_SPLIT_SURROGATE; } - alternatives->at(write_posn++) = - new (zone) RegExpCharacterClass(ranges, flags, character_class_flags); + alternatives->at(write_posn++) = new (zone) + RegExpCharacterClass(zone, ranges, flags, character_class_flags); } else { // Just copy any trivial alternatives. for (int j = first_in_run; j < i; j++) { diff --git a/deps/v8/src/regexp/mips/regexp-macro-assembler-mips.cc b/deps/v8/src/regexp/mips/regexp-macro-assembler-mips.cc index e45eeeb492..89046a56f3 100644 --- a/deps/v8/src/regexp/mips/regexp-macro-assembler-mips.cc +++ b/deps/v8/src/regexp/mips/regexp-macro-assembler-mips.cc @@ -85,8 +85,7 @@ namespace internal { * bool direct_call = false, * Isolate* isolate); * The call is performed by NativeRegExpMacroAssembler::Execute() - * (in regexp-macro-assembler.cc) via the CALL_GENERATED_REGEXP_CODE macro - * in mips/simulator-mips.h. + * (in regexp-macro-assembler.cc) via the GeneratedCode wrapper. */ #define __ ACCESS_MASM(masm_) @@ -509,11 +508,11 @@ bool RegExpMacroAssemblerMIPS::CheckSpecialCharacterClass(uc16 type, // One byte space characters are '\t'..'\r', ' ' and \u00a0. Label success; __ Branch(&success, eq, current_character(), Operand(' ')); - // Check range 0x09..0x0d. + // Check range 0x09..0x0D. __ Subu(a0, current_character(), Operand('\t')); __ Branch(&success, ls, a0, Operand('\r' - '\t')); // \u00a0 (NBSP). - BranchOrBacktrack(on_no_match, ne, a0, Operand(0x00a0 - '\t')); + BranchOrBacktrack(on_no_match, ne, a0, Operand(0x00A0 - '\t')); __ bind(&success); return true; } @@ -532,34 +531,34 @@ bool RegExpMacroAssemblerMIPS::CheckSpecialCharacterClass(uc16 type, BranchOrBacktrack(on_no_match, ls, a0, Operand('9' - '0')); return true; case '.': { - // Match non-newlines (not 0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029). + // Match non-newlines (not 0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029). __ Xor(a0, current_character(), Operand(0x01)); - // See if current character is '\n'^1 or '\r'^1, i.e., 0x0b or 0x0c. - __ Subu(a0, a0, Operand(0x0b)); - BranchOrBacktrack(on_no_match, ls, a0, Operand(0x0c - 0x0b)); + // See if current character is '\n'^1 or '\r'^1, i.e., 0x0B or 0x0C. + __ Subu(a0, a0, Operand(0x0B)); + BranchOrBacktrack(on_no_match, ls, a0, Operand(0x0C - 0x0B)); if (mode_ == UC16) { // Compare original value to 0x2028 and 0x2029, using the already - // computed (current_char ^ 0x01 - 0x0b). I.e., check for - // 0x201d (0x2028 - 0x0b) or 0x201e. - __ Subu(a0, a0, Operand(0x2028 - 0x0b)); + // computed (current_char ^ 0x01 - 0x0B). I.e., check for + // 0x201D (0x2028 - 0x0B) or 0x201E. + __ Subu(a0, a0, Operand(0x2028 - 0x0B)); BranchOrBacktrack(on_no_match, ls, a0, Operand(1)); } return true; } case 'n': { - // Match newlines (0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029). + // Match newlines (0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029). __ Xor(a0, current_character(), Operand(0x01)); - // See if current character is '\n'^1 or '\r'^1, i.e., 0x0b or 0x0c. - __ Subu(a0, a0, Operand(0x0b)); + // See if current character is '\n'^1 or '\r'^1, i.e., 0x0B or 0x0C. + __ Subu(a0, a0, Operand(0x0B)); if (mode_ == LATIN1) { - BranchOrBacktrack(on_no_match, hi, a0, Operand(0x0c - 0x0b)); + BranchOrBacktrack(on_no_match, hi, a0, Operand(0x0C - 0x0B)); } else { Label done; - BranchOrBacktrack(&done, ls, a0, Operand(0x0c - 0x0b)); + BranchOrBacktrack(&done, ls, a0, Operand(0x0C - 0x0B)); // Compare original value to 0x2028 and 0x2029, using the already - // computed (current_char ^ 0x01 - 0x0b). I.e., check for - // 0x201d (0x2028 - 0x0b) or 0x201e. - __ Subu(a0, a0, Operand(0x2028 - 0x0b)); + // computed (current_char ^ 0x01 - 0x0B). I.e., check for + // 0x201D (0x2028 - 0x0B) or 0x201E. + __ Subu(a0, a0, Operand(0x2028 - 0x0B)); BranchOrBacktrack(on_no_match, hi, a0, Operand(1)); __ bind(&done); } diff --git a/deps/v8/src/regexp/mips64/regexp-macro-assembler-mips64.cc b/deps/v8/src/regexp/mips64/regexp-macro-assembler-mips64.cc index 68a7f87843..841b2931fe 100644 --- a/deps/v8/src/regexp/mips64/regexp-macro-assembler-mips64.cc +++ b/deps/v8/src/regexp/mips64/regexp-macro-assembler-mips64.cc @@ -120,8 +120,7 @@ namespace internal { * bool direct_call = false, * Isolate* isolate); * The call is performed by NativeRegExpMacroAssembler::Execute() - * (in regexp-macro-assembler.cc) via the CALL_GENERATED_REGEXP_CODE macro - * in mips/simulator-mips.h. + * (in regexp-macro-assembler.cc) via the GeneratedCode wrapper. * * clang-format on */ @@ -540,11 +539,11 @@ bool RegExpMacroAssemblerMIPS::CheckSpecialCharacterClass(uc16 type, // One byte space characters are '\t'..'\r', ' ' and \u00a0. Label success; __ Branch(&success, eq, current_character(), Operand(' ')); - // Check range 0x09..0x0d. + // Check range 0x09..0x0D. __ Dsubu(a0, current_character(), Operand('\t')); __ Branch(&success, ls, a0, Operand('\r' - '\t')); // \u00a0 (NBSP). - BranchOrBacktrack(on_no_match, ne, a0, Operand(0x00a0 - '\t')); + BranchOrBacktrack(on_no_match, ne, a0, Operand(0x00A0 - '\t')); __ bind(&success); return true; } @@ -563,34 +562,34 @@ bool RegExpMacroAssemblerMIPS::CheckSpecialCharacterClass(uc16 type, BranchOrBacktrack(on_no_match, ls, a0, Operand('9' - '0')); return true; case '.': { - // Match non-newlines (not 0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029). + // Match non-newlines (not 0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029). __ Xor(a0, current_character(), Operand(0x01)); - // See if current character is '\n'^1 or '\r'^1, i.e., 0x0b or 0x0c. - __ Dsubu(a0, a0, Operand(0x0b)); - BranchOrBacktrack(on_no_match, ls, a0, Operand(0x0c - 0x0b)); + // See if current character is '\n'^1 or '\r'^1, i.e., 0x0B or 0x0C. + __ Dsubu(a0, a0, Operand(0x0B)); + BranchOrBacktrack(on_no_match, ls, a0, Operand(0x0C - 0x0B)); if (mode_ == UC16) { // Compare original value to 0x2028 and 0x2029, using the already - // computed (current_char ^ 0x01 - 0x0b). I.e., check for - // 0x201d (0x2028 - 0x0b) or 0x201e. - __ Dsubu(a0, a0, Operand(0x2028 - 0x0b)); + // computed (current_char ^ 0x01 - 0x0B). I.e., check for + // 0x201D (0x2028 - 0x0B) or 0x201E. + __ Dsubu(a0, a0, Operand(0x2028 - 0x0B)); BranchOrBacktrack(on_no_match, ls, a0, Operand(1)); } return true; } case 'n': { - // Match newlines (0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029). + // Match newlines (0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029). __ Xor(a0, current_character(), Operand(0x01)); - // See if current character is '\n'^1 or '\r'^1, i.e., 0x0b or 0x0c. - __ Dsubu(a0, a0, Operand(0x0b)); + // See if current character is '\n'^1 or '\r'^1, i.e., 0x0B or 0x0C. + __ Dsubu(a0, a0, Operand(0x0B)); if (mode_ == LATIN1) { - BranchOrBacktrack(on_no_match, hi, a0, Operand(0x0c - 0x0b)); + BranchOrBacktrack(on_no_match, hi, a0, Operand(0x0C - 0x0B)); } else { Label done; - BranchOrBacktrack(&done, ls, a0, Operand(0x0c - 0x0b)); + BranchOrBacktrack(&done, ls, a0, Operand(0x0C - 0x0B)); // Compare original value to 0x2028 and 0x2029, using the already - // computed (current_char ^ 0x01 - 0x0b). I.e., check for - // 0x201d (0x2028 - 0x0b) or 0x201e. - __ Dsubu(a0, a0, Operand(0x2028 - 0x0b)); + // computed (current_char ^ 0x01 - 0x0B). I.e., check for + // 0x201D (0x2028 - 0x0B) or 0x201E. + __ Dsubu(a0, a0, Operand(0x2028 - 0x0B)); BranchOrBacktrack(on_no_match, hi, a0, Operand(1)); __ bind(&done); } diff --git a/deps/v8/src/regexp/ppc/regexp-macro-assembler-ppc.cc b/deps/v8/src/regexp/ppc/regexp-macro-assembler-ppc.cc index bc3e643369..1187fc04b8 100644 --- a/deps/v8/src/regexp/ppc/regexp-macro-assembler-ppc.cc +++ b/deps/v8/src/regexp/ppc/regexp-macro-assembler-ppc.cc @@ -86,8 +86,7 @@ namespace internal { * bool direct_call = false, * Isolate* isolate); * The call is performed by NativeRegExpMacroAssembler::Execute() - * (in regexp-macro-assembler.cc) via the CALL_GENERATED_REGEXP_CODE macro - * in ppc/simulator-ppc.h. + * (in regexp-macro-assembler.cc) via the GeneratedCode wrapper. */ #define __ ACCESS_MASM(masm_) @@ -522,12 +521,12 @@ bool RegExpMacroAssemblerPPC::CheckSpecialCharacterClass(uc16 type, Label success; __ cmpi(current_character(), Operand(' ')); __ beq(&success); - // Check range 0x09..0x0d + // Check range 0x09..0x0D __ subi(r3, current_character(), Operand('\t')); __ cmpli(r3, Operand('\r' - '\t')); __ ble(&success); // \u00a0 (NBSP). - __ cmpi(r3, Operand(0x00a0 - '\t')); + __ cmpi(r3, Operand(0x00A0 - '\t')); BranchOrBacktrack(ne, on_no_match); __ bind(&success); return true; @@ -549,37 +548,37 @@ bool RegExpMacroAssemblerPPC::CheckSpecialCharacterClass(uc16 type, BranchOrBacktrack(le, on_no_match); return true; case '.': { - // Match non-newlines (not 0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029) + // Match non-newlines (not 0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029) __ xori(r3, current_character(), Operand(0x01)); - // See if current character is '\n'^1 or '\r'^1, i.e., 0x0b or 0x0c - __ subi(r3, r3, Operand(0x0b)); - __ cmpli(r3, Operand(0x0c - 0x0b)); + // See if current character is '\n'^1 or '\r'^1, i.e., 0x0B or 0x0C + __ subi(r3, r3, Operand(0x0B)); + __ cmpli(r3, Operand(0x0C - 0x0B)); BranchOrBacktrack(le, on_no_match); if (mode_ == UC16) { // Compare original value to 0x2028 and 0x2029, using the already - // computed (current_char ^ 0x01 - 0x0b). I.e., check for - // 0x201d (0x2028 - 0x0b) or 0x201e. - __ subi(r3, r3, Operand(0x2028 - 0x0b)); + // computed (current_char ^ 0x01 - 0x0B). I.e., check for + // 0x201D (0x2028 - 0x0B) or 0x201E. + __ subi(r3, r3, Operand(0x2028 - 0x0B)); __ cmpli(r3, Operand(1)); BranchOrBacktrack(le, on_no_match); } return true; } case 'n': { - // Match newlines (0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029) + // Match newlines (0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029) __ xori(r3, current_character(), Operand(0x01)); - // See if current character is '\n'^1 or '\r'^1, i.e., 0x0b or 0x0c - __ subi(r3, r3, Operand(0x0b)); - __ cmpli(r3, Operand(0x0c - 0x0b)); + // See if current character is '\n'^1 or '\r'^1, i.e., 0x0B or 0x0C + __ subi(r3, r3, Operand(0x0B)); + __ cmpli(r3, Operand(0x0C - 0x0B)); if (mode_ == LATIN1) { BranchOrBacktrack(gt, on_no_match); } else { Label done; __ ble(&done); // Compare original value to 0x2028 and 0x2029, using the already - // computed (current_char ^ 0x01 - 0x0b). I.e., check for - // 0x201d (0x2028 - 0x0b) or 0x201e. - __ subi(r3, r3, Operand(0x2028 - 0x0b)); + // computed (current_char ^ 0x01 - 0x0B). I.e., check for + // 0x201D (0x2028 - 0x0B) or 0x201E. + __ subi(r3, r3, Operand(0x2028 - 0x0B)); __ cmpli(r3, Operand(1)); BranchOrBacktrack(gt, on_no_match); __ bind(&done); diff --git a/deps/v8/src/regexp/regexp-ast.h b/deps/v8/src/regexp/regexp-ast.h index e60621f8b6..1a94832f71 100644 --- a/deps/v8/src/regexp/regexp-ast.h +++ b/deps/v8/src/regexp/regexp-ast.h @@ -306,11 +306,17 @@ class RegExpCharacterClass final : public RegExpTree { typedef base::Flags<Flag> CharacterClassFlags; RegExpCharacterClass( - ZoneList<CharacterRange>* ranges, JSRegExp::Flags flags, + Zone* zone, ZoneList<CharacterRange>* ranges, JSRegExp::Flags flags, CharacterClassFlags character_class_flags = CharacterClassFlags()) : set_(ranges), flags_(flags), - character_class_flags_(character_class_flags) {} + character_class_flags_(character_class_flags) { + // Convert the empty set of ranges to the negated Everything() range. + if (ranges->is_empty()) { + ranges->Add(CharacterRange::Everything(), zone); + character_class_flags_ ^= NEGATED; + } + } RegExpCharacterClass(uc16 type, JSRegExp::Flags flags) : set_(type), flags_(flags), @@ -352,7 +358,7 @@ class RegExpCharacterClass final : public RegExpTree { private: CharacterSet set_; const JSRegExp::Flags flags_; - const CharacterClassFlags character_class_flags_; + CharacterClassFlags character_class_flags_; }; diff --git a/deps/v8/src/regexp/regexp-macro-assembler.cc b/deps/v8/src/regexp/regexp-macro-assembler.cc index 600757a72b..af285abcb0 100644 --- a/deps/v8/src/regexp/regexp-macro-assembler.cc +++ b/deps/v8/src/regexp/regexp-macro-assembler.cc @@ -286,9 +286,15 @@ NativeRegExpMacroAssembler::Result NativeRegExpMacroAssembler::Execute( Address stack_base = stack_scope.stack()->stack_base(); int direct_call = 0; - int result = CALL_GENERATED_REGEXP_CODE( - isolate, code->entry(), input, start_offset, input_start, input_end, - output, output_size, stack_base, direct_call, isolate); + + using RegexpMatcherSig = int( + String * input, int start_offset, // NOLINT(readability/casting) + const byte* input_start, const byte* input_end, int* output, + int output_size, Address stack_base, int direct_call, Isolate* isolate); + + auto fn = GeneratedCode<RegexpMatcherSig>::FromCode(code); + int result = fn.Call(input, start_offset, input_start, input_end, output, + output_size, stack_base, direct_call, isolate); DCHECK(result >= RETRY); if (result == EXCEPTION && !isolate->has_pending_exception()) { @@ -299,7 +305,7 @@ NativeRegExpMacroAssembler::Result NativeRegExpMacroAssembler::Execute( return static_cast<Result>(result); } - +// clang-format off const byte NativeRegExpMacroAssembler::word_character_map[] = { 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, @@ -308,18 +314,18 @@ const byte NativeRegExpMacroAssembler::word_character_map[] = { 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, - 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // '0' - '7' - 0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, // '8' - '9' - - 0x00u, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'A' - 'G' - 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'H' - 'O' - 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'P' - 'W' - 0xffu, 0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0xffu, // 'X' - 'Z', '_' - - 0x00u, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'a' - 'g' - 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'h' - 'o' - 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'p' - 'w' - 0xffu, 0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, // 'x' - 'z' + 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, // '0' - '7' + 0xFFu, 0xFFu, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, // '8' - '9' + + 0x00u, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, // 'A' - 'G' + 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, // 'H' - 'O' + 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, // 'P' - 'W' + 0xFFu, 0xFFu, 0xFFu, 0x00u, 0x00u, 0x00u, 0x00u, 0xFFu, // 'X' - 'Z', '_' + + 0x00u, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, // 'a' - 'g' + 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, // 'h' - 'o' + 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, // 'p' - 'w' + 0xFFu, 0xFFu, 0xFFu, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, // 'x' - 'z' // Latin-1 range 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, @@ -341,7 +347,7 @@ const byte NativeRegExpMacroAssembler::word_character_map[] = { 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, }; - +// clang-format on Address NativeRegExpMacroAssembler::GrowStack(Address stack_pointer, Address* stack_base, diff --git a/deps/v8/src/regexp/regexp-parser.cc b/deps/v8/src/regexp/regexp-parser.cc index 2c6aa5b23a..a7724c5d42 100644 --- a/deps/v8/src/regexp/regexp-parser.cc +++ b/deps/v8/src/regexp/regexp-parser.cc @@ -280,12 +280,12 @@ RegExpTree* RegExpParser::ParseDisjunction() { // Everything. CharacterRange::AddClassEscape('*', ranges, false, zone()); } else { - // Everything except \x0a, \x0d, \u2028 and \u2029 + // Everything except \x0A, \x0D, \u2028 and \u2029 CharacterRange::AddClassEscape('.', ranges, false, zone()); } RegExpCharacterClass* cc = - new (zone()) RegExpCharacterClass(ranges, builder->flags()); + new (zone()) RegExpCharacterClass(zone(), ranges, builder->flags()); builder->AddCharacterClass(cc); break; } @@ -332,8 +332,8 @@ RegExpTree* RegExpParser::ParseDisjunction() { new (zone()) ZoneList<CharacterRange>(2, zone()); CharacterRange::AddClassEscape( c, ranges, unicode() && builder->ignore_case(), zone()); - RegExpCharacterClass* cc = - new (zone()) RegExpCharacterClass(ranges, builder->flags()); + RegExpCharacterClass* cc = new (zone()) + RegExpCharacterClass(zone(), ranges, builder->flags()); builder->AddCharacterClass(cc); break; } @@ -348,8 +348,8 @@ RegExpTree* RegExpParser::ParseDisjunction() { if (!ParsePropertyClass(ranges, p == 'P')) { return ReportError(CStrVector("Invalid property name")); } - RegExpCharacterClass* cc = - new (zone()) RegExpCharacterClass(ranges, builder->flags()); + RegExpCharacterClass* cc = new (zone()) + RegExpCharacterClass(zone(), ranges, builder->flags()); builder->AddCharacterClass(cc); } else { // With /u, no identity escapes except for syntax characters @@ -451,7 +451,7 @@ RegExpTree* RegExpParser::ParseDisjunction() { builder->AddCharacter('\\'); } else { Advance(2); - builder->AddCharacter(controlLetter & 0x1f); + builder->AddCharacter(controlLetter & 0x1F); } break; } @@ -1145,7 +1145,7 @@ bool RegExpParser::ParseUnicodeEscape(uc32* value) { if (current() == '{' && unicode()) { int start = position(); Advance(); - if (ParseUnlimitedLengthHexNumber(0x10ffff, value)) { + if (ParseUnlimitedLengthHexNumber(0x10FFFF, value)) { if (current() == '}') { Advance(); return true; @@ -1255,10 +1255,15 @@ bool LookupSpecialPropertyValueName(const char* name, ZoneList<CharacterRange>* result, bool negate, Zone* zone) { if (NameEquals(name, "Any")) { - if (!negate) result->Add(CharacterRange::Everything(), zone); + if (negate) { + // Leave the list of character ranges empty, since the negation of 'Any' + // is the empty set. + } else { + result->Add(CharacterRange::Everything(), zone); + } } else if (NameEquals(name, "ASCII")) { result->Add(negate ? CharacterRange::Range(0x80, String::kMaxCodePoint) - : CharacterRange::Range(0x0, 0x7f), + : CharacterRange::Range(0x0, 0x7F), zone); } else if (NameEquals(name, "Assigned")) { return LookupPropertyValueName(UCHAR_GENERAL_CATEGORY, "Unassigned", @@ -1486,8 +1491,8 @@ uc32 RegExpParser::ParseClassCharacterEscape() { if (letter >= 'A' && letter <= 'Z') { Advance(2); // Control letters mapped to ASCII control characters in the range - // 0x00-0x1f. - return controlLetter & 0x1f; + // 0x00-0x1F. + return controlLetter & 0x1F; } if (unicode()) { // With /u, invalid escapes are not treated as identity escapes. @@ -1497,7 +1502,7 @@ uc32 RegExpParser::ParseClassCharacterEscape() { if ((controlLetter >= '0' && controlLetter <= '9') || controlLetter == '_') { Advance(2); - return controlLetter & 0x1f; + return controlLetter & 0x1F; } // We match JSC in reading the backslash as a literal // character instead of as starting an escape. @@ -1672,14 +1677,10 @@ RegExpTree* RegExpParser::ParseCharacterClass(const RegExpBuilder* builder) { return ReportError(CStrVector(kUnterminated)); } Advance(); - if (ranges->length() == 0) { - ranges->Add(CharacterRange::Everything(), zone()); - is_negated = !is_negated; - } RegExpCharacterClass::CharacterClassFlags character_class_flags; if (is_negated) character_class_flags = RegExpCharacterClass::NEGATED; - return new (zone()) - RegExpCharacterClass(ranges, builder->flags(), character_class_flags); + return new (zone()) RegExpCharacterClass(zone(), ranges, builder->flags(), + character_class_flags); } @@ -1853,7 +1854,8 @@ void RegExpBuilder::AddCharacterClass(RegExpCharacterClass* cc) { void RegExpBuilder::AddCharacterClassForDesugaring(uc32 c) { AddTerm(new (zone()) RegExpCharacterClass( - CharacterRange::List(zone(), CharacterRange::Singleton(c)), flags_)); + zone(), CharacterRange::List(zone(), CharacterRange::Singleton(c)), + flags_)); } diff --git a/deps/v8/src/regexp/regexp-utils.cc b/deps/v8/src/regexp/regexp-utils.cc index 16427e2933..d483125dd6 100644 --- a/deps/v8/src/regexp/regexp-utils.cc +++ b/deps/v8/src/regexp/regexp-utils.cc @@ -134,7 +134,7 @@ bool RegExpUtils::IsUnmodifiedRegExp(Isolate* isolate, Handle<Object> obj) { // TODO(ishell): Update this check once map changes for constant field // tracking are landing. -#if defined(DEBUG) || defined(ENABLE_SLOWFAST_SWITCH) +#ifdef V8_ENABLE_FORCE_SLOW_PATH if (isolate->force_slow_path()) return false; #endif diff --git a/deps/v8/src/regexp/s390/regexp-macro-assembler-s390.cc b/deps/v8/src/regexp/s390/regexp-macro-assembler-s390.cc index fc9548fc78..4f8f234171 100644 --- a/deps/v8/src/regexp/s390/regexp-macro-assembler-s390.cc +++ b/deps/v8/src/regexp/s390/regexp-macro-assembler-s390.cc @@ -88,8 +88,7 @@ namespace internal { * bool direct_call = false, * Isolate* isolate); * The call is performed by NativeRegExpMacroAssembler::Execute() - * (in regexp-macro-assembler.cc) via the CALL_GENERATED_REGEXP_CODE macro - * in s390/simulator-s390.h. + * (in regexp-macro-assembler.cc) via the GeneratedCode wrapper. */ #define __ ACCESS_MASM(masm_) @@ -493,12 +492,12 @@ bool RegExpMacroAssemblerS390::CheckSpecialCharacterClass(uc16 type, Label success; __ CmpP(current_character(), Operand(' ')); __ beq(&success); - // Check range 0x09..0x0d + // Check range 0x09..0x0D __ SubP(r2, current_character(), Operand('\t')); __ CmpLogicalP(r2, Operand('\r' - '\t')); __ ble(&success); // \u00a0 (NBSP). - __ CmpLogicalP(r2, Operand(0x00a0 - '\t')); + __ CmpLogicalP(r2, Operand(0x00A0 - '\t')); BranchOrBacktrack(ne, on_no_match); __ bind(&success); return true; @@ -520,37 +519,37 @@ bool RegExpMacroAssemblerS390::CheckSpecialCharacterClass(uc16 type, BranchOrBacktrack(le, on_no_match); return true; case '.': { - // Match non-newlines (not 0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029) + // Match non-newlines (not 0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029) __ XorP(r2, current_character(), Operand(0x01)); - // See if current character is '\n'^1 or '\r'^1, i.e., 0x0b or 0x0c - __ SubP(r2, Operand(0x0b)); - __ CmpLogicalP(r2, Operand(0x0c - 0x0b)); + // See if current character is '\n'^1 or '\r'^1, i.e., 0x0B or 0x0C + __ SubP(r2, Operand(0x0B)); + __ CmpLogicalP(r2, Operand(0x0C - 0x0B)); BranchOrBacktrack(le, on_no_match); if (mode_ == UC16) { // Compare original value to 0x2028 and 0x2029, using the already - // computed (current_char ^ 0x01 - 0x0b). I.e., check for - // 0x201d (0x2028 - 0x0b) or 0x201e. - __ SubP(r2, Operand(0x2028 - 0x0b)); + // computed (current_char ^ 0x01 - 0x0B). I.e., check for + // 0x201D (0x2028 - 0x0B) or 0x201E. + __ SubP(r2, Operand(0x2028 - 0x0B)); __ CmpLogicalP(r2, Operand(1)); BranchOrBacktrack(le, on_no_match); } return true; } case 'n': { - // Match newlines (0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029) + // Match newlines (0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029) __ XorP(r2, current_character(), Operand(0x01)); - // See if current character is '\n'^1 or '\r'^1, i.e., 0x0b or 0x0c - __ SubP(r2, Operand(0x0b)); - __ CmpLogicalP(r2, Operand(0x0c - 0x0b)); + // See if current character is '\n'^1 or '\r'^1, i.e., 0x0B or 0x0C + __ SubP(r2, Operand(0x0B)); + __ CmpLogicalP(r2, Operand(0x0C - 0x0B)); if (mode_ == LATIN1) { BranchOrBacktrack(gt, on_no_match); } else { Label done; __ ble(&done); // Compare original value to 0x2028 and 0x2029, using the already - // computed (current_char ^ 0x01 - 0x0b). I.e., check for - // 0x201d (0x2028 - 0x0b) or 0x201e. - __ SubP(r2, Operand(0x2028 - 0x0b)); + // computed (current_char ^ 0x01 - 0x0B). I.e., check for + // 0x201D (0x2028 - 0x0B) or 0x201E. + __ SubP(r2, Operand(0x2028 - 0x0B)); __ CmpLogicalP(r2, Operand(1)); BranchOrBacktrack(gt, on_no_match); __ bind(&done); @@ -773,7 +772,7 @@ Handle<HeapObject> RegExpMacroAssemblerS390::GetCode(Handle<String> source) { // and the following use of that register. __ lay(r2, MemOperand(r2, num_saved_registers_ * kIntSize)); for (int i = 0; i < num_saved_registers_;) { - if (false && i < num_saved_registers_ - 4) { + if ((false) && i < num_saved_registers_ - 4) { // TODO(john.yan): Can be optimized by SIMD instructions __ LoadMultipleP(r3, r6, register_location(i + 3)); if (mode_ == UC16) { diff --git a/deps/v8/src/regexp/x64/regexp-macro-assembler-x64.cc b/deps/v8/src/regexp/x64/regexp-macro-assembler-x64.cc index 1e21182c35..eb57b29602 100644 --- a/deps/v8/src/regexp/x64/regexp-macro-assembler-x64.cc +++ b/deps/v8/src/regexp/x64/regexp-macro-assembler-x64.cc @@ -551,12 +551,12 @@ bool RegExpMacroAssemblerX64::CheckSpecialCharacterClass(uc16 type, Label success; __ cmpl(current_character(), Immediate(' ')); __ j(equal, &success, Label::kNear); - // Check range 0x09..0x0d + // Check range 0x09..0x0D __ leap(rax, Operand(current_character(), -'\t')); __ cmpl(rax, Immediate('\r' - '\t')); __ j(below_equal, &success, Label::kNear); // \u00a0 (NBSP). - __ cmpl(rax, Immediate(0x00a0 - '\t')); + __ cmpl(rax, Immediate(0x00A0 - '\t')); BranchOrBacktrack(not_equal, on_no_match); __ bind(&success); return true; @@ -578,39 +578,39 @@ bool RegExpMacroAssemblerX64::CheckSpecialCharacterClass(uc16 type, BranchOrBacktrack(below_equal, on_no_match); return true; case '.': { - // Match non-newlines (not 0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029) + // Match non-newlines (not 0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029) __ movl(rax, current_character()); __ xorp(rax, Immediate(0x01)); - // See if current character is '\n'^1 or '\r'^1, i.e., 0x0b or 0x0c - __ subl(rax, Immediate(0x0b)); - __ cmpl(rax, Immediate(0x0c - 0x0b)); + // See if current character is '\n'^1 or '\r'^1, i.e., 0x0B or 0x0C + __ subl(rax, Immediate(0x0B)); + __ cmpl(rax, Immediate(0x0C - 0x0B)); BranchOrBacktrack(below_equal, on_no_match); if (mode_ == UC16) { // Compare original value to 0x2028 and 0x2029, using the already - // computed (current_char ^ 0x01 - 0x0b). I.e., check for - // 0x201d (0x2028 - 0x0b) or 0x201e. - __ subl(rax, Immediate(0x2028 - 0x0b)); + // computed (current_char ^ 0x01 - 0x0B). I.e., check for + // 0x201D (0x2028 - 0x0B) or 0x201E. + __ subl(rax, Immediate(0x2028 - 0x0B)); __ cmpl(rax, Immediate(0x2029 - 0x2028)); BranchOrBacktrack(below_equal, on_no_match); } return true; } case 'n': { - // Match newlines (0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029) + // Match newlines (0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029) __ movl(rax, current_character()); __ xorp(rax, Immediate(0x01)); - // See if current character is '\n'^1 or '\r'^1, i.e., 0x0b or 0x0c - __ subl(rax, Immediate(0x0b)); - __ cmpl(rax, Immediate(0x0c - 0x0b)); + // See if current character is '\n'^1 or '\r'^1, i.e., 0x0B or 0x0C + __ subl(rax, Immediate(0x0B)); + __ cmpl(rax, Immediate(0x0C - 0x0B)); if (mode_ == LATIN1) { BranchOrBacktrack(above, on_no_match); } else { Label done; BranchOrBacktrack(below_equal, &done); // Compare original value to 0x2028 and 0x2029, using the already - // computed (current_char ^ 0x01 - 0x0b). I.e., check for - // 0x201d (0x2028 - 0x0b) or 0x201e. - __ subl(rax, Immediate(0x2028 - 0x0b)); + // computed (current_char ^ 0x01 - 0x0B). I.e., check for + // 0x201D (0x2028 - 0x0B) or 0x201E. + __ subl(rax, Immediate(0x2028 - 0x0B)); __ cmpl(rax, Immediate(0x2029 - 0x2028)); BranchOrBacktrack(above, on_no_match); __ bind(&done); |