summaryrefslogtreecommitdiff
path: root/chromium/v8/src/regexp/regexp-compiler.cc
diff options
context:
space:
mode:
authorAllan Sandfeld Jensen <allan.jensen@qt.io>2019-09-23 16:06:29 +0200
committerAllan Sandfeld Jensen <allan.jensen@qt.io>2019-09-24 11:41:55 +0000
commitbac1035f131c0b95b75fb39ffd1a39652843de9f (patch)
tree44839fddbea648d54e4be47bcfbe4a5979bacd29 /chromium/v8/src/regexp/regexp-compiler.cc
parent271a6c3487a14599023a9106329505597638d793 (diff)
downloadqtwebengine-chromium-bac1035f131c0b95b75fb39ffd1a39652843de9f.tar.gz
BASELINE: Update Chromium to 77.0.3865.98
Change-Id: Ice85979eb8b64af9a3c649d719bec6ea14ac3bf7 Reviewed-by: Michael BrĂ¼ning <michael.bruning@qt.io>
Diffstat (limited to 'chromium/v8/src/regexp/regexp-compiler.cc')
-rw-r--r--chromium/v8/src/regexp/regexp-compiler.cc73
1 files changed, 52 insertions, 21 deletions
diff --git a/chromium/v8/src/regexp/regexp-compiler.cc b/chromium/v8/src/regexp/regexp-compiler.cc
index c643f988c0f..c70bbc3e4a5 100644
--- a/chromium/v8/src/regexp/regexp-compiler.cc
+++ b/chromium/v8/src/regexp/regexp-compiler.cc
@@ -1970,9 +1970,11 @@ void ChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details,
}
}
+namespace {
+
// Check for [0-9A-Z_a-z].
-static void EmitWordCheck(RegExpMacroAssembler* assembler, Label* word,
- Label* non_word, bool fall_through_on_word) {
+void EmitWordCheck(RegExpMacroAssembler* assembler, Label* word,
+ Label* non_word, bool fall_through_on_word) {
if (assembler->CheckSpecialCharacterClass(
fall_through_on_word ? 'w' : 'W',
fall_through_on_word ? non_word : word)) {
@@ -1994,24 +1996,37 @@ static void EmitWordCheck(RegExpMacroAssembler* assembler, Label* word,
// Emit the code to check for a ^ in multiline mode (1-character lookbehind
// that matches newline or the start of input).
-static void EmitHat(RegExpCompiler* compiler, RegExpNode* on_success,
- Trace* trace) {
+void EmitHat(RegExpCompiler* compiler, RegExpNode* on_success, Trace* trace) {
RegExpMacroAssembler* assembler = compiler->macro_assembler();
- // We will be loading the previous character into the current character
- // register.
+
+ // We will load the previous character into the current character register.
Trace new_trace(*trace);
new_trace.InvalidateCurrentCharacter();
+ // A positive (> 0) cp_offset means we've already successfully matched a
+ // non-empty-width part of the pattern, and thus cannot be at or before the
+ // start of the subject string. We can thus skip both at-start and
+ // bounds-checks when loading the one-character lookbehind.
+ const bool may_be_at_or_before_subject_string_start =
+ new_trace.cp_offset() <= 0;
+
Label ok;
- if (new_trace.cp_offset() == 0) {
- // The start of input counts as a newline in this context, so skip to
- // ok if we are at the start.
- assembler->CheckAtStart(&ok);
+ if (may_be_at_or_before_subject_string_start) {
+ // The start of input counts as a newline in this context, so skip to ok if
+ // we are at the start.
+ // TODO(jgruber): It would be less awkward to use CheckAtStart here, but
+ // that currently does not support a non-zero cp_offset.
+ Label not_at_start;
+ assembler->CheckNotAtStart(new_trace.cp_offset(), &not_at_start);
+ assembler->GoTo(&ok);
+ assembler->Bind(&not_at_start);
}
- // We already checked that we are not at the start of input so it must be
- // OK to load the previous character.
+
+ // If we've already checked that we are not at the start of input, it's okay
+ // to load the previous character without bounds checks.
+ const bool can_skip_bounds_check = !may_be_at_or_before_subject_string_start;
assembler->LoadCurrentCharacter(new_trace.cp_offset() - 1,
- new_trace.backtrack(), false);
+ new_trace.backtrack(), can_skip_bounds_check);
if (!assembler->CheckSpecialCharacterClass('n', new_trace.backtrack())) {
// Newline means \n, \r, 0x2028 or 0x2029.
if (!compiler->one_byte()) {
@@ -2024,6 +2039,8 @@ static void EmitHat(RegExpCompiler* compiler, RegExpNode* on_success,
on_success->Emit(compiler, &new_trace);
}
+} // namespace
+
// Emit the code to handle \b and \B (word-boundary or non-word-boundary).
void AssertionNode::EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace) {
RegExpMacroAssembler* assembler = compiler->macro_assembler();
@@ -2080,21 +2097,35 @@ void AssertionNode::BacktrackIfPrevious(
Trace new_trace(*trace);
new_trace.InvalidateCurrentCharacter();
- Label fall_through, dummy;
-
+ Label fall_through;
Label* non_word = backtrack_if_previous == kIsNonWord ? new_trace.backtrack()
: &fall_through;
Label* word = backtrack_if_previous == kIsNonWord ? &fall_through
: new_trace.backtrack();
- if (new_trace.cp_offset() == 0) {
+ // A positive (> 0) cp_offset means we've already successfully matched a
+ // non-empty-width part of the pattern, and thus cannot be at or before the
+ // start of the subject string. We can thus skip both at-start and
+ // bounds-checks when loading the one-character lookbehind.
+ const bool may_be_at_or_before_subject_string_start =
+ new_trace.cp_offset() <= 0;
+
+ if (may_be_at_or_before_subject_string_start) {
// The start of input counts as a non-word character, so the question is
// decided if we are at the start.
- assembler->CheckAtStart(non_word);
- }
- // We already checked that we are not at the start of input so it must be
- // OK to load the previous character.
- assembler->LoadCurrentCharacter(new_trace.cp_offset() - 1, &dummy, false);
+ // TODO(jgruber): It would be less awkward to use CheckAtStart here, but
+ // that currently does not support a non-zero cp_offset.
+ Label not_at_start;
+ assembler->CheckNotAtStart(new_trace.cp_offset(), &not_at_start);
+ assembler->GoTo(non_word);
+ assembler->Bind(&not_at_start);
+ }
+
+ // If we've already checked that we are not at the start of input, it's okay
+ // to load the previous character without bounds checks.
+ const bool can_skip_bounds_check = !may_be_at_or_before_subject_string_start;
+ assembler->LoadCurrentCharacter(new_trace.cp_offset() - 1, non_word,
+ can_skip_bounds_check);
EmitWordCheck(assembler, word, non_word, backtrack_if_previous == kIsNonWord);
assembler->Bind(&fall_through);