summaryrefslogtreecommitdiff
path: root/chromium/third_party/re2
diff options
context:
space:
mode:
authorAllan Sandfeld Jensen <allan.jensen@qt.io>2020-10-12 14:27:29 +0200
committerAllan Sandfeld Jensen <allan.jensen@qt.io>2020-10-13 09:35:20 +0000
commitc30a6232df03e1efbd9f3b226777b07e087a1122 (patch)
treee992f45784689f373bcc38d1b79a239ebe17ee23 /chromium/third_party/re2
parent7b5b123ac58f58ffde0f4f6e488bcd09aa4decd3 (diff)
downloadqtwebengine-chromium-85-based.tar.gz
BASELINE: Update Chromium to 85.0.4183.14085-based
Change-Id: Iaa42f4680837c57725b1344f108c0196741f6057 Reviewed-by: Allan Sandfeld Jensen <allan.jensen@qt.io>
Diffstat (limited to 'chromium/third_party/re2')
-rw-r--r--chromium/third_party/re2/src/BUILD18
-rw-r--r--chromium/third_party/re2/src/CMakeLists.txt10
-rw-r--r--chromium/third_party/re2/src/Makefile63
-rw-r--r--chromium/third_party/re2/src/re2/bitstate.cc13
-rw-r--r--chromium/third_party/re2/src/re2/compile.cc146
-rw-r--r--chromium/third_party/re2/src/re2/dfa.cc129
-rw-r--r--chromium/third_party/re2/src/re2/filtered_re2.cc20
-rw-r--r--chromium/third_party/re2/src/re2/filtered_re2.h15
-rw-r--r--chromium/third_party/re2/src/re2/nfa.cc144
-rw-r--r--chromium/third_party/re2/src/re2/parse.cc13
-rw-r--r--chromium/third_party/re2/src/re2/prog.cc79
-rw-r--r--chromium/third_party/re2/src/re2/prog.h25
-rw-r--r--chromium/third_party/re2/src/re2/re2.cc15
-rw-r--r--chromium/third_party/re2/src/re2/re2.h4
-rw-r--r--chromium/third_party/re2/src/re2/regexp.cc121
-rw-r--r--chromium/third_party/re2/src/re2/regexp.h8
-rw-r--r--chromium/third_party/re2/src/re2/set.cc37
-rw-r--r--chromium/third_party/re2/src/re2/set.h13
-rw-r--r--chromium/third_party/re2/src/re2/testing/backtrack.cc28
-rw-r--r--chromium/third_party/re2/src/re2/testing/compile_test.cc2
-rw-r--r--chromium/third_party/re2/src/re2/testing/filtered_re2_test.cc46
-rw-r--r--chromium/third_party/re2/src/re2/testing/re2_test.cc56
-rw-r--r--chromium/third_party/re2/src/re2/testing/required_prefix_test.cc85
-rw-r--r--chromium/third_party/re2/src/re2/testing/set_test.cc26
-rw-r--r--chromium/third_party/re2/src/re2/walker-inl.h26
25 files changed, 683 insertions, 459 deletions
diff --git a/chromium/third_party/re2/src/BUILD b/chromium/third_party/re2/src/BUILD
index 480646fc1a8..8a1a9c0b8f2 100644
--- a/chromium/third_party/re2/src/BUILD
+++ b/chromium/third_party/re2/src/BUILD
@@ -9,18 +9,18 @@ licenses(["notice"])
exports_files(["LICENSE"])
config_setting(
- name = "darwin",
+ name = "macos",
values = {"cpu": "darwin"},
)
config_setting(
- name = "windows",
- values = {"cpu": "x64_windows"},
+ name = "wasm",
+ values = {"cpu": "wasm"},
)
config_setting(
- name = "windows_msvc",
- values = {"cpu": "x64_windows_msvc"},
+ name = "windows",
+ values = {"cpu": "x64_windows"},
)
load("@rules_cc//cc:defs.bzl", "cc_binary", "cc_library", "cc_test")
@@ -75,17 +75,17 @@ cc_library(
"re2/stringpiece.h",
],
copts = select({
+ ":wasm": [],
":windows": [],
- ":windows_msvc": [],
"//conditions:default": ["-pthread"],
}),
linkopts = select({
- # Darwin doesn't need `-pthread' when linking and it appears that
+ # macOS doesn't need `-pthread' when linking and it appears that
# older versions of Clang will warn about the unused command line
# argument, so just don't pass it.
- ":darwin": [],
+ ":macos": [],
+ ":wasm": [],
":windows": [],
- ":windows_msvc": [],
"//conditions:default": ["-pthread"],
}),
visibility = ["//visibility:public"],
diff --git a/chromium/third_party/re2/src/CMakeLists.txt b/chromium/third_party/re2/src/CMakeLists.txt
index 5c980f04622..44a4772088a 100644
--- a/chromium/third_party/re2/src/CMakeLists.txt
+++ b/chromium/third_party/re2/src/CMakeLists.txt
@@ -13,6 +13,11 @@ project(RE2 CXX)
include(CTest)
include(GNUInstallDirs)
+if(NOT CMAKE_CXX_STANDARD)
+ set(CMAKE_CXX_STANDARD 11)
+ set(CMAKE_CXX_STANDARD_REQUIRED ON)
+endif()
+
option(BUILD_SHARED_LIBS "build shared libraries" OFF)
option(USEPCRE "use PCRE in tests and benchmarks" OFF)
@@ -36,11 +41,6 @@ if(CMAKE_CXX_COMPILER_ID MATCHES "MSVC")
# Without a byte order mark (BOM), Visual Studio assumes that the source
# file is encoded using the current user code page, so we specify UTF-8.
add_compile_options(/utf-8)
-elseif(CYGWIN OR MINGW)
- # See https://stackoverflow.com/questions/38139631 for details.
- add_compile_options(-std=gnu++11)
-elseif(CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang")
- add_compile_options(-std=c++11)
endif()
if(WIN32)
diff --git a/chromium/third_party/re2/src/Makefile b/chromium/third_party/re2/src/Makefile
index a550717017a..20b8d0f3ca2 100644
--- a/chromium/third_party/re2/src/Makefile
+++ b/chromium/third_party/re2/src/Makefile
@@ -68,6 +68,7 @@ SOEXTVER00=$(SOEXT).$(SONAME).0.0
MAKE_SHARED_LIBRARY=$(CXX) -shared -Wl,-soname,libre2.$(SOEXTVER),--version-script,libre2.symbols $(RE2_LDFLAGS) $(LDFLAGS)
endif
+.PHONY: all
all: obj/libre2.a obj/so/libre2.$(SOEXT)
INSTALL_HFILES=\
@@ -176,40 +177,49 @@ DTESTOFILES=$(patsubst obj/%,obj/dbg/%,$(TESTOFILES))
DTESTS=$(patsubst obj/%,obj/dbg/%,$(TESTS))
DBIGTESTS=$(patsubst obj/%,obj/dbg/%,$(BIGTESTS))
+.PRECIOUS: obj/%.o
obj/%.o: %.cc $(HFILES)
@mkdir -p $$(dirname $@)
$(CXX) -c -o $@ $(CPPFLAGS) $(RE2_CXXFLAGS) $(CXXFLAGS) -DNDEBUG $*.cc
+.PRECIOUS: obj/dbg/%.o
obj/dbg/%.o: %.cc $(HFILES)
@mkdir -p $$(dirname $@)
$(CXX) -c -o $@ $(CPPFLAGS) $(RE2_CXXFLAGS) $(CXXFLAGS) $*.cc
+.PRECIOUS: obj/so/%.o
obj/so/%.o: %.cc $(HFILES)
@mkdir -p $$(dirname $@)
$(CXX) -c -o $@ -fPIC $(CPPFLAGS) $(RE2_CXXFLAGS) $(CXXFLAGS) -DNDEBUG $*.cc
+.PRECIOUS: obj/libre2.a
obj/libre2.a: $(OFILES)
@mkdir -p obj
$(AR) $(ARFLAGS) obj/libre2.a $(OFILES)
+.PRECIOUS: obj/dbg/libre2.a
obj/dbg/libre2.a: $(DOFILES)
@mkdir -p obj/dbg
$(AR) $(ARFLAGS) obj/dbg/libre2.a $(DOFILES)
+.PRECIOUS: obj/so/libre2.$(SOEXT)
obj/so/libre2.$(SOEXT): $(SOFILES)
@mkdir -p obj/so
$(MAKE_SHARED_LIBRARY) -o obj/so/libre2.$(SOEXTVER) $(SOFILES)
ln -sf libre2.$(SOEXTVER) $@
+.PRECIOUS: obj/dbg/test/%
obj/dbg/test/%: obj/dbg/libre2.a obj/dbg/re2/testing/%.o $(DTESTOFILES) obj/dbg/util/test.o
@mkdir -p obj/dbg/test
$(CXX) -o $@ obj/dbg/re2/testing/$*.o $(DTESTOFILES) obj/dbg/util/test.o obj/dbg/libre2.a $(RE2_LDFLAGS) $(LDFLAGS)
+.PRECIOUS: obj/test/%
obj/test/%: obj/libre2.a obj/re2/testing/%.o $(TESTOFILES) obj/util/test.o
@mkdir -p obj/test
$(CXX) -o $@ obj/re2/testing/$*.o $(TESTOFILES) obj/util/test.o obj/libre2.a $(RE2_LDFLAGS) $(LDFLAGS)
# Test the shared lib, falling back to the static lib for private symbols
+.PRECIOUS: obj/so/test/%
obj/so/test/%: obj/so/libre2.$(SOEXT) obj/libre2.a obj/re2/testing/%.o $(TESTOFILES) obj/util/test.o
@mkdir -p obj/so/test
$(CXX) -o $@ obj/re2/testing/$*.o $(TESTOFILES) obj/util/test.o -Lobj/so -lre2 obj/libre2.a $(RE2_LDFLAGS) $(LDFLAGS)
@@ -229,64 +239,94 @@ obj/test/re2_fuzzer: obj/libre2.a obj/re2/fuzzing/re2_fuzzer.o obj/util/fuzz.o
$(CXX) -o $@ obj/re2/fuzzing/re2_fuzzer.o obj/util/fuzz.o obj/libre2.a $(RE2_LDFLAGS) $(LDFLAGS)
ifdef REBUILD_TABLES
+.PRECIOUS: re2/perl_groups.cc
re2/perl_groups.cc: re2/make_perl_groups.pl
perl $< > $@
+.PRECIOUS: re2/unicode_%.cc
re2/unicode_%.cc: re2/make_unicode_%.py
python $< > $@
-
-.PRECIOUS: re2/perl_groups.cc re2/unicode_casefold.cc re2/unicode_groups.cc
endif
+.PHONY: distclean
distclean: clean
rm -f re2/perl_groups.cc re2/unicode_casefold.cc re2/unicode_groups.cc
+.PHONY: clean
clean:
rm -rf obj
rm -f re2/*.pyc
+.PHONY: testofiles
testofiles: $(TESTOFILES)
+.PHONY: test
test: $(DTESTS) $(TESTS) $(STESTS) debug-test static-test shared-test
+.PHONY: debug-test
debug-test: $(DTESTS)
@./runtests $(DTESTS)
+.PHONY: static-test
static-test: $(TESTS)
@./runtests $(TESTS)
+.PHONY: shared-test
shared-test: $(STESTS)
@./runtests -shared-library-path obj/so $(STESTS)
+.PHONY: debug-bigtest
debug-bigtest: $(DTESTS) $(DBIGTESTS)
@./runtests $(DTESTS) $(DBIGTESTS)
+.PHONY: static-bigtest
static-bigtest: $(TESTS) $(BIGTESTS)
@./runtests $(TESTS) $(BIGTESTS)
+.PHONY: shared-bigtest
shared-bigtest: $(STESTS) $(SBIGTESTS)
@./runtests -shared-library-path obj/so $(STESTS) $(SBIGTESTS)
+.PHONY: benchmark
benchmark: obj/test/regexp_benchmark
+.PHONY: fuzz
fuzz: obj/test/re2_fuzzer
-install: obj/libre2.a obj/so/libre2.$(SOEXT)
- mkdir -p $(DESTDIR)$(includedir)/re2 $(DESTDIR)$(libdir)/pkgconfig
- $(INSTALL_DATA) $(INSTALL_HFILES) $(DESTDIR)$(includedir)/re2
+.PHONY: install
+install: static-install shared-install
+
+.PHONY: static
+static: obj/libre2.a
+
+.PHONY: static-install
+static-install: obj/libre2.a common-install
$(INSTALL) obj/libre2.a $(DESTDIR)$(libdir)/libre2.a
+
+.PHONY: shared
+shared: obj/so/libre2.$(SOEXT)
+
+.PHONY: shared-install
+shared-install: obj/so/libre2.$(SOEXT) common-install
$(INSTALL) obj/so/libre2.$(SOEXT) $(DESTDIR)$(libdir)/libre2.$(SOEXTVER00)
ln -sf libre2.$(SOEXTVER00) $(DESTDIR)$(libdir)/libre2.$(SOEXTVER)
ln -sf libre2.$(SOEXTVER00) $(DESTDIR)$(libdir)/libre2.$(SOEXT)
+
+.PHONY: common-install
+common-install:
+ mkdir -p $(DESTDIR)$(includedir)/re2 $(DESTDIR)$(libdir)/pkgconfig
+ $(INSTALL_DATA) $(INSTALL_HFILES) $(DESTDIR)$(includedir)/re2
$(INSTALL_DATA) re2.pc $(DESTDIR)$(libdir)/pkgconfig/re2.pc
$(SED_INPLACE) -e "s#@includedir@#$(includedir)#" $(DESTDIR)$(libdir)/pkgconfig/re2.pc
$(SED_INPLACE) -e "s#@libdir@#$(libdir)#" $(DESTDIR)$(libdir)/pkgconfig/re2.pc
+.PHONY: testinstall
testinstall: static-testinstall shared-testinstall
@echo
@echo Install tests passed.
@echo
+.PHONY: static-testinstall
static-testinstall: CXXFLAGS:=-std=c++11 -pthread -I$(DESTDIR)$(includedir) $(CXXFLAGS)
static-testinstall: LDFLAGS:=-pthread -L$(DESTDIR)$(libdir) -l:libre2.a $(LDICU) $(LDFLAGS)
static-testinstall:
@@ -301,6 +341,7 @@ else
obj/testinstall
endif
+.PHONY: shared-testinstall
shared-testinstall: CXXFLAGS:=-std=c++11 -pthread -I$(DESTDIR)$(includedir) $(CXXFLAGS)
shared-testinstall: LDFLAGS:=-pthread -L$(DESTDIR)$(libdir) -lre2 $(LDICU) $(LDFLAGS)
shared-testinstall:
@@ -313,19 +354,14 @@ else
LD_LIBRARY_PATH="$(DESTDIR)$(libdir):$(LD_LIBRARY_PATH)" obj/testinstall
endif
+.PHONY: benchlog
benchlog: obj/test/regexp_benchmark
(echo '==BENCHMARK==' `hostname` `date`; \
(uname -a; $(CXX) --version; git rev-parse --short HEAD; file obj/test/regexp_benchmark) | sed 's/^/# /'; \
echo; \
./obj/test/regexp_benchmark 'PCRE|RE2') | tee -a benchlog.$$(hostname | sed 's/\..*//')
-# Keep gmake from deleting intermediate files it creates.
-# This makes repeated builds faster and preserves debug info on OS X.
-
-.PRECIOUS: obj/%.o obj/dbg/%.o obj/so/%.o obj/libre2.a \
- obj/dbg/libre2.a obj/so/libre2.a \
- obj/test/% obj/so/test/% obj/dbg/test/%
-
+.PHONY: log
log:
$(MAKE) clean
$(MAKE) CXXFLAGS="$(CXXFLAGS) -DLOGGING=1" \
@@ -341,6 +377,3 @@ log:
echo '#' RE2 basic search tests built by make $@ >re2-search.txt
echo '#' $$(date) >>re2-search.txt
obj/test/search_test |grep -v '^PASS$$' >>re2-search.txt
-
-x: x.cc obj/libre2.a
- g++ -I. -o x x.cc obj/libre2.a
diff --git a/chromium/third_party/re2/src/re2/bitstate.cc b/chromium/third_party/re2/src/re2/bitstate.cc
index 865b58edc62..320d1eea157 100644
--- a/chromium/third_party/re2/src/re2/bitstate.cc
+++ b/chromium/third_party/re2/src/re2/bitstate.cc
@@ -332,14 +332,13 @@ bool BitState::Search(const StringPiece& text, const StringPiece& context,
// This looks like it's quadratic in the size of the text,
// but we are not clearing visited_ between calls to TrySearch,
// so no work is duplicated and it ends up still being linear.
- for (const char* p = text.data(); p <= text.data() + text.size(); p++) {
- // Try to use memchr to find the first byte quickly.
- int fb = prog_->first_byte();
- if (fb >= 0 && p < text.data() + text.size() && (p[0] & 0xFF) != fb) {
- p = reinterpret_cast<const char*>(
- memchr(p, fb, text.data() + text.size() - p));
+ const char* etext = text.data() + text.size();
+ for (const char* p = text.data(); p <= etext; p++) {
+ // Try to use prefix accel (e.g. memchr) to skip ahead.
+ if (p < etext && prog_->can_prefix_accel()) {
+ p = reinterpret_cast<const char*>(prog_->PrefixAccel(p, etext - p));
if (p == NULL)
- p = text.data() + text.size();
+ p = etext;
}
cap_[0] = p;
diff --git a/chromium/third_party/re2/src/re2/compile.cc b/chromium/third_party/re2/src/re2/compile.cc
index b3e5bdca0ae..7a9de07281c 100644
--- a/chromium/third_party/re2/src/re2/compile.cc
+++ b/chromium/third_party/re2/src/re2/compile.cc
@@ -30,91 +30,57 @@ namespace re2 {
// See http://swtch.com/~rsc/regexp/regexp1.html for inspiration.
//
// Because the out and out1 fields in Inst are no longer pointers,
-// we can't use pointers directly here either. Instead, p refers
-// to inst_[p>>1].out (p&1 == 0) or inst_[p>>1].out1 (p&1 == 1).
-// p == 0 represents the NULL list. This is okay because instruction #0
+// we can't use pointers directly here either. Instead, head refers
+// to inst_[head>>1].out (head&1 == 0) or inst_[head>>1].out1 (head&1 == 1).
+// head == 0 represents the NULL list. This is okay because instruction #0
// is always the fail instruction, which never appears on a list.
-
struct PatchList {
- uint32_t p;
-
// Returns patch list containing just p.
- static PatchList Mk(uint32_t p);
+ static PatchList Mk(uint32_t p) {
+ return {p, p};
+ }
- // Patches all the entries on l to have value v.
+ // Patches all the entries on l to have value p.
// Caller must not ever use patch list again.
- static void Patch(Prog::Inst *inst0, PatchList l, uint32_t v);
-
- // Deref returns the next pointer pointed at by p.
- static PatchList Deref(Prog::Inst *inst0, PatchList l);
-
- // Appends two patch lists and returns result.
- static PatchList Append(Prog::Inst *inst0, PatchList l1, PatchList l2);
-};
-
-static PatchList nullPatchList = { 0 };
-
-// Returns patch list containing just p.
-PatchList PatchList::Mk(uint32_t p) {
- PatchList l;
- l.p = p;
- return l;
-}
-
-// Returns the next pointer pointed at by l.
-PatchList PatchList::Deref(Prog::Inst* inst0, PatchList l) {
- Prog::Inst* ip = &inst0[l.p>>1];
- if (l.p&1)
- l.p = ip->out1();
- else
- l.p = ip->out();
- return l;
-}
-
-// Patches all the entries on l to have value v.
-void PatchList::Patch(Prog::Inst *inst0, PatchList l, uint32_t val) {
- while (l.p != 0) {
- Prog::Inst* ip = &inst0[l.p>>1];
- if (l.p&1) {
- l.p = ip->out1();
- ip->out1_ = val;
- } else {
- l.p = ip->out();
- ip->set_out(val);
+ static void Patch(Prog::Inst* inst0, PatchList l, uint32_t p) {
+ while (l.head != 0) {
+ Prog::Inst* ip = &inst0[l.head>>1];
+ if (l.head&1) {
+ l.head = ip->out1();
+ ip->out1_ = p;
+ } else {
+ l.head = ip->out();
+ ip->set_out(p);
+ }
}
}
-}
-// Appends two patch lists and returns result.
-PatchList PatchList::Append(Prog::Inst* inst0, PatchList l1, PatchList l2) {
- if (l1.p == 0)
- return l2;
- if (l2.p == 0)
- return l1;
-
- PatchList l = l1;
- for (;;) {
- PatchList next = PatchList::Deref(inst0, l);
- if (next.p == 0)
- break;
- l = next;
+ // Appends two patch lists and returns result.
+ static PatchList Append(Prog::Inst* inst0, PatchList l1, PatchList l2) {
+ if (l1.head == 0)
+ return l2;
+ if (l2.head == 0)
+ return l1;
+ Prog::Inst* ip = &inst0[l1.tail>>1];
+ if (l1.tail&1)
+ ip->out1_ = l2.head;
+ else
+ ip->set_out(l2.head);
+ return {l1.head, l2.tail};
}
- Prog::Inst* ip = &inst0[l.p>>1];
- if (l.p&1)
- ip->out1_ = l2.p;
- else
- ip->set_out(l2.p);
+ uint32_t head;
+ uint32_t tail; // for constant-time append
+};
- return l1;
-}
+static const PatchList kNullPatchList = {0, 0};
// Compiled program fragment.
struct Frag {
uint32_t begin;
PatchList end;
- Frag() : begin(0) { end.p = 0; } // needed so Frag can go in vector
+ Frag() : begin(0) { end.head = 0; } // needed so Frag can go in vector
Frag(uint32_t begin, PatchList end) : begin(begin), end(end) {}
};
@@ -212,8 +178,8 @@ class Compiler : public Regexp::Walker<Frag> {
int AddSuffixRecursive(int root, int id);
// Finds the trie node for the given suffix. Returns a Frag in order to
- // distinguish between pointing at the root node directly (end.p == 0)
- // and pointing at an Alt's out1 or out (end.p&1 == 1 or 0, respectively).
+ // distinguish between pointing at the root node directly (end.head == 0)
+ // and pointing at an Alt's out1 or out (end.head&1 == 1 or 0, respectively).
Frag FindByteRange(int root, int id);
// Compares two ByteRanges and returns true iff they are equal.
@@ -225,8 +191,8 @@ class Compiler : public Regexp::Walker<Frag> {
// Single rune.
Frag Literal(Rune r, bool foldcase);
- void Setup(Regexp::ParseFlags, int64_t, RE2::Anchor);
- Prog* Finish();
+ void Setup(Regexp::ParseFlags flags, int64_t max_mem, RE2::Anchor anchor);
+ Prog* Finish(Regexp* re);
// Returns .* where dot = any byte
Frag DotStar();
@@ -298,7 +264,7 @@ int Compiler::AllocInst(int n) {
// Returns an unmatchable fragment.
Frag Compiler::NoMatch() {
- return Frag(0, nullPatchList);
+ return Frag(0, kNullPatchList);
}
// Is a an unmatchable fragment?
@@ -314,7 +280,7 @@ Frag Compiler::Cat(Frag a, Frag b) {
// Elide no-op.
Prog::Inst* begin = &inst_[a.begin];
if (begin->opcode() == kInstNop &&
- a.end.p == (a.begin << 1) &&
+ a.end.head == (a.begin << 1) &&
begin->out() == 0) {
// in case refs to a somewhere
PatchList::Patch(inst_.data(), a.end, b.begin);
@@ -419,7 +385,7 @@ Frag Compiler::Match(int32_t match_id) {
if (id < 0)
return NoMatch();
inst_[id].InitMatch(match_id);
- return Frag(id, nullPatchList);
+ return Frag(id, kNullPatchList);
}
// Returns a fragment matching a particular empty-width op (like ^ or $)
@@ -467,7 +433,7 @@ static int MaxRune(int len) {
void Compiler::BeginRange() {
rune_cache_.clear();
rune_range_.begin = 0;
- rune_range_.end = nullPatchList;
+ rune_range_.end = kNullPatchList;
}
int Compiler::UncachedRuneByteSuffix(uint8_t lo, uint8_t hi, bool foldcase,
@@ -548,9 +514,9 @@ int Compiler::AddSuffixRecursive(int root, int id) {
}
int br;
- if (f.end.p == 0)
+ if (f.end.head == 0)
br = root;
- else if (f.end.p&1)
+ else if (f.end.head&1)
br = inst_[f.begin].out1();
else
br = inst_[f.begin].out();
@@ -566,9 +532,9 @@ int Compiler::AddSuffixRecursive(int root, int id) {
// Ensure that the parent points to the clone, not to the original.
// Note that this could leave the head unreachable except via the cache.
br = byterange;
- if (f.end.p == 0)
+ if (f.end.head == 0)
root = br;
- else if (f.end.p&1)
+ else if (f.end.head&1)
inst_[f.begin].out1_ = br;
else
inst_[f.begin].set_out(br);
@@ -601,7 +567,7 @@ bool Compiler::ByteRangeEqual(int id1, int id2) {
Frag Compiler::FindByteRange(int root, int id) {
if (inst_[root].opcode() == kInstByteRange) {
if (ByteRangeEqual(root, id))
- return Frag(root, nullPatchList);
+ return Frag(root, kNullPatchList);
else
return NoMatch();
}
@@ -1167,10 +1133,10 @@ Prog* Compiler::Compile(Regexp* re, bool reversed, int64_t max_mem) {
c.prog_->set_start_unanchored(all.begin);
// Hand ownership of prog_ to caller.
- return c.Finish();
+ return c.Finish(re);
}
-Prog* Compiler::Finish() {
+Prog* Compiler::Finish(Regexp* re) {
if (failed_)
return NULL;
@@ -1186,7 +1152,17 @@ Prog* Compiler::Finish() {
prog_->Optimize();
prog_->Flatten();
prog_->ComputeByteMap();
- prog_->ComputeFirstByte();
+
+ if (!prog_->reversed()) {
+ std::string prefix;
+ bool prefix_foldcase;
+ if (re->RequiredPrefixForAccel(&prefix, &prefix_foldcase) &&
+ !prefix_foldcase) {
+ prog_->prefix_size_ = prefix.size();
+ prog_->prefix_front_ = prefix.front();
+ prog_->prefix_back_ = prefix.back();
+ }
+ }
// Record remaining memory for DFA.
if (max_mem_ <= 0) {
@@ -1244,7 +1220,7 @@ Prog* Compiler::CompileSet(Regexp* re, RE2::Anchor anchor, int64_t max_mem) {
c.prog_->set_start(all.begin);
c.prog_->set_start_unanchored(all.begin);
- Prog* prog = c.Finish();
+ Prog* prog = c.Finish(re);
if (prog == NULL)
return NULL;
diff --git a/chromium/third_party/re2/src/re2/dfa.cc b/chromium/third_party/re2/src/re2/dfa.cc
index 0934f5a936e..b25cc97ff48 100644
--- a/chromium/third_party/re2/src/re2/dfa.cc
+++ b/chromium/third_party/re2/src/re2/dfa.cc
@@ -167,12 +167,6 @@ class DFA {
typedef std::unordered_set<State*, StateHash, StateEqual> StateSet;
private:
- // Special "first_byte" values for a state. (Values >= 0 denote actual bytes.)
- enum {
- kFbUnknown = -1, // No analysis has been performed.
- kFbNone = -2, // The first-byte trick cannot be used.
- };
-
enum {
// Indices into start_ for unanchored searches.
// Add kStartAnchored for anchored searches.
@@ -239,25 +233,26 @@ class DFA {
struct SearchParams {
SearchParams(const StringPiece& text, const StringPiece& context,
RWLocker* cache_lock)
- : text(text), context(context),
+ : text(text),
+ context(context),
anchored(false),
+ can_prefix_accel(false),
want_earliest_match(false),
run_forward(false),
start(NULL),
- first_byte(kFbUnknown),
cache_lock(cache_lock),
failed(false),
ep(NULL),
- matches(NULL) { }
+ matches(NULL) {}
StringPiece text;
StringPiece context;
bool anchored;
+ bool can_prefix_accel;
bool want_earliest_match;
bool run_forward;
State* start;
- int first_byte;
- RWLocker *cache_lock;
+ RWLocker* cache_lock;
bool failed; // "out" parameter: whether search gave up
const char* ep; // "out" parameter: end pointer for match
SparseSet* matches;
@@ -268,15 +263,13 @@ class DFA {
};
// Before each search, the parameters to Search are analyzed by
- // AnalyzeSearch to determine the state in which to start and the
- // "first_byte" for that state, if any.
+ // AnalyzeSearch to determine the state in which to start.
struct StartInfo {
- StartInfo() : start(NULL), first_byte(kFbUnknown) {}
- State* start;
- std::atomic<int> first_byte;
+ StartInfo() : start(NULL) {}
+ std::atomic<State*> start;
};
- // Fills in params->start and params->first_byte using
+ // Fills in params->start and params->can_prefix_accel using
// the other search parameters. Returns true on success,
// false on failure.
// cache_mutex_.r <= L < mutex_
@@ -288,7 +281,7 @@ class DFA {
// cache_mutex_.r <= L < mutex_
// Might unlock and relock cache_mutex_ via params->cache_lock.
inline bool InlinedSearchLoop(SearchParams* params,
- bool have_first_byte,
+ bool can_prefix_accel,
bool want_earliest_match,
bool run_forward);
@@ -606,7 +599,7 @@ DFA::State* DFA::WorkqToCachedState(Workq* q, Workq* mq, uint32_t flag) {
// Only ByteRange, EmptyWidth, and Match instructions are useful to keep:
// those are the only operators with any effect in
// RunWorkqOnEmptyString or RunWorkqOnByte.
- int* inst = new int[q->size()];
+ PODArray<int> inst(q->size());
int n = 0;
uint32_t needflags = 0; // flags needed by kInstEmptyWidth instructions
bool sawmatch = false; // whether queue contains guaranteed kInstMatch
@@ -636,7 +629,6 @@ DFA::State* DFA::WorkqToCachedState(Workq* q, Workq* mq, uint32_t flag) {
(it == q->begin() && ip->greedy(prog_))) &&
(kind_ != Prog::kLongestMatch || !sawmark) &&
(flag & kFlagMatch)) {
- delete[] inst;
if (ExtraDebug)
fprintf(stderr, " -> FullMatchState\n");
return FullMatchState;
@@ -683,7 +675,6 @@ DFA::State* DFA::WorkqToCachedState(Workq* q, Workq* mq, uint32_t flag) {
// the execution loop can stop early. This is only okay
// if the state is *not* a matching state.
if (n == 0 && flag == 0) {
- delete[] inst;
if (ExtraDebug)
fprintf(stderr, " -> DeadState\n");
return DeadState;
@@ -693,7 +684,7 @@ DFA::State* DFA::WorkqToCachedState(Workq* q, Workq* mq, uint32_t flag) {
// unordered state sets separated by Marks. Sort each set
// to canonicalize, to reduce the number of distinct sets stored.
if (kind_ == Prog::kLongestMatch) {
- int* ip = inst;
+ int* ip = inst.data();
int* ep = ip + n;
while (ip < ep) {
int* markp = ip;
@@ -710,7 +701,7 @@ DFA::State* DFA::WorkqToCachedState(Workq* q, Workq* mq, uint32_t flag) {
// we have an unordered set of states (i.e. we don't have Marks)
// and sorting will reduce the number of distinct sets stored.
if (kind_ == Prog::kManyMatch) {
- int* ip = inst;
+ int* ip = inst.data();
int* ep = ip + n;
std::sort(ip, ep);
}
@@ -729,8 +720,7 @@ DFA::State* DFA::WorkqToCachedState(Workq* q, Workq* mq, uint32_t flag) {
// Save the needed empty-width flags in the top bits for use later.
flag |= needflags << kFlagNeedShift;
- State* state = CachedState(inst, n, flag);
- delete[] inst;
+ State* state = CachedState(inst.data(), n, flag);
return state;
}
@@ -1183,10 +1173,8 @@ void DFA::ResetCache(RWLocker* cache_lock) {
});
// Clear the cache, reset the memory budget.
- for (int i = 0; i < kMaxStart; i++) {
- start_[i].start = NULL;
- start_[i].first_byte.store(kFbUnknown, std::memory_order_relaxed);
- }
+ for (int i = 0; i < kMaxStart; i++)
+ start_[i].start.store(NULL, std::memory_order_relaxed);
ClearCache();
mem_budget_ = state_budget_;
}
@@ -1301,8 +1289,7 @@ DFA::State* DFA::StateSaver::Restore() {
// situation, the DFA can do better than executing the simple loop.
// Instead, it can call memchr to search very quickly for the byte c.
// Whether the start state has this property is determined during a
-// pre-compilation pass, and if so, the byte b is passed to the search
-// loop as the "first_byte" argument, along with a boolean "have_first_byte".
+// pre-compilation pass and the "can_prefix_accel" argument is set.
//
// Fourth, the desired behavior is to search for the leftmost-best match
// (approximately, the same one that Perl would find), which is not
@@ -1335,7 +1322,7 @@ DFA::State* DFA::StateSaver::Restore() {
// making them function arguments lets the inliner specialize
// this function to each combination (see two paragraphs above).
inline bool DFA::InlinedSearchLoop(SearchParams* params,
- bool have_first_byte,
+ bool can_prefix_accel,
bool want_earliest_match,
bool run_forward) {
State* start = params->start;
@@ -1380,12 +1367,12 @@ inline bool DFA::InlinedSearchLoop(SearchParams* params,
if (ExtraDebug)
fprintf(stderr, "@%td: %s\n", p - bp, DumpState(s).c_str());
- if (run_forward && have_first_byte && s == start) {
- // In start state, only way out is to find first_byte,
- // so use optimized assembly in memchr to skip ahead.
- // If first_byte isn't found, we can skip to the end
- // of the string.
- if ((p = BytePtr(memchr(p, params->first_byte, ep - p))) == NULL) {
+ if (can_prefix_accel && s == start) {
+ // In start state, only way out is to find the prefix,
+ // so we use prefix accel (e.g. memchr) to skip ahead.
+ // If not found, we can skip to the end of the string.
+ p = BytePtr(prog_->PrefixAccel(p, ep - p));
+ if (p == NULL) {
p = ep;
break;
}
@@ -1589,7 +1576,7 @@ bool DFA::SearchTTT(SearchParams* params) {
// For debugging, calls the general code directly.
bool DFA::SlowSearchLoop(SearchParams* params) {
return InlinedSearchLoop(params,
- params->first_byte >= 0,
+ params->can_prefix_accel,
params->want_earliest_match,
params->run_forward);
}
@@ -1610,8 +1597,7 @@ bool DFA::FastSearchLoop(SearchParams* params) {
&DFA::SearchTTT,
};
- bool have_first_byte = params->first_byte >= 0;
- int index = 4 * have_first_byte +
+ int index = 4 * params->can_prefix_accel +
2 * params->want_earliest_match +
1 * params->run_forward;
return (this->*Searches[index])(params);
@@ -1703,13 +1689,22 @@ bool DFA::AnalyzeSearch(SearchParams* params) {
}
}
+ params->start = info->start.load(std::memory_order_acquire);
+
+ // Even if we could prefix accel, we cannot do so when anchored and,
+ // less obviously, we cannot do so when we are going to need flags.
+ // This trick works only when there is a single byte that leads to a
+ // different state!
+ if (prog_->can_prefix_accel() &&
+ !params->anchored &&
+ params->start > SpecialStateMax &&
+ params->start->flag_ >> kFlagNeedShift == 0)
+ params->can_prefix_accel = true;
+
if (ExtraDebug)
- fprintf(stderr, "anchored=%d fwd=%d flags=%#x state=%s first_byte=%d\n",
+ fprintf(stderr, "anchored=%d fwd=%d flags=%#x state=%s can_prefix_accel=%d\n",
params->anchored, params->run_forward, flags,
- DumpState(info->start).c_str(), info->first_byte.load());
-
- params->start = info->start;
- params->first_byte = info->first_byte.load(std::memory_order_acquire);
+ DumpState(params->start).c_str(), params->can_prefix_accel);
return true;
}
@@ -1718,47 +1713,25 @@ bool DFA::AnalyzeSearch(SearchParams* params) {
bool DFA::AnalyzeSearchHelper(SearchParams* params, StartInfo* info,
uint32_t flags) {
// Quick check.
- int fb = info->first_byte.load(std::memory_order_acquire);
- if (fb != kFbUnknown)
+ State* start = info->start.load(std::memory_order_acquire);
+ if (start != NULL)
return true;
MutexLock l(&mutex_);
- fb = info->first_byte.load(std::memory_order_relaxed);
- if (fb != kFbUnknown)
+ start = info->start.load(std::memory_order_relaxed);
+ if (start != NULL)
return true;
q0_->clear();
AddToQueue(q0_,
params->anchored ? prog_->start() : prog_->start_unanchored(),
flags);
- info->start = WorkqToCachedState(q0_, NULL, flags);
- if (info->start == NULL)
+ start = WorkqToCachedState(q0_, NULL, flags);
+ if (start == NULL)
return false;
- if (info->start == DeadState) {
- // Synchronize with "quick check" above.
- info->first_byte.store(kFbNone, std::memory_order_release);
- return true;
- }
-
- if (info->start == FullMatchState) {
- // Synchronize with "quick check" above.
- info->first_byte.store(kFbNone, std::memory_order_release); // will be ignored
- return true;
- }
-
- // Even if we have a first_byte, we cannot use it when anchored and,
- // less obviously, we cannot use it when we are going to need flags.
- // This trick works only when there is a single byte that leads to a
- // different state!
- int first_byte = prog_->first_byte();
- if (first_byte == -1 ||
- params->anchored ||
- info->start->flag_ >> kFlagNeedShift != 0)
- first_byte = kFbNone;
-
// Synchronize with "quick check" above.
- info->first_byte.store(first_byte, std::memory_order_release);
+ info->start.store(start, std::memory_order_release);
return true;
}
@@ -1866,13 +1839,13 @@ bool Prog::SearchDFA(const StringPiece& text, const StringPiece& const_context,
StringPiece context = const_context;
if (context.data() == NULL)
context = text;
- bool carat = anchor_start();
+ bool caret = anchor_start();
bool dollar = anchor_end();
if (reversed_) {
using std::swap;
- swap(carat, dollar);
+ swap(caret, dollar);
}
- if (carat && context.begin() != text.begin())
+ if (caret && context.begin() != text.begin())
return false;
if (dollar && context.end() != text.end())
return false;
diff --git a/chromium/third_party/re2/src/re2/filtered_re2.cc b/chromium/third_party/re2/src/re2/filtered_re2.cc
index e5d8de5ce62..5df97456e25 100644
--- a/chromium/third_party/re2/src/re2/filtered_re2.cc
+++ b/chromium/third_party/re2/src/re2/filtered_re2.cc
@@ -6,6 +6,7 @@
#include <stddef.h>
#include <string>
+#include <utility>
#include "util/util.h"
#include "util/logging.h"
@@ -27,7 +28,22 @@ FilteredRE2::FilteredRE2(int min_atom_len)
FilteredRE2::~FilteredRE2() {
for (size_t i = 0; i < re2_vec_.size(); i++)
delete re2_vec_[i];
- delete prefilter_tree_;
+}
+
+FilteredRE2::FilteredRE2(FilteredRE2&& other)
+ : re2_vec_(std::move(other.re2_vec_)),
+ compiled_(other.compiled_),
+ prefilter_tree_(std::move(other.prefilter_tree_)) {
+ other.re2_vec_.clear();
+ other.re2_vec_.shrink_to_fit();
+ other.compiled_ = false;
+ other.prefilter_tree_.reset(new PrefilterTree());
+}
+
+FilteredRE2& FilteredRE2::operator=(FilteredRE2&& other) {
+ this->~FilteredRE2();
+ (void) new (this) FilteredRE2(std::move(other));
+ return *this;
}
RE2::ErrorCode FilteredRE2::Add(const StringPiece& pattern,
@@ -38,7 +54,7 @@ RE2::ErrorCode FilteredRE2::Add(const StringPiece& pattern,
if (!re->ok()) {
if (options.log_errors()) {
LOG(ERROR) << "Couldn't compile regular expression, skipping: "
- << re << " due to error " << re->error();
+ << pattern << " due to error " << re->error();
}
delete re;
} else {
diff --git a/chromium/third_party/re2/src/re2/filtered_re2.h b/chromium/third_party/re2/src/re2/filtered_re2.h
index 86fa58681d2..dd618c70e8b 100644
--- a/chromium/third_party/re2/src/re2/filtered_re2.h
+++ b/chromium/third_party/re2/src/re2/filtered_re2.h
@@ -21,6 +21,7 @@
// or AllMatches with a vector of indices of strings that were found
// in the text to get the actual regexp matches.
+#include <memory>
#include <string>
#include <vector>
@@ -36,12 +37,19 @@ class FilteredRE2 {
explicit FilteredRE2(int min_atom_len);
~FilteredRE2();
+ // Not copyable.
+ FilteredRE2(const FilteredRE2&) = delete;
+ FilteredRE2& operator=(const FilteredRE2&) = delete;
+ // Movable.
+ FilteredRE2(FilteredRE2&& other);
+ FilteredRE2& operator=(FilteredRE2&& other);
+
// Uses RE2 constructor to create a RE2 object (re). Returns
// re->error_code(). If error_code is other than NoError, then re is
// deleted and not added to re2_vec_.
RE2::ErrorCode Add(const StringPiece& pattern,
const RE2::Options& options,
- int *id);
+ int* id);
// Prepares the regexps added by Add for filtering. Returns a set
// of strings that the caller should check for in candidate texts.
@@ -98,10 +106,7 @@ class FilteredRE2 {
bool compiled_;
// An AND-OR tree of string atoms used for filtering regexps.
- PrefilterTree* prefilter_tree_;
-
- FilteredRE2(const FilteredRE2&) = delete;
- FilteredRE2& operator=(const FilteredRE2&) = delete;
+ std::unique_ptr<PrefilterTree> prefilter_tree_;
};
} // namespace re2
diff --git a/chromium/third_party/re2/src/re2/nfa.cc b/chromium/third_party/re2/src/re2/nfa.cc
index 75ca30654b6..19ee08de215 100644
--- a/chromium/third_party/re2/src/re2/nfa.cc
+++ b/chromium/third_party/re2/src/re2/nfa.cc
@@ -27,6 +27,7 @@
#include <stdio.h>
#include <string.h>
#include <algorithm>
+#include <deque>
#include <string>
#include <utility>
#include <vector>
@@ -107,18 +108,21 @@ class NFA {
// Returns text version of capture information, for debugging.
std::string FormatCapture(const char** capture);
- inline void CopyCapture(const char** dst, const char** src);
+ void CopyCapture(const char** dst, const char** src) {
+ memmove(dst, src, ncapture_*sizeof src[0]);
+ }
Prog* prog_; // underlying program
int start_; // start instruction in program
int ncapture_; // number of submatches to track
bool longest_; // whether searching for longest match
bool endmatch_; // whether match must end at text.end()
- const char* btext_; // beginning of text being matched (for FormatSubmatch)
- const char* etext_; // end of text being matched (for endmatch_)
+ const char* btext_; // beginning of text (for FormatSubmatch)
+ const char* etext_; // end of text (for endmatch_)
Threadq q0_, q1_; // pre-allocated for Search.
PODArray<AddState> stack_; // pre-allocated for AddToThreadq
- Thread* free_threads_; // free list
+ std::deque<Thread> arena_; // thread arena
+ Thread* freelist_; // thread freelist
const char** match_; // best match so far
bool matched_; // any match so far?
@@ -141,31 +145,30 @@ NFA::NFA(Prog* prog) {
prog_->inst_count(kInstEmptyWidth) +
prog_->inst_count(kInstNop) + 1; // + 1 for start inst
stack_ = PODArray<AddState>(nstack);
- free_threads_ = NULL;
+ freelist_ = NULL;
match_ = NULL;
matched_ = false;
}
NFA::~NFA() {
delete[] match_;
- Thread* next;
- for (Thread* t = free_threads_; t; t = next) {
- next = t->next;
- delete[] t->capture;
- delete t;
- }
+ for (const Thread& t : arena_)
+ delete[] t.capture;
}
NFA::Thread* NFA::AllocThread() {
- Thread* t = free_threads_;
- if (t == NULL) {
- t = new Thread;
+ Thread* t = freelist_;
+ if (t != NULL) {
+ freelist_ = t->next;
t->ref = 1;
- t->capture = new const char*[ncapture_];
+ // We don't need to touch t->capture because
+ // the caller will immediately overwrite it.
return t;
}
- free_threads_ = t->next;
+ arena_.emplace_back();
+ t = &arena_.back();
t->ref = 1;
+ t->capture = new const char*[ncapture_];
return t;
}
@@ -176,21 +179,13 @@ NFA::Thread* NFA::Incref(Thread* t) {
}
void NFA::Decref(Thread* t) {
- if (t == NULL)
- return;
+ DCHECK(t != NULL);
t->ref--;
if (t->ref > 0)
return;
DCHECK_EQ(t->ref, 0);
- t->next = free_threads_;
- free_threads_ = t;
-}
-
-void NFA::CopyCapture(const char** dst, const char** src) {
- for (int i = 0; i < ncapture_; i+=2) {
- dst[i] = src[i];
- dst[i+1] = src[i+1];
- }
+ t->next = freelist_;
+ freelist_ = t;
}
// Follows all empty arrows from id0 and enqueues all the states reached.
@@ -372,8 +367,10 @@ int NFA::Step(Threadq* runq, Threadq* nextq, int c, const StringPiece& context,
matched_ = true;
Decref(t);
- for (++i; i != runq->end(); ++i)
- Decref(i->value());
+ for (++i; i != runq->end(); ++i) {
+ if (i->value() != NULL)
+ Decref(i->value());
+ }
runq->clear();
if (ip->greedy(prog_))
return ip->out1();
@@ -416,8 +413,10 @@ int NFA::Step(Threadq* runq, Threadq* nextq, int c, const StringPiece& context,
// worse than the one we just found: don't run the
// rest of the current Threadq.
Decref(t);
- for (++i; i != runq->end(); ++i)
- Decref(i->value());
+ for (++i; i != runq->end(); ++i) {
+ if (i->value() != NULL)
+ Decref(i->value());
+ }
runq->clear();
return 0;
}
@@ -489,6 +488,7 @@ bool NFA::Search(const StringPiece& text, const StringPiece& const_context,
}
match_ = new const char*[ncapture_];
+ memset(match_, 0, ncapture_*sizeof match_[0]);
matched_ = false;
// For debugging prints.
@@ -505,7 +505,6 @@ bool NFA::Search(const StringPiece& text, const StringPiece& const_context,
Threadq* nextq = &q1_;
runq->clear();
nextq->clear();
- memset(&match_[0], 0, ncapture_*sizeof match_[0]);
// Loop over the text, stepping the machine.
for (const char* p = text.data();; p++) {
@@ -572,16 +571,14 @@ bool NFA::Search(const StringPiece& text, const StringPiece& const_context,
// matches, since it would be to the right of the match
// we already found.)
if (!matched_ && (!anchored || p == text.data())) {
- // If there's a required first byte for an unanchored search
- // and we're not in the middle of any possible matches,
- // use memchr to search for the byte quickly.
- int fb = prog_->first_byte();
+ // Try to use prefix accel (e.g. memchr) to skip ahead.
+ // The search must be unanchored and there must be zero
+ // possible matches already.
if (!anchored && runq->size() == 0 &&
- fb >= 0 && p < etext_ && (p[0] & 0xFF) != fb) {
- p = reinterpret_cast<const char*>(memchr(p, fb, etext_ - p));
- if (p == NULL) {
+ p < etext_ && prog_->can_prefix_accel()) {
+ p = reinterpret_cast<const char*>(prog_->PrefixAccel(p, etext_ - p));
+ if (p == NULL)
p = etext_;
- }
}
Thread* t = AllocThread();
@@ -612,8 +609,10 @@ bool NFA::Search(const StringPiece& text, const StringPiece& const_context,
}
}
- for (Threadq::iterator i = runq->begin(); i != runq->end(); ++i)
- Decref(i->value());
+ for (Threadq::iterator i = runq->begin(); i != runq->end(); ++i) {
+ if (i->value() != NULL)
+ Decref(i->value());
+ }
if (matched_) {
for (int i = 0; i < nsubmatch; i++)
@@ -629,67 +628,6 @@ bool NFA::Search(const StringPiece& text, const StringPiece& const_context,
return false;
}
-void Prog::ComputeFirstByte() {
- SparseSet q(size());
- q.insert(start());
- for (SparseSet::iterator it = q.begin(); it != q.end(); ++it) {
- int id = *it;
- Prog::Inst* ip = inst(id);
- switch (ip->opcode()) {
- default:
- LOG(DFATAL) << "unhandled " << ip->opcode() << " in ComputeFirstByte";
- break;
-
- case kInstMatch:
- // The empty string matches: no first byte.
- first_byte_ = -1;
- return;
-
- case kInstByteRange:
- if (!ip->last())
- q.insert(id+1);
-
- // Must match only a single byte.
- if (ip->lo() != ip->hi() ||
- (ip->foldcase() && 'a' <= ip->lo() && ip->lo() <= 'z')) {
- first_byte_ = -1;
- return;
- }
- // If we haven't seen any bytes yet, record it;
- // otherwise must match the one we saw before.
- if (first_byte_ == -1) {
- first_byte_ = ip->lo();
- } else if (first_byte_ != ip->lo()) {
- first_byte_ = -1;
- return;
- }
- break;
-
- case kInstNop:
- case kInstCapture:
- case kInstEmptyWidth:
- if (!ip->last())
- q.insert(id+1);
-
- // Continue on.
- // Ignore ip->empty() flags for kInstEmptyWidth
- // in order to be as conservative as possible
- // (assume all possible empty-width flags are true).
- if (ip->out())
- q.insert(ip->out());
- break;
-
- case kInstAltMatch:
- DCHECK(!ip->last());
- q.insert(id+1);
- break;
-
- case kInstFail:
- break;
- }
- }
-}
-
bool
Prog::SearchNFA(const StringPiece& text, const StringPiece& context,
Anchor anchor, MatchKind kind,
diff --git a/chromium/third_party/re2/src/re2/parse.cc b/chromium/third_party/re2/src/re2/parse.cc
index 0a5ff7494f9..3bba6137f4f 100644
--- a/chromium/third_party/re2/src/re2/parse.cc
+++ b/chromium/third_party/re2/src/re2/parse.cc
@@ -93,7 +93,7 @@ class Regexp::ParseState {
bool PushSimpleOp(RegexpOp op);
// Pushes a ^ onto the stack.
- bool PushCarat();
+ bool PushCaret();
// Pushes a \b (word == true) or \B (word == false) onto the stack.
bool PushWordBoundary(bool word);
@@ -423,7 +423,7 @@ bool Regexp::ParseState::PushLiteral(Rune r) {
}
// Pushes a ^ onto the stack.
-bool Regexp::ParseState::PushCarat() {
+bool Regexp::ParseState::PushCaret() {
if (flags_ & OneLine) {
return PushSimpleOp(kRegexpBeginText);
}
@@ -685,7 +685,7 @@ bool Regexp::ParseState::DoRightParen() {
if ((r1 = stacktop_) == NULL ||
(r2 = r1->down_) == NULL ||
r2->op() != kLeftParen) {
- status_->set_code(kRegexpMissingParen);
+ status_->set_code(kRegexpUnexpectedParen);
status_->set_error_arg(whole_regexp_);
return false;
}
@@ -1802,14 +1802,13 @@ ParseStatus ParseUnicodeGroup(StringPiece* s, Regexp::ParseFlags parse_flags,
// Convert the UnicodeSet to a URange32 and UGroup that we can add.
int nr = uset.getRangeCount();
- URange32* r = new URange32[nr];
+ PODArray<URange32> r(nr);
for (int i = 0; i < nr; i++) {
r[i].lo = uset.getRangeStart(i);
r[i].hi = uset.getRangeEnd(i);
}
- UGroup g = {"", +1, 0, 0, r, nr};
+ UGroup g = {"", +1, 0, 0, r.data(), nr};
AddUGroup(cc, &g, sign, parse_flags);
- delete[] r;
#endif
return kParseOk;
@@ -2272,7 +2271,7 @@ Regexp* Regexp::Parse(const StringPiece& s, ParseFlags global_flags,
break;
case '^': // Beginning of line.
- if (!ps.PushCarat())
+ if (!ps.PushCaret())
return NULL;
t.remove_prefix(1); // '^'
break;
diff --git a/chromium/third_party/re2/src/re2/prog.cc b/chromium/third_party/re2/src/re2/prog.cc
index 9244319eccf..ac9c0852406 100644
--- a/chromium/third_party/re2/src/re2/prog.cc
+++ b/chromium/third_party/re2/src/re2/prog.cc
@@ -7,6 +7,12 @@
#include "re2/prog.h"
+#if defined(__AVX2__)
+#include <immintrin.h>
+#ifdef _MSC_VER
+#include <intrin.h>
+#endif
+#endif
#include <stdint.h>
#include <string.h>
#include <algorithm>
@@ -109,7 +115,9 @@ Prog::Prog()
start_unanchored_(0),
size_(0),
bytemap_range_(0),
- first_byte_(-1),
+ prefix_size_(0),
+ prefix_front_(-1),
+ prefix_back_(-1),
list_count_(0),
dfa_mem_(0),
dfa_first_(NULL),
@@ -908,4 +916,73 @@ void Prog::ComputeHints(std::vector<Inst>* flat, int begin, int end) {
}
}
+#if defined(__AVX2__)
+// Finds the least significant non-zero bit in n.
+static int FindLSBSet(uint32_t n) {
+ DCHECK_NE(n, 0);
+#if defined(__GNUC__)
+ return __builtin_ctz(n);
+#elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))
+ unsigned long c;
+ _BitScanForward(&c, n);
+ return static_cast<int>(c);
+#else
+ int c = 31;
+ for (int shift = 1 << 4; shift != 0; shift >>= 1) {
+ uint32_t word = n << shift;
+ if (word != 0) {
+ n = word;
+ c -= shift;
+ }
+ }
+ return c;
+#endif
+}
+#endif
+
+const void* Prog::PrefixAccel_FrontAndBack(const void* data, size_t size) {
+ DCHECK_GE(prefix_size_, 2);
+ if (size < prefix_size_)
+ return NULL;
+ // Don't bother searching the last prefix_size_-1 bytes for prefix_front_.
+ // This also means that probing for prefix_back_ doesn't go out of bounds.
+ size -= prefix_size_-1;
+
+#if defined(__AVX2__)
+ // Use AVX2 to look for prefix_front_ and prefix_back_ 32 bytes at a time.
+ if (size >= sizeof(__m256i)) {
+ const __m256i* fp = reinterpret_cast<const __m256i*>(
+ reinterpret_cast<const char*>(data));
+ const __m256i* bp = reinterpret_cast<const __m256i*>(
+ reinterpret_cast<const char*>(data) + prefix_size_-1);
+ const __m256i* endfp = fp + size/sizeof(__m256i);
+ const __m256i f_set1 = _mm256_set1_epi8(prefix_front_);
+ const __m256i b_set1 = _mm256_set1_epi8(prefix_back_);
+ while (fp != endfp) {
+ const __m256i f_loadu = _mm256_loadu_si256(fp++);
+ const __m256i b_loadu = _mm256_loadu_si256(bp++);
+ const __m256i f_cmpeq = _mm256_cmpeq_epi8(f_set1, f_loadu);
+ const __m256i b_cmpeq = _mm256_cmpeq_epi8(b_set1, b_loadu);
+ const int fb_testz = _mm256_testz_si256(f_cmpeq, b_cmpeq);
+ if (fb_testz == 0) { // ZF: 1 means zero, 0 means non-zero.
+ const __m256i fb_and = _mm256_and_si256(f_cmpeq, b_cmpeq);
+ const int fb_movemask = _mm256_movemask_epi8(fb_and);
+ const int fb_ctz = FindLSBSet(fb_movemask);
+ return reinterpret_cast<const char*>(fp-1) + fb_ctz;
+ }
+ }
+ data = fp;
+ size = size%sizeof(__m256i);
+ }
+#endif
+
+ const char* p0 = reinterpret_cast<const char*>(data);
+ for (const char* p = p0;; p++) {
+ DCHECK_GE(size, static_cast<size_t>(p-p0));
+ p = reinterpret_cast<const char*>(memchr(p, prefix_front_, size - (p-p0)));
+ if (p == NULL || p[prefix_size_-1] == prefix_back_)
+ return p;
+ }
+}
+
} // namespace re2
diff --git a/chromium/third_party/re2/src/re2/prog.h b/chromium/third_party/re2/src/re2/prog.h
index 4306672e9ff..e9ce682d992 100644
--- a/chromium/third_party/re2/src/re2/prog.h
+++ b/chromium/third_party/re2/src/re2/prog.h
@@ -198,8 +198,8 @@ class Prog {
Inst *inst(int id) { return &inst_[id]; }
int start() { return start_; }
- int start_unanchored() { return start_unanchored_; }
void set_start(int start) { start_ = start; }
+ int start_unanchored() { return start_unanchored_; }
void set_start_unanchored(int start) { start_unanchored_ = start; }
int size() { return size_; }
bool reversed() { return reversed_; }
@@ -207,15 +207,27 @@ class Prog {
int list_count() { return list_count_; }
int inst_count(InstOp op) { return inst_count_[op]; }
uint16_t* list_heads() { return list_heads_.data(); }
- void set_dfa_mem(int64_t dfa_mem) { dfa_mem_ = dfa_mem; }
int64_t dfa_mem() { return dfa_mem_; }
+ void set_dfa_mem(int64_t dfa_mem) { dfa_mem_ = dfa_mem; }
bool anchor_start() { return anchor_start_; }
void set_anchor_start(bool b) { anchor_start_ = b; }
bool anchor_end() { return anchor_end_; }
void set_anchor_end(bool b) { anchor_end_ = b; }
int bytemap_range() { return bytemap_range_; }
const uint8_t* bytemap() { return bytemap_; }
- int first_byte() { return first_byte_; }
+ bool can_prefix_accel() { return prefix_size_ != 0; }
+
+ // Accelerates to the first likely occurrence of the prefix.
+ // Returns a pointer to the first byte or NULL if not found.
+ const void* PrefixAccel(const void* data, size_t size) {
+ DCHECK_GE(prefix_size_, 1);
+ return prefix_size_ == 1 ? memchr(data, prefix_front_, size)
+ : PrefixAccel_FrontAndBack(data, size);
+ }
+
+ // An implementation of prefix accel that looks for prefix_front_ and
+ // prefix_back_ to return fewer false positives than memchr(3) alone.
+ const void* PrefixAccel_FrontAndBack(const void* data, size_t size);
// Returns string representation of program for debugging.
std::string Dump();
@@ -293,9 +305,6 @@ class Prog {
// Compute bytemap.
void ComputeByteMap();
- // Computes whether all matches must begin with the same first byte.
- void ComputeFirstByte();
-
// Run peep-hole optimizer on program.
void Optimize();
@@ -397,7 +406,9 @@ class Prog {
int start_unanchored_; // unanchored entry point for program
int size_; // number of instructions
int bytemap_range_; // bytemap_[x] < bytemap_range_
- int first_byte_; // required first byte for match, or -1 if none
+ size_t prefix_size_; // size of prefix (0 if no prefix)
+ int prefix_front_; // first byte of prefix (-1 if no prefix)
+ int prefix_back_; // last byte of prefix (-1 if no prefix)
int list_count_; // count of lists (see above)
int inst_count_[kNumInst]; // count of instructions by opcode
diff --git a/chromium/third_party/re2/src/re2/re2.cc b/chromium/third_party/re2/src/re2/re2.cc
index d231a21496d..7ec193c190b 100644
--- a/chromium/third_party/re2/src/re2/re2.cc
+++ b/chromium/third_party/re2/src/re2/re2.cc
@@ -83,6 +83,8 @@ static RE2::ErrorCode RegexpErrorToRE2(re2::RegexpStatusCode code) {
return RE2::ErrorMissingBracket;
case re2::kRegexpMissingParen:
return RE2::ErrorMissingParen;
+ case re2::kRegexpUnexpectedParen:
+ return RE2::ErrorUnexpectedParen;
case re2::kRegexpTrailingBackslash:
return RE2::ErrorTrailingBackslash;
case re2::kRegexpRepeatArgument:
@@ -293,7 +295,7 @@ static int FindMSBSet(uint32_t n) {
DCHECK_NE(n, 0);
#if defined(__GNUC__)
return 31 ^ __builtin_clz(n);
-#elif defined(_MSC_VER)
+#elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))
unsigned long c;
_BitScanReverse(&c, n);
return static_cast<int>(c);
@@ -405,6 +407,8 @@ bool RE2::Replace(std::string* str,
const StringPiece& rewrite) {
StringPiece vec[kVecSize];
int nvec = 1 + MaxSubmatch(rewrite);
+ if (nvec > 1 + re.NumberOfCapturingGroups())
+ return false;
if (nvec > static_cast<int>(arraysize(vec)))
return false;
if (!re.Match(*str, 0, str->size(), UNANCHORED, vec, nvec))
@@ -425,6 +429,8 @@ int RE2::GlobalReplace(std::string* str,
const StringPiece& rewrite) {
StringPiece vec[kVecSize];
int nvec = 1 + MaxSubmatch(rewrite);
+ if (nvec > 1 + re.NumberOfCapturingGroups())
+ return false;
if (nvec > static_cast<int>(arraysize(vec)))
return false;
@@ -497,9 +503,10 @@ bool RE2::Extract(const StringPiece& text,
std::string* out) {
StringPiece vec[kVecSize];
int nvec = 1 + MaxSubmatch(rewrite);
+ if (nvec > 1 + re.NumberOfCapturingGroups())
+ return false;
if (nvec > static_cast<int>(arraysize(vec)))
return false;
-
if (!re.Match(text, 0, text.size(), UNANCHORED, vec, nvec))
return false;
@@ -1012,8 +1019,8 @@ bool RE2::Rewrite(std::string* out,
int n = (c - '0');
if (n >= veclen) {
if (options_.log_errors()) {
- LOG(ERROR) << "requested group " << n
- << " in regexp " << rewrite.data();
+ LOG(ERROR) << "invalid substitution \\" << n
+ << " from " << veclen << " groups";
}
return false;
}
diff --git a/chromium/third_party/re2/src/re2/re2.h b/chromium/third_party/re2/src/re2/re2.h
index 32b2718f3c0..9d3496c9abd 100644
--- a/chromium/third_party/re2/src/re2/re2.h
+++ b/chromium/third_party/re2/src/re2/re2.h
@@ -40,6 +40,9 @@
// R"((?i)hello)" -- (?i) turns on case-insensitive matching
// R"(/\*(.*?)\*/)" -- .*? matches . minimum no. of times possible
//
+// When using UTF-8 encoding, case-insensitive matching will perform
+// simple case folding, not full case folding.
+//
// -----------------------------------------------------------------------
// MATCHING INTERFACE:
//
@@ -244,6 +247,7 @@ class RE2 {
ErrorBadCharRange, // bad character class range
ErrorMissingBracket, // missing closing ]
ErrorMissingParen, // missing closing )
+ ErrorUnexpectedParen, // unexpected closing )
ErrorTrailingBackslash, // trailing \ at end of regexp
ErrorRepeatArgument, // repeat argument missing, e.g. "*"
ErrorRepeatSize, // bad repetition argument
diff --git a/chromium/third_party/re2/src/re2/regexp.cc b/chromium/third_party/re2/src/re2/regexp.cc
index 5732166eebd..574780fbea9 100644
--- a/chromium/third_party/re2/src/re2/regexp.cc
+++ b/chromium/third_party/re2/src/re2/regexp.cc
@@ -20,6 +20,7 @@
#include "util/logging.h"
#include "util/mutex.h"
#include "util/utf.h"
+#include "re2/pod_array.h"
#include "re2/stringpiece.h"
#include "re2/walker-inl.h"
@@ -243,16 +244,15 @@ Regexp* Regexp::ConcatOrAlternate(RegexpOp op, Regexp** sub, int nsub,
return new Regexp(kRegexpEmptyMatch, flags);
}
- Regexp** subcopy = NULL;
+ PODArray<Regexp*> subcopy;
if (op == kRegexpAlternate && can_factor) {
// Going to edit sub; make a copy so we don't step on caller.
- subcopy = new Regexp*[nsub];
- memmove(subcopy, sub, nsub * sizeof sub[0]);
- sub = subcopy;
+ subcopy = PODArray<Regexp*>(nsub);
+ memmove(subcopy.data(), sub, nsub * sizeof sub[0]);
+ sub = subcopy.data();
nsub = FactorAlternation(sub, nsub, flags);
if (nsub == 1) {
Regexp* re = sub[0];
- delete[] subcopy;
return re;
}
}
@@ -269,7 +269,6 @@ Regexp* Regexp::ConcatOrAlternate(RegexpOp op, Regexp** sub, int nsub,
subs[nbigsub - 1] = ConcatOrAlternate(op, sub+(nbigsub-1)*kMaxNsub,
nsub - (nbigsub-1)*kMaxNsub, flags,
false);
- delete[] subcopy;
return re;
}
@@ -278,8 +277,6 @@ Regexp* Regexp::ConcatOrAlternate(RegexpOp op, Regexp** sub, int nsub,
Regexp** subs = re->sub();
for (int i = 0; i < nsub; i++)
subs[i] = sub[i];
-
- delete[] subcopy;
return re;
}
@@ -501,6 +498,7 @@ static const char *kErrorStrings[] = {
"invalid character class range",
"missing ]",
"missing )",
+ "unexpected )",
"trailing \\",
"no argument for repetition operator",
"invalid repetition size",
@@ -658,78 +656,83 @@ std::map<int, std::string>* Regexp::CaptureNames() {
return w.TakeMap();
}
+void ConvertRunesToBytes(bool latin1, Rune* runes, int nrunes,
+ std::string* bytes) {
+ if (latin1) {
+ bytes->resize(nrunes);
+ for (int i = 0; i < nrunes; i++)
+ (*bytes)[i] = static_cast<char>(runes[i]);
+ } else {
+ bytes->resize(nrunes * UTFmax); // worst case
+ char* p = &(*bytes)[0];
+ for (int i = 0; i < nrunes; i++)
+ p += runetochar(p, &runes[i]);
+ bytes->resize(p - &(*bytes)[0]);
+ bytes->shrink_to_fit();
+ }
+}
+
// Determines whether regexp matches must be anchored
// with a fixed string prefix. If so, returns the prefix and
// the regexp that remains after the prefix. The prefix might
// be ASCII case-insensitive.
bool Regexp::RequiredPrefix(std::string* prefix, bool* foldcase,
Regexp** suffix) {
+ prefix->clear();
+ *foldcase = false;
+ *suffix = NULL;
+
// No need for a walker: the regexp must be of the form
// 1. some number of ^ anchors
// 2. a literal char or string
// 3. the rest
- prefix->clear();
- *foldcase = false;
- *suffix = NULL;
if (op_ != kRegexpConcat)
return false;
-
- // Some number of anchors, then a literal or concatenation.
int i = 0;
- Regexp** sub = this->sub();
- while (i < nsub_ && sub[i]->op_ == kRegexpBeginText)
+ while (i < nsub_ && sub()[i]->op_ == kRegexpBeginText)
i++;
if (i == 0 || i >= nsub_)
return false;
-
- Regexp* re = sub[i];
- switch (re->op_) {
- default:
- return false;
-
- case kRegexpLiteralString:
- // Convert to string in proper encoding.
- if (re->parse_flags() & Latin1) {
- prefix->resize(re->nrunes_);
- for (int j = 0; j < re->nrunes_; j++)
- (*prefix)[j] = static_cast<char>(re->runes_[j]);
- } else {
- // Convert to UTF-8 in place.
- // Assume worst-case space and then trim.
- prefix->resize(re->nrunes_ * UTFmax);
- char *p = &(*prefix)[0];
- for (int j = 0; j < re->nrunes_; j++) {
- Rune r = re->runes_[j];
- if (r < Runeself)
- *p++ = static_cast<char>(r);
- else
- p += runetochar(p, &r);
- }
- prefix->resize(p - &(*prefix)[0]);
- }
- break;
-
- case kRegexpLiteral:
- if ((re->parse_flags() & Latin1) || re->rune_ < Runeself) {
- prefix->append(1, static_cast<char>(re->rune_));
- } else {
- char buf[UTFmax];
- prefix->append(buf, runetochar(buf, &re->rune_));
- }
- break;
- }
- *foldcase = (sub[i]->parse_flags() & FoldCase) != 0;
+ Regexp* re = sub()[i];
+ if (re->op_ != kRegexpLiteral &&
+ re->op_ != kRegexpLiteralString)
+ return false;
i++;
-
- // The rest.
if (i < nsub_) {
for (int j = i; j < nsub_; j++)
- sub[j]->Incref();
- re = Concat(sub + i, nsub_ - i, parse_flags());
+ sub()[j]->Incref();
+ *suffix = Concat(sub() + i, nsub_ - i, parse_flags());
} else {
- re = new Regexp(kRegexpEmptyMatch, parse_flags());
+ *suffix = new Regexp(kRegexpEmptyMatch, parse_flags());
}
- *suffix = re;
+
+ bool latin1 = (re->parse_flags() & Latin1) != 0;
+ Rune* runes = re->op_ == kRegexpLiteral ? &re->rune_ : re->runes_;
+ int nrunes = re->op_ == kRegexpLiteral ? 1 : re->nrunes_;
+ ConvertRunesToBytes(latin1, runes, nrunes, prefix);
+ *foldcase = (re->parse_flags() & FoldCase) != 0;
+ return true;
+}
+
+// Determines whether regexp matches must be unanchored
+// with a fixed string prefix. If so, returns the prefix.
+// The prefix might be ASCII case-insensitive.
+bool Regexp::RequiredPrefixForAccel(std::string* prefix, bool* foldcase) {
+ prefix->clear();
+ *foldcase = false;
+
+ // No need for a walker: the regexp must either begin with or be
+ // a literal char or string.
+ Regexp* re = op_ == kRegexpConcat && nsub_ > 0 ? sub()[0] : this;
+ if (re->op_ != kRegexpLiteral &&
+ re->op_ != kRegexpLiteralString)
+ return false;
+
+ bool latin1 = (re->parse_flags() & Latin1) != 0;
+ Rune* runes = re->op_ == kRegexpLiteral ? &re->rune_ : re->runes_;
+ int nrunes = re->op_ == kRegexpLiteral ? 1 : re->nrunes_;
+ ConvertRunesToBytes(latin1, runes, nrunes, prefix);
+ *foldcase = (re->parse_flags() & FoldCase) != 0;
return true;
}
diff --git a/chromium/third_party/re2/src/re2/regexp.h b/chromium/third_party/re2/src/re2/regexp.h
index a5d85c81288..9ea7a07733c 100644
--- a/chromium/third_party/re2/src/re2/regexp.h
+++ b/chromium/third_party/re2/src/re2/regexp.h
@@ -177,6 +177,7 @@ enum RegexpStatusCode {
kRegexpBadCharRange, // bad character class range
kRegexpMissingBracket, // missing closing ]
kRegexpMissingParen, // missing closing )
+ kRegexpUnexpectedParen, // unexpected closing )
kRegexpTrailingBackslash, // at end of regexp
kRegexpRepeatArgument, // repeat argument missing, e.g. "*"
kRegexpRepeatSize, // bad repetition argument
@@ -440,6 +441,13 @@ class Regexp {
bool RequiredPrefix(std::string* prefix, bool* foldcase,
Regexp** suffix);
+ // Whether every match of this regexp must be unanchored and
+ // begin with a non-empty fixed string (perhaps after ASCII
+ // case-folding). If so, returns the prefix.
+ // Callers should expect *prefix and *foldcase to be "zeroed"
+ // regardless of the return value.
+ bool RequiredPrefixForAccel(std::string* prefix, bool* foldcase);
+
private:
// Constructor allocates vectors as appropriate for operator.
explicit Regexp(RegexpOp op, ParseFlags parse_flags);
diff --git a/chromium/third_party/re2/src/re2/set.cc b/chromium/third_party/re2/src/re2/set.cc
index 87db6b72b11..c3924bfd982 100644
--- a/chromium/third_party/re2/src/re2/set.cc
+++ b/chromium/third_party/re2/src/re2/set.cc
@@ -7,6 +7,7 @@
#include <stddef.h>
#include <algorithm>
#include <memory>
+#include <utility>
#include "util/util.h"
#include "util/logging.h"
@@ -18,19 +19,37 @@
namespace re2 {
-RE2::Set::Set(const RE2::Options& options, RE2::Anchor anchor) {
- options_.Copy(options);
+RE2::Set::Set(const RE2::Options& options, RE2::Anchor anchor)
+ : options_(options),
+ anchor_(anchor),
+ compiled_(false),
+ size_(0) {
options_.set_never_capture(true); // might unblock some optimisations
- anchor_ = anchor;
- prog_ = NULL;
- compiled_ = false;
- size_ = 0;
}
RE2::Set::~Set() {
for (size_t i = 0; i < elem_.size(); i++)
elem_[i].second->Decref();
- delete prog_;
+}
+
+RE2::Set::Set(Set&& other)
+ : options_(other.options_),
+ anchor_(other.anchor_),
+ elem_(std::move(other.elem_)),
+ compiled_(other.compiled_),
+ size_(other.size_),
+ prog_(std::move(other.prog_)) {
+ other.elem_.clear();
+ other.elem_.shrink_to_fit();
+ other.compiled_ = false;
+ other.size_ = 0;
+ other.prog_.reset();
+}
+
+RE2::Set& RE2::Set::operator=(Set&& other) {
+ this->~Set();
+ (void) new (this) Set(std::move(other));
+ return *this;
}
int RE2::Set::Add(const StringPiece& pattern, std::string* error) {
@@ -97,9 +116,9 @@ bool RE2::Set::Compile() {
options_.ParseFlags());
re2::Regexp* re = re2::Regexp::Alternate(sub.data(), size_, pf);
- prog_ = Prog::CompileSet(re, anchor_, options_.max_mem());
+ prog_.reset(Prog::CompileSet(re, anchor_, options_.max_mem()));
re->Decref();
- return prog_ != NULL;
+ return prog_ != nullptr;
}
bool RE2::Set::Match(const StringPiece& text, std::vector<int>* v) const {
diff --git a/chromium/third_party/re2/src/re2/set.h b/chromium/third_party/re2/src/re2/set.h
index 59733fd94cd..8d64f30ccd9 100644
--- a/chromium/third_party/re2/src/re2/set.h
+++ b/chromium/third_party/re2/src/re2/set.h
@@ -5,6 +5,7 @@
#ifndef RE2_SET_H_
#define RE2_SET_H_
+#include <memory>
#include <string>
#include <utility>
#include <vector>
@@ -36,6 +37,13 @@ class RE2::Set {
Set(const RE2::Options& options, RE2::Anchor anchor);
~Set();
+ // Not copyable.
+ Set(const Set&) = delete;
+ Set& operator=(const Set&) = delete;
+ // Movable.
+ Set(Set&& other);
+ Set& operator=(Set&& other);
+
// Adds pattern to the set using the options passed to the constructor.
// Returns the index that will identify the regexp in the output of Match(),
// or -1 if the regexp cannot be parsed.
@@ -67,12 +75,9 @@ class RE2::Set {
RE2::Options options_;
RE2::Anchor anchor_;
std::vector<Elem> elem_;
- re2::Prog* prog_;
bool compiled_;
int size_;
-
- Set(const Set&) = delete;
- Set& operator=(const Set&) = delete;
+ std::unique_ptr<re2::Prog> prog_;
};
} // namespace re2
diff --git a/chromium/third_party/re2/src/re2/testing/backtrack.cc b/chromium/third_party/re2/src/re2/testing/backtrack.cc
index 3bab1e7776d..216d259802f 100644
--- a/chromium/third_party/re2/src/re2/testing/backtrack.cc
+++ b/chromium/third_party/re2/src/re2/testing/backtrack.cc
@@ -29,6 +29,7 @@
#include "util/util.h"
#include "util/logging.h"
+#include "re2/pod_array.h"
#include "re2/prog.h"
#include "re2/regexp.h"
@@ -53,7 +54,6 @@ namespace re2 {
class Backtracker {
public:
explicit Backtracker(Prog* prog);
- ~Backtracker();
bool Search(const StringPiece& text, const StringPiece& context,
bool anchored, bool longest,
@@ -79,9 +79,8 @@ class Backtracker {
int nsubmatch_; // # of submatches to fill in
// Search state
- const char* cap_[64]; // capture registers
- uint32_t *visited_; // bitmap: (Inst*, char*) pairs already backtracked
- size_t nvisited_; // # of words in bitmap
+ const char* cap_[64]; // capture registers
+ PODArray<uint32_t> visited_; // bitmap: (Inst*, char*) pairs visited
Backtracker(const Backtracker&) = delete;
Backtracker& operator=(const Backtracker&) = delete;
@@ -93,13 +92,7 @@ Backtracker::Backtracker(Prog* prog)
longest_(false),
endmatch_(false),
submatch_(NULL),
- nsubmatch_(0),
- visited_(NULL),
- nvisited_(0) {
-}
-
-Backtracker::~Backtracker() {
- delete[] visited_;
+ nsubmatch_(0) {
}
// Runs a backtracking search.
@@ -133,10 +126,10 @@ bool Backtracker::Search(const StringPiece& text, const StringPiece& context,
// Allocate new visited_ bitmap -- size is proportional
// to text, so have to reallocate on each call to Search.
- delete[] visited_;
- nvisited_ = (prog_->size()*(text.size()+1) + 31)/32;
- visited_ = new uint32_t[nvisited_];
- memset(visited_, 0, nvisited_*sizeof visited_[0]);
+ int nvisited = prog_->size() * static_cast<int>(text.size()+1);
+ nvisited = (nvisited + 31) / 32;
+ visited_ = PODArray<uint32_t>(nvisited);
+ memset(visited_.data(), 0, nvisited*sizeof visited_[0]);
// Anchored search must start at text.begin().
if (anchored_) {
@@ -166,8 +159,9 @@ bool Backtracker::Visit(int id, const char* p) {
// either it didn't match or it did but we're hoping for a better match.
// Either way, don't go down that road again.
CHECK(p <= text_.data() + text_.size());
- size_t n = id*(text_.size()+1) + (p - text_.data());
- CHECK_LT(n/32, nvisited_);
+ int n = id * static_cast<int>(text_.size()+1) +
+ static_cast<int>(p-text_.data());
+ CHECK_LT(n/32, visited_.size());
if (visited_[n/32] & (1 << (n&31)))
return false;
visited_[n/32] |= 1 << (n&31);
diff --git a/chromium/third_party/re2/src/re2/testing/compile_test.cc b/chromium/third_party/re2/src/re2/testing/compile_test.cc
index 4598aa640fb..2096e2f0797 100644
--- a/chromium/third_party/re2/src/re2/testing/compile_test.cc
+++ b/chromium/third_party/re2/src/re2/testing/compile_test.cc
@@ -236,7 +236,7 @@ TEST(TestCompile, InsufficientMemory) {
"^(?P<name1>[^\\s]+)\\s+(?P<name2>[^\\s]+)\\s+(?P<name3>.+)$",
Regexp::LikePerl, NULL);
EXPECT_TRUE(re != NULL);
- Prog* prog = re->CompileToProg(920);
+ Prog* prog = re->CompileToProg(850);
// If the memory budget has been exhausted, compilation should fail
// and return NULL instead of trying to do anything with NoMatch().
EXPECT_TRUE(prog == NULL);
diff --git a/chromium/third_party/re2/src/re2/testing/filtered_re2_test.cc b/chromium/third_party/re2/src/re2/testing/filtered_re2_test.cc
index deef2f87d62..c788fdadc49 100644
--- a/chromium/third_party/re2/src/re2/testing/filtered_re2_test.cc
+++ b/chromium/third_party/re2/src/re2/testing/filtered_re2_test.cc
@@ -7,6 +7,7 @@
#include <memory>
#include <string>
#include <vector>
+#include <utility>
#include "util/test.h"
#include "util/logging.h"
@@ -291,4 +292,49 @@ TEST(FilteredRE2Test, EmptyStringInStringSetBug) {
"EmptyStringInStringSetBug", &v));
}
+TEST(FilteredRE2Test, MoveSemantics) {
+ FilterTestVars v1;
+ int id;
+ v1.f.Add("foo\\d+", v1.opts, &id);
+ EXPECT_EQ(0, id);
+ v1.f.Compile(&v1.atoms);
+ EXPECT_EQ(1, v1.atoms.size());
+ EXPECT_EQ("foo", v1.atoms[0]);
+ v1.f.AllMatches("abc foo1 xyz", {0}, &v1.matches);
+ EXPECT_EQ(1, v1.matches.size());
+ EXPECT_EQ(0, v1.matches[0]);
+ v1.f.AllMatches("abc bar2 xyz", {0}, &v1.matches);
+ EXPECT_EQ(0, v1.matches.size());
+
+ // The moved-to object should do what the moved-from object did.
+ FilterTestVars v2;
+ v2.f = std::move(v1.f);
+ v2.f.AllMatches("abc foo1 xyz", {0}, &v2.matches);
+ EXPECT_EQ(1, v2.matches.size());
+ EXPECT_EQ(0, v2.matches[0]);
+ v2.f.AllMatches("abc bar2 xyz", {0}, &v2.matches);
+ EXPECT_EQ(0, v2.matches.size());
+
+ // The moved-from object should have been reset and be reusable.
+ v1.f.Add("bar\\d+", v1.opts, &id);
+ EXPECT_EQ(0, id);
+ v1.f.Compile(&v1.atoms);
+ EXPECT_EQ(1, v1.atoms.size());
+ EXPECT_EQ("bar", v1.atoms[0]);
+ v1.f.AllMatches("abc foo1 xyz", {0}, &v1.matches);
+ EXPECT_EQ(0, v1.matches.size());
+ v1.f.AllMatches("abc bar2 xyz", {0}, &v1.matches);
+ EXPECT_EQ(1, v1.matches.size());
+ EXPECT_EQ(0, v1.matches[0]);
+
+ // Verify that "overwriting" works and also doesn't leak memory.
+ // (The latter will need a leak detector such as LeakSanitizer.)
+ v1.f = std::move(v2.f);
+ v1.f.AllMatches("abc foo1 xyz", {0}, &v1.matches);
+ EXPECT_EQ(1, v1.matches.size());
+ EXPECT_EQ(0, v1.matches[0]);
+ v1.f.AllMatches("abc bar2 xyz", {0}, &v1.matches);
+ EXPECT_EQ(0, v1.matches.size());
+}
+
} // namespace re2
diff --git a/chromium/third_party/re2/src/re2/testing/re2_test.cc b/chromium/third_party/re2/src/re2/testing/re2_test.cc
index 52e1f6f066e..41fccf68eb5 100644
--- a/chromium/third_party/re2/src/re2/testing/re2_test.cc
+++ b/chromium/third_party/re2/src/re2/testing/re2_test.cc
@@ -224,6 +224,15 @@ TEST(RE2, Extract) {
ASSERT_EQ(s, "'foo'");
}
+TEST(RE2, MaxSubmatchTooLarge) {
+ std::string s;
+ ASSERT_FALSE(RE2::Extract("foo", "f(o+)", "\\1\\2", &s));
+ s = "foo";
+ ASSERT_FALSE(RE2::Replace(&s, "f(o+)", "\\1\\2"));
+ s = "foo";
+ ASSERT_FALSE(RE2::GlobalReplace(&s, "f(o+)", "\\1\\2"));
+}
+
TEST(RE2, Consume) {
RE2 r("\\s*(\\w+)"); // matches a word, possibly proceeded by whitespace
std::string word;
@@ -1268,38 +1277,43 @@ TEST(RE2, CL8622304) {
EXPECT_EQ(val, "1,0x2F,030,4,5");
}
-
// Check that RE2 returns correct regexp pieces on error.
// In particular, make sure it returns whole runes
// and that it always reports invalid UTF-8.
// Also check that Perl error flag piece is big enough.
static struct ErrorTest {
const char *regexp;
- const char *error;
+ RE2::ErrorCode error_code;
+ const char *error_arg;
} error_tests[] = {
- { "ab\\αcd", "\\α" },
- { "ef\\x☺01", "\\x☺0" },
- { "gh\\x1☺01", "\\x1☺" },
- { "ij\\x1", "\\x1" },
- { "kl\\x", "\\x" },
- { "uv\\x{0000☺}", "\\x{0000☺" },
- { "wx\\p{ABC", "\\p{ABC" },
- { "yz(?smiUX:abc)", "(?smiUX" }, // used to return (?s but the error is X
- { "aa(?sm☺i", "(?sm☺" },
- { "bb[abc", "[abc" },
-
- { "mn\\x1\377", "" }, // no argument string returned for invalid UTF-8
- { "op\377qr", "" },
- { "st\\x{00000\377", "" },
- { "zz\\p{\377}", "" },
- { "zz\\x{00\377}", "" },
- { "zz(?P<name\377>abc)", "" },
+ { "ab\\αcd", RE2::ErrorBadEscape, "\\α" },
+ { "ef\\x☺01", RE2::ErrorBadEscape, "\\x☺0" },
+ { "gh\\x1☺01", RE2::ErrorBadEscape, "\\x1☺" },
+ { "ij\\x1", RE2::ErrorBadEscape, "\\x1" },
+ { "kl\\x", RE2::ErrorBadEscape, "\\x" },
+ { "uv\\x{0000☺}", RE2::ErrorBadEscape, "\\x{0000☺" },
+ { "wx\\p{ABC", RE2::ErrorBadCharRange, "\\p{ABC" },
+ // used to return (?s but the error is X
+ { "yz(?smiUX:abc)", RE2::ErrorBadPerlOp, "(?smiUX" },
+ { "aa(?sm☺i", RE2::ErrorBadPerlOp, "(?sm☺" },
+ { "bb[abc", RE2::ErrorMissingBracket, "[abc" },
+ { "abc(def", RE2::ErrorMissingParen, "abc(def" },
+ { "abc)def", RE2::ErrorUnexpectedParen, "abc)def" },
+
+ // no argument string returned for invalid UTF-8
+ { "mn\\x1\377", RE2::ErrorBadUTF8, "" },
+ { "op\377qr", RE2::ErrorBadUTF8, "" },
+ { "st\\x{00000\377", RE2::ErrorBadUTF8, "" },
+ { "zz\\p{\377}", RE2::ErrorBadUTF8, "" },
+ { "zz\\x{00\377}", RE2::ErrorBadUTF8, "" },
+ { "zz(?P<name\377>abc)", RE2::ErrorBadUTF8, "" },
};
-TEST(RE2, ErrorArgs) {
+TEST(RE2, ErrorCodeAndArg) {
for (size_t i = 0; i < arraysize(error_tests); i++) {
RE2 re(error_tests[i].regexp, RE2::Quiet);
EXPECT_FALSE(re.ok());
- EXPECT_EQ(re.error_arg(), error_tests[i].error) << re.error();
+ EXPECT_EQ(re.error_code(), error_tests[i].error_code) << re.error();
+ EXPECT_EQ(re.error_arg(), error_tests[i].error_arg) << re.error();
}
}
diff --git a/chromium/third_party/re2/src/re2/testing/required_prefix_test.cc b/chromium/third_party/re2/src/re2/testing/required_prefix_test.cc
index 54600456a88..c00e81234eb 100644
--- a/chromium/third_party/re2/src/re2/testing/required_prefix_test.cc
+++ b/chromium/third_party/re2/src/re2/testing/required_prefix_test.cc
@@ -6,6 +6,7 @@
#include "util/test.h"
#include "util/logging.h"
+#include "re2/prog.h"
#include "re2/regexp.h"
namespace re2 {
@@ -19,10 +20,13 @@ struct PrefixTest {
};
static PrefixTest tests[] = {
- // If the regexp is missing a ^, there's no required prefix.
- { "abc", false },
+ // Empty cases.
{ "", false },
{ "(?m)^", false },
+ { "(?-m)^", false },
+
+ // If the regexp has no ^, there's no required prefix.
+ { "abc", false },
// If the regexp immediately goes into
// something not a literal match, there's no required prefix.
@@ -53,15 +57,15 @@ TEST(RequiredPrefix, SimpleTests) {
bool f;
Regexp* s;
ASSERT_EQ(t.return_value, re->RequiredPrefix(&p, &f, &s))
- << " " << t.regexp << " " << (j == 0 ? "latin1" : "utf")
+ << " " << t.regexp << " " << (j == 0 ? "latin1" : "utf8")
<< " " << re->Dump();
if (t.return_value) {
ASSERT_EQ(p, std::string(t.prefix))
- << " " << t.regexp << " " << (j == 0 ? "latin1" : "utf");
+ << " " << t.regexp << " " << (j == 0 ? "latin1" : "utf8");
ASSERT_EQ(f, t.foldcase)
- << " " << t.regexp << " " << (j == 0 ? "latin1" : "utf");
+ << " " << t.regexp << " " << (j == 0 ? "latin1" : "utf8");
ASSERT_EQ(s->ToString(), std::string(t.suffix))
- << " " << t.regexp << " " << (j == 0 ? "latin1" : "utf");
+ << " " << t.regexp << " " << (j == 0 ? "latin1" : "utf8");
s->Decref();
}
re->Decref();
@@ -69,4 +73,73 @@ TEST(RequiredPrefix, SimpleTests) {
}
}
+static PrefixTest for_accel_tests[] = {
+ // Empty cases.
+ { "", false },
+ { "(?m)^", false },
+ { "(?-m)^", false },
+
+ // If the regexp has a ^, there's no required prefix.
+ { "^abc", false },
+
+ // If the regexp immediately goes into
+ // something not a literal match, there's no required prefix.
+ { "(abc)", false },
+ { "a*", false },
+
+ // Otherwise, it should work.
+ { "abc$", true, "abc", false, },
+ { "abc", true, "abc", false, },
+ { "(?i)abc", true, "abc", true, },
+ { "abcd*", true, "abc", false, },
+ { "[Aa][Bb]cd*", true, "ab", true, },
+ { "ab[Cc]d*", true, "ab", false, },
+ { "☺abc", true, "☺abc", false, },
+};
+
+TEST(RequiredPrefixForAccel, SimpleTests) {
+ for (size_t i = 0; i < arraysize(for_accel_tests); i++) {
+ const PrefixTest& t = for_accel_tests[i];
+ for (size_t j = 0; j < 2; j++) {
+ Regexp::ParseFlags flags = Regexp::LikePerl;
+ if (j == 0)
+ flags = flags | Regexp::Latin1;
+ Regexp* re = Regexp::Parse(t.regexp, flags, NULL);
+ ASSERT_TRUE(re != NULL) << " " << t.regexp;
+
+ std::string p;
+ bool f;
+ ASSERT_EQ(t.return_value, re->RequiredPrefixForAccel(&p, &f))
+ << " " << t.regexp << " " << (j == 0 ? "latin1" : "utf8")
+ << " " << re->Dump();
+ if (t.return_value) {
+ ASSERT_EQ(p, std::string(t.prefix))
+ << " " << t.regexp << " " << (j == 0 ? "latin1" : "utf8");
+ ASSERT_EQ(f, t.foldcase)
+ << " " << t.regexp << " " << (j == 0 ? "latin1" : "utf8");
+ }
+ re->Decref();
+ }
+ }
+}
+
+TEST(PrefixAccel, BasicTest) {
+ Regexp* re = Regexp::Parse("abc\\d+", Regexp::LikePerl, NULL);
+ ASSERT_TRUE(re != NULL);
+ Prog* prog = re->CompileToProg(0);
+ ASSERT_TRUE(prog != NULL);
+ for (int i = 0; i < 100; i++) {
+ std::string text(i, 'a');
+ const char* p = reinterpret_cast<const char*>(
+ prog->PrefixAccel(text.data(), text.size()));
+ EXPECT_TRUE(p == NULL);
+ text.append("abc");
+ p = reinterpret_cast<const char*>(
+ prog->PrefixAccel(text.data(), text.size()));
+ EXPECT_EQ(i, p-text.data());
+ }
+ delete prog;
+ re->Decref();
+}
+
} // namespace re2
diff --git a/chromium/third_party/re2/src/re2/testing/set_test.cc b/chromium/third_party/re2/src/re2/testing/set_test.cc
index 61d1cf295f3..5a760c4b5e2 100644
--- a/chromium/third_party/re2/src/re2/testing/set_test.cc
+++ b/chromium/third_party/re2/src/re2/testing/set_test.cc
@@ -5,6 +5,7 @@
#include <stddef.h>
#include <string>
#include <vector>
+#include <utility>
#include "util/test.h"
#include "util/logging.h"
@@ -201,4 +202,29 @@ TEST(Set, Prefix) {
ASSERT_EQ(v[0], 0);
}
+TEST(Set, MoveSemantics) {
+ RE2::Set s1(RE2::DefaultOptions, RE2::UNANCHORED);
+ ASSERT_EQ(s1.Add("foo\\d+", NULL), 0);
+ ASSERT_EQ(s1.Compile(), true);
+ ASSERT_EQ(s1.Match("abc foo1 xyz", NULL), true);
+ ASSERT_EQ(s1.Match("abc bar2 xyz", NULL), false);
+
+ // The moved-to object should do what the moved-from object did.
+ RE2::Set s2 = std::move(s1);
+ ASSERT_EQ(s2.Match("abc foo1 xyz", NULL), true);
+ ASSERT_EQ(s2.Match("abc bar2 xyz", NULL), false);
+
+ // The moved-from object should have been reset and be reusable.
+ ASSERT_EQ(s1.Add("bar\\d+", NULL), 0);
+ ASSERT_EQ(s1.Compile(), true);
+ ASSERT_EQ(s1.Match("abc foo1 xyz", NULL), false);
+ ASSERT_EQ(s1.Match("abc bar2 xyz", NULL), true);
+
+ // Verify that "overwriting" works and also doesn't leak memory.
+ // (The latter will need a leak detector such as LeakSanitizer.)
+ s1 = std::move(s2);
+ ASSERT_EQ(s1.Match("abc foo1 xyz", NULL), true);
+ ASSERT_EQ(s1.Match("abc bar2 xyz", NULL), false);
+}
+
} // namespace re2
diff --git a/chromium/third_party/re2/src/re2/walker-inl.h b/chromium/third_party/re2/src/re2/walker-inl.h
index 310be54bd7e..8cfe79960b1 100644
--- a/chromium/third_party/re2/src/re2/walker-inl.h
+++ b/chromium/third_party/re2/src/re2/walker-inl.h
@@ -89,7 +89,7 @@ template<typename T> class Regexp::Walker {
private:
// Walk state for the entire traversal.
- std::stack<WalkState<T> >* stack_;
+ std::stack<WalkState<T>> stack_;
bool stopped_early_;
int max_visits_;
@@ -134,24 +134,22 @@ template<typename T> struct WalkState {
};
template<typename T> Regexp::Walker<T>::Walker() {
- stack_ = new std::stack<WalkState<T> >;
stopped_early_ = false;
}
template<typename T> Regexp::Walker<T>::~Walker() {
Reset();
- delete stack_;
}
// Clears the stack. Should never be necessary, since
// Walk always enters and exits with an empty stack.
// Logs DFATAL if stack is not already clear.
template<typename T> void Regexp::Walker<T>::Reset() {
- if (stack_ && stack_->size() > 0) {
+ if (!stack_.empty()) {
LOG(DFATAL) << "Stack not empty.";
- while (stack_->size() > 0) {
- delete[] stack_->top().child_args;
- stack_->pop();
+ while (!stack_.empty()) {
+ delete[] stack_.top().child_args;
+ stack_.pop();
}
}
}
@@ -165,12 +163,12 @@ template<typename T> T Regexp::Walker<T>::WalkInternal(Regexp* re, T top_arg,
return top_arg;
}
- stack_->push(WalkState<T>(re, top_arg));
+ stack_.push(WalkState<T>(re, top_arg));
WalkState<T>* s;
for (;;) {
T t;
- s = &stack_->top();
+ s = &stack_.top();
Regexp* re = s->re;
switch (s->n) {
case -1: {
@@ -201,7 +199,7 @@ template<typename T> T Regexp::Walker<T>::WalkInternal(Regexp* re, T top_arg,
s->child_args[s->n] = Copy(s->child_args[s->n - 1]);
s->n++;
} else {
- stack_->push(WalkState<T>(sub[s->n], s->pre_arg));
+ stack_.push(WalkState<T>(sub[s->n], s->pre_arg));
}
continue;
}
@@ -214,12 +212,12 @@ template<typename T> T Regexp::Walker<T>::WalkInternal(Regexp* re, T top_arg,
}
}
- // We've finished stack_->top().
+ // We've finished stack_.top().
// Update next guy down.
- stack_->pop();
- if (stack_->size() == 0)
+ stack_.pop();
+ if (stack_.empty())
return t;
- s = &stack_->top();
+ s = &stack_.top();
if (s->child_args != NULL)
s->child_args[s->n] = t;
else