summary | refs | log | tree | commit | diff
path: root/regcomp.h
diff options
context:
space:
mode:
author: Yves Orton <demerphq@gmail.com> 2023-03-18 10:59:12 +0100
committer: Yves Orton <demerphq@gmail.com> 2023-03-19 05:27:01 +0800
commit: b16c8aa582a2f6fe12c09c7eb93ada71b0d85300 (patch)
tree: 443326f34d5925565437a727c8056ab6674ad6a4 /regcomp.h
parent: 1ed8aa9fdf7c8713cc895a86e3f9f22f89dfd200 (diff)
download: perl-b16c8aa582a2f6fe12c09c7eb93ada71b0d85300.tar.gz
regcomp.h - document RE_PESSIMISTIC_PARENS and VOLATILE_REF defines
These two defines are related to each other. VOLATILE_REF is not explicitly used in regexec.c (which would require it to be placed in regcomp.h), but it is used implicitly, and RE_PESSIMISTIC_PARENS *is* used in regexec.c. So put them both in regcomp.h and document them together. This adds copious documentation for what they are both for. RE_PESSIMISTIC_PARENS is effectively a "build option" (although intended for debugging regex engine bugs only). VOLATILE_REF is the name of a flag which is used to mark REF nodes as requiring special backtracking support in regexec.c.
Diffstat (limited to 'regcomp.h')
-rw-r--r-- regcomp.h 54
1 files changed, 52 insertions, 2 deletions
diff --git a/regcomp.h b/regcomp.h
index 1b380fccba..9d908b6dc6 100644
--- a/regcomp.h
+++ b/regcomp.h
@@ -13,12 +13,62 @@
#define PERL_REGCOMP_H_
-/* define this to 1 if you want to enable a really aggressive and inefficient
- * paren cleanup during backtracking. We should pass test with this as 0. */
#ifndef RE_PESSIMISTIC_PARENS
+/* Define this to 1 if you want to enable a really aggressive and
+ * inefficient paren cleanup during backtracking which should ensure
+ * correctness. Doing so should fix any bugs related to backreferences,
+ * at the cost of saving and restoring paren state far more than we
+ * necessarily must.
+ *
+ * When it is set to 0 we try to optimize away unnecessary save/restore
+ * operations which could potentially introduce bugs. We should pass our
+ * test suite with this as 0, but setting it to 1 might fix cases we do
+ * not currently test for. If setting this to 1 does fix a bug, then
+ * review the code related to storing and restoring paren state.
+ *
+ * See comment for VOLATILE_REF below for more details of a
+ * related case.
+ */
#define RE_PESSIMISTIC_PARENS 0
#endif
+/* A VOLATILE_REF is a ref which is inside of a capturing group and which
+ * refers to the capturing group it is inside of, or to a following capture
+ * group which might be affected by what this capture group matches, and
+ * which thus requires additional backtracking support. For example:
+ *
+ * "xa=xaaa" =~ /^(xa|=?\1a){2}\z/
+ *
+ * should not match. In older perls the matching process would go like this:
+ *
+ * Iter 1: "xa" matches in capture group.
+ * Iter 2: "xa" does not match, goes to next alternation.
+ * "=" matches in =?
+ * Bifurcates here (= might not match)
+ * "xa" matches via \1 from previous iteration
+ * "a" matches via "a" at end of second alternation
+ * # at this point $1 is "=xaa"
+ * \z does not match -> backtracks.
+ * Backtracks to Iter 2 "=?" Bifurcation point where we have NOT matched "="
+ * "=xaa" matches via \1 (as $1 has not been reset)
+ * "a" matches via "a" at end of second alternation
+ * "\z" does match. -> Pattern matches overall.
+ *
+ * What should happen and now does happen instead is:
+ *
+ * Backtracks to Iter 2 "=?" Bifurcation point where we have NOT matched "=",
+ * \1 does not match as it is "xa" (as $1 was reset when backtracked)
+ * and the current character in the string is an "="
+ *
+ * The fact that \1 in this case is marked as a VOLATILE_REF is what ensures
+ * that we reset the capture buffer properly.
+ *
+ * See 59db194299c94c6707095797c3df0e2f67ff82b2
+ * and 38508ce8fc3a1bd12a3bb65e9d4ceb9b396a18db
+ * for more details.
+ */
+#define VOLATILE_REF 1
+
#include "regcharclass.h"
/* Convert branch sequences to more efficient trie ops? */