summaryrefslogtreecommitdiff
path: root/regcomp.c
diff options
context:
space:
mode:
author: Karl Williamson <public@khwilliamson.com> 2011-03-18 08:36:17 -0600
committer: Karl Williamson <public@khwilliamson.com> 2011-03-18 09:18:00 -0600
commit: dd58aee1a749e6188a04cac2e4ba58a7004c1ec1 (patch)
tree: 49bd56e35c794fe000447b3aff2d9d0a6ee3dc11 /regcomp.c
parent: fff7535cca3d7faa5b59aa82de74bd71e938ea92 (diff)
download: perl-dd58aee1a749e6188a04cac2e4ba58a7004c1ec1.tar.gz
regex: Fix locale regression
Things like \S have not been accessible to the synthetic start class under locale matching rules. They have been placed there, but the start class didn't know they were there. This patch sets ANYOF_CLASS when initializing the synthetic start class, so that downstream code knows it is a charclass_class. It also removes the code that partially allowed this bit to be shared; that code isn't needed in 5.14, and doing the sharing properly would take more thought than was reflected in the code. I can't come up with a test case that would verify that this works, because of general locale testing issues. The only candidate is a test that looks at a dump of the generated regex synthetic start class, but the dump isn't the same thing as the real behavior, and such a test is also subject to breakage if the regex code changes in the slightest.
Diffstat (limited to 'regcomp.c')
-rw-r--r-- regcomp.c 29
1 files changed, 10 insertions, 19 deletions
diff --git a/regcomp.c b/regcomp.c
index 75da2bfa06..addc0d0266 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -728,7 +728,8 @@ S_cl_anything(const RExC_state_t *pRExC_state, struct regnode_charclass_class *c
ANYOF_BITMAP_SETALL(cl);
ANYOF_CLASS_ZERO(cl); /* all bits set, so class is irrelevant */
- cl->flags = ANYOF_EOS|ANYOF_UNICODE_ALL|ANYOF_LOC_NONBITMAP_FOLD|ANYOF_NON_UTF8_LATIN1_ALL;
+ cl->flags = ANYOF_CLASS|ANYOF_EOS|ANYOF_UNICODE_ALL
+ |ANYOF_LOC_NONBITMAP_FOLD|ANYOF_NON_UTF8_LATIN1_ALL;
/* If any portion of the regex is to operate under locale rules,
* initialization includes it. The reason this isn't done for all regexes
@@ -775,8 +776,9 @@ S_cl_init(const RExC_state_t *pRExC_state, struct regnode_charclass_class *cl)
/* These two functions currently do the exact same thing */
#define cl_init_zero S_cl_init
-/* 'And' a given class with another one. Can create false positives */
-/* cl should not be inverted */
+/* 'AND' a given class with another one. Can create false positives. 'cl'
+ * should not be inverted. 'and_with->flags & ANYOF_CLASS' should be 0 if
+ * 'and_with' is a regnode_charclass instead of a regnode_charclass_class. */
STATIC void
S_cl_and(struct regnode_charclass_class *cl,
const struct regnode_charclass_class *and_with)
@@ -866,8 +868,9 @@ S_cl_and(struct regnode_charclass_class *cl,
}
}
-/* 'OR' a given class with another one. Can create false positives */
-/* cl should not be inverted */
+/* 'OR' a given class with another one. Can create false positives. 'cl'
+ * should not be inverted. 'or_with->flags & ANYOF_CLASS' should be 0 if
+ * 'or_with' is a regnode_charclass instead of a regnode_charclass_class. */
STATIC void
S_cl_or(const RExC_state_t *pRExC_state, struct regnode_charclass_class *cl, const struct regnode_charclass_class *or_with)
{
@@ -9542,20 +9545,12 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, U32 depth)
if (SIZE_ONLY) {
RExC_size += ANYOF_SKIP;
-#ifdef ANYOF_ADD_LOC_SKIP
- if (LOC) {
- RExC_size += ANYOF_ADD_LOC_SKIP;
- }
-#endif
listsv = &PL_sv_undef; /* For code scanners: listsv always non-NULL. */
}
else {
RExC_emit += ANYOF_SKIP;
if (LOC) {
ANYOF_FLAGS(ret) |= ANYOF_LOCALE;
-#ifdef ANYOF_ADD_LOC_SKIP
- RExC_emit += ANYOF_ADD_LOC_SKIP;
-#endif
}
ANYOF_BITMAP_ZERO(ret);
listsv = newSVpvs("# comment\n");
@@ -9784,14 +9779,10 @@ parseit:
if (LOC && namedclass < ANYOF_MAX && ! need_class) {
need_class = 1;
if (SIZE_ONLY) {
-#ifdef ANYOF_CLASS_ADD_SKIP
- RExC_size += ANYOF_CLASS_ADD_SKIP;
-#endif
+ RExC_size += ANYOF_CLASS_SKIP - ANYOF_SKIP;
}
else {
-#ifdef ANYOF_CLASS_ADD_SKIP
- RExC_emit += ANYOF_CLASS_ADD_SKIP;
-#endif
+ RExC_emit += ANYOF_CLASS_SKIP - ANYOF_SKIP;
ANYOF_CLASS_ZERO(ret);
}
ANYOF_FLAGS(ret) |= ANYOF_CLASS;