summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKarl Williamson <khw@cpan.org>2019-11-22 15:28:13 -0700
committerKarl Williamson <khw@cpan.org>2019-11-22 15:54:44 -0700
commit53d42e43e359facdd83b313c1f4b70f9ff559a70 (patch)
treed1e8547bbdfa043aaa7cd7a23b58ffcbc25a8fdc
parent008bb368ebc18adc42e95769e4ebbd7d5545ce3d (diff)
downloadperl-53d42e43e359facdd83b313c1f4b70f9ff559a70.tar.gz
PATCH: gh #17319 Segfault
It turns out that one isn't supposed to fill in the offset to the next regnode at node creation time. Also, this node is like the EXACTish nodes, so its string portion isn't accounted for in its regcomp.sym definition.
-rw-r--r--pod/perldebguts.pod2
-rw-r--r--regcomp.c11
-rw-r--r--regcomp.h2
-rw-r--r--regcomp.sym2
-rw-r--r--regnodes.h2
-rw-r--r--t/re/pat.t11
6 files changed, 18 insertions, 12 deletions
diff --git a/pod/perldebguts.pod b/pod/perldebguts.pod
index ce411e6d29..c8c251d27a 100644
--- a/pod/perldebguts.pod
+++ b/pod/perldebguts.pod
@@ -621,7 +621,7 @@ will be lost.
ANYOFHr sv 1 Like ANYOFH, but the flags field contains
packed bounds for all matchable UTF-8 start
bytes.
- ANYOFHs sv anyofhs Like ANYOFHb, but has a string field that
+ ANYOFHs sv 1 Like ANYOFHb, but has a string field that
gives the leading matchable UTF-8 bytes;
flags field is len
ANYOFR packed 1 Matches any character in the range given by
diff --git a/regcomp.c b/regcomp.c
index 1a6ab15b9c..bb3cd6618c 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -19516,15 +19516,12 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
regarglen[op] + STR_SZ(len),
"anyofhs");
FILL_NODE(ret, op);
- RExC_emit += 1 + regarglen[op]
- - 1 + STR_SZ(len); /* Replace the [1]
- element of the struct
- by the real value */
- REGNODE_p(ret)->flags = len;
+ ((struct regnode_anyofhs *) REGNODE_p(ret))->str_len
+ = len;
Copy(low_utf8, /* Add the common bytes */
((struct regnode_anyofhs *) REGNODE_p(ret))->string,
len, U8);
- NEXT_OFF(REGNODE_p(ret)) = regarglen[op] + STR_SZ(len);
+ RExC_emit += NODE_SZ_STR(REGNODE_p(ret));
set_ANYOF_arg(pRExC_state, REGNODE_p(ret), cp_list,
NULL, only_utf8_locale_list);
goto not_anyof;
@@ -22571,7 +22568,7 @@ S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node,
else if ( op == PLUS || op == STAR) {
DUMPUNTIL(NEXTOPER(node), NEXTOPER(node) + 1);
}
- else if (PL_regkind[(U8)op] == EXACT) {
+ else if (PL_regkind[(U8)op] == EXACT || op == ANYOFHs) {
/* Literal string, where present. */
node += NODE_SZ_STR(node) - 1;
node = NEXTOPER(node);
diff --git a/regcomp.h b/regcomp.h
index 3f7dd31391..e58534278d 100644
--- a/regcomp.h
+++ b/regcomp.h
@@ -165,7 +165,7 @@ struct regnode_lstring { /* Constructed this way to keep the string aligned. */
};
struct regnode_anyofhs { /* Constructed this way to keep the string aligned. */
- U8 flags;
+ U8 str_len;
U8 type;
U16 next_off;
U32 arg1; /* set by set_ANYOF_arg() */
diff --git a/regcomp.sym b/regcomp.sym
index 2f4018d62d..a8ff034083 100644
--- a/regcomp.sym
+++ b/regcomp.sym
@@ -82,7 +82,7 @@ ANYOFPOSIXL ANYOF, sv charclass_posixl S ; Like ANYOFL, but matches [[:p
ANYOFH ANYOF, sv 1 S ; Like ANYOF, but only has "High" matches, none in the bitmap; the flags field contains the lowest matchable UTF-8 start byte
ANYOFHb ANYOF, sv 1 S ; Like ANYOFH, but all matches share the same UTF-8 start byte, given in the flags field
ANYOFHr ANYOF, sv 1 S ; Like ANYOFH, but the flags field contains packed bounds for all matchable UTF-8 start bytes.
-ANYOFHs ANYOF, sv anyofhs S ; Like ANYOFHb, but has a string field that gives the leading matchable UTF-8 bytes; flags field is len
+ANYOFHs ANYOF, sv 1 S ; Like ANYOFHb, but has a string field that gives the leading matchable UTF-8 bytes; flags field is len
ANYOFR ANYOFR, packed 1 S ; Matches any character in the range given by its packed args: upper 12 bits is the max delta from the base lower 20; the flags field contains the lowest matchable UTF-8 start byte
ANYOFRb ANYOFR, packed 1 S ; Like ANYOFR, but all matches share the same UTF-8 start byte, given in the flags field
# There is no ANYOFRr because khw doesn't think there are likely to be real-world cases where such a large range is used.
diff --git a/regnodes.h b/regnodes.h
index fa90f50b14..89f8ecc2e7 100644
--- a/regnodes.h
+++ b/regnodes.h
@@ -351,7 +351,7 @@ static const U8 regarglen[] = {
EXTRA_SIZE(struct regnode_1), /* ANYOFH */
EXTRA_SIZE(struct regnode_1), /* ANYOFHb */
EXTRA_SIZE(struct regnode_1), /* ANYOFHr */
- EXTRA_SIZE(struct regnode_anyofhs), /* ANYOFHs */
+ EXTRA_SIZE(struct regnode_1), /* ANYOFHs */
EXTRA_SIZE(struct regnode_1), /* ANYOFR */
EXTRA_SIZE(struct regnode_1), /* ANYOFRb */
EXTRA_SIZE(struct regnode_1), /* ANYOFM */
diff --git a/t/re/pat.t b/t/re/pat.t
index ccf494c302..7d07d9981e 100644
--- a/t/re/pat.t
+++ b/t/re/pat.t
@@ -25,7 +25,7 @@ BEGIN {
skip_all('no re module') unless defined &DynaLoader::boot_DynaLoader;
skip_all_without_unicode_tables();
-plan tests => 1005; # Update this when adding/deleting tests.
+plan tests => 1011; # Update this when adding/deleting tests.
run_tests() unless caller;
@@ -2207,6 +2207,15 @@ SKIP:
unlike("\x{4000004}", $pat, "4000004 isn't in pattern");
like("\x{4000005}", $pat, "4000005 is in pattern");
unlike("\x{4000006}", $pat, "4000006 isn't in pattern");
+
+ # gh #17319
+ $pat = qr/[\N{U+200D}\N{U+2000}]()/;
+ unlike("\x{1FFF}", $pat, "1FFF isn't in pattern");
+ like("\x{2000}", $pat, "2000 is in pattern");
+ unlike("\x{2001}", $pat, "2001 isn't in pattern");
+ unlike("\x{200C}", $pat, "200C isn't in pattern");
+ like("\x{200D}", $pat, "200 is in pattern");
+ unlike("\x{200E}", $pat, "200E isn't in pattern");
}
} # End of sub run_tests