diff options
author | Karl Williamson <khw@cpan.org> | 2019-11-22 15:28:13 -0700 |
---|---|---|
committer | Karl Williamson <khw@cpan.org> | 2019-11-22 15:54:44 -0700 |
commit | 53d42e43e359facdd83b313c1f4b70f9ff559a70 (patch) | |
tree | d1e8547bbdfa043aaa7cd7a23b58ffcbc25a8fdc | |
parent | 008bb368ebc18adc42e95769e4ebbd7d5545ce3d (diff) | |
download | perl-53d42e43e359facdd83b313c1f4b70f9ff559a70.tar.gz |
PATCH: gh #17319 Segfault
It turns out that one isn't supposed to fill in the offset to the next
regnode at node creation time. Also, this node is like an EXACTish node, so
its string portion isn't accounted for in its regcomp.sym definition.
-rw-r--r-- | pod/perldebguts.pod | 2 | ||||
-rw-r--r-- | regcomp.c | 11 | ||||
-rw-r--r-- | regcomp.h | 2 | ||||
-rw-r--r-- | regcomp.sym | 2 | ||||
-rw-r--r-- | regnodes.h | 2 | ||||
-rw-r--r-- | t/re/pat.t | 11 |
6 files changed, 18 insertions, 12 deletions
diff --git a/pod/perldebguts.pod b/pod/perldebguts.pod index ce411e6d29..c8c251d27a 100644 --- a/pod/perldebguts.pod +++ b/pod/perldebguts.pod @@ -621,7 +621,7 @@ will be lost. ANYOFHr sv 1 Like ANYOFH, but the flags field contains packed bounds for all matchable UTF-8 start bytes. - ANYOFHs sv anyofhs Like ANYOFHb, but has a string field that + ANYOFHs sv 1 Like ANYOFHb, but has a string field that gives the leading matchable UTF-8 bytes; flags field is len ANYOFR packed 1 Matches any character in the range given by @@ -19516,15 +19516,12 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth, regarglen[op] + STR_SZ(len), "anyofhs"); FILL_NODE(ret, op); - RExC_emit += 1 + regarglen[op] - - 1 + STR_SZ(len); /* Replace the [1] - element of the struct - by the real value */ - REGNODE_p(ret)->flags = len; + ((struct regnode_anyofhs *) REGNODE_p(ret))->str_len + = len; Copy(low_utf8, /* Add the common bytes */ ((struct regnode_anyofhs *) REGNODE_p(ret))->string, len, U8); - NEXT_OFF(REGNODE_p(ret)) = regarglen[op] + STR_SZ(len); + RExC_emit += NODE_SZ_STR(REGNODE_p(ret)); set_ANYOF_arg(pRExC_state, REGNODE_p(ret), cp_list, NULL, only_utf8_locale_list); goto not_anyof; @@ -22571,7 +22568,7 @@ S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node, else if ( op == PLUS || op == STAR) { DUMPUNTIL(NEXTOPER(node), NEXTOPER(node) + 1); } - else if (PL_regkind[(U8)op] == EXACT) { + else if (PL_regkind[(U8)op] == EXACT || op == ANYOFHs) { /* Literal string, where present. */ node += NODE_SZ_STR(node) - 1; node = NEXTOPER(node); @@ -165,7 +165,7 @@ struct regnode_lstring { /* Constructed this way to keep the string aligned. */ }; struct regnode_anyofhs { /* Constructed this way to keep the string aligned. 
*/ - U8 flags; + U8 str_len; U8 type; U16 next_off; U32 arg1; /* set by set_ANYOF_arg() */ diff --git a/regcomp.sym b/regcomp.sym index 2f4018d62d..a8ff034083 100644 --- a/regcomp.sym +++ b/regcomp.sym @@ -82,7 +82,7 @@ ANYOFPOSIXL ANYOF, sv charclass_posixl S ; Like ANYOFL, but matches [[:p ANYOFH ANYOF, sv 1 S ; Like ANYOF, but only has "High" matches, none in the bitmap; the flags field contains the lowest matchable UTF-8 start byte ANYOFHb ANYOF, sv 1 S ; Like ANYOFH, but all matches share the same UTF-8 start byte, given in the flags field ANYOFHr ANYOF, sv 1 S ; Like ANYOFH, but the flags field contains packed bounds for all matchable UTF-8 start bytes. -ANYOFHs ANYOF, sv anyofhs S ; Like ANYOFHb, but has a string field that gives the leading matchable UTF-8 bytes; flags field is len +ANYOFHs ANYOF, sv 1 S ; Like ANYOFHb, but has a string field that gives the leading matchable UTF-8 bytes; flags field is len ANYOFR ANYOFR, packed 1 S ; Matches any character in the range given by its packed args: upper 12 bits is the max delta from the base lower 20; the flags field contains the lowest matchable UTF-8 start byte ANYOFRb ANYOFR, packed 1 S ; Like ANYOFR, but all matches share the same UTF-8 start byte, given in the flags field # There is no ANYOFRr because khw doesn't think there are likely to be real-world cases where such a large range is used. 
diff --git a/regnodes.h b/regnodes.h index fa90f50b14..89f8ecc2e7 100644 --- a/regnodes.h +++ b/regnodes.h @@ -351,7 +351,7 @@ static const U8 regarglen[] = { EXTRA_SIZE(struct regnode_1), /* ANYOFH */ EXTRA_SIZE(struct regnode_1), /* ANYOFHb */ EXTRA_SIZE(struct regnode_1), /* ANYOFHr */ - EXTRA_SIZE(struct regnode_anyofhs), /* ANYOFHs */ + EXTRA_SIZE(struct regnode_1), /* ANYOFHs */ EXTRA_SIZE(struct regnode_1), /* ANYOFR */ EXTRA_SIZE(struct regnode_1), /* ANYOFRb */ EXTRA_SIZE(struct regnode_1), /* ANYOFM */ diff --git a/t/re/pat.t b/t/re/pat.t index ccf494c302..7d07d9981e 100644 --- a/t/re/pat.t +++ b/t/re/pat.t @@ -25,7 +25,7 @@ BEGIN { skip_all('no re module') unless defined &DynaLoader::boot_DynaLoader; skip_all_without_unicode_tables(); -plan tests => 1005; # Update this when adding/deleting tests. +plan tests => 1011; # Update this when adding/deleting tests. run_tests() unless caller; @@ -2207,6 +2207,15 @@ SKIP: unlike("\x{4000004}", $pat, "4000004 isn't in pattern"); like("\x{4000005}", $pat, "4000005 is in pattern"); unlike("\x{4000006}", $pat, "4000006 isn't in pattern"); + + # gh #17319 + $pat = qr/[\N{U+200D}\N{U+2000}]()/; + unlike("\x{1FFF}", $pat, "1FFF isn't in pattern"); + like("\x{2000}", $pat, "2000 is in pattern"); + unlike("\x{2001}", $pat, "2001 isn't in pattern"); + unlike("\x{200C}", $pat, "200C isn't in pattern"); + like("\x{200D}", $pat, "200 is in pattern"); + unlike("\x{200E}", $pat, "200E isn't in pattern"); } } # End of sub run_tests |