summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--embed.fnc2
-rw-r--r--embed.h2
-rw-r--r--proto.h2
-rw-r--r--regcomp.c32
-rw-r--r--t/op/re_tests13
5 files changed, 39 insertions, 12 deletions
diff --git a/embed.fnc b/embed.fnc
index 68f38171c7..439203ca5d 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -1593,7 +1593,7 @@ Es |UV |reg_recode |const char value|NN SV **encp
Es |regnode*|regpiece |NN struct RExC_state_t *pRExC_state \
|NN I32 *flagp|U32 depth
Es |regnode*|reg_namedseq |NN struct RExC_state_t *pRExC_state \
- |NULLOK UV *valuep
+ |NULLOK UV *valuep|NULLOK I32 *flagp
Es |void |reginsert |NN struct RExC_state_t *pRExC_state \
|U8 op|NN regnode *opnd|U32 depth
Es |void |regtail |NN struct RExC_state_t *pRExC_state \
diff --git a/embed.h b/embed.h
index e320dc5654..9af17f6810 100644
--- a/embed.h
+++ b/embed.h
@@ -3756,7 +3756,7 @@
#define reg_node(a,b) S_reg_node(aTHX_ a,b)
#define reg_recode(a,b) S_reg_recode(aTHX_ a,b)
#define regpiece(a,b,c) S_regpiece(aTHX_ a,b,c)
-#define reg_namedseq(a,b) S_reg_namedseq(aTHX_ a,b)
+#define reg_namedseq(a,b,c) S_reg_namedseq(aTHX_ a,b,c)
#define reginsert(a,b,c,d) S_reginsert(aTHX_ a,b,c,d)
#define regtail(a,b,c,d) S_regtail(aTHX_ a,b,c,d)
#define reg_scan_name(a,b) S_reg_scan_name(aTHX_ a,b)
diff --git a/proto.h b/proto.h
index 78f17ddedb..285e05f8f7 100644
--- a/proto.h
+++ b/proto.h
@@ -5178,7 +5178,7 @@ STATIC regnode* S_regpiece(pTHX_ struct RExC_state_t *pRExC_state, I32 *flagp, U
#define PERL_ARGS_ASSERT_REGPIECE \
assert(pRExC_state); assert(flagp)
-STATIC regnode* S_reg_namedseq(pTHX_ struct RExC_state_t *pRExC_state, UV *valuep)
+STATIC regnode* S_reg_namedseq(pTHX_ struct RExC_state_t *pRExC_state, UV *valuep, I32 *flagp)
__attribute__nonnull__(pTHX_1);
#define PERL_ARGS_ASSERT_REG_NAMEDSEQ \
assert(pRExC_state)
diff --git a/regcomp.c b/regcomp.c
index e06152820c..bc7086f8df 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -6553,7 +6553,7 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
/* reg_namedseq(pRExC_state,UVp)
This is expected to be called by a parser routine that has
- recognized'\N' and needs to handle the rest. RExC_parse is
+ recognized '\N' and needs to handle the rest. RExC_parse is
expected to point at the first char following the N at the time
of the call.
@@ -6567,11 +6567,11 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
be returned to indicate failure. (This will NOT be a valid pointer
to a regnode.)
- If value is null then it is assumed that we are parsing normal text
+ If valuep is null then it is assumed that we are parsing normal text
and inserts a new EXACT node into the program containing the resolved
string and returns a pointer to the new node. If the string is
zerolength a NOTHING node is emitted.
-
+
On success RExC_parse is set to the char following the endbrace.
Parsing failures will generate a fatal errorvia vFAIL(...)
@@ -6585,7 +6585,7 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
*/
STATIC regnode *
-S_reg_namedseq(pTHX_ RExC_state_t *pRExC_state, UV *valuep)
+S_reg_namedseq(pTHX_ RExC_state_t *pRExC_state, UV *valuep, I32 *flagp)
{
char * name; /* start of the content of the name */
char * endbrace; /* endbrace following the name */
@@ -6597,8 +6597,22 @@ S_reg_namedseq(pTHX_ RExC_state_t *pRExC_state, UV *valuep)
PERL_ARGS_ASSERT_REG_NAMEDSEQ;
- if (*RExC_parse != '{') {
- vFAIL("Missing braces on \\N{}");
+ if (*RExC_parse != '{' ||
+ (*RExC_parse == '{' && RExC_parse[1]
+ && strchr("0123456789", RExC_parse[1])))
+ {
+ GET_RE_DEBUG_FLAGS_DECL;
+ if (valuep)
+ /* no bare \N in a charclass */
+ vFAIL("Missing braces on \\N{}");
+ GET_RE_DEBUG_FLAGS;
+ nextchar(pRExC_state);
+ ret = reg_node(pRExC_state, REG_ANY);
+ *flagp |= HASWIDTH|SIMPLE;
+ RExC_naughty++;
+ RExC_parse--;
+ Set_Node_Length(ret, 1); /* MJD */
+ return ret;
}
name = RExC_parse+1;
endbrace = strchr(RExC_parse, '}');
@@ -7159,12 +7173,12 @@ tryagain:
}
break;
case 'N':
- /* Handle \N{NAME} here and not below because it can be
+ /* Handle \N and \N{NAME} here and not below because it can be
multicharacter. join_exact() will join them up later on.
Also this makes sure that things like /\N{BLAH}+/ and
\N{BLAH} being multi char Just Happen. dmq*/
++RExC_parse;
- ret= reg_namedseq(pRExC_state, NULL);
+ ret= reg_namedseq(pRExC_state, NULL, flagp);
break;
case 'k': /* Handle \k<NAME> and \k'NAME' */
parse_named_seq:
@@ -7964,7 +7978,7 @@ parseit:
from earlier versions, OTOH that behaviour was broken
as well. */
UV v; /* value is register so we cant & it /grrr */
- if (reg_namedseq(pRExC_state, &v)) {
+ if (reg_namedseq(pRExC_state, &v, NULL)) {
goto parseit;
}
value= v;
diff --git a/t/op/re_tests b/t/op/re_tests
index f9b070d62c..e26419823c 100644
--- a/t/op/re_tests
+++ b/t/op/re_tests
@@ -31,6 +31,12 @@ ab*bc abbbbc y $+[0] 6
.{3,4} abbbbc y $& abbb
.{3,4} abbbbc y $-[0] 0
.{3,4} abbbbc y $+[0] 4
+\N{1} abbbbc y $& a
+\N{1} abbbbc y $-[0] 0
+\N{1} abbbbc y $+[0] 1
+\N{3,4} abbbbc y $& abbb
+\N{3,4} abbbbc y $-[0] 0
+\N{3,4} abbbbc y $+[0] 4
ab{0,}bc abbbbc y $& abbbbc
ab{0,}bc abbbbc y $-[0] 0
ab{0,}bc abbbbc y $+[0] 6
@@ -69,8 +75,10 @@ abc$ aabcd n - -
$ abc y $&
a.c abc y $& abc
a.c axc y $& axc
+a\Nc abc y $& abc
a.*c axyzc y $& axyzc
a.*c axyzd n - -
+a\N*c axyzd n - -
a[bc]d abc n - -
a[bc]d abd y $& abd
a[b]d abd y $& abd
@@ -78,6 +86,7 @@ a[b]d abd y $& abd
.[b]. abd y $& abd
.[b]. aBd n - -
(?i:.[b].) abd y $& abd
+(?i:\N[b]\N) abd y $& abd
a[b-d]e abd n - -
a[b-d]e ace y $& ace
a[b-d] aac y $& ac
@@ -315,6 +324,7 @@ a[-]?c ac y $& ac
'$'i ABC y $&
'a.c'i ABC y $& ABC
'a.c'i AXC y $& AXC
+'a\Nc'i ABC y $& ABC
'a.*?c'i AXYZC y $& AXYZC
'a.*c'i AXYZD n - -
'a[bc]d'i ABC n - -
@@ -497,8 +507,11 @@ a(?:b|(c|e){1,2}?|d)+?(.) ace y $1$2 ce
'(?-i:a)b'i AB n - -
'((?-i:a))b'i AB n - -
'((?-i:a.))b'i a\nB n - -
+'((?-i:a\N))b'i a\nB n - -
'((?s-i:a.))b'i a\nB y $1 a\n
+'((?s-i:a\N))b'i a\nB n - -
'((?s-i:a.))b'i B\nB n - -
+'((?s-i:a\N))b'i B\nB n - -
(?:c|d)(?:)(?:a(?:)(?:b)(?:b(?:))(?:b(?:)(?:b))) cabbbb y $& cabbbb
(?:c|d)(?:)(?:aaaaaaaa(?:)(?:bbbbbbbb)(?:bbbbbbbb(?:))(?:bbbbbbbb(?:)(?:bbbbbbbb))) caaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb y $& caaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb
'(ab)\d\1'i Ab4ab y $1 Ab