summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--op.c6
-rw-r--r--op.h2
-rw-r--r--op_reg_common.h5
-rw-r--r--pod/perldelta.pod26
-rw-r--r--pod/perldiag.pod6
-rw-r--r--pod/perlre.pod65
-rw-r--r--regcomp.c40
-rw-r--r--regexp.h12
-rw-r--r--regnodes.h4
-rw-r--r--t/re/pat.t32
-rw-r--r--t/re/re.t10
-rw-r--r--t/re/reg_mesg.t6
-rw-r--r--universal.c11
13 files changed, 187 insertions, 38 deletions
diff --git a/op.c b/op.c
index db91cdb913..983bf3d1c6 100644
--- a/op.c
+++ b/op.c
@@ -3649,8 +3649,12 @@ Perl_newPMOP(pTHX_ I32 type, I32 flags)
if (PL_hints & HINT_RE_TAINT)
pmop->op_pmflags |= PMf_RETAINT;
- if (PL_hints & HINT_LOCALE)
+ if (PL_hints & HINT_LOCALE) {
pmop->op_pmflags |= PMf_LOCALE;
+ }
+ else if ((! (PL_hints & HINT_BYTES)) && (PL_hints & HINT_UNI_8_BIT)) {
+ pmop->op_pmflags |= RXf_PMf_UNICODE;
+ }
#ifdef USE_ITHREADS
diff --git a/op.h b/op.h
index da280b8b8d..a29d516d10 100644
--- a/op.h
+++ b/op.h
@@ -366,7 +366,7 @@ struct pmop {
/* Leave some space, so future bit allocations can go either in the shared or
* unshared area without affecting binary compatibility */
-#define PMf_BASE_SHIFT (_RXf_PMf_SHIFT_NEXT+8)
+#define PMf_BASE_SHIFT (_RXf_PMf_SHIFT_NEXT+7)
/* taint $1 etc. if target tainted */
#define PMf_RETAINT (1<<(PMf_BASE_SHIFT+0))
diff --git a/op_reg_common.h b/op_reg_common.h
index d4e39873e1..ce12da542f 100644
--- a/op_reg_common.h
+++ b/op_reg_common.h
@@ -29,14 +29,15 @@
#define RXf_PMf_EXTENDED (1 << (RXf_PMf_STD_PMMOD_SHIFT+3)) /* /x */
#define RXf_PMf_KEEPCOPY (1 << (RXf_PMf_STD_PMMOD_SHIFT+4)) /* /p */
#define RXf_PMf_LOCALE (1 << (RXf_PMf_STD_PMMOD_SHIFT+5))
+#define RXf_PMf_UNICODE (1 << (RXf_PMf_STD_PMMOD_SHIFT+6))
/* Next available bit after the above. Name begins with '_' so won't be
* exported by B */
-#define _RXf_PMf_SHIFT_NEXT (RXf_PMf_STD_PMMOD_SHIFT+6)
+#define _RXf_PMf_SHIFT_NEXT (RXf_PMf_STD_PMMOD_SHIFT+7)
/* Mask of the above bits. These need to be transferred from op_pmflags to
* re->extflags during compilation */
-#define RXf_PMf_COMPILETIME (RXf_PMf_MULTILINE|RXf_PMf_SINGLELINE|RXf_PMf_LOCALE|RXf_PMf_FOLD|RXf_PMf_EXTENDED|RXf_PMf_KEEPCOPY)
+#define RXf_PMf_COMPILETIME (RXf_PMf_MULTILINE|RXf_PMf_SINGLELINE|RXf_PMf_LOCALE|RXf_PMf_FOLD|RXf_PMf_EXTENDED|RXf_PMf_KEEPCOPY|RXf_PMf_UNICODE)
/* These copies need to be numerical or defsubs_h.PL won't know about them. */
#define PMf_MULTILINE 1<<0
diff --git a/pod/perldelta.pod b/pod/perldelta.pod
index 4289130a83..8cdbdb31ea 100644
--- a/pod/perldelta.pod
+++ b/pod/perldelta.pod
@@ -40,6 +40,19 @@ main purpose of this is to allow tests that rely on the stringification
to not have to change when new modifiers are added. See
L<perlre/Extended Patterns>.
+=head2 C<"d">, C<"l">, and C<"u"> regex modifiers added
+
+These modifiers are currently only available within a C<(?...)> construct.
+
+The C<"l"> modifier says to compile the regular expression as if it were
+in the scope of C<use locale>, even if it is not.
+
+The C<"u"> modifier currently does nothing.
+
+The C<"d"> modifier is used in the scope of C<use locale> to compile the
+regular expression as if it were not in that scope.
+See L<perlre/(?dlupimsx-imsx)>.
+
=head1 Security
XXX Any security-related notices go here. In particular, any security
@@ -66,6 +79,17 @@ can use something like the following:
And then use C<$modifiers> instead of C<-xism>.
+=head2 Regular expressions retain their localeness when interpolated
+
+Regular expressions compiled under C<"use locale"> now retain this when
+interpolated into a new regular expression compiled outside a
+C<"use locale">, and vice-versa.
+
+Previously, a regular expression interpolated into another one inherited
+the localeness of the surrounding one, losing whatever state it
+originally had. This is considered a bug fix, but may trip up code that
+has come to rely on the incorrect behavior.
+
[ List each incompatible change as a =head2 entry ]
=head1 Deprecations
@@ -320,7 +344,7 @@ be noted as well.
=item *
-XXX
+See L</Regular expressions retain their localeness when interpolated>
=back
diff --git a/pod/perldiag.pod b/pod/perldiag.pod
index e725749275..c6806c1c13 100644
--- a/pod/perldiag.pod
+++ b/pod/perldiag.pod
@@ -4035,8 +4035,10 @@ where the problem was discovered. See L<perlre>.
<-- HERE shows in the regular expression about where the problem was
discovered. This happens when using the C<(?^...)> construct to tell
Perl to use the default regular expression modifiers, and you
-redundantly specify a default modifier. For other causes, see
-L<perlre>.
+redundantly specify a default modifier; or having a modifier that can't
+be turned off (such as C<"p"> or C<"l">) after a minus; or specifying
+more than one of the C<"d">, C<"l">, or C<"u"> modifiers. For other
+causes, see L<perlre>.
=item Sequence \%s... not terminated in regex; marked by <-- HERE in m/%s/
diff --git a/pod/perlre.pod b/pod/perlre.pod
index 6e68bcd1db..b9216c156c 100644
--- a/pod/perlre.pod
+++ b/pod/perlre.pod
@@ -594,20 +594,15 @@ whitespace formatting, a simple C<#> will suffice. Note that Perl closes
the comment as soon as it sees a C<)>, so there is no way to put a literal
C<)> in the comment.
-=item C<(?pimsx-imsx)>
+=item C<(?dlupimsx-imsx)>
-=item C<(?^pimsx)>
+=item C<(?^lupimsx)>
X<(?)> X<(?^)>
One or more embedded pattern-match modifiers, to be turned on (or
turned off, if preceded by C<->) for the remainder of the pattern or
the remainder of the enclosing pattern group (if any).
-Starting in Perl 5.14, a C<"^"> (caret or circumflex accent) immediately
-after the C<"?"> is a shorthand equivalent to C<-imsx> and compiling the
-regex under C<no locale>. Flags may follow the caret to override it.
-But a minus sign is not legal with it.
-
This is particularly useful for dynamic patterns, such as those read in from a
configuration file, taken from an argument, or specified in a table
somewhere. Consider the case where some patterns want to be case
@@ -634,17 +629,53 @@ These modifiers do not carry over into named subpatterns called in the
enclosing group. In other words, a pattern such as C<((?i)(&NAME))> does not
change the case-sensitivity of the "NAME" pattern.
-Note that the C<p> modifier is special in that it can only be enabled,
-not disabled, and that its presence anywhere in a pattern has a global
-effect. Thus C<(?-p)> and C<(?-p:...)> are meaningless and will warn
-when executed under C<use warnings>.
+Starting in Perl 5.14, a C<"^"> (caret or circumflex accent) immediately
+after the C<"?"> is a shorthand equivalent to C<d-imsx>. Flags (except
+C<"d">) may follow the caret to override it.
+But a minus sign is not legal with it.
+
+Also, starting in Perl 5.14, are modifiers C<"d">, C<"l">, and C<"u">,
+which for 5.14 may not be used as suffix modifiers.
+
+C<"l"> means to use a locale (see L<perllocale>) when pattern matching.
+The locale used will be the one in effect at the time of execution of
+the pattern match. This may not be the same as the compilation-time
+locale, and can differ from one match to another if there is an
+intervening call of the
+L<setlocale() function|perllocale/The setlocale function>.
+This modifier is automatically set if the regular expression is compiled
+within the scope of a C<"use locale"> pragma.
+
+C<"u"> has no effect currently. It is automatically set if the regular
+expression is compiled within the scope of a
+L<C<"use feature 'unicode_strings">|feature> pragma.
+
+C<"d"> means to use the traditional Perl pattern matching behavior.
+This is dualistic (hence the name C<"d">, which also could stand for
+"default"). When this is in effect, Perl matches utf8-encoded strings
+using Unicode rules, and matches non-utf8-encoded strings using the
+platform's native character set rules.
+See L<perlunicode/The "Unicode Bug">. It is automatically selected by
+default if the regular expression is compiled neither within the scope
+of a C<"use locale"> pragma nor a <C<"use feature 'unicode_strings">
+pragma.
+
+Note that the C<d>, C<l>, C<p>, and C<u> modifiers are special in that
+they can only be enabled, not disabled, and the C<d>, C<l>, and C<u>
+modifiers are mutually exclusive; a maximum of one may appear in the
+construct. Specifying one de-specifies the others. Thus, for example,
+C<(?-p)> and C<(?-d:...)> are meaningless and will warn when compiled
+under C<use warnings>.
+
+Note also that the C<p> modifier is special in that its presence
+anywhere in a pattern has a global effect.
=item C<(?:pattern)>
X<(?:)>
-=item C<(?imsx-imsx:pattern)>
+=item C<(?dluimsx-imsx:pattern)>
-=item C<(?^imsx:pattern)>
+=item C<(?^luimsx:pattern)>
X<(?^:)>
This is for clustering, not capturing; it groups subexpressions like
@@ -660,7 +691,7 @@ but doesn't spit out extra fields. It's also cheaper not to capture
characters if you don't need to.
Any letters between C<?> and C<:> act as flags modifiers as with
-C<(?imsx-imsx)>. For example,
+C<(?dluimsx-imsx)>. For example,
/(?s-i:more.*than).*million/i
@@ -669,8 +700,8 @@ is equivalent to the more verbose
/(?:(?s-i)more.*than).*million/i
Starting in Perl 5.14, a C<"^"> (caret or circumflex accent) immediately
-after the C<"?"> is a shorthand equivalent to C<-imsx> and compiling the
-regex under C<no locale>. Any positive flags may follow the caret, so
+after the C<"?"> is a shorthand equivalent to C<d-imsx>. Any positive
+flags (except C<"d">) may follow the caret, so
(?^x:foo)
@@ -679,7 +710,7 @@ is equivalent to
(?x-ims:foo)
The caret tells Perl that this cluster doesn't inherit the flags of any
-surrounding pattern, but to go back to the system defaults (C<-imsx>),
+surrounding pattern, but to go back to the system defaults (C<d-imsx>),
modified by any flags specified.
The caret allows for simpler stringification of compiled regular
diff --git a/regcomp.c b/regcomp.c
index c080fcdeab..4c332b0433 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -370,6 +370,7 @@ static const scan_data_t zero_scan_data =
#define UTF (RExC_utf8 != 0)
#define LOC ((RExC_flags & RXf_PMf_LOCALE) != 0)
+#define UNI_SEMANTICS ((RExC_flags & RXf_PMf_UNICODE) != 0)
#define FOLD ((RExC_flags & RXf_PMf_FOLD) != 0)
#define OOB_UNICODE 12345678
@@ -4441,9 +4442,17 @@ Perl_re_compile(pTHX_ SV * const pattern, U32 pm_flags)
SvPOK_on(rx);
SvFLAGS(rx) |= SvUTF8(pattern);
*p++='('; *p++='?';
- if (has_minus) { /* If a default, cover it using the caret */
+
+ /* If a default, cover it using the caret */
+ if (has_minus || (r->extflags & ~(RXf_PMf_LOCALE|RXf_PMf_UNICODE))) {
*p++= DEFAULT_PAT_MOD;
}
+ if (r->extflags & RXf_PMf_LOCALE) {
+ *p++ = LOCALE_PAT_MOD;
+ }
+ else if (r->extflags & RXf_PMf_UNICODE) {
+ *p++ = UNICODE_PAT_MOD;
+ }
if (has_p)
*p++ = KEEPCOPY_PAT_MOD; /*'p'*/
{
@@ -6124,6 +6133,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
that follow */
has_use_defaults = TRUE;
STD_PMMOD_FLAGS_CLEAR(&RExC_flags);
+ RExC_flags &= ~(RXf_PMf_LOCALE|RXf_PMf_UNICODE);
goto parse_flags;
default:
--RExC_parse;
@@ -6131,6 +6141,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
{
U32 posflags = 0, negflags = 0;
U32 *flagsp = &posflags;
+ bool has_charset_modifier = 0;
while (*RExC_parse) {
/* && strchr("iogcmsx", *RExC_parse) */
@@ -6138,6 +6149,32 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
and must be globally applied -- japhy */
switch (*RExC_parse) {
CASE_STD_PMMOD_FLAGS_PARSE_SET(flagsp);
+ case LOCALE_PAT_MOD:
+ if (has_charset_modifier || flagsp == &negflags) {
+ goto fail_modifiers;
+ }
+ *flagsp &= ~RXf_PMf_UNICODE;
+ *flagsp |= RXf_PMf_LOCALE;
+ has_charset_modifier = 1;
+ break;
+ case UNICODE_PAT_MOD:
+ if (has_charset_modifier || flagsp == &negflags) {
+ goto fail_modifiers;
+ }
+ *flagsp &= ~RXf_PMf_LOCALE;
+ *flagsp |= RXf_PMf_UNICODE;
+ has_charset_modifier = 1;
+ break;
+ case DUAL_PAT_MOD:
+ if (has_use_defaults
+ || has_charset_modifier
+ || flagsp == &negflags)
+ {
+ goto fail_modifiers;
+ }
+ *flagsp &= ~(RXf_PMf_LOCALE|RXf_PMf_UNICODE);
+ has_charset_modifier = 1;
+ break;
case ONCE_PAT_MOD: /* 'o' */
case GLOBAL_PAT_MOD: /* 'g' */
if (SIZE_ONLY && ckWARN(WARN_REGEXP)) {
@@ -6182,6 +6219,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
* if there is a minus, it means will be trying to
* re-specify a default which is an error */
if (has_use_defaults || flagsp == &negflags) {
+ fail_modifiers:
RExC_parse++;
vFAIL3("Sequence (%.*s...) not recognized", RExC_parse-seqstart, seqstart);
/*NOTREACHED*/
diff --git a/regexp.h b/regexp.h
index 17f998310e..004d614b09 100644
--- a/regexp.h
+++ b/regexp.h
@@ -236,9 +236,9 @@ and check for NULL.
case SINGLE_PAT_MOD: *(pmfl) |= RXf_PMf_SINGLELINE; break; \
case XTENDED_PAT_MOD: *(pmfl) |= RXf_PMf_EXTENDED; break
-/* Note, includes locale */
+/* Note, includes locale, unicode */
#define STD_PMMOD_FLAGS_CLEAR(pmfl) \
- *(pmfl) &= ~(RXf_PMf_FOLD|RXf_PMf_MULTILINE|RXf_PMf_SINGLELINE|RXf_PMf_EXTENDED|RXf_PMf_LOCALE)
+ *(pmfl) &= ~(RXf_PMf_FOLD|RXf_PMf_MULTILINE|RXf_PMf_SINGLELINE|RXf_PMf_EXTENDED|RXf_PMf_LOCALE|RXf_PMf_UNICODE)
/* chars and strings used as regex pattern modifiers
* Singlular is a 'c'har, plural is a "string"
@@ -258,12 +258,18 @@ and check for NULL.
#define IGNORE_PAT_MOD 'i'
#define XTENDED_PAT_MOD 'x'
#define NONDESTRUCT_PAT_MOD 'r'
+#define LOCALE_PAT_MOD 'l'
+#define UNICODE_PAT_MOD 'u'
+#define DUAL_PAT_MOD 'd'
#define ONCE_PAT_MODS "o"
#define KEEPCOPY_PAT_MODS "p"
#define EXEC_PAT_MODS "e"
#define LOOP_PAT_MODS "gc"
#define NONDESTRUCT_PAT_MODS "r"
+#define LOCALE_PAT_MODS "l"
+#define UNICODE_PAT_MODS "u"
+#define DUAL_PAT_MODS "d"
/* This string is expected by regcomp.c to be ordered so that the first
* character is the flag in bit RXf_PMf_STD_PMMOD_SHIFT of extflags; the next
@@ -288,7 +294,7 @@ and check for NULL.
/* Leave some space, so future bit allocations can go either in the shared or
* unshared area without affecting binary compatibility */
-#define RXf_BASE_SHIFT (_RXf_PMf_SHIFT_NEXT+3)
+#define RXf_BASE_SHIFT (_RXf_PMf_SHIFT_NEXT+2)
/* Anchor and GPOS related stuff */
#define RXf_ANCH_BOL (1<<(RXf_BASE_SHIFT+0))
diff --git a/regnodes.h b/regnodes.h
index d132013b5d..f5aacc2f97 100644
--- a/regnodes.h
+++ b/regnodes.h
@@ -625,14 +625,14 @@ EXTCONST char * const PL_reg_name[] = {
EXTCONST char * PL_reg_extflags_name[];
#else
EXTCONST char * const PL_reg_extflags_name[] = {
- /* Bits in extflags defined: 11111111111111111111111000111111 */
+ /* Bits in extflags defined: 11111111111111111111111001111111 */
"MULTILINE", /* 0x00000001 */
"SINGLELINE", /* 0x00000002 */
"FOLD", /* 0x00000004 */
"EXTENDED", /* 0x00000008 */
"KEEPCOPY", /* 0x00000010 */
"LOCALE", /* 0x00000020 */
- "UNUSED_BIT_6", /* 0x00000040 */
+ "UNICODE", /* 0x00000040 */
"UNUSED_BIT_7", /* 0x00000080 */
"UNUSED_BIT_8", /* 0x00000100 */
"ANCH_BOL", /* 0x00000200 */
diff --git a/t/re/pat.t b/t/re/pat.t
index 3bc7f5d372..c007880b8c 100644
--- a/t/re/pat.t
+++ b/t/re/pat.t
@@ -23,7 +23,7 @@ BEGIN {
}
-plan tests => 385; # Update this when adding/deleting tests.
+plan tests => 398; # Update this when adding/deleting tests.
run_tests() unless caller;
@@ -503,10 +503,38 @@ sub run_tests {
iseq qr/\b\v$/s, '(?^s:\b\v$)', 'qr/\b\v$/s';
iseq qr/\b\v$/m, '(?^m:\b\v$)', 'qr/\b\v$/m';
iseq qr/\b\v$/x, '(?^x:\b\v$)', 'qr/\b\v$/x';
- iseq qr/\b\v$/xism, '(?msix:\b\v$)', 'qr/\b\v$/xism';
+ iseq qr/\b\v$/xism, '(?^msix:\b\v$)', 'qr/\b\v$/xism';
iseq qr/\b\v$/, '(?^:\b\v$)', 'qr/\b\v$/';
}
+ { # Test that charset modifier work, and are interpolated
+ iseq qr/\b\v$/, '(?^:\b\v$)', 'Verify no locale, no unicode_strings gives default modifier';
+ iseq qr/(?l:\b\v$)/, '(?^:(?l:\b\v$))', 'Verify infix l modifier compiles';
+ iseq qr/(?u:\b\v$)/, '(?^:(?u:\b\v$))', 'Verify infix u modifier compiles';
+ iseq qr/(?l)\b\v$/, '(?^:(?l)\b\v$)', 'Verify (?l) compiles';
+ iseq qr/(?u)\b\v$/, '(?^:(?u)\b\v$)', 'Verify (?u) compiles';
+
+ my $dual = qr/\b\v$/;
+ use locale;
+ my $locale = qr/\b\v$/;
+ iseq $locale, '(?^l:\b\v$)', 'Verify has l modifier when compiled under use locale';
+ no locale;
+
+ use feature 'unicode_strings';
+ my $unicode = qr/\b\v$/;
+ iseq $unicode, '(?^u:\b\v$)', 'Verify has u modifier when compiled under unicode_strings';
+ iseq qr/abc$dual/, '(?^u:abc(?^:\b\v$))', 'Verify retains d meaning when interpolated under locale';
+ iseq qr/abc$locale/, '(?^u:abc(?^l:\b\v$))', 'Verify retains l when interpolated under unicode_strings';
+
+ no feature 'unicode_strings';
+ iseq qr/abc$locale/, '(?^:abc(?^l:\b\v$))', 'Verify retains l when interpolated outside locale and unicode strings';
+ iseq qr/def$unicode/, '(?^:def(?^u:\b\v$))', 'Verify retains u when interpolated outside locale and unicode strings';
+
+ use locale;
+ iseq qr/abc$dual/, '(?^l:abc(?^:\b\v$))', 'Verify retains d meaning when interpolated under locale';
+ iseq qr/abc$unicode/, '(?^l:abc(?^u:\b\v$))', 'Verify retains u when interpolated under locale';
+ }
+
{
local $Message = "Look around";
diff --git a/t/re/re.t b/t/re/re.t
index 10e2ee2b64..76835f09b3 100644
--- a/t/re/re.t
+++ b/t/re/re.t
@@ -12,7 +12,9 @@ use warnings;
use re qw(is_regexp regexp_pattern
regname regnames regnames_count);
{
+ use feature 'unicode_strings'; # Force 'u' pat mod
my $qr=qr/foo/pi;
+ no feature 'unicode_strings';
my $rx = $$qr;
ok(is_regexp($qr),'is_regexp(REGEXP ref)');
@@ -20,12 +22,12 @@ use re qw(is_regexp regexp_pattern
ok(!is_regexp(''),'is_regexp("")');
is((regexp_pattern($qr))[0],'foo','regexp_pattern[0] (ref)');
- is((regexp_pattern($qr))[1],'ip','regexp_pattern[1] (ref)');
- is(regexp_pattern($qr),'(?^pi:foo)','scalar regexp_pattern (ref)');
+ is((regexp_pattern($qr))[1],'uip','regexp_pattern[1] (ref)');
+ is(regexp_pattern($qr),'(?^upi:foo)','scalar regexp_pattern (ref)');
is((regexp_pattern($rx))[0],'foo','regexp_pattern[0] (bare REGEXP)');
- is((regexp_pattern($rx))[1],'ip','regexp_pattern[1] (bare REGEXP)');
- is(regexp_pattern($rx),'(?^pi:foo)', 'scalar regexp_pattern (bare REGEXP)');
+ is((regexp_pattern($rx))[1],'uip','regexp_pattern[1] (bare REGEXP)');
+ is(regexp_pattern($rx),'(?^upi:foo)', 'scalar regexp_pattern (bare REGEXP)');
ok(!regexp_pattern(''),'!regexp_pattern("")');
}
diff --git a/t/re/reg_mesg.t b/t/re/reg_mesg.t
index 4e8f3c4924..80af8df3dd 100644
--- a/t/re/reg_mesg.t
+++ b/t/re/reg_mesg.t
@@ -55,6 +55,12 @@ my @death =
'/(?\<=x/' => 'Sequence (?\...) not recognized in regex; marked by {#} in m/(?\{#}<=x/',
'/(?\<!x/' => 'Sequence (?\...) not recognized in regex; marked by {#} in m/(?\{#}<!x/',
'/(?\>x/' => 'Sequence (?\...) not recognized in regex; marked by {#} in m/(?\{#}>x/',
+ '/(?^-i:foo)/' => 'Sequence (?^-...) not recognized in regex; marked by {#} in m/(?^-{#}i:foo)/',
+ '/(?^-i)foo/' => 'Sequence (?^-...) not recognized in regex; marked by {#} in m/(?^-{#}i)foo/',
+ '/(?^d:foo)/' => 'Sequence (?^d...) not recognized in regex; marked by {#} in m/(?^d{#}:foo)/',
+ '/(?^d)foo/' => 'Sequence (?^d...) not recognized in regex; marked by {#} in m/(?^d{#})foo/',
+ '/(?^lu:foo)/' => 'Sequence (?^lu...) not recognized in regex; marked by {#} in m/(?^lu{#}:foo)/',
+ '/(?^lu)foo/' => 'Sequence (?^lu...) not recognized in regex; marked by {#} in m/(?^lu{#})foo/',
'/((x)/' => 'Unmatched ( in regex; marked by {#} in m/({#}(x)/',
diff --git a/universal.c b/universal.c
index fe53969bcd..102ff91919 100644
--- a/universal.c
+++ b/universal.c
@@ -1189,16 +1189,23 @@ XS(XS_re_regexp_pattern)
{
/* Houston, we have a regex! */
SV *pattern;
- STRLEN left = 0;
- char reflags[sizeof(INT_PAT_MODS)];
if ( GIMME_V == G_ARRAY ) {
+ STRLEN left = 0;
+ char reflags[sizeof(INT_PAT_MODS) + 1]; /* The +1 is for the charset
+ modifier */
/*
we are in list context so stringify
the modifiers that apply. We ignore "negative
modifiers" in this scenario.
*/
+ if (RX_EXTFLAGS(re) & RXf_PMf_LOCALE) {
+ reflags[left++] = LOCALE_PAT_MOD;
+ }
+ else if (RX_EXTFLAGS(re) & RXf_PMf_UNICODE) {
+ reflags[left++] = UNICODE_PAT_MOD;
+ }
const char *fptr = INT_PAT_MODS;
char ch;
U16 match_flags = (U16)((RX_EXTFLAGS(re) & PMf_COMPILETIME)