diff options
-rw-r--r-- | op.c | 6 | ||||
-rw-r--r-- | op.h | 2 | ||||
-rw-r--r-- | op_reg_common.h | 5 | ||||
-rw-r--r-- | pod/perldelta.pod | 26 | ||||
-rw-r--r-- | pod/perldiag.pod | 6 | ||||
-rw-r--r-- | pod/perlre.pod | 65 | ||||
-rw-r--r-- | regcomp.c | 40 | ||||
-rw-r--r-- | regexp.h | 12 | ||||
-rw-r--r-- | regnodes.h | 4 | ||||
-rw-r--r-- | t/re/pat.t | 32 | ||||
-rw-r--r-- | t/re/re.t | 10 | ||||
-rw-r--r-- | t/re/reg_mesg.t | 6 | ||||
-rw-r--r-- | universal.c | 11 |
13 files changed, 187 insertions, 38 deletions
@@ -3649,8 +3649,12 @@ Perl_newPMOP(pTHX_ I32 type, I32 flags) if (PL_hints & HINT_RE_TAINT) pmop->op_pmflags |= PMf_RETAINT; - if (PL_hints & HINT_LOCALE) + if (PL_hints & HINT_LOCALE) { pmop->op_pmflags |= PMf_LOCALE; + } + else if ((! (PL_hints & HINT_BYTES)) && (PL_hints & HINT_UNI_8_BIT)) { + pmop->op_pmflags |= RXf_PMf_UNICODE; + } #ifdef USE_ITHREADS @@ -366,7 +366,7 @@ struct pmop { /* Leave some space, so future bit allocations can go either in the shared or * unshared area without affecting binary compatibility */ -#define PMf_BASE_SHIFT (_RXf_PMf_SHIFT_NEXT+8) +#define PMf_BASE_SHIFT (_RXf_PMf_SHIFT_NEXT+7) /* taint $1 etc. if target tainted */ #define PMf_RETAINT (1<<(PMf_BASE_SHIFT+0)) diff --git a/op_reg_common.h b/op_reg_common.h index d4e39873e1..ce12da542f 100644 --- a/op_reg_common.h +++ b/op_reg_common.h @@ -29,14 +29,15 @@ #define RXf_PMf_EXTENDED (1 << (RXf_PMf_STD_PMMOD_SHIFT+3)) /* /x */ #define RXf_PMf_KEEPCOPY (1 << (RXf_PMf_STD_PMMOD_SHIFT+4)) /* /p */ #define RXf_PMf_LOCALE (1 << (RXf_PMf_STD_PMMOD_SHIFT+5)) +#define RXf_PMf_UNICODE (1 << (RXf_PMf_STD_PMMOD_SHIFT+6)) /* Next available bit after the above. Name begins with '_' so won't be * exported by B */ -#define _RXf_PMf_SHIFT_NEXT (RXf_PMf_STD_PMMOD_SHIFT+6) +#define _RXf_PMf_SHIFT_NEXT (RXf_PMf_STD_PMMOD_SHIFT+7) /* Mask of the above bits. These need to be transferred from op_pmflags to * re->extflags during compilation */ -#define RXf_PMf_COMPILETIME (RXf_PMf_MULTILINE|RXf_PMf_SINGLELINE|RXf_PMf_LOCALE|RXf_PMf_FOLD|RXf_PMf_EXTENDED|RXf_PMf_KEEPCOPY) +#define RXf_PMf_COMPILETIME (RXf_PMf_MULTILINE|RXf_PMf_SINGLELINE|RXf_PMf_LOCALE|RXf_PMf_FOLD|RXf_PMf_EXTENDED|RXf_PMf_KEEPCOPY|RXf_PMf_UNICODE) /* These copies need to be numerical or defsubs_h.PL won't know about them. */ #define PMf_MULTILINE 1<<0 diff --git a/pod/perldelta.pod b/pod/perldelta.pod index 4289130a83..8cdbdb31ea 100644 --- a/pod/perldelta.pod +++ b/pod/perldelta.pod @@ -40,6 +40,19 @@ main purpose of this is to allow tests that rely on the stringification to not have to change when new modifiers are added. See L<perlre/Extended Patterns>. +=head2 C<"d">, C<"l">, and C<"u"> regex modifiers added + +These modifiers are currently only available within a C<(?...)> construct. + +The C<"l"> modifier says to compile the regular expression as if it were +in the scope of C<use locale>, even if it is not. + +The C<"u"> modifier currently does nothing. + +The C<"d"> modifier is used in the scope of C<use locale> to compile the +regular expression as if it were not in that scope. +See L<perlre/(?dlupimsx-imsx)>. + =head1 Security XXX Any security-related notices go here. In particular, any security @@ -66,6 +79,17 @@ can use something like the following: And then use C<$modifiers> instead of C<-xism>. +=head2 Regular expressions retain their localeness when interpolated + +Regular expressions compiled under C<"use locale"> now retain this when +interpolated into a new regular expression compiled outside a +C<"use locale">, and vice-versa. + +Previously, a regular expression interpolated into another one inherited +the localeness of the surrounding one, losing whatever state it +originally had. This is considered a bug fix, but may trip up code that +has come to rely on the incorrect behavior. + [ List each incompatible change as a =head2 entry ] =head1 Deprecations @@ -320,7 +344,7 @@ be noted as well. =item * -XXX +See L</Regular expressions retain their localeness when interpolated> =back diff --git a/pod/perldiag.pod b/pod/perldiag.pod index e725749275..c6806c1c13 100644 --- a/pod/perldiag.pod +++ b/pod/perldiag.pod @@ -4035,8 +4035,10 @@ where the problem was discovered. See L<perlre>. <-- HERE shows in the regular expression about where the problem was discovered. This happens when using the C<(?^...)> construct to tell Perl to use the default regular expression modifiers, and you -redundantly specify a default modifier. For other causes, see -L<perlre>. +redundantly specify a default modifier; or having a modifier that can't +be turned off (such as C<"p"> or C<"l">) after a minus; or specifying +more than one of the C<"d">, C<"l">, or C<"u"> modifiers. For other +causes, see L<perlre>. =item Sequence \%s... not terminated in regex; marked by <-- HERE in m/%s/ diff --git a/pod/perlre.pod b/pod/perlre.pod index 6e68bcd1db..b9216c156c 100644 --- a/pod/perlre.pod +++ b/pod/perlre.pod @@ -594,20 +594,15 @@ whitespace formatting, a simple C<#> will suffice. Note that Perl closes the comment as soon as it sees a C<)>, so there is no way to put a literal C<)> in the comment. -=item C<(?pimsx-imsx)> +=item C<(?dlupimsx-imsx)> -=item C<(?^pimsx)> +=item C<(?^lupimsx)> X<(?)> X<(?^)> One or more embedded pattern-match modifiers, to be turned on (or turned off, if preceded by C<->) for the remainder of the pattern or the remainder of the enclosing pattern group (if any). -Starting in Perl 5.14, a C<"^"> (caret or circumflex accent) immediately -after the C<"?"> is a shorthand equivalent to C<-imsx> and compiling the -regex under C<no locale>. Flags may follow the caret to override it. -But a minus sign is not legal with it. - This is particularly useful for dynamic patterns, such as those read in from a configuration file, taken from an argument, or specified in a table somewhere. Consider the case where some patterns want to be case @@ -634,17 +629,53 @@ These modifiers do not carry over into named subpatterns called in the enclosing group. In other words, a pattern such as C<((?i)(&NAME))> does not change the case-sensitivity of the "NAME" pattern. -Note that the C<p> modifier is special in that it can only be enabled, -not disabled, and that its presence anywhere in a pattern has a global -effect. Thus C<(?-p)> and C<(?-p:...)> are meaningless and will warn -when executed under C<use warnings>. +Starting in Perl 5.14, a C<"^"> (caret or circumflex accent) immediately +after the C<"?"> is a shorthand equivalent to C<d-imsx>. Flags (except +C<"d">) may follow the caret to override it. +But a minus sign is not legal with it. + +Also, starting in Perl 5.14, are modifiers C<"d">, C<"l">, and C<"u">, +which for 5.14 may not be used as suffix modifiers. + +C<"l"> means to use a locale (see L<perllocale>) when pattern matching. +The locale used will be the one in effect at the time of execution of +the pattern match. This may not be the same as the compilation-time +locale, and can differ from one match to another if there is an +intervening call of the +L<setlocale() function|perllocale/The setlocale function>. +This modifier is automatically set if the regular expression is compiled +within the scope of a C<"use locale"> pragma. + +C<"u"> has no effect currently. It is automatically set if the regular +expression is compiled within the scope of a +L<C<"use feature 'unicode_strings">|feature> pragma. + +C<"d"> means to use the traditional Perl pattern matching behavior. +This is dualistic (hence the name C<"d">, which also could stand for +"default"). When this is in effect, Perl matches utf8-encoded strings +using Unicode rules, and matches non-utf8-encoded strings using the +platform's native character set rules. +See L<perlunicode/The "Unicode Bug">. It is automatically selected by +default if the regular expression is compiled neither within the scope +of a C<"use locale"> pragma nor a <C<"use feature 'unicode_strings"> +pragma. + +Note that the C<d>, C<l>, C<p>, and C<u> modifiers are special in that +they can only be enabled, not disabled, and the C<d>, C<l>, and C<u> +modifiers are mutually exclusive; a maximum of one may appear in the +construct. Specifying one de-specifies the others. Thus, for example, +C<(?-p)> and C<(?-d:...)> are meaningless and will warn when compiled +under C<use warnings>. + +Note also that the C<p> modifier is special in that its presence +anywhere in a pattern has a global effect. =item C<(?:pattern)> X<(?:)> -=item C<(?imsx-imsx:pattern)> +=item C<(?dluimsx-imsx:pattern)> -=item C<(?^imsx:pattern)> +=item C<(?^luimsx:pattern)> X<(?^:)> This is for clustering, not capturing; it groups subexpressions like @@ -660,7 +691,7 @@ but doesn't spit out extra fields. It's also cheaper not to capture characters if you don't need to. Any letters between C<?> and C<:> act as flags modifiers as with -C<(?imsx-imsx)>. For example, +C<(?dluimsx-imsx)>. For example, /(?s-i:more.*than).*million/i @@ -669,8 +700,8 @@ is equivalent to the more verbose /(?:(?s-i)more.*than).*million/i Starting in Perl 5.14, a C<"^"> (caret or circumflex accent) immediately -after the C<"?"> is a shorthand equivalent to C<-imsx> and compiling the -regex under C<no locale>. Any positive flags may follow the caret, so +after the C<"?"> is a shorthand equivalent to C<d-imsx>. Any positive +flags (except C<"d">) may follow the caret, so (?^x:foo) @@ -679,7 +710,7 @@ is equivalent to (?x-ims:foo) The caret tells Perl that this cluster doesn't inherit the flags of any -surrounding pattern, but to go back to the system defaults (C<-imsx>), +surrounding pattern, but to go back to the system defaults (C<d-imsx>), modified by any flags specified. The caret allows for simpler stringification of compiled regular @@ -370,6 +370,7 @@ static const scan_data_t zero_scan_data = #define UTF (RExC_utf8 != 0) #define LOC ((RExC_flags & RXf_PMf_LOCALE) != 0) +#define UNI_SEMANTICS ((RExC_flags & RXf_PMf_UNICODE) != 0) #define FOLD ((RExC_flags & RXf_PMf_FOLD) != 0) #define OOB_UNICODE 12345678 @@ -4441,9 +4442,17 @@ Perl_re_compile(pTHX_ SV * const pattern, U32 pm_flags) SvPOK_on(rx); SvFLAGS(rx) |= SvUTF8(pattern); *p++='('; *p++='?'; - if (has_minus) { /* If a default, cover it using the caret */ + + /* If a default, cover it using the caret */ + if (has_minus || (r->extflags & ~(RXf_PMf_LOCALE|RXf_PMf_UNICODE))) { *p++= DEFAULT_PAT_MOD; } + if (r->extflags & RXf_PMf_LOCALE) { + *p++ = LOCALE_PAT_MOD; + } + else if (r->extflags & RXf_PMf_UNICODE) { + *p++ = UNICODE_PAT_MOD; + } if (has_p) *p++ = KEEPCOPY_PAT_MOD; /*'p'*/ { @@ -6124,6 +6133,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth) that follow */ has_use_defaults = TRUE; STD_PMMOD_FLAGS_CLEAR(&RExC_flags); + RExC_flags &= ~(RXf_PMf_LOCALE|RXf_PMf_UNICODE); goto parse_flags; default: --RExC_parse; @@ -6131,6 +6141,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth) { U32 posflags = 0, negflags = 0; U32 *flagsp = &posflags; + bool has_charset_modifier = 0; while (*RExC_parse) { /* && strchr("iogcmsx", *RExC_parse) */ @@ -6138,6 +6149,32 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth) and must be globally applied -- japhy */ switch (*RExC_parse) { CASE_STD_PMMOD_FLAGS_PARSE_SET(flagsp); + case LOCALE_PAT_MOD: + if (has_charset_modifier || flagsp == &negflags) { + goto fail_modifiers; + } + *flagsp &= ~RXf_PMf_UNICODE; + *flagsp |= RXf_PMf_LOCALE; + has_charset_modifier = 1; + break; + case UNICODE_PAT_MOD: + if (has_charset_modifier || flagsp == &negflags) { + goto fail_modifiers; + } + *flagsp &= ~RXf_PMf_LOCALE; + *flagsp |= RXf_PMf_UNICODE; + has_charset_modifier = 1; + break; + case DUAL_PAT_MOD: + if (has_use_defaults + || has_charset_modifier + || flagsp == &negflags) + { + goto fail_modifiers; + } + *flagsp &= ~(RXf_PMf_LOCALE|RXf_PMf_UNICODE); + has_charset_modifier = 1; + break; case ONCE_PAT_MOD: /* 'o' */ case GLOBAL_PAT_MOD: /* 'g' */ if (SIZE_ONLY && ckWARN(WARN_REGEXP)) { @@ -6182,6 +6219,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth) * if there is a minus, it means will be trying to * re-specify a default which is an error */ if (has_use_defaults || flagsp == &negflags) { + fail_modifiers: RExC_parse++; vFAIL3("Sequence (%.*s...) not recognized", RExC_parse-seqstart, seqstart); /*NOTREACHED*/ @@ -236,9 +236,9 @@ and check for NULL. case SINGLE_PAT_MOD: *(pmfl) |= RXf_PMf_SINGLELINE; break; \ case XTENDED_PAT_MOD: *(pmfl) |= RXf_PMf_EXTENDED; break -/* Note, includes locale */ +/* Note, includes locale, unicode */ #define STD_PMMOD_FLAGS_CLEAR(pmfl) \ - *(pmfl) &= ~(RXf_PMf_FOLD|RXf_PMf_MULTILINE|RXf_PMf_SINGLELINE|RXf_PMf_EXTENDED|RXf_PMf_LOCALE) + *(pmfl) &= ~(RXf_PMf_FOLD|RXf_PMf_MULTILINE|RXf_PMf_SINGLELINE|RXf_PMf_EXTENDED|RXf_PMf_LOCALE|RXf_PMf_UNICODE) /* chars and strings used as regex pattern modifiers * Singlular is a 'c'har, plural is a "string" @@ -258,12 +258,18 @@ and check for NULL. #define IGNORE_PAT_MOD 'i' #define XTENDED_PAT_MOD 'x' #define NONDESTRUCT_PAT_MOD 'r' +#define LOCALE_PAT_MOD 'l' +#define UNICODE_PAT_MOD 'u' +#define DUAL_PAT_MOD 'd' #define ONCE_PAT_MODS "o" #define KEEPCOPY_PAT_MODS "p" #define EXEC_PAT_MODS "e" #define LOOP_PAT_MODS "gc" #define NONDESTRUCT_PAT_MODS "r" +#define LOCALE_PAT_MODS "l" +#define UNICODE_PAT_MODS "u" +#define DUAL_PAT_MODS "d" /* This string is expected by regcomp.c to be ordered so that the first * character is the flag in bit RXf_PMf_STD_PMMOD_SHIFT of extflags; the next @@ -288,7 +294,7 @@ and check for NULL. /* Leave some space, so future bit allocations can go either in the shared or * unshared area without affecting binary compatibility */ -#define RXf_BASE_SHIFT (_RXf_PMf_SHIFT_NEXT+3) +#define RXf_BASE_SHIFT (_RXf_PMf_SHIFT_NEXT+2) /* Anchor and GPOS related stuff */ #define RXf_ANCH_BOL (1<<(RXf_BASE_SHIFT+0)) diff --git a/regnodes.h b/regnodes.h index d132013b5d..f5aacc2f97 100644 --- a/regnodes.h +++ b/regnodes.h @@ -625,14 +625,14 @@ EXTCONST char * const PL_reg_name[] = { EXTCONST char * PL_reg_extflags_name[]; #else EXTCONST char * const PL_reg_extflags_name[] = { - /* Bits in extflags defined: 11111111111111111111111000111111 */ + /* Bits in extflags defined: 11111111111111111111111001111111 */ "MULTILINE", /* 0x00000001 */ "SINGLELINE", /* 0x00000002 */ "FOLD", /* 0x00000004 */ "EXTENDED", /* 0x00000008 */ "KEEPCOPY", /* 0x00000010 */ "LOCALE", /* 0x00000020 */ - "UNUSED_BIT_6", /* 0x00000040 */ + "UNICODE", /* 0x00000040 */ "UNUSED_BIT_7", /* 0x00000080 */ "UNUSED_BIT_8", /* 0x00000100 */ "ANCH_BOL", /* 0x00000200 */ diff --git a/t/re/pat.t b/t/re/pat.t index 3bc7f5d372..c007880b8c 100644 --- a/t/re/pat.t +++ b/t/re/pat.t @@ -23,7 +23,7 @@ BEGIN { } -plan tests => 385; # Update this when adding/deleting tests. +plan tests => 398; # Update this when adding/deleting tests. run_tests() unless caller; @@ -503,10 +503,38 @@ sub run_tests { iseq qr/\b\v$/s, '(?^s:\b\v$)', 'qr/\b\v$/s'; iseq qr/\b\v$/m, '(?^m:\b\v$)', 'qr/\b\v$/m'; iseq qr/\b\v$/x, '(?^x:\b\v$)', 'qr/\b\v$/x'; - iseq qr/\b\v$/xism, '(?msix:\b\v$)', 'qr/\b\v$/xism'; + iseq qr/\b\v$/xism, '(?^msix:\b\v$)', 'qr/\b\v$/xism'; iseq qr/\b\v$/, '(?^:\b\v$)', 'qr/\b\v$/'; } + { # Test that charset modifier work, and are interpolated + iseq qr/\b\v$/, '(?^:\b\v$)', 'Verify no locale, no unicode_strings gives default modifier'; + iseq qr/(?l:\b\v$)/, '(?^:(?l:\b\v$))', 'Verify infix l modifier compiles'; + iseq qr/(?u:\b\v$)/, '(?^:(?u:\b\v$))', 'Verify infix u modifier compiles'; + iseq qr/(?l)\b\v$/, '(?^:(?l)\b\v$)', 'Verify (?l) compiles'; + iseq qr/(?u)\b\v$/, '(?^:(?u)\b\v$)', 'Verify (?u) compiles'; + + my $dual = qr/\b\v$/; + use locale; + my $locale = qr/\b\v$/; + iseq $locale, '(?^l:\b\v$)', 'Verify has l modifier when compiled under use locale'; + no locale; + + use feature 'unicode_strings'; + my $unicode = qr/\b\v$/; + iseq $unicode, '(?^u:\b\v$)', 'Verify has u modifier when compiled under unicode_strings'; + iseq qr/abc$dual/, '(?^u:abc(?^:\b\v$))', 'Verify retains d meaning when interpolated under locale'; + iseq qr/abc$locale/, '(?^u:abc(?^l:\b\v$))', 'Verify retains l when interpolated under unicode_strings'; + + no feature 'unicode_strings'; + iseq qr/abc$locale/, '(?^:abc(?^l:\b\v$))', 'Verify retains l when interpolated outside locale and unicode strings'; + iseq qr/def$unicode/, '(?^:def(?^u:\b\v$))', 'Verify retains u when interpolated outside locale and unicode strings'; + + use locale; + iseq qr/abc$dual/, '(?^l:abc(?^:\b\v$))', 'Verify retains d meaning when interpolated under locale'; + iseq qr/abc$unicode/, '(?^l:abc(?^u:\b\v$))', 'Verify retains u when interpolated under locale'; + } + { local $Message = "Look around"; @@ -12,7 +12,9 @@ use warnings; use re qw(is_regexp regexp_pattern regname regnames regnames_count); { + use feature 'unicode_strings'; # Force 'u' pat mod my $qr=qr/foo/pi; + no feature 'unicode_strings'; my $rx = $$qr; ok(is_regexp($qr),'is_regexp(REGEXP ref)'); @@ -20,12 +22,12 @@ use re qw(is_regexp regexp_pattern ok(!is_regexp(''),'is_regexp("")'); is((regexp_pattern($qr))[0],'foo','regexp_pattern[0] (ref)'); - is((regexp_pattern($qr))[1],'ip','regexp_pattern[1] (ref)'); - is(regexp_pattern($qr),'(?^pi:foo)','scalar regexp_pattern (ref)'); + is((regexp_pattern($qr))[1],'uip','regexp_pattern[1] (ref)'); + is(regexp_pattern($qr),'(?^upi:foo)','scalar regexp_pattern (ref)'); is((regexp_pattern($rx))[0],'foo','regexp_pattern[0] (bare REGEXP)'); - is((regexp_pattern($rx))[1],'ip','regexp_pattern[1] (bare REGEXP)'); - is(regexp_pattern($rx),'(?^pi:foo)', 'scalar regexp_pattern (bare REGEXP)'); + is((regexp_pattern($rx))[1],'uip','regexp_pattern[1] (bare REGEXP)'); + is(regexp_pattern($rx),'(?^upi:foo)', 'scalar regexp_pattern (bare REGEXP)'); ok(!regexp_pattern(''),'!regexp_pattern("")'); } diff --git a/t/re/reg_mesg.t b/t/re/reg_mesg.t index 4e8f3c4924..80af8df3dd 100644 --- a/t/re/reg_mesg.t +++ b/t/re/reg_mesg.t @@ -55,6 +55,12 @@ my @death = '/(?\<=x/' => 'Sequence (?\...) not recognized in regex; marked by {#} in m/(?\{#}<=x/', '/(?\<!x/' => 'Sequence (?\...) not recognized in regex; marked by {#} in m/(?\{#}<!x/', '/(?\>x/' => 'Sequence (?\...) not recognized in regex; marked by {#} in m/(?\{#}>x/', + '/(?^-i:foo)/' => 'Sequence (?^-...) not recognized in regex; marked by {#} in m/(?^-{#}i:foo)/', + '/(?^-i)foo/' => 'Sequence (?^-...) not recognized in regex; marked by {#} in m/(?^-{#}i)foo/', + '/(?^d:foo)/' => 'Sequence (?^d...) not recognized in regex; marked by {#} in m/(?^d{#}:foo)/', + '/(?^d)foo/' => 'Sequence (?^d...) not recognized in regex; marked by {#} in m/(?^d{#})foo/', + '/(?^lu:foo)/' => 'Sequence (?^lu...) not recognized in regex; marked by {#} in m/(?^lu{#}:foo)/', + '/(?^lu)foo/' => 'Sequence (?^lu...) not recognized in regex; marked by {#} in m/(?^lu{#})foo/', '/((x)/' => 'Unmatched ( in regex; marked by {#} in m/({#}(x)/', diff --git a/universal.c b/universal.c index fe53969bcd..102ff91919 100644 --- a/universal.c +++ b/universal.c @@ -1189,16 +1189,23 @@ XS(XS_re_regexp_pattern) { /* Houston, we have a regex! */ SV *pattern; - STRLEN left = 0; - char reflags[sizeof(INT_PAT_MODS)]; if ( GIMME_V == G_ARRAY ) { + STRLEN left = 0; + char reflags[sizeof(INT_PAT_MODS) + 1]; /* The +1 is for the charset + modifier */ /* we are in list context so stringify the modifiers that apply. We ignore "negative modifiers" in this scenario. */ + if (RX_EXTFLAGS(re) & RXf_PMf_LOCALE) { + reflags[left++] = LOCALE_PAT_MOD; + } + else if (RX_EXTFLAGS(re) & RXf_PMf_UNICODE) { + reflags[left++] = UNICODE_PAT_MOD; + } const char *fptr = INT_PAT_MODS; char ch; U16 match_flags = (U16)((RX_EXTFLAGS(re) & PMf_COMPILETIME) |