summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--op.c16
-rw-r--r--pp_ctl.c14
-rw-r--r--regcomp.c7
-rw-r--r--regexec.c14
4 files changed, 43 insertions, 8 deletions
diff --git a/op.c b/op.c
index 2ea2ef8ef7..a0c4f3587f 100644
--- a/op.c
+++ b/op.c
@@ -3432,14 +3432,24 @@ Perl_pmruntime(pTHX_ OP *o, OP *expr, bool isreg)
pm = (PMOP*)o;
if (expr->op_type == OP_CONST) {
- SV * const pat = ((SVOP*)expr)->op_sv;
+ SV *pat = ((SVOP*)expr)->op_sv;
U32 pm_flags = pm->op_pmflags & PMf_COMPILETIME;
if (o->op_flags & OPf_SPECIAL)
pm_flags |= RXf_SPLIT;
- if (DO_UTF8(pat))
- pm_flags |= RXf_UTF8;
+ if (DO_UTF8(pat)) {
+ assert (SvUTF8(pat));
+ } else if (SvUTF8(pat)) {
+ /* Not doing UTF-8, despite what the SV says. Is this only if we're
+ trapped in use 'bytes'? */
+ /* Make a copy of the octet sequence, but without the flag on, as
+ the compiler now honours the SvUTF8 flag on pat. */
+ STRLEN len;
+ const char *const p = SvPV(pat, len);
+ pat = newSVpvn_flags(p, len, SVs_TEMP);
+ }
+ assert(!(pm_flags & RXf_UTF8));
PM_SETRE(pm, CALLREGCOMP(pat, pm_flags));
diff --git a/pp_ctl.c b/pp_ctl.c
index 6353df6a40..d2094f538f 100644
--- a/pp_ctl.c
+++ b/pp_ctl.c
@@ -148,8 +148,18 @@ PP(pp_regcomp)
if (PL_op->op_flags & OPf_SPECIAL)
PL_reginterp_cnt = I32_MAX; /* Mark as safe. */
- if (DO_UTF8(tmpstr))
- pm_flags |= RXf_UTF8;
+ if (DO_UTF8(tmpstr)) {
+ assert (SvUTF8(tmpstr));
+ } else if (SvUTF8(tmpstr)) {
+ /* Not doing UTF-8, despite what the SV says. Is this only if
+ we're trapped in use 'bytes'? */
+ /* Make a copy of the octet sequence, but without the flag on,
+ as the compiler now honours the SvUTF8 flag on tmpstr. */
+ STRLEN len;
+ const char *const p = SvPV(tmpstr, len);
+ tmpstr = newSVpvn_flags(p, len, SVs_TEMP);
+ }
+ assert(!(pm_flags & RXf_UTF8));
if (eng)
PM_SETRE(pm, CALLREGCOMP_ENG(eng, tmpstr, pm_flags));
diff --git a/regcomp.c b/regcomp.c
index b71e54620c..a7b1cf6fdb 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -4151,7 +4151,7 @@ Perl_pregcomp(pTHX_ const SV * const pattern, const U32 flags)
#endif
REGEXP *
-Perl_re_compile(pTHX_ const SV * const pattern, const U32 pm_flags)
+Perl_re_compile(pTHX_ const SV * const pattern, U32 pm_flags)
{
dVAR;
REGEXP *rx;
@@ -4175,7 +4175,10 @@ Perl_re_compile(pTHX_ const SV * const pattern, const U32 pm_flags)
GET_RE_DEBUG_FLAGS_DECL;
DEBUG_r(if (!PL_colorset) reginitcolors());
- RExC_utf8 = RExC_orig_utf8 = pm_flags & RXf_UTF8;
+ RExC_utf8 = RExC_orig_utf8 = SvUTF8(pattern);
+ assert(!(pm_flags & RXf_UTF8));
+ if (RExC_utf8)
+ pm_flags |= RXf_UTF8;
DEBUG_COMPILE_r({
SV *dsv= sv_newmortal();
diff --git a/regexec.c b/regexec.c
index 95bba2e270..8144f998f7 100644
--- a/regexec.c
+++ b/regexec.c
@@ -3755,7 +3755,19 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
U32 pm_flags = 0;
const I32 osize = PL_regsize;
- if (DO_UTF8(ret)) pm_flags |= RXf_UTF8;
+ if (DO_UTF8(ret)) {
+ assert (SvUTF8(ret));
+ } else if (SvUTF8(ret)) {
+ /* Not doing UTF-8, despite what the SV says. Is
+ this only if we're trapped in use 'bytes'? */
+ /* Make a copy of the octet sequence, but without
+ the flag on, as the compiler now honours the
+ SvUTF8 flag on ret. */
+ STRLEN len;
+ const char *const p = SvPV(ret, len);
+ ret = newSVpvn_flags(p, len, SVs_TEMP);
+ }
+ assert(!(pm_flags & RXf_UTF8));
rx = CALLREGCOMP(ret, pm_flags);
if (!(SvFLAGS(ret)
& (SVs_TEMP | SVs_PADTMP | SVf_READONLY