summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorYitzchak Scott-Thoennes <sthoenna@efn.org>2002-09-11 15:22:45 -0700
committerhv <hv@crypt.org>2002-10-01 08:10:21 +0000
commitcb50f42d44feb5486b1014e87f10579f0b7cddbf (patch)
treea6287078174bb8819276c355a6b5d93123e8837a
parent0d1032abc6ecb94ee2955275dc38423a9d34216c (diff)
downloadperl-cb50f42d44feb5486b1014e87f10579f0b7cddbf.tar.gz
Re: sv_2pv_flags and ROK and UTF8 flags
Message-ID: <lSCg9gzkgymX092yn@efn.org> p4raw-id: //depot/perl@17947
-rw-r--r--dump.c8
-rw-r--r--mg.h5
-rw-r--r--regexec.c7
-rw-r--r--sv.c16
-rwxr-xr-xt/op/pat.t54
5 files changed, 71 insertions, 19 deletions
diff --git a/dump.c b/dump.c
index e287a79578..520b210515 100644
--- a/dump.c
+++ b/dump.c
@@ -768,7 +768,7 @@ static struct { char type; char *name; } magic_names[] = {
{ PERL_MAGIC_taint, "taint(t)" },
{ PERL_MAGIC_uvar_elem, "uvar_elem(v)" },
{ PERL_MAGIC_vec, "vec(v)" },
- { PERL_MAGIC_vstring, "v-string(V)" },
+ { PERL_MAGIC_vstring, "vstring(V)" },
{ PERL_MAGIC_substr, "substr(x)" },
{ PERL_MAGIC_defelem, "defelem(y)" },
{ PERL_MAGIC_ext, "ext(~)" },
@@ -842,13 +842,15 @@ Perl_do_magic_dump(pTHX_ I32 level, PerlIO *file, MAGIC *mg, I32 nest, I32 maxne
if (mg->mg_flags) {
Perl_dump_indent(aTHX_ level, file, " MG_FLAGS = 0x%02X\n", mg->mg_flags);
- if (mg->mg_flags & MGf_TAINTEDDIR)
+ if (mg->mg_type == PERL_MAGIC_envelem &&
+ mg->mg_flags & MGf_TAINTEDDIR)
Perl_dump_indent(aTHX_ level, file, " TAINTEDDIR\n");
if (mg->mg_flags & MGf_REFCOUNTED)
Perl_dump_indent(aTHX_ level, file, " REFCOUNTED\n");
if (mg->mg_flags & MGf_GSKIP)
Perl_dump_indent(aTHX_ level, file, " GSKIP\n");
- if (mg->mg_flags & MGf_MINMATCH)
+ if (mg->mg_type == PERL_MAGIC_regex_global &&
+ mg->mg_flags & MGf_MINMATCH)
Perl_dump_indent(aTHX_ level, file, " MINMATCH\n");
}
if (mg->mg_obj) {
diff --git a/mg.h b/mg.h
index e99b52cb6d..bbd675b98d 100644
--- a/mg.h
+++ b/mg.h
@@ -33,14 +33,13 @@ struct magic {
I32 mg_len;
};
-#define MGf_TAINTEDDIR 1
+#define MGf_TAINTEDDIR 1 /* PERL_MAGIC_envelem only */
+#define MGf_MINMATCH 1 /* PERL_MAGIC_regex_global only */
#define MGf_REFCOUNTED 2
#define MGf_GSKIP 4
#define MGf_COPY 8
#define MGf_DUP 16
-#define MGf_MINMATCH 1
-
#define MgTAINTEDDIR(mg) (mg->mg_flags & MGf_TAINTEDDIR)
#define MgTAINTEDDIR_on(mg) (mg->mg_flags |= MGf_TAINTEDDIR)
#define MgTAINTEDDIR_off(mg) (mg->mg_flags &= ~MGf_TAINTEDDIR)
diff --git a/regexec.c b/regexec.c
index b69fd2b08b..c93df5dff7 100644
--- a/regexec.c
+++ b/regexec.c
@@ -2821,6 +2821,7 @@ S_regmatch(pTHX_ regnode *prog)
MAGIC *mg = Null(MAGIC*);
re_cc_state state;
CHECKPOINT cp, lastcp;
+ int toggleutf;
if(SvROK(ret) || SvRMAGICAL(ret)) {
SV *sv = SvROK(ret) ? SvRV(ret) : ret;
@@ -2841,6 +2842,7 @@ S_regmatch(pTHX_ regnode *prog)
I32 onpar = PL_regnpar;
Zero(&pm, 1, PMOP);
+ if (DO_UTF8(ret)) pm.op_pmdynflags |= PMdf_DYN_UTF8;
re = CALLREGCOMP(aTHX_ t, t + len, &pm);
if (!(SvFLAGS(ret)
& (SVs_TEMP | SVs_PADTMP | SVf_READONLY)))
@@ -2873,6 +2875,9 @@ S_regmatch(pTHX_ regnode *prog)
*PL_reglastcloseparen = 0;
PL_reg_call_cc = &state;
PL_reginput = locinput;
+ toggleutf = ((PL_reg_flags & RF_utf8) != 0) ^
+ ((re->reganch & ROPT_UTF8) != 0);
+ if (toggleutf) PL_reg_flags ^= RF_utf8;
/* XXXX This is too dramatic a measure... */
PL_reg_maxiter = 0;
@@ -2887,6 +2892,7 @@ S_regmatch(pTHX_ regnode *prog)
PL_regcc = state.cc;
PL_reg_re = state.re;
cache_re(PL_reg_re);
+ if (toggleutf) PL_reg_flags ^= RF_utf8;
/* XXXX This is too dramatic a measure... */
PL_reg_maxiter = 0;
@@ -2903,6 +2909,7 @@ S_regmatch(pTHX_ regnode *prog)
PL_regcc = state.cc;
PL_reg_re = state.re;
cache_re(PL_reg_re);
+ if (toggleutf) PL_reg_flags ^= RF_utf8;
/* XXXX This is too dramatic a measure... */
PL_reg_maxiter = 0;
diff --git a/sv.c b/sv.c
index b4b7dbad7f..78048c06fe 100644
--- a/sv.c
+++ b/sv.c
@@ -2890,7 +2890,7 @@ Perl_sv_2pv_flags(pTHX_ register SV *sv, STRLEN *lp, I32 flags)
{
register char *s;
int olderrno;
- SV *tsv;
+ SV *tsv, *origsv;
char tbuf[64]; /* Must fit sprintf/Gconvert of longest IV/NV */
char *tmpbuf = tbuf;
@@ -2939,6 +2939,7 @@ Perl_sv_2pv_flags(pTHX_ register SV *sv, STRLEN *lp, I32 flags)
SvUTF8_off(sv);
return pv;
}
+ origsv = sv;
sv = (SV*)SvRV(sv);
if (!sv)
s = "NULLREF";
@@ -3020,6 +3021,11 @@ Perl_sv_2pv_flags(pTHX_ register SV *sv, STRLEN *lp, I32 flags)
mg->mg_ptr[mg->mg_len] = 0;
}
PL_reginterp_cnt += re->program[0].next_off;
+
+ if (re->reganch & ROPT_UTF8)
+ SvUTF8_on(origsv);
+ else
+ SvUTF8_off(origsv);
*lp = mg->mg_len;
return mg->mg_ptr;
}
@@ -3188,16 +3194,14 @@ would lose the UTF-8'ness of the PV.
void
Perl_sv_copypv(pTHX_ SV *dsv, register SV *ssv)
{
- SV *tmpsv = sv_newmortal();
STRLEN len;
char *s;
s = SvPV(ssv,len);
- sv_setpvn(tmpsv,s,len);
+ sv_setpvn(dsv,s,len);
if (SvUTF8(ssv))
- SvUTF8_on(tmpsv);
+ SvUTF8_on(dsv);
else
- SvUTF8_off(tmpsv);
- SvSetSV(dsv,tmpsv);
+ SvUTF8_off(dsv);
}
/*
diff --git a/t/op/pat.t b/t/op/pat.t
index ed6101505c..4ef860c0b9 100755
--- a/t/op/pat.t
+++ b/t/op/pat.t
@@ -6,7 +6,7 @@
$| = 1;
-print "1..932\n";
+print "1..940\n";
BEGIN {
chdir 't' if -d 't';
@@ -2913,22 +2913,62 @@ print(($a eq '(?-xism:foo)' ? '' : 'not '),
++$test;
$x = "\x{3fe}";
+$z=$y = "\317\276"; # $y is byte representation of $x
+
$a = qr/$x/;
print(($x =~ $a ? '' : 'not '), "ok $test - utf8 interpolation in qr//\n");
++$test;
print(("a$a" =~ $x ? '' : 'not '),
- "ok $test - stringifed qr// preserves utf8 # TODO\n");
+ "ok $test - stringifed qr// preserves utf8\n");
+++$test;
+
+print(("a$x" =~ /^a$a\z/ ? '' : 'not '),
+ "ok $test - interpolated qr// preserves utf8\n");
+++$test;
+
+print(("a$x" =~ /^a(??{$a})\z/ ? '' : 'not '),
+ "ok $test - postponed interpolation of qr// preserves utf8\n");
+++$test;
+
+{ use re 'eval';
+
+print(("$x$x" =~ /^$x(??{$x})\z/ ? '' : 'not '),
+ "ok $test - postponed utf8 string in utf8 re matches utf8\n");
+++$test;
+
+print(("$y$x" =~ /^$y(??{$x})\z/ ? '' : 'not '),
+ "ok $test - postponed utf8 string in non-utf8 re matches utf8\n");
++$test;
-print(("a$x" =~ qr/a$a/ ? '' : 'not '),
- "ok $test - interpolated qr// preserves utf8 # TODO\n");
+print(("$y$x" !~ /^$y(??{$y})\z/ ? '' : 'not '),
+ "ok $test - postponed non-utf8 string in non-utf8 re doesn't match utf8\n");
++$test;
-print(("a$x" =~ qr/a(??{$a})/ ? '' : 'not '),
- "ok $test - postponed interpolation of qr// preserves utf8 # TODO\n");
+print(("$x$x" !~ /^$x(??{$y})\z/ ? '' : 'not '),
+ "ok $test - postponed non-utf8 string in utf8 re doesn't match utf8\n");
++$test;
+print(("$y$y" =~ /^$y(??{$y})\z/ ? '' : 'not '),
+ "ok $test - postponed non-utf8 string in non-utf8 re matches non-utf8\n");
+++$test;
+
+print(("$x$y" =~ /^$x(??{$y})\z/ ? '' : 'not '),
+ "ok $test - postponed non-utf8 string in utf8 re matches non-utf8\n");
+++$test;
+$y = $z; # reset $y after upgrade
+
+print(("$x$y" !~ /^$x(??{$x})\z/ ? '' : 'not '),
+ "ok $test - postponed utf8 string in utf8 re doesn't match non-utf8\n");
+++$test;
+$y = $z; # reset $y after upgrade
+
+print(("$y$y" !~ /^$y(??{$x})\z/ ? '' : 'not '),
+ "ok $test - postponed utf8 string in non-utf8 re doesn't match non-utf8\n");
+++$test;
+
+} # no re 'eval'
+
print "# more user-defined character properties\n";
sub IsSyriac1 {
@@ -2951,4 +2991,4 @@ END
print "\x{0712}" =~ /\p{Syriac1}/ ? "ok $test\n" : "not ok $test\n"; $test++;
print "\x{072F}" =~ /\P{Syriac1}/ ? "ok $test\n" : "not ok $test\n"; $test++;
-# last test 932
+# last test 940