summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJarkko Hietaniemi <jhi@iki.fi>2001-08-18 14:24:42 +0000
committerJarkko Hietaniemi <jhi@iki.fi>2001-08-18 14:24:42 +0000
commitd9f424b23bb434af43f899daf2cb6cfe42fe6e1a (patch)
treea04a1a4067438cca3b5395a68ab540e71418e9c9
parent8b1981a6c4fcb68914e5a9c635d9471dbbe6f42a (diff)
downloadperl-d9f424b23bb434af43f899daf2cb6cfe42fe6e1a.tar.gz
New try for ID 20010407.006: detach the semantics
"was the last match target UTF8" into its own variable. p4raw-id: //depot/perl@11717
-rw-r--r--embedvar.h4
-rw-r--r--mg.c6
-rw-r--r--perlapi.h2
-rw-r--r--pod/perlapi.pod2
-rw-r--r--pp.c2
-rw-r--r--pp_hot.c14
-rw-r--r--regcomp.c1
-rw-r--r--regexec.c20
-rw-r--r--sv.c1
-rwxr-xr-xt/op/pat.t21
-rw-r--r--thrdvar.h4
11 files changed, 56 insertions, 21 deletions
diff --git a/embedvar.h b/embedvar.h
index d0a7ec486b..b5c6340148 100644
--- a/embedvar.h
+++ b/embedvar.h
@@ -94,6 +94,7 @@
#define PL_reg_start_tmpl (vTHX->Treg_start_tmpl)
#define PL_reg_starttry (vTHX->Treg_starttry)
#define PL_reg_sv (vTHX->Treg_sv)
+#define PL_reg_sv_utf8 (vTHX->Treg_sv_utf8)
#define PL_reg_whilem_seen (vTHX->Treg_whilem_seen)
#define PL_regbol (vTHX->Tregbol)
#define PL_regcc (vTHX->Tregcc)
@@ -812,6 +813,7 @@
#define PL_reg_start_tmpl (aTHXo->interp.Treg_start_tmpl)
#define PL_reg_starttry (aTHXo->interp.Treg_starttry)
#define PL_reg_sv (aTHXo->interp.Treg_sv)
+#define PL_reg_sv_utf8 (aTHXo->interp.Treg_sv_utf8)
#define PL_reg_whilem_seen (aTHXo->interp.Treg_whilem_seen)
#define PL_regbol (aTHXo->interp.Tregbol)
#define PL_regcc (aTHXo->interp.Tregcc)
@@ -1519,6 +1521,7 @@
#define PL_reg_start_tmpl (aTHX->Treg_start_tmpl)
#define PL_reg_starttry (aTHX->Treg_starttry)
#define PL_reg_sv (aTHX->Treg_sv)
+#define PL_reg_sv_utf8 (aTHX->Treg_sv_utf8)
#define PL_reg_whilem_seen (aTHX->Treg_whilem_seen)
#define PL_regbol (aTHX->Tregbol)
#define PL_regcc (aTHX->Tregcc)
@@ -1657,6 +1660,7 @@
#define PL_Treg_start_tmpl PL_reg_start_tmpl
#define PL_Treg_starttry PL_reg_starttry
#define PL_Treg_sv PL_reg_sv
+#define PL_Treg_sv_utf8 PL_reg_sv_utf8
#define PL_Treg_whilem_seen PL_reg_whilem_seen
#define PL_Tregbol PL_regbol
#define PL_Tregcc PL_regcc
diff --git a/mg.c b/mg.c
index ea9650c9b5..07869e0c6b 100644
--- a/mg.c
+++ b/mg.c
@@ -392,7 +392,7 @@ Perl_magic_regdatum_get(pTHX_ SV *sv, MAGIC *mg)
else /* @- */
i = s;
- if (i > 0 && DO_UTF8(PL_reg_sv)) {
+ if (i > 0 && PL_reg_sv_utf8) {
char *b = rx->subbeg;
if (b)
i = Perl_utf8_length(aTHX_ (U8*)b, (U8*)(b+i));
@@ -433,7 +433,7 @@ Perl_magic_len(pTHX_ SV *sv, MAGIC *mg)
{
i = t1 - s1;
getlen:
- if (i > 0 && DO_UTF8(PL_reg_sv)) {
+ if (i > 0 && PL_reg_sv_utf8) {
char *s = rx->subbeg + s1;
char *send = rx->subbeg + t1;
@@ -666,7 +666,7 @@ Perl_magic_get(pTHX_ SV *sv, MAGIC *mg)
PL_tainted = FALSE;
}
sv_setpvn(sv, s, i);
- if (PL_reg_sv && DO_UTF8(PL_reg_sv) && is_utf8_string((U8*)s, i))
+ if (PL_reg_sv_utf8 && is_utf8_string((U8*)s, i))
SvUTF8_on(sv);
else
SvUTF8_off(sv);
diff --git a/perlapi.h b/perlapi.h
index 6a5a6c783a..8c9bb5ce77 100644
--- a/perlapi.h
+++ b/perlapi.h
@@ -772,6 +772,8 @@ START_EXTERN_C
#define PL_reg_starttry (*Perl_Treg_starttry_ptr(aTHXo))
#undef PL_reg_sv
#define PL_reg_sv (*Perl_Treg_sv_ptr(aTHXo))
+#undef PL_reg_sv_utf8
+#define PL_reg_sv_utf8 (*Perl_Treg_sv_utf8_ptr(aTHXo))
#undef PL_reg_whilem_seen
#define PL_reg_whilem_seen (*Perl_Treg_whilem_seen_ptr(aTHXo))
#undef PL_regbol
diff --git a/pod/perlapi.pod b/pod/perlapi.pod
index 57e3f5c0e4..dc7f320976 100644
--- a/pod/perlapi.pod
+++ b/pod/perlapi.pod
@@ -1566,7 +1566,7 @@ Found in file perl.c
Shuts down a Perl interpreter. See L<perlembed>.
- void perl_destruct(PerlInterpreter* interp)
+ int perl_destruct(PerlInterpreter* interp)
=for hackers
Found in file perl.c
diff --git a/pp.c b/pp.c
index c0148b36a1..e470d1c411 100644
--- a/pp.c
+++ b/pp.c
@@ -4055,6 +4055,8 @@ PP(pp_split)
TAINT_IF((pm->op_pmflags & PMf_LOCALE) &&
(pm->op_pmflags & (PMf_WHITE | PMf_SKIPWHITE)));
+ PL_reg_sv_utf8 = do_utf8;
+
if (pm->op_pmreplroot) {
#ifdef USE_ITHREADS
ary = GvAVn((GV*)PL_curpad[(PADOFFSET)pm->op_pmreplroot]);
diff --git a/pp_hot.c b/pp_hot.c
index 0f4a69326e..d21977691b 100644
--- a/pp_hot.c
+++ b/pp_hot.c
@@ -1222,7 +1222,7 @@ PP(pp_match)
TARG = DEFSV;
EXTEND(SP,1);
}
- PL_reg_sv = TARG;
+
PUTBACK; /* EVAL blocks need stack_sp. */
s = SvPV(TARG, len);
strend = s + len;
@@ -1232,6 +1232,8 @@ PP(pp_match)
(PL_tainted && (pm->op_pmflags & PMf_RETAINT)));
TAINT_NOT;
+ PL_reg_sv_utf8 = DO_UTF8(TARG);
+
if (pm->op_pmdynflags & PMdf_USED) {
failure:
if (gimme == G_ARRAY)
@@ -1398,7 +1400,7 @@ yup: /* Confirmed by INTUIT */
if (global) {
rx->subbeg = truebase;
rx->startp[0] = s - truebase;
- if (DO_UTF8(PL_reg_sv)) {
+ if (PL_reg_sv_utf8) {
char *t = (char*)utf8_hop((U8*)s, rx->minlen);
rx->endp[0] = t - truebase;
}
@@ -1898,7 +1900,6 @@ PP(pp_subst)
STRLEN len;
int force_on_match = 0;
I32 oldsave = PL_savestack_ix;
- bool do_utf8;
STRLEN slen;
/* known replacement string? */
@@ -1909,8 +1910,7 @@ PP(pp_subst)
TARG = DEFSV;
EXTEND(SP,1);
}
- PL_reg_sv = TARG;
- do_utf8 = DO_UTF8(PL_reg_sv);
+
if (SvFAKE(TARG) && SvREADONLY(TARG))
sv_force_normal(TARG);
if (SvREADONLY(TARG)
@@ -1928,12 +1928,14 @@ PP(pp_subst)
rxtainted |= 2;
TAINT_NOT;
+ PL_reg_sv_utf8 = DO_UTF8(TARG);
+
force_it:
if (!pm || !s)
DIE(aTHX_ "panic: pp_subst");
strend = s + len;
- slen = do_utf8 ? utf8_length((U8*)s, (U8*)strend) : len;
+ slen = PL_reg_sv_utf8 ? utf8_length((U8*)s, (U8*)strend) : len;
maxiters = 2 * slen + 10; /* We can match twice at each
position, once with zero-length,
second time with non-zero. */
diff --git a/regcomp.c b/regcomp.c
index 18aa0576ec..9877658a0e 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -4735,6 +4735,7 @@ Perl_save_re_context(pTHX)
SAVEVPTR(PL_reg_re); /* from regexec.c */
SAVEPPTR(PL_reg_ganch); /* from regexec.c */
SAVESPTR(PL_reg_sv); /* from regexec.c */
+ SAVEI32(PL_reg_sv_utf8); /* from regexec.c */
SAVEVPTR(PL_reg_magic); /* from regexec.c */
SAVEI32(PL_reg_oldpos); /* from regexec.c */
SAVEVPTR(PL_reg_oldcurpm); /* from regexec.c */
diff --git a/regexec.c b/regexec.c
index 3f062ed798..4a19958c41 100644
--- a/regexec.c
+++ b/regexec.c
@@ -107,17 +107,17 @@
*/
#define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
-#define CHR_DIST(a,b) (DO_UTF8(PL_reg_sv) ? utf8_distance(a,b) : a - b)
+#define CHR_DIST(a,b) (PL_reg_sv_utf8 ? utf8_distance(a,b) : a - b)
#define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
#define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
-#define HOP(pos,off) (DO_UTF8(PL_reg_sv) ? reghop((U8*)pos, off) : (U8*)(pos + off))
-#define HOPMAYBE(pos,off) (DO_UTF8(PL_reg_sv) ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
+#define HOP(pos,off) (PL_reg_sv_utf8 ? reghop((U8*)pos, off) : (U8*)(pos + off))
+#define HOPMAYBE(pos,off) (PL_reg_sv_utf8 ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
#define HOPc(pos,off) ((char*)HOP(pos,off))
#define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
#define HOPBACK(pos, off) ( \
- (UTF && DO_UTF8(PL_reg_sv)) \
+ (UTF && PL_reg_sv_utf8) \
? reghopmaybe((U8*)pos, -off) \
: (pos - off >= PL_bostr) \
? (U8*)(pos - off) \
@@ -127,8 +127,8 @@
#define reghop3_c(pos,off,lim) ((char*)reghop3((U8*)pos, off, (U8*)lim))
#define reghopmaybe3_c(pos,off,lim) ((char*)reghopmaybe3((U8*)pos, off, (U8*)lim))
-#define HOP3(pos,off,lim) (DO_UTF8(PL_reg_sv) ? reghop3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
-#define HOPMAYBE3(pos,off,lim) (DO_UTF8(PL_reg_sv) ? reghopmaybe3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
+#define HOP3(pos,off,lim) (PL_reg_sv_utf8 ? reghop3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
+#define HOPMAYBE3(pos,off,lim) (PL_reg_sv_utf8 ? reghopmaybe3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
#define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
#define HOPMAYBE3c(pos,off,lim) ((char*)HOPMAYBE3(pos,off,lim))
@@ -878,7 +878,7 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
unsigned int c2;
char *e;
register I32 tmp = 1; /* Scratch variable? */
- register bool do_utf8 = DO_UTF8(PL_reg_sv);
+ register bool do_utf8 = PL_reg_sv_utf8;
/* We know what class it must start with. */
switch (OP(c)) {
@@ -2009,7 +2009,7 @@ S_regmatch(pTHX_ regnode *prog)
#if 0
I32 firstcp = PL_savestack_ix;
#endif
- register bool do_utf8 = DO_UTF8(PL_reg_sv);
+ register bool do_utf8 = PL_reg_sv_utf8;
#ifdef DEBUGGING
PL_regindent++;
@@ -3590,7 +3590,7 @@ S_regrepeat(pTHX_ regnode *p, I32 max)
register I32 c;
register char *loceol = PL_regeol;
register I32 hardcount = 0;
- register bool do_utf8 = DO_UTF8(PL_reg_sv);
+ register bool do_utf8 = PL_reg_sv_utf8;
scan = PL_reginput;
if (max != REG_INFTY && max < loceol - scan)
@@ -3829,7 +3829,7 @@ S_regrepeat_hard(pTHX_ regnode *p, I32 max, I32 *lp)
return 0;
start = PL_reginput;
- if (DO_UTF8(PL_reg_sv)) {
+ if (PL_reg_sv_utf8) {
while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
if (!count++) {
l = 0;
diff --git a/sv.c b/sv.c
index d157f711a2..e0a242e6c2 100644
--- a/sv.c
+++ b/sv.c
@@ -10259,6 +10259,7 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags,
PL_reg_re = (regexp*)NULL;
PL_reg_ganch = Nullch;
PL_reg_sv = Nullsv;
+ PL_reg_sv_utf8 = FALSE;
PL_reg_magic = (MAGIC*)NULL;
PL_reg_oldpos = 0;
PL_reg_oldcurpm = (PMOP*)NULL;
diff --git a/t/op/pat.t b/t/op/pat.t
index d2d3205576..478e2994f0 100755
--- a/t/op/pat.t
+++ b/t/op/pat.t
@@ -6,7 +6,7 @@
$| = 1;
-print "1..684\n";
+print "1..686\n";
BEGIN {
chdir 't' if -d 't';
@@ -1987,3 +1987,22 @@ print "ok 683\n" if @a == 9 && "@a" eq "f o o \n $a $b b a r";
$c = pos;
print "$a $b $c" eq 'ba:ba ad:ae 10' ? "ok 684\n" : "not ok 684\t# $a $b $c\n";
}
+
+{
+ package ID_20010407_006;
+
+ sub x {
+ "a\x{1234}";
+ }
+
+ my $x = x;
+ my $y;
+
+ $x =~ /(..)/; $y = $1;
+ print "not " unless length($y) == 2 && $y eq $x;
+ print "ok 685\n" if length($y) == 2;
+
+ x =~ /(..)/; $y = $1;
+ print "not " unless length($y) == 2 && $y eq $x;
+ print "ok 686\n";
+}
diff --git a/thrdvar.h b/thrdvar.h
index 8e999fc62e..2dd74a69d8 100644
--- a/thrdvar.h
+++ b/thrdvar.h
@@ -256,4 +256,8 @@ PERLVAR(i, struct thread_intern)
PERLVAR(trailing_nul, char) /* For the sake of thrsv and oursv */
PERLVAR(thr_done, bool) /* True when the thread has finished */
+
#endif /* USE_THREADS */
+
+PERLVAR(Treg_sv_utf8, bool) /* was what we matched against utf8 */
+