summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRafael Garcia-Suarez <rgarciasuarez@gmail.com>2003-01-22 23:35:56 +0100
committerJarkko Hietaniemi <jhi@iki.fi>2003-02-02 16:48:26 +0000
commita30b2f1f43446bf5477e7baa5fee5d5c59659ce6 (patch)
treef119415cdab16fe15be9b51e64fce8e428881183
parentec3916884f8b516a101ac0b622cd88ae38c1b49d (diff)
downloadperl-a30b2f1f43446bf5477e7baa5fee5d5c59659ce6.tar.gz
[perl #18232] [PATCH] store PL_reg_match_utf8 in reganch
Message-Id: <20030122223556.57d597a3.rgarciasuarez@free.fr> p4raw-id: //depot/perl@18633
-rw-r--r--mg.c6
-rw-r--r--pp.c2
-rw-r--r--pp_ctl.c2
-rw-r--r--pp_hot.c8
-rw-r--r--regexec.c2
-rw-r--r--regexp.h8
-rwxr-xr-xt/op/pat.t14
7 files changed, 31 insertions, 11 deletions
diff --git a/mg.c b/mg.c
index fae5cda8dd..0edd71106c 100644
--- a/mg.c
+++ b/mg.c
@@ -418,7 +418,7 @@ Perl_magic_regdatum_get(pTHX_ SV *sv, MAGIC *mg)
else /* @- */
i = s;
- if (i > 0 && PL_reg_match_utf8) {
+ if (i > 0 && RX_MATCH_UTF8(rx)) {
char *b = rx->subbeg;
if (b)
i = Perl_utf8_length(aTHX_ (U8*)b, (U8*)(b+i));
@@ -459,7 +459,7 @@ Perl_magic_len(pTHX_ SV *sv, MAGIC *mg)
{
i = t1 - s1;
getlen:
- if (i > 0 && PL_reg_match_utf8) {
+ if (i > 0 && RX_MATCH_UTF8(rx)) {
char *s = rx->subbeg + s1;
char *send = rx->subbeg + t1;
@@ -707,7 +707,7 @@ Perl_magic_get(pTHX_ SV *sv, MAGIC *mg)
getrx:
if (i >= 0) {
sv_setpvn(sv, s, i);
- if (PL_reg_match_utf8 && is_utf8_string((U8*)s, i))
+ if (RX_MATCH_UTF8(rx) && is_utf8_string((U8*)s, i))
SvUTF8_on(sv);
else
SvUTF8_off(sv);
diff --git a/pp.c b/pp.c
index c9d1dc67f0..67fe7f64e3 100644
--- a/pp.c
+++ b/pp.c
@@ -4391,7 +4391,7 @@ PP(pp_split)
TAINT_IF((pm->op_pmflags & PMf_LOCALE) &&
(pm->op_pmflags & (PMf_WHITE | PMf_SKIPWHITE)));
- PL_reg_match_utf8 = do_utf8;
+ RX_MATCH_UTF8_set(rx, do_utf8);
if (pm->op_pmreplroot) {
#ifdef USE_ITHREADS
diff --git a/pp_ctl.c b/pp_ctl.c
index 7c0f8bad68..a08e2b61a8 100644
--- a/pp_ctl.c
+++ b/pp_ctl.c
@@ -158,7 +158,7 @@ PP(pp_substcont)
register REGEXP *rx = cx->sb_rx;
rxres_restore(&cx->sb_rxres, rx);
- PL_reg_match_utf8 = SvUTF8(cx->sb_targ) ? 1 : 0;
+ RX_MATCH_UTF8_set(rx, SvUTF8(cx->sb_targ));
if (cx->sb_iters++) {
I32 saviters = cx->sb_iters;
diff --git a/pp_hot.c b/pp_hot.c
index 1a40441ad8..8d9625b3d7 100644
--- a/pp_hot.c
+++ b/pp_hot.c
@@ -1180,7 +1180,7 @@ PP(pp_match)
(PL_tainted && (pm->op_pmflags & PMf_RETAINT)));
TAINT_NOT;
- PL_reg_match_utf8 = DO_UTF8(TARG);
+ RX_MATCH_UTF8_set(rx, DO_UTF8(TARG));
/* PMdf_USED is set after a ?? matches once */
if (pm->op_pmdynflags & PMdf_USED) {
@@ -1355,7 +1355,7 @@ yup: /* Confirmed by INTUIT */
if (global) {
rx->subbeg = truebase;
rx->startp[0] = s - truebase;
- if (PL_reg_match_utf8) {
+ if (RX_MATCH_UTF8(rx)) {
char *t = (char*)utf8_hop((U8*)s, rx->minlen);
rx->endp[0] = t - truebase;
}
@@ -1907,14 +1907,14 @@ PP(pp_subst)
rxtainted |= 2;
TAINT_NOT;
- PL_reg_match_utf8 = DO_UTF8(TARG);
+ RX_MATCH_UTF8_set(rx, DO_UTF8(TARG));
force_it:
if (!pm || !s)
DIE(aTHX_ "panic: pp_subst");
strend = s + len;
- slen = PL_reg_match_utf8 ? utf8_length((U8*)s, (U8*)strend) : len;
+ slen = RX_MATCH_UTF8(rx) ? utf8_length((U8*)s, (U8*)strend) : len;
maxiters = 2 * slen + 10; /* We can match twice at each
position, once with zero-length,
second time with non-zero. */
diff --git a/regexec.c b/regexec.c
index 157f426ac4..59111a0069 100644
--- a/regexec.c
+++ b/regexec.c
@@ -400,6 +400,7 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
char *i_strpos = strpos;
SV *dsv = PERL_DEBUG_PAD_ZERO(0);
#endif
+ RX_MATCH_UTF8_set(prog,do_utf8);
if (prog->reganch & ROPT_UTF8) {
DEBUG_r(PerlIO_printf(Perl_debug_log,
@@ -1615,6 +1616,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
SV *dsv0 = PERL_DEBUG_PAD_ZERO(0);
SV *dsv1 = PERL_DEBUG_PAD_ZERO(1);
#endif
+ RX_MATCH_UTF8_set(prog,do_utf8);
PL_regcc = 0;
diff --git a/regexp.h b/regexp.h
index 81552baf6c..05640548b1 100644
--- a/regexp.h
+++ b/regexp.h
@@ -71,6 +71,7 @@ typedef struct regexp {
#define ROPT_NAUGHTY 0x20000 /* how exponential is this pattern? */
#define ROPT_COPY_DONE 0x40000 /* subbeg is a copy of the string */
#define ROPT_TAINTED_SEEN 0x80000
+#define ROPT_MATCH_UTF8 0x10000000 /* subbeg is utf-8 */
#define RE_USE_INTUIT_NOML 0x0100000 /* Best to intuit before matching */
#define RE_USE_INTUIT_ML 0x0200000
@@ -99,6 +100,13 @@ typedef struct regexp {
? RX_MATCH_COPIED_on(prog) \
: RX_MATCH_COPIED_off(prog))
+#define RX_MATCH_UTF8(prog) ((prog)->reganch & ROPT_MATCH_UTF8)
+#define RX_MATCH_UTF8_on(prog) ((prog)->reganch |= ROPT_MATCH_UTF8)
+#define RX_MATCH_UTF8_off(prog) ((prog)->reganch &= ~ROPT_MATCH_UTF8)
+#define RX_MATCH_UTF8_set(prog, t) ((t) \
+ ? (RX_MATCH_UTF8_on(prog), (PL_reg_match_utf8 = 1)) \
+ : (RX_MATCH_UTF8_off(prog), (PL_reg_match_utf8 = 0)))
+
#define REXEC_COPY_STR 0x01 /* Need to copy the string. */
#define REXEC_CHECKED 0x02 /* check_substr already checked. */
#define REXEC_SCREAM 0x04 /* use scream table. */
diff --git a/t/op/pat.t b/t/op/pat.t
index 0868546a9b..360892bc4c 100755
--- a/t/op/pat.t
+++ b/t/op/pat.t
@@ -6,7 +6,7 @@
$| = 1;
-print "1..983\n";
+print "1..986\n";
BEGIN {
chdir 't' if -d 't';
@@ -3081,5 +3081,15 @@ ok("bbac" =~ /$pattern/ && $1 eq 'a', "[perl #3547]");
ok("bbbac" =~ /$pattern/ && $1 eq 'a', "[perl #3547]");
ok("bbbbac" =~ /$pattern/ && $1 eq 'a', "[perl #3547]");
-# last test 983
+{
+ # [perl #18232]
+ "\x{100}" =~ /(.)/;
+ ok( $1 eq "\x{100}", '$1 is utf-8 [perl #18232]' );
+ { 'a' =~ /./; }
+ ok( $1 eq "\x{100}", '$1 is still utf-8' );
+ ok( $1 ne "\xC4\x80", '$1 is not non-utf-8' );
+}
+
+# last test 984
+