diff options
author | Rafael Garcia-Suarez <rgarciasuarez@gmail.com> | 2003-01-22 23:35:56 +0100 |
---|---|---|
committer | Jarkko Hietaniemi <jhi@iki.fi> | 2003-02-02 16:48:26 +0000 |
commit | a30b2f1f43446bf5477e7baa5fee5d5c59659ce6 (patch) | |
tree | f119415cdab16fe15be9b51e64fce8e428881183 | |
parent | ec3916884f8b516a101ac0b622cd88ae38c1b49d (diff) | |
download | perl-a30b2f1f43446bf5477e7baa5fee5d5c59659ce6.tar.gz |
[perl #18232] [PATCH] store PL_reg_match_utf8 in reganch
Message-Id: <20030122223556.57d597a3.rgarciasuarez@free.fr>
p4raw-id: //depot/perl@18633
-rw-r--r-- | mg.c | 6 | ||||
-rw-r--r-- | pp.c | 2 | ||||
-rw-r--r-- | pp_ctl.c | 2 | ||||
-rw-r--r-- | pp_hot.c | 8 | ||||
-rw-r--r-- | regexec.c | 2 | ||||
-rw-r--r-- | regexp.h | 8 | ||||
-rwxr-xr-x | t/op/pat.t | 14 |
7 files changed, 31 insertions, 11 deletions
@@ -418,7 +418,7 @@ Perl_magic_regdatum_get(pTHX_ SV *sv, MAGIC *mg) else /* @- */ i = s; - if (i > 0 && PL_reg_match_utf8) { + if (i > 0 && RX_MATCH_UTF8(rx)) { char *b = rx->subbeg; if (b) i = Perl_utf8_length(aTHX_ (U8*)b, (U8*)(b+i)); @@ -459,7 +459,7 @@ Perl_magic_len(pTHX_ SV *sv, MAGIC *mg) { i = t1 - s1; getlen: - if (i > 0 && PL_reg_match_utf8) { + if (i > 0 && RX_MATCH_UTF8(rx)) { char *s = rx->subbeg + s1; char *send = rx->subbeg + t1; @@ -707,7 +707,7 @@ Perl_magic_get(pTHX_ SV *sv, MAGIC *mg) getrx: if (i >= 0) { sv_setpvn(sv, s, i); - if (PL_reg_match_utf8 && is_utf8_string((U8*)s, i)) + if (RX_MATCH_UTF8(rx) && is_utf8_string((U8*)s, i)) SvUTF8_on(sv); else SvUTF8_off(sv); @@ -4391,7 +4391,7 @@ PP(pp_split) TAINT_IF((pm->op_pmflags & PMf_LOCALE) && (pm->op_pmflags & (PMf_WHITE | PMf_SKIPWHITE))); - PL_reg_match_utf8 = do_utf8; + RX_MATCH_UTF8_set(rx, do_utf8); if (pm->op_pmreplroot) { #ifdef USE_ITHREADS @@ -158,7 +158,7 @@ PP(pp_substcont) register REGEXP *rx = cx->sb_rx; rxres_restore(&cx->sb_rxres, rx); - PL_reg_match_utf8 = SvUTF8(cx->sb_targ) ? 1 : 0; + RX_MATCH_UTF8_set(rx, SvUTF8(cx->sb_targ)); if (cx->sb_iters++) { I32 saviters = cx->sb_iters; @@ -1180,7 +1180,7 @@ PP(pp_match) (PL_tainted && (pm->op_pmflags & PMf_RETAINT))); TAINT_NOT; - PL_reg_match_utf8 = DO_UTF8(TARG); + RX_MATCH_UTF8_set(rx, DO_UTF8(TARG)); /* PMdf_USED is set after a ?? matches once */ if (pm->op_pmdynflags & PMdf_USED) { @@ -1355,7 +1355,7 @@ yup: /* Confirmed by INTUIT */ if (global) { rx->subbeg = truebase; rx->startp[0] = s - truebase; - if (PL_reg_match_utf8) { + if (RX_MATCH_UTF8(rx)) { char *t = (char*)utf8_hop((U8*)s, rx->minlen); rx->endp[0] = t - truebase; } @@ -1907,14 +1907,14 @@ PP(pp_subst) rxtainted |= 2; TAINT_NOT; - PL_reg_match_utf8 = DO_UTF8(TARG); + RX_MATCH_UTF8_set(rx, DO_UTF8(TARG)); force_it: if (!pm || !s) DIE(aTHX_ "panic: pp_subst"); strend = s + len; - slen = PL_reg_match_utf8 ? utf8_length((U8*)s, (U8*)strend) : len; + slen = RX_MATCH_UTF8(rx) ? utf8_length((U8*)s, (U8*)strend) : len; maxiters = 2 * slen + 10; /* We can match twice at each position, once with zero-length, second time with non-zero. */ @@ -400,6 +400,7 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos, char *i_strpos = strpos; SV *dsv = PERL_DEBUG_PAD_ZERO(0); #endif + RX_MATCH_UTF8_set(prog,do_utf8); if (prog->reganch & ROPT_UTF8) { DEBUG_r(PerlIO_printf(Perl_debug_log, @@ -1615,6 +1616,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * SV *dsv0 = PERL_DEBUG_PAD_ZERO(0); SV *dsv1 = PERL_DEBUG_PAD_ZERO(1); #endif + RX_MATCH_UTF8_set(prog,do_utf8); PL_regcc = 0; @@ -71,6 +71,7 @@ typedef struct regexp { #define ROPT_NAUGHTY 0x20000 /* how exponential is this pattern? */ #define ROPT_COPY_DONE 0x40000 /* subbeg is a copy of the string */ #define ROPT_TAINTED_SEEN 0x80000 +#define ROPT_MATCH_UTF8 0x10000000 /* subbeg is utf-8 */ #define RE_USE_INTUIT_NOML 0x0100000 /* Best to intuit before matching */ #define RE_USE_INTUIT_ML 0x0200000 @@ -99,6 +100,13 @@ typedef struct regexp { ? RX_MATCH_COPIED_on(prog) \ : RX_MATCH_COPIED_off(prog)) +#define RX_MATCH_UTF8(prog) ((prog)->reganch & ROPT_MATCH_UTF8) +#define RX_MATCH_UTF8_on(prog) ((prog)->reganch |= ROPT_MATCH_UTF8) +#define RX_MATCH_UTF8_off(prog) ((prog)->reganch &= ~ROPT_MATCH_UTF8) +#define RX_MATCH_UTF8_set(prog, t) ((t) \ + ? (RX_MATCH_UTF8_on(prog), (PL_reg_match_utf8 = 1)) \ + : (RX_MATCH_UTF8_off(prog), (PL_reg_match_utf8 = 0))) + #define REXEC_COPY_STR 0x01 /* Need to copy the string. */ #define REXEC_CHECKED 0x02 /* check_substr already checked. */ #define REXEC_SCREAM 0x04 /* use scream table. */ diff --git a/t/op/pat.t b/t/op/pat.t index 0868546a9b..360892bc4c 100755 --- a/t/op/pat.t +++ b/t/op/pat.t @@ -6,7 +6,7 @@ $| = 1; -print "1..983\n"; +print "1..986\n"; BEGIN { chdir 't' if -d 't'; @@ -3081,5 +3081,15 @@ ok("bbac" =~ /$pattern/ && $1 eq 'a', "[perl #3547]"); ok("bbbac" =~ /$pattern/ && $1 eq 'a', "[perl #3547]"); ok("bbbbac" =~ /$pattern/ && $1 eq 'a', "[perl #3547]"); -# last test 983 +{ + # [perl #18232] + "\x{100}" =~ /(.)/; + ok( $1 eq "\x{100}", '$1 is utf-8 [perl #18232]' ); + { 'a' =~ /./; } + ok( $1 eq "\x{100}", '$1 is still utf-8' ); + ok( $1 ne "\xC4\x80", '$1 is not non-utf-8' ); +} + +# last test 984 + |