summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJarkko Hietaniemi <jhi@iki.fi>2001-12-22 02:47:08 +0000
committerJarkko Hietaniemi <jhi@iki.fi>2001-12-22 02:47:08 +0000
commita5961de5f4215b5cd376e88c8c5d267c7f7123f6 (patch)
tree84f55358517d319d1b067b05c90e7034e1e4a29e
parent96c57f7e641f15cc924272c2d866750e62c1b955 (diff)
downloadperl-a5961de5f4215b5cd376e88c8c5d267c7f7123f6.tar.gz
Unicode casefolding fixes.
p4raw-id: //depot/perl@13843
-rw-r--r--op.c4
-rw-r--r--regcomp.c22
-rw-r--r--regexec.c7
-rwxr-xr-xt/op/pat.t35
4 files changed, 53 insertions, 15 deletions
diff --git a/op.c b/op.c
index 9b1556e2b0..c7330521d7 100644
--- a/op.c
+++ b/op.c
@@ -3127,12 +3127,16 @@ Perl_pmruntime(pTHX_ OP *o, OP *expr, OP *repl)
p = SvPV(pat, plen);
pm->op_pmflags |= PMf_SKIPWHITE;
}
+ if (DO_UTF8(pat) || (PL_hints & HINT_UTF8))
+ pm->op_pmdynflags |= PMdf_UTF8;
PM_SETRE(pm, CALLREGCOMP(aTHX_ p, p + plen, pm));
if (strEQ("\\s+", PM_GETRE(pm)->precomp))
pm->op_pmflags |= PMf_WHITE;
op_free(expr);
}
else {
+ if (PL_hints & HINT_UTF8)
+ pm->op_pmdynflags |= PMdf_UTF8;
if (pm->op_pmflags & PMf_KEEP || !(PL_hints & HINT_RE_EVAL))
expr = newUNOP((!(PL_hints & HINT_RE_EVAL)
? OP_REGCRESET
diff --git a/regcomp.c b/regcomp.c
index 53d89479a5..463b778c5e 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -1690,17 +1690,15 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm)
if (exp == NULL)
FAIL("NULL regexp argument");
- /* XXXX This looks very suspicious... */
- if (pm->op_pmdynflags & PMdf_CMP_UTF8)
- RExC_utf8 = 1;
- else
- RExC_utf8 = 0;
+ RExC_utf8 = pm->op_pmdynflags & PMdf_CMP_UTF8;
RExC_precomp = exp;
- DEBUG_r(if (!PL_colorset) reginitcolors());
- DEBUG_r(PerlIO_printf(Perl_debug_log, "%sCompiling REx%s `%s%*s%s'\n",
- PL_colors[4],PL_colors[5],PL_colors[0],
- (int)(xend - exp), RExC_precomp, PL_colors[1]));
+ DEBUG_r({
+ if (!PL_colorset) reginitcolors();
+ PerlIO_printf(Perl_debug_log, "%sCompiling REx%s `%s%*s%s'\n",
+ PL_colors[4],PL_colors[5],PL_colors[0],
+ (int)(xend - exp), RExC_precomp, PL_colors[1]);
+ });
RExC_flags16 = pm->op_pmflags;
RExC_sawback = 0;
@@ -3967,10 +3965,10 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
}
else
#endif
- for (i = prevvalue; i <= ceilvalue; i++)
- ANYOF_BITMAP_SET(ret, i);
+ for (i = prevvalue; i <= ceilvalue; i++)
+ ANYOF_BITMAP_SET(ret, i);
}
- if (value > 255) {
+ if (value > 255 || UTF) {
ANYOF_FLAGS(ret) |= ANYOF_UNICODE;
if (prevvalue < value)
Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\t%04"UVxf"\n",
diff --git a/regexec.c b/regexec.c
index 35a0a6c2b0..b7528e720d 100644
--- a/regexec.c
+++ b/regexec.c
@@ -4110,9 +4110,12 @@ S_reginclass(pTHX_ register regnode *n, register U8* p, register bool do_utf8)
match = TRUE;
else if (flags & ANYOF_FOLD) {
STRLEN ulen;
- U8 tmpbuf[UTF8_MAXLEN_UCLC+1];
+ U8 tmpbuf[UTF8_MAXLEN_FOLD+1];
- toLOWER_utf8(p, tmpbuf, &ulen);
+ to_utf8_fold(p, tmpbuf, &ulen);
+ if (swash_fetch(sw, tmpbuf, do_utf8))
+ match = TRUE;
+ to_utf8_upper(p, tmpbuf, &ulen);
if (swash_fetch(sw, tmpbuf, do_utf8))
match = TRUE;
}
diff --git a/t/op/pat.t b/t/op/pat.t
index 077b9579e5..ee7a736091 100755
--- a/t/op/pat.t
+++ b/t/op/pat.t
@@ -6,7 +6,7 @@
$| = 1;
-print "1..757\n";
+print "1..769\n";
BEGIN {
chdir 't' if -d 't';
@@ -2291,3 +2291,36 @@ print "# some Unicode properties\n";
print "not " unless "A\x{100}" =~ /A/i;
print "ok 757\n";
}
+
+{
+ use charnames ':full';
+
+ print "# LATIN LETTER A WITH GRAVE\n";
+ my $lower = "\N{LATIN SMALL LETTER A WITH GRAVE}";
+ my $UPPER = "\N{LATIN CAPITAL LETTER A WITH GRAVE}";
+
+ print $lower =~ m/$UPPER/i ? "ok 758\n" : "not ok 758\n";
+ print $UPPER =~ m/$lower/i ? "ok 759\n" : "not ok 759\n";
+ print $lower =~ m/[$UPPER]/i ? "ok 760\n" : "not ok 760\n";
+ print $UPPER =~ m/[$lower]/i ? "ok 761\n" : "not ok 761\n";
+
+ print "# GREEK LETTER ALPHA WITH VRACHY\n";
+
+ $lower = "\N{GREEK CAPITAL LETTER ALPHA WITH VRACHY}";
+ $UPPER = "\N{GREEK SMALL LETTER ALPHA WITH VRACHY}";
+
+ print $lower =~ m/$UPPER/i ? "ok 762\n" : "not ok 762\n";
+ print $UPPER =~ m/$lower/i ? "ok 763\n" : "not ok 763\n";
+ print $lower =~ m/[$UPPER]/i ? "ok 764\n" : "not ok 764\n";
+ print $UPPER =~ m/[$lower]/i ? "ok 765\n" : "not ok 765\n";
+
+ print "# LATIN LETTER Y WITH DIAERESIS\n";
+
+ $lower = "\N{LATIN CAPITAL LETTER Y WITH DIAERESIS}";
+ $UPPER = "\N{LATIN SMALL LETTER Y WITH DIAERESIS}";
+
+ print $lower =~ m/$UPPER/i ? "ok 766\n" : "not ok 766\n";
+ print $UPPER =~ m/$lower/i ? "ok 767\n" : "not ok 767\n";
+ print $lower =~ m/[$UPPER]/i ? "ok 768\n" : "not ok 768\n";
+ print $UPPER =~ m/[$lower]/i ? "ok 769\n" : "not ok 769\n";
+}