summaryrefslogtreecommitdiff
path: root/regexec.c
diff options
context:
space:
mode:
author Karl Williamson <public@khwilliamson.com> 2011-02-19 10:20:50 -0700
committer Karl Williamson <public@khwilliamson.com> 2011-02-19 11:47:42 -0700
commit 17580e7a366d68c68fa37fe63c284c1d83b245fe (patch)
tree 083c8313722786593172d0a6ff14a4767c9d520c /regexec.c
parent 41ce0a5ea4efb0067c8416f074bb757a70d2faa1 (diff)
download perl-17580e7a366d68c68fa37fe63c284c1d83b245fe.tar.gz
Fix locale caseless matching and utf8
As explained in the doc changes of this patch, under /l, caseless matching of code points less than 256 now uses locale rules regardless of the utf8ness of the pattern or string. Caseless matching now agrees with the behavior of things like \w in this regard.
Diffstat (limited to 'regexec.c')
-rw-r--r-- regexec.c 17
1 files changed, 7 insertions, 10 deletions
diff --git a/regexec.c b/regexec.c
index 13f7cac1cf..6bcfee0b3c 100644
--- a/regexec.c
+++ b/regexec.c
@@ -1504,7 +1504,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
case EXACTFL:
if (UTF_PATTERN || utf8_target) {
- utf8_fold_flags = 0; /* XXX, add new flag for locale */
+ utf8_fold_flags = FOLDEQ_UTF8_LOCALE;
goto do_exactf_utf8;
}
fold_array = PL_fold_locale;
@@ -3640,7 +3640,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
PL_reg_flags |= RF_tainted;
folder = foldEQ_locale;
fold_array = PL_fold_locale;
- fold_utf8_flags = 0;
+ fold_utf8_flags = FOLDEQ_UTF8_LOCALE;
goto do_exactf;
case EXACTFU:
@@ -4051,7 +4051,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
folder = foldEQ_locale;
fold_array = PL_fold_locale;
type = REFFL;
- utf8_fold_flags = 0;
+ utf8_fold_flags = FOLDEQ_UTF8_LOCALE;
goto do_nref;
case NREFFA:
@@ -4095,7 +4095,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
PL_reg_flags |= RF_tainted;
folder = foldEQ_locale;
fold_array = PL_fold_locale;
- utf8_fold_flags = 0;
+ utf8_fold_flags = FOLDEQ_UTF8_LOCALE;
goto do_ref;
case REFFA:
@@ -6005,7 +6005,9 @@ S_regrepeat(pTHX_ const regexp *prog, const regnode *p, I32 max, int depth)
case EXACTFL:
PL_reg_flags |= RF_tainted;
- /* FALL THROUGH */
+ utf8_flags = FOLDEQ_UTF8_LOCALE;
+ goto do_exactf;
+
case EXACTF:
case EXACTFU:
utf8_flags = 0;
@@ -6018,11 +6020,6 @@ S_regrepeat(pTHX_ const regexp *prog, const regnode *p, I32 max, int depth)
assert(! UTF_PATTERN || UNI_IS_INVARIANT(c));
if (utf8_target) { /* Use full Unicode fold matching */
-
- /* For the EXACTFL case, It doesn't really make sense to compare
- * locale and utf8, but it is best we can do. The documents warn
- * against mixing them */
-
char *tmpeol = loceol;
while (hardcount < max
&& foldEQ_utf8_flags(scan, &tmpeol, 0, utf8_target,