diff options
author | Jarkko Hietaniemi <jhi@iki.fi> | 2001-11-30 01:16:22 +0000 |
---|---|---|
committer | Jarkko Hietaniemi <jhi@iki.fi> | 2001-11-30 01:16:22 +0000 |
commit | cadb39a9446639e3c297a768022eb9c72347992a (patch) | |
tree | 5b60c2cc75c82fd17442a5079781525984e078c9 /regexec.c | |
parent | 596717cee028c8ad9e0b419ef9143521a52d81b0 (diff) | |
download | perl-cadb39a9446639e3c297a768022eb9c72347992a.tar.gz |
Add a note about folding vs lowercase.
p4raw-id: //depot/perl@13376
Diffstat (limited to 'regexec.c')
-rw-r--r-- | regexec.c | 8 |
1 files changed, 8 insertions, 0 deletions
```diff
@@ -959,6 +959,14 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
             if (do_utf8) {
                 STRLEN len;
+                /* The ibcmp_utf8() uses to_uni_fold() which is more
+                 * correct folding for Unicode than using lowercase.
+                 * However, it doesn't work quite fully since the folding
+                 * is a one-to-many mapping and the regex optimizer is
+                 * unaware of this, so it may throw out good matches.
+                 * Fortunately, not getting this right is allowed
+                 * for Unicode Regular Expression Support level 1,
+                 * only one-to-one matching is required. --jhi */
                 if (c1 == c2)
                     while (s <= e) {
                         if ( utf8_to_uvchr((U8*)s, &len) == c1
```