summaryrefslogtreecommitdiff
path: root/utf8.c
diff options
context:
space:
mode:
author Karl Williamson <public@khwilliamson.com> 2011-02-02 12:01:34 -0700
committer Karl Williamson <public@khwilliamson.com> 2011-02-02 16:31:23 -0700
commit 56ca34cada940c7f6aae9a59da266e541530041e (patch)
tree 98fd450cd1ce016ebeddfdbe4d2241925b1fc618 /utf8.c
parent 19c4061aa8fa454637e29db1afd668c3f66d3a01 (diff)
download perl-56ca34cada940c7f6aae9a59da266e541530041e.tar.gz
Move ANYOF folding from regexec to regcomp
This is for security as well as performance. It allows Unicode properties to be matched case-insensitively. As a result, the swash inversion hash is converted from having UTF-8 keys to having numeric (code point) keys. It also, for the first time, fixes the bug where /i doesn't work for a code point that is not at the end of a range in a bracketed character class and that has a multi-character fold.
Diffstat (limited to 'utf8.c')
-rw-r--r--utf8.c15
1 files changed, 5 insertions, 10 deletions
diff --git a/utf8.c b/utf8.c
index 18ff1d8880..6053465802 100644
--- a/utf8.c
+++ b/utf8.c
@@ -2657,10 +2657,6 @@ Perl__swash_inversion_hash(pTHX_ SV* const swash)
char* key_end = (char *) uvuni_to_utf8((U8*) key, val);
STRLEN key_len = key_end - key;
- /* And the value is what the forward mapping is from. */
- char utf8_inverse[UTF8_MAXBYTES+1];
- char *utf8_inverse_end = (char *) uvuni_to_utf8((U8*) utf8_inverse, inverse);
-
/* Get the list for the map */
if ((listp = hv_fetch(ret, key, key_len, FALSE))) {
list = (AV*) *listp;
@@ -2679,22 +2675,21 @@ Perl__swash_inversion_hash(pTHX_ SV* const swash)
Perl_croak(aTHX_ "panic: av_fetch() unexpectedly failed");
}
entry = *entryp;
- if (SvCUR(entry) != key_len) {
- continue;
- }
- if (memEQ(key, SvPVX(entry), key_len)) {
+ if (SvUV(entry) == val) {
found_key = TRUE;
break;
}
}
+
+ /* Make sure there is a mapping to itself on the list */
if (! found_key) {
- element = newSVpvn_flags(key, key_len, SVf_UTF8);
+ element = newSVuv(val);
av_push(list, element);
}
/* Simply add the value to the list */
- element = newSVpvn_flags(utf8_inverse, utf8_inverse_end - utf8_inverse, SVf_UTF8);
+ element = newSVuv(inverse);
av_push(list, element);
/* swash_get() increments the value of val for each element in the