summaryrefslogtreecommitdiff
path: root/regexec.c
diff options
context:
space:
mode:
author Karl Williamson <public@khwilliamson.com> 2012-04-29 09:33:19 -0600
committer Karl Williamson <public@khwilliamson.com> 2012-05-22 08:24:21 -0600
commit b0573d8bdb309bce282466c30d1092bc2d421ed7 (patch)
tree 778a18ff2670c49c871dac8458c567c8465bbe04 /regexec.c
parent 4190d317a527a91a9d49ead7135a074e8ab0ff7f (diff)
download perl-b0573d8bdb309bce282466c30d1092bc2d421ed7.tar.gz
regexec.c: Use foldcase instead of lower/upper
The way Perl deals with folds of ASCII-range characters is to have an array that maps each character to its opposite-case counterpart. For example, fold[a] is A, and fold[A] is a. To see if a character matches a pattern character case-insensitively, you compare it against the pattern character itself and, if that fails, against fold[pattern character]. Unfortunately, that breaks down outside of ASCII, where the folds are not necessarily one-to-one. This code is a vestige of that original paradigm, and I'm converting it to use the more modern one. The apparent reason it doesn't fail in fold_grind.t is that it is for CURLY nodes, and CURLY is not currently set on EXACTish nodes unless the node contains a single UTF-8 invariant character. Effectively, that means a character in the ASCII range. I think parts of this could be simplified further, but I don't understand the whole context of this code well enough to undertake that.
Diffstat (limited to 'regexec.c')
-rw-r--r-- regexec.c 15
1 files changed, 5 insertions, 10 deletions
diff --git a/regexec.c b/regexec.c
index 46484dffce..8650654934 100644
--- a/regexec.c
+++ b/regexec.c
@@ -5290,16 +5290,11 @@ NULL
}
else { /* UTF_PATTERN */
if (IS_TEXTFU(text_node) || IS_TEXTF(text_node)) {
- STRLEN ulen1, ulen2;
- U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
- U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
-
- /* XXX probably should be using foldcase */
- to_utf8_lower((U8*)s, tmpbuf1, &ulen1);
- to_utf8_upper((U8*)s, tmpbuf2, &ulen2);
- ST.c1 = utf8n_to_uvchr(tmpbuf1, UTF8_MAXLEN, 0,
- uniflags);
- ST.c2 = utf8n_to_uvchr(tmpbuf2, UTF8_MAXLEN, 0,
+ STRLEN ulen;
+ U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
+
+ to_utf8_fold((U8*)s, tmpbuf, &ulen);
+ ST.c1 = ST.c2 = utf8n_to_uvchr(tmpbuf, UTF8_MAXLEN, 0,
uniflags);
}
else {