summary | refs | log | tree | commit | diff
path: root/embedvar.h
diff options
context:
space:
mode:
author: Karl Williamson <public@khwilliamson.com> 2010-11-07 15:25:31 -0700
committer: Father Chrysostomos <sprout@cpan.org> 2010-11-07 21:42:42 -0800
commit: 2726813d9af5d50f1451663cd931317e7172da50 (patch)
tree: 12ffa4ce7951e688df59ceceb9a061ab67d606de /embedvar.h
parent: a85c03da46d77cd5b9f4e0ba809245cf000962ad (diff)
download: perl-2726813d9af5d50f1451663cd931317e7172da50.tar.gz
regexec.c: Don't give up on fold matching early
As noted in the comments of the code, "a" =~ /[A]/i doesn't currently work. (regcomp.c knows about the ASCII characters and corrects for this case, but not always; for example, it does not in cases like "a" =~ /\p{Upper}/i. This patch catches all of those.) The patch works by computing a list of all characters that (singly) fold to another one, and then checking each of those. The maximum length of the list is 3 in the current Unicode standard. I believe that a better long-term solution is to do this at compile time rather than execution time, by generating a closure of everything matched. However, this can't be done now, because the data structure would need to be extensively revamped to list all non-byte characters, and user-defined \p{} matches are not known at compile time. This patch also doesn't handle the multi-char folds; there is a separate ticket for those.
Diffstat (limited to 'embedvar.h')
-rw-r--r-- embedvar.h 2
1 files changed, 2 insertions, 0 deletions
diff --git a/embedvar.h b/embedvar.h
index 87099c13aa..36f75759be 100644
--- a/embedvar.h
+++ b/embedvar.h
@@ -341,6 +341,7 @@
#define PL_utf8_ascii (vTHX->Iutf8_ascii)
#define PL_utf8_cntrl (vTHX->Iutf8_cntrl)
#define PL_utf8_digit (vTHX->Iutf8_digit)
+#define PL_utf8_foldclosures (vTHX->Iutf8_foldclosures)
#define PL_utf8_graph (vTHX->Iutf8_graph)
#define PL_utf8_idcont (vTHX->Iutf8_idcont)
#define PL_utf8_idstart (vTHX->Iutf8_idstart)
@@ -670,6 +671,7 @@
#define PL_Iutf8_ascii PL_utf8_ascii
#define PL_Iutf8_cntrl PL_utf8_cntrl
#define PL_Iutf8_digit PL_utf8_digit
+#define PL_Iutf8_foldclosures PL_utf8_foldclosures
#define PL_Iutf8_graph PL_utf8_graph
#define PL_Iutf8_idcont PL_utf8_idcont
#define PL_Iutf8_idstart PL_utf8_idstart