summaryrefslogtreecommitdiff
path: root/regexec.c
diff options
context:
space:
mode:
authorYves Orton <demerphq@gmail.com>2009-10-25 20:37:08 +0100
committerYves Orton <demerphq@gmail.com>2009-10-25 20:37:08 +0100
commit0abd0d78a73da1c4d13b1c700526b7e5d03b32d4 (patch)
treefb805d77831ce30b7330a3a5956e1ea4123e43a2 /regexec.c
parentfe88edf0c4ada4230b84bdb5417029b8f766694a (diff)
downloadperl-0abd0d78a73da1c4d13b1c700526b7e5d03b32d4.tar.gz
disable non-unicode case insensitive trie matching
Also revert 8902bb05b18c9858efa90229ca1ee42b17277554 as it merely masked one symptom of the deeper problems. Also fixes RT #69973, which was a segfault which was exposed by 8902bb05, see the ticket for further details. http://rt.perl.org/rt3//Public/Bug/Display.html?id=69973 At the core of this is the problem that in unicode matching a bunch of code points have case folding rules beyond just A-Z/a-z. Since the case folding rules are decided at runtime by the string, we can't use the same TRIE tables for both unicode/non-unicode matching. Until this is reconciled or some other solution is found case insensitive matching only gets the TRIE optimisation when the pattern is unicode. From CaseFolding.txt: 00B5; C; 03BC; # MICRO SIGN 00C0; C; 00E0; # LATIN CAPITAL LETTER A WITH GRAVE 00C1; C; 00E1; # LATIN CAPITAL LETTER A WITH ACUTE 00C2; C; 00E2; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX 00C3; C; 00E3; # LATIN CAPITAL LETTER A WITH TILDE 00C4; C; 00E4; # LATIN CAPITAL LETTER A WITH DIAERESIS 00C5; C; 00E5; # LATIN CAPITAL LETTER A WITH RING ABOVE 00C6; C; 00E6; # LATIN CAPITAL LETTER AE 00C7; C; 00E7; # LATIN CAPITAL LETTER C WITH CEDILLA 00C8; C; 00E8; # LATIN CAPITAL LETTER E WITH GRAVE 00C9; C; 00E9; # LATIN CAPITAL LETTER E WITH ACUTE 00CA; C; 00EA; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX 00CB; C; 00EB; # LATIN CAPITAL LETTER E WITH DIAERESIS 00CC; C; 00EC; # LATIN CAPITAL LETTER I WITH GRAVE 00CD; C; 00ED; # LATIN CAPITAL LETTER I WITH ACUTE 00CE; C; 00EE; # LATIN CAPITAL LETTER I WITH CIRCUMFLEX 00CF; C; 00EF; # LATIN CAPITAL LETTER I WITH DIAERESIS 00D0; C; 00F0; # LATIN CAPITAL LETTER ETH 00D1; C; 00F1; # LATIN CAPITAL LETTER N WITH TILDE 00D2; C; 00F2; # LATIN CAPITAL LETTER O WITH GRAVE 00D3; C; 00F3; # LATIN CAPITAL LETTER O WITH ACUTE 00D4; C; 00F4; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX 00D5; C; 00F5; # LATIN CAPITAL LETTER O WITH TILDE 00D6; C; 00F6; # LATIN CAPITAL LETTER O WITH DIAERESIS 00D8; C; 00F8; # LATIN CAPITAL LETTER O WITH STROKE 00D9; C; 
00F9; # LATIN CAPITAL LETTER U WITH GRAVE 00DA; C; 00FA; # LATIN CAPITAL LETTER U WITH ACUTE 00DB; C; 00FB; # LATIN CAPITAL LETTER U WITH CIRCUMFLEX 00DC; C; 00FC; # LATIN CAPITAL LETTER U WITH DIAERESIS 00DD; C; 00FD; # LATIN CAPITAL LETTER Y WITH ACUTE 00DE; C; 00FE; # LATIN CAPITAL LETTER THORN 00DF; F; 0073 0073; # LATIN SMALL LETTER SHARP S
Diffstat (limited to 'regexec.c')
-rw-r--r--regexec.c9
1 files changed, 2 insertions, 7 deletions
diff --git a/regexec.c b/regexec.c
index 402ede3d15..ec09c280e9 100644
--- a/regexec.c
+++ b/regexec.c
@@ -1105,16 +1105,15 @@ Perl_re_intuit_start(pTHX_ REGEXP * const rx, SV *sv, char *strpos,
#define REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc, uscan, len, \
uvc, charid, foldlen, foldbuf, uniflags) STMT_START { \
- UV uvc_unfolded = 0; \
switch (trie_type) { \
case trie_utf8_fold: \
if ( foldlen>0 ) { \
- uvc_unfolded = uvc = utf8n_to_uvuni( uscan, UTF8_MAXLEN, &len, uniflags ); \
+ uvc = utf8n_to_uvuni( uscan, UTF8_MAXLEN, &len, uniflags ); \
foldlen -= len; \
uscan += len; \
len=0; \
} else { \
- uvc_unfolded = uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags ); \
+ uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags ); \
uvc = to_uni_fold( uvc, foldbuf, &foldlen ); \
foldlen -= UNISKIP( uvc ); \
uscan = foldbuf + UNISKIP( uvc ); \
@@ -1140,7 +1139,6 @@ uvc, charid, foldlen, foldbuf, uniflags) STMT_START { \
uvc = (UV)*uc; \
len = 1; \
} \
- \
if (uvc < 256) { \
charid = trie->charmap[ uvc ]; \
} \
@@ -1153,9 +1151,6 @@ uvc, charid, foldlen, foldbuf, uniflags) STMT_START { \
charid = (U16)SvIV(*svpp); \
} \
} \
- if (!charid && trie_type == trie_utf8_fold && !UTF) { \
- charid = trie->charmap[uvc_unfolded]; \
- } \
} STMT_END
#define REXEC_FBC_EXACTISH_CHECK(CoNd) \