summaryrefslogtreecommitdiff
path: root/regcomp.c
diff options
context:
space:
mode:
authorKarl Williamson <public@khwilliamson.com>2011-03-20 11:32:11 -0600
committerKarl Williamson <public@khwilliamson.com>2011-03-20 12:16:13 -0600
commite286af2d135c6b1b03be2bd322f22f89e1b1aa5d (patch)
tree844c6a3e481969052806d93037c6b1d021cf455d /regcomp.c
parent1d4120df745f39cf0ba70379a5bed371bf4c61f4 (diff)
downloadperl-e286af2d135c6b1b03be2bd322f22f89e1b1aa5d.tar.gz
regcomp.c: Remove FOLDCHAR generation
ANYOFV handles multi-char folds in ANYOF nodes, and it turns out to be a superset of what FOLDCHAR does. FOLDCHAR never got fully implemented in regexec.c, whereas ANYOFV has been. FOLDCHAR may be the better way to go in the long term, as it takes less space and is faster, but this gives us the functionality today, with no extra work. FOLDCHAR had been generated only when the character in question is a literal in the input stream; it wasn't touched for the probably more common use of \N{} or \x, which were changed earlier in the 5.13 series from not doing anything special to using ANYOFV. It turns out that the code that does all of this is in a part of the code that gets executed anyway, so simply removing the special FOLDCHAR code causes execution to drop down to it. I'm thinking at the moment that for 5.16, ANYOFV should be removed in favor of branches, using the technique of recursion that has recently been added to \N{}. That would enable easier trie generation and simplify things in regexec and the optimizer.
Diffstat (limited to 'regcomp.c')
-rw-r--r--regcomp.c36
1 files changed, 0 insertions, 36 deletions
diff --git a/regcomp.c b/regcomp.c
index 1a2d2b2cdb..c33e5c31eb 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -8006,27 +8006,6 @@ tryagain:
RExC_parse++;
vFAIL("Quantifier follows nothing");
break;
- case LATIN_SMALL_LETTER_SHARP_S:
- case UTF8_TWO_BYTE_HI_nocast(LATIN_SMALL_LETTER_SHARP_S):
- case UTF8_TWO_BYTE_HI_nocast(IOTA_D_T):
-#if UTF8_TWO_BYTE_HI_nocast(UPSILON_D_T) != UTF8_TWO_BYTE_HI_nocast(IOTA_D_T)
-#error The beginning utf8 byte of IOTA_D_T and UPSILON_D_T unexpectedly differ. Other instances in this code should have the case statement below.
- case UTF8_TWO_BYTE_HI_nocast(UPSILON_D_T):
-#endif
- do_foldchar:
- if (!LOC && FOLD) {
- U32 len,cp;
- len=0; /* silence a spurious compiler warning */
- if ((cp = what_len_TRICKYFOLD_safe(RExC_parse,RExC_end,UTF,len))) {
- *flagp |= HASWIDTH; /* could be SIMPLE too, but needs a handler in regexec.regrepeat */
- RExC_parse+=len-1; /* we get one from nextchar() as well. :-( */
- ret = reganode(pRExC_state, FOLDCHAR, cp);
- Set_Node_Length(ret, 1); /* MJD */
- nextchar(pRExC_state); /* kill whitespace under /x */
- return ret;
- }
- }
- goto outer_default;
case '\\':
/* Special Escapes
@@ -8041,10 +8020,6 @@ tryagain:
literal text handling code.
*/
switch ((U8)*++RExC_parse) {
- case LATIN_SMALL_LETTER_SHARP_S:
- case UTF8_TWO_BYTE_HI_nocast(LATIN_SMALL_LETTER_SHARP_S):
- case UTF8_TWO_BYTE_HI_nocast(IOTA_D_T):
- goto do_foldchar;
/* Special Escapes */
case 'A':
RExC_seen_zerolen++;
@@ -8465,7 +8440,6 @@ tryagain:
/* FALL THROUGH */
default:
- outer_default:
parse_start = RExC_parse - 1;
@@ -8512,11 +8486,6 @@ tryagain:
if (RExC_flags & RXf_PMf_EXTENDED)
p = regwhite( pRExC_state, p );
switch ((U8)*p) {
- case LATIN_SMALL_LETTER_SHARP_S:
- case UTF8_TWO_BYTE_HI_nocast(LATIN_SMALL_LETTER_SHARP_S):
- case UTF8_TWO_BYTE_HI_nocast(IOTA_D_T):
- if (LOC || !FOLD || !is_TRICKYFOLD_safe(p,RExC_end,UTF))
- goto normal_default;
case '^':
case '$':
case '.':
@@ -8541,11 +8510,6 @@ tryagain:
switch ((U8)*++p) {
/* These are all the special escapes. */
- case LATIN_SMALL_LETTER_SHARP_S:
- case UTF8_TWO_BYTE_HI_nocast(LATIN_SMALL_LETTER_SHARP_S):
- case UTF8_TWO_BYTE_HI_nocast(IOTA_D_T):
- if (LOC || !FOLD || !is_TRICKYFOLD_safe(p,RExC_end,UTF))
- goto normal_default;
case 'A': /* Start assertion */
case 'b': case 'B': /* Word-boundary assertion*/
case 'C': /* Single char !DANGEROUS! */