diff options
author | Karl Williamson <public@khwilliamson.com> | 2011-03-20 11:32:11 -0600 |
---|---|---|
committer | Karl Williamson <public@khwilliamson.com> | 2011-03-20 12:16:13 -0600 |
commit | e286af2d135c6b1b03be2bd322f22f89e1b1aa5d (patch) | |
tree | 844c6a3e481969052806d93037c6b1d021cf455d /regcomp.c | |
parent | 1d4120df745f39cf0ba70379a5bed371bf4c61f4 (diff) | |
download | perl-e286af2d135c6b1b03be2bd322f22f89e1b1aa5d.tar.gz |
regcomp.c: Remove FOLDCHAR generation
ANYOFV handles multi-char folds in ANYOF nodes, and it turns
out it is a superset of what FOLDCHAR does, which never got fully
implemented in regexec.c, whereas ANYOFV is. FOLDCHAR may be the better
way to go in the long-term, as it takes less space and is faster, but
this gives us the functionality today, with no extra work.
FOLDCHAR had been generated only when the character in question is a
literal in the input stream, and wasn't touched for the probably more
common use of \N{} or \x, which were fixed from not doing anything
special to using ANYOFV earlier in the 5.13 series, and it turns out
that the code that does it all is in a part of the code that gets
executed anyway, so that simply removing the special FOLDCHAR code
causes execution to drop down to this code.
I'm thinking at the moment that for 5.16, ANYOFV should be removed in
favor of branches, using the technique of recursion that has recently
been added to \N{}. That would enable easier trie generation and
simplify things in regexec and the optimizer.
Diffstat (limited to 'regcomp.c')
-rw-r--r-- | regcomp.c | 36 |
1 files changed, 0 insertions, 36 deletions
@@ -8006,27 +8006,6 @@ tryagain: RExC_parse++; vFAIL("Quantifier follows nothing"); break; - case LATIN_SMALL_LETTER_SHARP_S: - case UTF8_TWO_BYTE_HI_nocast(LATIN_SMALL_LETTER_SHARP_S): - case UTF8_TWO_BYTE_HI_nocast(IOTA_D_T): -#if UTF8_TWO_BYTE_HI_nocast(UPSILON_D_T) != UTF8_TWO_BYTE_HI_nocast(IOTA_D_T) -#error The beginning utf8 byte of IOTA_D_T and UPSILON_D_T unexpectedly differ. Other instances in this code should have the case statement below. - case UTF8_TWO_BYTE_HI_nocast(UPSILON_D_T): -#endif - do_foldchar: - if (!LOC && FOLD) { - U32 len,cp; - len=0; /* silence a spurious compiler warning */ - if ((cp = what_len_TRICKYFOLD_safe(RExC_parse,RExC_end,UTF,len))) { - *flagp |= HASWIDTH; /* could be SIMPLE too, but needs a handler in regexec.regrepeat */ - RExC_parse+=len-1; /* we get one from nextchar() as well. :-( */ - ret = reganode(pRExC_state, FOLDCHAR, cp); - Set_Node_Length(ret, 1); /* MJD */ - nextchar(pRExC_state); /* kill whitespace under /x */ - return ret; - } - } - goto outer_default; case '\\': /* Special Escapes @@ -8041,10 +8020,6 @@ tryagain: literal text handling code. */ switch ((U8)*++RExC_parse) { - case LATIN_SMALL_LETTER_SHARP_S: - case UTF8_TWO_BYTE_HI_nocast(LATIN_SMALL_LETTER_SHARP_S): - case UTF8_TWO_BYTE_HI_nocast(IOTA_D_T): - goto do_foldchar; /* Special Escapes */ case 'A': RExC_seen_zerolen++; @@ -8465,7 +8440,6 @@ tryagain: /* FALL THROUGH */ default: - outer_default: parse_start = RExC_parse - 1; @@ -8512,11 +8486,6 @@ tryagain: if (RExC_flags & RXf_PMf_EXTENDED) p = regwhite( pRExC_state, p ); switch ((U8)*p) { - case LATIN_SMALL_LETTER_SHARP_S: - case UTF8_TWO_BYTE_HI_nocast(LATIN_SMALL_LETTER_SHARP_S): - case UTF8_TWO_BYTE_HI_nocast(IOTA_D_T): - if (LOC || !FOLD || !is_TRICKYFOLD_safe(p,RExC_end,UTF)) - goto normal_default; case '^': case '$': case '.': @@ -8541,11 +8510,6 @@ tryagain: switch ((U8)*++p) { /* These are all the special escapes. 
*/ - case LATIN_SMALL_LETTER_SHARP_S: - case UTF8_TWO_BYTE_HI_nocast(LATIN_SMALL_LETTER_SHARP_S): - case UTF8_TWO_BYTE_HI_nocast(IOTA_D_T): - if (LOC || !FOLD || !is_TRICKYFOLD_safe(p,RExC_end,UTF)) - goto normal_default; case 'A': /* Start assertion */ case 'b': case 'B': /* Word-boundary assertion*/ case 'C': /* Single char !DANGEROUS! */ |