diff options
author | Karl Williamson <public@khwilliamson.com> | 2014-01-23 20:09:43 -0700 |
---|---|---|
committer | Karl Williamson <public@khwilliamson.com> | 2014-01-27 23:03:47 -0700 |
commit | 1505a47c967e97cd85fc9d764bd8736468ad8a95 (patch) | |
tree | ca0f6409483485508922737c780a6ae2e5b25d69 /regcomp.c | |
parent | b5049321cab77697d1e1510c28127c08c566f5c4 (diff) | |
download | perl-1505a47c967e97cd85fc9d764bd8736468ad8a95.tar.gz |
regcomp.c: Avoid calling heavy duty functions when possible
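This code calls the general-purpose functions to fold and convert to
UTF-8. These calls can be avoided for many frequently used code points:
invariants are stored (or folded) as a single byte, and code points that
are not being folded at compile time and fit in two UTF-8 bytes are
encoded directly.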
Diffstat (limited to 'regcomp.c')
-rw-r--r-- | regcomp.c | 22 |
1 files changed, 21 insertions, 1 deletions
@@ -10801,7 +10801,20 @@ S_alloc_maybe_populate_EXACT(pTHX_ RExC_state_t *pRExC_state,
 
     if (! len_passed_in) {
         if (UTF) {
-            if (FOLD && (! LOC || code_point > 255)) {
+            if (UNI_IS_INVARIANT(code_point)) {
+                if (LOC || ! FOLD) { /* /l defers folding until runtime */
+                    *character = (U8) code_point;
+                }
+                else { /* Here is /i and not /l (toFOLD() is defined on just
+                          ASCII, which isn't the same thing as INVARIANT on
+                          EBCDIC, but it works there, as the extra invariants
+                          fold to themselves) */
+                    *character = toFOLD((U8) code_point);
+                }
+                len = 1;
+            }
+            else if (FOLD && (! LOC || code_point > 255)) {
+                /* Folding, and ok to do so now */
                 _to_uni_fold_flags(code_point,
                                    character,
                                    &len,
@@ -10811,6 +10824,13 @@ S_alloc_maybe_populate_EXACT(pTHX_ RExC_state_t *pRExC_state,
                                        ? FOLD_FLAGS_NOMIX_ASCII
                                        : 0));
             }
+            else if (code_point <= MAX_UTF8_TWO_BYTE) {
+
+                /* Not folding this cp, and can output it directly */
+                *character = UTF8_TWO_BYTE_HI(code_point);
+                *(character + 1) = UTF8_TWO_BYTE_LO(code_point);
+                len = 2;
+            }
             else {
                 uvchr_to_utf8( character, code_point);
                 len = UTF8SKIP(character);