summary | refs | log | tree | commit | diff
path: root/regcomp.c
diff options
context:
space:
mode:
author: Karl Williamson <public@khwilliamson.com> 2014-01-23 20:09:43 -0700
committer: Karl Williamson <public@khwilliamson.com> 2014-01-27 23:03:47 -0700
commit: 1505a47c967e97cd85fc9d764bd8736468ad8a95 (patch)
tree: ca0f6409483485508922737c780a6ae2e5b25d69 /regcomp.c
parent: b5049321cab77697d1e1510c28127c08c566f5c4 (diff)
download: perl-1505a47c967e97cd85fc9d764bd8736468ad8a95.tar.gz
regcomp.c: Avoid calling heavy duty functions when possible
This code calls the general-purpose functions to fold a code point and to convert it to UTF-8. Those calls can be avoided for many frequently used code points.
Diffstat (limited to 'regcomp.c')
-rw-r--r-- regcomp.c 22
1 files changed, 21 insertions, 1 deletions
diff --git a/regcomp.c b/regcomp.c
index 6ec77884e6..9bf05c0ff6 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -10801,7 +10801,20 @@ S_alloc_maybe_populate_EXACT(pTHX_ RExC_state_t *pRExC_state,
if (! len_passed_in) {
if (UTF) {
- if (FOLD && (! LOC || code_point > 255)) {
+ if (UNI_IS_INVARIANT(code_point)) {
+ if (LOC || ! FOLD) { /* /l defers folding until runtime */
+ *character = (U8) code_point;
+ }
+ else { /* Here is /i and not /l (toFOLD() is defined on just
+ ASCII, which isn't the same thing as INVARIANT on
+ EBCDIC, but it works there, as the extra invariants
+ fold to themselves) */
+ *character = toFOLD((U8) code_point);
+ }
+ len = 1;
+ }
+ else if (FOLD && (! LOC || code_point > 255)) {
+ /* Folding, and ok to do so now */
_to_uni_fold_flags(code_point,
character,
&len,
@@ -10811,6 +10824,13 @@ S_alloc_maybe_populate_EXACT(pTHX_ RExC_state_t *pRExC_state,
? FOLD_FLAGS_NOMIX_ASCII
: 0));
}
+ else if (code_point <= MAX_UTF8_TWO_BYTE) {
+
+ /* Not folding this cp, and can output it directly */
+ *character = UTF8_TWO_BYTE_HI(code_point);
+ *(character + 1) = UTF8_TWO_BYTE_LO(code_point);
+ len = 2;
+ }
else {
uvchr_to_utf8( character, code_point);
len = UTF8SKIP(character);