regcomp.c: Avoid calling heavy duty functions when possible

This code calls the general-purpose functions to fold and convert to UTF-8. These calls can be avoided for many frequently used code points.
author: Karl Williamson <public@khwilliamson.com> 2014-01-23 20:09:43 -0700
committer: Karl Williamson <public@khwilliamson.com> 2014-01-27 23:03:47 -0700
commit: 1505a47c967e97cd85fc9d764bd8736468ad8a95 (patch)
tree: ca0f6409483485508922737c780a6ae2e5b25d69 /regcomp.c
parent: b5049321cab77697d1e1510c28127c08c566f5c4 (diff)
download: perl-1505a47c967e97cd85fc9d764bd8736468ad8a95.tar.gz
1 files changed, 21 insertions, 1 deletions
diff --git a/regcomp.c b/regcomp.c
index 6ec77884e6..9bf05c0ff6 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -10801,7 +10801,20 @@ S_alloc_maybe_populate_EXACT(pTHX_ RExC_state_t *pRExC_state,
 
     if (! len_passed_in) {
         if (UTF) {
-            if (FOLD && (! LOC || code_point > 255)) {
+            if (UNI_IS_INVARIANT(code_point)) {
+                if (LOC || ! FOLD) {    /* /l defers folding until runtime */
+                    *character = (U8) code_point;
+                }
+                else { /* Here is /i and not /l (toFOLD() is defined on just
+                          ASCII, which isn't the same thing as INVARIANT on
+                          EBCDIC, but it works there, as the extra invariants
+                          fold to themselves) */
+                    *character = toFOLD((U8) code_point);
+                }
+                len = 1;
+            }
+            else if (FOLD && (! LOC || code_point > 255)) {
+                /* Folding, and ok to do so now */
                 _to_uni_fold_flags(code_point,
                                    character,
                                    &len,
@@ -10811,6 +10824,13 @@ S_alloc_maybe_populate_EXACT(pTHX_ RExC_state_t *pRExC_state,
                                                        ? FOLD_FLAGS_NOMIX_ASCII
                                                        : 0));
             }
+            else if (code_point <= MAX_UTF8_TWO_BYTE) {
+
+                /* Not folding this cp, and can output it directly */
+                *character = UTF8_TWO_BYTE_HI(code_point);
+                *(character + 1) = UTF8_TWO_BYTE_LO(code_point);
+                len = 2;
+            }
             else {
                 uvchr_to_utf8( character, code_point);
                 len = UTF8SKIP(character);
author	Karl Williamson <public@khwilliamson.com>	2014-01-23 20:09:43 -0700
committer	Karl Williamson <public@khwilliamson.com>	2014-01-27 23:03:47 -0700
commit	1505a47c967e97cd85fc9d764bd8736468ad8a95 (patch)
tree	ca0f6409483485508922737c780a6ae2e5b25d69 /regcomp.c
parent	b5049321cab77697d1e1510c28127c08c566f5c4 (diff)
download	perl-1505a47c967e97cd85fc9d764bd8736468ad8a95.tar.gz