Minimal patch to allow the trie logic to handle EXACTFU nodes [smoke-me/trie-exactfu]

The old logic was that we would trie EXACTF nodes only when they were UTF-8. When /u support was added, things were changed so that EXACTFU nodes are produced in this case; however, they are also produced when we are using /u on a non-UTF-8 pattern. This patch teaches the trie logic to handle that case, so such nodes are now also converted to a trie. Note: this patch is deliberately minimal; a future patch should review this logic for further improvements.
author: Yves Orton <demerphq@gmail.com> 2012-02-14 00:46:10 +0100
committer: Yves Orton <demerphq@gmail.com> 2012-02-14 00:46:10 +0100
commit: 62d9fc4e33d0b7108f75401989e52db8b0c37710 (patch)
tree: 2796289fbc7415d298c9b05a254326a536573cd6
parent: a58a85fab78d767203f1dac26cbf0717d0c47e87 (diff)
download: perl-smoke-me/trie-exactfu.tar.gz
4 files changed, 23 insertions, 12 deletions
diff --git a/embed.fnc b/embed.fnc
index 34aa251d4e..57ab4b71e1 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -604,7 +604,9 @@ Ap	|UV	|to_uni_upper	|UV c|NN U8 *p|NN STRLEN *lenp
 Ap	|UV	|to_uni_title	|UV c|NN U8 *p|NN STRLEN *lenp
 #ifdef PERL_IN_UTF8_C
 sR	|U8	|to_lower_latin1|const U8 c|NULLOK U8 *p|NULLOK STRLEN *lenp
-p	|UV	|_to_fold_latin1|const U8 c|NN U8 *p|NN STRLEN *lenp|const bool flags
+#endif
+#if defined(PERL_IN_UTF8_C) || defined(PERL_IN_REGCOMP_C)
+EXp        |UV        |_to_fold_latin1|const U8 c|NN U8 *p|NN STRLEN *lenp|const bool flags
 #endif
 #if defined(PERL_IN_UTF8_C) || defined(PERL_IN_PP_C)
 p	|UV	|_to_upper_title_latin1|const U8 c|NN U8 *p|NN STRLEN *lenp|const char S_or_s
diff --git a/embed.h b/embed.h
index d429c8d0ea..e20081cb94 100644
--- a/embed.h
+++ b/embed.h
@@ -980,6 +980,9 @@
 #define reghop4			S_reghop4
 #    endif
 #  endif
+#  if defined(PERL_IN_UTF8_C) || defined(PERL_IN_REGCOMP_C)
+#define _to_fold_latin1(a,b,c,d)        Perl__to_fold_latin1(aTHX_ a,b,c,d)
+#  endif
 #  if defined(PERL_OLD_COPY_ON_WRITE)
 #define sv_setsv_cow(a,b)	Perl_sv_setsv_cow(aTHX_ a,b)
 #  endif
@@ -1597,7 +1600,6 @@
 #define isa_lookup(a,b,c,d)	S_isa_lookup(aTHX_ a,b,c,d)
 #  endif
 #  if defined(PERL_IN_UTF8_C)
-#define _to_fold_latin1(a,b,c,d)	Perl__to_fold_latin1(aTHX_ a,b,c,d)
 #define check_locale_boundary_crossing(a,b,c,d)	S_check_locale_boundary_crossing(aTHX_ a,b,c,d)
 #define is_utf8_char_slow	S_is_utf8_char_slow
 #define is_utf8_common(a,b,c)	S_is_utf8_common(aTHX_ a,b,c)
diff --git a/proto.h b/proto.h
index 84bfbf4982..e2a79d7435 100644
--- a/proto.h
+++ b/proto.h
@@ -7115,12 +7115,6 @@ STATIC bool	S_isa_lookup(pTHX_ HV *stash, const char * const name, STRLEN len, U
 
 #endif
 #if defined(PERL_IN_UTF8_C)
-PERL_CALLCONV UV	Perl__to_fold_latin1(pTHX_ const U8 c, U8 *p, STRLEN *lenp, const bool flags)
-			__attribute__nonnull__(pTHX_2)
-			__attribute__nonnull__(pTHX_3);
-#define PERL_ARGS_ASSERT__TO_FOLD_LATIN1	\
-	assert(p); assert(lenp)
-
 STATIC UV	S_check_locale_boundary_crossing(pTHX_ const U8* const p, const UV result, U8* const ustrp, STRLEN *lenp)
 			__attribute__warn_unused_result__
 			__attribute__nonnull__(pTHX_1)
@@ -7161,6 +7155,14 @@ PERL_CALLCONV UV	Perl__to_upper_title_latin1(pTHX_ const U8 c, U8 *p, STRLEN *le
 	assert(p); assert(lenp)
 
 #endif
+#if defined(PERL_IN_UTF8_C) || defined(PERL_IN_REGCOMP_C)
+PERL_CALLCONV UV        Perl__to_fold_latin1(pTHX_ const U8 c, U8 *p, STRLEN *lenp, const bool flags)
+                        __attribute__nonnull__(pTHX_2)
+                        __attribute__nonnull__(pTHX_3);
+#define PERL_ARGS_ASSERT__TO_FOLD_LATIN1        \
+        assert(p); assert(lenp)
+
+#endif
 #if defined(PERL_IN_UTIL_C)
 STATIC bool	S_ckwarn_common(pTHX_ U32 w);
 STATIC const COP*	S_closest_cop(pTHX_ const COP *cop, const OP *o)
diff --git a/regcomp.c b/regcomp.c
index b33eddac88..985279c0e4 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -1380,7 +1380,7 @@ is the recommended Unicode-aware way of saying
 
 #define TRIE_READ_CHAR STMT_START {                                           \
     wordlen++;                                                                \
-    if ( UTF ) {                                                              \
+    if ( UTF || flags == EXACTFU ) {                                          \
 	if ( folder ) {                                                       \
 	    if ( foldlen > 0 ) {                                              \
 	       uvc = utf8n_to_uvuni( scan, UTF8_MAXLEN, &len, uniflags );     \
@@ -1388,8 +1388,13 @@ is the recommended Unicode-aware way of saying
 	       scan += len;                                                   \
 	       len = 0;                                                       \
 	    } else {                                                          \
-		len = UTF8SKIP(uc);\
-		uvc = to_utf8_fold( uc, foldbuf, &foldlen);                   \
+                if (UTF) {                                                    \
+                    len = UTF8SKIP(uc);                                       \
+                    uvc = to_utf8_fold( uc, foldbuf, &foldlen);               \
+                } else {                                                      \
+                    len = 1;                                                  \
+                    uvc = _to_fold_latin1((U32)*uc, foldbuf, &foldlen, 1);    \
+                }                                                             \
 		foldlen -= UNISKIP( uvc );                                    \
 		scan = foldbuf + UNISKIP( uvc );                              \
 	    }                                                                 \
@@ -3289,7 +3294,7 @@ Note that join_exact() assumes that the other types of EXACTFish nodes are not
 used in tries, so that would have to be updated if this changed
 
 */
-#define TRIE_TYPE_IS_SAFE ((UTF && optype == EXACTFU) || optype==EXACT)
+#define TRIE_TYPE_IS_SAFE (optype == EXACTFU || optype==EXACT)
 
                                 if ( last && TRIE_TYPE_IS_SAFE ) {
                                     make_trie( pRExC_state,
author	Yves Orton <demerphq@gmail.com>	2012-02-14 00:46:10 +0100
committer	Yves Orton <demerphq@gmail.com>	2012-02-14 00:46:10 +0100
commit	62d9fc4e33d0b7108f75401989e52db8b0c37710 (patch)
tree	2796289fbc7415d298c9b05a254326a536573cd6
parent	a58a85fab78d767203f1dac26cbf0717d0c47e87 (diff)
download	perl-smoke-me/trie-exactfu.tar.gz