diff options
author | Yves Orton <demerphq@gmail.com> | 2012-02-14 00:46:10 +0100 |
---|---|---|
committer | Yves Orton <demerphq@gmail.com> | 2012-02-14 00:46:10 +0100 |
commit | 62d9fc4e33d0b7108f75401989e52db8b0c37710 (patch) | |
tree | 2796289fbc7415d298c9b05a254326a536573cd6 | |
parent | a58a85fab78d767203f1dac26cbf0717d0c47e87 (diff) | |
download | perl-smoke-me/trie-exactfu.tar.gz |
Minimal patch to allow the trie logic to handle EXACTFU nodes (branch: smoke-me/trie-exactfu)
The old logic was that we would trie EXACTF nodes only when they were
UTF-8. When /u support was added, things were changed so that EXACTFU
nodes are produced in this case; however, they are also produced when
we are using /u on a non-UTF-8 pattern. This patch teaches the trie logic
to handle this case, so these nodes are now converted to a trie as well.
Note: this patch is deliberately minimal. A future patch should
review this logic for further improvements.
-rw-r--r-- | embed.fnc | 4 | ||||
-rw-r--r-- | embed.h | 4 | ||||
-rw-r--r-- | proto.h | 14 | ||||
-rw-r--r-- | regcomp.c | 13 |
4 files changed, 23 insertions, 12 deletions
@@ -604,7 +604,9 @@ Ap |UV |to_uni_upper |UV c|NN U8 *p|NN STRLEN *lenp Ap |UV |to_uni_title |UV c|NN U8 *p|NN STRLEN *lenp #ifdef PERL_IN_UTF8_C sR |U8 |to_lower_latin1|const U8 c|NULLOK U8 *p|NULLOK STRLEN *lenp -p |UV |_to_fold_latin1|const U8 c|NN U8 *p|NN STRLEN *lenp|const bool flags +#endif +#if defined(PERL_IN_UTF8_C) || defined(PERL_IN_REGCOMP_C) +EXp |UV |_to_fold_latin1|const U8 c|NN U8 *p|NN STRLEN *lenp|const bool flags #endif #if defined(PERL_IN_UTF8_C) || defined(PERL_IN_PP_C) p |UV |_to_upper_title_latin1|const U8 c|NN U8 *p|NN STRLEN *lenp|const char S_or_s @@ -980,6 +980,9 @@ #define reghop4 S_reghop4 # endif # endif +# if defined(PERL_IN_UTF8_C) || defined(PERL_IN_REGCOMP_C) +#define _to_fold_latin1(a,b,c,d) Perl__to_fold_latin1(aTHX_ a,b,c,d) +# endif # if defined(PERL_OLD_COPY_ON_WRITE) #define sv_setsv_cow(a,b) Perl_sv_setsv_cow(aTHX_ a,b) # endif @@ -1597,7 +1600,6 @@ #define isa_lookup(a,b,c,d) S_isa_lookup(aTHX_ a,b,c,d) # endif # if defined(PERL_IN_UTF8_C) -#define _to_fold_latin1(a,b,c,d) Perl__to_fold_latin1(aTHX_ a,b,c,d) #define check_locale_boundary_crossing(a,b,c,d) S_check_locale_boundary_crossing(aTHX_ a,b,c,d) #define is_utf8_char_slow S_is_utf8_char_slow #define is_utf8_common(a,b,c) S_is_utf8_common(aTHX_ a,b,c) @@ -7115,12 +7115,6 @@ STATIC bool S_isa_lookup(pTHX_ HV *stash, const char * const name, STRLEN len, U #endif #if defined(PERL_IN_UTF8_C) -PERL_CALLCONV UV Perl__to_fold_latin1(pTHX_ const U8 c, U8 *p, STRLEN *lenp, const bool flags) - __attribute__nonnull__(pTHX_2) - __attribute__nonnull__(pTHX_3); -#define PERL_ARGS_ASSERT__TO_FOLD_LATIN1 \ - assert(p); assert(lenp) - STATIC UV S_check_locale_boundary_crossing(pTHX_ const U8* const p, const UV result, U8* const ustrp, STRLEN *lenp) __attribute__warn_unused_result__ __attribute__nonnull__(pTHX_1) @@ -7161,6 +7155,14 @@ PERL_CALLCONV UV Perl__to_upper_title_latin1(pTHX_ const U8 c, U8 *p, STRLEN *le assert(p); assert(lenp) #endif +#if defined(PERL_IN_UTF8_C) || 
defined(PERL_IN_REGCOMP_C) +PERL_CALLCONV UV Perl__to_fold_latin1(pTHX_ const U8 c, U8 *p, STRLEN *lenp, const bool flags) + __attribute__nonnull__(pTHX_2) + __attribute__nonnull__(pTHX_3); +#define PERL_ARGS_ASSERT__TO_FOLD_LATIN1 \ + assert(p); assert(lenp) + +#endif #if defined(PERL_IN_UTIL_C) STATIC bool S_ckwarn_common(pTHX_ U32 w); STATIC const COP* S_closest_cop(pTHX_ const COP *cop, const OP *o) @@ -1380,7 +1380,7 @@ is the recommended Unicode-aware way of saying #define TRIE_READ_CHAR STMT_START { \ wordlen++; \ - if ( UTF ) { \ + if ( UTF || flags == EXACTFU ) { \ if ( folder ) { \ if ( foldlen > 0 ) { \ uvc = utf8n_to_uvuni( scan, UTF8_MAXLEN, &len, uniflags ); \ @@ -1388,8 +1388,13 @@ is the recommended Unicode-aware way of saying scan += len; \ len = 0; \ } else { \ - len = UTF8SKIP(uc);\ - uvc = to_utf8_fold( uc, foldbuf, &foldlen); \ + if (UTF) { \ + len = UTF8SKIP(uc); \ + uvc = to_utf8_fold( uc, foldbuf, &foldlen); \ + } else { \ + len = 1; \ + uvc = _to_fold_latin1((U32)*uc, foldbuf, &foldlen, 1); \ + } \ foldlen -= UNISKIP( uvc ); \ scan = foldbuf + UNISKIP( uvc ); \ } \ @@ -3289,7 +3294,7 @@ Note that join_exact() assumes that the other types of EXACTFish nodes are not used in tries, so that would have to be updated if this changed */ -#define TRIE_TYPE_IS_SAFE ((UTF && optype == EXACTFU) || optype==EXACT) +#define TRIE_TYPE_IS_SAFE (optype == EXACTFU || optype==EXACT) if ( last && TRIE_TYPE_IS_SAFE ) { make_trie( pRExC_state, |