diff options
author | Karl Williamson <khw@cpan.org> | 2018-03-31 11:18:38 -0600 |
---|---|---|
committer | Karl Williamson <khw@cpan.org> | 2018-03-31 15:36:45 -0600 |
commit | f1bcae08d82e5348df8b5dc0c44313ea38deb12b (patch) | |
tree | f8c12cfd58ff90426563d05ef2688442b6e58cdf | |
parent | 9c4165e33442f4b2fbc036d677b5a9196a5375ee (diff) | |
download | perl-f1bcae08d82e5348df8b5dc0c44313ea38deb12b.tar.gz |
Use charnames inversion lists
This commit makes the inversion lists for parsing character name global
instead of interpreter level, so can be initialized once per process,
and no copies are created upon new thread instantiation. More
importantly, this is another instance where utf8_heavy.pl no longer
needs to be loaded, and the definition files read from disk.
-rw-r--r-- | embed.fnc | 2 | ||||
-rw-r--r-- | embed.h | 2 | ||||
-rw-r--r-- | embedvar.h | 6 | ||||
-rw-r--r-- | intrpvar.h | 2 | ||||
-rw-r--r-- | invlist_inline.h | 2 | ||||
-rw-r--r-- | perl.c | 2 | ||||
-rw-r--r-- | perlapi.h | 4 | ||||
-rw-r--r-- | perlvars.h | 2 | ||||
-rw-r--r-- | proto.h | 2 | ||||
-rw-r--r-- | sv.c | 2 | ||||
-rw-r--r-- | toke.c | 27 |
11 files changed, 27 insertions, 26 deletions
@@ -1738,7 +1738,7 @@ EXp |SV* |_core_swash_init|NN const char* pkg|NN const char* name \ |NN SV* listsv|I32 minbits|I32 none \ |NULLOK SV* invlist|NULLOK U8* const flags_p #endif -#if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_REGEXEC_C) || defined(PERL_IN_UTF8_C) +#if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_REGEXEC_C) || defined(PERL_IN_TOKE_C) || defined(PERL_IN_UTF8_C) EiMRn |UV* |invlist_array |NN SV* const invlist EiMRn |bool* |get_invlist_offset_addr|NN SV* invlist EiMRn |UV |_invlist_len |NN SV* const invlist @@ -1084,7 +1084,7 @@ #endif #define regprop(a,b,c,d,e) Perl_regprop(aTHX_ a,b,c,d,e) # endif -# if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_REGEXEC_C) || defined(PERL_IN_UTF8_C) +# if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_REGEXEC_C) || defined(PERL_IN_TOKE_C) || defined(PERL_IN_UTF8_C) #define _get_swash_invlist(a) Perl__get_swash_invlist(aTHX_ a) #define _invlist_contains_cp S__invlist_contains_cp #define _invlist_len S__invlist_len diff --git a/embedvar.h b/embedvar.h index e34496522e..890a8b44e1 100644 --- a/embedvar.h +++ b/embedvar.h @@ -336,8 +336,6 @@ #define PL_unitcheckav_save (vTHX->Iunitcheckav_save) #define PL_unlockhook (vTHX->Iunlockhook) #define PL_unsafe (vTHX->Iunsafe) -#define PL_utf8_charname_begin (vTHX->Iutf8_charname_begin) -#define PL_utf8_charname_continue (vTHX->Iutf8_charname_continue) #define PL_utf8_foldclosures (vTHX->Iutf8_foldclosures) #define PL_utf8_mark (vTHX->Iutf8_mark) #define PL_utf8_swash_ptrs (vTHX->Iutf8_swash_ptrs) @@ -463,6 +461,10 @@ #define PL_Gtimesbase (my_vars->Gtimesbase) #define PL_use_safe_putenv (my_vars->Guse_safe_putenv) #define PL_Guse_safe_putenv (my_vars->Guse_safe_putenv) +#define PL_utf8_charname_begin (my_vars->Gutf8_charname_begin) +#define PL_Gutf8_charname_begin (my_vars->Gutf8_charname_begin) +#define PL_utf8_charname_continue (my_vars->Gutf8_charname_continue) +#define PL_Gutf8_charname_continue (my_vars->Gutf8_charname_continue) #define PL_utf8_foldable (my_vars->Gutf8_foldable) #define PL_Gutf8_foldable (my_vars->Gutf8_foldable) #define PL_utf8_idcont (my_vars->Gutf8_idcont) diff --git a/intrpvar.h b/intrpvar.h index 00d612a77f..a05e847e45 100644 --- a/intrpvar.h +++ b/intrpvar.h @@ -645,8 +645,6 @@ PERLVAR(I, InBitmap, SV *) /* utf8 character class swashes */ PERLVAR(I, utf8_mark, SV *) -PERLVAR(I, utf8_charname_begin, SV *) -PERLVAR(I, utf8_charname_continue, SV *) PERLVARA(I, utf8_swash_ptrs, POSIX_SWASH_COUNT, SV *) PERLVAR(I, seen_deprecated_macro, HV *) diff --git a/invlist_inline.h b/invlist_inline.h index 4ce04f9087..3a1afc695d 100644 --- a/invlist_inline.h +++ b/invlist_inline.h @@ -6,7 +6,7 @@ * License or the Artistic License, as specified in the README file. */ -#if defined(PERL_IN_UTF8_C) || defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_REGEXEC_C) +#if defined(PERL_IN_UTF8_C) || defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_REGEXEC_C) || defined(PERL_IN_TOKE_C) /* An element is in an inversion list iff its index is even numbered: 0, 2, 4, * etc */ @@ -334,6 +334,8 @@ perl_construct(pTHXx) _Perl_Folds_To_Multi_Char_invlist); PL_NonL1NonFinalFold = _new_invlist_C_array( NonL1_Perl_Non_Final_Folds_invlist); + PL_utf8_charname_begin = _new_invlist_C_array(_Perl_Charname_Begin_invlist); + PL_utf8_charname_continue = _new_invlist_C_array(_Perl_Charname_Continue_invlist); #if defined(LOCAL_PATCH_COUNT) @@ -205,6 +205,10 @@ END_EXTERN_C #define PL_timesbase (*Perl_Gtimesbase_ptr(NULL)) #undef PL_use_safe_putenv #define PL_use_safe_putenv (*Perl_Guse_safe_putenv_ptr(NULL)) +#undef PL_utf8_charname_begin +#define PL_utf8_charname_begin (*Perl_Gutf8_charname_begin_ptr(NULL)) +#undef PL_utf8_charname_continue +#define PL_utf8_charname_continue (*Perl_Gutf8_charname_continue_ptr(NULL)) #undef PL_utf8_foldable #define PL_utf8_foldable (*Perl_Gutf8_foldable_ptr(NULL)) #undef PL_utf8_idcont diff --git a/perlvars.h b/perlvars.h index a3ba851b22..af48fa8266 100644 --- a/perlvars.h +++ b/perlvars.h @@ -300,3 +300,5 @@ PERLVAR(G, utf8_totitle, SV *) PERLVAR(G, utf8_tolower, SV *) PERLVAR(G, utf8_tofold, SV *) PERLVAR(G, utf8_tosimplefold, SV *) +PERLVAR(G, utf8_charname_begin, SV *) +PERLVAR(G, utf8_charname_continue, SV *) @@ -5444,7 +5444,7 @@ PERL_CALLCONV void Perl_regprop(pTHX_ const regexp *prog, SV* sv, const regnode* #define PERL_ARGS_ASSERT_REGPROP \ assert(sv); assert(o) #endif -#if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_REGEXEC_C) || defined(PERL_IN_UTF8_C) +#if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_REGEXEC_C) || defined(PERL_IN_TOKE_C) || defined(PERL_IN_UTF8_C) PERL_CALLCONV SV* Perl__get_swash_invlist(pTHX_ SV* const swash) __attribute__warn_unused_result__; #define PERL_ARGS_ASSERT__GET_SWASH_INVLIST \ @@ -15592,8 +15592,6 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags, } PL_seen_deprecated_macro = hv_dup_inc(proto_perl->Iseen_deprecated_macro, param); PL_utf8_mark = sv_dup_inc(proto_perl->Iutf8_mark, param); - PL_utf8_charname_begin = sv_dup_inc(proto_perl->Iutf8_charname_begin, param); - PL_utf8_charname_continue = sv_dup_inc(proto_perl->Iutf8_charname_continue, param); if (proto_perl->Ipsig_pend) { Newxz(PL_psig_pend, SIG_SIZE, int); @@ -39,6 +39,7 @@ Individual members of C<PL_parser> have their own documentation. #define PERL_IN_TOKE_C #include "perl.h" #include "dquote_inline.h" +#include "invlist_inline.h" #define new_constant(a,b,c,d,e,f,g) \ S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g) @@ -2683,14 +2684,11 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e) s += 2; } else { - if (! PL_utf8_charname_begin) { - U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST; - PL_utf8_charname_begin = _core_swash_init("utf8", - "_Perl_Charname_Begin", - &PL_sv_undef, - 1, 0, NULL, &flags); - } - if (! swash_fetch(PL_utf8_charname_begin, (U8 *) s, TRUE)) { + if (! _invlist_contains_cp(PL_utf8_charname_begin, + utf8_to_uvchr_buf((U8 *) s, + (U8 *) e, + NULL))) + { goto bad_charname; } s += UTF8SKIP(s); @@ -2714,14 +2712,11 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e) s += 2; } else { - if (! PL_utf8_charname_continue) { - U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST; - PL_utf8_charname_continue = _core_swash_init("utf8", - "_Perl_Charname_Continue", - &PL_sv_undef, - 1, 0, NULL, &flags); - } - if (! swash_fetch(PL_utf8_charname_continue, (U8 *) s, TRUE)) { + if (! _invlist_contains_cp(PL_utf8_charname_continue, + utf8_to_uvchr_buf((U8 *) s, + (U8 *) e, + NULL))) + { goto bad_charname; } s += UTF8SKIP(s); |