summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKarl Williamson <khw@cpan.org>2018-03-31 11:18:38 -0600
committerKarl Williamson <khw@cpan.org>2018-03-31 15:36:45 -0600
commitf1bcae08d82e5348df8b5dc0c44313ea38deb12b (patch)
treef8c12cfd58ff90426563d05ef2688442b6e58cdf
parent9c4165e33442f4b2fbc036d677b5a9196a5375ee (diff)
downloadperl-f1bcae08d82e5348df8b5dc0c44313ea38deb12b.tar.gz
Use charnames inversion lists
This commit makes the inversion lists for parsing character name global instead of interpreter level, so can be initialized once per process, and no copies are created upon new thread instantiation. More importantly, this is another instance where utf8_heavy.pl no longer needs to be loaded, and the definition files read from disk.
-rw-r--r--embed.fnc2
-rw-r--r--embed.h2
-rw-r--r--embedvar.h6
-rw-r--r--intrpvar.h2
-rw-r--r--invlist_inline.h2
-rw-r--r--perl.c2
-rw-r--r--perlapi.h4
-rw-r--r--perlvars.h2
-rw-r--r--proto.h2
-rw-r--r--sv.c2
-rw-r--r--toke.c27
11 files changed, 27 insertions, 26 deletions
diff --git a/embed.fnc b/embed.fnc
index 095fe3e1f9..d2f52d4e8f 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -1738,7 +1738,7 @@ EXp |SV* |_core_swash_init|NN const char* pkg|NN const char* name \
|NN SV* listsv|I32 minbits|I32 none \
|NULLOK SV* invlist|NULLOK U8* const flags_p
#endif
-#if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_REGEXEC_C) || defined(PERL_IN_UTF8_C)
+#if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_REGEXEC_C) || defined(PERL_IN_TOKE_C) || defined(PERL_IN_UTF8_C)
EiMRn |UV* |invlist_array |NN SV* const invlist
EiMRn |bool* |get_invlist_offset_addr|NN SV* invlist
EiMRn |UV |_invlist_len |NN SV* const invlist
diff --git a/embed.h b/embed.h
index f77aa4e539..e71262a881 100644
--- a/embed.h
+++ b/embed.h
@@ -1084,7 +1084,7 @@
#endif
#define regprop(a,b,c,d,e) Perl_regprop(aTHX_ a,b,c,d,e)
# endif
-# if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_REGEXEC_C) || defined(PERL_IN_UTF8_C)
+# if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_REGEXEC_C) || defined(PERL_IN_TOKE_C) || defined(PERL_IN_UTF8_C)
#define _get_swash_invlist(a) Perl__get_swash_invlist(aTHX_ a)
#define _invlist_contains_cp S__invlist_contains_cp
#define _invlist_len S__invlist_len
diff --git a/embedvar.h b/embedvar.h
index e34496522e..890a8b44e1 100644
--- a/embedvar.h
+++ b/embedvar.h
@@ -336,8 +336,6 @@
#define PL_unitcheckav_save (vTHX->Iunitcheckav_save)
#define PL_unlockhook (vTHX->Iunlockhook)
#define PL_unsafe (vTHX->Iunsafe)
-#define PL_utf8_charname_begin (vTHX->Iutf8_charname_begin)
-#define PL_utf8_charname_continue (vTHX->Iutf8_charname_continue)
#define PL_utf8_foldclosures (vTHX->Iutf8_foldclosures)
#define PL_utf8_mark (vTHX->Iutf8_mark)
#define PL_utf8_swash_ptrs (vTHX->Iutf8_swash_ptrs)
@@ -463,6 +461,10 @@
#define PL_Gtimesbase (my_vars->Gtimesbase)
#define PL_use_safe_putenv (my_vars->Guse_safe_putenv)
#define PL_Guse_safe_putenv (my_vars->Guse_safe_putenv)
+#define PL_utf8_charname_begin (my_vars->Gutf8_charname_begin)
+#define PL_Gutf8_charname_begin (my_vars->Gutf8_charname_begin)
+#define PL_utf8_charname_continue (my_vars->Gutf8_charname_continue)
+#define PL_Gutf8_charname_continue (my_vars->Gutf8_charname_continue)
#define PL_utf8_foldable (my_vars->Gutf8_foldable)
#define PL_Gutf8_foldable (my_vars->Gutf8_foldable)
#define PL_utf8_idcont (my_vars->Gutf8_idcont)
diff --git a/intrpvar.h b/intrpvar.h
index 00d612a77f..a05e847e45 100644
--- a/intrpvar.h
+++ b/intrpvar.h
@@ -645,8 +645,6 @@ PERLVAR(I, InBitmap, SV *)
/* utf8 character class swashes */
PERLVAR(I, utf8_mark, SV *)
-PERLVAR(I, utf8_charname_begin, SV *)
-PERLVAR(I, utf8_charname_continue, SV *)
PERLVARA(I, utf8_swash_ptrs, POSIX_SWASH_COUNT, SV *)
PERLVAR(I, seen_deprecated_macro, HV *)
diff --git a/invlist_inline.h b/invlist_inline.h
index 4ce04f9087..3a1afc695d 100644
--- a/invlist_inline.h
+++ b/invlist_inline.h
@@ -6,7 +6,7 @@
* License or the Artistic License, as specified in the README file.
*/
-#if defined(PERL_IN_UTF8_C) || defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_REGEXEC_C)
+#if defined(PERL_IN_UTF8_C) || defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_REGEXEC_C) || defined(PERL_IN_TOKE_C)
/* An element is in an inversion list iff its index is even numbered: 0, 2, 4,
* etc */
diff --git a/perl.c b/perl.c
index e364f22dc0..ff66b4e9b4 100644
--- a/perl.c
+++ b/perl.c
@@ -334,6 +334,8 @@ perl_construct(pTHXx)
_Perl_Folds_To_Multi_Char_invlist);
PL_NonL1NonFinalFold = _new_invlist_C_array(
NonL1_Perl_Non_Final_Folds_invlist);
+ PL_utf8_charname_begin = _new_invlist_C_array(_Perl_Charname_Begin_invlist);
+ PL_utf8_charname_continue = _new_invlist_C_array(_Perl_Charname_Continue_invlist);
#if defined(LOCAL_PATCH_COUNT)
diff --git a/perlapi.h b/perlapi.h
index c24f8ea7bf..6b90055773 100644
--- a/perlapi.h
+++ b/perlapi.h
@@ -205,6 +205,10 @@ END_EXTERN_C
#define PL_timesbase (*Perl_Gtimesbase_ptr(NULL))
#undef PL_use_safe_putenv
#define PL_use_safe_putenv (*Perl_Guse_safe_putenv_ptr(NULL))
+#undef PL_utf8_charname_begin
+#define PL_utf8_charname_begin (*Perl_Gutf8_charname_begin_ptr(NULL))
+#undef PL_utf8_charname_continue
+#define PL_utf8_charname_continue (*Perl_Gutf8_charname_continue_ptr(NULL))
#undef PL_utf8_foldable
#define PL_utf8_foldable (*Perl_Gutf8_foldable_ptr(NULL))
#undef PL_utf8_idcont
diff --git a/perlvars.h b/perlvars.h
index a3ba851b22..af48fa8266 100644
--- a/perlvars.h
+++ b/perlvars.h
@@ -300,3 +300,5 @@ PERLVAR(G, utf8_totitle, SV *)
PERLVAR(G, utf8_tolower, SV *)
PERLVAR(G, utf8_tofold, SV *)
PERLVAR(G, utf8_tosimplefold, SV *)
+PERLVAR(G, utf8_charname_begin, SV *)
+PERLVAR(G, utf8_charname_continue, SV *)
diff --git a/proto.h b/proto.h
index 6e2f6463ca..91bcd6d4a6 100644
--- a/proto.h
+++ b/proto.h
@@ -5444,7 +5444,7 @@ PERL_CALLCONV void Perl_regprop(pTHX_ const regexp *prog, SV* sv, const regnode*
#define PERL_ARGS_ASSERT_REGPROP \
assert(sv); assert(o)
#endif
-#if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_REGEXEC_C) || defined(PERL_IN_UTF8_C)
+#if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_REGEXEC_C) || defined(PERL_IN_TOKE_C) || defined(PERL_IN_UTF8_C)
PERL_CALLCONV SV* Perl__get_swash_invlist(pTHX_ SV* const swash)
__attribute__warn_unused_result__;
#define PERL_ARGS_ASSERT__GET_SWASH_INVLIST \
diff --git a/sv.c b/sv.c
index fb89ac0f2d..54a45086aa 100644
--- a/sv.c
+++ b/sv.c
@@ -15592,8 +15592,6 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags,
}
PL_seen_deprecated_macro = hv_dup_inc(proto_perl->Iseen_deprecated_macro, param);
PL_utf8_mark = sv_dup_inc(proto_perl->Iutf8_mark, param);
- PL_utf8_charname_begin = sv_dup_inc(proto_perl->Iutf8_charname_begin, param);
- PL_utf8_charname_continue = sv_dup_inc(proto_perl->Iutf8_charname_continue, param);
if (proto_perl->Ipsig_pend) {
Newxz(PL_psig_pend, SIG_SIZE, int);
diff --git a/toke.c b/toke.c
index 0ef33415c0..3405dc6c89 100644
--- a/toke.c
+++ b/toke.c
@@ -39,6 +39,7 @@ Individual members of C<PL_parser> have their own documentation.
#define PERL_IN_TOKE_C
#include "perl.h"
#include "dquote_inline.h"
+#include "invlist_inline.h"
#define new_constant(a,b,c,d,e,f,g) \
S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
@@ -2683,14 +2684,11 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
s += 2;
}
else {
- if (! PL_utf8_charname_begin) {
- U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
- PL_utf8_charname_begin = _core_swash_init("utf8",
- "_Perl_Charname_Begin",
- &PL_sv_undef,
- 1, 0, NULL, &flags);
- }
- if (! swash_fetch(PL_utf8_charname_begin, (U8 *) s, TRUE)) {
+ if (! _invlist_contains_cp(PL_utf8_charname_begin,
+ utf8_to_uvchr_buf((U8 *) s,
+ (U8 *) e,
+ NULL)))
+ {
goto bad_charname;
}
s += UTF8SKIP(s);
@@ -2714,14 +2712,11 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
s += 2;
}
else {
- if (! PL_utf8_charname_continue) {
- U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
- PL_utf8_charname_continue = _core_swash_init("utf8",
- "_Perl_Charname_Continue",
- &PL_sv_undef,
- 1, 0, NULL, &flags);
- }
- if (! swash_fetch(PL_utf8_charname_continue, (U8 *) s, TRUE)) {
+ if (! _invlist_contains_cp(PL_utf8_charname_continue,
+ utf8_to_uvchr_buf((U8 *) s,
+ (U8 *) e,
+ NULL)))
+ {
goto bad_charname;
}
s += UTF8SKIP(s);