diff options
author | Karl Williamson <khw@cpan.org> | 2020-01-31 14:07:42 -0700 |
---|---|---|
committer | Karl Williamson <khw@cpan.org> | 2020-02-12 16:25:53 -0700 |
commit | 7303cc1fd7b20cfd3aa029c9cfd0ea711e974bb7 (patch) | |
tree | fc5a5d7d74fd53914d765b24621e9d139e1d1b33 /toke.c | |
parent | 4eecf4e587dc70863e2a3705f5e277e2bd8b0fc5 (diff) | |
download | perl-7303cc1fd7b20cfd3aa029c9cfd0ea711e974bb7.tar.gz |
toke.c: extract charnames code from S_new_constant
The code for dealing with charnames is intertwined and special cased in
S_new_constant. My guess is it was originally to offer customized,
better error messages when things go wrong. Much later the function was
changed so that a message could be returned instead of output, and the
code didn't really need the customization any longer. But by then
autoloading of charnames had been added when a \N[} was parsed, meaning
that more special casing was added instead, as that had been the logical
place to do it.
This commit extracts the special charnames handling to the one place it
is actually used, and the disentangled S_new_constant is then called.
This is in preparation for future commits, and makes the code cleaner.
This adds testing of the new syntax to lib/charnames.t. That file
randomly generates some tests, simply because there are too many names
to test reasonably at once. To compensate for the added tests, I
lowered the percentage per run of characters tested so that this file
takes about the same amount of time as before.
Diffstat (limited to 'toke.c')
-rw-r--r-- | toke.c | 158 |
1 files changed, 82 insertions, 76 deletions
@@ -2624,6 +2624,7 @@ Perl_get_and_check_backslash_N_name(pTHX_ const char* s, * 'is_utf8' is TRUE if we know we want the result to be UTF-8 even if it * doesn't have to be. */ + SV* char_name; SV* res; HV * table; SV **cvp; @@ -2631,6 +2632,8 @@ Perl_get_and_check_backslash_N_name(pTHX_ const char* s, SV *rv; HV *stash; const char* backslash_ptr = s - 3; /* Points to the <\> of \N{... */ + bool charnames_loaded = FALSE; /* Is charnames loaded? */ + unsigned int i; dVAR; PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME; @@ -2638,27 +2641,59 @@ Perl_get_and_check_backslash_N_name(pTHX_ const char* s, assert(e >= s); assert(s > (char *) 3); - res = newSVpvn_flags(s, e - s, (is_utf8) ? SVf_UTF8 : 0); + char_name = newSVpvn_flags(s, e - s, (is_utf8) ? SVf_UTF8 : 0); - if (!SvCUR(res)) { - SvREFCNT_dec_NN(res); + if (!SvCUR(char_name)) { + SvREFCNT_dec_NN(char_name); /* diag_listed_as: Unknown charname '%s' */ *error_msg = Perl_form(aTHX_ "Unknown charname ''"); return NULL; } - res = new_constant( NULL, 0, "charnames", res, NULL, backslash_ptr, + /* Autoload the charnames module */ + for (i = 0; i < 2; i++) { + table = GvHV(PL_hintgv); /* ^H */ + + charnames_loaded = table + && (PL_hints & HINT_LOCALIZE_HH) + && (cvp = hv_fetchs(table, "charnames", FALSE)) + && SvOK(*cvp); + /* Quit if loaded, or failed to load. In the latter case, we break out + * so that new_constant()'s error handling takes care of the necessary + * messages. */ + if (i > 0 || charnames_loaded) { + break; + } + + Perl_load_module(aTHX_ + 0, + newSVpvs("_charnames"), + + /* version parameter; no need to specify it, as if we get too + * early a version, will fail anyway, not being able to find + * '_charnames' */ + NULL, + newSVpvs(":full"), + newSVpvs(":short"), + NULL); + } + + *error_msg = NULL; + res = new_constant( NULL, 0, "charnames", char_name, NULL, backslash_ptr, /* include the <}> */ e - backslash_ptr + 1, error_msg); - if (! SvPOK(res)) { - SvREFCNT_dec_NN(res); + if (*error_msg) { + if (charnames_loaded) { + *error_msg = Perl_form(aTHX_ "Unknown charname '%s'", SvPVX(char_name)); + } + + SvREFCNT_dec(res); return NULL; } /* See if the charnames handler is the Perl core's, and if so, we can skip * the validation needed for a user-supplied one, as Perl's does its own * validation. */ - table = GvHV(PL_hintgv); /* ^H */ cvp = hv_fetchs(table, "charnames", FALSE); if (cvp && (cv = *cvp) && SvROK(cv) && (rv = SvRV(cv), SvTYPE(rv) == SVt_PVCV) && ((stash = CvSTASH(rv)) != NULL)) @@ -9592,75 +9627,31 @@ S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen, SV **cvp; SV *cv, *typesv; const char *why1 = "", *why2 = "", *why3 = ""; + const char * optional_colon = ":"; /* Only some messages have a colon */ + char *msg; PERL_ARGS_ASSERT_NEW_CONSTANT; /* We assume that this is true: */ - if (*key == 'c') { assert (strEQ(key, "charnames")); } assert(type || s); sv_2mortal(sv); /* Parent created it permanently */ - if (!table - || ! (PL_hints & HINT_LOCALIZE_HH) - || ! (cvp = hv_fetch(table, key, keylen, FALSE)) - || ! SvOK(*cvp)) + + if ( ! table + || ! (PL_hints & HINT_LOCALIZE_HH)) { - char *msg; - - /* Here haven't found what we're looking for. If it is charnames, - * perhaps it needs to be loaded. Try doing that before giving up */ - if (*key == 'c') { - Perl_load_module(aTHX_ - 0, - newSVpvs("_charnames"), - /* version parameter; no need to specify it, as if - * we get too early a version, will fail anyway, - * not being able to find '_charnames' */ - NULL, - newSVpvs(":full"), - newSVpvs(":short"), - NULL); - assert(sp == PL_stack_sp); - table = GvHV(PL_hintgv); - if (table - && (PL_hints & HINT_LOCALIZE_HH) - && (cvp = hv_fetch(table, key, keylen, FALSE)) - && SvOK(*cvp)) - { - goto now_ok; - } - } - if (!table || !(PL_hints & HINT_LOCALIZE_HH)) { - msg = Perl_form(aTHX_ - "Constant(%.*s) unknown", - (int)(type ? typelen : len), - (type ? type: s)); - } - else { - why1 = "$^H{"; - why2 = key; - why3 = "} is not defined"; - report: - if (*key == 'c') { - msg = Perl_form(aTHX_ - /* The +3 is for '\N{'; -4 for that, plus '}' */ - "Unknown charname '%.*s'", (int)typelen - 4, type + 3 - ); - } - else { - msg = Perl_form(aTHX_ "Constant(%.*s): %s%s%s", - (int)(type ? typelen : len), - (type ? type: s), why1, why2, why3); - } - } - if (error_msg) { - *error_msg = msg; - } - else { - yyerror_pv(msg, UTF ? SVf_UTF8 : 0); - } - return SvREFCNT_inc_simple_NN(sv); + why1 = "unknown"; + optional_colon = ""; + goto report; + } + + cvp = hv_fetch(table, key, keylen, FALSE); + if (!cvp || !SvOK(*cvp)) { + why1 = "$^H{"; + why2 = key; + why3 = "} is not defined"; + goto report; } - now_ok: + cv = *cvp; if (!pv && s) pv = newSVpvn_flags(s, len, SVs_TEMP); @@ -9705,16 +9696,31 @@ S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen, LEAVE ; POPSTACK; - if (!SvOK(res)) { - why1 = "Call to &{$^H{"; - why2 = key; - why3 = "}} did not return a defined value"; - sv = res; - (void)sv_2mortal(sv); - goto report; + if (SvOK(res)) { + return res; } - return res; + sv = res; + (void)sv_2mortal(sv); + + why1 = "Call to &{$^H{"; + why2 = key; + why3 = "}} did not return a defined value"; + + report: + + msg = Perl_form(aTHX_ "Constant(%.*s)%s %s%s%s", + (int)(type ? typelen : len), + (type ? type: s), + optional_colon, + why1, why2, why3); + if (error_msg) { + *error_msg = msg; + } + else { + yyerror_pv(msg, UTF ? SVf_UTF8 : 0); + } + return SvREFCNT_inc_simple_NN(sv); } PERL_STATIC_INLINE void |