diff options
author | Karl Williamson <public@khwilliamson.com> | 2012-11-01 12:06:32 -0600 |
---|---|---|
committer | Karl Williamson <public@khwilliamson.com> | 2012-11-11 10:11:34 -0700 |
commit | 107160e2175acf35e27ea7b7af0c700f994f5437 (patch) | |
tree | 6766127186190810ec6eed356bcc2bbdd9f70c6b /toke.c | |
parent | b6ba113734f2321504b9a5e58f1b107f427927a3 (diff) | |
download | perl-107160e2175acf35e27ea7b7af0c700f994f5437.tar.gz |
toke.c: Fail on malformed UTF-8 in \N{} input
The handler for \N{} can be user-supplied and charnames itself
shouldn't have to worry about malformed input. This changes toke.c to
check for malformed input before calling the \N{} handler.
Diffstat (limited to 'toke.c')
-rw-r--r-- | toke.c | 28 |
1 files changed, 24 insertions, 4 deletions
@@ -2654,9 +2654,28 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e) PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME; - res = new_constant( NULL, 0, "charnames", - /* includes all of: \N{...} */ - res, NULL, s - 3, e - s + 4 ); + if (UTF && ! is_utf8_string_loc((U8 *) backslash_ptr, + e - backslash_ptr, + &first_bad_char_loc)) + { + /* If warnings are on, this will print a more detailed analysis of what + * is wrong than the error message below */ + utf8n_to_uvuni(first_bad_char_loc, + e - ((char *) first_bad_char_loc), + NULL, 0); + + /* We deliberately don't try to print the malformed character, which + * might not print very well; it also may be just the first of many + * malformations, so don't print what comes after it */ + yyerror(Perl_form(aTHX_ + "Malformed UTF-8 character immediately after '%.*s'", + (int) (first_bad_char_loc - (U8 *) backslash_ptr), backslash_ptr)); + return NULL; + } + + res = new_constant( NULL, 0, "charnames", res, NULL, backslash_ptr, + /* include the <}> */ + e - backslash_ptr + 1); if (! SvPOK(res)) { return NULL; } @@ -8931,7 +8950,8 @@ S_checkcomma(pTHX_ const char *s, const char *name, const char *what) /* Either returns sv, or mortalizes sv and returns a new SV*. Best used as sv=new_constant(..., sv, ...). If s, pv are NULL, calls subroutine with one argument, - and type is used with error messages only. */ + and <type> is used with error messages only. + <type> is assumed to be well formed UTF-8 */ STATIC SV * S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen, |