summaryrefslogtreecommitdiff
path: root/toke.c
diff options
context:
space:
mode:
authorKarl Williamson <public@khwilliamson.com>2012-11-01 12:06:32 -0600
committerKarl Williamson <public@khwilliamson.com>2012-11-11 10:11:34 -0700
commit107160e2175acf35e27ea7b7af0c700f994f5437 (patch)
tree6766127186190810ec6eed356bcc2bbdd9f70c6b /toke.c
parentb6ba113734f2321504b9a5e58f1b107f427927a3 (diff)
downloadperl-107160e2175acf35e27ea7b7af0c700f994f5437.tar.gz
toke.c: Fail on malformed UTF-8 in \N{} input
The handler for \N{} can be user-supplied and charnames itself shouldn't have to worry about malformed input. This changes toke.c to check for malformed input before calling the \N{} handler.
Diffstat (limited to 'toke.c')
-rw-r--r--toke.c28
1 files changed, 24 insertions, 4 deletions
diff --git a/toke.c b/toke.c
index 0f946aaec3..c71cdb7d20 100644
--- a/toke.c
+++ b/toke.c
@@ -2654,9 +2654,28 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME;
- res = new_constant( NULL, 0, "charnames",
- /* includes all of: \N{...} */
- res, NULL, s - 3, e - s + 4 );
+ if (UTF && ! is_utf8_string_loc((U8 *) backslash_ptr,
+ e - backslash_ptr,
+ &first_bad_char_loc))
+ {
+ /* If warnings are on, this will print a more detailed analysis of what
+ * is wrong than the error message below */
+ utf8n_to_uvuni(first_bad_char_loc,
+ e - ((char *) first_bad_char_loc),
+ NULL, 0);
+
+ /* We deliberately don't try to print the malformed character, which
+ * might not print very well; it also may be just the first of many
+ * malformations, so don't print what comes after it */
+ yyerror(Perl_form(aTHX_
+ "Malformed UTF-8 character immediately after '%.*s'",
+ (int) (first_bad_char_loc - (U8 *) backslash_ptr), backslash_ptr));
+ return NULL;
+ }
+
+ res = new_constant( NULL, 0, "charnames", res, NULL, backslash_ptr,
+ /* include the <}> */
+ e - backslash_ptr + 1);
if (! SvPOK(res)) {
return NULL;
}
@@ -8931,7 +8950,8 @@ S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
/* Either returns sv, or mortalizes sv and returns a new SV*.
Best used as sv=new_constant(..., sv, ...).
If s, pv are NULL, calls subroutine with one argument,
- and type is used with error messages only. */
+ and <type> is used with error messages only.
+ <type> is assumed to be well formed UTF-8 */
STATIC SV *
S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,