summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Joel E. Denny <jdenny@clemson.edu>, 2009-08-19 20:37:28 -0400
committer: Joel E. Denny <jdenny@clemson.edu>, 2009-08-19 20:43:40 -0400
commit: c2724603c9d87e816dbdf1a9bfd7d70ffc1bd137 (patch)
tree: 72c3af97bfa7325b52f8da3b786289dadd430888
parent: 17aed602c6780058fe591e871c97f19d6a14d57a (diff)
download: bison-c2724603c9d87e816dbdf1a9bfd7d70ffc1bd137.tar.gz
Fix complaints about escape sequences.
Discussed starting at
<http://lists.gnu.org/archive/html/bison-patches/2009-08/msg00036.html>.
* src/scan-gram.l (SC_ESCAPED_STRING, SC_ESCAPED_CHARACTER): For a \0 and similar escape sequences meaning the null character, report an invalid escape sequence instead of an invalid null character because the latter does not actually appear in the user's input.
In all escape sequence complaints, don't escape the initial backslash, and don't quote when the sequence appears at the end of the complaint line unless there's whitespace that quotearg won't escape.
Consistently say "invalid" not "unrecognized".
Consistently prefer "empty character literal" over "extra characters in character literal" warning for invalid escape sequences; that is, consistently discard those sequences.
* tests/input.at (Bad escapes in literals): New.
-rw-r--r-- ChangeLog 20
-rw-r--r-- src/scan-gram.l 34
-rw-r--r-- tests/input.at 43
3 files changed, 83 insertions, 14 deletions
diff --git a/ChangeLog b/ChangeLog
index c0d3607e..d1e0bc10 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,23 @@
+2009-08-19 Joel E. Denny <jdenny@clemson.edu>
+
+ Fix complaints about escape sequences.
+ Discussed starting at
+ <http://lists.gnu.org/archive/html/bison-patches/2009-08/msg00036.html>.
+ * src/scan-gram.l (SC_ESCAPED_STRING, SC_ESCAPED_CHARACTER):
+ For a \0 and similar escape sequences meaning the null
+ character, report an invalid escape sequence instead of an
+ invalid null character because the latter does not actually
+ appear in the user's input.
+ In all escape sequence complaints, don't escape the initial
+ backslash, and don't quote when the sequence appears at the end
+ of the complaint line unless there's whitespace that quotearg
+ won't escape.
+ Consistently say "invalid" not "unrecognized".
+ Consistently prefer "empty character literal" over "extra
+ characters in character literal" warning for invalid escape
+ sequences; that is, consistently discard those sequences.
+ * tests/input.at (Bad escapes in literals): New.
+
2009-08-19 Akim Demaille <demaille@gostai.com>
doc: fixes.
diff --git a/src/scan-gram.l b/src/scan-gram.l
index 7a6d7bfe..4ed30c6f 100644
--- a/src/scan-gram.l
+++ b/src/scan-gram.l
@@ -37,6 +37,7 @@
#include <src/reader.h>
#include <src/uniqstr.h>
+#include <ctype.h>
#include <mbswidth.h>
#include <quote.h>
@@ -578,10 +579,9 @@ splice (\\[ \f\t\v]*\n)*
{
\\[0-7]{1,3} {
unsigned long int c = strtoul (yytext + 1, NULL, 8);
- if (UCHAR_MAX < c)
- complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
- else if (! c)
- complain_at (*loc, _("invalid null character: %s"), quote (yytext));
+ if (!c || UCHAR_MAX < c)
+ complain_at (*loc, _("invalid number after \\-escape: %s"),
+ yytext+1);
else
obstack_1grow (&obstack_for_string, c);
}
@@ -589,10 +589,9 @@ splice (\\[ \f\t\v]*\n)*
\\x[0-9abcdefABCDEF]+ {
verify (UCHAR_MAX < ULONG_MAX);
unsigned long int c = strtoul (yytext + 2, NULL, 16);
- if (UCHAR_MAX < c)
- complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
- else if (! c)
- complain_at (*loc, _("invalid null character: %s"), quote (yytext));
+ if (!c || UCHAR_MAX < c)
+ complain_at (*loc, _("invalid number after \\-escape: %s"),
+ yytext+1);
else
obstack_1grow (&obstack_for_string, c);
}
@@ -610,16 +609,23 @@ splice (\\[ \f\t\v]*\n)*
\\(u|U[0-9abcdefABCDEF]{4})[0-9abcdefABCDEF]{4} {
int c = convert_ucn_to_byte (yytext);
- if (c < 0)
- complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
- else if (! c)
- complain_at (*loc, _("invalid null character: %s"), quote (yytext));
+ if (c <= 0)
+ complain_at (*loc, _("invalid number after \\-escape: %s"),
+ yytext+1);
else
obstack_1grow (&obstack_for_string, c);
}
\\(.|\n) {
- complain_at (*loc, _("unrecognized escape sequence: %s"), quote (yytext));
- STRING_GROW;
+ char const *p = yytext + 1;
+ char quoted_ws[] = "` '";
+ if (isspace (*p) && isprint (*p))
+ {
+ quoted_ws[1] = *p;
+ p = quoted_ws;
+ }
+ else
+ p = quotearg_style_mem (escape_quoting_style, p, 1);
+ complain_at (*loc, _("invalid character after \\-escape: %s"), p);
}
}
diff --git a/tests/input.at b/tests/input.at
index 810e7601..f91c0fb8 100644
--- a/tests/input.at
+++ b/tests/input.at
@@ -1220,3 +1220,46 @@ three.y:4.8-11: missing `'' at end of file
]])
AT_CLEANUP
+
+## ------------------------- ##
+## Bad escapes in literals. ##
+## ------------------------- ##
+
+AT_SETUP([[Bad escapes in literals]])
+
+AT_DATA([input.y],
+[[%%
+start: '\777' '\0' '\xfff' '\x0'
+ '\uffff' '\u0000' '\Uffffffff' '\U00000000'
+ '\ ' '\A';
+]])
+echo 'start: "\T\F\0\1" ;' | tr 'TF01' '\011\014\0\1' >> input.y
+
+AT_BISON_CHECK([input.y], [1], [],
+[[input.y:2.9-12: invalid number after \-escape: 777
+input.y:2.8-13: warning: empty character literal
+input.y:2.16-17: invalid number after \-escape: 0
+input.y:2.15-18: warning: empty character literal
+input.y:2.21-25: invalid number after \-escape: xfff
+input.y:2.20-26: warning: empty character literal
+input.y:2.29-31: invalid number after \-escape: x0
+input.y:2.28-32: warning: empty character literal
+input.y:3.9-14: invalid number after \-escape: uffff
+input.y:3.8-15: warning: empty character literal
+input.y:3.18-23: invalid number after \-escape: u0000
+input.y:3.17-24: warning: empty character literal
+input.y:3.27-36: invalid number after \-escape: Uffffffff
+input.y:3.26-37: warning: empty character literal
+input.y:3.40-49: invalid number after \-escape: U00000000
+input.y:3.39-50: warning: empty character literal
+input.y:4.9-10: invalid character after \-escape: ` '
+input.y:4.8-11: warning: empty character literal
+input.y:4.14-15: invalid character after \-escape: A
+input.y:4.13-16: warning: empty character literal
+input.y:5.9-16: invalid character after \-escape: \t
+input.y:5.17: invalid character after \-escape: \f
+input.y:5.18: invalid character after \-escape: \0
+input.y:5.19: invalid character after \-escape: \001
+]])
+
+AT_CLEANUP