summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAkim Demaille <akim.demaille@gmail.com>2022-01-15 10:28:16 +0100
committerAkim Demaille <akim.demaille@gmail.com>2022-06-15 07:55:13 +0200
commit6ee1494d6ec270a5832b0ce8e2e5f16cca16935d (patch)
treef9d38041ad4b84a92b9e3824d018c700547fcaa8
parenta475c4d5c1fff75b31dcedf0124c521e573a5fc7 (diff)
downloadbison-6ee1494d6ec270a5832b0ce8e2e5f16cca16935d.tar.gz
doc: explain why location's "column" are defined vaguely
Suggested by Frank Heckenbach. <https://lists.gnu.org/r/bug-bison/2022-01/msg00000.html> * doc/bison.texi (Location Type): Explain why location's "column" are defined vaguely. Show tab handling in ltcalc and calc++. * examples/c/bistromathic/parse.y: Show tab handling. * examples/c++/calc++/calc++.test, * examples/c/bistromathic/bistromathic.test: Check tab handling.
-rw-r--r--doc/bison.texi39
-rwxr-xr-xexamples/c++/calc++/calc++.test15
-rwxr-xr-xexamples/c/bistromathic/bistromathic.test23
-rw-r--r--examples/c/bistromathic/parse.y7
4 files changed, 79 insertions, 5 deletions
diff --git a/doc/bison.texi b/doc/bison.texi
index 69c92c0b..f4ee13e1 100644
--- a/doc/bison.texi
+++ b/doc/bison.texi
@@ -2365,6 +2365,8 @@ analyzer.
* Ltcalc Lexer:: The lexical analyzer.
@end menu
+See @ref{Tracking Locations} for details about locations.
+
@node Ltcalc Declarations
@subsection Declarations for @code{ltcalc}
@@ -2488,7 +2490,7 @@ yylex (void)
@group
/* Skip white space. */
while ((c = getchar ()) == ' ' || c == '\t')
- ++yylloc.last_column;
+ yylloc.last_column += c == '\t' ? 8 - ((yylloc.last_column - 1) & 7) : 1;
@end group
@group
@@ -4751,6 +4753,33 @@ to 1 for @code{yylloc} at the beginning of the parsing. To initialize
initialization), use the @code{%initial-action} directive. @xref{Initial
Action Decl}.
+@sp 1
+
+@cindex column
+The meaning of ``column'' is deliberately left vague since there are several
+options, depending on the use cases.
+
+With multibyte input (say UTF-8), simply counting the number of bytes does
+not match character positions on the screen. One needs advanced functions
+mapping multibyte characters to their visual width (see for instance
+Gnulib's @code{mbswidth} and @code{mbsnwidth} functions). Tabulation
+characters probably need a dedicated implementation, to match the ``go to
+next multiple of 8'' behavior.
+
+However, to quote input in error messages, as @command{bison} does:
+
+@example
+@group
+1.10-12: @derror{error}: invalid identifier: ‘3.8’
+ 1 | %require @derror{3.8}
+ | @derror{^~~}
+@end group
+@end example
+
+@noindent
+then byte positions are more handy. So in some cases, tracking both visual
+character position @emph{and} byte position is the best option. This is
+what @command{bison} does.
@node Actions and Locations
@subsection Actions and Locations
@@ -13776,8 +13805,14 @@ the blanks preceding tokens. Comments would be treated equally.
@example
@group
%@{
+ // Take 8-space tabulations into account.
+ void add_columns (yy::location& loc, const char *buf, int bufsize)
+ @{
+ for (int i = 0; i < bufsize; ++i)
+ loc.columns (buf[i] == '\t' ? 8 - ((loc.end.column - 1) & 7) : 1);
+ @}
// Code run each time a pattern is matched.
- # define YY_USER_ACTION loc.columns (yyleng);
+ #define YY_USER_ACTION add_columns (loc, yytext, yyleng);
%@}
@end group
%%
diff --git a/examples/c++/calc++/calc++.test b/examples/c++/calc++/calc++.test
index 318c0c83..868a2601 100755
--- a/examples/c++/calc++/calc++.test
+++ b/examples/c++/calc++/calc++.test
@@ -50,6 +50,21 @@ EOF
run 1 'err: -:2.1: syntax error, unexpected end of file, expecting ( or identifier or number'
+# Check handling of tabs.
+cat >input <<EOF
+ *1
+EOF
+run 1 'err: -:1.9: syntax error, unexpected *, expecting ( or identifier or number'
+cat >input <<EOF
+ *2
+EOF
+run 1 'err: -:1.9: syntax error, unexpected *, expecting ( or identifier or number'
+cat >input <<EOF
+ *3
+EOF
+run 1 'err: -:1.9: syntax error, unexpected *, expecting ( or identifier or number'
+
+
# LAC finds many more tokens.
cat >input <<EOF
a := 1
diff --git a/examples/c/bistromathic/bistromathic.test b/examples/c/bistromathic/bistromathic.test
index b46f996a..4a8efe44 100755
--- a/examples/c/bistromathic/bistromathic.test
+++ b/examples/c/bistromathic/bistromathic.test
@@ -366,3 +366,26 @@ err: 1.15: syntax error: expected - or ( or number or function or variable befor
err: 1 | (1++2) + 3 + ''
err: | ^
'
+
+# Check handling of literal tabs. "Escape" them with a C-v, so that
+# they are not processed as completion requests.
+cat >input<<EOF
+ *1
+ *2
+  *3
+EOF
+# readline processes the tabs itself, and replaces them with spaces.
+run -n 0 '> *1
+> *2
+> *3
+> ''
+err: 1.9: syntax error: expected end of file or - or ( or exit or number or function etc., before *
+err: 1 | *1
+err: | ^
+err: 2.9: syntax error: expected end of file or - or ( or exit or number or function etc., before *
+err: 2 | *2
+err: | ^
+err: 3.9: syntax error: expected end of file or - or ( or exit or number or function etc., before *
+err: 3 | *3
+err: | ^
+'
diff --git a/examples/c/bistromathic/parse.y b/examples/c/bistromathic/parse.y
index 6b5adc9a..99db3540 100644
--- a/examples/c/bistromathic/parse.y
+++ b/examples/c/bistromathic/parse.y
@@ -308,14 +308,15 @@ yylex (const char **line, YYSTYPE *yylval, YYLTYPE *yylloc,
{
int c;
- // Ignore white space, get first nonwhite character.
+ // Get next character, ignore white spaces.
do {
// Move the first position onto the last.
yylloc->first_line = yylloc->last_line;
yylloc->first_column = yylloc->last_column;
-
- yylloc->last_column += 1;
c = *((*line)++);
+ // Tab characters go to the next column multiple of 8.
+ yylloc->last_column +=
+ c == '\t' ? 8 - ((yylloc->last_column - 1) & 7) : 1;
} while (c == ' ' || c == '\t');
switch (c)