From 6ee1494d6ec270a5832b0ce8e2e5f16cca16935d Mon Sep 17 00:00:00 2001 From: Akim Demaille Date: Sat, 15 Jan 2022 10:28:16 +0100 Subject: doc: explain why location's "column" are defined vaguely Suggested by Frank Heckenbach. * doc/bison.texi (Location Type): Explain why location's "column" are defined vaguely. Show tab handling in ltcalc and calc++. * examples/c/bistromathic/parse.y: Show tab handling. * examples/c++/calc++/calc++.test, * examples/c/bistromathic/bistromathic.test: Check tab handling. --- doc/bison.texi | 39 +++++++++++++++++++++++++++++-- examples/c++/calc++/calc++.test | 15 ++++++++++++ examples/c/bistromathic/bistromathic.test | 23 ++++++++++++++++++ examples/c/bistromathic/parse.y | 7 +++--- 4 files changed, 79 insertions(+), 5 deletions(-) diff --git a/doc/bison.texi b/doc/bison.texi index 69c92c0b..f4ee13e1 100644 --- a/doc/bison.texi +++ b/doc/bison.texi @@ -2365,6 +2365,8 @@ analyzer. * Ltcalc Lexer:: The lexical analyzer. @end menu +See @ref{Tracking Locations} for details about locations. + @node Ltcalc Declarations @subsection Declarations for @code{ltcalc} @@ -2488,7 +2490,7 @@ yylex (void) @group /* Skip white space. */ while ((c = getchar ()) == ' ' || c == '\t') - ++yylloc.last_column; + yylloc.last_column += c == '\t' ? 8 - ((yylloc.last_column - 1) & 7) : 1; @end group @group @@ -4751,6 +4753,33 @@ to 1 for @code{yylloc} at the beginning of the parsing. To initialize initialization), use the @code{%initial-action} directive. @xref{Initial Action Decl}. +@sp 1 + +@cindex column +The meaning of ``column'' is deliberately left vague since there are several +options, depending on the use cases. + +With multibyte input (say UTF-8), simply counting the number of bytes does +not match character positions on the screen. One needs advanced functions +mapping multibyte characters to their visual width (see for instance +Gnulib's @code{mbswidth} and @code{mbsnwidth} functions). 
Tabulation +characters probably need a dedicated implementation, to match the ``go to +next multiple of 8'' behavior. + +However to quote input in error messages, as @command{bison} does: + +@example +@group +1.10-12: @derror{error}: invalid identifier: ‘3.8’ + 1 | %require @derror{3.8} + | @derror{^~~} +@end group +@end example + +@noindent +then byte positions are more handy. So in some cases, tracking both visual +character position @emph{and} byte position is the best option. This is +what @command{bison} does. @node Actions and Locations @subsection Actions and Locations @@ -13776,8 +13805,14 @@ the blanks preceding tokens. Comments would be treated equally. @example @group %@{ + // Take 8-space tabulations into account. + void add_columns (yy::location& loc, const char *buf, int bufsize) + @{ + for (int i = 0; i < bufsize; ++i) + loc.columns (buf[i] == '\t' ? 8 - ((loc.end.column - 1) & 7) : 1); + @} // Code run each time a pattern is matched. - # define YY_USER_ACTION loc.columns (yyleng); + #define YY_USER_ACTION add_columns (loc, yytext, yyleng); %@} @end group %% diff --git a/examples/c++/calc++/calc++.test b/examples/c++/calc++/calc++.test index 318c0c83..868a2601 100755 --- a/examples/c++/calc++/calc++.test +++ b/examples/c++/calc++/calc++.test @@ -50,6 +50,21 @@ EOF run 1 'err: -:2.1: syntax error, unexpected end of file, expecting ( or identifier or number' +# Check handling of tabs. 
+cat >input <input <input <input <input< *1 +> *2 +> *3 +> '' +err: 1.9: syntax error: expected end of file or - or ( or exit or number or function etc., before * +err: 1 | *1 +err: | ^ +err: 2.9: syntax error: expected end of file or - or ( or exit or number or function etc., before * +err: 2 | *2 +err: | ^ +err: 3.9: syntax error: expected end of file or - or ( or exit or number or function etc., before * +err: 3 | *3 +err: | ^ +' diff --git a/examples/c/bistromathic/parse.y b/examples/c/bistromathic/parse.y index 6b5adc9a..99db3540 100644 --- a/examples/c/bistromathic/parse.y +++ b/examples/c/bistromathic/parse.y @@ -308,14 +308,15 @@ yylex (const char **line, YYSTYPE *yylval, YYLTYPE *yylloc, { int c; - // Ignore white space, get first nonwhite character. + // Get next character, ignore white spaces. do { // Move the first position onto the last. yylloc->first_line = yylloc->last_line; yylloc->first_column = yylloc->last_column; - - yylloc->last_column += 1; c = *((*line)++); + // Tab characters go to the next column multiple of 8. + yylloc->last_column += + c == '\t' ? 8 - ((yylloc->last_column - 1) & 7) : 1; } while (c == ' ' || c == '\t'); switch (c) -- cgit v1.2.1