diff options
author | Kurt McKee <contactme@kurtmckee.org> | 2020-10-27 01:56:41 -0500 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-10-27 07:56:41 +0100 |
commit | a72957f36e7e317966dfb4115afcca17a0289486 (patch) | |
tree | dc5bd19542e9d836219f83be83076088161e936a /pygments/lexers/sql.py | |
parent | 373aef467d3f4c48e1300087a156db497076dcca (diff) | |
download | pygments-git-a72957f36e7e317966dfb4115afcca17a0289486.tar.gz |
MySQL: Tokenize quoted schema object names, and escape characters, uniquely (#1555)
* MySQL: Tokenize quoted schema object names, and escape characters, uniquely
Changes in this patch:
* Name.Quoted and Name.Quoted.Escape are introduced as non-standard tokens
* HTML and LaTeX formatters were confirmed to provide default formatting
if they encounter these two non-standard tokens. They also add style
classes based on the token name, like "n-Quoted" (HTML) or "nQuoted"
(LaTeX) so that users can add custom styles for these.
* Removed "\`" and "\\" as schema object name escapes. These are relics
of the previous regular expression for backtick-quoted names and are
not treated as escape sequences. The behavior was confirmed in the
MySQL documentation as well as by running queries in MySQL Workbench.
* Prevent "123abc" from being treated as an integer followed by a schema
object name. MySQL allows leading numbers in schema object names as long
as 0-9 are not the only characters in the schema object name.
* Add ~10 more unit tests to validate behavior.
Closes #1551
* Remove an end-of-line regex match that triggered a lint warning
Also, add tests that confirm correct behavior. No tests failed before
or after removing the '$' match in the regex, but now regexlint isn't
complaining.
Removing the '$' match is probably safe only because Pygments
adds a newline at the end of the input text, so there is always a
character after a bare integer literal for the lookahead to match.
Diffstat (limited to 'pygments/lexers/sql.py')
-rw-r--r-- | pygments/lexers/sql.py | 18 |
1 files changed, 9 insertions, 9 deletions
diff --git a/pygments/lexers/sql.py b/pygments/lexers/sql.py
index 98d53c5c..e27e0ddb 100644
--- a/pygments/lexers/sql.py
+++ b/pygments/lexers/sql.py
@@ -620,7 +620,7 @@ class MySqlLexer(RegexLexer):
             (r'[0-9]+\.[0-9]*(e[+-]?[0-9]+)?', Number.Float),  # Mandatory integer, optional fraction and exponent
             (r'[0-9]*\.[0-9]+(e[+-]?[0-9]+)?', Number.Float),  # Mandatory fraction, optional integer and exponent
             (r'[0-9]+e[+-]?[0-9]+', Number.Float),  # Exponents with integer significands are still floats
-            (r'[0-9]+', Number.Integer),
+            (r'[0-9]+(?=[^0-9a-z$_\u0080-\uffff])', Number.Integer),  # Integers that are not in a schema object name
 
             # Date literals
             (r"\{\s*d\s*(?P<quote>['\"])\s*\d{2}(\d{2})?.?\d{2}.?\d{2}\s*(?P=quote)\s*\}",
@@ -673,7 +673,7 @@ class MySqlLexer(RegexLexer):
             # numeric literals have already been handled above.
             #
             ('[0-9a-z$_\u0080-\uffff]+', Name),
-            (r'`', Name, 'schema-object-name'),
+            (r'`', Name.Quoted, 'schema-object-name'),
 
             # Punctuation
             (r'[(),.;]', Punctuation),
@@ -737,15 +737,15 @@ class MySqlLexer(RegexLexer):
         # Schema object name substates
         # ----------------------------
         #
-        # Backtick-quoted schema object names support escape characters.
-        # It may be desirable to tokenize escape sequences differently,
-        # but currently Pygments does not have an obvious token type for
-        # this unique situation (for example, "Name.Escape").
+        # "Name.Quoted" and "Name.Quoted.Escape" are non-standard but
+        # formatters will style them as "Name" by default but add
+        # additional styles based on the token name. This gives users
+        # flexibility to add custom styles as desired.
         #
         'schema-object-name': [
-            (r'[^`\\]+', Name),
-            (r'(?:\\\\|\\`|``)', Name),  # This could be an escaped name token type.
-            (r'`', Name, '#pop'),
+            (r'[^`]+', Name.Quoted),
+            (r'``', Name.Quoted.Escape),
+            (r'`', Name.Quoted, '#pop'),
         ],
     }