summary | refs | log | tree | commit | diff
path: root/pygments/lexers/sql.py
diff options
context:
space:
mode:
author: Kurt McKee <contactme@kurtmckee.org> 2020-10-27 01:56:41 -0500
committer: GitHub <noreply@github.com> 2020-10-27 07:56:41 +0100
commit: a72957f36e7e317966dfb4115afcca17a0289486 (patch)
tree: dc5bd19542e9d836219f83be83076088161e936a /pygments/lexers/sql.py
parent: 373aef467d3f4c48e1300087a156db497076dcca (diff)
download: pygments-git-a72957f36e7e317966dfb4115afcca17a0289486.tar.gz
MySQL: Tokenize quoted schema object names, and escape characters, uniquely (#1555)
* MySQL: Tokenize quoted schema object names, and escape characters, uniquely

  Changes in this patch:

  * Name.Quoted and Name.Quoted.Escape are introduced as non-standard tokens
  * HTML and LaTeX formatters were confirmed to provide default formatting if they encounter these two non-standard tokens. They also add style classes based on the token name, like "n-Quoted" (HTML) or "nQuoted" (LaTeX) so that users can add custom styles for these.
  * Removed "\`" and "\\" as schema object name escapes. These are relics of the previous regular expression for backtick-quoted names and are not treated as escape sequences. The behavior was confirmed in the MySQL documentation as well as by running queries in MySQL Workbench.
  * Prevent "123abc" from being treated as an integer followed by a schema object name. MySQL allows leading numbers in schema object names as long as 0-9 are not the only characters in the schema object name.
  * Add ~10 more unit tests to validate behavior.

  Closes #1551

* Remove an end-of-line regex match that triggered a lint warning

  Also, add tests that confirm correct behavior. No tests failed before or after removing the '$' match in the regex, but now regexlint isn't complaining.

  Removing the '$' matching probably depends on the fact that Pygments adds a newline at the end of the input text, so there is always something after a bare integer literal.
Diffstat (limited to 'pygments/lexers/sql.py')
-rw-r--r-- pygments/lexers/sql.py 18
1 files changed, 9 insertions, 9 deletions
diff --git a/pygments/lexers/sql.py b/pygments/lexers/sql.py
index 98d53c5c..e27e0ddb 100644
--- a/pygments/lexers/sql.py
+++ b/pygments/lexers/sql.py
@@ -620,7 +620,7 @@ class MySqlLexer(RegexLexer):
(r'[0-9]+\.[0-9]*(e[+-]?[0-9]+)?', Number.Float), # Mandatory integer, optional fraction and exponent
(r'[0-9]*\.[0-9]+(e[+-]?[0-9]+)?', Number.Float), # Mandatory fraction, optional integer and exponent
(r'[0-9]+e[+-]?[0-9]+', Number.Float), # Exponents with integer significands are still floats
- (r'[0-9]+', Number.Integer),
+ (r'[0-9]+(?=[^0-9a-z$_\u0080-\uffff])', Number.Integer), # Integers that are not in a schema object name
# Date literals
(r"\{\s*d\s*(?P<quote>['\"])\s*\d{2}(\d{2})?.?\d{2}.?\d{2}\s*(?P=quote)\s*\}",
@@ -673,7 +673,7 @@ class MySqlLexer(RegexLexer):
# numeric literals have already been handled above.
#
('[0-9a-z$_\u0080-\uffff]+', Name),
- (r'`', Name, 'schema-object-name'),
+ (r'`', Name.Quoted, 'schema-object-name'),
# Punctuation
(r'[(),.;]', Punctuation),
@@ -737,15 +737,15 @@ class MySqlLexer(RegexLexer):
# Schema object name substates
# ----------------------------
#
- # Backtick-quoted schema object names support escape characters.
- # It may be desirable to tokenize escape sequences differently,
- # but currently Pygments does not have an obvious token type for
- # this unique situation (for example, "Name.Escape").
+ # "Name.Quoted" and "Name.Quoted.Escape" are non-standard but
+ # formatters will style them as "Name" by default but add
+ # additional styles based on the token name. This gives users
+ # flexibility to add custom styles as desired.
#
'schema-object-name': [
- (r'[^`\\]+', Name),
- (r'(?:\\\\|\\`|``)', Name), # This could be an escaped name token type.
- (r'`', Name, '#pop'),
+ (r'[^`]+', Name.Quoted),
+ (r'``', Name.Quoted.Escape),
+ (r'`', Name.Quoted, '#pop'),
],
}