author    Anteru <bitbucket@ca.sh13.net>  2019-01-04 13:58:54 +0000
committer Anteru <bitbucket@ca.sh13.net>  2019-01-04 13:58:54 +0000
commit    739365558e04bc7bdc67fc522b47f5baa1259d74 (patch)
tree      46512822e49f5f6d43bee712ef75a923ff308bc9
parent    a895aac301b1f0dae17f03ac78b3e66bc6ebc2f8 (diff)
parent    cdbc000e03a5a92ea2a28020e71218cae5010a5d (diff)
download  pygments-739365558e04bc7bdc67fc522b47f5baa1259d74.tar.gz
Merged in roskakori/pygments-analyze-sql (pull request #672)
Added analyse_text() for SQL
-rw-r--r--  pygments/lexers/sql.py  52
-rw-r--r--  tests/test_sql.py       46
2 files changed, 95 insertions, 3 deletions
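
For context: Pygments calls analyse_text() on every registered lexer when it has to guess a language, and the lexer that returns the highest score (a float between 0.0 and 1.0) wins. A minimal sketch of what the new heuristics enable, assuming this commit is applied; the expected names are illustrative, not captured output:

# guess_lexer() asks each lexer's analyse_text() for a score and picks the best one.
from pygments.lexers import guess_lexer

tsql_code = 'DECLARE @count int;\nselect [a], [bc] from some\ngo'
mysql_code = 'select `a`, `bc` from some'

print(guess_lexer(tsql_code).name)   # expected: Transact-SQL (DECLARE @... rates 1.0)
print(guess_lexer(mysql_code).name)  # expected: MySQL (backticked names rate 0.5)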
diff --git a/pygments/lexers/sql.py b/pygments/lexers/sql.py
index 7dd856b2..d3bc8e23 100644
--- a/pygments/lexers/sql.py
+++ b/pygments/lexers/sql.py
@@ -59,7 +59,14 @@ line_re = re.compile('.*?\n')
language_re = re.compile(r"\s+LANGUAGE\s+'?(\w+)'?", re.IGNORECASE)
-do_re = re.compile(r'\bDO\b', re.IGNORECASE)
+do_re = re.compile(r'\bDO\b', re.IGNORECASE)
+
+# Regular expressions for analyse_text()
+name_between_bracket_re = re.compile(r'\[[a-zA-Z_]\w*\]')
+name_between_backtick_re = re.compile(r'`[a-zA-Z_]\w*`')
+tsql_go_re = re.compile(r'\bgo\b', re.IGNORECASE)
+tsql_declare_re = re.compile(r'\bdeclare\s+@', re.IGNORECASE)
+tsql_variable_re = re.compile(r'@[a-zA-Z_]\w*\b')
def language_callback(lexer, match):
@@ -82,7 +89,7 @@ def language_callback(lexer, match):
lexer.text[max(0, match.start()-25):match.start()]))
if m:
l = lexer._get_lexer('plpgsql')
-
+
# 1 = $, 2 = delimiter, 3 = $
yield (match.start(1), String, match.group(1))
yield (match.start(2), String.Delimiter, match.group(2))
@@ -474,6 +481,9 @@ class SqlLexer(RegexLexer):
]
}
+ def analyse_text(text):
+ return 0.01
+
class TransactSqlLexer(RegexLexer):
"""
@@ -530,6 +540,29 @@ class TransactSqlLexer(RegexLexer):
]
}
+ def analyse_text(text):
+ rating = 0
+ if tsql_declare_re.search(text):
+ # Found T-SQL variable declaration.
+ rating = 1.0
+ else:
+ name_between_backtick_count = len(
+ name_between_backtick_re.findall((text)))
+ name_between_bracket_count = len(
+ name_between_bracket_re.findall(text))
+ if name_between_bracket_count >= 2 * name_between_backtick_count:
+ # Found at least twice as many [name] as `name`.
+ rating += 0.5
+ elif name_between_bracket_count > name_between_backtick_count:
+ rating += 0.2
+ elif name_between_bracket_count > 0:
+ rating += 0.1
+ if tsql_variable_re.search(text) is not None:
+ rating += 0.1
+ if tsql_go_re.search(text) is not None:
+ rating += 0.1
+ return rating
+
class MySqlLexer(RegexLexer):
"""
@@ -603,6 +636,21 @@ class MySqlLexer(RegexLexer):
]
}
+ def analyse_text(text):
+ rating = 0
+ name_between_backtick_count = len(
+ name_between_backtick_re.findall((text)))
+ name_between_bracket_count = len(
+ name_between_bracket_re.findall(text))
+ if name_between_backtick_count >= 2 * name_between_bracket_count:
+ # Found at least twice as many `name` as [name].
+ rating += 0.5
+ elif name_between_backtick_count > name_between_bracket_count:
+ rating += 0.2
+ elif name_between_backtick_count > 0:
+ rating += 0.1
+ return rating
+
class SqliteConsoleLexer(Lexer):
"""
diff --git a/tests/test_sql.py b/tests/test_sql.py
index c5f5c758..6be34006 100644
--- a/tests/test_sql.py
+++ b/tests/test_sql.py
@@ -8,7 +8,10 @@
"""
import unittest
-from pygments.lexers.sql import TransactSqlLexer
+from pygments.lexers.sql import name_between_bracket_re, \
+ name_between_backtick_re, tsql_go_re, tsql_declare_re, \
+ tsql_variable_re, MySqlLexer, SqlLexer, TransactSqlLexer
+
from pygments.token import Comment, Name, Number, Punctuation, Whitespace
@@ -72,3 +75,44 @@ class TransactSqlLexerTest(unittest.TestCase):
(Comment.Multiline, '*/'),
(Comment.Multiline, '*/'),
))
+
+
+class SqlAnalyzeTextTest(unittest.TestCase):
+ def test_can_match_analyze_text_res(self):
+ self.assertEqual(['`a`', '`bc`'],
+ name_between_backtick_re.findall('select `a`, `bc` from some'))
+ self.assertEqual(['[a]', '[bc]'],
+ name_between_bracket_re.findall('select [a], [bc] from some'))
+ self.assertTrue(tsql_declare_re.search('--\nDeClaRe @some int;'))
+ self.assertTrue(tsql_go_re.search('select 1\ngo\n--'))
+ self.assertTrue(tsql_variable_re.search(
+ 'create procedure dbo.usp_x @a int, @b int'))
+
+ def test_can_analyze_text(self):
+ mysql_lexer = MySqlLexer()
+ sql_lexer = SqlLexer()
+ tsql_lexer = TransactSqlLexer()
+ code_to_expected_lexer_map = {
+ 'select `a`, `bc` from some': mysql_lexer,
+ 'select a, bc from some': sql_lexer,
+ 'select [a], [bc] from some': tsql_lexer,
+ '-- `a`, `bc`\nselect [a], [bc] from some': tsql_lexer,
+ '-- `a`, `bc`\nselect [a], [bc] from some; go': tsql_lexer,
+ }
+ sql_lexers = set(code_to_expected_lexer_map.values())
+ for code, expected_lexer in code_to_expected_lexer_map.items():
+ ratings_and_lexers = list((lexer.analyse_text(code), lexer.name) for lexer in sql_lexers)
+ best_rating, best_lexer_name = sorted(ratings_and_lexers, reverse=True)[0]
+ expected_rating = expected_lexer.analyse_text(code)
+ message = (
+ 'lexer must be %s (rating %.2f) instead of '
+ '%s (rating %.2f) for analyse_text() on code:\n%s') % (
+ expected_lexer.name,
+ expected_rating,
+ best_lexer_name,
+ best_rating,
+ code
+ )
+ self.assertEqual(
+ expected_lexer.name, best_lexer_name, message
+ )