diff options
author | Thomas Aglassinger <roskakori@users.sourceforge.net> | 2016-10-01 18:17:05 +0200 |
---|---|---|
committer | Thomas Aglassinger <roskakori@users.sourceforge.net> | 2016-10-01 18:17:05 +0200 |
commit | cdbc000e03a5a92ea2a28020e71218cae5010a5d (patch) | |
tree | aa99edb179db456678113dde2b12e23d73955a48 | |
parent | 56e75b33d66738b072f9f5525f3af4a8ba863d8b (diff) | |
download | pygments-cdbc000e03a5a92ea2a28020e71218cae5010a5d.tar.gz |
Added analyse_text() that attempts to detect MySQL and Transact-SQL.
-rw-r--r-- | pygments/lexers/sql.py | 52 | ||||
-rw-r--r-- | tests/test_sql.py | 46 |
2 files changed, 95 insertions, 3 deletions
diff --git a/pygments/lexers/sql.py b/pygments/lexers/sql.py index e225a66e..0bc33493 100644 --- a/pygments/lexers/sql.py +++ b/pygments/lexers/sql.py @@ -59,7 +59,14 @@ line_re = re.compile('.*?\n') language_re = re.compile(r"\s+LANGUAGE\s+'?(\w+)'?", re.IGNORECASE) -do_re = re.compile(r'\bDO\b', re.IGNORECASE) +do_re = re.compile(r'\bDO\b', re.IGNORECASE) + +# Regular expressions for analyse_text() +name_between_bracket_re = re.compile(r'\[[a-zA-Z_]\w*\]') +name_between_backtick_re = re.compile(r'`[a-zA-Z_]\w*`') +tsql_go_re = re.compile(r'\bgo\b', re.IGNORECASE) +tsql_declare_re = re.compile(r'\bdeclare\s+@', re.IGNORECASE) +tsql_variable_re = re.compile(r'@[a-zA-Z_]\w*\b') def language_callback(lexer, match): @@ -82,7 +89,7 @@ def language_callback(lexer, match): lexer.text[max(0, match.start()-25):match.start()])) if m: l = lexer._get_lexer('plpgsql') - + # 1 = $, 2 = delimiter, 3 = $ yield (match.start(1), String, match.group(1)) yield (match.start(2), String.Delimiter, match.group(2)) @@ -480,6 +487,9 @@ class SqlLexer(RegexLexer): ] } + def analyse_text(text): + return 0.01 + class TransactSqlLexer(RegexLexer): """ @@ -536,6 +546,29 @@ class TransactSqlLexer(RegexLexer): ] } + def analyse_text(text): + rating = 0 + if tsql_declare_re.search(text): + # Found T-SQL variable declaration. + rating = 1.0 + else: + name_between_backtick_count = len( + name_between_backtick_re.findall((text))) + name_between_bracket_count = len( + name_between_bracket_re.findall(text)) + if name_between_bracket_count >= 2 * name_between_backtick_count: + # Found at least twice as many [name] as `name`. + rating += 0.5 + elif name_between_bracket_count > name_between_backtick_count: + rating += 0.2 + elif name_between_bracket_count > 0: + rating += 0.1 + if tsql_variable_re.search(text) is not None: + rating += 0.1 + if tsql_go_re.search(text) is not None: + rating += 0.1 + return rating + class MySqlLexer(RegexLexer): """ @@ -609,6 +642,21 @@ class MySqlLexer(RegexLexer): ] } + def analyse_text(text): + rating = 0 + name_between_backtick_count = len( + name_between_backtick_re.findall((text))) + name_between_bracket_count = len( + name_between_bracket_re.findall(text)) + if name_between_backtick_count >= 2 * name_between_bracket_count: + # Found at least twice as many `name` as [name]. + rating += 0.5 + elif name_between_backtick_count > name_between_bracket_count: + rating += 0.2 + elif name_between_backtick_count > 0: + rating += 0.1 + return rating + class SqliteConsoleLexer(Lexer): """ diff --git a/tests/test_sql.py b/tests/test_sql.py index c5f5c758..6be34006 100644 --- a/tests/test_sql.py +++ b/tests/test_sql.py @@ -8,7 +8,10 @@ """ import unittest -from pygments.lexers.sql import TransactSqlLexer +from pygments.lexers.sql import name_between_bracket_re, \ + name_between_backtick_re, tsql_go_re, tsql_declare_re, \ + tsql_variable_re, MySqlLexer, SqlLexer, TransactSqlLexer + from pygments.token import Comment, Name, Number, Punctuation, Whitespace @@ -72,3 +75,44 @@ class TransactSqlLexerTest(unittest.TestCase): (Comment.Multiline, '*/'), (Comment.Multiline, '*/'), )) + + +class SqlAnalyzeTextTest(unittest.TestCase): + def test_can_match_analyze_text_res(self): + self.assertEqual(['`a`', '`bc`'], + name_between_backtick_re.findall('select `a`, `bc` from some')) + self.assertEqual(['[a]', '[bc]'], + name_between_bracket_re.findall('select [a], [bc] from some')) + self.assertTrue(tsql_declare_re.search('--\nDeClaRe @some int;')) + self.assertTrue(tsql_go_re.search('select 1\ngo\n--')) + self.assertTrue(tsql_variable_re.search( + 'create procedure dbo.usp_x @a int, @b int')) + + def test_can_analyze_text(self): + mysql_lexer = MySqlLexer() + sql_lexer = SqlLexer() + tsql_lexer = TransactSqlLexer() + code_to_expected_lexer_map = { + 'select `a`, `bc` from some': mysql_lexer, + 'select a, bc from some': sql_lexer, + 'select [a], [bc] from some': tsql_lexer, + '-- `a`, `bc`\nselect [a], [bc] from some': tsql_lexer, + '-- `a`, `bc`\nselect [a], [bc] from some; go': tsql_lexer, + } + sql_lexers = set(code_to_expected_lexer_map.values()) + for code, expected_lexer in code_to_expected_lexer_map.items(): + ratings_and_lexers = list((lexer.analyse_text(code), lexer.name) for lexer in sql_lexers) + best_rating, best_lexer_name = sorted(ratings_and_lexers, reverse=True)[0] + expected_rating = expected_lexer.analyse_text(code) + message = ( + 'lexer must be %s (rating %.2f) instead of ' + '%s (rating %.2f) for analyse_text() on code:\n%s') % ( + expected_lexer.name, + expected_rating, + best_lexer_name, + best_rating, + code + ) + self.assertEqual( + expected_lexer.name, best_lexer_name, message + ) |