diff options
author | Adrien nayrat <adrien.nayrat@gmail.com> | 2023-04-04 12:49:08 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-04-04 12:49:08 +0200 |
commit | ef0abbaece522732031d61391567c017d48d87b7 (patch) | |
tree | f392c944eb11f8d271aa686deef2ffa66031c58d /pygments | |
parent | 3c6e2af8fbc44bb1ef77389d09118c37faea8746 (diff) | |
download | pygments-git-ef0abbaece522732031d61391567c017d48d87b7.tar.gz |
Add PostgreSQL Explain lexer (#2398)
This lexer add support for PostgreSQL Explain plan :
https://www.postgresql.org/docs/current/sql-explain.html
This was heavily inspired by Maxence Ahlouche work, thanks to him :
https://github.com/maahl/pg_explain_lexer
Co-authored-by: Jean Abou Samra <jean@abou-samra.fr>
Diffstat (limited to 'pygments')
-rw-r--r-- | pygments/lexers/_mapping.py | 1 | ||||
-rw-r--r-- | pygments/lexers/_postgres_builtins.py | 55 | ||||
-rw-r--r-- | pygments/lexers/sql.py | 194 |
3 files changed, 247 insertions, 3 deletions
diff --git a/pygments/lexers/_mapping.py b/pygments/lexers/_mapping.py index ed3d00b1..94d4f312 100644 --- a/pygments/lexers/_mapping.py +++ b/pygments/lexers/_mapping.py @@ -370,6 +370,7 @@ LEXERS = { 'PortugolLexer': ('pygments.lexers.pascal', 'Portugol', ('portugol',), ('*.alg', '*.portugol'), ()), 'PostScriptLexer': ('pygments.lexers.graphics', 'PostScript', ('postscript', 'postscr'), ('*.ps', '*.eps'), ('application/postscript',)), 'PostgresConsoleLexer': ('pygments.lexers.sql', 'PostgreSQL console (psql)', ('psql', 'postgresql-console', 'postgres-console'), (), ('text/x-postgresql-psql',)), + 'PostgresExplainLexer': ('pygments.lexers.sql', 'PostgreSQL EXPLAIN dialect', ('postgres-explain',), ('*.explain',), ('text/x-postgresql-explain',)), 'PostgresLexer': ('pygments.lexers.sql', 'PostgreSQL SQL dialect', ('postgresql', 'postgres'), (), ('text/x-postgresql',)), 'PovrayLexer': ('pygments.lexers.graphics', 'POVRay', ('pov',), ('*.pov', '*.inc'), ('text/x-povray',)), 'PowerShellLexer': ('pygments.lexers.shell', 'PowerShell', ('powershell', 'pwsh', 'posh', 'ps1', 'psm1'), ('*.ps1', '*.psm1'), ('text/x-powershell',)), diff --git a/pygments/lexers/_postgres_builtins.py b/pygments/lexers/_postgres_builtins.py index 86fd3998..ecc2a7ee 100644 --- a/pygments/lexers/_postgres_builtins.py +++ b/pygments/lexers/_postgres_builtins.py @@ -571,6 +571,61 @@ PLPGSQL_KEYWORDS = ( 'RETURN', 'REVERSE', 'SQLSTATE', 'WHILE', ) +# Most of these keywords are from ExplainNode function +# in src/backend/commands/explain.c + +EXPLAIN_KEYWORDS = ( + 'Aggregate', + 'Append', + 'Bitmap Heap Scan', + 'Bitmap Index Scan', + 'BitmapAnd', + 'BitmapOr', + 'CTE Scan', + 'Custom Scan', + 'Delete', + 'Foreign Scan', + 'Function Scan', + 'Gather Merge', + 'Gather', + 'Group', + 'GroupAggregate', + 'Hash Join', + 'Hash', + 'HashAggregate', + 'Incremental Sort', + 'Index Only Scan', + 'Index Scan', + 'Insert', + 'Limit', + 'LockRows', + 'Materialize', + 'Memoize', + 'Merge Append', + 'Merge Join', + 'Merge', + 'MixedAggregate', + 'Named Tuplestore Scan', + 'Nested Loop', + 'ProjectSet', + 'Recursive Union', + 'Result', + 'Sample Scan', + 'Seq Scan', + 'SetOp', + 'Sort', + 'SubPlan', + 'Subquery Scan', + 'Table Function Scan', + 'Tid Range Scan', + 'Tid Scan', + 'Unique', + 'Update', + 'Values Scan', + 'WindowAgg', + 'WorkTable Scan', +) + if __name__ == '__main__': # pragma: no cover import re diff --git a/pygments/lexers/sql.py b/pygments/lexers/sql.py index 87dc638e..ace512be 100644 --- a/pygments/lexers/sql.py +++ b/pygments/lexers/sql.py @@ -30,6 +30,9 @@ - highlights errors in the output and notification levels; - handles psql backslash commands. + `PostgresExplainLexer` + A lexer to highlight Postgres execution plan. + The ``tests/examplefiles`` contains a few test files with data to be parsed by these lexers. @@ -45,7 +48,7 @@ from pygments.token import Punctuation, Whitespace, Text, Comment, Operator, \ from pygments.lexers import get_lexer_by_name, ClassNotFound from pygments.lexers._postgres_builtins import KEYWORDS, DATATYPES, \ - PSEUDO_TYPES, PLPGSQL_KEYWORDS + PSEUDO_TYPES, PLPGSQL_KEYWORDS, EXPLAIN_KEYWORDS from pygments.lexers._mysql_builtins import \ MYSQL_CONSTANTS, \ MYSQL_DATATYPES, \ @@ -57,8 +60,8 @@ from pygments.lexers import _tsql_builtins __all__ = ['PostgresLexer', 'PlPgsqlLexer', 'PostgresConsoleLexer', - 'SqlLexer', 'TransactSqlLexer', 'MySqlLexer', - 'SqliteConsoleLexer', 'RqlLexer'] + 'PostgresExplainLexer', 'SqlLexer', 'TransactSqlLexer', + 'MySqlLexer', 'SqliteConsoleLexer', 'RqlLexer'] line_re = re.compile('.*?\n') sqlite_prompt_re = re.compile(r'^(?:sqlite| ...)>(?= )') @@ -368,6 +371,191 @@ class PostgresConsoleLexer(Lexer): return +class PostgresExplainLexer(RegexLexer): + """ + Handle PostgreSQL EXPLAIN output + + """ + + name = 'PostgreSQL EXPLAIN dialect' + aliases = ['postgres-explain'] + filenames = ['*.explain'] + mimetypes = ['text/x-postgresql-explain'] + + tokens = { + 'root': [ + (r'(:|\(|\)|ms|kB|->|\.\.|\,)', Punctuation), + (r'(\s+)', Whitespace), + + # This match estimated cost and effectively measured counters with ANALYZE + # Then, we move to instrumentation state + (r'(cost)(=?)', bygroups(Name.Class, Punctuation), 'instrumentation'), + (r'(actual)( )(=?)', bygroups(Name.Class, Whitespace, Punctuation), 'instrumentation'), + + # Misc keywords + (words(('actual', 'Memory Usage', 'Memory', 'Buckets', 'Batches', + 'originally', 'row', 'rows', 'Hits', 'Misses', + 'Evictions', 'Overflows'), suffix=r'\b'), + Comment.Single), + + (r'(hit|read|dirtied|written|write|time|calls)(=)', bygroups(Comment.Single, Operator)), + (r'(shared|temp|local)', Keyword.Pseudo), + + # We move to sort state in order to emphasize specific keywords (especially disk access) + (r'(Sort Method)(: )', bygroups(Comment.Preproc, Punctuation), 'sort'), + + # These keywords can be followed by an object, like a table + (r'(Sort Key|Group Key|Presorted Key|Hash Key)(:)( )', + bygroups(Comment.Preproc, Punctuation, Whitespace), 'object_name'), + (r'(Cache Key|Cache Mode)(:)( )', bygroups(Comment, Punctuation, Whitespace), 'object_name'), + + # These keywords can be followed by a predicate + (words(('Join Filter', 'Subplans Removed', 'Filter', 'Merge Cond', + 'Hash Cond', 'Index Cond', 'Recheck Cond', 'Heap Blocks', + 'TID Cond', 'Run Condition', 'Order By', 'Function Call', + 'Table Function Call', 'Inner Unique', 'Params Evaluated', + 'Single Copy', 'Sampling', 'One-Time Filter', 'Output', + 'Relations', 'Remote SQL'), suffix=r'\b'), + Comment.Preproc, 'predicate'), + + # Special keyword to handle ON CONFLICT + (r'Conflict ', Comment.Preproc, 'conflict'), + + # Special keyword for InitPlan or SubPlan + (r'(InitPlan|SubPlan)( )(\d+)( )', + bygroups(Keyword, Whitespace, Number.Integer, Whitespace), + 'init_plan'), + + (words(('Sort Method', 'Join Filter', 'Planning time', + 'Planning Time', 'Execution time', 'Execution Time', + 'Workers Planned', 'Workers Launched', 'Buffers', + 'Planning', 'Worker', 'Query Identifier', 'Time', + 'Full-sort Groups'), suffix=r'\b'), Comment.Preproc), + + # Emphasize these keywords + + (words(('Rows Removed by Join Filter', 'Rows Removed by Filter', + 'Rows Removed by Index Recheck', + 'Heap Fetches', 'never executed'), + suffix=r'\b'), Name.Exception), + (r'(I/O Timings)(:)( )', bygroups(Name.Exception, Punctuation, Whitespace)), + + (words(EXPLAIN_KEYWORDS, suffix=r'\b'), Keyword), + + # join keywords + (r'((Right|Left|Full|Semi|Anti) Join)', Keyword.Type), + (r'(Parallel |Async |Finalize |Partial )', Comment.Preproc), + (r'Backward', Comment.Preproc), + (r'(Intersect|Except|Hash)', Comment.Preproc), + + (r'(CTE)( )(\w*)?', bygroups(Comment, Whitespace, Name.Variable)), + + + # Treat "on" and "using" as a punctuation + (r'(on|using)', Punctuation, 'object_name'), + + + # strings + (r"'(''|[^'])*'", String.Single), + # numbers + (r'\d+\.\d+', Number.Float), + (r'(\d+)', Number.Integer), + + # boolean + (r'(true|false)', Name.Constant), + # explain header + (r'\s*QUERY PLAN\s*\n\s*-+', Comment.Single), + # Settings + (r'(Settings)(:)( )', bygroups(Comment.Preproc, Punctuation, Whitespace), 'setting'), + + # Handle JIT counters + (r'(JIT|Functions|Options|Timing)(:)', bygroups(Comment.Preproc, Punctuation)), + (r'(Inlining|Optimization|Expressions|Deforming|Generation|Emission|Total)', Keyword.Pseudo), + + # Handle Triggers counters + (r'(Trigger)( )(\S*)(:)( )', + bygroups(Comment.Preproc, Whitespace, Name.Variable, Punctuation, Whitespace)), + + ], + 'expression': [ + # matches any kind of parenthesized expression + # the first opening paren is matched by the 'caller' + (r'\(', Punctuation, '#push'), + (r'\)', Punctuation, '#pop'), + (r'(never executed)', Name.Exception), + (r'[^)(]+', Comment), + ], + 'object_name': [ + + # This is a cost or analyze measure + (r'(\(cost)(=?)', bygroups(Name.Class, Punctuation), 'instrumentation'), + (r'(\(actual)( )(=?)', bygroups(Name.Class, Whitespace, Punctuation), 'instrumentation'), + + # if object_name is parenthesized, mark opening paren as + # punctuation, call 'expression', and exit state + (r'\(', Punctuation, 'expression'), + (r'(on)', Punctuation), + # matches possibly schema-qualified table and column names + (r'\w+(\.\w+)*( USING \S+| \w+ USING \S+)', Name.Variable), + (r'\"?\w+\"?(?:\.\"?\w+\"?)?', Name.Variable), + (r'\'\S*\'', Name.Variable), + + # if we encounter a comma, another object is listed + (r',\n', Punctuation, 'object_name'), + (r',', Punctuation, 'object_name'), + + # special case: "*SELECT*" + (r'"\*SELECT\*( \d+)?"(.\w+)?', Name.Variable), + (r'"\*VALUES\*(_\d+)?"(.\w+)?', Name.Variable), + (r'"ANY_subquery"', Name.Variable), + + # Variable $1 ... + (r'\$\d+', Name.Variable), + # cast + (r'::\w+', Name.Variable), + (r' +', Whitespace), + (r'"', Punctuation), + (r'\[\.\.\.\]', Punctuation), + (r'\)', Punctuation, '#pop'), + ], + 'predicate': [ + # if predicate is parenthesized, mark paren as punctuation + (r'(\()([^\n]*)(\))', bygroups(Punctuation, Name.Variable, Punctuation), '#pop'), + # otherwise color until newline + (r'[^\n]*', Name.Variable, '#pop'), + ], + 'instrumentation': [ + (r'=|\.\.', Punctuation), + (r' +', Whitespace), + (r'(rows|width|time|loops)', Name.Class), + (r'\d+\.\d+', Number.Float), + (r'(\d+)', Number.Integer), + (r'\)', Punctuation, '#pop'), + ], + 'conflict': [ + (r'(Resolution: )(\w+)', bygroups(Comment.Preproc, Name.Variable)), + (r'(Arbiter \w+:)', Comment.Preproc, 'object_name'), + (r'(Filter: )', Comment.Preproc, 'predicate'), + ], + 'setting': [ + (r'([a-z_]*?)(\s*)(=)(\s*)(\'.*?\')', bygroups(Name.Attribute, Whitespace, Operator, Whitespace, String)), + (r'\, ', Punctuation), + ], + 'init_plan': [ + (r'\(', Punctuation), + (r'returns \$\d+(,\$\d+)?', Name.Variable), + (r'\)', Punctuation, '#pop'), + ], + 'sort': [ + (r':|kB', Punctuation), + (r'(quicksort|top-N|heapsort|Average|Memory|Peak)', Comment.Prepoc), + (r'(external|merge|Disk|sort)', Name.Exception), + (r'(\d+)', Number.Integer), + (r' +', Whitespace), + ], + } + + class SqlLexer(RegexLexer): """ Lexer for Structured Query Language. Currently, this lexer does |