summaryrefslogtreecommitdiff
path: root/pygments
diff options
context:
space:
mode:
authorAdrien nayrat <adrien.nayrat@gmail.com>2023-04-04 12:49:08 +0200
committerGitHub <noreply@github.com>2023-04-04 12:49:08 +0200
commitef0abbaece522732031d61391567c017d48d87b7 (patch)
treef392c944eb11f8d271aa686deef2ffa66031c58d /pygments
parent3c6e2af8fbc44bb1ef77389d09118c37faea8746 (diff)
downloadpygments-git-ef0abbaece522732031d61391567c017d48d87b7.tar.gz
Add PostgreSQL Explain lexer (#2398)
This lexer adds support for PostgreSQL EXPLAIN plans: https://www.postgresql.org/docs/current/sql-explain.html It was heavily inspired by Maxence Ahlouche's work, thanks to him: https://github.com/maahl/pg_explain_lexer Co-authored-by: Jean Abou Samra <jean@abou-samra.fr>
Diffstat (limited to 'pygments')
-rw-r--r--pygments/lexers/_mapping.py1
-rw-r--r--pygments/lexers/_postgres_builtins.py55
-rw-r--r--pygments/lexers/sql.py194
3 files changed, 247 insertions, 3 deletions
diff --git a/pygments/lexers/_mapping.py b/pygments/lexers/_mapping.py
index ed3d00b1..94d4f312 100644
--- a/pygments/lexers/_mapping.py
+++ b/pygments/lexers/_mapping.py
@@ -370,6 +370,7 @@ LEXERS = {
'PortugolLexer': ('pygments.lexers.pascal', 'Portugol', ('portugol',), ('*.alg', '*.portugol'), ()),
'PostScriptLexer': ('pygments.lexers.graphics', 'PostScript', ('postscript', 'postscr'), ('*.ps', '*.eps'), ('application/postscript',)),
'PostgresConsoleLexer': ('pygments.lexers.sql', 'PostgreSQL console (psql)', ('psql', 'postgresql-console', 'postgres-console'), (), ('text/x-postgresql-psql',)),
+ 'PostgresExplainLexer': ('pygments.lexers.sql', 'PostgreSQL EXPLAIN dialect', ('postgres-explain',), ('*.explain',), ('text/x-postgresql-explain',)),
'PostgresLexer': ('pygments.lexers.sql', 'PostgreSQL SQL dialect', ('postgresql', 'postgres'), (), ('text/x-postgresql',)),
'PovrayLexer': ('pygments.lexers.graphics', 'POVRay', ('pov',), ('*.pov', '*.inc'), ('text/x-povray',)),
'PowerShellLexer': ('pygments.lexers.shell', 'PowerShell', ('powershell', 'pwsh', 'posh', 'ps1', 'psm1'), ('*.ps1', '*.psm1'), ('text/x-powershell',)),
diff --git a/pygments/lexers/_postgres_builtins.py b/pygments/lexers/_postgres_builtins.py
index 86fd3998..ecc2a7ee 100644
--- a/pygments/lexers/_postgres_builtins.py
+++ b/pygments/lexers/_postgres_builtins.py
@@ -571,6 +571,61 @@ PLPGSQL_KEYWORDS = (
'RETURN', 'REVERSE', 'SQLSTATE', 'WHILE',
)
+# Most of these keywords come from the ExplainNode function
+# in src/backend/commands/explain.c
+
+EXPLAIN_KEYWORDS = (
+ 'Aggregate',
+ 'Append',
+ 'Bitmap Heap Scan',
+ 'Bitmap Index Scan',
+ 'BitmapAnd',
+ 'BitmapOr',
+ 'CTE Scan',
+ 'Custom Scan',
+ 'Delete',
+ 'Foreign Scan',
+ 'Function Scan',
+ 'Gather Merge',
+ 'Gather',
+ 'Group',
+ 'GroupAggregate',
+ 'Hash Join',
+ 'Hash',
+ 'HashAggregate',
+ 'Incremental Sort',
+ 'Index Only Scan',
+ 'Index Scan',
+ 'Insert',
+ 'Limit',
+ 'LockRows',
+ 'Materialize',
+ 'Memoize',
+ 'Merge Append',
+ 'Merge Join',
+ 'Merge',
+ 'MixedAggregate',
+ 'Named Tuplestore Scan',
+ 'Nested Loop',
+ 'ProjectSet',
+ 'Recursive Union',
+ 'Result',
+ 'Sample Scan',
+ 'Seq Scan',
+ 'SetOp',
+ 'Sort',
+ 'SubPlan',
+ 'Subquery Scan',
+ 'Table Function Scan',
+ 'Tid Range Scan',
+ 'Tid Scan',
+ 'Unique',
+ 'Update',
+ 'Values Scan',
+ 'WindowAgg',
+ 'WorkTable Scan',
+)
+
if __name__ == '__main__': # pragma: no cover
import re
diff --git a/pygments/lexers/sql.py b/pygments/lexers/sql.py
index 87dc638e..ace512be 100644
--- a/pygments/lexers/sql.py
+++ b/pygments/lexers/sql.py
@@ -30,6 +30,9 @@
- highlights errors in the output and notification levels;
- handles psql backslash commands.
+ `PostgresExplainLexer`
+ A lexer to highlight PostgreSQL execution plans.
+
The ``tests/examplefiles`` contains a few test files with data to be
parsed by these lexers.
@@ -45,7 +48,7 @@ from pygments.token import Punctuation, Whitespace, Text, Comment, Operator, \
from pygments.lexers import get_lexer_by_name, ClassNotFound
from pygments.lexers._postgres_builtins import KEYWORDS, DATATYPES, \
- PSEUDO_TYPES, PLPGSQL_KEYWORDS
+ PSEUDO_TYPES, PLPGSQL_KEYWORDS, EXPLAIN_KEYWORDS
from pygments.lexers._mysql_builtins import \
MYSQL_CONSTANTS, \
MYSQL_DATATYPES, \
@@ -57,8 +60,8 @@ from pygments.lexers import _tsql_builtins
__all__ = ['PostgresLexer', 'PlPgsqlLexer', 'PostgresConsoleLexer',
- 'SqlLexer', 'TransactSqlLexer', 'MySqlLexer',
- 'SqliteConsoleLexer', 'RqlLexer']
+ 'PostgresExplainLexer', 'SqlLexer', 'TransactSqlLexer',
+ 'MySqlLexer', 'SqliteConsoleLexer', 'RqlLexer']
line_re = re.compile('.*?\n')
sqlite_prompt_re = re.compile(r'^(?:sqlite| ...)>(?= )')
@@ -368,6 +371,191 @@ class PostgresConsoleLexer(Lexer):
return
+class PostgresExplainLexer(RegexLexer):
+ """
+ Lexer for PostgreSQL ``EXPLAIN`` output (query execution plans),
+ including the counters measured when ``ANALYZE`` is used.
+ """
+
+ name = 'PostgreSQL EXPLAIN dialect'
+ aliases = ['postgres-explain']
+ filenames = ['*.explain']
+ mimetypes = ['text/x-postgresql-explain']
+
+ tokens = {
+ 'root': [
+ (r'(:|\(|\)|ms|kB|->|\.\.|\,)', Punctuation),
+ (r'(\s+)', Whitespace),
+
+ # Match the estimated cost and the counters actually measured with ANALYZE,
+ # then move to the instrumentation state
+ (r'(cost)(=?)', bygroups(Name.Class, Punctuation), 'instrumentation'),
+ (r'(actual)( )(=?)', bygroups(Name.Class, Whitespace, Punctuation), 'instrumentation'),
+
+ # Misc keywords
+ (words(('actual', 'Memory Usage', 'Memory', 'Buckets', 'Batches',
+ 'originally', 'row', 'rows', 'Hits', 'Misses',
+ 'Evictions', 'Overflows'), suffix=r'\b'),
+ Comment.Single),
+
+ (r'(hit|read|dirtied|written|write|time|calls)(=)', bygroups(Comment.Single, Operator)),
+ (r'(shared|temp|local)', Keyword.Pseudo),
+
+ # Move to the sort state in order to emphasize specific keywords (especially disk access)
+ (r'(Sort Method)(: )', bygroups(Comment.Preproc, Punctuation), 'sort'),
+
+ # These keywords can be followed by an object, like a table
+ (r'(Sort Key|Group Key|Presorted Key|Hash Key)(:)( )',
+ bygroups(Comment.Preproc, Punctuation, Whitespace), 'object_name'),
+ (r'(Cache Key|Cache Mode)(:)( )', bygroups(Comment, Punctuation, Whitespace), 'object_name'),
+
+ # These keywords can be followed by a predicate
+ (words(('Join Filter', 'Subplans Removed', 'Filter', 'Merge Cond',
+ 'Hash Cond', 'Index Cond', 'Recheck Cond', 'Heap Blocks',
+ 'TID Cond', 'Run Condition', 'Order By', 'Function Call',
+ 'Table Function Call', 'Inner Unique', 'Params Evaluated',
+ 'Single Copy', 'Sampling', 'One-Time Filter', 'Output',
+ 'Relations', 'Remote SQL'), suffix=r'\b'),
+ Comment.Preproc, 'predicate'),
+
+ # Special keyword to handle ON CONFLICT
+ (r'Conflict ', Comment.Preproc, 'conflict'),
+
+ # Special keyword for InitPlan or SubPlan
+ (r'(InitPlan|SubPlan)( )(\d+)( )',
+ bygroups(Keyword, Whitespace, Number.Integer, Whitespace),
+ 'init_plan'),
+
+ (words(('Sort Method', 'Join Filter', 'Planning time',
+ 'Planning Time', 'Execution time', 'Execution Time',
+ 'Workers Planned', 'Workers Launched', 'Buffers',
+ 'Planning', 'Worker', 'Query Identifier', 'Time',
+ 'Full-sort Groups'), suffix=r'\b'), Comment.Preproc),
+
+ # Emphasize these keywords
+
+ (words(('Rows Removed by Join Filter', 'Rows Removed by Filter',
+ 'Rows Removed by Index Recheck',
+ 'Heap Fetches', 'never executed'),
+ suffix=r'\b'), Name.Exception),
+ (r'(I/O Timings)(:)( )', bygroups(Name.Exception, Punctuation, Whitespace)),
+
+ (words(EXPLAIN_KEYWORDS, suffix=r'\b'), Keyword),
+
+ # join keywords
+ (r'((Right|Left|Full|Semi|Anti) Join)', Keyword.Type),
+ (r'(Parallel |Async |Finalize |Partial )', Comment.Preproc),
+ (r'Backward', Comment.Preproc),
+ (r'(Intersect|Except|Hash)', Comment.Preproc),
+
+ (r'(CTE)( )(\w*)?', bygroups(Comment, Whitespace, Name.Variable)),
+
+
+ # Treat "on" and "using" as punctuation
+ (r'(on|using)', Punctuation, 'object_name'),
+
+
+ # strings
+ (r"'(''|[^'])*'", String.Single),
+ # numbers
+ (r'\d+\.\d+', Number.Float),
+ (r'(\d+)', Number.Integer),
+
+ # boolean
+ (r'(true|false)', Name.Constant),
+ # explain header
+ (r'\s*QUERY PLAN\s*\n\s*-+', Comment.Single),
+ # Settings
+ (r'(Settings)(:)( )', bygroups(Comment.Preproc, Punctuation, Whitespace), 'setting'),
+
+ # Handle JIT counters
+ (r'(JIT|Functions|Options|Timing)(:)', bygroups(Comment.Preproc, Punctuation)),
+ (r'(Inlining|Optimization|Expressions|Deforming|Generation|Emission|Total)', Keyword.Pseudo),
+
+ # Handle Triggers counters
+ (r'(Trigger)( )(\S*)(:)( )',
+ bygroups(Comment.Preproc, Whitespace, Name.Variable, Punctuation, Whitespace)),
+
+ ],
+ 'expression': [
+ # matches any kind of parenthesized expression
+ # the first opening paren is matched by the 'caller'
+ (r'\(', Punctuation, '#push'),
+ (r'\)', Punctuation, '#pop'),
+ (r'(never executed)', Name.Exception),
+ (r'[^)(]+', Comment),
+ ],
+ 'object_name': [
+
+ # This is a cost estimate or an ANALYZE measurement
+ (r'(\(cost)(=?)', bygroups(Name.Class, Punctuation), 'instrumentation'),
+ (r'(\(actual)( )(=?)', bygroups(Name.Class, Whitespace, Punctuation), 'instrumentation'),
+
+ # if object_name is parenthesized, mark the opening paren as
+ # punctuation and enter the 'expression' state
+ (r'\(', Punctuation, 'expression'),
+ (r'(on)', Punctuation),
+ # matches possibly schema-qualified table and column names
+ (r'\w+(\.\w+)*( USING \S+| \w+ USING \S+)', Name.Variable),
+ (r'\"?\w+\"?(?:\.\"?\w+\"?)?', Name.Variable),
+ (r'\'\S*\'', Name.Variable),
+
+ # if we encounter a comma, another object is listed
+ (r',\n', Punctuation, 'object_name'),
+ (r',', Punctuation, 'object_name'),
+
+ # special case: "*SELECT*"
+ (r'"\*SELECT\*( \d+)?"(.\w+)?', Name.Variable),
+ (r'"\*VALUES\*(_\d+)?"(.\w+)?', Name.Variable),
+ (r'"ANY_subquery"', Name.Variable),
+
+ # Variable $1 ...
+ (r'\$\d+', Name.Variable),
+ # cast
+ (r'::\w+', Name.Variable),
+ (r' +', Whitespace),
+ (r'"', Punctuation),
+ (r'\[\.\.\.\]', Punctuation),
+ (r'\)', Punctuation, '#pop'),
+ ],
+ 'predicate': [
+ # if predicate is parenthesized, mark paren as punctuation
+ (r'(\()([^\n]*)(\))', bygroups(Punctuation, Name.Variable, Punctuation), '#pop'),
+ # otherwise color until newline
+ (r'[^\n]*', Name.Variable, '#pop'),
+ ],
+ 'instrumentation': [
+ (r'=|\.\.', Punctuation),
+ (r' +', Whitespace),
+ (r'(rows|width|time|loops)', Name.Class),
+ (r'\d+\.\d+', Number.Float),
+ (r'(\d+)', Number.Integer),
+ (r'\)', Punctuation, '#pop'),
+ ],
+ 'conflict': [
+ (r'(Resolution: )(\w+)', bygroups(Comment.Preproc, Name.Variable)),
+ (r'(Arbiter \w+:)', Comment.Preproc, 'object_name'),
+ (r'(Filter: )', Comment.Preproc, 'predicate'),
+ ],
+ 'setting': [
+ (r'([a-z_]*?)(\s*)(=)(\s*)(\'.*?\')', bygroups(Name.Attribute, Whitespace, Operator, Whitespace, String)),
+ (r'\, ', Punctuation),
+ ],
+ 'init_plan': [
+ (r'\(', Punctuation),
+ (r'returns \$\d+(,\$\d+)?', Name.Variable),
+ (r'\)', Punctuation, '#pop'),
+ ],
+ 'sort': [
+ (r':|kB', Punctuation),
+ (r'(quicksort|top-N|heapsort|Average|Memory|Peak)', Comment.Preproc),
+ (r'(external|merge|Disk|sort)', Name.Exception),
+ (r'(\d+)', Number.Integer),
+ (r' +', Whitespace),
+ ],
+ }
+
+
class SqlLexer(RegexLexer):
"""
Lexer for Structured Query Language. Currently, this lexer does