diff options
author | Daniele Varrazzo <daniele.varrazzo@gmail.com> | 2011-04-14 21:05:42 +0100 |
---|---|---|
committer | Daniele Varrazzo <daniele.varrazzo@gmail.com> | 2011-04-14 21:05:42 +0100 |
commit | 55a8e2536edc91f3a31ff5a436e6dd78dc93e8ee (patch) | |
tree | 3273fe19ad6d1c1855ddebc60ec0ba7088bb41a2 /pygments/lexers/postgres.py | |
parent | 0d94f7f6fa7b5a7cb2df723016330cd26d2f1bc5 (diff) | |
download | pygments-55a8e2536edc91f3a31ff5a436e6dd78dc93e8ee.tar.gz |
Fix occasional parsing of PL/pgSQL using the SQL tokens
Class inheritance refactored to avoid a base class with a _tokens attribute
that may interfere with the creation of the subclass's _tokens.
Diffstat (limited to 'pygments/lexers/postgres.py')
-rw-r--r-- | pygments/lexers/postgres.py | 77 |
1 file changed, 51 insertions, 26 deletions
diff --git a/pygments/lexers/postgres.py b/pygments/lexers/postgres.py index afef1ea4..b8901dcf 100644 --- a/pygments/lexers/postgres.py +++ b/pygments/lexers/postgres.py @@ -28,37 +28,47 @@ line_re = re.compile('.*?\n') language_re = re.compile(r"\s+LANGUAGE\s+'?(\w+)'?", re.IGNORECASE) -class PostgresLexer(RegexLexer): - """ - Lexer for the PostgreSQL dialect of SQL. +def language_callback(lexer, match): + """Parse the content of a $-string using a lexer + + The lexer is chosen looking for a nearby LANGUAGE. + + Note: this function should have been a `PostgresBase` method, but the + rules deepcopy fails in this case. """ + l = None + # TODO: the language can also be before the string + m = language_re.match(lexer.text[match.end():]) + if m is not None: + l = lexer._get_lexer(m.group(1)) + + if l: + yield (match.start(1), String, match.group(1)) + for x in l.get_tokens_unprocessed(match.group(2)): + yield x + yield (match.start(3), String, match.group(3)) - name = 'PostgreSQL SQL dialect' - aliases = ['postgresql', 'postgres'] - mimetypes = ['text/x-postgresql'] + else: + yield (match.start(), String, match.group()) + +class PostgresBase(object): + """Base class for Postgres-related lexers. + This is implemented as a mixin to avoid the Lexer metaclass kicking in. + this way the different lexer don't have a common Lexer ancestor. If they + had, _tokens could be created on this ancestor and not updated for the + other classes, resulting e.g. in PL/pgSQL parsed as SQL. This shortcoming + seem to suggest that regexp lexers are not really subclassable. + + `language_callback` should really be our method, but this breaks deepcopy. + """ def get_tokens_unprocessed(self, text, *args): # Have a copy of the entire text to be used by `language_callback`. 
self.text = text - for x in RegexLexer.get_tokens_unprocessed(self, text, *args): + for x in super(PostgresBase, self).get_tokens_unprocessed( + text, *args): yield x - def language_callback(self, match): - lexer = None - # TODO: the language can also be before the string - m = language_re.match(self.text[match.end():]) - if m is not None: - lexer = self._get_lexer(m.group(1)) - - if lexer: - yield (match.start(1), String, match.group(1)) - for x in lexer.get_tokens_unprocessed(match.group(2)): - yield x - yield (match.start(3), String, match.group(3)) - - else: - yield (match.start(), String, match.group()) - def _get_lexer(self, lang): if lang.lower() == 'sql': return get_lexer_by_name('postgresql', **self.options) @@ -78,9 +88,18 @@ class PostgresLexer(RegexLexer): pass else: # TODO: better logging - print >>sys.stderr, "language not found:", lang + # print >>sys.stderr, "language not found:", lang return None +class PostgresLexer(PostgresBase, RegexLexer): + """ + Lexer for the PostgreSQL dialect of SQL. + """ + + name = 'PostgreSQL SQL dialect' + aliases = ['postgresql', 'postgres'] + mimetypes = ['text/x-postgresql'] + flags = re.IGNORECASE tokens = { 'root': [ @@ -116,13 +135,15 @@ class PostgresLexer(RegexLexer): } -class PlPgsqlLexer(PostgresLexer): +class PlPgsqlLexer(PostgresBase, RegexLexer): """ Handle the extra syntax in Pl/pgSQL language. 
""" name = 'PL/pgSQL' aliases = ['plpgsql'] mimetypes = ['text/x-plpgsql'] + + flags = re.IGNORECASE tokens = deepcopy(PostgresLexer.tokens) # extend the keywords list @@ -136,6 +157,7 @@ class PlPgsqlLexer(PostgresLexer): else: assert 0, "SQL keywords not found" + # Add specific PL/pgSQL rules (before the SQL ones) tokens['root'][:0] = [ (r'\%[a-z][a-z0-9_]*\b', Name.Builtin), # actually, a datatype (r':=', Operator), @@ -144,7 +166,7 @@ class PlPgsqlLexer(PostgresLexer): ] -class PsqlRegexLexer(PostgresLexer): +class PsqlRegexLexer(PostgresBase, RegexLexer): """ Extend the PostgresLexer adding support specific for psql commands. @@ -153,7 +175,10 @@ class PsqlRegexLexer(PostgresLexer): """ name = 'PostgreSQL console - regexp based lexer' aliases = [] # not public + + flags = re.IGNORECASE tokens = deepcopy(PostgresLexer.tokens) + tokens['root'].append( (r'\\[^\s]+', Keyword.Pseudo, 'psql-command')) tokens['psql-command'] = [ |