diff options
author | Daniele Varrazzo <daniele.varrazzo@gmail.com> | 2011-04-14 21:05:42 +0100 |
---|---|---|
committer | Daniele Varrazzo <daniele.varrazzo@gmail.com> | 2011-04-14 21:05:42 +0100 |
commit | 55a8e2536edc91f3a31ff5a436e6dd78dc93e8ee (patch) | |
tree | 3273fe19ad6d1c1855ddebc60ec0ba7088bb41a2 /pygments/lexers/postgres.py | |
parent | 0d94f7f6fa7b5a7cb2df723016330cd26d2f1bc5 (diff) | |
download | pygments-55a8e2536edc91f3a31ff5a436e6dd78dc93e8ee.tar.gz |
Fix occasional parsing of PL/pgSQL using the SQL tokens
Class inheritance refactored to avoid a base class with a _tokens attribute
that may interfere with the creation of the subclass's _tokens.
Diffstat (limited to 'pygments/lexers/postgres.py')
-rw-r--r-- | pygments/lexers/postgres.py | 77 |
1 file changed, 51 insertions, 26 deletions
diff --git a/pygments/lexers/postgres.py b/pygments/lexers/postgres.py index afef1ea4..b8901dcf 100644 --- a/pygments/lexers/postgres.py +++ b/pygments/lexers/postgres.py @@ -28,37 +28,47 @@ line_re = re.compile('.*?\n') language_re = re.compile(r"\s+LANGUAGE\s+'?(\w+)'?", re.IGNORECASE) -class PostgresLexer(RegexLexer): - """ - Lexer for the PostgreSQL dialect of SQL. +def language_callback(lexer, match): + """Parse the content of a $-string using a lexer + + The lexer is chosen looking for a nearby LANGUAGE. + + Note: this function should have been a `PostgresBase` method, but the + rules deepcopy fails in this case. """ + l = None + # TODO: the language can also be before the string + m = language_re.match(lexer.text[match.end():]) + if m is not None: + l = lexer._get_lexer(m.group(1)) + + if l: + yield (match.start(1), String, match.group(1)) + for x in l.get_tokens_unprocessed(match.group(2)): + yield x + yield (match.start(3), String, match.group(3)) - name = 'PostgreSQL SQL dialect' - aliases = ['postgresql', 'postgres'] - mimetypes = ['text/x-postgresql'] + else: + yield (match.start(), String, match.group()) + +class PostgresBase(object): + """Base class for Postgres-related lexers. + This is implemented as a mixin to avoid the Lexer metaclass kicking in. + this way the different lexer don't have a common Lexer ancestor. If they + had, _tokens could be created on this ancestor and not updated for the + other classes, resulting e.g. in PL/pgSQL parsed as SQL. This shortcoming + seem to suggest that regexp lexers are not really subclassable. + + `language_callback` should really be our method, but this breaks deepcopy. + """ def get_tokens_unprocessed(self, text, *args): # Have a copy of the entire text to be used by `language_callback`. 
self.text = text - for x in RegexLexer.get_tokens_unprocessed(self, text, *args): + for x in super(PostgresBase, self).get_tokens_unprocessed( + text, *args): yield x - def language_callback(self, match): - lexer = None - # TODO: the language can also be before the string - m = language_re.match(self.text[match.end():]) - if m is not None: - lexer = self._get_lexer(m.group(1)) - - if lexer: - yield (match.start(1), String, match.group(1)) - for x in lexer.get_tokens_unprocessed(match.group(2)): - yield x - yield (match.start(3), String, match.group(3)) - - else: - yield (match.start(), String, match.group()) - def _get_lexer(self, lang): if lang.lower() == 'sql': return get_lexer_by_name('postgresql', **self.options) @@ -78,9 +88,18 @@ class PostgresLexer(RegexLexer): pass else: # TODO: better logging - print >>sys.stderr, "language not found:", lang + # print >>sys.stderr, "language not found:", lang return None +class PostgresLexer(PostgresBase, RegexLexer): + """ + Lexer for the PostgreSQL dialect of SQL. + """ + + name = 'PostgreSQL SQL dialect' + aliases = ['postgresql', 'postgres'] + mimetypes = ['text/x-postgresql'] + flags = re.IGNORECASE tokens = { 'root': [ @@ -116,13 +135,15 @@ class PostgresLexer(RegexLexer): } -class PlPgsqlLexer(PostgresLexer): +class PlPgsqlLexer(PostgresBase, RegexLexer): """ Handle the extra syntax in Pl/pgSQL language. 
""" name = 'PL/pgSQL' aliases = ['plpgsql'] mimetypes = ['text/x-plpgsql'] + + flags = re.IGNORECASE tokens = deepcopy(PostgresLexer.tokens) # extend the keywords list @@ -136,6 +157,7 @@ class PlPgsqlLexer(PostgresLexer): else: assert 0, "SQL keywords not found" + # Add specific PL/pgSQL rules (before the SQL ones) tokens['root'][:0] = [ (r'\%[a-z][a-z0-9_]*\b', Name.Builtin), # actually, a datatype (r':=', Operator), @@ -144,7 +166,7 @@ class PlPgsqlLexer(PostgresLexer): ] -class PsqlRegexLexer(PostgresLexer): +class PsqlRegexLexer(PostgresBase, RegexLexer): """ Extend the PostgresLexer adding support specific for psql commands. @@ -153,7 +175,10 @@ class PsqlRegexLexer(PostgresLexer): """ name = 'PostgreSQL console - regexp based lexer' aliases = [] # not public + + flags = re.IGNORECASE tokens = deepcopy(PostgresLexer.tokens) + tokens['root'].append( (r'\\[^\s]+', Keyword.Pseudo, 'psql-command')) tokens['psql-command'] = [ |