-rw-r--r--  pygments/lexers/_postgres_builtins.py  121
-rw-r--r--  pygments/lexers/postgres.py             132
-rw-r--r--  tests/test_basic_api.py                   2
3 files changed, 254 insertions(+), 1 deletion(-)
diff --git a/pygments/lexers/_postgres_builtins.py b/pygments/lexers/_postgres_builtins.py
new file mode 100644
index 00000000..110f14d7
--- /dev/null
+++ b/pygments/lexers/_postgres_builtins.py
@@ -0,0 +1,121 @@
+"""
+ pygments.lexers._postgres_builtins
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+    Self-updating data files for the PostgreSQL lexer.
+
+ :copyright: Copyright 2011 by Daniele Varrazzo.
+ :license: BSD, see LICENSE for details.
+"""
+
+import re
+import urllib2
+
+# One man's constant is another man's variable.
+SOURCE_URL = 'https://github.com/postgres/postgres/raw/REL9_0_STABLE'
+KEYWORDS_URL = SOURCE_URL + '/doc/src/sgml/keywords.sgml'
+DATATYPES_URL = SOURCE_URL + '/doc/src/sgml/datatype.sgml'
+
+def update_myself():
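+    # Fetch the SGML sources of the PostgreSQL docs and rewrite the
+    # KEYWORDS and DATATYPES constants at the bottom of this module.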
+ datatypes = parse_datatypes(fetch(DATATYPES_URL))
+ keywords = parse_keywords(fetch(KEYWORDS_URL))
+ update_consts(__file__, 'DATATYPES', datatypes)
+ update_consts(__file__, 'KEYWORDS', keywords)
+
+def parse_keywords(f):
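+    # Pull each <token> entry out of the keywords table in keywords.sgml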
+ kw = []
+    re_entry = re.compile(r'\s*<entry><token>([^<]+)</token></entry>')
+ for line in f:
+ m = re_entry.match(line)
+ if m is None:
+ continue
+
+ kw.append(m.group(1))
+
+ kw.sort()
+ return kw
+
+def parse_datatypes(f):
+ dt = set()
+ for line in f:
+ if '<sect1' in line:
+ break
+ if '<entry><type>' not in line:
+ continue
+
+ # Parse a string such as
+ # time [ (<replaceable>p</replaceable>) ] [ without time zone ]
+ # into types "time" and "without time zone"
+
+ # remove all the tags
+ line = re.sub("<replaceable>[^<]+</replaceable>", "", line)
+ line = re.sub("<[^>]+>", "", line)
+
+        # Drop the parts containing parentheses (e.g. a precision spec)
+        for part in [p for chunk in line.split('[') for p in chunk.split(']') if "(" not in p]:
+            for t in part.split(','):
+                t = t.strip()
+                if not t:
+                    continue
+                dt.add(" ".join(t.split()))
+
+ dt = list(dt)
+ dt.sort()
+ return dt
+
+def fetch(url):
+ return urllib2.urlopen(url)
+
+def update_consts(filename, constname, content):
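+    # Rewrite, in place, the body of the list literal between the
+    # "<constname> = [" line and its closing "]" in this source file.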
+ f = open(filename)
+ lines = f.readlines()
+ f.close()
+
+ # Line to start/end inserting
+ re_start = re.compile(r'^%s\s*=\s*\[\s*$' % constname)
+ re_end = re.compile(r'^\s*\]\s*$')
+ start = [ n for n, l in enumerate(lines) if re_start.match(l) ]
+ if not start:
+ raise ValueError("couldn't find line containing '%s = ['" % constname)
+ if len(start) > 1:
+ raise ValueError("too many lines containing '%s = ['" % constname)
+ start = start[0] + 1
+
+ end = [ n for n, l in enumerate(lines) if n >= start and re_end.match(l) ]
+ if not end:
+ raise ValueError("couldn't find line containing ']' after %s " % constname)
+ end = end[0]
+
+    # Pack the new items into lines no wider than about 75 characters
+    content = [repr(item) for item in content]
+ new_lines = [[]]
+ for item in content:
+ if sum(map(len, new_lines[-1])) + 2 * len(new_lines[-1]) + len(item) + 4 > 75:
+ new_lines.append([])
+ new_lines[-1].append(item)
+
+ lines[start:end] = [ " %s,\n" % ", ".join(items) for items in new_lines ]
+
+ f = open(filename, 'w')
+ f.write(''.join(lines))
+ f.close()
+
+
+# Autogenerated: please edit them if you like wasting your time.
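+# Run this module as a script to refresh them from the PostgreSQL docs.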
+
+KEYWORDS = [
+ ]
+
+DATATYPES = [
+ ]
+
+
+if __name__ == '__main__':
+ update_myself()
+
diff --git a/pygments/lexers/postgres.py b/pygments/lexers/postgres.py
new file mode 100644
index 00000000..6f126c4e
--- /dev/null
+++ b/pygments/lexers/postgres.py
@@ -0,0 +1,132 @@
+"""
+ pygments.lexers.postgres
+ ~~~~~~~~~~~~~~~~~~~~~~~~
+
+    Lexers for PostgreSQL-specific SQL and the psql interactive session.
+
+ :copyright: Copyright 2011 by Daniele Varrazzo.
+ :license: BSD, see LICENSE for details.
+"""
+
+import re
+
+from pygments.lexer import Lexer, RegexLexer, include, bygroups, using, \
+ this, do_insertions
+from pygments.token import Error, Punctuation, Literal, Token, \
+ Text, Comment, Operator, Keyword, Name, String, Number, Generic
+
+from pygments.lexers._postgres_builtins import KEYWORDS, DATATYPES
+
+
+__all__ = [ 'PostgresLexer', 'PostgresConsoleLexer' ]
+
+line_re = re.compile('.*?\n')
+
+
+class PostgresLexer(RegexLexer):
+ """
+ Lexer for the PostgreSQL dialect of SQL.
+ """
+
+ name = 'PostgreSQL SQL dialect'
+ aliases = ['postgresql', 'postgres']
+ mimetypes = ['text/x-postgresql']
+
+ flags = re.IGNORECASE
+ tokens = {
+ 'root': [
+ (r'\s+', Text),
+ (r'--.*?\n', Comment.Single),
+ (r'/\*', Comment.Multiline, 'multiline-comments'),
+ (r'(' + '|'.join(KEYWORDS) + r')\b', Keyword),
+            (r'(' + '|'.join([s.replace(" ", r"\s+") for s in DATATYPES])
+             + r')\b', Name.Builtin),
+            (r'[+*/<>=~!@#%^&|`?-]', Operator),
+ (r'::', Operator), # cast
+            (r'([0-9]+\.[0-9]*|\.[0-9]+)(e[+-]?[0-9]+)?', Number.Float),
+            (r'[0-9]+e[+-]?[0-9]+', Number.Float),
+            (r'[0-9]+', Number.Integer),
+ # TODO: Backslash escapes?
+ (r"'(''|[^'])*'", String.Single),
+ (r'"(""|[^"])*"', String.Name), # quoted identifier
+ (r'[a-zA-Z_][a-zA-Z0-9_]*', Name),
+ (r'[;:()\[\],\.]', Punctuation),
+ # psql backslash command.
+ # This actually belongs to the console lexer,
+ # but putting it here makes things easier.
+ (r'\\.*?\n', Name), # TODO: what is a good token?
+ ],
+ 'multiline-comments': [
+ (r'/\*', Comment.Multiline, 'multiline-comments'),
+ (r'\*/', Comment.Multiline, '#pop'),
+ (r'[^/\*]+', Comment.Multiline),
+ (r'[/*]', Comment.Multiline)
+ ]
+ }
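+
+# Example usage, as a sketch (TerminalFormatter is only one possible
+# choice of formatter):
+#
+#   from pygments import highlight
+#   from pygments.formatters import TerminalFormatter
+#   print highlight("SELECT 1;", PostgresLexer(), TerminalFormatter())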
+
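+# Regular expressions recognizing the psql prompt and server messages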
+re_prompt = re.compile(r'^([a-zA-Z_][a-zA-Z0-9_]*)?[=\-\(]#')
+re_psql_command = re.compile(r'(\s*)(\\.+?)(\s+)$')
+re_error = re.compile(r'ERROR:')
+re_message = re.compile(r'(DEBUG|INFO|WARNING|ERROR|HINT|LINE [0-9]+:?)(.*?\n)')
+re_charhint = re.compile(r'\s*\^\s*\n')
+
+class PostgresConsoleLexer(Lexer):
+ """
+ Lexer for psql sessions.
+
+ TODO: multiline comments are broken.
+ """
+
+ name = 'PostgreSQL console (psql)'
+ aliases = ['psql', 'postgresql-console', 'postgres-console']
+ mimetypes = ['text/x-postgresql-psql']
+
+ def get_tokens_unprocessed(self, data):
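+        # Accumulate prompt-prefixed lines into a SQL buffer and highlight
+        # it with the PostgresLexer via do_insertions(); everything else
+        # is emitted as server output, with messages such as ERROR: emphasized.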
+ sql = PostgresLexer(**self.options)
+
+ curcode = ''
+ insertions = []
+ out_token = Generic.Output
+ for match in line_re.finditer(data):
+ line = match.group()
+ mprompt = re_prompt.match(line)
+ if mprompt is not None:
+ out_token = Generic.Output
+ insertions.append((len(curcode),
+ [(0, Generic.Prompt, mprompt.group())]))
+ curcode += line[len(mprompt.group()):]
+ else:
+ if curcode:
+ for item in do_insertions(insertions,
+ sql.get_tokens_unprocessed(curcode)):
+ yield item
+ curcode = ''
+ insertions = []
+ mmsg = re_message.match(line)
+ if mmsg is not None:
+ if mmsg.group(1).startswith("ERROR"):
+ out_token = Generic.Error
+                    yield (match.start() + mmsg.start(1), Generic.Strong, mmsg.group(1))
+                    yield (match.start() + mmsg.start(2), out_token, mmsg.group(2))
+ elif re_charhint.match(line):
+ yield (match.start(), out_token, line)
+ else:
+ yield (match.start(), Generic.Output, line)
+
+ if curcode:
+ for item in do_insertions(insertions,
+ sql.get_tokens_unprocessed(curcode)):
+ yield item
+
+
diff --git a/tests/test_basic_api.py b/tests/test_basic_api.py
index e8b9cf9a..77c0aaea 100644
--- a/tests/test_basic_api.py
+++ b/tests/test_basic_api.py
@@ -87,7 +87,7 @@ def test_lexer_options():
if cls.__name__ not in (
'PythonConsoleLexer', 'RConsoleLexer', 'RubyConsoleLexer',
'SqliteConsoleLexer', 'MatlabSessionLexer', 'ErlangShellLexer',
- 'BashSessionLexer', 'LiterateHaskellLexer'):
+ 'BashSessionLexer', 'LiterateHaskellLexer', 'PostgresConsoleLexer'):
inst = cls(ensurenl=False)
ensure(inst.get_tokens('a\nb'), 'a\nb')
inst = cls(ensurenl=False, stripall=True)