-rw-r--r--   pygments/lexers/_postgres_builtins.py   115
-rw-r--r--   pygments/lexers/postgres.py              121
-rw-r--r--   tests/test_basic_api.py                    2
3 files changed, 237 insertions(+), 1 deletion(-)
diff --git a/pygments/lexers/_postgres_builtins.py b/pygments/lexers/_postgres_builtins.py
new file mode 100644
index 00000000..110f14d7
--- /dev/null
+++ b/pygments/lexers/_postgres_builtins.py
@@ -0,0 +1,115 @@
+"""
+    pygments.lexers._postgres_builtins
+    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+    Self-updating data files for PostgreSQL lexer.
+
+    :copyright: Copyright 2011 by Daniele Varrazzo.
+    :license: BSD, see LICENSE for details.
+"""
+
+import re
+import urllib2
+
+# One man's constant is another man's variable.
+SOURCE_URL = 'https://github.com/postgres/postgres/raw/REL9_0_STABLE'
+KEYWORDS_URL = SOURCE_URL + '/doc/src/sgml/keywords.sgml'
+DATATYPES_URL = SOURCE_URL + '/doc/src/sgml/datatype.sgml'
+
+def update_myself():
+    datatypes = parse_datatypes(fetch(DATATYPES_URL))
+    keywords = parse_keywords(fetch(KEYWORDS_URL))
+    update_consts(__file__, 'DATATYPES', datatypes)
+    update_consts(__file__, 'KEYWORDS', keywords)
+
+def parse_keywords(f):
+    kw = []
+    re_entry = re.compile('\s*<entry><token>([^<]+)</token></entry>')
+    for line in f:
+        m = re_entry.match(line)
+        if m is None:
+            continue
+
+        kw.append(m.group(1))
+
+    kw.sort()
+    return kw
+
+def parse_datatypes(f):
+    dt = set()
+    re_entry = re.compile('\s*<entry><type>([^<]+)</type></entry>')
+    for line in f:
+        if '<sect1' in line:
+            break
+        if '<entry><type>' not in line:
+            continue
+
+        # Parse a string such as
+        #   time [ (<replaceable>p</replaceable>) ] [ without time zone ]
+        # into types "time" and "without time zone"
+
+        # remove all the tags
+        line = re.sub("<replaceable>[^<]+</replaceable>", "", line)
+        line = re.sub("<[^>]+>", "", line)
+
+        # Drop the parts containing braces
+        for tmp in [ t for tmp in line.split('[') for t in tmp.split(']') if "(" not in t ]:
+            for t in tmp.split(','):
+                t = t.strip()
+                if not t: continue
+                dt.add(" ".join(t.split()))
+
+    dt = list(dt)
+    dt.sort()
+    return dt
+
+def fetch(url):
+    return urllib2.urlopen(url)
+
+def update_consts(filename, constname, content):
+    f = open(filename)
+    lines = f.readlines()
+    f.close()
+
+    # Line to start/end inserting
+    re_start = re.compile(r'^%s\s*=\s*\[\s*$' % constname)
+    re_end = re.compile(r'^\s*\]\s*$')
+    start = [ n for n, l in enumerate(lines) if re_start.match(l) ]
+    if not start:
+        raise ValueError("couldn't find line containing '%s = ['" % constname)
+    if len(start) > 1:
+        raise ValueError("too many lines containing '%s = ['" % constname)
+    start = start[0] + 1
+
+    end = [ n for n, l in enumerate(lines) if n >= start and re_end.match(l) ]
+    if not end:
+        raise ValueError("couldn't find line containing ']' after %s " % constname)
+    end = end[0]
+
+    # Pack the new content in lines not too long
+    content = [repr(item) for item in content ]
+    new_lines = [[]]
+    for item in content:
+        if sum(map(len, new_lines[-1])) + 2 * len(new_lines[-1]) + len(item) + 4 > 75:
+            new_lines.append([])
+        new_lines[-1].append(item)
+
+    lines[start:end] = [ "    %s,\n" % ", ".join(items) for items in new_lines ]
+
+    f = open(filename, 'w')
+    f.write(''.join(lines))
+    f.close()
+
+
+# Autogenerated: please edit them if you like wasting your time.
+
+KEYWORDS = [
+    ]
+
+DATATYPES = [
+    ]
+
+
+if __name__ == '__main__':
+    update_myself()
+
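As a usage sketch (not part of the commit, and assuming network access to the PostgreSQL source URLs above): the module regenerates its own KEYWORDS and DATATYPES lists in place when run as a script, or the same refresh can be triggered from the interpreter.

    # Hypothetical invocation, shown for illustration only:
    #   $ python pygments/lexers/_postgres_builtins.py
    # or, equivalently, from Python:
    from pygments.lexers import _postgres_builtins
    _postgres_builtins.update_myself()
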
diff --git a/pygments/lexers/postgres.py b/pygments/lexers/postgres.py
new file mode 100644
index 00000000..6f126c4e
--- /dev/null
+++ b/pygments/lexers/postgres.py
@@ -0,0 +1,121 @@
+"""
+    pygments.lexers.postgres
+    ~~~~~~~~~~~~~~~~~~~~~~~~
+
+    Lexers for PostgreSQL-specific SQL and psql interactive session.
+
+    :copyright: Copyright 2011 by Daniele Varrazzo.
+    :license: BSD, see LICENSE for details.
+"""
+
+import re
+import urllib2
+
+from pygments.lexer import Lexer, RegexLexer, include, bygroups, using, \
+     this, do_insertions
+from pygments.token import Error, Punctuation, Literal, Token, \
+     Text, Comment, Operator, Keyword, Name, String, Number, Generic
+
+from pygments.lexers._postgres_builtins import KEYWORDS, DATATYPES
+
+
+__all__ = [ 'PostgresLexer', 'PostgresConsoleLexer' ]
+
+line_re = re.compile('.*?\n')
+
+
+class PostgresLexer(RegexLexer):
+    """
+    Lexer for the PostgreSQL dialect of SQL.
+    """
+
+    name = 'PostgreSQL SQL dialect'
+    aliases = ['postgresql', 'postgres']
+    mimetypes = ['text/x-postgresql']
+
+    flags = re.IGNORECASE
+    tokens = {
+        'root': [
+            (r'\s+', Text),
+            (r'--.*?\n', Comment.Single),
+            (r'/\*', Comment.Multiline, 'multiline-comments'),
+            (r'(' + '|'.join(KEYWORDS) + r')\b', Keyword),
+            (r'(' + '|'.join([s.replace(" ", "\s+") for s in DATATYPES])
+                  + r')\b', Name.Builtin),
+            (r'[+*/<>=~!@#%^&|`?^-]', Operator),
+            (r'::', Operator),  # cast
+            (r'([0-9]*\.[0-9]*|[0-9]+)(e[+-]?[0-9]+)?', Number.Float),
+            (r'[0-9]+', Number.Integer),
+            # TODO: Backslash escapes?
+            (r"'(''|[^'])*'", String.Single),
+            (r'"(""|[^"])*"', String.Name),  # quoted identifier
+            (r'[a-zA-Z_][a-zA-Z0-9_]*', Name),
+            (r'[;:()\[\],\.]', Punctuation),
+            # psql backslash command.
+            # This actually belongs to the console lexer,
+            # but putting it here makes things easier.
+            (r'\\.*?\n', Name),  # TODO: what is a good token?
+        ],
+        'multiline-comments': [
+            (r'/\*', Comment.Multiline, 'multiline-comments'),
+            (r'\*/', Comment.Multiline, '#pop'),
+            (r'[^/\*]+', Comment.Multiline),
+            (r'[/*]', Comment.Multiline)
+        ]
+    }
+
+
+re_prompt = re.compile(r'^([a-zA-Z_][a-zA-Z0-9_]+)?[=\-\(]#')
+re_psql_command = re.compile(r'(\s*)(\\.+?)(\s+)$')
+re_error = re.compile(r'ERROR:')
+re_message = re.compile(r'(DEBUG|INFO|WARNING|ERROR|HINT|LINE [0-9]+:?)(.*?\n)')
+re_charhint = re.compile(r'\s*\^\s*\n')
+
+
+class PostgresConsoleLexer(Lexer):
+    """
+    Lexer for psql sessions.
+
+    TODO: multiline comments are broken.
+    """
+
+    name = 'PostgreSQL console (psql)'
+    aliases = ['psql', 'postgresql-console', 'postgres-console']
+    mimetypes = ['text/x-postgresql-psql']
+
+    def get_tokens_unprocessed(self, data):
+        sql = PostgresLexer(**self.options)
+
+        curcode = ''
+        insertions = []
+        out_token = Generic.Output
+        for match in line_re.finditer(data):
+            line = match.group()
+            mprompt = re_prompt.match(line)
+            if mprompt is not None:
+                out_token = Generic.Output
+                insertions.append((len(curcode),
+                                   [(0, Generic.Prompt, mprompt.group())]))
+                curcode += line[len(mprompt.group()):]
+            else:
+                if curcode:
+                    for item in do_insertions(insertions,
+                            sql.get_tokens_unprocessed(curcode)):
+                        yield item
+                    curcode = ''
+                    insertions = []
+                mmsg = re_message.match(line)
+                if mmsg is not None:
+                    if mmsg.group(1).startswith("ERROR"):
+                        out_token = Generic.Error
+                    yield (mmsg.start(1), Generic.Strong, mmsg.group(1))
+                    yield (mmsg.start(2), out_token, mmsg.group(2))
+                elif re_charhint.match(line):
+                    yield (match.start(), out_token, line)
+                else:
+                    yield (match.start(), Generic.Output, line)
+
+        if curcode:
+            for item in do_insertions(insertions,
+                    sql.get_tokens_unprocessed(curcode)):
+                yield item
+
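For orientation, a minimal sketch of how the new SQL lexer would be driven through the standard Pygments API; the sample statement is invented and the snippet is not part of this diff.

    from pygments import highlight
    from pygments.formatters import TerminalFormatter
    from pygments.lexers.postgres import PostgresLexer

    # Highlight a single PostgreSQL statement on the terminal.
    sql = "SELECT oid::int, relname FROM pg_class WHERE relkind = 'r';"
    print highlight(sql, PostgresLexer(), TerminalFormatter())
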
+ """ + + name = 'PostgreSQL console (psql)' + aliases = ['psql', 'postgresql-console', 'postgres-console'] + mimetypes = ['text/x-postgresql-psql'] + + def get_tokens_unprocessed(self, data): + sql = PostgresLexer(**self.options) + + curcode = '' + insertions = [] + out_token = Generic.Output + for match in line_re.finditer(data): + line = match.group() + mprompt = re_prompt.match(line) + if mprompt is not None: + out_token = Generic.Output + insertions.append((len(curcode), + [(0, Generic.Prompt, mprompt.group())])) + curcode += line[len(mprompt.group()):] + else: + if curcode: + for item in do_insertions(insertions, + sql.get_tokens_unprocessed(curcode)): + yield item + curcode = '' + insertions = [] + mmsg = re_message.match(line) + if mmsg is not None: + if mmsg.group(1).startswith("ERROR"): + out_token = Generic.Error + yield (mmsg.start(1), Generic.Strong, mmsg.group(1)) + yield (mmsg.start(2), out_token, mmsg.group(2)) + elif re_charhint.match(line): + yield (match.start(), out_token, line) + else: + yield (match.start(), Generic.Output, line) + + if curcode: + for item in do_insertions(insertions, + sql.get_tokens_unprocessed(curcode)): + yield item + + diff --git a/tests/test_basic_api.py b/tests/test_basic_api.py index e8b9cf9a..77c0aaea 100644 --- a/tests/test_basic_api.py +++ b/tests/test_basic_api.py @@ -87,7 +87,7 @@ def test_lexer_options(): if cls.__name__ not in ( 'PythonConsoleLexer', 'RConsoleLexer', 'RubyConsoleLexer', 'SqliteConsoleLexer', 'MatlabSessionLexer', 'ErlangShellLexer', - 'BashSessionLexer', 'LiterateHaskellLexer'): + 'BashSessionLexer', 'LiterateHaskellLexer', 'PostgresConsoleLexer'): inst = cls(ensurenl=False) ensure(inst.get_tokens('a\nb'), 'a\nb') inst = cls(ensurenl=False, stripall=True) |