diff options
-rw-r--r-- | pygments/lexers/_postgres_builtins.py | 218 |
1 files changed, 109 insertions, 109 deletions
diff --git a/pygments/lexers/_postgres_builtins.py b/pygments/lexers/_postgres_builtins.py index c71cfb66..38468452 100644 --- a/pygments/lexers/_postgres_builtins.py +++ b/pygments/lexers/_postgres_builtins.py @@ -9,115 +9,6 @@ :license: BSD, see LICENSE for details. """ -import re -try: - from urllib import urlopen -except ImportError: - from urllib.request import urlopen - -from pygments.util import format_lines - -# One man's constant is another man's variable. -SOURCE_URL = 'https://github.com/postgres/postgres/raw/master' -KEYWORDS_URL = SOURCE_URL + '/doc/src/sgml/keywords.sgml' -DATATYPES_URL = SOURCE_URL + '/doc/src/sgml/datatype.sgml' - -def update_myself(): - data_file = list(urlopen(DATATYPES_URL)) - datatypes = parse_datatypes(data_file) - pseudos = parse_pseudos(data_file) - - keywords = parse_keywords(urlopen(KEYWORDS_URL)) - update_consts(__file__, 'DATATYPES', datatypes) - update_consts(__file__, 'PSEUDO_TYPES', pseudos) - update_consts(__file__, 'KEYWORDS', keywords) - -def parse_keywords(f): - kw = [] - for m in re.finditer( - r'\s*<entry><token>([^<]+)</token></entry>\s*' - r'<entry>([^<]+)</entry>', f.read()): - kw.append(m.group(1)) - - if not kw: - raise ValueError('no keyword found') - - kw.sort() - return kw - -def parse_datatypes(f): - dt = set() - for line in f: - if '<sect1' in line: - break - if '<entry><type>' not in line: - continue - - # Parse a string such as - # time [ (<replaceable>p</replaceable>) ] [ without time zone ] - # into types "time" and "without time zone" - - # remove all the tags - line = re.sub("<replaceable>[^<]+</replaceable>", "", line) - line = re.sub("<[^>]+>", "", line) - - # Drop the parts containing braces - for tmp in [t for tmp in line.split('[') - for t in tmp.split(']') if "(" not in t]: - for t in tmp.split(','): - t = t.strip() - if not t: continue - dt.add(" ".join(t.split())) - - dt = list(dt) - dt.sort() - return dt - -def parse_pseudos(f): - dt = [] - re_start = re.compile(r'\s*<table id="datatype-pseudotypes-table">') - re_entry = re.compile(r'\s*<entry><type>([^<]+)</></entry>') - re_end = re.compile(r'\s*</table>') - - f = iter(f) - for line in f: - if re_start.match(line) is not None: - break - else: - raise ValueError('pseudo datatypes table not found') - - for line in f: - m = re_entry.match(line) - if m is not None: - dt.append(m.group(1)) - - if re_end.match(line) is not None: - break - else: - raise ValueError('end of pseudo datatypes table not found') - - if not dt: - raise ValueError('pseudo datatypes not found') - - return dt - -def update_consts(filename, constname, content): - with open(filename) as f: - data = f.read() - - # Line to start/end inserting - re_match = re.compile(r'^%s\s*=\s*\($.*?^\s*\)$' % constname, re.M | re.S) - m = re_match.search(data) - if not m: - raise ValueError('Could not find existing definition for %s' % - (constname,)) - - new_block = format_lines(constname, content) - data = data[:m.start()] + new_block + data[m.end():] - - with open(filename, 'w') as f: - f.write(data) - # Autogenerated: please edit them if you like wasting your time. @@ -617,4 +508,113 @@ PLPGSQL_KEYWORDS = ( ) if __name__ == '__main__': + import re + try: + from urllib import urlopen + except ImportError: + from urllib.request import urlopen + + from pygments.util import format_lines + + # One man's constant is another man's variable. + SOURCE_URL = 'https://github.com/postgres/postgres/raw/master' + KEYWORDS_URL = SOURCE_URL + '/doc/src/sgml/keywords.sgml' + DATATYPES_URL = SOURCE_URL + '/doc/src/sgml/datatype.sgml' + + def update_myself(): + data_file = list(urlopen(DATATYPES_URL)) + datatypes = parse_datatypes(data_file) + pseudos = parse_pseudos(data_file) + + keywords = parse_keywords(urlopen(KEYWORDS_URL)) + update_consts(__file__, 'DATATYPES', datatypes) + update_consts(__file__, 'PSEUDO_TYPES', pseudos) + update_consts(__file__, 'KEYWORDS', keywords) + + def parse_keywords(f): + kw = [] + for m in re.finditer( + r'\s*<entry><token>([^<]+)</token></entry>\s*' + r'<entry>([^<]+)</entry>', f.read()): + kw.append(m.group(1)) + + if not kw: + raise ValueError('no keyword found') + + kw.sort() + return kw + + def parse_datatypes(f): + dt = set() + for line in f: + if '<sect1' in line: + break + if '<entry><type>' not in line: + continue + + # Parse a string such as + # time [ (<replaceable>p</replaceable>) ] [ without time zone ] + # into types "time" and "without time zone" + + # remove all the tags + line = re.sub("<replaceable>[^<]+</replaceable>", "", line) + line = re.sub("<[^>]+>", "", line) + + # Drop the parts containing braces + for tmp in [t for tmp in line.split('[') + for t in tmp.split(']') if "(" not in t]: + for t in tmp.split(','): + t = t.strip() + if not t: continue + dt.add(" ".join(t.split())) + + dt = list(dt) + dt.sort() + return dt + + def parse_pseudos(f): + dt = [] + re_start = re.compile(r'\s*<table id="datatype-pseudotypes-table">') + re_entry = re.compile(r'\s*<entry><type>([^<]+)</></entry>') + re_end = re.compile(r'\s*</table>') + + f = iter(f) + for line in f: + if re_start.match(line) is not None: + break + else: + raise ValueError('pseudo datatypes table not found') + + for line in f: + m = re_entry.match(line) + if m is not None: + dt.append(m.group(1)) + + if re_end.match(line) is not None: + break + else: + raise ValueError('end of pseudo datatypes table not found') + + if not dt: + raise ValueError('pseudo datatypes not found') + + return dt + + def update_consts(filename, constname, content): + with open(filename) as f: + data = f.read() + + # Line to start/end inserting + re_match = re.compile(r'^%s\s*=\s*\($.*?^\s*\)$' % constname, re.M | re.S) + m = re_match.search(data) + if not m: + raise ValueError('Could not find existing definition for %s' % + (constname,)) + + new_block = format_lines(constname, content) + data = data[:m.start()] + new_block + data[m.end():] + + with open(filename, 'w') as f: + f.write(data) + update_myself() |