diff options
author | Tim Hatch <tim@timhatch.com> | 2012-11-05 15:47:11 -0800 |
---|---|---|
committer | Tim Hatch <tim@timhatch.com> | 2012-11-05 15:47:11 -0800 |
commit | d8065d2c09d223162384808f990d0b2f681f8359 (patch) | |
tree | b769d3fff7ca1fbc2efcfca312a95aad8a3f523d | |
parent | 72a60d9fbde9e42376f28bc4cbdd7f16d9c09c91 (diff) | |
parent | 08236e153c506b9e5dbeec0e12f7545a3147cd04 (diff) | |
download | pygments-d8065d2c09d223162384808f990d0b2f681f8359.tar.gz |
Merge dg lexer (Pull Request 116)
-rw-r--r-- | pygments/lexers/_mapping.py | 1 | ||||
-rw-r--r-- | pygments/lexers/agile.py | 96 | ||||
-rw-r--r-- | tests/examplefiles/inet_pton6.dg | 71 |
3 files changed, 167 insertions, 1 deletions
diff --git a/pygments/lexers/_mapping.py b/pygments/lexers/_mapping.py index 514783de..2c1f0e9a 100644 --- a/pygments/lexers/_mapping.py +++ b/pygments/lexers/_mapping.py @@ -78,6 +78,7 @@ LEXERS = { 'DartLexer': ('pygments.lexers.web', 'Dart', ('dart',), ('*.dart',), ('text/x-dart',)), 'DebianControlLexer': ('pygments.lexers.text', 'Debian Control file', ('control',), ('control',), ()), 'DelphiLexer': ('pygments.lexers.compiled', 'Delphi', ('delphi', 'pas', 'pascal', 'objectpascal'), ('*.pas',), ('text/x-pascal',)), + 'DgLexer': ('pygments.lexers.agile', 'dg', ('dg',), ('*.dg',), ('text/x-dg',)), 'DiffLexer': ('pygments.lexers.text', 'Diff', ('diff', 'udiff'), ('*.diff', '*.patch'), ('text/x-diff', 'text/x-patch')), 'DjangoLexer': ('pygments.lexers.templates', 'Django/Jinja', ('django', 'jinja'), (), ('application/x-django-templating', 'application/x-jinja')), 'DtdLexer': ('pygments.lexers.web', 'DTD', ('dtd',), ('*.dtd',), ('application/xml-dtd',)), diff --git a/pygments/lexers/agile.py b/pygments/lexers/agile.py index 85e157fb..5939335b 100644 --- a/pygments/lexers/agile.py +++ b/pygments/lexers/agile.py @@ -22,7 +22,8 @@ from pygments import unistring as uni __all__ = ['PythonLexer', 'PythonConsoleLexer', 'PythonTracebackLexer', 'Python3Lexer', 'Python3TracebackLexer', 'RubyLexer', 'RubyConsoleLexer', 'PerlLexer', 'LuaLexer', 'MoonScriptLexer', - 'CrocLexer', 'MiniDLexer', 'IoLexer', 'TclLexer', 'FactorLexer', 'FancyLexer'] + 'CrocLexer', 'MiniDLexer', 'IoLexer', 'TclLexer', 'FactorLexer', + 'FancyLexer', 'DgLexer'] # b/w compatibility from pygments.lexers.functional import SchemeLexer @@ -1815,3 +1816,96 @@ class FancyLexer(RegexLexer): (r'\d+', Number.Integer) ] } + + +class DgLexer(RegexLexer): + """ + Lexer for `dg <http://pyos.github.com/dg>`_, + a functional and object-oriented programming language + running on the CPython 3 VM. + """ + name = 'dg' + aliases = ['dg'] + filenames = ['*.dg'] + mimetypes = ['text/x-dg'] + + tokens = { + 'root': [ + # Whitespace: + (r'\s+', Text), + (r'#.*?$', Comment.Single), + # Lexemes: + # Numbers + (r'0[bB][01]+', Number.Bin), + (r'0[oO][0-7]+', Number.Oct), + (r'0[xX][\da-fA-F]+', Number.Hex), + (r'[+-]?\d+\.\d+([eE][+-]?\d+)?[jJ]?', Number.Float), + (r'[+-]?\d+[eE][+-]?\d+[jJ]?', Number.Float), + (r'[+-]?\d+[jJ]?', Number.Integer), + # Character/String Literals + (r"[br]*'''", String, combined('stringescape', 'tsqs', 'string')), + (r'[br]*"""', String, combined('stringescape', 'tdqs', 'string')), + (r"[br]*'", String, combined('stringescape', 'sqs', 'string')), + (r'[br]*"', String, combined('stringescape', 'dqs', 'string')), + # Operators + (r"`\w+'*`", Operator), # Infix links + # Reserved infix links + (r'\b(or|and|if|unless|else|where|is|in)\b', Operator.Word), + (r'[!$%&*+\--/:<-@\\^|~;,]+', Operator), + # Identifiers + # Python 3 types + (r"(?<!\.)(bool|bytearray|bytes|classmethod|complex|dict'?|" + r"float|frozenset|int|list'?|memoryview|object|property|range|" + r"set'?|slice|staticmethod|str|super|tuple'?|type)" + r"(?!['\w])", Name.Builtin), + # Python 3 builtins + some more + (r'(?<!\.)(__import__|abs|all|any|bin|bind|chr|cmp|compile|complex|' + r'delattr|dir|divmod|drop|dropwhile|enumerate|eval|filter|flip|' + r'foldl1?|format|fst|getattr|globals|hasattr|hash|head|hex|id|' + r'init|input|isinstance|issubclass|iter|iterate|last|len|locals|' + r'map|max|min|next|oct|open|ord|pow|print|repr|reversed|round|' + r'setattr|scanl1?|snd|sorted|sum|tail|take|takewhile|vars|zip)' + r"(?!['\w])", Name.Builtin), + (r"(?<!\.)(self|Ellipsis|NotImplemented|None|True|False)(?!['\w])", + Name.Builtin.Pseudo), + (r"(?<!\.)[A-Z]\w*(Error|Exception|Warning)'*(?!['\w])", + Name.Exception), + (r"(?<!\.)(KeyboardInterrupt|SystemExit|StopIteration|" + r"GeneratorExit)(?!['\w])", Name.Exception), + # Compiler-defined identifiers + (r"(?<![\.\w])(import|inherit|for|while|switch|not|raise|unsafe|" + r"yield|with)(?!['\w])", Keyword.Reserved), + # Other links + (r"[A-Z_']+\b", Name), + (r"[A-Z][\w']*\b", Keyword.Type), + (r"\w+'*", Name), + # Blocks + (r'[()]', Punctuation), + ], + 'stringescape': [ + (r'\\([\\abfnrtv"\']|\n|N{.*?}|u[a-fA-F0-9]{4}|' + r'U[a-fA-F0-9]{8}|x[a-fA-F0-9]{2}|[0-7]{1,3})', String.Escape) + ], + 'string': [ + (r'%(\([a-zA-Z0-9_]+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?' + '[hlL]?[diouxXeEfFgGcrs%]', String.Interpol), + (r'[^\\\'"%\n]+', String), + # quotes, percents and backslashes must be parsed one at a time + (r'[\'"\\]', String), + # unhandled string formatting sign + (r'%', String), + (r'\n', String) + ], + 'dqs': [ + (r'"', String, '#pop') + ], + 'sqs': [ + (r"'", String, '#pop') + ], + 'tdqs': [ + (r'"""', String, '#pop') + ], + 'tsqs': [ + (r"'''", String, '#pop') + ], + } diff --git a/tests/examplefiles/inet_pton6.dg b/tests/examplefiles/inet_pton6.dg new file mode 100644 index 00000000..c56a66a3 --- /dev/null +++ b/tests/examplefiles/inet_pton6.dg @@ -0,0 +1,71 @@ +re = import! +sys = import! + + +# IPv6address = hexpart [ ":" IPv4address ] +# IPv4address = 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT +# hexpart = [ hexseq ] [ "::" [ hexseq ] ] +# hexseq = hex4 *( ":" hex4) +# hex4 = 1*4HEXDIG +hexpart = r'({0}|)(?:::({0}|)|)'.format r'(?:[\da-f]{1,4})(?::[\da-f]{1,4})*' +addrv4 = r'(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})' +addrv6 = re.compile $ r'(?i)(?:{})(?::{})?$'.format hexpart addrv4 + + +# Parse a base-N number given a list of its digits. +# +# :param q: the number of digits in that numeral system +# +# :param digits: an iterable of integers in range [0..q] +# +# :return: a decimal integer +# +base_n = (q digits) -> foldl (x y) -> (x * q + y) 0 digits + + +# Parse a sequence of hexadecimal numbers +# +# :param q: a string of colon-separated base-16 integers +# +# :return: an iterable of Python ints +# +unhex = q -> q and map p -> (int p 16) (q.split ':') + + +# Parse an IPv6 address as specified in RFC 4291. +# +# :param address: a string, obviously. +# +# :return: an integer which, written in binary form, points to the same node. +# +inet_pton6 = address -> + raise $ ValueError 'not a valid IPv6 address' unless match = addrv6.match address + start, end, *ipv4 = match.groups! + + is_ipv4 = not $ None in ipv4 + shift = (7 - start.count ':' - 2 * is_ipv4) * 16 + + raise $ ValueError 'not a valid IPv6 address' if (end is None and shift) or shift < 0 + hexaddr = (base_n 0x10000 (unhex start) << shift) + base_n 0x10000 (unhex $ end or '') + hexaddr unless is_ipv4 else (hexaddr << 32) + base_n 0x100 (map int ipv4) + + +inet6_type = q -> switch + not q = 'unspecified' + q == 1 = 'loopback' + (q >> 32) == 0x000000000000ffff = 'IPv4-mapped' + (q >> 64) == 0xfe80000000000000 = 'link-local' + (q >> 120) != 0x00000000000000ff = 'general unicast' + (q >> 112) % (1 << 4) == 0x0000000000000000 = 'multicast w/ reserved scope value' + (q >> 112) % (1 << 4) == 0x000000000000000f = 'multicast w/ reserved scope value' + (q >> 112) % (1 << 4) == 0x0000000000000001 = 'interface-local multicast' + (q >> 112) % (1 << 4) == 0x0000000000000004 = 'admin-local multicast' + (q >> 112) % (1 << 4) == 0x0000000000000005 = 'site-local multicast' + (q >> 112) % (1 << 4) == 0x0000000000000008 = 'organization-local multicast' + (q >> 112) % (1 << 4) == 0x000000000000000e = 'global multicast' + (q >> 112) % (1 << 4) != 0x0000000000000002 = 'multicast w/ unknown scope value' + (q >> 24) % (1 << 112) == 0x00000000000001ff = 'solicited-node multicast' + True = 'link-local multicast' + + +print $ (x -> (inet6_type x, hex x)) $ inet_pton6 $ sys.stdin.read!.strip! |