diff options
-rw-r--r-- | AUTHORS | 1 |
-rw-r--r-- | CHANGES | 1 |
-rw-r--r-- | pygments/lexers/_mapping.py | 2 |
-rw-r--r-- | pygments/lexers/other.py | 111 |
-rw-r--r-- | pygments/lexers/urbiscript.py | 130 |
-rw-r--r-- | tests/examplefiles/example.u | 6 |
6 files changed, 115 insertions, 136 deletions
@@ -72,6 +72,7 @@ Other contributors, listed alphabetically, are: * Ronny Pfannschmidt -- BBCode lexer * Benjamin Peterson -- Test suite refactoring * Dominik Picheta -- Nimrod lexer +* Clément Prévost -- UrbiScript lexer * Justin Reidy -- MXML lexer * Norman Richards -- JSON lexer * Lubomir Rintel -- GoodData MAQL and CL lexers @@ -30,6 +30,7 @@ Version 1.5 * SNOBOL (PR#30) * MoonScript (PR#43) * ECL (PR#29) + * Urbiscript (PR#17) - In the LaTeX formatter, escape special &, < and > chars (#648). diff --git a/pygments/lexers/_mapping.py b/pygments/lexers/_mapping.py index dec69e1b..76bbb840 100644 --- a/pygments/lexers/_mapping.py +++ b/pygments/lexers/_mapping.py @@ -219,7 +219,7 @@ LEXERS = { 'TcshLexer': ('pygments.lexers.other', 'Tcsh', ('tcsh', 'csh'), ('*.tcsh', '*.csh'), ('application/x-csh',)), 'TexLexer': ('pygments.lexers.text', 'TeX', ('tex', 'latex'), ('*.tex', '*.aux', '*.toc'), ('text/x-tex', 'text/x-latex')), 'TextLexer': ('pygments.lexers.special', 'Text only', ('text',), ('*.txt',), ('text/plain',)), - 'UrbiscriptLexer': ('pygments.lexers.urbiscript', 'UrbiScript', ('urbiscript','Urbiscript'), ('*.u'), ()), + 'UrbiscriptLexer': ('pygments.lexers.other', 'UrbiScript', ('urbiscript',), ('*.u',), ('application/x-urbiscript',)), 'ValaLexer': ('pygments.lexers.compiled', 'Vala', ('vala', 'vapi'), ('*.vala', '*.vapi'), ('text/x-vala',)), 'VbNetAspxLexer': ('pygments.lexers.dotnet', 'aspx-vb', ('aspx-vb',), ('*.aspx', '*.asax', '*.ascx', '*.ashx', '*.asmx', '*.axd'), ()), 'VbNetLexer': ('pygments.lexers.dotnet', 'VB.net', ('vb.net', 'vbnet'), ('*.vb', '*.bas'), ('text/x-vbnet', 'text/x-vba')), diff --git a/pygments/lexers/other.py b/pygments/lexers/other.py index 44cf4f47..db75c7a1 100644 --- a/pygments/lexers/other.py +++ b/pygments/lexers/other.py @@ -12,7 +12,7 @@ import re from pygments.lexer import Lexer, RegexLexer, include, bygroups, using, \ - this, do_insertions, combined + this, do_insertions, combined, ExtendedRegexLexer from pygments.token 
import Error, Punctuation, Literal, Token, \ Text, Comment, Operator, Keyword, Name, String, Number, Generic from pygments.util import ClassNotFound, shebang_matches @@ -27,7 +27,8 @@ __all__ = ['SqlLexer', 'MySqlLexer', 'SqliteConsoleLexer', 'BrainfuckLexer', 'NewspeakLexer', 'GherkinLexer', 'AsymptoteLexer', 'PostScriptLexer', 'AutohotkeyLexer', 'GoodDataCLLexer', 'MaqlLexer', 'ProtoBufLexer', 'HybrisLexer', 'AwkLexer', - 'Cfengine3Lexer', 'HttpLexer', 'SnobolLexer', 'ECLLexer'] + 'Cfengine3Lexer', 'HttpLexer', 'SnobolLexer', 'ECLLexer', + 'UrbiscriptLexer'] line_re = re.compile('.*?\n') @@ -3176,3 +3177,109 @@ class SnobolLexer(RegexLexer): (r'.*\n', String.Heredoc) ] } + + +class UrbiscriptLexer(ExtendedRegexLexer): + """ + For UrbiScript source code. + + *New in Pygments 1.5.* + """ + + name = 'UrbiScript' + aliases = ['urbiscript'] + filenames = ['*.u'] + mimetypes = ['application/x-urbiscript'] + + flags = re.DOTALL + + ## TODO + # - handle Experimental and deprecated tags with specific tokens + # - handle Angles and Durations with specific tokens + + def blob_callback(lexer, match, ctx): + text_before_blob = match.group(1) + blob_start = match.group(2) + blob_size_str = match.group(3) + blob_size = int(blob_size_str) + yield match.start(), String, text_before_blob + ctx.pos += len(text_before_blob) + + # if blob size doesn't match blob format (example : "\B(2)(aaa)") + # yield blob as a string + if ctx.text[match.end() + blob_size] != ")": + result = "\\B(" + blob_size_str + ")(" + yield match.start(), String, result + ctx.pos += len(result) + return + + # if blob is well formated, yield as Escape + blob_text = blob_start + ctx.text[match.end():match.end()+blob_size] + ")" + yield match.start(), String.Escape, blob_text + ctx.pos = match.end() + blob_size + 1 # +1 is the ending ")" + + tokens = { + 'root': [ + (r'\s+', Text), + # comments + (r'//.*?\n', Comment), + (r'/\*', Comment.Multiline, 'comment'), + (r'(?:every|for|loop|while)(?:;|&|\||,)',Keyword), 
+ (r'(?:assert|at|break|case|catch|closure|compl|continue|' + r'default|else|enum|every|external|finally|for|freezeif|if|new|' + r'onleave|return|stopif|switch|this|throw|timeout|try|' + r'waituntil|whenever|while)\b', Keyword), + (r'(?:asm|auto|bool|char|const_cast|delete|double|dynamic_cast|' + r'explicit|export|extern|float|friend|goto|inline|int|' + r'long|mutable|namespace|register|reinterpret_cast|short|' + r'signed|sizeof|static_cast|struct|template|typedef|typeid|' + r'typename|union|unsigned|using|virtual|volatile|' + r'wchar_t)\b', Keyword.Reserved), + # deprecated keywords, use a meaningful token when available + (r'(?:emit|foreach|internal|loopn|static)\b', Keyword), + # ignored keywords, use a meaningful token when available + (r'(?:private|protected|public)\b', Keyword), + (r'(?:var|do|const|function|class)\b', Keyword.Declaration), + (r'(?:true|false|nil|void)\b', Keyword.Constant), + (r'(?:Barrier|Binary|Boolean|CallMessage|Channel|Code|' + r'Comparable|Container|Control|Date|Dictionary|Directory|' + r'Duration|Enumeration|Event|Exception|Executable|File|Finalizable|' + r'Float|FormatInfo|Formatter|Global|Group|Hash|InputStream|' + r'IoService|Job|Kernel|Lazy|List|Loadable|Lobby|Location|Logger|Math|' + r'Mutex|nil|Object|Orderable|OutputStream|Pair|Path|Pattern|Position|' + r'Primitive|Process|Profile|PseudoLazy|PubSub|RangeIterable|Regexp|' + r'Semaphore|Server|Singleton|Socket|StackFrame|Stream|String|System|' + r'Tag|Timeout|Traceable|TrajectoryGenerator|Triplet|Tuple' + r'|UObject|UValue|UVar)\b', Name.Builtin), + (r'(?:this)\b', Name.Builtin.Pseudo), + # don't match single | and & + (r'(?:[-=+*%/<>~^:]+|\.&?|\|\||&&)', Operator), + (r'(?:and_eq|and|bitand|bitor|in|not|not_eq|or_eq|or|xor_eq|xor)\b', + Operator.Word), + (r'[{}\[\]()]+', Punctuation), + (r'(?:;|\||,|&|\?|!)+', Punctuation), + (r'[$a-zA-Z_][a-zA-Z0-9_]*', Name.Other), + (r'0x[0-9a-fA-F]+', Number.Hex), + # Float, Integer, Angle and Duration + 
(r'(?:[0-9]+(?:(?:\.[0-9]+)?(?:[eE][+-]?[0-9]+)?)?' + r'((?:rad|deg|grad)|(?:ms|s|min|h|d))?)\b', Number.Float), + # handle binary blob in strings + (r'"', String.Double, "string.double"), + (r"'", String.Single, "string.single"), + ], + 'string.double': [ + (r'((?:\\\\|\\"|[^"])*?)(\\B\((\d+)\)\()', blob_callback), + (r'(\\\\|\\"|[^"])*?"', String.Double, '#pop'), + ], + 'string.single': [ + (r"((?:\\\\|\\'|[^'])*?)(\\B\((\d+)\)\()", blob_callback), + (r"(\\\\|\\'|[^'])*?'", String.Single, '#pop'), + ], + # from http://pygments.org/docs/lexerdevelopment/#changing-states + 'comment': [ + (r'[^*/]', Comment.Multiline), + (r'/\*', Comment.Multiline, '#push'), + (r'\*/', Comment.Multiline, '#pop'), + (r'[*/]', Comment.Multiline), + ] + } diff --git a/pygments/lexers/urbiscript.py b/pygments/lexers/urbiscript.py deleted file mode 100644 index cf415721..00000000 --- a/pygments/lexers/urbiscript.py +++ /dev/null @@ -1,130 +0,0 @@ -# -*- coding: utf-8 -*- -""" - pygments.lexers.urbiscript - ~~~~~~~~~~~~~~~~~~~ - - Lexers for urbiscript language. - Based on JavascriptLexer and CppLexer. - - :copyright: 2011 Clément Prévost. - :license: BSD, see LICENSE for more details. -""" - -import re -try: - set -except NameError: - from sets import Set as set - -from pygments.lexer import ExtendedRegexLexer, bygroups, using, include, this -from pygments.token import \ - Text, Comment, Operator, Keyword, Name, String, Number, Other, Punctuation -from pygments.util import get_bool_opt, get_list_opt, looks_like_xml, \ - html_doctype_matches - -__all__ = ['UrbiscriptLexer'] - - -class UrbiscriptLexer(ExtendedRegexLexer): - """ - For JavaScript source code. 
- """ - - name = 'UrbiScript' - aliases = ['urbiscript'] - filenames = ['*.u'] - #mimetypes = ['text/plain'] - - flags = re.DOTALL - - ## TODO - # - handle Experimental and deprecated tags with specific tokens - # - handle Angles and Durations with specific tokens - - def blob_callback(lexer, match, ctx): - text_before_blob = match.group(1) - blob_start = match.group(2) - blob_size_str = match.group(3) - blob_size = int(blob_size_str) - yield match.start(), String, text_before_blob - ctx.pos += len(text_before_blob) - - # if blob size doesn't match blob format (example : "\B(2)(aaa)") - # yield blob as a string - if ctx.text[match.end() + blob_size] != ")": - result = "\\B(" + blob_size_str + ")(" - yield match.start(), String, result - ctx.pos += len(result) - return - - # if blob is well formated, yield as Escape - blob_text = blob_start + ctx.text[match.end():match.end()+blob_size] + ")" - yield match.start(), String.Escape, blob_text - ctx.pos = match.end() + blob_size + 1 # +1 is the ending ")" - - - tokens = { - 'root': [ - (r'\s+', Text), - # comments - (r'//.*?\n', Comment), - (r'/\*', Comment.Multiline, 'comment'), - (r'(?:every|for|loop|while)(?:;|&|\||,)',Keyword), - (r'(?:assert|at|break|case|catch|closure|compl|continue|' - r'default|else|enum|every|external|finally|for|freezeif|if|new|' - r'onleave|return|stopif|switch|this|throw|timeout|try|' - r'waituntil|whenever|while)\b', Keyword), - (r'(?:asm|auto|bool|char|const_cast|delete|double|dynamic_cast|' - r'explicit|export|extern|float|friend|goto|inline|int|' - r'long|mutable|namespace|register|reinterpret_cast|short|' - r'signed|sizeof|static_cast|struct|template|typedef|typeid|' - r'typename|union|unsigned|using|virtual|volatile|' - r'wchar_t)\b', Keyword.Reserved), - # deprecated keywords, use a meaningfull token when available - (r'(?:emit|foreach|internal|loopn|static)\b', Keyword), - # ignored keywords, use a meaningfull token when available - (r'(?:private|protected|public)\b', Keyword), - 
(r'(?:var|do|const|function|class)\b', Keyword.Declaration), - (r'(?:true|false|nil|void)\b', Keyword.Constant), - (r'(?:Barrier|Binary|Boolean|CallMessage|Channel|Code|' - r'Comparable|Container|Control|Date|Dictionary|Directory|' - r'Duration|Enumeration|Event|Exception|Executable|File|Finalizable|' - r'Float|FormatInfo|Formatter|Global|Group|Hash|InputStream|' - r'IoService|Job|Kernel|Lazy|List|Loadable|Lobby|Location|Logger|Math|' - r'Mutex|nil|Object|Orderable|OutputStream|Pair|Path|Pattern|Position|' - r'Primitive|Process|Profile|PseudoLazy|PubSub|RangeIterable|Regexp|' - r'Semaphore|Server|Singleton|Socket|StackFrame|Stream|String|System|' - r'Tag|Timeout|Traceable|TrajectoryGenerator|Triplet|Tuple' - r'|UObject|UValue|UVar)\b', Name.Builtin), - (r'(?:this)\b', Name.Builtin.Pseudo), - (r'(?:[-=+*%/<>~^:]+|\.&?|\|\||&&)', Operator), # don't match single | and & - (r'(?:and_eq|and|bitand|bitor|in|not|not_eq|or_eq|or|xor_eq|xor)\b', Operator.Word), - (r'[{}\[\]()]+', Punctuation), - (r'(?:;|\||,|&|\?|!)+', Punctuation), - (r'[$a-zA-Z_][a-zA-Z0-9_]*', Name.Other), - (r'0x[0-9a-fA-F]+', Number.Hex), - # Float, Integer, Angle and Duration - (r'(?:[0-9]+(?:(?:\.[0-9]+)?(?:[eE][+-]?[0-9]+)?)?' 
- r'((?:rad|deg|grad)|(?:ms|s|min|h|d))?)\b', Number.Float), - # handle binary blob in strings - (r'"', String.Double, "string.double"), - (r"'", String.Single, "string.single"), - ], - 'string.double': [ - (r'((?:\\\\|\\"|[^"])*?)(\\B\((\d+)\)\()', blob_callback), - (r'(\\\\|\\"|[^"])*?"', String.Double, '#pop'), - ], - 'string.single': [ - (r"((?:\\\\|\\'|[^'])*?)(\\B\((\d+)\)\()", blob_callback), - (r"(\\\\|\\'|[^'])*?'", String.Single, '#pop'), - ], - # from http://pygments.org/docs/lexerdevelopment/#changing-states - 'comment': [ - (r'[^*/]', Comment.Multiline), - (r'/\*', Comment.Multiline, '#push'), - (r'\*/', Comment.Multiline, '#pop'), - (r'[*/]', Comment.Multiline) - ] - } - - diff --git a/tests/examplefiles/example.u b/tests/examplefiles/example.u index adaa2924..42c85902 100644 --- a/tests/examplefiles/example.u +++ b/tests/examplefiles/example.u @@ -107,13 +107,13 @@ pi == 200grad; 0xFFFFFFFF == 4294967295; -123foo; +//123foo; //[00005658:error] !!! syntax error: invalid token: '123foo' -12.3foo; +//12.3foo; //[00018827:error] !!! syntax error: invalid token: '12.3foo' 0xabcdef; //[00060432] 11259375 -0xabcdefg; +//0xabcdefg; //[00061848:error] !!! syntax error: invalid token: '0xabcdefg' |