merge with bgoetzmann/pygments-main

author: Georg Brandl <georg@python.org> 2011-06-18 13:49:00 +0200
committer: Georg Brandl <georg@python.org> 2011-06-18 13:49:00 +0200
commit: 1507dac8fd099aad7afca378965ceb508b00dddd (patch)
tree: 7c7a3448b6b7628f8bd33e207c1b68854ad375ae /pygments
parent: 522b59a55921a278a847c573974811417cd9fac6 (diff)
parent: 0bf6072b0e75fa4b6afab5d23477d7aa4aa1c77c (diff)
download: pygments-1507dac8fd099aad7afca378965ceb508b00dddd.tar.gz
11 files changed, 371 insertions, 67 deletions
diff --git a/pygments/formatters/latex.py b/pygments/formatters/latex.py
index cc464420..8c71f3e1 100644
--- a/pygments/formatters/latex.py
+++ b/pygments/formatters/latex.py
@@ -26,6 +26,9 @@ def escape_tex(text, commandprefix):
                 replace('\x02', r'\%sZcb{}' % commandprefix). \
                 replace('^', r'\%sZca{}' % commandprefix). \
                 replace('_', r'\%sZus{}' % commandprefix). \
+                replace('&', r'\%sZam{}' % commandprefix). \
+                replace('<', r'\%sZlt{}' % commandprefix). \
+                replace('>', r'\%sZgt{}' % commandprefix). \
                 replace('#', r'\%sZsh{}' % commandprefix). \
                 replace('%', r'\%sZpc{}' % commandprefix). \
                 replace('$', r'\%sZdl{}' % commandprefix). \
@@ -106,6 +109,9 @@ STYLE_TEMPLATE = r'''
 \def\%(cp)sZob{\char`\{}
 \def\%(cp)sZcb{\char`\}}
 \def\%(cp)sZca{\char`\^}
+\def\%(cp)sZam{\char`\&}
+\def\%(cp)sZlt{\char`\<}
+\def\%(cp)sZgt{\char`\>}
 \def\%(cp)sZsh{\char`\#}
 \def\%(cp)sZpc{\char`\%%}
 \def\%(cp)sZdl{\char`\$}
diff --git a/pygments/lexers/_mapping.py b/pygments/lexers/_mapping.py
index f6dfb56f..aa6a9180 100644
--- a/pygments/lexers/_mapping.py
+++ b/pygments/lexers/_mapping.py
@@ -41,7 +41,7 @@ LEXERS = {
     'BlitzMaxLexer': ('pygments.lexers.compiled', 'BlitzMax', ('blitzmax', 'bmax'), ('*.bmx',), ('text/x-bmx',)),
     'BooLexer': ('pygments.lexers.dotnet', 'Boo', ('boo',), ('*.boo',), ('text/x-boo',)),
     'BrainfuckLexer': ('pygments.lexers.other', 'Brainfuck', ('brainfuck', 'bf'), ('*.bf', '*.b'), ('application/x-brainfuck',)),
-    'CLexer': ('pygments.lexers.compiled', 'C', ('c',), ('*.c', '*.h'), ('text/x-chdr', 'text/x-csrc')),
+    'CLexer': ('pygments.lexers.compiled', 'C', ('c',), ('*.c', '*.h', '*.idc'), ('text/x-chdr', 'text/x-csrc')),
     'CMakeLexer': ('pygments.lexers.text', 'CMake', ('cmake',), ('*.cmake', 'CMakeLists.txt'), ('text/x-cmake',)),
     'CObjdumpLexer': ('pygments.lexers.asm', 'c-objdump', ('c-objdump',), ('*.c-objdump',), ('text/x-c-objdump',)),
     'CSharpAspxLexer': ('pygments.lexers.dotnet', 'aspx-cs', ('aspx-cs',), ('*.aspx', '*.asax', '*.ascx', '*.ashx', '*.asmx', '*.axd'), ()),
@@ -80,6 +80,7 @@ LEXERS = {
     'EvoqueHtmlLexer': ('pygments.lexers.templates', 'HTML+Evoque', ('html+evoque',), ('*.html',), ('text/html+evoque',)),
     'EvoqueLexer': ('pygments.lexers.templates', 'Evoque', ('evoque',), ('*.evoque',), ('application/x-evoque',)),
     'EvoqueXmlLexer': ('pygments.lexers.templates', 'XML+Evoque', ('xml+evoque',), ('*.xml',), ('application/xml+evoque',)),
+    'FSharpLexer': ('pygments.lexers.dotnet', 'FSharp', ('fsharp',), ('*.fs', '*.fsi'), ('text/x-fsharp',)),
     'FactorLexer': ('pygments.lexers.agile', 'Factor', ('factor',), ('*.factor',), ('text/x-factor',)),
     'FancyLexer': ('pygments.lexers.agile', 'Fancy', ('fancy', 'fy'), ('*.fy', '*.fancypack'), ('text/x-fancysrc',)),
     'FelixLexer': ('pygments.lexers.compiled', 'Felix', ('felix', 'flx'), ('*.flx', '*.flxh'), ('text/x-felix',)),
@@ -145,6 +146,7 @@ LEXERS = {
     'MyghtyLexer': ('pygments.lexers.templates', 'Myghty', ('myghty',), ('*.myt', 'autodelegate'), ('application/x-myghty',)),
     'MyghtyXmlLexer': ('pygments.lexers.templates', 'XML+Myghty', ('xml+myghty',), (), ('application/xml+myghty',)),
     'NasmLexer': ('pygments.lexers.asm', 'NASM', ('nasm',), ('*.asm', '*.ASM'), ('text/x-nasm',)),
+    'NemerleLexer': ('pygments.lexers.dotnet', 'Nemerle', ('nemerle',), ('*.n',), ('text/x-nemerle',)),
     'NewspeakLexer': ('pygments.lexers.other', 'Newspeak', ('newspeak',), ('*.ns2',), ('text/x-newspeak',)),
     'NginxConfLexer': ('pygments.lexers.text', 'Nginx configuration file', ('nginx',), (), ('text/x-nginx-conf',)),
     'NimrodLexer': ('pygments.lexers.compiled', 'Nimrod', ('nimrod', 'nim'), ('*.nim', '*.nimrod'), ('text/x-nimrod',)),
diff --git a/pygments/lexers/agile.py b/pygments/lexers/agile.py
index 662f8b7e..22fd2dbc 100644
--- a/pygments/lexers/agile.py
+++ b/pygments/lexers/agile.py
@@ -12,7 +12,7 @@
 import re
 
 from pygments.lexer import Lexer, RegexLexer, ExtendedRegexLexer, \
-     LexerContext, include, combined, do_insertions, bygroups, using
+     LexerContext, include, combined, do_insertions, bygroups, using, this
 from pygments.token import Error, Text, Other, \
      Comment, Operator, Keyword, Name, String, Number, Generic, Punctuation
 from pygments.util import get_bool_opt, get_list_opt, shebang_matches
@@ -174,7 +174,7 @@ class PythonLexer(RegexLexer):
     }
 
     def analyse_text(text):
-        return shebang_matches(text, r'pythonw?(2\.\d)?')
+        return shebang_matches(text, r'pythonw?(2(\.\d)?)?')
 
 
 class Python3Lexer(RegexLexer):
@@ -484,11 +484,11 @@ class RubyLexer(ExtendedRegexLexer):
 
     def gen_rubystrings_rules():
         def intp_regex_callback(self, match, ctx):
-            yield match.start(1), String.Regex, match.group(1)    # begin
+            yield match.start(1), String.Regex, match.group(1)  # begin
             nctx = LexerContext(match.group(3), 0, ['interpolated-regex'])
             for i, t, v in self.get_tokens_unprocessed(context=nctx):
                 yield match.start(3)+i, t, v
-            yield match.start(4), String.Regex, match.group(4)    # end[mixounse]*
+            yield match.start(4), String.Regex, match.group(4)  # end[mixounse]*
             ctx.pos = match.end()
 
         def intp_string_callback(self, match, ctx):
@@ -496,13 +496,13 @@ class RubyLexer(ExtendedRegexLexer):
             nctx = LexerContext(match.group(3), 0, ['interpolated-string'])
             for i, t, v in self.get_tokens_unprocessed(context=nctx):
                 yield match.start(3)+i, t, v
-            yield match.start(4), String.Other, match.group(4)    # end
+            yield match.start(4), String.Other, match.group(4)  # end
             ctx.pos = match.end()
 
         states = {}
         states['strings'] = [
             # easy ones
-            (r'\:([a-zA-Z_][\w_]*[\!\?]?|\*\*?|[-+]@?|'
+            (r'\:@{0,2}([a-zA-Z_][\w_]*[\!\?]?|\*\*?|[-+]@?|'
              r'[/%&|^`~]|\[\]=?|<<|>>|<=?>|>=?|===?)', String.Symbol),
             (r":'(\\\\|\\'|[^'])*'", String.Symbol),
             (r"'(\\\\|\\'|[^'])*'", String.Single),
@@ -661,7 +661,8 @@ class RubyLexer(ExtendedRegexLexer):
             # multiline regex (in method calls)
             (r'(?<=\(|,)/', String.Regex, 'multiline-regex'),
             # multiline regex (this time the funny no whitespace rule)
-            (r'(\s+)(/[^\s=])', String.Regex, 'multiline-regex'),
+            (r'(\s+)(/)(?![\s=])', bygroups(Text, String.Regex),
+             'multiline-regex'),
             # lex numbers and ignore following regular expressions which
             # are division operators in fact (grrrr. i hate that. any
             # better ideas?)
@@ -1044,7 +1045,6 @@ class LuaLexer(RegexLexer):
             (r'(true|false|nil)\b', Keyword.Constant),
 
             (r'(function)(\s+)', bygroups(Keyword, Text), 'funcname'),
-            (r'(class)(\s+)', bygroups(Keyword, Text), 'classname'),
 
             (r'[A-Za-z_][A-Za-z0-9_]*(\.[A-Za-z_][A-Za-z0-9_]*)?', Name),
 
@@ -1059,10 +1059,6 @@ class LuaLexer(RegexLexer):
             ('\(', Punctuation, '#pop'),
         ],
 
-        'classname': [
-            ('[A-Za-z_][A-Za-z0-9_]*', Name.Class, '#pop')
-        ],
-
         # if I understand correctly, every character is valid in a lua string,
         # so this state is only for later corrections
         'string': [
@@ -1155,7 +1151,7 @@ class MiniDLexer(RegexLexer):
             ),
             # StringLiteral
             # -- WysiwygString
-            (r'@"(""|.)*"', String),
+            (r'@"(""|[^"])*"', String),
             # -- AlternateWysiwygString
             (r'`(``|.)*`', String),
             # -- DoubleQuotedString
@@ -2003,7 +1999,7 @@ class FancyLexer(RegexLexer):
             # Comments
             (r'#(.*?)\n', Comment.Single),
             # Symbols
-            (r'\'[^\'\s]+', String.Symbol),
+            (r'\'([^\'\s\[\]\(\)\{\}]+|\[\])', String.Symbol),
             # Multi-line DoubleQuotedString
             (r'"""(\\\\|\\"|[^"])*"""', String),
             # DoubleQuotedString
@@ -2026,6 +2022,7 @@ class FancyLexer(RegexLexer):
             ('[A-Z][a-zA-Z0-9_]*', Name.Constant),
             ('@[a-zA-Z_][a-zA-Z0-9_]*', Name.Variable.Instance),
             ('@@[a-zA-Z_][a-zA-Z0-9_]*', Name.Variable.Class),
+            ('(@|@@)', Operator),
             ('[a-zA-Z_][a-zA-Z0-9_]*', Name),
             # numbers - / checks are necessary to avoid mismarking regexes,
             # see comment in RubyLexer
@@ -2046,14 +2043,15 @@ class FancyLexer(RegexLexer):
 class GroovyLexer(RegexLexer):
     """
     For `Groovy <http://groovy.codehaus.org/>`_ source code.
-    Syntax can be found at http://svn.codehaus.org/groovy/trunk/groovy/groovy-core/src/main/org/codehaus/groovy/antlr/groovy.g
+
+    *New in Pygments 1.5.*
     """
 
     name = 'Groovy'
     aliases = ['groovy']
     filenames = ['*.groovy']
     mimetypes = ['text/x-groovy']
-    
+
     flags = re.MULTILINE | re.DOTALL
 
     #: optional Comment or Whitespace
diff --git a/pygments/lexers/asm.py b/pygments/lexers/asm.py
index 4740569c..a57ea69d 100644
--- a/pygments/lexers/asm.py
+++ b/pygments/lexers/asm.py
@@ -88,7 +88,11 @@ class GasLexer(RegexLexer):
     }
 
     def analyse_text(text):
-        return re.match(r'^\.\w+', text, re.M)
+        if re.match(r'^\.(text|data|section)', text, re.M):
+            return True
+        elif re.match(r'^\.\w+', text, re.M):
+            return 0.1
+
 
 class ObjdumpLexer(RegexLexer):
     """
diff --git a/pygments/lexers/compiled.py b/pygments/lexers/compiled.py
index 2945be98..6c9d7c87 100644
--- a/pygments/lexers/compiled.py
+++ b/pygments/lexers/compiled.py
@@ -35,7 +35,7 @@ class CLexer(RegexLexer):
     """
     name = 'C'
     aliases = ['c']
-    filenames = ['*.c', '*.h']
+    filenames = ['*.c', '*.h', '*.idc']
     mimetypes = ['text/x-chdr', 'text/x-csrc']
 
     #: optional Comment or Whitespace
@@ -1136,7 +1136,7 @@ class ScalaLexer(RegexLexer):
             (r'(true|false|null)\b', Keyword.Constant),
             (r'(import|package)(\s+)', bygroups(Keyword, Text), 'import'),
             (r'(type)(\s+)', bygroups(Keyword, Text), 'type'),
-            (r'"""(?:.|\n)*?"""', String),
+            (r'""".*?"""', String),
             (r'"(\\\\|\\"|[^"])*"', String),
             (ur"'\\.'|'[^\\]'|'\\u[0-9a-f]{4}'", String.Char),
 #            (ur'(\.)(%s|%s|`[^`]+`)' % (idrest, op), bygroups(Operator,
@@ -1144,7 +1144,7 @@ class ScalaLexer(RegexLexer):
             (idrest, Name),
             (r'`[^`]+`', Name),
             (r'\[', Operator, 'typeparam'),
-            (r'[\(\)\{\};,.]', Operator),
+            (r'[\(\)\{\};,.#]', Operator),
             (op, Operator),
             (ur'([0-9][0-9]*\.[0-9]*|\.[0-9]+)([eE][+-]?[0-9]+)?[fFdD]?',
              Number.Float),
@@ -1309,6 +1309,13 @@ class ObjectiveCLexer(RegexLexer):
              bygroups(using(this), Name.Function,
                       using(this), Text, Punctuation),
              'function'),
+            # methods
+            (r'^([-+])(\s*)'                         # method marker
+             r'(\(.*?\))?(\s*)'                      # return type
+             r'([a-zA-Z$_][a-zA-Z0-9$_]*:?)',        # begin of method name
+             bygroups(Keyword, Text, using(this),
+                      Text, Name.Function),
+             'method'),
             # function declarations
             (r'((?:[a-zA-Z0-9_*\s])+?(?:\s|[*]))'    # return arguments
              r'([a-zA-Z$_][a-zA-Z0-9$_]*)'           # method name
@@ -1352,6 +1359,15 @@ class ObjectiveCLexer(RegexLexer):
             ('{', Punctuation, '#push'),
             ('}', Punctuation, '#pop'),
         ],
+        'method': [
+            include('whitespace'),
+            (r'(\(.*?\))([a-zA-Z$_][a-zA-Z0-9$_]*)', bygroups(using(this),
+                                                              Name.Variable)),
+            (r'[a-zA-Z$_][a-zA-Z0-9$_]*:', Name.Function),
+            (';', Punctuation, '#pop'),
+            ('{', Punctuation, 'function'),
+            ('', Text, '#pop'),
+        ],
         'string': [
             (r'"', String, '#pop'),
             (r'\\([\\abfnrtv"\']|x[a-fA-F0-9]{2,4}|[0-7]{1,3})', String.Escape),
diff --git a/pygments/lexers/dotnet.py b/pygments/lexers/dotnet.py
index 48feeb85..7c13e8a4 100644
--- a/pygments/lexers/dotnet.py
+++ b/pygments/lexers/dotnet.py
@@ -10,7 +10,8 @@
 """
 import re
 
-from pygments.lexer import RegexLexer, DelegatingLexer, bygroups, using, this
+from pygments.lexer import RegexLexer, DelegatingLexer, bygroups, include, \
+     using, this
 from pygments.token import Punctuation, \
      Text, Comment, Operator, Keyword, Name, String, Number, Literal, Other
 from pygments.util import get_choice_opt
@@ -18,8 +19,8 @@ from pygments import unistring as uni
 
 from pygments.lexers.web import XmlLexer
 
-__all__ = ['CSharpLexer', 'BooLexer', 'VbNetLexer', 'CSharpAspxLexer',
-           'VbNetAspxLexer']
+__all__ = ['CSharpLexer', 'NemerleLexer', 'BooLexer', 'VbNetLexer',
+           'CSharpAspxLexer', 'VbNetAspxLexer', 'FSharpLexer']
 
 
 def _escape(st):
@@ -138,6 +139,164 @@ class CSharpLexer(RegexLexer):
         RegexLexer.__init__(self, **options)
 
 
+class NemerleLexer(RegexLexer):
+    """
+    For `Nemerle <http://nemerle.org>`_ source code.
+
+    Additional options accepted:
+
+    `unicodelevel`
+      Determines which Unicode characters this lexer allows for identifiers.
+      The possible values are:
+
+      * ``none`` -- only the ASCII letters and numbers are allowed. This
+        is the fastest selection.
+      * ``basic`` -- all Unicode characters from the specification except
+        category ``Lo`` are allowed.
+      * ``full`` -- all Unicode characters as specified in the C# specs
+        are allowed.  Note that this means a considerable slowdown since the
+        ``Lo`` category has more than 40,000 characters in it!
+
+      The default value is ``basic``.
+
+    *New in Pygments 1.5.*
+    """
+
+    name = 'Nemerle'
+    aliases = ['nemerle']
+    filenames = ['*.n']
+    mimetypes = ['text/x-nemerle'] # inferred
+
+    flags = re.MULTILINE | re.DOTALL | re.UNICODE
+
+    # for the range of allowed unicode characters in identifiers,
+    # see http://www.ecma-international.org/publications/files/ECMA-ST/Ecma-334.pdf
+
+    levels = {
+        'none': '@?[_a-zA-Z][a-zA-Z0-9_]*',
+        'basic': ('@?[_' + uni.Lu + uni.Ll + uni.Lt + uni.Lm + uni.Nl + ']' +
+                  '[' + uni.Lu + uni.Ll + uni.Lt + uni.Lm + uni.Nl +
+                  uni.Nd + uni.Pc + uni.Cf + uni.Mn + uni.Mc + ']*'),
+        'full': ('@?(?:_|[^' +
+                 _escape(uni.allexcept('Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl')) + '])'
+                 + '[^' + _escape(uni.allexcept('Lu', 'Ll', 'Lt', 'Lm', 'Lo',
+                                                'Nl', 'Nd', 'Pc', 'Cf', 'Mn',
+                                                'Mc')) + ']*'),
+    }
+
+    tokens = {}
+    token_variants = True
+
+    for levelname, cs_ident in levels.items():
+        tokens[levelname] = {
+            'root': [
+                # method names
+                (r'^([ \t]*(?:' + cs_ident + r'(?:\[\])?\s+)+?)' # return type
+                 r'(' + cs_ident + ')'                           # method name
+                 r'(\s*)(\()',                               # signature start
+                 bygroups(using(this), Name.Function, Text, Punctuation)),
+                (r'^\s*\[.*?\]', Name.Attribute),
+                (r'[^\S\n]+', Text),
+                (r'\\\n', Text), # line continuation
+                (r'//.*?\n', Comment.Single),
+                (r'/[*](.|\n)*?[*]/', Comment.Multiline),
+                (r'\n', Text),
+                (r'\$\s*"', String, 'splice-string'),
+                (r'\$\s*<#', String, 'splice-string2'),
+                (r'<#', String, 'recursive-string'),
+
+                (r'(<\[)\s*(' + cs_ident + ':)?', Keyword),
+                (r'\]\>', Keyword),
+
+                # quasiquotation only
+                (r'\$' + cs_ident, Name), 
+                (r'(\$)(\()', bygroups(Name, Punctuation), 'splice-string-content'),
+
+                (r'[~!%^&*()+=|\[\]:;,.<>/?-]', Punctuation),
+                (r'[{}]', Punctuation),
+                (r'@"(\\\\|\\"|[^"])*"', String),
+                (r'"(\\\\|\\"|[^"\n])*["\n]', String),
+                (r"'\\.'|'[^\\]'", String.Char),
+		(r"0[xX][0-9a-fA-F]+[Ll]?", Number),
+                (r"[0-9](\.[0-9]*)?([eE][+-][0-9]+)?[flFLdD]?", Number),
+                (r'#[ \t]*(if|endif|else|elif|define|undef|'
+                 r'line|error|warning|region|endregion|pragma)\b.*?\n',
+                 Comment.Preproc),
+                (r'\b(extern)(\s+)(alias)\b', bygroups(Keyword, Text,
+                 Keyword)),
+                (r'(abstract|and|as|base|catch|def|delegate|'
+                 r'enum|event|extern|false|finally|'
+                 r'fun|implements|interface|internal|'
+                 r'is|macro|match|matches|module|mutable|new|'
+                 r'null|out|override|params|partial|private|'
+                 r'protected|public|ref|sealed|static|'
+                 r'syntax|this|throw|true|try|type|typeof|'
+                 r'virtual|volatile|when|where|with|'
+                 r'assert|assert2|async|break|checked|continue|do|else|'
+                 r'ensures|for|foreach|if|late|lock|new|nolate|'
+                 r'otherwise|regexp|repeat|requires|return|surroundwith|'
+                 r'unchecked|unless|using|while|yield)\b', Keyword),
+                (r'(global)(::)', bygroups(Keyword, Punctuation)),
+                (r'(bool|byte|char|decimal|double|float|int|long|object|sbyte|'
+                 r'short|string|uint|ulong|ushort|void|array|list)\b\??',
+                 Keyword.Type),
+                (r'(:>?)\s*(' + cs_ident + r'\??)',
+                 bygroups(Punctuation, Keyword.Type)),
+                (r'(class|struct|variant|module)(\s+)',
+                 bygroups(Keyword, Text), 'class'),
+                (r'(namespace|using)(\s+)', bygroups(Keyword, Text),
+                 'namespace'),
+                (cs_ident, Name),
+            ],
+            'class': [
+                (cs_ident, Name.Class, '#pop')
+            ],
+            'namespace': [
+                (r'(?=\()', Text, '#pop'), # using (resource)
+                ('(' + cs_ident + r'|\.)+', Name.Namespace, '#pop')
+            ],
+            'splice-string': [
+                (r'[^"$]',  String), 
+                (r'\$' + cs_ident, Name),
+                (r'(\$)(\()', bygroups(Name, Punctuation),
+                 'splice-string-content'),
+                (r'\\"',  String),
+                (r'"',  String, '#pop')
+            ],
+            'splice-string2': [
+                (r'[^#<>$]',  String), 
+                (r'\$' + cs_ident, Name),
+                (r'(\$)(\()', bygroups(Name, Punctuation),
+                 'splice-string-content'),
+                (r'<#',  String, '#push'),
+                (r'#>',  String, '#pop')
+            ],
+            'recursive-string': [
+                (r'[^#<>]',  String),
+                (r'<#',  String, '#push'),
+                (r'#>',  String, '#pop')
+            ],
+            'splice-string-content': [
+                (r'if|match', Keyword),
+                (r'[~!%^&*+=|\[\]:;,.<>/?-]', Punctuation),
+                (cs_ident, Name), 
+                (r'\(', Punctuation, '#push'),
+                (r'\)', Punctuation, '#pop')
+            ]
+        }
+
+    def __init__(self, **options):
+        level = get_choice_opt(options, 'unicodelevel', self.tokens.keys(),
+                               'basic')
+        if level not in self._all_tokens:
+            # compile the regexes now
+            self._tokens = self.__class__.process_tokendef(level)
+        else:
+            self._tokens = self._all_tokens[level]
+
+        RegexLexer.__init__(self, **options)
+
+
 class BooLexer(RegexLexer):
     """
     For `Boo <http://boo.codehaus.org/>`_ source code.
@@ -365,3 +524,107 @@ class VbNetAspxLexer(DelegatingLexer):
             return 0.2
         elif re.search(r'script[^>]+language=["\']vb', text, re.I) is not None:
             return 0.15
+
+
+# Very close to functional.OcamlLexer
+class FSharpLexer(RegexLexer):
+    """
+    For the F# language.
+
+    *New in Pygments 1.5.*
+    """
+
+    name = 'FSharp'
+    aliases = ['fsharp']
+    filenames = ['*.fs', '*.fsi']
+    mimetypes = ['text/x-fsharp']
+
+    keywords = [
+      'abstract', 'and', 'as', 'assert', 'base', 'begin', 'class',
+      'default', 'delegate', 'do', 'do!', 'done', 'downcast',
+      'downto', 'elif', 'else', 'end', 'exception', 'extern',
+      'false', 'finally', 'for', 'fun', 'function', 'global', 'if',
+      'in', 'inherit', 'inline', 'interface', 'internal', 'lazy',
+      'let', 'let!', 'match', 'member', 'module', 'mutable',
+      'namespace', 'new', 'null', 'of', 'open', 'or', 'override',
+      'private', 'public', 'rec', 'return', 'return!', 'sig',
+      'static', 'struct', 'then', 'to', 'true', 'try', 'type',
+      'upcast', 'use', 'use!', 'val', 'void', 'when', 'while',
+      'with', 'yield', 'yield!'
+    ]
+    keyopts = [
+      '!=','#','&','&&','\(','\)','\*','\+',',','-',
+      '-\.','->','\.','\.\.',':','::',':=',':>',';',';;','<',
+      '<-','>','>]','\?','\?\?','\[','\[<','\[>','\[\|',
+      ']','_','`','{','\|','\|]','}','~','<@','=','@>'
+    ]
+
+    operators = r'[!$%&*+\./:<=>?@^|~-]'
+    word_operators = ['and', 'asr', 'land', 'lor', 'lsl', 'lxor', 'mod', 'not', 'or']
+    prefix_syms = r'[!?~]'
+    infix_syms = r'[=<>@^|&+\*/$%-]'
+    primitives = ['unit', 'int', 'float', 'bool', 'string', 'char', 'list', 'array',
+                  'byte', 'sbyte', 'int16', 'uint16', 'uint32', 'int64', 'uint64'
+                  'nativeint', 'unativeint', 'decimal', 'void', 'float32', 'single',
+                  'double']
+
+    tokens = {
+        'escape-sequence': [
+            (r'\\[\\\"\'ntbr]', String.Escape),
+            (r'\\[0-9]{3}', String.Escape),
+            (r'\\x[0-9a-fA-F]{2}', String.Escape),
+        ],
+        'root': [
+            (r'\s+', Text),
+            (r'false|true|\(\)|\[\]', Name.Builtin.Pseudo),
+            (r'\b([A-Z][A-Za-z0-9_\']*)(?=\s*\.)',
+             Name.Namespace, 'dotted'),
+            (r'\b([A-Z][A-Za-z0-9_\']*)', Name.Class),
+            (r'//.*?\n', Comment.Single),
+            (r'\(\*', Comment, 'comment'),
+            (r'\b(%s)\b' % '|'.join(keywords), Keyword),
+            (r'(%s)' % '|'.join(keyopts), Operator),
+            (r'(%s|%s)?%s' % (infix_syms, prefix_syms, operators), Operator),
+            (r'\b(%s)\b' % '|'.join(word_operators), Operator.Word),
+            (r'\b(%s)\b' % '|'.join(primitives), Keyword.Type),
+
+            (r'#[ \t]*(if|endif|else|line|nowarn|light)\b.*?\n',
+             Comment.Preproc),
+
+            (r"[^\W\d][\w']*", Name),
+
+            (r'\d[\d_]*', Number.Integer),
+            (r'0[xX][\da-fA-F][\da-fA-F_]*', Number.Hex),
+            (r'0[oO][0-7][0-7_]*', Number.Oct),
+            (r'0[bB][01][01_]*', Number.Binary),
+            (r'-?\d[\d_]*(.[\d_]*)?([eE][+\-]?\d[\d_]*)', Number.Float),
+
+            (r"'(?:(\\[\\\"'ntbr ])|(\\[0-9]{3})|(\\x[0-9a-fA-F]{2}))'",
+             String.Char),
+            (r"'.'", String.Char),
+            (r"'", Keyword), # a stray quote is another syntax element
+
+            (r'"', String.Double, 'string'),
+
+            (r'[~?][a-z][\w\']*:', Name.Variable),
+        ],
+        'comment': [
+            (r'[^(*)]+', Comment),
+            (r'\(\*', Comment, '#push'),
+            (r'\*\)', Comment, '#pop'),
+            (r'[(*)]', Comment),
+        ],
+        'string': [
+            (r'[^\\"]+', String.Double),
+            include('escape-sequence'),
+            (r'\\\n', String.Double),
+            (r'"', String.Double, '#pop'),
+        ],
+        'dotted': [
+            (r'\s+', Text),
+            (r'\.', Punctuation),
+            (r'[A-Z][A-Za-z0-9_\']*(?=\s*\.)', Name.Namespace),
+            (r'[A-Z][A-Za-z0-9_\']*', Name.Class, '#pop'),
+            (r'[a-z_][A-Za-z0-9_\']*', Name, '#pop'),
+        ],
+    }
diff --git a/pygments/lexers/functional.py b/pygments/lexers/functional.py
index 9115f653..724ee833 100644
--- a/pygments/lexers/functional.py
+++ b/pygments/lexers/functional.py
@@ -570,11 +570,11 @@ class OcamlLexer(RegexLexer):
 
             (r"[^\W\d][\w']*", Name),
 
-            (r'\d[\d_]*', Number.Integer),
+            (r'-?\d[\d_]*(.[\d_]*)?([eE][+\-]?\d[\d_]*)', Number.Float),
             (r'0[xX][\da-fA-F][\da-fA-F_]*', Number.Hex),
             (r'0[oO][0-7][0-7_]*', Number.Oct),
             (r'0[bB][01][01_]*', Number.Binary),
-            (r'-?\d[\d_]*(.[\d_]*)?([eE][+\-]?\d[\d_]*)', Number.Float),
+            (r'\d[\d_]*', Number.Integer),
 
             (r"'(?:(\\[\\\"'ntbr ])|(\\[0-9]{3})|(\\x[0-9a-fA-F]{2}))'",
              String.Char),
@@ -656,7 +656,7 @@ class ErlangLexer(RegexLexer):
         'universaltime_to_localtime', 'unlink', 'unregister', 'whereis'
         ]
 
-    operators = r'(\+|-|\*|/|<|>|=|==|/=|=:=|=/=|=<|>=|\+\+|--|<-|!)'
+    operators = r'(\+|-|\*|/|<|>|=|==|/=|=:=|=/=|=<|>=|\+\+|--|<-|!|\?)'
     word_operators = [
         'and', 'andalso', 'band', 'bnot', 'bor', 'bsl', 'bsr', 'bxor',
         'div', 'not', 'or', 'orelse', 'rem', 'xor'
diff --git a/pygments/lexers/other.py b/pygments/lexers/other.py
index 0cf785cc..88236a26 100644
--- a/pygments/lexers/other.py
+++ b/pygments/lexers/other.py
@@ -817,7 +817,7 @@ class LogtalkLexer(RegexLexer):
              Keyword),
             (r'(object|protocol|category)_property(?=[(])', Keyword),
             # Entity relations
-            (r'complements_object(?=[(])', Keyword),
+            (r'co(mplements_object|nforms_to_protocol)(?=[(])', Keyword),
             (r'extends_(object|protocol|category)(?=[(])', Keyword),
             (r'imp(lements_protocol|orts_category)(?=[(])', Keyword),
             (r'(instantiat|specializ)es_class(?=[(])', Keyword),
@@ -826,11 +826,12 @@ class LogtalkLexer(RegexLexer):
             # Flags
             (r'(current|set)_logtalk_flag(?=[(])', Keyword),
             # Compiling, loading, and library paths
-            (r'logtalk_(compile|l(ibrary_path|oad))(?=[(])', Keyword),
+            (r'logtalk_(compile|l(ibrary_path|oad_context|oad))(?=[(])',
+             Keyword),
             # Database
             (r'(clause|retract(all)?)(?=[(])', Keyword),
             (r'a(bolish|ssert(a|z))(?=[(])', Keyword),
-            # Control
+            # Control constructs
             (r'(ca(ll|tch)|throw)(?=[(])', Keyword),
             (r'(fail|true)\b', Keyword),
             # All solutions
@@ -841,7 +842,7 @@ class LogtalkLexer(RegexLexer):
             # Term unification
             (r'unify_with_occurs_check(?=[(])', Keyword),
             # Term creation and decomposition
-            (r'(functor|arg|copy_term)(?=[(])', Keyword),
+            (r'(functor|arg|copy_term|numbervars)(?=[(])', Keyword),
             # Evaluable functors
             (r'(rem|mod|abs|sign)(?=[(])', Keyword),
             (r'float(_(integer|fractional)_part)?(?=[(])', Keyword),
@@ -849,8 +850,10 @@ class LogtalkLexer(RegexLexer):
             # Other arithmetic functors
             (r'(cos|atan|exp|log|s(in|qrt))(?=[(])', Keyword),
             # Term testing
-            (r'(var|atom(ic)?|integer|float|compound|n(onvar|umber))(?=[(])',
-             Keyword),
+            (r'(var|atom(ic)?|integer|float|c(allable|ompound)|n(onvar|umber))'
+             r'(?=[(])', Keyword),
+            # Term comparison
+            (r'compare(?=[(])', Keyword),
             # Stream selection and control
             (r'(curren|se)t_(in|out)put(?=[(])', Keyword),
             (r'(open|close)(?=[(])', Keyword),
@@ -879,8 +882,10 @@ class LogtalkLexer(RegexLexer):
             # External call
             (r'[{}]', Keyword),
             # Logic and control
-            (r'\bonce(?=[(])', Keyword),
+            (r'\b(ignore|once)(?=[(])', Keyword),
             (r'\brepeat\b', Keyword),
+            # Sorting
+            (r'(key)?sort(?=[(])', Keyword),
             # Bitwise functors
             (r'(>>|<<|/\\|\\\\|\\)', Operator),
             # Arithemtic evaluation
@@ -895,7 +900,7 @@ class LogtalkLexer(RegexLexer):
             (r'(==|\\==|@=<|@<|@>=|@>)', Operator),
             # Evaluable functors
             (r'(//|[-+*/])', Operator),
-            (r'\b(mod|rem)\b', Operator),
+            (r'\b(e|pi|mod|rem)\b', Operator),
             # Other arithemtic functors
             (r'\b\*\*\b', Operator),
             # DCG rules
@@ -906,6 +911,8 @@ class LogtalkLexer(RegexLexer):
             (r'\\+', Operator),
             # Mode operators
             (r'[?@]', Operator),
+            # Existential quantifier
+            (r'\^', Operator),
             # Strings
             (r'"(\\\\|\\"|[^"])*"', String),
             # Ponctuation
@@ -937,16 +944,18 @@ class LogtalkLexer(RegexLexer):
             (r'in(fo|itialization)(?=[(])', Keyword, 'root'),
             (r'(dynamic|synchronized|threaded)[.]', Keyword, 'root'),
             (r'(alias|d(ynamic|iscontiguous)|m(eta_predicate|ode|ultifile)|'
-             r's(et_(logtalk|prolog)_flag|ynchronized))(?=[(])', Keyword, 'root'),
+             r's(et_(logtalk|prolog)_flag|ynchronized))(?=[(])',
+             Keyword, 'root'),
             (r'op(?=[(])', Keyword, 'root'),
-            (r'(calls|reexport|use(s|_module))(?=[(])', Keyword, 'root'),
+            (r'(c(alls|oinductive)|reexport|use(s|_module))(?=[(])',
+             Keyword, 'root'),
             (r'[a-z][a-zA-Z0-9_]*(?=[(])', Text, 'root'),
             (r'[a-z][a-zA-Z0-9_]*[.]', Text, 'root'),
         ],
 
         'entityrelations': [
-            (r'(extends|i(nstantiates|mp(lements|orts))|specializes)(?=[(])',
-             Keyword),
+            (r'(complements|extends|i(nstantiates|mp(lements|orts))|specializes)'
+             b'(?=[(])', Keyword),
             # Numbers
             (r"0'.", Number),
             (r'0b[01]+', Number),
diff --git a/pygments/lexers/pypylog.py b/pygments/lexers/pypylog.py
index 186a532f..f2363c5d 100644
--- a/pygments/lexers/pypylog.py
+++ b/pygments/lexers/pypylog.py
@@ -29,7 +29,7 @@ class PyPyLogLexer(RegexLexer):
 
             (r"[ifp]\d+", Name),
             (r"ptr\d+", Name),
-            (r"(\()([\w_]+(:\.[\w_]+)?)(\))", bygroups(Punctuation, Name.Builtin, Punctuation)),
+            (r"(\()([\w_]+(?:\.[\w_]+)?)(\))", bygroups(Punctuation, Name.Builtin, Punctuation)),
             (r"[\[\]=,()]", Punctuation),
             (r"(\d+\.\d+|inf|-inf)", Number.Float),
             (r"-?\d+", Number.Integer),
diff --git a/pygments/lexers/text.py b/pygments/lexers/text.py
index 5da30496..e481bb62 100644
--- a/pygments/lexers/text.py
+++ b/pygments/lexers/text.py
@@ -653,6 +653,13 @@ class RstLexer(RegexLexer):
         for item in do_insertions(ins, lexer.get_tokens_unprocessed(code)):
             yield item
 
+    # from docutils.parsers.rst.states
+    closers = u'\'")]}>\u2019\u201d\xbb!?'
+    unicode_delimiters = u'\u2010\u2011\u2012\u2013\u2014\u00a0'
+    end_string_suffix = (r'((?=$)|(?=[-/:.,; \n\x00%s%s]))'
+                         % (re.escape(unicode_delimiters),
+                            re.escape(closers)))
+
     tokens = {
         'root': [
             # Heading with overline
@@ -689,9 +696,9 @@ class RstLexer(RegexLexer):
              bygroups(Punctuation, Text, Operator.Word, Punctuation, Text,
                       using(this, state='inline'))),
             # A reference target
-            (r'^( *\.\.)(\s*)([\w\t ]+:)(.*?)$',
+            (r'^( *\.\.)(\s*)(_(?:[^:\\]|\\.)+:)(.*?)$',
              bygroups(Punctuation, Text, Name.Tag, using(this, state='inline'))),
-            # A footnote target
+            # A footnote/citation target
             (r'^( *\.\.)(\s*)(\[.+\])(.*?)$',
              bygroups(Punctuation, Text, Name.Tag, using(this, state='inline'))),
             # A substitution def
@@ -730,10 +737,9 @@ class RstLexer(RegexLexer):
             (r'.', Text),
         ],
         'literal': [
-            (r'[^`\\]+', String),
-            (r'\\.', String),
-            (r'``', String, '#pop'),
-            (r'[`\\]', String),
+            (r'[^`]+', String),
+            (r'``' + end_string_suffix, String, '#pop'),
+            (r'`', String),
         ]
     }
 
@@ -877,7 +883,7 @@ class SquidConfLexer(RegexLexer):
     mimetypes = ['text/x-squidconf']
     flags = re.IGNORECASE
 
-    keywords = [ "acl", "always_direct", "announce_host",
+    keywords = [ "access_log", "acl", "always_direct", "announce_host",
                  "announce_period", "announce_port", "announce_to",
                  "anonymize_headers", "append_domain", "as_whois_server",
                  "auth_param_basic", "authenticate_children",
@@ -902,7 +908,7 @@ class SquidConfLexer(RegexLexer):
                  "ftpget_options", "ftpget_program", "ftp_list_width",
                  "ftp_passive", "ftp_user", "half_closed_clients",
                  "header_access", "header_replace", "hierarchy_stoplist",
-                 "high_response_time_warning", "high_page_fault_warning",
+                 "high_response_time_warning", "high_page_fault_warning", "hosts_file",
                  "htcp_port", "http_access", "http_anonymizer", "httpd_accel",
                  "httpd_accel_host", "httpd_accel_port",
                  "httpd_accel_uses_host_header", "httpd_accel_with_proxy",
@@ -969,14 +975,14 @@ class SquidConfLexer(RegexLexer):
              "snmp_community",
              ]
 
-    ip_re = r'\b(?:\d{1,3}\.){3}\d{1,3}\b'
+    ip_re = r'(?:(?:(?:[3-9]\d?|2(?:5[0-5]|[0-4]?\d)?|1\d{0,2}|0x0*[0-9a-f]{1,2}|0+[1-3]?[0-7]{0,2})(?:\.(?:[3-9]\d?|2(?:5[0-5]|[0-4]?\d)?|1\d{0,2}|0x0*[0-9a-f]{1,2}|0+[1-3]?[0-7]{0,2})){3})|(?!.*::.*::)(?:(?!:)|:(?=:))(?:[0-9a-f]{0,4}(?:(?<=::)|(?<!::):)){6}(?:[0-9a-f]{0,4}(?:(?<=::)|(?<!::):)[0-9a-f]{0,4}(?:(?<=::)|(?<!:)|(?<=:)(?<!::):)|(?:25[0-4]|2[0-4]\d|1\d\d|[1-9]?\d)(?:\.(?:25[0-4]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))'
 
     def makelistre(list):
-        return r'\b(?:'+'|'.join(list)+r')\b'
+        return r'\b(?:' + '|'.join(list) + r')\b'
 
     tokens = {
         'root': [
-            (r'\s+', Text),
+            (r'\s+', Whitespace),
             (r'#', Comment, 'comment'),
             (makelistre(keywords), Keyword),
             (makelistre(opts), Name.Constant),
@@ -985,8 +991,8 @@ class SquidConfLexer(RegexLexer):
             (r'stats/'+makelistre(actions), String),
             (r'log/'+makelistre(actions)+r'=', String),
             (makelistre(acls), Keyword),
-            (ip_re+r'(?:/(?:'+ip_re+r')|\d+)?', Number),
-            (r'\b\d+\b', Number),
+            (ip_re + r'(?:/(?:' + ip_re + r'|\b\d+\b))?', Number.Float),
+            (r'(?:\b\d+\b(?:-\b\d+|%)?)', Number),
             (r'\S+', Text),
         ],
         'comment': [
diff --git a/pygments/lexers/web.py b/pygments/lexers/web.py
index 88d64a6f..56798441 100644
--- a/pygments/lexers/web.py
+++ b/pygments/lexers/web.py
@@ -172,9 +172,6 @@ class ActionScriptLexer(RegexLexer):
         ]
     }
 
-    def analyse_text(text):
-        return 0.05
-
 
 class ActionScript3Lexer(RegexLexer):
     """
@@ -190,6 +187,7 @@ class ActionScript3Lexer(RegexLexer):
                  'text/actionscript']
 
     identifier = r'[$a-zA-Z_][a-zA-Z0-9_]*'
+    typeidentifier = identifier + '(?:\.<\w+>)?'
 
     flags = re.DOTALL | re.MULTILINE
     tokens = {
@@ -198,12 +196,13 @@ class ActionScript3Lexer(RegexLexer):
             (r'(function\s+)(' + identifier + r')(\s*)(\()',
              bygroups(Keyword.Declaration, Name.Function, Text, Operator),
              'funcparams'),
-            (r'(var|const)(\s+)(' + identifier + r')(\s*)(:)(\s*)(' + identifier + r')',
+            (r'(var|const)(\s+)(' + identifier + r')(\s*)(:)(\s*)(' +
+             typeidentifier + r')',
              bygroups(Keyword.Declaration, Text, Name, Text, Punctuation, Text,
                       Keyword.Type)),
             (r'(import|package)(\s+)((?:' + identifier + r'|\.)+)(\s*)',
              bygroups(Keyword, Text, Name.Namespace, Text)),
-            (r'(new)(\s+)(' + identifier + r')(\s*)(\()',
+            (r'(new)(\s+)(' + typeidentifier + r')(\s*)(\()',
              bygroups(Keyword, Text, Keyword.Type, Text, Operator)),
             (r'//.*?\n', Comment.Single),
             (r'/\*.*?\*/', Comment.Multiline),
@@ -234,13 +233,13 @@ class ActionScript3Lexer(RegexLexer):
         'funcparams': [
             (r'\s+', Text),
             (r'(\s*)(\.\.\.)?(' + identifier + r')(\s*)(:)(\s*)(' +
-             identifier + r'|\*)(\s*)',
+             typeidentifier + r'|\*)(\s*)',
              bygroups(Text, Punctuation, Name, Text, Operator, Text,
                       Keyword.Type, Text), 'defval'),
             (r'\)', Operator, 'type')
         ],
         'type': [
-            (r'(\s*)(:)(\s*)(' + identifier + r'|\*)',
+            (r'(\s*)(:)(\s*)(' + typeidentifier + r'|\*)',
              bygroups(Text, Operator, Text, Keyword.Type), '#pop:2'),
             (r'\s*', Text, '#pop:2')
         ],
@@ -252,8 +251,9 @@ class ActionScript3Lexer(RegexLexer):
     }
 
     def analyse_text(text):
-        if re.match(r'\w+\s*:\s*\w', text): return 0.3
-        return 0.1
+        if re.match(r'\w+\s*:\s*\w', text):
+            return 0.3
+        return 0
 
 
 class CssLexer(RegexLexer):
@@ -2216,9 +2216,9 @@ class XQueryLexer(ExtendedRegexLexer):
              punctuation_root_callback),
             (r'(castable|cast)(\s+)(as)',
              bygroups(Keyword, Text, Keyword), 'singletype'),
-            (r'(instance)(\s+)(of)|(treat)(\s+)(as)',
-             bygroups(Keyword, Text, Keyword), 'itemtype'),
-            (r'(case)|(as)', Keyword, 'itemtype'),
+            (r'(instance)(\s+)(of)', bygroups(Keyword, Text, Keyword), 'itemtype'),
+            (r'(treat)(\s+)(as)', bygroups(Keyword, Text, Keyword), 'itemtype'),
+            (r'case|as', Keyword, 'itemtype'),
             (r'(\))(\s*)(as)',
              bygroups(Punctuation, Text, Keyword), 'itemtype'),
             (r'\$', Name.Variable, 'varname'),
@@ -2304,8 +2304,8 @@ class XQueryLexer(ExtendedRegexLexer):
              bygroups(Keyword, Text, Keyword, Text, Keyword), 'root'),
             (r'(castable|cast)(\s+)(as)',
              bygroups(Keyword, Text, Keyword), 'singletype'),
-            (r'(instance)(\s+)(of)|(treat)(\s+)(as)',
-             bygroups(Keyword, Text, Keyword)),
+            (r'(treat)(\s+)(as)', bygroups(Keyword, Text, Keyword)),
+            (r'(instance)(\s+)(of)', bygroups(Keyword, Text, Keyword)),
             (r'case|as', Keyword, 'itemtype'),
             (r'(\))(\s*)(as)', bygroups(Operator, Text, Keyword), 'itemtype'),
             (ncname + r'(:\*)', Keyword.Type, 'operator'),
author	Georg Brandl <georg@python.org>	2011-06-18 13:49:00 +0200
committer	Georg Brandl <georg@python.org>	2011-06-18 13:49:00 +0200
commit	1507dac8fd099aad7afca378965ceb508b00dddd (patch)
tree	7c7a3448b6b7628f8bd33e207c1b68854ad375ae /pygments
parent	522b59a55921a278a847c573974811417cd9fac6 (diff)
parent	0bf6072b0e75fa4b6afab5d23477d7aa4aa1c77c (diff)
download	pygments-1507dac8fd099aad7afca378965ceb508b00dddd.tar.gz