# -*- coding: utf-8 -*- """ pygments.lexers.web ~~~~~~~~~~~~~~~~~~~ Lexers for web-related languages and markup. :copyright: Copyright 2006-2009 by the Pygments team, see AUTHORS. :license: BSD, see LICENSE for details. """ import re try: set except NameError: from sets import Set as set from pygments.lexer import RegexLexer, bygroups, using, include, this from pygments.token import \ Text, Comment, Operator, Keyword, Name, String, Number, Other, Punctuation from pygments.util import get_bool_opt, get_list_opt, looks_like_xml, \ html_doctype_matches __all__ = ['HtmlLexer', 'XmlLexer', 'JavascriptLexer', 'CssLexer', 'PhpLexer', 'ActionScriptLexer', 'XsltLexer', 'ActionScript3Lexer', 'MxmlLexer'] class JavascriptLexer(RegexLexer): """ For JavaScript source code. """ name = 'JavaScript' aliases = ['js', 'javascript'] filenames = ['*.js'] mimetypes = ['application/x-javascript', 'text/x-javascript', 'text/javascript'] flags = re.DOTALL tokens = { 'commentsandwhitespace': [ (r'\s+', Text), (r'', Comment, '#pop'), ('-', Comment), ], 'tag': [ (r'\s+', Text), (r'[a-zA-Z0-9_:-]+\s*=', Name.Attribute, 'attr'), (r'[a-zA-Z0-9_:-]+', Name.Attribute), (r'/?\s*>', Name.Tag, '#pop'), ], 'script-content': [ (r'<\s*/\s*script\s*>', Name.Tag, '#pop'), (r'.+?(?=<\s*/\s*script\s*>)', using(JavascriptLexer)), ], 'style-content': [ (r'<\s*/\s*style\s*>', Name.Tag, '#pop'), (r'.+?(?=<\s*/\s*style\s*>)', using(CssLexer)), ], 'attr': [ ('".*?"', String, '#pop'), ("'.*?'", String, '#pop'), (r'[^\s>]+', String, '#pop'), ], } def analyse_text(text): if html_doctype_matches(text): return 0.5 class PhpLexer(RegexLexer): """ For `PHP `_ source code. For PHP embedded in HTML, use the `HtmlPhpLexer`. Additional options accepted: `startinline` If given and ``True`` the lexer starts highlighting with php code (i.e.: no starting ``>> from pygments.lexers._phpbuiltins import MODULES >>> MODULES.keys() ['PHP Options/Info', 'Zip', 'dba', ...] In fact the names of those modules match the module names from the php documentation. """ name = 'PHP' aliases = ['php', 'php3', 'php4', 'php5'] filenames = ['*.php', '*.php[345]'] mimetypes = ['text/x-php'] flags = re.IGNORECASE | re.DOTALL | re.MULTILINE tokens = { 'root': [ (r'<\?(php)?', Comment.Preproc, 'php'), (r'[^<]+', Other), (r'<', Other) ], 'php': [ (r'\?>', Comment.Preproc, '#pop'), (r'<<<([a-zA-Z_][a-zA-Z0-9_]*)\n.*?\n\1\;?\n', String), (r'\s+', Text), (r'#.*?\n', Comment.Single), (r'//.*?\n', Comment.Single), # put the empty comment here, it is otherwise seen as # the start of a docstring (r'/\*\*/', Comment.Multiline), (r'/\*\*.*?\*/', String.Doc), (r'/\*.*?\*/', Comment.Multiline), (r'(->|::)(\s*)([a-zA-Z_][a-zA-Z0-9_]*)', bygroups(Operator, Text, Name.Attribute)), (r'[~!%^&*+=|:.<>/?@-]+', Operator), (r'[\[\]{}();,]+', Punctuation), (r'(class)(\s+)', bygroups(Keyword, Text), 'classname'), (r'(function)(\s+)(&?)(\s*)', bygroups(Keyword, Text, Operator, Text), 'functionname'), (r'(const)(\s+)([a-zA-Z_][a-zA-Z0-9_]*)', bygroups(Keyword, Text, Name.Constant)), (r'(and|E_PARSE|old_function|E_ERROR|or|as|E_WARNING|parent|' r'eval|PHP_OS|break|exit|case|extends|PHP_VERSION|cfunction|' r'FALSE|print|for|require|continue|foreach|require_once|' r'declare|return|default|static|do|switch|die|stdClass|' r'echo|else|TRUE|elseif|var|empty|if|xor|enddeclare|include|' r'virtual|endfor|include_once|while|endforeach|global|__FILE__|' r'endif|list|__LINE__|endswitch|new|__sleep|endwhile|not|' r'array|__wakeup|E_ALL|NULL|final|php_user_filter|interface|' r'implements|public|private|protected|abstract|clone|try|' r'catch|throw|this)\b', Keyword), ('(true|false|null)\b', Keyword.Constant), (r'\$\{\$+[a-zA-Z_][a-zA-Z0-9_]*\}', Name.Variable), (r'\$+[a-zA-Z_][a-zA-Z0-9_]*', Name.Variable), ('[a-zA-Z_][a-zA-Z0-9_]*', Name.Other), (r"[0-9](\.[0-9]*)?(eE[+-][0-9])?[flFLdD]?|" r"0[xX][0-9a-fA-F]+[Ll]?", Number), (r"'([^'\\]*(?:\\.[^'\\]*)*)'", String.Single), (r'`([^`\\]*(?:\\.[^`\\]*)*)`', String.Backtick), (r'"', String.Double, 'string'), ], 'classname': [ (r'[a-zA-Z_][a-zA-Z0-9_]*', Name.Class, '#pop') ], 'functionname': [ (r'[a-zA-Z_][a-zA-Z0-9_]*', Name.Function, '#pop') ], 'string': [ (r'"', String.Double, '#pop'), (r'[^{$"\\]+', String.Double), (r'\\([nrt\"$]|[0-7]{1,3}|x[0-9A-Fa-f]{1,2})', String.Escape), (r'\$[a-zA-Z_][a-zA-Z0-9_]*(\[\S+\]|->[a-zA-Z_][a-zA-Z0-9_]*)?', String.Interpol), (r'(\{\$\{)(.*?)(\}\})', bygroups(String.Interpol, using(this, _startinline=True), String.Interpol)), (r'(\{)(\$.*?)(\})', bygroups(String.Interpol, using(this, _startinline=True), String.Interpol)), (r'(\$\{)(\S+)(\})', bygroups(String.Interpol, Name.Variable, String.Interpol)), (r'[${\\]+', String.Double) ], } def __init__(self, **options): self.funcnamehighlighting = get_bool_opt( options, 'funcnamehighlighting', True) self.disabledmodules = get_list_opt( options, 'disabledmodules', ['unknown']) self.startinline = get_bool_opt(options, 'startinline', False) # private option argument for the lexer itself if '_startinline' in options: self.startinline = options.pop('_startinline') # collect activated functions in a set self._functions = set() if self.funcnamehighlighting: from pygments.lexers._phpbuiltins import MODULES for key, value in MODULES.iteritems(): if key not in self.disabledmodules: self._functions.update(value) RegexLexer.__init__(self, **options) def get_tokens_unprocessed(self, text): stack = ['root'] if self.startinline: stack.append('php') for index, token, value in \ RegexLexer.get_tokens_unprocessed(self, text, stack): if token is Name.Other: if value in self._functions: yield index, Name.Builtin, value continue yield index, token, value def analyse_text(text): rv = 0.0 if re.search(r'<\?(?!xml)', text): rv += 0.3 if '?>' in text: rv += 0.1 return rv class XmlLexer(RegexLexer): """ Generic lexer for XML (eXtensible Markup Language). """ flags = re.MULTILINE | re.DOTALL name = 'XML' aliases = ['xml'] filenames = ['*.xml', '*.xsl', '*.rss', '*.xslt', '*.xsd', '*.wsdl'] mimetypes = ['text/xml', 'application/xml', 'image/svg+xml', 'application/rss+xml', 'application/atom+xml', 'application/xsl+xml', 'application/xslt+xml'] tokens = { 'root': [ ('[^<&]+', Text), (r'&\S*?;', Name.Entity), (r'\<\!\[CDATA\[.*?\]\]\>', Comment.Preproc), ('', Comment, '#pop'), ('-', Comment), ], 'tag': [ (r'\s+', Text), (r'[a-zA-Z0-9_.:-]+\s*=', Name.Attribute, 'attr'), (r'/?\s*>', Name.Tag, '#pop'), ], 'attr': [ ('\s+', Text), ('".*?"', String, '#pop'), ("'.*?'", String, '#pop'), (r'[^\s>]+', String, '#pop'), ], } def analyse_text(text): if looks_like_xml(text): return 0.5 class XsltLexer(XmlLexer): ''' A lexer for XSLT. *New in Pygments 0.10.* ''' name = 'XSLT' aliases = ['xslt'] filenames = ['*.xsl', '*.xslt'] EXTRA_KEYWORDS = set([ 'apply-imports', 'apply-templates', 'attribute', 'attribute-set', 'call-template', 'choose', 'comment', 'copy', 'copy-of', 'decimal-format', 'element', 'fallback', 'for-each', 'if', 'import', 'include', 'key', 'message', 'namespace-alias', 'number', 'otherwise', 'output', 'param', 'preserve-space', 'processing-instruction', 'sort', 'strip-space', 'stylesheet', 'template', 'text', 'transform', 'value-of', 'variable', 'when', 'with-param' ]) def get_tokens_unprocessed(self, text): for index, token, value in XmlLexer.get_tokens_unprocessed(self, text): m = re.match(']*)/?>?', value) if token is Name.Tag and m and m.group(1) in self.EXTRA_KEYWORDS: yield index, Keyword, value else: yield index, token, value def analyse_text(text): if looks_like_xml(text) and ' tags is highlighted by the appropriate lexer. """ flags = re.MULTILINE | re.DOTALL name = 'MXML' aliases = ['mxml'] filenames = ['*.mxml'] mimetimes = ['text/xml', 'application/xml'] tokens = { 'root': [ ('[^<&]+', Text), (r'&\S*?;', Name.Entity), (r'(\<\!\[CDATA\[)(.*?)(\]\]\>)', bygroups(String, using(ActionScript3Lexer), String)), ('', Comment, '#pop'), ('-', Comment), ], 'tag': [ (r'\s+', Text), (r'[a-zA-Z0-9_.:-]+\s*=', Name.Attribute, 'attr'), (r'/?\s*>', Name.Tag, '#pop'), ], 'attr': [ ('\s+', Text), ('".*?"', String, '#pop'), ("'.*?'", String, '#pop'), (r'[^\s>]+', String, '#pop'), ], }