summary refs log tree commit diff
path: root/pygments/lexers/markup.py
diff options
context:
space:
mode:
author diskdance <106794417+diskdance@users.noreply.github.com> 2023-04-05 22:07:40 +0800
committer GitHub <noreply@github.com> 2023-04-05 16:07:40 +0200
commit eaca69091119e0ac5c97e626ba9e3b21b688c5ed (patch)
tree ff94454e9d8729d54cee122c943bcf5364af8414 /pygments/lexers/markup.py
parent 0e9c87bcf096908956e031f15a4e589e83be1691 (diff)
download pygments-git-eaca69091119e0ac5c97e626ba9e3b21b688c5ed.tar.gz
Add lexer for MediaWiki Wikitext (#2373)
Diffstat (limited to 'pygments/lexers/markup.py')
-rw-r--r-- pygments/lexers/markup.py 737
1 files changed, 736 insertions, 1 deletions
diff --git a/pygments/lexers/markup.py b/pygments/lexers/markup.py
index 224bed0c..ec7da2e7 100644
--- a/pygments/lexers/markup.py
+++ b/pygments/lexers/markup.py
@@ -13,6 +13,8 @@ import re
from pygments.lexers.html import XmlLexer
from pygments.lexers.javascript import JavascriptLexer
from pygments.lexers.css import CssLexer
+from pygments.lexers.lilypond import LilyPondLexer
+from pygments.lexers.data import JsonLexer
from pygments.lexer import RegexLexer, DelegatingLexer, include, bygroups, \
using, this, do_insertions, default, words
@@ -23,7 +25,7 @@ from pygments.util import get_bool_opt, ClassNotFound
__all__ = ['BBCodeLexer', 'MoinWikiLexer', 'RstLexer', 'TexLexer', 'GroffLexer',
'MozPreprocHashLexer', 'MozPreprocPercentLexer',
'MozPreprocXulLexer', 'MozPreprocJavascriptLexer',
- 'MozPreprocCssLexer', 'MarkdownLexer', 'TiddlyWiki5Lexer']
+ 'MozPreprocCssLexer', 'MarkdownLexer', 'TiddlyWiki5Lexer', 'WikitextLexer']
class BBCodeLexer(RegexLexer):
@@ -763,3 +765,736 @@ class TiddlyWiki5Lexer(RegexLexer):
def __init__(self, **options):
self.handlecodeblocks = get_bool_opt(options, 'handlecodeblocks', True)
RegexLexer.__init__(self, **options)
+
+
+class WikitextLexer(RegexLexer):
+ """
+ For MediaWiki Wikitext.
+
+ Parsing Wikitext is tricky, and results vary between different MediaWiki installations,
+ so we only highlight common syntaxes (built-in or from popular extensions),
+ and also assume templates produce no unbalanced syntaxes.
+ """
+ name = 'Wikitext'
+ url = 'https://www.mediawiki.org/wiki/Wikitext'
+ aliases = ['wikitext', 'mediawiki']
+ filenames = []  # no filename patterns are registered for this lexer
+ mimetypes = ['text/x-wiki']
+ flags = re.MULTILINE  # '^' and '$' in the rules below match at every line boundary
+
+ def nowiki_tag_rules(tag_name):
+     """
+     Build the rule list for the body of a nowiki-like tag.
+
+     The closing ``</tag_name>`` (any letter case) pops the state; until then
+     the 'entity' rules and then the 'text' rules are applied.
+     """
+     close_tag = r'(?i)(</)({})(\s*)(>)'.format(tag_name)
+     close_action = bygroups(Punctuation, Name.Tag, Whitespace, Punctuation)
+     rules = [(close_tag, close_action, '#pop')]
+     rules.append(include('entity'))
+     rules.append(include('text'))
+     return rules
+
+ def plaintext_tag_rules(tag_name):
+     """
+     Build the single rule for a tag whose body is not highlighted at all.
+
+     One match consumes everything up to and including the closing
+     ``</tag_name>`` (any letter case, '.' also matching newlines), emits the
+     body as ``Text`` and pops the state.
+     """
+     through_close_tag = r'(?si)(.*?)(</)({})(\s*)(>)'.format(tag_name)
+     token_types = (Text, Punctuation, Name.Tag, Whitespace, Punctuation)
+     return [(through_close_tag, bygroups(*token_types), '#pop')]
+
+ def delegate_tag_rules(tag_name, lexer):
+     """
+     Build the rule list for a tag whose body is highlighted by another lexer.
+
+     The closing ``</tag_name>`` pops the state; any text in front of it is
+     handed to ``lexer`` via ``using()``.
+     """
+     close_tag = r'(?i)(</)({})(\s*)(>)'.format(tag_name)
+     body_before_close = r'(?si).+?(?=</{}\s*>)'.format(tag_name)
+     pop_on_close = (
+         close_tag,
+         bygroups(Punctuation, Name.Tag, Whitespace, Punctuation),
+         '#pop',
+     )
+     delegate_body = (body_before_close, using(lexer))
+     return [pop_on_close, delegate_body]
+
+ def text_rules(token):
+     """
+     Return the catch-all text rules, each emitting ``token``.
+
+     In order: a run of word characters, a run of whitespace other than
+     newline, and finally any single character (including a newline).
+     """
+     patterns = (r'\w+', r'[^\S\n]+', r'(?s).')
+     return [(pattern, token) for pattern in patterns]
+
+ def handle_syntaxhighlight(self, match, ctx):
+     """
+     Callback for what follows the tag name of ``<syntaxhighlight>``/``<source>``.
+
+     ``match.group()`` holds the attributes, the ``>`` that ends the opening
+     tag, and the tag body. The attributes are lexed in the 'attr' state. The
+     body is delegated to the lexer named by the last ``lang`` attribute, or
+     emitted as one ``Text`` token when there is no usable ``lang``.
+
+     Every yielded position is offset by ``match.start()``, so all tokens
+     share the coordinate system of the ``>`` token.
+     """
+     from pygments.lexers import get_lexer_by_name
+
+     base = match.start()
+     attr_content = match.group()
+
+     # Locate the '>' ending the opening tag, skipping comment ends (-->).
+     # Stopping as soon as find() reports -1 matters: slicing around -1 can
+     # itself yield '--' and would otherwise restart the search forever.
+     index = attr_content.find('>')
+     while index != -1 and attr_content[max(index - 2, 0):index] == '--':
+         index = attr_content.find('>', index + 1)
+
+     if index == -1:
+         # No tag end
+         for pos, token, value in self.get_tokens_unprocessed(
+                 attr_content, stack=['root', 'attr']):
+             yield base + pos, token, value
+         return
+
+     attr = attr_content[:index]
+     content = attr_content[index + 1:]
+     for pos, token, value in self.get_tokens_unprocessed(attr, stack=['root', 'attr']):
+         yield base + pos, token, value
+     yield base + index, Punctuation, '>'
+
+     lexer = None
+     lang_match = re.findall(r'\blang=("|\'|)(\w+)(\1)', attr)
+     if lang_match:
+         # Pick the last match in case of multiple matches
+         lang = lang_match[-1][1]
+         try:
+             lexer = get_lexer_by_name(lang)
+         except ClassNotFound:
+             # Unknown language: deliberately fall back to plain text below
+             pass
+
+     content_start = base + index + 1
+     if lexer is None:
+         yield content_start, Text, content
+     else:
+         for pos, token, value in lexer.get_tokens_unprocessed(content):
+             yield content_start + pos, token, value
+
+ def handle_score(self, match, ctx):
+     """
+     Callback for what follows the tag name of ``<score>``.
+
+     ``match.group()`` holds the attributes, the ``>`` that ends the opening
+     tag, and the tag body. The attributes are lexed in the 'attr' state. The
+     body goes to ``LilyPondLexer`` when the last ``lang`` attribute is exactly
+     ``lilypond`` or is absent; any other value is emitted as plain ``Text``.
+
+     Every yielded position is offset by ``match.start()``, so all tokens
+     share the coordinate system of the ``>`` token.
+     """
+     base = match.start()
+     attr_content = match.group()
+
+     # Locate the '>' ending the opening tag, skipping comment ends (-->).
+     # Stopping as soon as find() reports -1 matters: slicing around -1 can
+     # itself yield '--' and would otherwise restart the search forever.
+     index = attr_content.find('>')
+     while index != -1 and attr_content[max(index - 2, 0):index] == '--':
+         index = attr_content.find('>', index + 1)
+
+     if index == -1:
+         # No tag end
+         for pos, token, value in self.get_tokens_unprocessed(
+                 attr_content, stack=['root', 'attr']):
+             yield base + pos, token, value
+         return
+
+     attr = attr_content[:index]
+     content = attr_content[index + 1:]
+     for pos, token, value in self.get_tokens_unprocessed(attr, stack=['root', 'attr']):
+         yield base + pos, token, value
+     yield base + index, Punctuation, '>'
+
+     lang_match = re.findall(r'\blang=("|\'|)(\w+)(\1)', attr)
+     # Pick the last match in case of multiple matches
+     lang = lang_match[-1][1] if lang_match else 'lilypond'
+
+     content_start = base + index + 1
+     if lang == 'lilypond':  # Case sensitive
+         for pos, token, value in LilyPondLexer().get_tokens_unprocessed(content):
+             yield content_start + pos, token, value
+     else:  # ABC
+         # FIXME: Use ABC lexer in the future
+         yield content_start, Text, content
+
+ # a-z is omitted to keep the linter quiet; REMEMBER to use (?i) wherever title_char is used
+ title_char = r' %!"$&\'()*,\-./0-9:;=?@A-Z\\\^_`~+\u0080-\uFFFF'  # character-class body, interpolated inside [...] by the rules below
+ nbsp_char = r'(?:\t|&nbsp;|&\#0*160;|&\#[Xx]0*[Aa]0;|[ \xA0\u1680\u2000-\u200A\u202F\u205F\u3000])'  # one separator: tab, an nbsp entity form, or a literal space character
+ link_address = r'(?:[0-9.]+|\[[0-9a-f:.]+\]|[^\x00-\x20"<>\[\]\x7F\xA0\u1680\u2000-\u200A\u202F\u205F\u3000\uFFFD])'  # what must directly follow the protocol in a URL pattern
+ link_char_class = r'[^\x00-\x20"<>\[\]\x7F\xA0\u1680\u2000-\u200A\u202F\u205F\u3000\uFFFD]'  # any further URL character
+ double_slashes_i = {  # double-underscore words matched with a (?i) prefix in 'root'
+ '__FORCETOC__', '__NOCONTENTCONVERT__', '__NOCC__', '__NOEDITSECTION__', '__NOGALLERY__',
+ '__NOTITLECONVERT__', '__NOTC__', '__NOTOC__', '__TOC__',
+ }
+ double_slashes = {  # double-underscore words matched exactly as written in 'root'
+ '__EXPECTUNUSEDCATEGORY__', '__HIDDENCAT__', '__INDEX__', '__NEWSECTIONLINK__',
+ '__NOINDEX__', '__NONEWSECTIONLINK__', '__STATICREDIRECT__', '__NOGLOBAL__',
+ '__DISAMBIG__', '__EXPECTED_UNCONNECTED_PAGE__',
+ }
+ protocols = {  # URL prefixes joined with '|' into the raw-URL, wikilink and external-link patterns
+ 'bitcoin:', 'ftp://', 'ftps://', 'geo:', 'git://', 'gopher://', 'http://', 'https://',
+ 'irc://', 'ircs://', 'magnet:', 'mailto:', 'mms://', 'news:', 'nntp://', 'redis://',
+ 'sftp://', 'sip:', 'sips:', 'sms:', 'ssh://', 'svn://', 'tel:', 'telnet://', 'urn:',
+ 'worldwind://', 'xmpp:', '//',
+ }
+ non_relative_protocols = protocols - {'//'}  # every prefix except the protocol-relative '//'
+ html_tags = {  # tag names joined into the "HTML tags" rules of the 'inline' state
+ 'abbr', 'b', 'bdi', 'bdo', 'big', 'blockquote', 'br', 'caption', 'center', 'cite', 'code',
+ 'data', 'dd', 'del', 'dfn', 'div', 'dl', 'dt', 'em', 'font', 'h1', 'h2', 'h3', 'h4', 'h5',
+ 'h6', 'hr', 'i', 'ins', 'kbd', 'li', 'link', 'mark', 'meta', 'ol', 'p', 'q', 'rb', 'rp',
+ 'rt', 'rtc', 'ruby', 's', 'samp', 'small', 'span', 'strike', 'strong', 'sub', 'sup',
+ 'table', 'td', 'th', 'time', 'tr', 'tt', 'u', 'ul', 'var', 'wbr',
+ }
+ parser_tags = {  # tag names joined into the "Other parser tags" rules of the 'inline' state
+ 'graph', 'charinsert', 'rss', 'chem', 'categorytree', 'nowiki', 'inputbox', 'math',
+ 'hiero', 'score', 'pre', 'ref', 'translate', 'imagemap', 'templatestyles', 'languages',
+ 'noinclude', 'mapframe', 'section', 'poem', 'syntaxhighlight', 'includeonly', 'tvar',
+ 'onlyinclude', 'templatedata', 'langconvert', 'timeline', 'dynamicpagelist', 'gallery',
+ 'maplink', 'ce', 'references',
+ }
+ variant_langs = {  # variant codes joined into the LanguageConverter (-{ ... }-) patterns
+ # ZhConverter.php
+ 'zh', 'zh-hans', 'zh-hant', 'zh-cn', 'zh-hk', 'zh-mo', 'zh-my', 'zh-sg', 'zh-tw',
+ # UzConverter.php
+ 'uz', 'uz-latn', 'uz-cyrl',
+ # TlyConverter.php
+ 'tly', 'tly-cyrl',
+ # TgConverter.php
+ 'tg', 'tg-latn',
+ # SrConverter.php
+ 'sr', 'sr-ec', 'sr-el',
+ # ShiConverter.php
+ 'shi', 'shi-tfng', 'shi-latn',
+ # ShConverter.php
+ 'sh-latn', 'sh-cyrl',
+ # KuConverter.php
+ 'ku', 'ku-arab', 'ku-latn',
+ # KkConverter.php
+ 'kk', 'kk-cyrl', 'kk-latn', 'kk-arab', 'kk-kz', 'kk-tr', 'kk-cn',
+ # IuConverter.php
+ 'iu', 'ike-cans', 'ike-latn',
+ # GanConverter.php
+ 'gan', 'gan-hans', 'gan-hant',
+ # EnConverter.php
+ 'en', 'en-x-piglatin',
+ # CrhConverter.php
+ 'crh', 'crh-cyrl', 'crh-latn',
+ # BanConverter.php
+ 'ban', 'ban-bali', 'ban-x-dharma', 'ban-x-palmleaf', 'ban-x-pku',
+ }
+ magic_vars_i = {  # {{NAME}} variables matched with (?i) in 'replaceable'
+ 'ARTICLEPATH', 'INT', 'PAGEID', 'SCRIPTPATH', 'SERVER', 'SERVERNAME', 'STYLEPATH',
+ }
+ magic_vars = {  # {{NAME}} variables matched exactly as written in 'replaceable'
+ '!', '=', 'BASEPAGENAME', 'BASEPAGENAMEE', 'CASCADINGSOURCES', 'CONTENTLANGUAGE',
+ 'CONTENTLANG', 'CURRENTDAY', 'CURRENTDAY2', 'CURRENTDAYNAME', 'CURRENTDOW', 'CURRENTHOUR',
+ 'CURRENTMONTH', 'CURRENTMONTH2', 'CURRENTMONTH1', 'CURRENTMONTHABBREV', 'CURRENTMONTHNAME',
+ 'CURRENTMONTHNAMEGEN', 'CURRENTTIME', 'CURRENTTIMESTAMP', 'CURRENTVERSION', 'CURRENTWEEK',
+ 'CURRENTYEAR', 'DIRECTIONMARK', 'DIRMARK', 'FULLPAGENAME', 'FULLPAGENAMEE', 'LOCALDAY',
+ 'LOCALDAY2', 'LOCALDAYNAME', 'LOCALDOW', 'LOCALHOUR', 'LOCALMONTH', 'LOCALMONTH2',
+ 'LOCALMONTH1', 'LOCALMONTHABBREV', 'LOCALMONTHNAME', 'LOCALMONTHNAMEGEN', 'LOCALTIME',
+ 'LOCALTIMESTAMP', 'LOCALWEEK', 'LOCALYEAR', 'NAMESPACE', 'NAMESPACEE', 'NAMESPACENUMBER',
+ 'NUMBEROFACTIVEUSERS', 'NUMBEROFADMINS', 'NUMBEROFARTICLES', 'NUMBEROFEDITS',
+ 'NUMBEROFFILES', 'NUMBEROFPAGES', 'NUMBEROFUSERS', 'PAGELANGUAGE', 'PAGENAME', 'PAGENAMEE',
+ 'REVISIONDAY', 'REVISIONDAY2', 'REVISIONID', 'REVISIONMONTH', 'REVISIONMONTH1',
+ 'REVISIONSIZE', 'REVISIONTIMESTAMP', 'REVISIONUSER', 'REVISIONYEAR', 'ROOTPAGENAME',
+ 'ROOTPAGENAMEE', 'SITENAME', 'SUBJECTPAGENAME', 'ARTICLEPAGENAME', 'SUBJECTPAGENAMEE',
+ 'ARTICLEPAGENAMEE', 'SUBJECTSPACE', 'ARTICLESPACE', 'SUBJECTSPACEE', 'ARTICLESPACEE',
+ 'SUBPAGENAME', 'SUBPAGENAMEE', 'TALKPAGENAME', 'TALKPAGENAMEE', 'TALKSPACE', 'TALKSPACEE',
+ }
+ parser_functions_i = {  # "NAME:" parser functions matched with (?i) in 'template-begin-space'
+ 'ANCHORENCODE', 'BIDI', 'CANONICALURL', 'CANONICALURLE', 'FILEPATH', 'FORMATNUM',
+ 'FULLURL', 'FULLURLE', 'GENDER', 'GRAMMAR', 'INT', r'\#LANGUAGE', 'LC', 'LCFIRST', 'LOCALURL',
+ 'LOCALURLE', 'NS', 'NSE', 'PADLEFT', 'PADRIGHT', 'PAGEID', 'PLURAL', 'UC', 'UCFIRST',
+ 'URLENCODE',
+ }
+ parser_functions = {  # "NAME:" parser functions matched exactly as written in 'template-begin-space'
+ 'BASEPAGENAME', 'BASEPAGENAMEE', 'CASCADINGSOURCES', 'DEFAULTSORT', 'DEFAULTSORTKEY',
+ 'DEFAULTCATEGORYSORT', 'FULLPAGENAME', 'FULLPAGENAMEE', 'NAMESPACE', 'NAMESPACEE',
+ 'NAMESPACENUMBER', 'NUMBERINGROUP', 'NUMINGROUP', 'NUMBEROFACTIVEUSERS', 'NUMBEROFADMINS',
+ 'NUMBEROFARTICLES', 'NUMBEROFEDITS', 'NUMBEROFFILES', 'NUMBEROFPAGES', 'NUMBEROFUSERS',
+ 'PAGENAME', 'PAGENAMEE', 'PAGESINCATEGORY', 'PAGESINCAT', 'PAGESIZE', 'PROTECTIONEXPIRY',
+ 'PROTECTIONLEVEL', 'REVISIONDAY', 'REVISIONDAY2', 'REVISIONID', 'REVISIONMONTH',
+ 'REVISIONMONTH1', 'REVISIONTIMESTAMP', 'REVISIONUSER', 'REVISIONYEAR', 'ROOTPAGENAME',
+ 'ROOTPAGENAMEE', 'SUBJECTPAGENAME', 'ARTICLEPAGENAME', 'SUBJECTPAGENAMEE',
+ 'ARTICLEPAGENAMEE', 'SUBJECTSPACE', 'ARTICLESPACE', 'SUBJECTSPACEE', 'ARTICLESPACEE',
+ 'SUBPAGENAME', 'SUBPAGENAMEE', 'TALKPAGENAME', 'TALKPAGENAMEE', 'TALKSPACE', 'TALKSPACEE',
+ 'INT', 'DISPLAYTITLE', 'PAGESINNAMESPACE', 'PAGESINNS',
+ }
+
+ tokens = {  # RegexLexer state table: maps each state name to its list of rules
+ 'root': [
+ # Redirects (only at the very start of the text, see \A)
+ (r"""(?xi)
+ (\A\s*?)(\#REDIRECT:?) # may contain a colon
+ (\s+)(\[\[) (?=[^\]\n]* \]\]$)
+ """,
+ bygroups(Whitespace, Keyword, Whitespace, Punctuation), 'redirect-inner'),
+ # Subheadings (two to six '=' on both sides)
+ (r'^(={2,6})(.+?)(\1)(\s*$\n)',
+ bygroups(Generic.Subheading, Generic.Subheading, Generic.Subheading, Whitespace)),
+ # Headings (a single '=' on both sides)
+ (r'^(=.+?=)(\s*$\n)',
+ bygroups(Generic.Heading, Whitespace)),
+ # Double-underscored magic words (such as __TOC__)
+ (words(double_slashes_i, prefix=r'(?i)'), Name.Function.Magic),
+ (words(double_slashes), Name.Function.Magic),
+ # Raw URLs
+ (r'(?i)\b(?:{}){}{}*'.format('|'.join(protocols),
+ link_address, link_char_class), Name.Label),
+ # Magic links
+ (r'\b(?:RFC|PMID){}+[0-9]+\b'.format(nbsp_char),
+ Name.Function.Magic),
+ (r"""(?x)
+ \bISBN {nbsp_char}
+ (?: 97[89] {nbsp_dash}? )?
+ (?: [0-9] {nbsp_dash}? ){{9}} # escape format()
+ [0-9Xx]\b
+ """.format(nbsp_char=nbsp_char, nbsp_dash=f'(?:-|{nbsp_char})'), Name.Function.Magic),
+ include('list'),
+ include('inline'),
+ include('text'),
+ ],
+ 'redirect-inner': [  # entered from 'root' right after "#REDIRECT [["
+ (r'(\]\])(\s*?\n)', bygroups(Punctuation, Whitespace), '#pop'),
+ (r'(\#)([^#]*?)', bygroups(Punctuation, Name.Label)),
+ (r'(?i)[{}]+'.format(title_char), Name.Tag),
+ ],
+ 'list': [  # line-start markers; included by 'root' and 'table'
+ # Description lists
+ (r'^;', Keyword, 'dt'),
+ # Ordered lists, unordered lists and indents
+ (r'^[#:*]+', Keyword),
+ # Horizontal rules
+ (r'^-{4,}', Keyword),
+ ],
+ 'inline': [  # inline markup; included by 'root' and by most nested states
+ # Signatures
+ (r'~{3,5}', Keyword),
+ # Entities
+ include('entity'),
+ # Bold & italic
+ (r"('')(''')(?!')", bygroups(Generic.Emph,
+ Generic.Strong), 'inline-italic-bold'),
+ (r"'''(?!')", Generic.Strong, 'inline-bold'),
+ (r"''(?!')", Generic.Emph, 'inline-italic'),
+ # Comments & parameters & templates
+ include('replaceable'),
+ # Media links
+ (
+ r"""(?xi)
+ (\[\[)
+ (File|Image) (:)
+ ([{}]*)
+ (?: (\#) ([{}]*?) )?
+ """.format(title_char, f'{title_char}#'),
+ bygroups(Punctuation, Name.Namespace, Punctuation,
+ Name.Tag, Punctuation, Name.Label),
+ 'medialink-inner'
+ ),
+ # Wikilinks: first the form closed directly by "]]", then the piped form
+ (
+ r"""(?xi)
+ (\[\[)(?!{}) # Should not contain URLs
+ (?: ([{}]*) (:))?
+ ([{}]*?)
+ (?: (\#) ([{}]*?) )?
+ (\]\])
+ """.format('|'.join(protocols), title_char.replace('/', ''),
+ title_char, f'{title_char}#'),
+ bygroups(Punctuation, Name.Namespace, Punctuation,
+ Name.Tag, Punctuation, Name.Label, Punctuation)
+ ),
+ (
+ r"""(?xi)
+ (\[\[)(?!{})
+ (?: ([{}]*) (:))?
+ ([{}]*?)
+ (?: (\#) ([{}]*?) )?
+ (\|)
+ """.format('|'.join(protocols), title_char.replace('/', ''),
+ title_char, f'{title_char}#'),
+ bygroups(Punctuation, Name.Namespace, Punctuation,
+ Name.Tag, Punctuation, Name.Label, Punctuation),
+ 'wikilink-inner'
+ ),
+ # External links
+ (
+ r"""(?xi)
+ (\[)
+ ((?:{}) {} {}*)
+ (\s*)
+ """.format('|'.join(protocols), link_address, link_char_class),
+ bygroups(Punctuation, Name.Label, Whitespace),
+ 'extlink-inner'
+ ),
+ # Tables (the rest of the "{|" line is lexed as attributes)
+ (r'^(:*)(\s*?)(\{\|)([^\n]*)$', bygroups(Keyword,
+ Whitespace, Punctuation, using(this, state=['root', 'attr'])), 'table'),
+ # HTML tags
+ (r'(?i)(<)({})\b'.format('|'.join(html_tags)),
+ bygroups(Punctuation, Name.Tag), 'tag-inner-ordinary'),
+ (r'(?i)(</)({})\b(\s*)(>)'.format('|'.join(html_tags)),
+ bygroups(Punctuation, Name.Tag, Whitespace, Punctuation)),
+ # The tag rules below push two states: 'tag-inner' lexes the rest of the
+ # opening tag first, then the 'tag-<name>' state lexes the tag body.
+ # <nowiki>
+ (r'(?i)(<)(nowiki)\b', bygroups(Punctuation,
+ Name.Tag), ('tag-nowiki', 'tag-inner')),
+ # <pre>
+ (r'(?i)(<)(pre)\b', bygroups(Punctuation,
+ Name.Tag), ('tag-pre', 'tag-inner')),
+ # <categorytree>
+ (r'(?i)(<)(categorytree)\b', bygroups(
+ Punctuation, Name.Tag), ('tag-categorytree', 'tag-inner')),
+ # <hiero>
+ (r'(?i)(<)(hiero)\b', bygroups(Punctuation,
+ Name.Tag), ('tag-hiero', 'tag-inner')),
+ # <math>
+ (r'(?i)(<)(math)\b', bygroups(Punctuation,
+ Name.Tag), ('tag-math', 'tag-inner')),
+ # <chem>
+ (r'(?i)(<)(chem)\b', bygroups(Punctuation,
+ Name.Tag), ('tag-chem', 'tag-inner')),
+ # <ce>
+ (r'(?i)(<)(ce)\b', bygroups(Punctuation,
+ Name.Tag), ('tag-ce', 'tag-inner')),
+ # <charinsert>
+ (r'(?i)(<)(charinsert)\b', bygroups(
+ Punctuation, Name.Tag), ('tag-charinsert', 'tag-inner')),
+ # <templatedata>
+ (r'(?i)(<)(templatedata)\b', bygroups(
+ Punctuation, Name.Tag), ('tag-templatedata', 'tag-inner')),
+ # <gallery>
+ (r'(?i)(<)(gallery)\b', bygroups(
+ Punctuation, Name.Tag), ('tag-gallery', 'tag-inner')),
+ # <graph>
+ # The pattern must name "graph": a second "gallery" pattern here can never
+ # fire (the <gallery> rule above wins), leaving 'tag-graph' unreachable.
+ (r'(?i)(<)(graph)\b', bygroups(
+     Punctuation, Name.Tag), ('tag-graph', 'tag-inner')),
+ # <timeline>
+ # 'tag-timeline' is defined in this table but had no rule entering it.
+ (r'(?i)(<)(timeline)\b', bygroups(
+     Punctuation, Name.Tag), ('tag-timeline', 'tag-inner')),
+ # <dynamicpagelist>
+ (r'(?i)(<)(dynamicpagelist)\b', bygroups(
+ Punctuation, Name.Tag), ('tag-dynamicpagelist', 'tag-inner')),
+ # <inputbox>
+ (r'(?i)(<)(inputbox)\b', bygroups(
+ Punctuation, Name.Tag), ('tag-inputbox', 'tag-inner')),
+ # <rss>
+ (r'(?i)(<)(rss)\b', bygroups(
+ Punctuation, Name.Tag), ('tag-rss', 'tag-inner')),
+ # <imagemap>
+ (r'(?i)(<)(imagemap)\b', bygroups(
+ Punctuation, Name.Tag), ('tag-imagemap', 'tag-inner')),
+ # <syntaxhighlight>: closing tag, then opening tag + body up to the closing tag
+ (r'(?i)(</)(syntaxhighlight)\b(\s*)(>)',
+ bygroups(Punctuation, Name.Tag, Whitespace, Punctuation)),
+ (r'(?si)(<)(syntaxhighlight)\b([^>]*?(?<!/)>.*?)(?=</\2\s*>)',
+ bygroups(Punctuation, Name.Tag, handle_syntaxhighlight)),
+ # <syntaxhighlight>: Fallback case for self-closing tags
+ (r'(?i)(<)(syntaxhighlight)\b(\s*?)((?:[^>]|-->)*?)(/\s*?(?<!--)>)', bygroups(
+ Punctuation, Name.Tag, Whitespace, using(this, state=['root', 'attr']), Punctuation)),
+ # <source>: handled exactly like <syntaxhighlight>
+ (r'(?i)(</)(source)\b(\s*)(>)',
+ bygroups(Punctuation, Name.Tag, Whitespace, Punctuation)),
+ (r'(?si)(<)(source)\b([^>]*?(?<!/)>.*?)(?=</\2\s*>)',
+ bygroups(Punctuation, Name.Tag, handle_syntaxhighlight)),
+ # <source>: Fallback case for self-closing tags
+ (r'(?i)(<)(source)\b(\s*?)((?:[^>]|-->)*?)(/\s*?(?<!--)>)', bygroups(
+ Punctuation, Name.Tag, Whitespace, using(this, state=['root', 'attr']), Punctuation)),
+ # <score>: same shape, with the body handed to handle_score
+ (r'(?i)(</)(score)\b(\s*)(>)',
+ bygroups(Punctuation, Name.Tag, Whitespace, Punctuation)),
+ (r'(?si)(<)(score)\b([^>]*?(?<!/)>.*?)(?=</\2\s*>)',
+ bygroups(Punctuation, Name.Tag, handle_score)),
+ # <score>: Fallback case for self-closing tags
+ (r'(?i)(<)(score)\b(\s*?)((?:[^>]|-->)*?)(/\s*?(?<!--)>)', bygroups(
+ Punctuation, Name.Tag, Whitespace, using(this, state=['root', 'attr']), Punctuation)),
+ # Other parser tags (reached only when none of the specific rules above matched)
+ (r'(?i)(<)({})\b'.format('|'.join(parser_tags)),
+ bygroups(Punctuation, Name.Tag), 'tag-inner-ordinary'),
+ (r'(?i)(</)({})\b(\s*)(>)'.format('|'.join(parser_tags)),
+ bygroups(Punctuation, Name.Tag, Whitespace, Punctuation)),
+ # LanguageConverter markups
+ (
+ r"""(?xi)
+ (-\{{) # Escape format()
+ (?: ([^|]) (\|))?
+ (?: (\s* (?:{variants}) \s*) (=>))?
+ (\s* (?:{variants}) \s*) (:)
+ """.format(variants='|'.join(variant_langs)),
+ bygroups(Punctuation, Keyword, Punctuation,
+ Name.Label, Operator, Name.Label, Punctuation),
+ 'lc-inner'
+ ),
+ (r'-\{', Punctuation, 'lc-raw'),
+ ],
+ 'wikilink-inner': [  # text after the "|" of a piped wikilink
+ # Quit in case of another wikilink
+ (r'(?=\[\[)', Punctuation, '#pop'),
+ (r'\]\]', Punctuation, '#pop'),
+ include('inline'),
+ include('text'),
+ ],
+ 'medialink-inner': [  # everything after the title of a [[File:...]] / [[Image:...]] link
+ (r'\]\]', Punctuation, '#pop'),
+ (r'(\|)([^\n=|]*)(=)',
+ bygroups(Punctuation, Name.Attribute, Operator)),
+ (r'\|', Punctuation),
+ include('inline'),
+ include('text'),
+ ],
+ 'quote-common': [  # exit rules shared by the four bold/italic states below
+ # Quit in case of link/template endings
+ (r'(?=\]\]|\{\{|\}\})', Punctuation, '#pop'),
+ (r'\n', Text, '#pop'),
+ ],
+ 'inline-italic': [
+ include('quote-common'),
+ (r"('')(''')(?!')", bygroups(Generic.Emph,
+ Generic.Strong), ('#pop', 'inline-bold')),
+ (r"'''(?!')", Generic.Strong, ('#pop', 'inline-italic-bold')),
+ (r"''(?!')", Generic.Emph, '#pop'),
+ include('inline'),
+ include('text-italic'),
+ ],
+ 'inline-bold': [
+ include('quote-common'),
+ (r"(''')('')(?!')", bygroups(
+ Generic.Strong, Generic.Emph), ('#pop', 'inline-italic')),
+ (r"'''(?!')", Generic.Strong, '#pop'),
+ (r"''(?!')", Generic.Emph, ('#pop', 'inline-bold-italic')),
+ include('inline'),
+ include('text-bold'),
+ ],
+ 'inline-bold-italic': [
+ include('quote-common'),
+ (r"('')(''')(?!')", bygroups(Generic.Emph,
+ Generic.Strong), '#pop'),
+ (r"'''(?!')", Generic.Strong, ('#pop', 'inline-italic')),
+ (r"''(?!')", Generic.Emph, ('#pop', 'inline-bold')),
+ include('inline'),
+ include('text-italic'),
+ ],
+ 'inline-italic-bold': [  # NOTE(review): unlike its three sibling states this one does not include 'inline' - confirm that is intended
+ include('quote-common'),
+ (r"(''')('')(?!')", bygroups(
+ Generic.Strong, Generic.Emph), '#pop'),
+ (r"'''(?!')", Generic.Strong, ('#pop', 'inline-italic')),
+ (r"''(?!')", Generic.Emph, ('#pop', 'inline-bold')),
+ include('text-bold'),
+ ],
+ 'lc-inner': [  # inside -{ ... }- once a "variant:" prefix has been seen
+ (
+ r"""(?xi)
+ (;)
+ (?: (\s* (?:{variants}) \s*) (=>))?
+ (\s* (?:{variants}) \s*) (:)
+ """.format(variants='|'.join(variant_langs)),
+ bygroups(Punctuation, Name.Label,
+ Operator, Name.Label, Punctuation)
+ ),
+ (r';?\s*?\}-', Punctuation, '#pop'),
+ include('inline'),
+ include('text'),
+ ],
+ 'lc-raw': [  # inside -{ ... }- without a recognised variant prefix
+ (r'\}-', Punctuation, '#pop'),
+ include('inline'),
+ include('text'),
+ ],
+ 'replaceable': [  # comments, parameters, magic variables, templates and <tvar>
+ # Comments (an unterminated comment runs to the end of the text)
+ (r'<!--[\s\S]*?(?:-->|\Z)', Comment.Multiline),
+ # Parameters
+ (
+ r"""(?x)
+ (\{{3})
+ ([^|]*?)
+ (?=\}{3}|\|)
+ """,
+ bygroups(Punctuation, Name.Variable),
+ 'parameter-inner',
+ ),
+ # Magic variables
+ (r'(?i)(\{\{)(\s*)(%s)(\s*)(\}\})' % '|'.join(magic_vars_i),
+ bygroups(Punctuation, Whitespace, Name.Function, Whitespace, Punctuation)),
+ (r'(\{\{)(\s*)(%s)(\s*)(\}\})' % '|'.join(magic_vars),
+ bygroups(Punctuation, Whitespace, Name.Function, Whitespace, Punctuation)),
+ # Parser functions & templates
+ (r'\{\{', Punctuation, 'template-begin-space'),
+ # <tvar> legacy syntax
+ (r'(?i)(<)(tvar)\b(\|)([^>]*?)(>)', bygroups(Punctuation,
+ Name.Tag, Punctuation, String, Punctuation)),
+ (r'</>', Punctuation, '#pop'),
+ # <tvar>
+ (r'(?i)(<)(tvar)\b', bygroups(Punctuation, Name.Tag), 'tag-inner-ordinary'),
+ (r'(?i)(</)(tvar)\b(\s*)(>)',
+ bygroups(Punctuation, Name.Tag, Whitespace, Punctuation)),
+ ],
+ 'parameter-inner': [  # after the name of a {{{parameter}}}, up to the closing "}}}"
+ (r'\}{3}', Punctuation, '#pop'),
+ (r'\|', Punctuation),
+ include('inline'),
+ include('text'),
+ ],
+ 'template-begin-space': [
+ # Templates allow line breaks at the beginning, and due to how MediaWiki handles
+ # comments, an extra state is required to handle things like {{\n<!---->\n name}}
+ (r'<!--[\s\S]*?(?:-->|\Z)', Comment.Multiline),
+ (r'\s+', Whitespace),
+ # Parser functions
+ (
+ r'(?i)(\#[%s]*?|%s)(:)' % (title_char,
+ '|'.join(parser_functions_i)),
+ bygroups(Name.Function, Punctuation), ('#pop', 'template-inner')
+ ),
+ (
+ r'(%s)(:)' % ('|'.join(parser_functions)),
+ bygroups(Name.Function, Punctuation), ('#pop', 'template-inner')
+ ),
+ # Templates
+ (
+ r'(?i)([%s]*?)(:)' % title_char,
+ bygroups(Name.Namespace, Punctuation), ('#pop', 'template-name')
+ ),
+ default(('#pop', 'template-name'),),
+ ],
+ 'template-name': [  # the template title, up to the first "|", "}}" or newline
+ (r'(\s*?)(\|)', bygroups(Text, Punctuation), ('#pop', 'template-inner')),
+ (r'\}\}', Punctuation, '#pop'),
+ (r'\n', Text, '#pop'),
+ include('replaceable'),
+ *text_rules(Name.Tag),
+ ],
+ 'template-inner': [  # template / parser-function arguments, up to the closing "}}"
+ (r'\}\}', Punctuation, '#pop'),
+ (r'\|', Punctuation),
+ (
+ r"""(?x)
+ (?<=\|)
+ ( (?: (?! \{\{ | \}\} )[^=\|<])*? ) # Exclude templates and tags
+ (=)
+ """,
+ bygroups(Name.Label, Operator)
+ ),
+ include('inline'),
+ include('text'),
+ ],
+ 'table': [  # entered from 'inline' on a "{|" line, left on a "|}" line
+ # Use [ \t\n\r\0\x0B] instead of \s to follow PHP trim() behavior
+ # Endings
+ (r'^([ \t\n\r\0\x0B]*?)(\|\})',
+ bygroups(Whitespace, Punctuation), '#pop'),
+ # Table rows (the rest of the "|-" line is lexed as attributes)
+ (r'^([ \t\n\r\0\x0B]*?)(\|-+)(.*)$', bygroups(Whitespace, Punctuation,
+ using(this, state=['root', 'attr']))),
+ # Captions
+ (
+ r"""(?x)
+ ^([ \t\n\r\0\x0B]*?)(\|\+)
+ # Exclude links, template and tags
+ (?: ( (?: (?! \[\[ | \{\{ )[^|\n<] )*? )(\|) )?
+ (.*?)$
+ """,
+ bygroups(Whitespace, Punctuation, using(this, state=[
+ 'root', 'attr']), Punctuation, Generic.Heading),
+ ),
+ # Table data
+ (
+ r"""(?x)
+ ( ^(?:[ \t\n\r\0\x0B]*?)\| | \|\| )
+ (?: ( (?: (?! \[\[ | \{\{ )[^|\n<] )*? )(\|)(?!\|) )?
+ """,
+ bygroups(Punctuation, using(this, state=[
+ 'root', 'attr']), Punctuation),
+ ),
+ # Table headers
+ (
+ r"""(?x)
+ ( ^(?:[ \t\n\r\0\x0B]*?)! )
+ (?: ( (?: (?! \[\[ | \{\{ )[^|\n<] )*? )(\|)(?!\|) )?
+ """,
+ bygroups(Punctuation, using(this, state=[
+ 'root', 'attr']), Punctuation),
+ 'table-header',
+ ),
+ include('list'),
+ include('inline'),
+ include('text'),
+ ],
+ 'table-header': [  # the remainder of a "!" header line; a newline pops back to 'table'
+ # Requires another state for || handling inside headers
+ (r'\n', Text, '#pop'),
+ (
+ r"""(?x)
+ (!!|\|\|)
+ (?:
+ ( (?: (?! \[\[ | \{\{ )[^|\n<] )*? )
+ (\|)(?!\|)
+ )?
+ """,
+ bygroups(Punctuation, using(this, state=[
+ 'root', 'attr']), Punctuation)
+ ),
+ *text_rules(Generic.Subheading),
+ ],
+ 'entity': [  # "&...;" with no whitespace in between
+ (r'&\S*?;', Name.Entity),
+ ],
+ 'dt': [  # after a line-start ";" (see 'list'); ends at the first ":" or newline
+ (r'\n', Text, '#pop'),
+ include('inline'),
+ (r':', Keyword, '#pop'),
+ include('text'),
+ ],
+ 'extlink-inner': [  # label part of an external link, up to the closing "]"
+ (r'\]', Punctuation, '#pop'),
+ include('inline'),
+ include('text'),
+ ],
+ 'nowiki-ish': [  # NOTE(review): no rule or include in this table refers to this state
+ include('entity'),
+ include('text'),
+ ],
+ 'attr': [  # attribute lists lexed via using(this, state=['root', 'attr']) and by the tag callbacks
+ include('replaceable'),
+ (r'\s+', Whitespace),
+ (r'(=)(\s*)(")', bygroups(Operator, Whitespace, String.Double), 'attr-val-2'),
+ (r"(=)(\s*)(')", bygroups(Operator, Whitespace, String.Single), 'attr-val-1'),
+ (r'(=)(\s*)', bygroups(Operator, Whitespace), 'attr-val-0'),
+ (r'[\w:-]+', Name.Attribute),
+
+ ],
+ 'attr-val-0': [  # unquoted value: ends at the first whitespace character
+ (r'\s', Whitespace, '#pop'),
+ include('replaceable'),
+ *text_rules(String),
+ ],
+ 'attr-val-1': [  # single-quoted value
+ (r"'", String.Single, '#pop'),
+ include('replaceable'),
+ *text_rules(String.Single),
+ ],
+ 'attr-val-2': [  # double-quoted value
+ (r'"', String.Double, '#pop'),
+ include('replaceable'),
+ *text_rules(String.Double),
+ ],
+ 'tag-inner-ordinary': [  # rest of an opening tag whose body needs no state of its own
+ (r'/?\s*>', Punctuation, '#pop'),
+ include('tag-attr'),
+ ],
+ 'tag-inner': [  # rest of an opening tag pushed on top of a 'tag-<name>' body state
+ # Return to root state for self-closing tags
+ (r'/\s*>', Punctuation, '#pop:2'),
+ (r'\s*>', Punctuation, '#pop'),
+ include('tag-attr'),
+ ],
+ # The states below are just like their non-tag variants; the key difference is
+ # they forcibly quit when encountering tag closing markup
+ 'tag-attr': [
+ include('replaceable'),
+ (r'\s+', Whitespace),
+ (r'(=)(\s*)(")', bygroups(Operator,
+ Whitespace, String.Double), 'tag-attr-val-2'),
+ (r"(=)(\s*)(')", bygroups(Operator,
+ Whitespace, String.Single), 'tag-attr-val-1'),
+ (r'(=)(\s*)', bygroups(Operator, Whitespace), 'tag-attr-val-0'),
+ (r'[\w:-]+', Name.Attribute),
+
+ ],
+ 'tag-attr-val-0': [
+ (r'\s', Whitespace, '#pop'),
+ (r'/?>', Punctuation, '#pop:2'),
+ include('replaceable'),
+ *text_rules(String),
+ ],
+ 'tag-attr-val-1': [
+ (r"'", String.Single, '#pop'),
+ (r'/?>', Punctuation, '#pop:2'),
+ include('replaceable'),
+ *text_rules(String.Single),
+ ],
+ 'tag-attr-val-2': [
+ (r'"', String.Double, '#pop'),
+ (r'/?>', Punctuation, '#pop:2'),
+ include('replaceable'),
+ *text_rules(String.Double),
+ ],
+ 'tag-nowiki': nowiki_tag_rules('nowiki'),  # tag bodies in which only entities are highlighted
+ 'tag-pre': nowiki_tag_rules('pre'),
+ 'tag-categorytree': plaintext_tag_rules('categorytree'),  # tag bodies emitted as plain Text
+ 'tag-dynamicpagelist': plaintext_tag_rules('dynamicpagelist'),
+ 'tag-hiero': plaintext_tag_rules('hiero'),
+ 'tag-inputbox': plaintext_tag_rules('inputbox'),
+ 'tag-imagemap': plaintext_tag_rules('imagemap'),
+ 'tag-charinsert': plaintext_tag_rules('charinsert'),
+ 'tag-timeline': plaintext_tag_rules('timeline'),
+ 'tag-gallery': plaintext_tag_rules('gallery'),
+ 'tag-graph': plaintext_tag_rules('graph'),
+ 'tag-rss': plaintext_tag_rules('rss'),
+ 'tag-math': delegate_tag_rules('math', TexLexer),  # tag bodies delegated to another lexer
+ 'tag-chem': delegate_tag_rules('chem', TexLexer),
+ 'tag-ce': delegate_tag_rules('ce', TexLexer),
+ 'tag-templatedata': delegate_tag_rules('templatedata', JsonLexer),
+ 'text-italic': text_rules(Generic.Emph),  # catch-all text rules, one variant per emphasis style
+ 'text-bold': text_rules(Generic.Strong),
+ 'text': text_rules(Text),
+ }