summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--pygments/lexers/_mapping.py3
-rw-r--r--pygments/lexers/web.py86
-rw-r--r--tests/examplefiles/test.dtd89
3 files changed, 176 insertions, 2 deletions
diff --git a/pygments/lexers/_mapping.py b/pygments/lexers/_mapping.py
index b3ab5b14..16af9528 100644
--- a/pygments/lexers/_mapping.py
+++ b/pygments/lexers/_mapping.py
@@ -31,7 +31,7 @@ LEXERS = {
'AppleScriptLexer': ('pygments.lexers.other', 'AppleScript', ('applescript',), ('*.applescript',), ()),
'AsymptoteLexer': ('pygments.lexers.other', 'Asymptote', ('asy', 'asymptote'), ('*.asy',), ('text/x-asymptote',)),
'AutohotkeyLexer': ('pygments.lexers.other', 'autohotkey', ('ahk',), ('*.ahk', '*.ahkl'), ('text/x-autohotkey',)),
- 'AwkLexer': ('pygments.lexers.other', 'Awk', ('awk', 'gawk', 'mawk', 'nawk'), ('*.awk',), ('application/x-awk')),
+ 'AwkLexer': ('pygments.lexers.other', 'Awk', ('awk', 'gawk', 'mawk', 'nawk'), ('*.awk',), ()),
'BBCodeLexer': ('pygments.lexers.text', 'BBCode', ('bbcode',), (), ('text/x-bbcode',)),
'BaseMakefileLexer': ('pygments.lexers.text', 'Makefile', ('basemake',), (), ()),
'BashLexer': ('pygments.lexers.other', 'Bash', ('bash', 'sh', 'ksh'), ('*.sh', '*.ksh', '*.bash', '*.ebuild', '*.eclass'), ('application/x-sh', 'application/x-shellscript')),
@@ -71,6 +71,7 @@ LEXERS = {
'DelphiLexer': ('pygments.lexers.compiled', 'Delphi', ('delphi', 'pas', 'pascal', 'objectpascal'), ('*.pas',), ('text/x-pascal',)),
'DiffLexer': ('pygments.lexers.text', 'Diff', ('diff', 'udiff'), ('*.diff', '*.patch'), ('text/x-diff', 'text/x-patch')),
'DjangoLexer': ('pygments.lexers.templates', 'Django/Jinja', ('django', 'jinja'), (), ('application/x-django-templating', 'application/x-jinja')),
+ 'DtdLexer': ('pygments.lexers.web', 'DTD', ('dtd',), ('*.dtd',), ('application/xml-dtd',)),
'DuelLexer': ('pygments.lexers.web', 'Duel', ('duel', 'Duel Engine', 'Duel View', 'JBST', 'jbst', 'JsonML+BST'), ('*.duel', '*.jbst'), ('text/x-duel', 'text/x-jbst')),
'DylanLexer': ('pygments.lexers.compiled', 'Dylan', ('dylan',), ('*.dylan', '*.dyl'), ('text/x-dylan',)),
'ErbLexer': ('pygments.lexers.templates', 'ERB', ('erb',), (), ('application/x-ruby-templating',)),
diff --git a/pygments/lexers/web.py b/pygments/lexers/web.py
index 11bc2175..b6b6fc5a 100644
--- a/pygments/lexers/web.py
+++ b/pygments/lexers/web.py
@@ -26,7 +26,7 @@ __all__ = ['HtmlLexer', 'XmlLexer', 'JavascriptLexer', 'CssLexer',
'PhpLexer', 'ActionScriptLexer', 'XsltLexer', 'ActionScript3Lexer',
'MxmlLexer', 'HaxeLexer', 'HamlLexer', 'SassLexer', 'ScssLexer',
'ObjectiveJLexer', 'CoffeeScriptLexer', 'DuelLexer', 'ScamlLexer',
- 'JadeLexer', 'XQueryLexer']
+ 'JadeLexer', 'XQueryLexer', 'DtdLexer']
class JavascriptLexer(RegexLexer):
@@ -826,6 +826,90 @@ class PhpLexer(RegexLexer):
return rv
+class DtdLexer(RegexLexer):
+ """
+ a lexer for DTD (Document Type Definition).
+ """
+
+ flags = re.MULTILINE | re.DOTALL
+
+ name = 'DTD'
+ aliases = ['dtd']
+ filenames = ['*.dtd']
+ mimetypes = ['application/xml-dtd']
+
+ tokens = {
+ 'root': [
+ include('common'),
+
+ (r'(<!ELEMENT)(\s+)([^\s]+)',
+ bygroups(Keyword, Text, Name.Tag), 'element'),
+ (r'(<!ATTLIST)(\s+)([^\s]+)',
+ bygroups(Keyword, Text, Name.Tag), 'attlist'),
+ (r'(<!ENTITY)(\s+)([^\s]+)',
+ bygroups(Keyword, Text, Name.Entity), 'entity'),
+ (r'(<!NOTATION)(\s+)([^\s]+)',
+ bygroups(Keyword, Text, Name.Tag), 'notation'),
+ (r'(<!\[)([^\[\s]+)(\s*)(\[)', # conditional sections
+ bygroups(Keyword, Name.Entity, Text, Keyword)),
+
+ (r'(<!DOCTYPE)(\s+)([^>\s]+)',
+ bygroups(Keyword, Text, Name.Tag)),
+ (r'PUBLIC|SYSTEM', Keyword.Constant),
+ (r'[\[\]>]', Keyword),
+ ],
+
+ 'common': [
+ (r'\s+', Text),
+ (r'(%|&)[^;]*;', Name.Entity),
+ ('<!--', Comment, 'comment'),
+ (r'[(|)*,?+]', Operator),
+ (r'"[^"]*"', String.Double),
+ (r'\'[^\']*\'', String.Single),
+ ],
+
+ 'comment': [
+ ('[^-]+', Comment),
+ ('-->', Comment, '#pop'),
+ ('-', Comment),
+ ],
+
+ 'element': [
+ include('common'),
+ (r'EMPTY|ANY|#PCDATA', Keyword.Constant),
+ (r'[^>\s\|()?+*,]+', Name.Tag),
+ (r'>', Keyword, '#pop'),
+ ],
+
+ 'attlist': [
+ include('common'),
+ (r'CDATA|IDREFS|IDREF|ID|NMTOKENS|NMTOKEN|ENTITIES|ENTITY|NOTATION', Keyword.Constant),
+ (r'#REQUIRED|#IMPLIED|#FIXED', Keyword.Constant),
+ (r'xml:space|xml:lang', Keyword.Reserved),
+ (r'[^>\s\|()?+*,]+', Name.Attribute),
+ (r'>', Keyword, '#pop'),
+ ],
+
+ 'entity': [
+ include('common'),
+ (r'SYSTEM|PUBLIC|NDATA', Keyword.Constant),
+ (r'[^>\s\|()?+*,]+', Name.Entity),
+ (r'>', Keyword, '#pop'),
+ ],
+
+ 'notation': [
+ include('common'),
+ (r'SYSTEM|PUBLIC', Keyword.Constant),
+ (r'[^>\s\|()?+*,]+', Name.Attribute),
+ (r'>', Keyword, '#pop'),
+ ],
+ }
+
+ def analyse_text(text):
+ if not looks_like_xml(text) and \
+ ('<!ELEMENT' in text or '<!ATTLIST' in text or '<!ENTITY' in text):
+ return 0.8
+
class XmlLexer(RegexLexer):
"""
Generic lexer for XML (eXtensible Markup Language).
diff --git a/tests/examplefiles/test.dtd b/tests/examplefiles/test.dtd
new file mode 100644
index 00000000..639b411a
--- /dev/null
+++ b/tests/examplefiles/test.dtd
@@ -0,0 +1,89 @@
+<!DOCTYPE html>
+
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd" [
+ <!-- an internal subset can be embedded here -->
+]>
+
+<!DOCTYPE greeting SYSTEM "hello.dtd">
+
+<!DOCTYPE greeting [
+ <!ELEMENT greeting (#PCDATA)>
+
+<!-- examples from XML spec -->
+
+<!ELEMENT br EMPTY>
+<!ELEMENT p (#PCDATA|emph)* >
+<!ELEMENT %name.para; %content.para; >
+<!ELEMENT container ANY>
+
+<!ELEMENT spec (front, body, back?)>
+<!ELEMENT div1 (head, (p | list | note)*, div2*)>
+<!ELEMENT dictionary-body (%div.mix; | %dict.mix;)*>
+
+<!ELEMENT p (#PCDATA|a|ul|b|i|em)*>
+<!ELEMENT p (#PCDATA | %font; | %phrase; | %special; | %form;)* >
+<!ELEMENT b (#PCDATA)>
+
+<!ATTLIST termdef
+ id ID #REQUIRED
+ name CDATA #IMPLIED>
+<!ATTLIST list
+ type (bullets|ordered|glossary) "ordered">
+<!ATTLIST form
+ method CDATA #FIXED "POST">
+
+<!ENTITY d "&#xD;">
+<!ENTITY a "&#xA;">
+<!ENTITY da "&#xD;&#xA;">
+
+<!ENTITY % ISOLat2
+ SYSTEM "http://www.xml.com/iso/isolat2-xml.entities" >
+
+<!ENTITY Pub-Status "This is a pre-release of the
+ specification.">
+
+ <!ENTITY open-hatch
+ SYSTEM "http://www.textuality.com/boilerplate/OpenHatch.xml">
+<!ENTITY open-hatch
+ PUBLIC "-//Textuality//TEXT Standard open-hatch boilerplate//EN"
+ "http://www.textuality.com/boilerplate/OpenHatch.xml">
+<!ENTITY hatch-pic
+ SYSTEM "../grafix/OpenHatch.gif"
+ NDATA gif >
+
+<!NOTATION gif PUBLIC "gif viewer">
+
+<!ENTITY % YN '"Yes"' >
+<!ENTITY WhatHeSaid "He said %YN;" >
+
+<!ENTITY EndAttr "27'" >
+
+<!ENTITY % pub "&#xc9;ditions Gallimard" >
+<!ENTITY rights "All rights reserved" >
+<!ENTITY book "La Peste: Albert Camus,
+&#xA9; 1947 %pub;. &rights;" >
+
+<!ENTITY lt "&#38;#60;">
+<!ENTITY gt "&#62;">
+<!ENTITY amp "&#38;#38;">
+<!ENTITY apos "&#39;">
+<!ENTITY quot "&#34;">
+
+<!ENTITY % draft 'INCLUDE' >
+<!ENTITY % final 'IGNORE' >
+
+<![%draft;[
+<!ELEMENT book (comments*, title, body, supplements?)>
+]]>
+<![%final;[
+<!ELEMENT book (title, body, supplements?)>
+]]>
+
+]>
+
+