summary | refs | log | tree | commit | diff
path: root/pygments/util.py
diff options
context:
space:
mode:
author: Georg Brandl <georg@python.org>, 2014-11-08 15:15:51 +0100
committer: Georg Brandl <georg@python.org>, 2014-11-08 15:15:51 +0100
commit: 0709cd8b978937725382d83953bdad97b78e7b8b (patch)
tree: 400c387c9f8e0685f040882aff3f5dfed7555b5b /pygments/util.py
parent: f46cc61a3ef609baea88aae3869081a741fc6e69 (diff)
download: pygments-0709cd8b978937725382d83953bdad97b78e7b8b.tar.gz
Closes #1055: fixup guessing routines for HTML/XML related markup
* remove too broad recognition for Lasso lexer
* recognize XML declaration (<?xml ...?>) as XML
* make HTML doctype recognition more general (HTML5 only requires <!DOCTYPE html>)
* fix PHP not to recognize XML declarations
Diffstat (limited to 'pygments/util.py')
-rw-r--r-- pygments/util.py 13
1 files changed, 9 insertions, 4 deletions
diff --git a/pygments/util.py b/pygments/util.py
index 1f54c291..9f683c0e 100644
--- a/pygments/util.py
+++ b/pygments/util.py
@@ -17,12 +17,15 @@ split_path_re = re.compile(r'[/\\ ]')
doctype_lookup_re = re.compile(r'''(?smx)
(<\?.*?\?>)?\s*
<!DOCTYPE\s+(
+ [a-zA-Z_][a-zA-Z0-9]*
+ (?: \s+ # optional in HTML5
[a-zA-Z_][a-zA-Z0-9]*\s+
- [a-zA-Z_][a-zA-Z0-9]*\s+
- "[^"]*")
+ "[^"]*")?
+ )
[^>]*>
''')
tag_re = re.compile(r'<(.+?)(\s.*?)?>.*?</.+?>(?uism)')
+xml_decl_re = re.compile(r'\s*<\?xml[^>]*\?>', re.I)
class ClassNotFound(ValueError):
@@ -173,17 +176,19 @@ def doctype_matches(text, regex):
if m is None:
return False
doctype = m.group(2)
- return re.compile(regex).match(doctype.strip()) is not None
+ return re.compile(regex, re.I).match(doctype.strip()) is not None
def html_doctype_matches(text):
"""Check if the file looks like it has a html doctype."""
- return doctype_matches(text, r'html\s+PUBLIC\s+"-//W3C//DTD X?HTML.*')
+ return doctype_matches(text, r'html')
_looks_like_xml_cache = {}
def looks_like_xml(text):
"""Check if a doctype exists or if we have some tags."""
+ if xml_decl_re.match(text):
+ return True
key = hash(text)
try:
return _looks_like_xml_cache[key]