diff options
author | Georg Brandl <georg@python.org> | 2014-11-08 15:15:51 +0100 |
---|---|---|
committer | Georg Brandl <georg@python.org> | 2014-11-08 15:15:51 +0100 |
commit | 0709cd8b978937725382d83953bdad97b78e7b8b (patch) | |
tree | 400c387c9f8e0685f040882aff3f5dfed7555b5b /pygments/util.py | |
parent | f46cc61a3ef609baea88aae3869081a741fc6e69 (diff) | |
download | pygments-0709cd8b978937725382d83953bdad97b78e7b8b.tar.gz |
Closes #1055: fix up guessing routines for HTML/XML-related markup
* remove overly broad recognition in the Lasso lexer
* recognize XML declaration (<?xml ...?>) as XML
* make HTML doctype recognition more general (HTML5 only requires <!DOCTYPE html>)
* fix PHP guessing so that it no longer recognizes XML declarations
Diffstat (limited to 'pygments/util.py')
-rw-r--r-- | pygments/util.py | 13 |
1 files changed, 9 insertions, 4 deletions
diff --git a/pygments/util.py b/pygments/util.py
index 1f54c291..9f683c0e 100644
--- a/pygments/util.py
+++ b/pygments/util.py
@@ -17,12 +17,15 @@ split_path_re = re.compile(r'[/\\ ]')
 doctype_lookup_re = re.compile(r'''(?smx)
     (<\?.*?\?>)?\s*
     <!DOCTYPE\s+(
+     [a-zA-Z_][a-zA-Z0-9]*
+     (?: \s+      # optional in HTML5
      [a-zA-Z_][a-zA-Z0-9]*\s+
-     [a-zA-Z_][a-zA-Z0-9]*\s+
-     "[^"]*")
+     "[^"]*")?
+     )
      [^>]*>
 ''')
 tag_re = re.compile(r'<(.+?)(\s.*?)?>.*?</.+?>(?uism)')
+xml_decl_re = re.compile(r'\s*<\?xml[^>]*\?>', re.I)
 
 
 class ClassNotFound(ValueError):
@@ -173,17 +176,19 @@ def doctype_matches(text, regex):
     if m is None:
         return False
     doctype = m.group(2)
-    return re.compile(regex).match(doctype.strip()) is not None
+    return re.compile(regex, re.I).match(doctype.strip()) is not None
 
 
 def html_doctype_matches(text):
     """Check if the file looks like it has a html doctype."""
-    return doctype_matches(text, r'html\s+PUBLIC\s+"-//W3C//DTD X?HTML.*')
+    return doctype_matches(text, r'html')
 
 
 _looks_like_xml_cache = {}
 def looks_like_xml(text):
     """Check if a doctype exists or if we have some tags."""
+    if xml_decl_re.match(text):
+        return True
     key = hash(text)
     try:
         return _looks_like_xml_cache[key]