summary | refs | log | tree | commit | diff
path: root/bs4/builder
diff options
context:
space:
mode:
author Leonard Richardson <leonardr@segfault.org> 2014-12-11 22:23:26 -0500
committer Leonard Richardson <leonardr@segfault.org> 2014-12-11 22:23:26 -0500
commit 29f97ad45bfd4a0d1a6b5359b4c10ac9e26a87d5 (patch)
tree 5999e990051702ce838a39dbfc0bda0aa018b9a7 /bs4/builder
parent 056b9348f66969013c9e48026de69d249f3a101c (diff)
download beautifulsoup4-29f97ad45bfd4a0d1a6b5359b4c10ac9e26a87d5.tar.gz
Improved the lxml tree builder's handling of processing
instructions. [bug=1294645]
Diffstat (limited to 'bs4/builder')
-rw-r--r-- bs4/builder/_htmlparser.py 8
-rw-r--r-- bs4/builder/_lxml.py 11
2 files changed, 9 insertions, 10 deletions
diff --git a/bs4/builder/_htmlparser.py b/bs4/builder/_htmlparser.py
index bf231f1..7f3ae73 100644
--- a/bs4/builder/_htmlparser.py
+++ b/bs4/builder/_htmlparser.py
@@ -112,14 +112,6 @@ class BeautifulSoupHTMLParser(HTMLParser):
def handle_pi(self, data):
self.soup.endData()
- if data.endswith("?") and data.lower().startswith("xml"):
- # "An XHTML processing instruction using the trailing '?'
- # will cause the '?' to be included in data." - HTMLParser
- # docs.
- #
- # Strip the question mark so we don't end up with two
- # question marks.
- data = data[:-1]
self.soup.handle_data(data)
self.soup.endData(ProcessingInstruction)
diff --git a/bs4/builder/_lxml.py b/bs4/builder/_lxml.py
index 978c8df..b0bc8a0 100644
--- a/bs4/builder/_lxml.py
+++ b/bs4/builder/_lxml.py
@@ -7,7 +7,12 @@ from io import BytesIO
from StringIO import StringIO
import collections
from lxml import etree
-from bs4.element import Comment, Doctype, NamespacedAttribute
+from bs4.element import (
+ Comment,
+ Doctype,
+ NamespacedAttribute,
+ ProcessingInstruction,
+)
from bs4.builder import (
FAST,
HTML,
@@ -191,7 +196,9 @@ class LXMLTreeBuilderForXML(TreeBuilder):
self.nsmaps.pop()
def pi(self, target, data):
- pass
+ self.soup.endData()
+ self.soup.handle_data(target + ' ' + data)
+ self.soup.endData(ProcessingInstruction)
def data(self, content):
self.soup.handle_data(content)