diff options
-rw-r--r-- NEWS.txt | 3
-rw-r--r-- bs4/element.py | 14
-rw-r--r-- bs4/tests/test_tree.py | 8
3 files changed, 17 insertions, 8 deletions
@@ -7,6 +7,9 @@ reparented into a tag that contained an identical whitespace element. [bug=1505351] +* Added support for CSS selector values that contain quoted spaces, + such as tag[style="display: foo"]. [bug=1540588] + * Corrected handling of XML processing instructions. [bug=1504393] * The contents of <textarea> tags will no longer be modified when the diff --git a/bs4/element.py b/bs4/element.py index 7a3aa52..ad13533 100644 --- a/bs4/element.py +++ b/bs4/element.py @@ -4,6 +4,7 @@ __license__ = "MIT" import collections import re +import shlex import sys import warnings from bs4.dammit import EntitySubstitution @@ -1319,6 +1320,7 @@ class Tag(PageElement): _selector_combinators = ['>', '+', '~'] _select_debug = False + quoted_colon = re.compile('"[^"]*:[^"]*"') def select_one(self, selector): """Perform a CSS selection operation on the current element.""" value = self.select(selector, limit=1) @@ -1344,8 +1346,7 @@ class Tag(PageElement): if limit and len(context) >= limit: break return context - - tokens = selector.split() + tokens = shlex.split(selector) current_context = [self] if tokens[-1] in self._selector_combinators: @@ -1397,7 +1398,7 @@ class Tag(PageElement): return classes.issubset(candidate.get('class', [])) checker = classes_match - elif ':' in token: + elif ':' in token and not self.quoted_colon.search(token): # Pseudo-class tag_name, pseudo = token.split(':', 1) if tag_name == '': @@ -1428,11 +1429,8 @@ class Tag(PageElement): self.count += 1 if self.count == self.destination: return True - if self.count > self.destination: - # Stop the generator that's sending us - # these things. 
- raise StopIteration() - return False + else: + return False checker = Counter(pseudo_value).nth_child_of_type else: raise NotImplementedError( diff --git a/bs4/tests/test_tree.py b/bs4/tests/test_tree.py index 8a05990..fc19046 100644 --- a/bs4/tests/test_tree.py +++ b/bs4/tests/test_tree.py @@ -1909,6 +1909,14 @@ class TestSoupSelector(TreeTest): ('div[data-tag]', ['data1']) ) + def test_quoted_space_in_selector_name(self): + html = """<div style="display: wrong">nope</div> + <div style="display: right">yes</div> + """ + soup = BeautifulSoup(html, 'html.parser') + [chosen] = soup.select('div[style="display: right"]') + self.assertEqual("yes", chosen.string) + def test_unsupported_pseudoclass(self): self.assertRaises( NotImplementedError, self.soup.select, "a:no-such-pseudoclass") |