diff options
-rw-r--r-- | NEWS.txt | 10
-rw-r--r-- | bs4/element.py | 24
-rw-r--r-- | bs4/tests/test_tree.py | 17
3 files changed, 33 insertions, 18 deletions
@@ -3,6 +3,16 @@ * Beautiful Soup will now work with versions of html5lib greater than 0.99999999. [bug=1603299] +* If a search against each individual value of a multi-valued + attribute fails, the search will be run one final time against the + complete attribute value considered as a single string. That is, if + a tag has class="foo bar" and neither "foo" nor "bar" matches, but + "foo bar" does, the tag is now considered a match. + + This happened in previous versions, but only when the value being + searched for was a string. Now it also works when searching for + regular expressions, lists of values, etc. [bug=1476868] + * Fixed a bug that deranged the tree when a whitespace element was reparented into a tag that contained an identical whitespace element. [bug=1505351] diff --git a/bs4/element.py b/bs4/element.py index ad13533..5a3665e 100644 --- a/bs4/element.py +++ b/bs4/element.py @@ -1704,21 +1704,15 @@ class SoupStrainer(object): if isinstance(markup, list) or isinstance(markup, tuple): # This should only happen when searching a multi-valued attribute # like 'class'. - if (isinstance(match_against, unicode) - and ' ' in match_against): - # A bit of a special case. If they try to match "foo - # bar" on a multivalue attribute's value, only accept - # the literal value "foo bar" - # - # XXX This is going to be pretty slow because we keep - # splitting match_against. But it shouldn't come up - # too often. - return (whitespace_re.split(match_against) == markup) - else: - for item in markup: - if self._matches(item, match_against): - return True - return False + for item in markup: + if self._matches(item, match_against): + return True + # We didn't match any particular value of the multivalue + # attribute, but maybe we match the attribute value when + # considered as a string. + if self._matches(' '.join(markup), match_against): + return True + return False if match_against is True: # True matches any non-None value. 
diff --git a/bs4/tests/test_tree.py b/bs4/tests/test_tree.py index 2f9aba1..6bcf8af 100644 --- a/bs4/tests/test_tree.py +++ b/bs4/tests/test_tree.py @@ -222,6 +222,17 @@ class TestFindAllByName(TreeTest): self.assertSelects( tree.find_all(id_matches_name), ["Match 1.", "Match 2."]) + def test_find_with_multi_valued_attribute(self): + soup = self.soup( + "<div class='a b'>1</div><div class='a c'>2</div><div class='a d'>3</div>" + ) + r1 = soup.find('div', 'a d'); + r2 = soup.find('div', re.compile(r'a d')); + r3, r4 = soup.find_all('div', ['a b', 'a d']); + self.assertEqual('3', r1.string) + self.assertEqual('3', r2.string) + self.assertEqual('1', r3.string) + self.assertEqual('3', r4.string) class TestFindAllByAttribute(TreeTest): @@ -294,10 +305,10 @@ class TestFindAllByAttribute(TreeTest): f = tree.find_all("gar", class_=re.compile("a")) self.assertSelects(f, ["Found it"]) - # Since the class is not the string "foo bar", but the two - # strings "foo" and "bar", this will not find anything. + # If the search fails to match the individual strings "foo" and "bar", + # it will be tried against the combined string "foo bar". f = tree.find_all("gar", class_=re.compile("o b")) - self.assertSelects(f, []) + self.assertSelects(f, ["Found it"]) def test_find_all_with_non_dictionary_for_attrs_finds_by_class(self): soup = self.soup("<a class='bar'>Found it</a>") |