summary | refs | log | tree | commit | diff
path: root/bs4/element.py
diff options
context:
space:
mode:
Diffstat (limited to 'bs4/element.py')
-rw-r--r-- bs4/element.py 22
1 files changed, 18 insertions, 4 deletions
diff --git a/bs4/element.py b/bs4/element.py
index c431a97..8383c3f 100644
--- a/bs4/element.py
+++ b/bs4/element.py
@@ -1409,15 +1409,29 @@ class Tag(PageElement):
# Handle grouping selectors if ',' exists, ie: p,a
if ',' in selector:
context = []
- for partial_selector in selector.split(','):
- partial_selector = partial_selector.strip()
+ selectors = [x.strip() for x in selector.split(",")]
+
+ # If a selector is mentioned multiple times we don't want
+ # to use it more than once.
+ used_selectors = set()
+
+ # We also don't want to select the same element more than once,
+ # if it's matched by multiple selectors.
+ selected_object_ids = set()
+ for partial_selector in selectors:
if partial_selector == '':
raise ValueError('Invalid group selection syntax: %s' % selector)
+ if partial_selector in used_selectors:
+ continue
+ used_selectors.add(partial_selector)
candidates = self.select(partial_selector, limit=limit)
for candidate in candidates:
- if candidate not in context:
+ # This lets us distinguish between distinct tags that
+ # represent the same markup.
+ object_id = id(candidate)
+ if object_id not in selected_object_ids:
context.append(candidate)
-
+ selected_object_ids.add(object_id)
if limit and len(context) >= limit:
break
return context