diff options
Diffstat (limited to 'bs4/builder')
-rw-r--r-- | bs4/builder/__init__.py | 25 |
1 files changed, 22 insertions, 3 deletions
diff --git a/bs4/builder/__init__.py b/bs4/builder/__init__.py index fdb3362..d7166bb 100644 --- a/bs4/builder/__init__.py +++ b/bs4/builder/__init__.py @@ -93,7 +93,15 @@ class TreeBuilder(object): preserve_whitespace_tags = set() empty_element_tags = None # A tag will be considered an empty-element # tag when and only when it has no contents. - + void_tags = None # There are no void tags. + + # This string goes just before the end of the start tag for a + # void element. + # + # Leave this alone and you'll get tags like "<br/>". Change it to the + # empty string and you'll get tags like "<br>". + void_element_close_prefix = '/' + # A value for these tag/attribute combinations is a space- or # comma-separated list of CDATA, rather than a single CDATA. cdata_list_attributes = {} @@ -126,6 +134,17 @@ class TreeBuilder(object): return True return tag_name in self.empty_element_tags + def is_void(self, tag_name): + """Must a tag with this name be a void tag? + + A void tag cannot have contents and is presented with neither + a closing tag nor a closing slash, e.g.: + <link href="foo"> + """ + if self.void_tags is None: + return False + return tag_name in self.void_tags + def feed(self, markup): raise NotImplementedError() @@ -235,11 +254,11 @@ class HTMLTreeBuilder(TreeBuilder): empty_element_tags = set([ # These are from HTML5. 'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'keygen', 'link', 'menuitem', 'meta', 'param', 'source', 'track', 'wbr', - + # These are from HTML4, removed in HTML5. 'spacer', 'frame' ]) - + # The HTML standard defines these attributes as containing a # space-separated list of values, not a single value. That is, # class="foo bar" means that the 'class' attribute has two values, |