14 files changed, 276 insertions, 69 deletions
diff --git a/NEWS.txt b/NEWS.txt
index 4b4ac23..45a6952 100644
--- a/NEWS.txt
+++ b/NEWS.txt
@@ -1,4 +1,27 @@
-= 4.6.0 (Unreleased) =
+= Unreleased
+
+* Stop data loss when encountering an empty numeric entity, and
+  possibly in other cases.  Thanks to tos.kamiya for the fix. [bug=1698503]
+
+* Improved the warning given when no parser is specified. [bug=1780571]
+
+* Fixed code that was causing deprecation warnings in recent Python 3
+  versions. Includes a patch from Ville Skyttä. [bug=1778909] [bug=1689496]
+
+* Fixed a Windows crash in diagnose() when checking whether a long
+  markup string is a filename. [bug=1737121]
+
+* Stopped HTMLParser from raising an exception in very rare cases of
+  bad markup. [bug=1708831]
+
+* Added a new formatter, "html5", which represents void elements
+  elements as "<element>" rather than "<element/>".  [bug=1716272]
+
+* You can get finer control over formatting by subclassing
+  bs4.element.Formatter and passing a Formatter instance into (e.g.)
+  encode(). [bug=1716272]
+
+= 4.6.0 (20170507) =
 
 * Added the `Tag.get_attribute_list` method, which acts like `Tag.get` for
   getting the value of an attribute, but which always returns a list,
diff --git a/bs4/__init__.py b/bs4/__init__.py
index c984ef6..329ef53 100644
--- a/bs4/__init__.py
+++ b/bs4/__init__.py
@@ -21,7 +21,7 @@ http://www.crummy.com/software/BeautifulSoup/bs4/doc/
 # found in the LICENSE file.
 
 __author__ = "Leonard Richardson (leonardr@segfault.org)"
-__version__ = "4.5.3"
+__version__ = "4.6.0"
 __copyright__ = "Copyright (c) 2004-2017 Leonard Richardson"
 __license__ = "MIT"
 
@@ -29,6 +29,7 @@ __all__ = ['BeautifulSoup']
 
 import os
 import re
+import sys
 import traceback
 import warnings
 
@@ -82,14 +83,46 @@ class BeautifulSoup(Tag):
 
     ASCII_SPACES = '\x20\x0a\x09\x0c\x0d'
 
-    NO_PARSER_SPECIFIED_WARNING = "No parser was explicitly specified, so I'm using the best available %(markup_type)s parser for this system (\"%(parser)s\"). This usually isn't a problem, but if you run this code on another system, or in a different virtual environment, it may use a different parser and behave differently.\n\nThe code that caused this warning is on line %(line_number)s of the file %(filename)s. To get rid of this warning, change code that looks like this:\n\n BeautifulSoup(YOUR_MARKUP})\n\nto this:\n\n BeautifulSoup(YOUR_MARKUP, \"%(parser)s\")\n"
+    NO_PARSER_SPECIFIED_WARNING = "No parser was explicitly specified, so I'm using the best available %(markup_type)s parser for this system (\"%(parser)s\"). This usually isn't a problem, but if you run this code on another system, or in a different virtual environment, it may use a different parser and behave differently.\n\nThe code that caused this warning is on line %(line_number)s of the file %(filename)s. To get rid of this warning, pass the additional argument 'features=\"%(parser)s\"' to the BeautifulSoup constructor.\n"
 
     def __init__(self, markup="", features=None, builder=None,
                  parse_only=None, from_encoding=None, exclude_encodings=None,
                  **kwargs):
-        """The Soup object is initialized as the 'root tag', and the
-        provided markup (which can be a string or a file-like object)
-        is fed into the underlying parser."""
+        """Constructor.
+
+        :param markup: A string or a file-like object representing
+        markup to be parsed.
+
+        :param features: Desirable features of the parser to be used. This
+        may be the name of a specific parser ("lxml", "lxml-xml",
+        "html.parser", or "html5lib") or it may be the type of markup
+        to be used ("html", "html5", "xml"). It's recommended that you
+        name a specific parser, so that Beautiful Soup gives you the
+        same results across platforms and virtual environments.
+
+        :param builder: A specific TreeBuilder to use instead of looking one
+        up based on `features`. You shouldn't need to use this.
+
+        :param parse_only: A SoupStrainer. Only parts of the document
+        matching the SoupStrainer will be considered. This is useful
+        when parsing part of a document that would otherwise be too
+        large to fit into memory.
+
+        :param from_encoding: A string indicating the encoding of the
+        document to be parsed. Pass this in if Beautiful Soup is
+        guessing wrongly about the document's encoding.
+
+        :param exclude_encodings: A list of strings indicating
+        encodings known to be wrong. Pass this in if you don't know
+        the document's encoding but you know Beautiful Soup's guess is
+        wrong.
+
+        :param kwargs: For backwards compatibility purposes, the
+        constructor accepts certain keyword arguments used in
+        Beautiful Soup 3. None of these arguments do anything in
+        Beautiful Soup 4 and there's no need to actually pass keyword
+        arguments into the constructor.
+        """
 
         if 'convertEntities' in kwargs:
             warnings.warn(
@@ -171,14 +204,35 @@ class BeautifulSoup(Tag):
                 else:
                     markup_type = "HTML"
 
-                caller = traceback.extract_stack()[0]
-                filename = caller[0]
-                line_number = caller[1]
-                warnings.warn(self.NO_PARSER_SPECIFIED_WARNING % dict(
-                    filename=filename,
-                    line_number=line_number,
-                    parser=builder.NAME,
-                    markup_type=markup_type))
+                # This code adapted from warnings.py so that we get the same line
+                # of code as our warnings.warn() call gets, even if the answer is wrong
+                # (as it may be in a multithreading situation).
+                caller = None
+                try:
+                    caller = sys._getframe(1)
+                except ValueError:
+                    pass
+                if caller:
+                    globals = caller.f_globals
+                    line_number = caller.f_lineno
+                else:
+                    globals = sys.__dict__
+                    line_number= 1                    
+                filename = globals.get('__file__')
+                if filename:
+                    fnl = filename.lower()
+                    if fnl.endswith((".pyc", ".pyo")):
+                        filename = filename[:-1]
+                if filename:
+                    # If there is no filename at all, the user is most likely in a REPL,
+                    # and the warning is not necessary.
+                    values = dict(
+                        filename=filename,
+                        line_number=line_number,
+                        parser=builder.NAME,
+                        markup_type=markup_type
+                    )
+                    warnings.warn(self.NO_PARSER_SPECIFIED_WARNING % values, stacklevel=2)
 
         self.builder = builder
         self.is_xml = builder.is_xml
diff --git a/bs4/builder/__init__.py b/bs4/builder/__init__.py
index fdb3362..21454e6 100644
--- a/bs4/builder/__init__.py
+++ b/bs4/builder/__init__.py
@@ -93,7 +93,7 @@ class TreeBuilder(object):
     preserve_whitespace_tags = set()
     empty_element_tags = None # A tag will be considered an empty-element
                               # tag when and only when it has no contents.
-
+    
     # A value for these tag/attribute combinations is a space- or
     # comma-separated list of CDATA, rather than a single CDATA.
     cdata_list_attributes = {}
@@ -125,7 +125,7 @@ class TreeBuilder(object):
         if self.empty_element_tags is None:
             return True
         return tag_name in self.empty_element_tags
-
+        
     def feed(self, markup):
         raise NotImplementedError()
 
@@ -235,11 +235,11 @@ class HTMLTreeBuilder(TreeBuilder):
     empty_element_tags = set([
         # These are from HTML5.
         'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'keygen', 'link', 'menuitem', 'meta', 'param', 'source', 'track', 'wbr',
-
-        # These are from HTML4, removed in HTML5.
-        'spacer', 'frame'
+        
+        # These are from earlier versions of HTML and are removed in HTML5.
+        'basefont', 'bgsound', 'command', 'frame', 'image', 'isindex', 'nextid', 'spacer'
     ])
-
+    
     # The HTML standard defines these attributes as containing a
     # space-separated list of values, not a single value. That is,
     # class="foo bar" means that the 'class' attribute has two values,
diff --git a/bs4/builder/_htmlparser.py b/bs4/builder/_htmlparser.py
index 67890b3..ef9fd1e 100644
--- a/bs4/builder/_htmlparser.py
+++ b/bs4/builder/_htmlparser.py
@@ -64,7 +64,18 @@ class BeautifulSoupHTMLParser(HTMLParser):
         # order. It's a list of closing tags we've already handled and
         # will ignore, assuming they ever show up.
         self.already_closed_empty_element = []
-    
+
+    def error(self, msg):
+        """In Python 3, HTMLParser subclasses must implement error(), although this
+        requirement doesn't appear to be documented.
+
+        In Python 2, HTMLParser implements error() as raising an exception.
+
+        In any event, this method is called only on very strange markup and our best strategy
+        is to pretend it didn't happen and keep going.
+        """
+        warnings.warn(msg)
+        
     def handle_startendtag(self, name, attrs):
         # This is only called when the markup looks like
         # <tag/>.
@@ -213,6 +224,7 @@ class HTMLParserTreeBuilder(HTMLTreeBuilder):
         parser.soup = self.soup
         try:
             parser.feed(markup)
+            parser.close()
         except HTMLParseError, e:
             warnings.warn(RuntimeWarning(
                 "Python's built-in HTMLParser cannot parse the given document. This is not a bug in Beautiful Soup. The best solution is to install an external parser (lxml or html5lib), and use Beautiful Soup with that parser. See http://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-a-parser for help."))
diff --git a/bs4/builder/_lxml.py b/bs4/builder/_lxml.py
index d2ca287..3439271 100644
--- a/bs4/builder/_lxml.py
+++ b/bs4/builder/_lxml.py
@@ -5,9 +5,13 @@ __all__ = [
     'LXMLTreeBuilder',
     ]
 
+try:
+    from collections.abc import Callable # Python 3.6
+except ImportError , e:
+    from collections import Callable
+
 from io import BytesIO
 from StringIO import StringIO
-import collections
 from lxml import etree
 from bs4.element import (
     Comment,
@@ -58,7 +62,7 @@ class LXMLTreeBuilderForXML(TreeBuilder):
         # Use the default parser.
         parser = self.default_parser(encoding)
 
-        if isinstance(parser, collections.Callable):
+        if isinstance(parser, Callable):
             # Instantiate the parser with default arguments
             parser = parser(target=self, strip_cdata=False, encoding=encoding)
         return parser
diff --git a/bs4/dammit.py b/bs4/dammit.py
index 7965565..be46b39 100644
--- a/bs4/dammit.py
+++ b/bs4/dammit.py
@@ -46,9 +46,9 @@ except ImportError:
     pass
 
 xml_encoding_re = re.compile(
-    '^<\?.*encoding=[\'"](.*?)[\'"].*\?>'.encode(), re.I)
+    '^<\\?.*encoding=[\'"](.*?)[\'"].*\\?>'.encode(), re.I)
 html_meta_re = re.compile(
-    '<\s*meta[^>]+charset\s*=\s*["\']?([^>]*?)[ /;\'">]'.encode(), re.I)
+    '<\\s*meta[^>]+charset\\s*=\\s*["\']?([^>]*?)[ /;\'">]'.encode(), re.I)
 
 class EntitySubstitution(object):
 
@@ -82,7 +82,7 @@ class EntitySubstitution(object):
         }
 
     BARE_AMPERSAND_OR_BRACKET = re.compile("([<>]|"
-                                           "&(?!#\d+;|#x[0-9a-fA-F]+;|\w+;)"
+                                           "&(?!#\\d+;|#x[0-9a-fA-F]+;|\\w+;)"
                                            ")")
 
     AMPERSAND_OR_BRACKET = re.compile("([<>&])")
diff --git a/bs4/diagnose.py b/bs4/diagnose.py
index 8768332..7a28c09 100644
--- a/bs4/diagnose.py
+++ b/bs4/diagnose.py
@@ -37,7 +37,7 @@ def diagnose(data):
                 name)
 
     if 'lxml' in basic_parsers:
-        basic_parsers.append(["lxml", "xml"])
+        basic_parsers.append("lxml-xml")
         try:
             from lxml import etree
             print "Found lxml version %s" % ".".join(map(str,etree.LXML_VERSION))
@@ -56,21 +56,27 @@ def diagnose(data):
 
     if hasattr(data, 'read'):
         data = data.read()
-    elif os.path.exists(data):
-        print '"%s" looks like a filename. Reading data from the file.' % data
-        with open(data) as fp:
-            data = fp.read()
     elif data.startswith("http:") or data.startswith("https:"):
         print '"%s" looks like a URL. Beautiful Soup is not an HTTP client.' % data
         print "You need to use some other library to get the document behind the URL, and feed that document to Beautiful Soup."
         return
-    print
+    else:
+        try:
+            if os.path.exists(data):
+                print '"%s" looks like a filename. Reading data from the file.' % data
+                with open(data) as fp:
+                    data = fp.read()
+        except ValueError:
+            # This can happen on some platforms when the 'filename' is
+            # too long. Assume it's data and not a filename.
+            pass
+        print
 
     for parser in basic_parsers:
         print "Trying to parse your markup with %s" % parser
         success = False
         try:
-            soup = BeautifulSoup(data, parser)
+            soup = BeautifulSoup(data, features=parser)
             success = True
         except Exception, e:
             print "%s could not parse the markup." % parser
diff --git a/bs4/element.py b/bs4/element.py
index 9ef75f8..911b9bc 100644
--- a/bs4/element.py
+++ b/bs4/element.py
@@ -2,7 +2,10 @@
 # found in the LICENSE file.
 __license__ = "MIT"
 
-import collections
+try:
+    from collections.abc import Callable # Python 3.6
+except ImportError , e:
+    from collections import Callable
 import re
 import shlex
 import sys
@@ -12,7 +15,7 @@ from bs4.dammit import EntitySubstitution
 DEFAULT_OUTPUT_ENCODING = "utf-8"
 PY3K = (sys.version_info[0] > 2)
 
-whitespace_re = re.compile("\s+")
+whitespace_re = re.compile(r"\s+")
 
 def _alias(attr):
     """Alias one attribute name to another for backward compatibility"""
@@ -69,7 +72,7 @@ class ContentMetaAttributeValue(AttributeValueWithCharsetSubstitution):
     The value of the 'content' attribute will be one of these objects.
     """
 
-    CHARSET_RE = re.compile("((^|;)\s*charset=)([^;]*)", re.M)
+    CHARSET_RE = re.compile(r"((^|;)\s*charset=)([^;]*)", re.M)
 
     def __new__(cls, original_value):
         match = cls.CHARSET_RE.search(original_value)
@@ -123,6 +126,41 @@ class HTMLAwareEntitySubstitution(EntitySubstitution):
         return cls._substitute_if_appropriate(
             ns, EntitySubstitution.substitute_xml)
 
+class Formatter(object):
+    """Contains information about how to format a parse tree."""
+    
+    # By default, represent void elements as <tag/> rather than <tag>
+    void_element_close_prefix = '/'
+
+    def substitute_entities(self, *args, **kwargs):
+        """Transform certain characters into named entities."""
+        raise NotImplementedError()
+
+class HTMLFormatter(Formatter):
+    """The default HTML formatter."""
+    def substitute(self, *args, **kwargs):
+        return HTMLAwareEntitySubstitution.substitute_html(*args, **kwargs)
+
+class MinimalHTMLFormatter(Formatter):
+    """A minimal HTML formatter."""
+    def substitute(self, *args, **kwargs):
+        return HTMLAwareEntitySubstitution.substitute_xml(*args, **kwargs)
+    
+class HTML5Formatter(HTMLFormatter):
+    """An HTML formatter that omits the slash in a void tag."""
+    void_element_close_prefix = None
+
+class XMLFormatter(Formatter):
+    """Substitute only the essential XML entities."""
+    def substitute(self, *args, **kwargs):
+        return EntitySubstitution.substitute_xml(*args, **kwargs)
+
+class HTMLXMLFormatter(Formatter):
+    """Format XML using HTML rules."""
+    def substitute(self, *args, **kwargs):
+        return HTMLAwareEntitySubstitution.substitute_html(*args, **kwargs)
+
+    
 class PageElement(object):
     """Contains the navigational information for some part of the page
     (either a tag or a piece of text)"""
@@ -131,40 +169,49 @@ class PageElement(object):
     # to methods like encode() and prettify():
     #
     # "html" - All Unicode characters with corresponding HTML entities
-    #   are converted to those entities on output. 
-   # "minimal" - Bare ampersands and angle brackets are converted to
+    #   are converted to those entities on output.
+    # "html5" - The same as "html", but empty void tags are represented as
+    #   <tag> rather than <tag/>
+    # "minimal" - Bare ampersands and angle brackets are converted to
     #   XML entities: &amp; &lt; &gt;
     # None - The null formatter. Unicode characters are never
     #   converted to entities.  This is not recommended, but it's
     #   faster than "minimal".
-    # A function - This function will be called on every string that
+    # A callable function - it will be called on every string that needs to undergo entity substitution.
+    # A Formatter instance - Formatter.substitute(string) will be called on every string that
     #  needs to undergo entity substitution.
     #
 
-    # In an HTML document, the default "html" and "minimal" functions
-    # will leave the contents of <script> and <style> tags alone. For
-    # an XML document, all tags will be given the same treatment.
+    # In an HTML document, the default "html", "html5", and "minimal"
+    # functions will leave the contents of <script> and <style> tags
+    # alone. For an XML document, all tags will be given the same
+    # treatment.
 
     HTML_FORMATTERS = {
-        "html" : HTMLAwareEntitySubstitution.substitute_html,
-        "minimal" : HTMLAwareEntitySubstitution.substitute_xml,
+        "html" : HTMLFormatter(),
+        "html5" : HTML5Formatter(),
+        "minimal" : MinimalHTMLFormatter(),
         None : None
         }
 
     XML_FORMATTERS = {
-        "html" : EntitySubstitution.substitute_html,
-        "minimal" : EntitySubstitution.substitute_xml,
+        "html" : HTMLXMLFormatter(),
+        "minimal" : XMLFormatter(),
         None : None
         }
 
     def format_string(self, s, formatter='minimal'):
         """Format the given string using the given formatter."""
-        if not callable(formatter):
+        if isinstance(formatter, basestring):
             formatter = self._formatter_for_name(formatter)
         if formatter is None:
             output = s
         else:
-            output = formatter(s)
+            if callable(formatter):
+                # Backwards compatibility -- you used to pass in a formatting method.
+                output = formatter(s)
+            else:
+                output = formatter.substitute(s)
         return output
 
     @property
@@ -194,11 +241,9 @@ class PageElement(object):
     def _formatter_for_name(self, name):
         "Look up a formatter function based on its name and the tree."
         if self._is_xml:
-            return self.XML_FORMATTERS.get(
-                name, EntitySubstitution.substitute_xml)
+            return self.XML_FORMATTERS.get(name, XMLFormatter())
         else:
-            return self.HTML_FORMATTERS.get(
-                name, HTMLAwareEntitySubstitution.substitute_xml)
+            return self.HTML_FORMATTERS.get(name, HTMLFormatter())
 
     def setup(self, parent=None, previous_element=None, next_element=None,
               previous_sibling=None, next_sibling=None):
@@ -316,6 +361,14 @@ class PageElement(object):
             and not isinstance(new_child, NavigableString)):
             new_child = NavigableString(new_child)
 
+        from bs4 import BeautifulSoup
+        if isinstance(new_child, BeautifulSoup):
+            # We don't want to end up with a situation where one BeautifulSoup
+            # object contains another. Insert the children one at a time.
+            for subchild in list(new_child.contents):
+                self.insert(position, subchild)
+                position += 1
+            return
         position = min(position, len(self.contents))
         if hasattr(new_child, 'parent') and new_child.parent is not None:
             # We're 'inserting' an element that's already one
@@ -862,7 +915,7 @@ class Tag(PageElement):
             self.can_be_empty_element = builder.can_be_empty_element(name)
         else:
             self.can_be_empty_element = False
-
+            
     parserClass = _alias("parser_class")  # BS3
 
     def __copy__(self):
@@ -1129,11 +1182,10 @@ class Tag(PageElement):
            encoding.
         """
 
-        # First off, turn a string formatter into a function. This
+        # First off, turn a string formatter into a Formatter object. This
         # will stop the lookup from happening over and over again.
-        if not callable(formatter):
+        if not isinstance(formatter, Formatter) and not callable(formatter):
             formatter = self._formatter_for_name(formatter)
-
         attrs = []
         if self.attrs:
             for key, val in sorted(self.attrs.items()):
@@ -1162,7 +1214,7 @@ class Tag(PageElement):
             prefix = self.prefix + ":"
 
         if self.is_empty_element:
-            close = '/'
+            close = formatter.void_element_close_prefix or ''
         else:
             closeTag = '</%s%s>' % (prefix, self.name)
 
@@ -1233,9 +1285,9 @@ class Tag(PageElement):
         :param formatter: The output formatter responsible for converting
            entities to Unicode characters.
         """
-        # First off, turn a string formatter into a function. This
+        # First off, turn a string formatter into a Formatter object. This
         # will stop the lookup from happening over and over again.
-        if not callable(formatter):
+        if not isinstance(formatter, Formatter) and not callable(formatter):
             formatter = self._formatter_for_name(formatter)
 
         pretty_print = (indent_level is not None)
@@ -1418,7 +1470,7 @@ class Tag(PageElement):
                 if tag_name == '':
                     raise ValueError(
                         "A pseudo-class must be prefixed with a tag name.")
-                pseudo_attributes = re.match('([a-zA-Z\d-]+)\(([a-zA-Z\d]+)\)', pseudo)
+                pseudo_attributes = re.match(r'([a-zA-Z\d-]+)\(([a-zA-Z\d]+)\)', pseudo)
                 found = []
                 if pseudo_attributes is None:
                     pseudo_type = pseudo
@@ -1652,7 +1704,7 @@ class SoupStrainer(object):
             markup = markup_name
             markup_attrs = markup
         call_function_with_tag_data = (
-            isinstance(self.name, collections.Callable)
+            isinstance(self.name, Callable)
             and not isinstance(markup_name, Tag))
 
         if ((not self.name)
@@ -1732,7 +1784,7 @@ class SoupStrainer(object):
             # True matches any non-None value.
             return markup is not None
 
-        if isinstance(match_against, collections.Callable):
+        if isinstance(match_against, Callable):
             return match_against(markup)
 
         # Custom callables take the tag as an argument, but all
diff --git a/bs4/testing.py b/bs4/testing.py
index 6ba2506..9d42702 100644
--- a/bs4/testing.py
+++ b/bs4/testing.py
@@ -334,7 +334,7 @@ Hello, world!
         self.assertSoupEquals("&#10000000000000;", expect)
         self.assertSoupEquals("&#x10000000000000;", expect)
         self.assertSoupEquals("&#1000000000;", expect)
-
+        
     def test_multipart_strings(self):
         "Mostly to prevent a recurrence of a bug in the html5lib treebuilder."
         soup = self.soup("<html><h2>\nfoo</h2><p></p></html>")
diff --git a/bs4/tests/test_htmlparser.py b/bs4/tests/test_htmlparser.py
index d5cf025..0381c7d 100644
--- a/bs4/tests/test_htmlparser.py
+++ b/bs4/tests/test_htmlparser.py
@@ -5,6 +5,7 @@ from pdb import set_trace
 import pickle
 from bs4.testing import SoupTest, HTMLTreeBuilderSmokeTest
 from bs4.builder import HTMLParserTreeBuilder
+from bs4.builder._htmlparser import BeautifulSoupHTMLParser
 
 class HTMLParserTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest):
 
@@ -32,3 +33,17 @@ class HTMLParserTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest):
     def test_redundant_empty_element_closing_tags(self):
         self.assertSoupEquals('<br></br><br></br><br></br>', "<br/><br/><br/>")
         self.assertSoupEquals('</br></br></br>', "")
+
+    def test_empty_element(self):
+        # This verifies that any buffered data present when the parser
+        # finishes working is handled.
+        self.assertSoupEquals("foo &# bar", "foo &amp;# bar")
+
+
+class TestHTMLParserSubclass(SoupTest):
+    def test_error(self):
+        """Verify that our HTMLParser subclass implements error() in a way
+        that doesn't cause a crash.
+        """
+        parser = BeautifulSoupHTMLParser()
+        parser.error("don't crash")
diff --git a/bs4/tests/test_tree.py b/bs4/tests/test_tree.py
index c0e7c40..e8903e3 100644
--- a/bs4/tests/test_tree.py
+++ b/bs4/tests/test_tree.py
@@ -605,7 +605,7 @@ class SiblingTest(TreeTest):
                     </html>'''
         # All that whitespace looks good but makes the tests more
         # difficult. Get rid of it.
-        markup = re.compile("\n\s*").sub("", markup)
+        markup = re.compile(r"\n\s*").sub("", markup)
         self.tree = self.soup(markup)
 
 
@@ -821,6 +821,26 @@ class TestTreeModification(SoupTest):
         soup = self.soup(text)
         self.assertRaises(ValueError, soup.a.insert, 0, soup.a)
 
+    def test_insert_beautifulsoup_object_inserts_children(self):
+        """Inserting one BeautifulSoup object into another actually inserts all
+        of its children -- you'll never combine BeautifulSoup objects.
+        """
+        soup = self.soup("<p>And now, a word:</p><p>And we're back.</p>")
+        
+        text = "<p>p2</p><p>p3</p>"
+        to_insert = self.soup(text)
+        soup.insert(1, to_insert)
+
+        for i in soup.descendants:
+            assert not isinstance(i, BeautifulSoup)
+        
+        p1, p2, p3, p4 = list(soup.children)
+        self.assertEquals("And now, a word:", p1.string)
+        self.assertEquals("p2", p2.string)
+        self.assertEquals("p3", p3.string)
+        self.assertEquals("And we're back.", p4.string)
+        
+        
     def test_replace_with_maintains_next_element_throughout(self):
         soup = self.soup('<p><a>one</a><b>three</b></p>')
         a = soup.a
@@ -1419,13 +1439,21 @@ class TestSubstitutions(SoupTest):
                 u"<b>&lt;&lt;Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</b>"))
 
     def test_formatter_html(self):
-        markup = u"<b>&lt;&lt;Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</b>"
+        markup = u"<br><b>&lt;&lt;Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</b>"
         soup = self.soup(markup)
         decoded = soup.decode(formatter="html")
         self.assertEqual(
             decoded,
-            self.document_for("<b>&lt;&lt;Sacr&eacute; bleu!&gt;&gt;</b>"))
+            self.document_for("<br/><b>&lt;&lt;Sacr&eacute; bleu!&gt;&gt;</b>"))
 
+    def test_formatter_html5(self):
+        markup = u"<br><b>&lt;&lt;Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</b>"
+        soup = self.soup(markup)
+        decoded = soup.decode(formatter="html5")
+        self.assertEqual(
+            decoded,
+            self.document_for("<br><b>&lt;&lt;Sacr&eacute; bleu!&gt;&gt;</b>"))
+        
     def test_formatter_minimal(self):
         markup = u"<b>&lt;&lt;Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</b>"
         soup = self.soup(markup)
@@ -1498,7 +1526,7 @@ class TestSubstitutions(SoupTest):
             u'<div>\n foo\n <pre>  \tbar\n  \n  </pre>\n baz\n</div>',
             soup.div.prettify())
 
-    def test_prettify_accepts_formatter(self):
+    def test_prettify_accepts_formatter_function(self):
         soup = BeautifulSoup("<html><body>foo</body></html>", 'html.parser')
         pretty = soup.prettify(formatter = lambda x: x.upper())
         self.assertTrue("FOO" in pretty)
diff --git a/doc/source/index.rst b/doc/source/index.rst
index 9269385..8b2822d 100644
--- a/doc/source/index.rst
+++ b/doc/source/index.rst
@@ -2145,7 +2145,7 @@ invalid HTML or XML::
 
 You can change this behavior by providing a value for the
 ``formatter`` argument to ``prettify()``, ``encode()``, or
-``decode()``. Beautiful Soup recognizes four possible values for
+``decode()``. Beautiful Soup recognizes six possible values for
 ``formatter``.
 
 The default is ``formatter="minimal"``. Strings will only be processed
@@ -2174,6 +2174,18 @@ Unicode characters to HTML entities whenever possible::
  #  </body>
  # </html>
 
+ If you pass in ``formatter="html5"``, it's the same as
+``formatter="html5"``, but Beautiful Soup will
+omit the closing slash in HTML void tags like "br"::
+
+ soup = BeautifulSoup("<br>")
+ 
+ print(soup.encode(formatter="html"))
+ # <html><body><br/></body></html>
+ 
+ print(soup.encode(formatter="html5"))
+ # <html><body><br></body></html>
+ 
 If you pass in ``formatter=None``, Beautiful Soup will not modify
 strings at all on output. This is the fastest option, but it may lead
 to Beautiful Soup generating invalid HTML/XML, as in these examples::
diff --git a/prepare-release.sh b/prepare-release.sh
index d88ff1e..c278b67 100644
--- a/prepare-release.sh
+++ b/prepare-release.sh
@@ -55,6 +55,7 @@ source ../py2-install-test-virtualenv/bin/activate
 python setup.py install
 echo "EXPECT HTML ON LINE BELOW"
 (cd .. && python -c "from bs4 import _s; print(_s('<a>foo', 'html.parser'))")
+echo
 # That should print '<a>foo</a>'
 deactivate
 rm -rf ../py2-install-test-virtualenv
diff --git a/setup.py b/setup.py
index 89c9661..ab14d8d 100644
--- a/setup.py
+++ b/setup.py
@@ -5,7 +5,7 @@ from setuptools import (
 
 setup(
     name="beautifulsoup4",
-    version = "4.5.3",
+    version = "4.6.0",
     author="Leonard Richardson",
     author_email='leonardr@segfault.org',
     url="http://www.crummy.com/software/BeautifulSoup/bs4/",