-rw-r--r-- | AUTHORS.txt                        |   2
-rw-r--r-- | LICENSE (renamed from COPYING.txt) |   2
-rw-r--r-- | NEWS.txt                           |  73
-rw-r--r-- | bs4/__init__.py                    | 103
-rw-r--r-- | bs4/builder/__init__.py            |   6
-rw-r--r-- | bs4/builder/_html5lib.py           | 138
-rw-r--r-- | bs4/builder/_htmlparser.py         |   3
-rw-r--r-- | bs4/builder/_lxml.py               |  22
-rw-r--r-- | bs4/dammit.py                      |  10
-rw-r--r-- | bs4/diagnose.py                    |   5
-rw-r--r-- | bs4/element.py                     | 102
-rw-r--r-- | bs4/testing.py                     |  38
-rw-r--r-- | bs4/tests/test_html5lib.py         |  32
-rw-r--r-- | bs4/tests/test_soup.py             |  40
-rw-r--r-- | bs4/tests/test_tree.py             |  45
-rw-r--r-- | doc/source/index.rst               |  49
-rw-r--r-- | prepare-release.sh                 |  66
-rw-r--r-- | setup.py                           |   4
18 files changed, 582 insertions, 158 deletions
diff --git a/AUTHORS.txt b/AUTHORS.txt index 2ac8fcc..ea6f785 100644 --- a/AUTHORS.txt +++ b/AUTHORS.txt @@ -16,7 +16,7 @@ support CSS selectors. Sam Ruby helped with a lot of edge cases. -Jonathan Ellis was awarded the prestigous Beau Potage D'Or for his +Jonathan Ellis was awarded the prestigious Beau Potage D'Or for his work in solving the nestable tags conundrum. An incomplete list of people have contributed patches to Beautiful @@ -1,6 +1,6 @@ Beautiful Soup is made available under the MIT license: - Copyright (c) 2004-2015 Leonard Richardson + Copyright (c) 2004-2016 Leonard Richardson Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the @@ -1,3 +1,70 @@ += Unreleased = + +* Fixed foster parenting when html5lib is the tree builder. Thanks to + Geoffrey Sneddon for a patch and test. + +* Fixed yet another problem that caused the html5lib tree builder to + create a disconnected parse tree. [bug=1629825] + += 4.5.1 (20160802) = + +* Fixed a crash when passing Unicode markup that contained a + processing instruction into the lxml HTML parser on Python + 3. [bug=1608048] + += 4.5.0 (20160719) = + +* Beautiful Soup is no longer compatible with Python 2.6. This + actually happened a few releases ago, but it's now official. + +* Beautiful Soup will now work with versions of html5lib greater than + 0.99999999. [bug=1603299] + +* If a search against each individual value of a multi-valued + attribute fails, the search will be run one final time against the + complete attribute value considered as a single string. That is, if + a tag has class="foo bar" and neither "foo" nor "bar" matches, but + "foo bar" does, the tag is now considered a match. + + This happened in previous versions, but only when the value being + searched for was a string. Now it also works when that value is + a regular expression, a list of strings, etc. [bug=1476868] + +* Fixed a bug that deranged the tree when a whitespace element was + reparented into a tag that contained an identical whitespace + element. [bug=1505351] + +* Added support for CSS selector values that contain quoted spaces, + such as tag[style="display: foo"]. [bug=1540588] + +* Corrected handling of XML processing instructions. [bug=1504393] + +* Corrected an encoding error that happened when a BeautifulSoup + object was copied. [bug=1554439] + +* The contents of <textarea> tags will no longer be modified when the + tree is prettified. [bug=1555829] + +* When a BeautifulSoup object is pickled but its tree builder cannot + be pickled, its .builder attribute is set to None instead of being + destroyed. This avoids a performance problem once the object is + unpickled. [bug=1523629] + +* Specify the file and line number when warning about a + BeautifulSoup object being instantiated without a parser being + specified. [bug=1574647] + +* The `limit` argument to `select()` now works correctly, though it's + not implemented very efficiently. [bug=1520530] + +* Fixed a Python 3 ByteWarning when a URL was passed in as though it + were markup. Thanks to James Salter for a patch and + test. [bug=1533762] + +* We don't run the check for a filename passed in as markup if the + 'filename' contains a less-than character; the less-than character + indicates it's most likely a very small document. 
[bug=1577864] + = 4.4.1 (20150928) = * Fixed a bug that deranged the tree when part of it was @@ -455,7 +522,7 @@ Bug fixes: * Renamed Tag.nsprefix to Tag.prefix, for consistency with NamespacedAttribute. -* Fixed a test failure that occured on Python 3.x when chardet was +* Fixed a test failure that occurred on Python 3.x when chardet was installed. * Made prettify() return Unicode by default, so it will look nice on @@ -489,7 +556,7 @@ Bug fixes: * Restored compatibility with Python 2.6. -* The install process no longer installs docs or auxillary text files. +* The install process no longer installs docs or auxiliary text files. * It's now possible to deepcopy a BeautifulSoup object created with Python's built-in HTML parser. @@ -728,7 +795,7 @@ Added an import that makes BS work in Python 2.3. Fixed a UnicodeDecodeError when unpickling documents that contain non-ASCII characters. -Fixed a TypeError that occured in some circumstances when a tag +Fixed a TypeError that occurred in some circumstances when a tag contained no text. Jump through hoops to avoid the use of chardet, which can be extremely diff --git a/bs4/__init__.py b/bs4/__init__.py index d35f765..aa818ae 100644 --- a/bs4/__init__.py +++ b/bs4/__init__.py @@ -5,26 +5,31 @@ http://www.crummy.com/software/BeautifulSoup/ Beautiful Soup uses a pluggable XML or HTML parser to parse a (possibly invalid) document into a tree representation. Beautiful Soup -provides provides methods and Pythonic idioms that make it easy to -navigate, search, and modify the parse tree. +provides methods and Pythonic idioms that make it easy to navigate, +search, and modify the parse tree. -Beautiful Soup works with Python 2.6 and up. It works better if lxml +Beautiful Soup works with Python 2.7 and up. It works better if lxml and/or html5lib is installed. For more than you ever wanted to know about Beautiful Soup, see the documentation: http://www.crummy.com/software/BeautifulSoup/bs4/doc/ + """ +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + __author__ = "Leonard Richardson (leonardr@segfault.org)" -__version__ = "4.4.0" -__copyright__ = "Copyright (c) 2004-2015 Leonard Richardson" +__version__ = "4.5.1" +__copyright__ = "Copyright (c) 2004-2016 Leonard Richardson" __license__ = "MIT" __all__ = ['BeautifulSoup'] import os import re +import traceback import warnings from .builder import builder_registry, ParserRejectedMarkup @@ -77,7 +82,7 @@ class BeautifulSoup(Tag): ASCII_SPACES = '\x20\x0a\x09\x0c\x0d' - NO_PARSER_SPECIFIED_WARNING = "No parser was explicitly specified, so I'm using the best available %(markup_type)s parser for this system (\"%(parser)s\"). This usually isn't a problem, but if you run this code on another system, or in a different virtual environment, it may use a different parser and behave differently.\n\nTo get rid of this warning, change this:\n\n BeautifulSoup([your markup])\n\nto this:\n\n BeautifulSoup([your markup], \"%(parser)s\")\n" + NO_PARSER_SPECIFIED_WARNING = "No parser was explicitly specified, so I'm using the best available %(markup_type)s parser for this system (\"%(parser)s\"). This usually isn't a problem, but if you run this code on another system, or in a different virtual environment, it may use a different parser and behave differently.\n\nThe code that caused this warning is on line %(line_number)s of the file %(filename)s. 
To get rid of this warning, change code that looks like this:\n\n BeautifulSoup([your markup])\n\nto this:\n\n BeautifulSoup([your markup], \"%(parser)s\")\n" def __init__(self, markup="", features=None, builder=None, parse_only=None, from_encoding=None, exclude_encodings=None, @@ -137,6 +142,10 @@ class BeautifulSoup(Tag): from_encoding = from_encoding or deprecated_argument( "fromEncoding", "from_encoding") + if from_encoding and isinstance(markup, unicode): + warnings.warn("You provided Unicode markup but also provided a value for from_encoding. Your from_encoding will be ignored.") + from_encoding = None + if len(kwargs) > 0: arg = kwargs.keys().pop() raise TypeError( @@ -161,19 +170,29 @@ class BeautifulSoup(Tag): markup_type = "XML" else: markup_type = "HTML" + + caller = traceback.extract_stack()[0] + filename = caller[0] + line_number = caller[1] warnings.warn(self.NO_PARSER_SPECIFIED_WARNING % dict( + filename=filename, + line_number=line_number, parser=builder.NAME, markup_type=markup_type)) self.builder = builder self.is_xml = builder.is_xml + self.known_xml = self.is_xml self.builder.soup = self self.parse_only = parse_only if hasattr(markup, 'read'): # It's a file-type object. markup = markup.read() - elif len(markup) <= 256: + elif len(markup) <= 256 and ( + (isinstance(markup, bytes) and not b'<' in markup) + or (isinstance(markup, unicode) and not u'<' in markup) + ): # Print out warnings for a couple beginner problems # involving passing non-markup to Beautiful Soup. # Beautiful Soup will still parse the input as markup, @@ -195,16 +214,10 @@ class BeautifulSoup(Tag): if isinstance(markup, unicode): markup = markup.encode("utf8") warnings.warn( - '"%s" looks like a filename, not markup. You should probably open this file and pass the filehandle into Beautiful Soup.' % markup) - if markup[:5] == "http:" or markup[:6] == "https:": - # TODO: This is ugly but I couldn't get it to work in - # Python 3 otherwise. - if ((isinstance(markup, bytes) and not b' ' in markup) - or (isinstance(markup, unicode) and not u' ' in markup)): - if isinstance(markup, unicode): - markup = markup.encode("utf8") - warnings.warn( - '"%s" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client to get the document behind the URL, and feed that document to Beautiful Soup.' % markup) + '"%s" looks like a filename, not markup. You should' + 'probably open this file and pass the filehandle into' + 'Beautiful Soup.' % markup) + self._check_markup_is_url(markup) for (self.markup, self.original_encoding, self.declared_html_encoding, self.contains_replacement_characters) in ( @@ -223,15 +236,52 @@ class BeautifulSoup(Tag): self.builder.soup = None def __copy__(self): - return type(self)(self.encode(), builder=self.builder) + copy = type(self)( + self.encode('utf-8'), builder=self.builder, from_encoding='utf-8' + ) + + # Although we encoded the tree to UTF-8, that may not have + # been the encoding of the original markup. Set the copy's + # .original_encoding to reflect the original object's + # .original_encoding. + copy.original_encoding = self.original_encoding + return copy def __getstate__(self): # Frequently a tree builder can't be pickled. d = dict(self.__dict__) if 'builder' in d and not self.builder.picklable: - del d['builder'] + d['builder'] = None return d + @staticmethod + def _check_markup_is_url(markup): + """ + Check if markup looks like it's actually a url and raise a warning + if so. Markup can be unicode or str (py2) / bytes (py3). 
+ """ + if isinstance(markup, bytes): + space = b' ' + cant_start_with = (b"http:", b"https:") + elif isinstance(markup, unicode): + space = u' ' + cant_start_with = (u"http:", u"https:") + else: + return + + if any(markup.startswith(prefix) for prefix in cant_start_with): + if not space in markup: + if isinstance(markup, bytes): + decoded_markup = markup.decode('utf-8', 'replace') + else: + decoded_markup = markup + warnings.warn( + '"%s" looks like a URL. Beautiful Soup is not an' + ' HTTP client. You should probably use an HTTP client like' + ' requests to get the document behind the URL, and feed' + ' that document to Beautiful Soup.' % decoded_markup + ) + def _feed(self): # Convert the document to Unicode. self.builder.reset() @@ -335,7 +385,18 @@ class BeautifulSoup(Tag): if parent.next_sibling: # This node is being inserted into an element that has # already been parsed. Deal with any dangling references. - index = parent.contents.index(o) + index = len(parent.contents)-1 + while index >= 0: + if parent.contents[index] is o: + break + index -= 1 + else: + raise ValueError( + "Error building tree: supposedly %r was inserted " + "into %r after the fact, but I don't see it!" % ( + o, parent + ) + ) if index == 0: previous_element = parent previous_sibling = None @@ -387,7 +448,7 @@ class BeautifulSoup(Tag): """Push a start tag on to the stack. If this method returns None, the tag was rejected by the - SoupStrainer. You should proceed as if the tag had not occured + SoupStrainer. You should proceed as if the tag had not occurred in the document. For instance, if this was a self-closing tag, don't call handle_endtag. """ diff --git a/bs4/builder/__init__.py b/bs4/builder/__init__.py index f8fce56..601979b 100644 --- a/bs4/builder/__init__.py +++ b/bs4/builder/__init__.py @@ -1,9 +1,13 @@ +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + from collections import defaultdict import itertools import sys from bs4.element import ( CharsetMetaAttributeValue, ContentMetaAttributeValue, + HTMLAwareEntitySubstitution, whitespace_re ) @@ -227,7 +231,7 @@ class HTMLTreeBuilder(TreeBuilder): Such as which tags are empty-element tags. """ - preserve_whitespace_tags = set(['pre', 'textarea']) + preserve_whitespace_tags = HTMLAwareEntitySubstitution.preserve_whitespace_tags empty_element_tags = set(['br' , 'hr', 'input', 'img', 'meta', 'spacer', 'link', 'frame', 'base']) diff --git a/bs4/builder/_html5lib.py b/bs4/builder/_html5lib.py index 8725a65..5f54893 100644 --- a/bs4/builder/_html5lib.py +++ b/bs4/builder/_html5lib.py @@ -1,9 +1,12 @@ +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. 
+ __all__ = [ 'HTML5TreeBuilder', ] -from pdb import set_trace import warnings +import re from bs4.builder import ( PERMISSIVE, HTML, @@ -15,7 +18,10 @@ from bs4.element import ( whitespace_re, ) import html5lib -from html5lib.constants import namespaces +from html5lib.constants import ( + namespaces, + prefixes, + ) from bs4.element import ( Comment, Doctype, @@ -23,6 +29,15 @@ from bs4.element import ( Tag, ) +try: + # Pre-0.99999999 + from html5lib.treebuilders import _base as treebuilder_base + new_html5lib = False +except ImportError, e: + # 0.99999999 and up + from html5lib.treebuilders import base as treebuilder_base + new_html5lib = True + class HTML5TreeBuilder(HTMLTreeBuilder): """Use html5lib to build a tree.""" @@ -47,7 +62,14 @@ class HTML5TreeBuilder(HTMLTreeBuilder): if self.soup.parse_only is not None: warnings.warn("You provided a value for parse_only, but the html5lib tree builder doesn't support parse_only. The entire document will be parsed.") parser = html5lib.HTMLParser(tree=self.create_treebuilder) - doc = parser.parse(markup, encoding=self.user_specified_encoding) + + extra_kwargs = dict() + if not isinstance(markup, unicode): + if new_html5lib: + extra_kwargs['override_encoding'] = self.user_specified_encoding + else: + extra_kwargs['encoding'] = self.user_specified_encoding + doc = parser.parse(markup, **extra_kwargs) # Set the character encoding detected by the tokenizer. if isinstance(markup, unicode): @@ -55,11 +77,17 @@ class HTML5TreeBuilder(HTMLTreeBuilder): # charEncoding to UTF-8 if it gets Unicode input. doc.original_encoding = None else: - doc.original_encoding = parser.tokenizer.stream.charEncoding[0] + original_encoding = parser.tokenizer.stream.charEncoding[0] + if not isinstance(original_encoding, basestring): + # In 0.99999999 and up, the encoding is an html5lib + # Encoding object. We want to use a string for compatibility + # with other tree builders. 
+ original_encoding = original_encoding.name + doc.original_encoding = original_encoding def create_treebuilder(self, namespaceHTMLElements): self.underlying_builder = TreeBuilderForHtml5lib( - self.soup, namespaceHTMLElements) + namespaceHTMLElements, self.soup) return self.underlying_builder def test_fragment_to_document(self, fragment): @@ -67,10 +95,14 @@ class HTML5TreeBuilder(HTMLTreeBuilder): return u'<html><head></head><body>%s</body></html>' % fragment -class TreeBuilderForHtml5lib(html5lib.treebuilders._base.TreeBuilder): +class TreeBuilderForHtml5lib(treebuilder_base.TreeBuilder): - def __init__(self, soup, namespaceHTMLElements): - self.soup = soup + def __init__(self, namespaceHTMLElements, soup=None): + if soup: + self.soup = soup + else: + from bs4 import BeautifulSoup + self.soup = BeautifulSoup("", "html.parser") super(TreeBuilderForHtml5lib, self).__init__(namespaceHTMLElements) def documentClass(self): @@ -93,7 +125,8 @@ class TreeBuilderForHtml5lib(html5lib.treebuilders._base.TreeBuilder): return TextNode(Comment(data), self.soup) def fragmentClass(self): - self.soup = BeautifulSoup("") + from bs4 import BeautifulSoup + self.soup = BeautifulSoup("", "html.parser") self.soup.name = "[document_fragment]" return Element(self.soup, self.soup, None) @@ -105,7 +138,57 @@ class TreeBuilderForHtml5lib(html5lib.treebuilders._base.TreeBuilder): return self.soup def getFragment(self): - return html5lib.treebuilders._base.TreeBuilder.getFragment(self).element + return treebuilder_base.TreeBuilder.getFragment(self).element + + def testSerializer(self, element): + from bs4 import BeautifulSoup + rv = [] + doctype_re = re.compile(r'^(.*?)(?: PUBLIC "(.*?)"(?: "(.*?)")?| SYSTEM "(.*?)")?$') + + def serializeElement(element, indent=0): + if isinstance(element, BeautifulSoup): + pass + if isinstance(element, Doctype): + m = doctype_re.match(element) + if m: + name = m.group(1) + if m.lastindex > 1: + publicId = m.group(2) or "" + systemId = m.group(3) or m.group(4) or "" + rv.append("""|%s<!DOCTYPE %s "%s" "%s">""" % + (' ' * indent, name, publicId, systemId)) + else: + rv.append("|%s<!DOCTYPE %s>" % (' ' * indent, name)) + else: + rv.append("|%s<!DOCTYPE >" % (' ' * indent,)) + elif isinstance(element, Comment): + rv.append("|%s<!-- %s -->" % (' ' * indent, element)) + elif isinstance(element, NavigableString): + rv.append("|%s\"%s\"" % (' ' * indent, element)) + else: + if element.namespace: + name = "%s %s" % (prefixes[element.namespace], + element.name) + else: + name = element.name + rv.append("|%s<%s>" % (' ' * indent, name)) + if element.attrs: + attributes = [] + for name, value in element.attrs.items(): + if isinstance(name, NamespacedAttribute): + name = "%s %s" % (prefixes[name.namespace], name.name) + if isinstance(value, list): + value = " ".join(value) + attributes.append((name, value)) + + for name, value in sorted(attributes): + rv.append('|%s%s="%s"' % (' ' * (indent + 2), name, value)) + indent += 2 + for child in element.children: + serializeElement(child, indent) + serializeElement(element, 0) + + return "\n".join(rv) class AttrList(object): def __init__(self, element): @@ -137,9 +220,9 @@ class AttrList(object): return name in list(self.attrs.keys()) -class Element(html5lib.treebuilders._base.Node): +class Element(treebuilder_base.Node): def __init__(self, element, soup, namespace): - html5lib.treebuilders._base.Node.__init__(self, element.name) + treebuilder_base.Node.__init__(self, element.name) self.element = element self.soup = soup self.namespace = namespace @@ 
-158,8 +241,10 @@ class Element(html5lib.treebuilders._base.Node): child = node elif node.element.__class__ == NavigableString: string_child = child = node.element + node.parent = self else: child = node.element + node.parent = self if not isinstance(child, basestring) and child.parent is not None: node.element.extract() @@ -197,6 +282,8 @@ class Element(html5lib.treebuilders._base.Node): most_recent_element=most_recent_element) def getAttributes(self): + if isinstance(self.element, Comment): + return {} return AttrList(self.element) def setAttributes(self, attributes): @@ -224,11 +311,11 @@ class Element(html5lib.treebuilders._base.Node): attributes = property(getAttributes, setAttributes) def insertText(self, data, insertBefore=None): + text = TextNode(self.soup.new_string(data), self.soup) if insertBefore: - text = TextNode(self.soup.new_string(data), self.soup) - self.insertBefore(data, insertBefore) + self.insertBefore(text, insertBefore) else: - self.appendChild(data) + self.appendChild(text) def insertBefore(self, node, refNode): index = self.element.index(refNode.element) @@ -250,6 +337,7 @@ class Element(html5lib.treebuilders._base.Node): # print "MOVE", self.element.contents # print "FROM", self.element # print "TO", new_parent.element + element = self.element new_parent_element = new_parent.element # Determine what this tag's next_element will be once all the children @@ -268,7 +356,6 @@ class Element(html5lib.treebuilders._base.Node): new_parents_last_descendant_next_element = new_parent_element.next_element to_append = element.contents - append_after = new_parent_element.contents if len(to_append) > 0: # Set the first child's previous_element and previous_sibling # to elements within the new parent @@ -285,12 +372,19 @@ class Element(html5lib.treebuilders._base.Node): if new_parents_last_child: new_parents_last_child.next_sibling = first_child - # Fix the last child's next_element and next_sibling - last_child = to_append[-1] - last_child.next_element = new_parents_last_descendant_next_element + # Find the very last element being moved. It is now the + # parent's last descendant. It has no .next_sibling and + # its .next_element is whatever the previous last + # descendant had. + last_childs_last_descendant = to_append[-1]._last_descendant(False, True) + + last_childs_last_descendant.next_element = new_parents_last_descendant_next_element if new_parents_last_descendant_next_element: - new_parents_last_descendant_next_element.previous_element = last_child - last_child.next_sibling = None + # TODO: This code has no test coverage and I'm not sure + # how to get html5lib to go through this path, but it's + # just the other side of the previous line. + new_parents_last_descendant_next_element.previous_element = last_childs_last_descendant + last_childs_last_descendant.next_sibling = None for child in to_append: child.parent = new_parent_element @@ -324,7 +418,7 @@ class Element(html5lib.treebuilders._base.Node): class TextNode(Element): def __init__(self, element, soup): - html5lib.treebuilders._base.Node.__init__(self, None) + treebuilder_base.Node.__init__(self, None) self.element = element self.soup = soup diff --git a/bs4/builder/_htmlparser.py b/bs4/builder/_htmlparser.py index 0101d64..823ca15 100644 --- a/bs4/builder/_htmlparser.py +++ b/bs4/builder/_htmlparser.py @@ -1,5 +1,8 @@ """Use the HTMLParser library to parse HTML files that aren't too bad.""" +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. 
+ __all__ = [ 'HTMLParserTreeBuilder', ] diff --git a/bs4/builder/_lxml.py b/bs4/builder/_lxml.py index 9e8f88f..d2ca287 100644 --- a/bs4/builder/_lxml.py +++ b/bs4/builder/_lxml.py @@ -1,3 +1,5 @@ +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. __all__ = [ 'LXMLTreeBuilderForXML', 'LXMLTreeBuilder', @@ -12,6 +14,7 @@ from bs4.element import ( Doctype, NamespacedAttribute, ProcessingInstruction, + XMLProcessingInstruction, ) from bs4.builder import ( FAST, @@ -29,6 +32,7 @@ class LXMLTreeBuilderForXML(TreeBuilder): DEFAULT_PARSER_CLASS = etree.XMLParser is_xml = True + processing_instruction_class = XMLProcessingInstruction NAME = "lxml-xml" ALTERNATE_NAMES = ["xml"] @@ -87,6 +91,16 @@ class LXMLTreeBuilderForXML(TreeBuilder): Each 4-tuple represents a strategy for parsing the document. """ + # Instead of using UnicodeDammit to convert the bytestring to + # Unicode using different encodings, use EncodingDetector to + # iterate over the encodings, and tell lxml to try to parse + # the document as each one in turn. + is_html = not self.is_xml + if is_html: + self.processing_instruction_class = ProcessingInstruction + else: + self.processing_instruction_class = XMLProcessingInstruction + if isinstance(markup, unicode): # We were given Unicode. Maybe lxml can parse Unicode on # this system? @@ -98,11 +112,6 @@ class LXMLTreeBuilderForXML(TreeBuilder): yield (markup.encode("utf8"), "utf8", document_declared_encoding, False) - # Instead of using UnicodeDammit to convert the bytestring to - # Unicode using different encodings, use EncodingDetector to - # iterate over the encodings, and tell lxml to try to parse - # the document as each one in turn. - is_html = not self.is_xml try_encodings = [user_specified_encoding, document_declared_encoding] detector = EncodingDetector( markup, try_encodings, is_html, exclude_encodings) @@ -201,7 +210,7 @@ class LXMLTreeBuilderForXML(TreeBuilder): def pi(self, target, data): self.soup.endData() self.soup.handle_data(target + ' ' + data) - self.soup.endData(ProcessingInstruction) + self.soup.endData(self.processing_instruction_class) def data(self, content): self.soup.handle_data(content) @@ -229,6 +238,7 @@ class LXMLTreeBuilder(HTMLTreeBuilder, LXMLTreeBuilderForXML): features = ALTERNATE_NAMES + [NAME, HTML, FAST, PERMISSIVE] is_xml = False + processing_instruction_class = ProcessingInstruction def default_parser(self, encoding): return etree.HTMLParser diff --git a/bs4/dammit.py b/bs4/dammit.py index 030f04a..7965565 100644 --- a/bs4/dammit.py +++ b/bs4/dammit.py @@ -6,9 +6,10 @@ necessary. It is heavily based on code from Mark Pilgrim's Universal Feed Parser. It works best on XML and HTML, but it does not rewrite the XML or HTML to reflect a new encoding; that's the tree builder's job. """ +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. 
__license__ = "MIT" -from pdb import set_trace import codecs from htmlentitydefs import codepoint2name import re @@ -346,7 +347,7 @@ class UnicodeDammit: self.tried_encodings = [] self.contains_replacement_characters = False self.is_html = is_html - + self.log = logging.getLogger(__name__) self.detector = EncodingDetector( markup, override_encodings, is_html, exclude_encodings) @@ -376,9 +377,10 @@ class UnicodeDammit: if encoding != "ascii": u = self._convert_from(encoding, "replace") if u is not None: - logging.warning( + self.log.warning( "Some characters could not be decoded, and were " - "replaced with REPLACEMENT CHARACTER.") + "replaced with REPLACEMENT CHARACTER." + ) self.contains_replacement_characters = True break diff --git a/bs4/diagnose.py b/bs4/diagnose.py index c04d23c..8768332 100644 --- a/bs4/diagnose.py +++ b/bs4/diagnose.py @@ -1,5 +1,7 @@ """Diagnostic functions, mainly for use when doing tech support.""" +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. __license__ = "MIT" import cProfile @@ -56,7 +58,8 @@ def diagnose(data): data = data.read() elif os.path.exists(data): print '"%s" looks like a filename. Reading data from the file.' % data - data = open(data).read() + with open(data) as fp: + data = fp.read() elif data.startswith("http:") or data.startswith("https:"): print '"%s" looks like a URL. Beautiful Soup is not an HTTP client.' % data print "You need to use some other library to get the document behind the URL, and feed that document to Beautiful Soup." diff --git a/bs4/element.py b/bs4/element.py index ecf2b28..b100d18 100644 --- a/bs4/element.py +++ b/bs4/element.py @@ -1,8 +1,10 @@ +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. __license__ = "MIT" -from pdb import set_trace import collections import re +import shlex import sys import warnings from bs4.dammit import EntitySubstitution @@ -99,6 +101,8 @@ class HTMLAwareEntitySubstitution(EntitySubstitution): preformatted_tags = set(["pre"]) + preserve_whitespace_tags = set(['pre', 'textarea']) + @classmethod def _substitute_if_appropriate(cls, ns, f): if (isinstance(ns, NavigableString) @@ -169,11 +173,19 @@ class PageElement(object): This is used when mapping a formatter name ("minimal") to an appropriate function (one that performs entity-substitution on - the contents of <script> and <style> tags, or not). It's + the contents of <script> and <style> tags, or not). It can be inefficient, but it should be called very rarely. """ + if self.known_xml is not None: + # Most of the time we will have determined this when the + # document is parsed. + return self.known_xml + + # Otherwise, it's likely that this element was created by + # direct invocation of the constructor from within the user's + # Python code. if self.parent is None: - # This is the top-level object. It should have .is_xml set + # This is the top-level object. It should have .known_xml set # from tree creation. If not, take a guess--BS is usually # used on HTML markup. 
return getattr(self, 'is_xml', False) @@ -637,7 +649,7 @@ class PageElement(object): return lambda el: el._attr_value_as_string( attribute, '').startswith(value) elif operator == '$': - # string represenation of `attribute` ends with `value` + # string representation of `attribute` ends with `value` return lambda el: el._attr_value_as_string( attribute, '').endswith(value) elif operator == '*': @@ -677,6 +689,11 @@ class NavigableString(unicode, PageElement): PREFIX = '' SUFFIX = '' + # We can't tell just by looking at a string whether it's contained + # in an XML document or an HTML document. + + known_xml = None + def __new__(cls, value): """Create a new NavigableString. @@ -743,10 +760,16 @@ class CData(PreformattedString): SUFFIX = u']]>' class ProcessingInstruction(PreformattedString): + """A SGML processing instruction.""" PREFIX = u'<?' SUFFIX = u'>' +class XMLProcessingInstruction(ProcessingInstruction): + """An XML processing instruction.""" + PREFIX = u'<?' + SUFFIX = u'?>' + class Comment(PreformattedString): PREFIX = u'<!--' @@ -781,7 +804,8 @@ class Tag(PageElement): """Represents a found HTML tag with its attributes and contents.""" def __init__(self, parser=None, builder=None, name=None, namespace=None, - prefix=None, attrs=None, parent=None, previous=None): + prefix=None, attrs=None, parent=None, previous=None, + is_xml=None): "Basic constructor." if parser is None: @@ -795,6 +819,14 @@ class Tag(PageElement): self.name = name self.namespace = namespace self.prefix = prefix + if builder is not None: + preserve_whitespace_tags = builder.preserve_whitespace_tags + else: + if is_xml: + preserve_whitespace_tags = [] + else: + preserve_whitespace_tags = HTMLAwareEntitySubstitution.preserve_whitespace_tags + self.preserve_whitespace_tags = preserve_whitespace_tags if attrs is None: attrs = {} elif attrs: @@ -805,6 +837,13 @@ class Tag(PageElement): attrs = dict(attrs) else: attrs = dict(attrs) + + # If possible, determine ahead of time whether this tag is an + # XML tag. + if builder: + self.known_xml = builder.is_xml + else: + self.known_xml = is_xml self.attrs = attrs self.contents = [] self.setup(parent, previous) @@ -824,7 +863,7 @@ class Tag(PageElement): Its contents are a copy of the old Tag's contents. """ clone = type(self)(None, self.builder, self.name, self.namespace, - self.nsprefix, self.attrs) + self.nsprefix, self.attrs, is_xml=self._is_xml) for attr in ('can_be_empty_element', 'hidden'): setattr(clone, attr, getattr(self, attr)) for child in self.contents: @@ -997,7 +1036,7 @@ class Tag(PageElement): tag_name, tag_name)) return self.find(tag_name) # We special case contents to avoid recursion. 
- elif not tag.startswith("__") and not tag=="contents": + elif not tag.startswith("__") and not tag == "contents": return self.find(tag) raise AttributeError( "'%s' object has no attribute '%s'" % (self.__class__, tag)) @@ -1057,10 +1096,11 @@ class Tag(PageElement): def _should_pretty_print(self, indent_level): """Should this tag be pretty-printed?""" + return ( - indent_level is not None and - (self.name not in HTMLAwareEntitySubstitution.preformatted_tags - or self._is_xml)) + indent_level is not None + and self.name not in self.preserve_whitespace_tags + ) def decode(self, indent_level=None, eventual_encoding=DEFAULT_OUTPUT_ENCODING, @@ -1280,6 +1320,7 @@ class Tag(PageElement): _selector_combinators = ['>', '+', '~'] _select_debug = False + quoted_colon = re.compile('"[^"]*:[^"]*"') def select_one(self, selector): """Perform a CSS selection operation on the current element.""" value = self.select(selector, limit=1) @@ -1305,8 +1346,7 @@ class Tag(PageElement): if limit and len(context) >= limit: break return context - - tokens = selector.split() + tokens = shlex.split(selector) current_context = [self] if tokens[-1] in self._selector_combinators: @@ -1358,7 +1398,7 @@ class Tag(PageElement): return classes.issubset(candidate.get('class', [])) checker = classes_match - elif ':' in token: + elif ':' in token and not self.quoted_colon.search(token): # Pseudo-class tag_name, pseudo = token.split(':', 1) if tag_name == '': @@ -1389,11 +1429,8 @@ class Tag(PageElement): self.count += 1 if self.count == self.destination: return True - if self.count > self.destination: - # Stop the generator that's sending us - # these things. - raise StopIteration() - return False + else: + return False checker = Counter(pseudo_value).nth_child_of_type else: raise NotImplementedError( @@ -1498,13 +1535,12 @@ class Tag(PageElement): # don't include it in the context more than once. new_context.append(candidate) new_context_ids.add(id(candidate)) - if limit and len(new_context) >= limit: - break elif self._select_debug: print " FAILURE %s %s" % (candidate.name, repr(candidate.attrs)) - current_context = new_context + if limit and len(current_context) >= limit: + current_context = current_context[:limit] if self._select_debug: print "Final verdict:" @@ -1668,21 +1704,15 @@ class SoupStrainer(object): if isinstance(markup, list) or isinstance(markup, tuple): # This should only happen when searching a multi-valued attribute # like 'class'. - if (isinstance(match_against, unicode) - and ' ' in match_against): - # A bit of a special case. If they try to match "foo - # bar" on a multivalue attribute's value, only accept - # the literal value "foo bar" - # - # XXX This is going to be pretty slow because we keep - # splitting match_against. But it shouldn't come up - # too often. - return (whitespace_re.split(match_against) == markup) - else: - for item in markup: - if self._matches(item, match_against): - return True - return False + for item in markup: + if self._matches(item, match_against): + return True + # We didn't match any particular value of the multivalue + # attribute, but maybe we match the attribute value when + # considered as a string. + if self._matches(' '.join(markup), match_against): + return True + return False if match_against is True: # True matches any non-None value. 
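The SoupStrainer._matches change in bs4/element.py above, together with the multi-valued attribute entry in NEWS.txt, boils down to the following behavior. This is an illustrative sketch only; the markup and the "o b" pattern mirror the updated tests in bs4/tests/test_tree.py rather than code from this commit:

    from bs4 import BeautifulSoup
    import re

    soup = BeautifulSoup('<div class="foo bar">Found it</div>', 'html.parser')

    # Neither "foo" nor "bar" matches this regular expression on its own,
    # but the combined string "foo bar" does, so the tag is now a match.
    soup.find_all('div', class_=re.compile('o b'))
    # [<div class="foo bar">Found it</div>]

    # The same fallback applies when the value searched for is a plain
    # string or a list of strings.
    soup.find_all('div', class_='foo bar')
    # [<div class="foo bar">Found it</div>]
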
diff --git a/bs4/testing.py b/bs4/testing.py index 7ba54ab..3a6ed42 100644 --- a/bs4/testing.py +++ b/bs4/testing.py @@ -1,5 +1,7 @@ """Helper classes for tests.""" +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. __license__ = "MIT" import pickle @@ -137,6 +139,14 @@ class HTMLTreeBuilderSmokeTest(object): markup.replace(b"\n", b"")) def test_processing_instruction(self): + # We test both Unicode and bytestring to verify that + # process_markup correctly sets processing_instruction_class + # even when the markup is already Unicode and there is no + # need to process anything. + markup = u"""<?PITarget PIContent?>""" + soup = self.soup(markup) + self.assertEqual(markup, soup.decode()) + markup = b"""<?PITarget PIContent?>""" soup = self.soup(markup) self.assertEqual(markup, soup.encode("utf8")) @@ -215,9 +225,22 @@ Hello, world! self.assertEqual(comment, baz.previous_element) def test_preserved_whitespace_in_pre_and_textarea(self): - """Whitespace must be preserved in <pre> and <textarea> tags.""" - self.assertSoupEquals("<pre> </pre>") - self.assertSoupEquals("<textarea> woo </textarea>") + """Whitespace must be preserved in <pre> and <textarea> tags, + even if that would mean not prettifying the markup. + """ + pre_markup = "<pre> </pre>" + textarea_markup = "<textarea> woo\nwoo </textarea>" + self.assertSoupEquals(pre_markup) + self.assertSoupEquals(textarea_markup) + + soup = self.soup(pre_markup) + self.assertEqual(soup.pre.prettify(), pre_markup) + + soup = self.soup(textarea_markup) + self.assertEqual(soup.textarea.prettify(), textarea_markup) + + soup = self.soup("<textarea></textarea>") + self.assertEqual(soup.textarea.prettify(), "<textarea></textarea>") def test_nested_inline_elements(self): """Inline elements can be nested indefinitely.""" @@ -480,7 +503,9 @@ Hello, world! hebrew_document = b'<html><head><title>Hebrew (ISO 8859-8) in Visual Directionality</title></head><body><h1>Hebrew (ISO 8859-8) in Visual Directionality</h1>\xed\xe5\xec\xf9</body></html>' soup = self.soup( hebrew_document, from_encoding="iso8859-8") - self.assertEqual(soup.original_encoding, 'iso8859-8') + # Some tree builders call it iso8859-8, others call it iso-8859-9. + # That's not a difference we really care about. + assert soup.original_encoding in ('iso8859-8', 'iso-8859-8') self.assertEqual( soup.encode('utf-8'), hebrew_document.decode("iso8859-8").encode("utf-8")) @@ -563,6 +588,11 @@ class XMLTreeBuilderSmokeTest(object): soup = self.soup(markup) self.assertEqual(markup, soup.encode("utf8")) + def test_processing_instruction(self): + markup = b"""<?xml version="1.0" encoding="utf8"?>\n<?PITarget PIContent?>""" + soup = self.soup(markup) + self.assertEqual(markup, soup.encode("utf8")) + def test_real_xhtml_document(self): """A real XHTML document should come out *exactly* the same as it went in.""" markup = b"""<?xml version="1.0" encoding="utf-8"?> diff --git a/bs4/tests/test_html5lib.py b/bs4/tests/test_html5lib.py index 65536c2..0f89d62 100644 --- a/bs4/tests/test_html5lib.py +++ b/bs4/tests/test_html5lib.py @@ -84,6 +84,33 @@ class HTML5LibBuilderSmokeTest(SoupTest, HTML5TreeBuilderSmokeTest): self.assertEqual(u"<body><p><em>foo</em></p><em>\n</em><p><em>bar<a></a></em></p>\n</body>", soup.body.decode()) self.assertEqual(2, len(soup.find_all('p'))) + def test_reparented_markup_containing_identical_whitespace_nodes(self): + """Verify that we keep the two whitespace nodes in this + document distinct when reparenting the adjacent <tbody> tags. 
+ """ + markup = '<table> <tbody><tbody><ims></tbody> </table>' + soup = self.soup(markup) + space1, space2 = soup.find_all(string=' ') + tbody1, tbody2 = soup.find_all('tbody') + assert space1.next_element is tbody1 + assert tbody2.next_element is space2 + + def test_reparented_markup_containing_children(self): + markup = '<div><a>aftermath<p><noscript>target</noscript>aftermath</a></p></div>' + soup = self.soup(markup) + noscript = soup.noscript + self.assertEqual("target", noscript.next_element) + target = soup.find(string='target') + + # The 'aftermath' string was duplicated; we want the second one. + final_aftermath = soup.find_all(string='aftermath')[-1] + + # The <noscript> tag was moved beneath a copy of the <a> tag, + # but the 'target' string within is still connected to the + # (second) 'aftermath' string. + self.assertEqual(final_aftermath, target.next_element) + self.assertEqual(target, final_aftermath.previous_element) + def test_processing_instruction(self): """Processing instructions become comments.""" markup = b"""<?PITarget PIContent?>""" @@ -96,3 +123,8 @@ class HTML5LibBuilderSmokeTest(SoupTest, HTML5TreeBuilderSmokeTest): a1, a2 = soup.find_all('a') self.assertEqual(a1, a2) assert a1 is not a2 + + def test_foster_parenting(self): + markup = b"""<table><td></tbody>A""" + soup = self.soup(markup) + self.assertEqual(u"<body>A<table><tbody><tr><td></td></tr></tbody></table></body>", soup.body.decode()) diff --git a/bs4/tests/test_soup.py b/bs4/tests/test_soup.py index 1238af2..f3e69ed 100644 --- a/bs4/tests/test_soup.py +++ b/bs4/tests/test_soup.py @@ -35,7 +35,6 @@ try: except ImportError, e: LXML_PRESENT = False -PYTHON_2_PRE_2_7 = (sys.version_info < (2,7)) PYTHON_3_PRE_3_2 = (sys.version_info[0] == 3 and sys.version_info < (3,2)) class TestConstructor(SoupTest): @@ -77,7 +76,7 @@ class TestWarnings(SoupTest): def test_no_warning_if_explicit_parser_specified(self): with warnings.catch_warnings(record=True) as w: soup = self.soup("<a><b></b></a>", "html.parser") - self.assertEquals([], w) + self.assertEqual([], w) def test_parseOnlyThese_renamed_to_parse_only(self): with warnings.catch_warnings(record=True) as w: @@ -118,15 +117,34 @@ class TestWarnings(SoupTest): soup = self.soup(filename) self.assertEqual(0, len(w)) - def test_url_warning(self): - with warnings.catch_warnings(record=True) as w: - soup = self.soup("http://www.crummy.com/") - msg = str(w[0].message) - self.assertTrue("looks like a URL" in msg) + def test_url_warning_with_bytes_url(self): + with warnings.catch_warnings(record=True) as warning_list: + soup = self.soup(b"http://www.crummybytes.com/") + # Be aware this isn't the only warning that can be raised during + # execution.. 
+ self.assertTrue(any("looks like a URL" in str(w.message) + for w in warning_list)) + + def test_url_warning_with_unicode_url(self): + with warnings.catch_warnings(record=True) as warning_list: + # note - this url must differ from the bytes one otherwise + # python's warnings system swallows the second warning + soup = self.soup(u"http://www.crummyunicode.com/") + self.assertTrue(any("looks like a URL" in str(w.message) + for w in warning_list)) + + def test_url_warning_with_bytes_and_space(self): + with warnings.catch_warnings(record=True) as warning_list: + soup = self.soup(b"http://www.crummybytes.com/ is great") + self.assertFalse(any("looks like a URL" in str(w.message) + for w in warning_list)) + + def test_url_warning_with_unicode_and_space(self): + with warnings.catch_warnings(record=True) as warning_list: + soup = self.soup(u"http://www.crummyuncode.com/ is great") + self.assertFalse(any("looks like a URL" in str(w.message) + for w in warning_list)) - with warnings.catch_warnings(record=True) as w: - soup = self.soup("http://www.crummy.com/ is great") - self.assertEqual(0, len(w)) class TestSelectiveParsing(SoupTest): @@ -260,7 +278,7 @@ class TestEncodingConversion(SoupTest): self.assertEqual(soup_from_unicode.encode('utf-8'), self.utf8_data) @skipIf( - PYTHON_2_PRE_2_7 or PYTHON_3_PRE_3_2, + PYTHON_3_PRE_3_2, "Bad HTMLParser detected; skipping test of non-ASCII characters in attribute name.") def test_attribute_name_containing_unicode_characters(self): markup = u'<div><a \N{SNOWMAN}="snowman"></a></div>' diff --git a/bs4/tests/test_tree.py b/bs4/tests/test_tree.py index 6b2a123..a4fe0b1 100644 --- a/bs4/tests/test_tree.py +++ b/bs4/tests/test_tree.py @@ -222,6 +222,17 @@ class TestFindAllByName(TreeTest): self.assertSelects( tree.find_all(id_matches_name), ["Match 1.", "Match 2."]) + def test_find_with_multi_valued_attribute(self): + soup = self.soup( + "<div class='a b'>1</div><div class='a c'>2</div><div class='a d'>3</div>" + ) + r1 = soup.find('div', 'a d'); + r2 = soup.find('div', re.compile(r'a d')); + r3, r4 = soup.find_all('div', ['a b', 'a d']); + self.assertEqual('3', r1.string) + self.assertEqual('3', r2.string) + self.assertEqual('1', r3.string) + self.assertEqual('3', r4.string) class TestFindAllByAttribute(TreeTest): @@ -294,10 +305,10 @@ class TestFindAllByAttribute(TreeTest): f = tree.find_all("gar", class_=re.compile("a")) self.assertSelects(f, ["Found it"]) - # Since the class is not the string "foo bar", but the two - # strings "foo" and "bar", this will not find anything. + # If the search fails to match the individual strings "foo" and "bar", + # it will be tried against the combined string "foo bar". f = tree.find_all("gar", class_=re.compile("o b")) - self.assertSelects(f, []) + self.assertSelects(f, ["Found it"]) def test_find_all_with_non_dictionary_for_attrs_finds_by_class(self): soup = self.soup("<a class='bar'>Found it</a>") @@ -335,7 +346,7 @@ class TestFindAllByAttribute(TreeTest): strainer = SoupStrainer(attrs={'id' : 'first'}) self.assertSelects(tree.find_all(strainer), ['Match.']) - def test_find_all_with_missing_atribute(self): + def test_find_all_with_missing_attribute(self): # You can pass in None as the value of an attribute to find_all. # This will match tags that do not have that attribute set. 
tree = self.soup("""<a id="1">ID present.</a> @@ -1328,6 +1339,13 @@ class TestPersistence(SoupTest): copied = copy.deepcopy(self.tree) self.assertEqual(copied.decode(), self.tree.decode()) + def test_copy_preserves_encoding(self): + soup = BeautifulSoup(b'<p> </p>', 'html.parser') + encoding = soup.original_encoding + copy = soup.__copy__() + self.assertEqual(u"<p> </p>", unicode(copy)) + self.assertEqual(encoding, copy.original_encoding) + def test_unicode_pickle(self): # A tree containing Unicode characters can be pickled. html = u"<b>\N{SNOWMAN}</b>" @@ -1676,8 +1694,8 @@ class TestSoupSelector(TreeTest): def setUp(self): self.soup = BeautifulSoup(self.HTML, 'html.parser') - def assertSelects(self, selector, expected_ids): - el_ids = [el['id'] for el in self.soup.select(selector)] + def assertSelects(self, selector, expected_ids, **kwargs): + el_ids = [el['id'] for el in self.soup.select(selector, **kwargs)] el_ids.sort() expected_ids.sort() self.assertEqual(expected_ids, el_ids, @@ -1720,6 +1738,13 @@ class TestSoupSelector(TreeTest): for selector in ('html div', 'html body div', 'body div'): self.assertSelects(selector, ['data1', 'main', 'inner', 'footer']) + + def test_limit(self): + self.assertSelects('html div', ['main'], limit=1) + self.assertSelects('html body div', ['inner', 'main'], limit=2) + self.assertSelects('body div', ['data1', 'main', 'inner', 'footer'], + limit=10) + def test_tag_no_match(self): self.assertEqual(len(self.soup.select('del')), 0) @@ -1902,6 +1927,14 @@ class TestSoupSelector(TreeTest): ('div[data-tag]', ['data1']) ) + def test_quoted_space_in_selector_name(self): + html = """<div style="display: wrong">nope</div> + <div style="display: right">yes</div> + """ + soup = BeautifulSoup(html, 'html.parser') + [chosen] = soup.select('div[style="display: right"]') + self.assertEqual("yes", chosen.string) + def test_unsupported_pseudoclass(self): self.assertRaises( NotImplementedError, self.soup.select, "a:no-such-pseudoclass") diff --git a/doc/source/index.rst b/doc/source/index.rst index 8258e97..56aa7fe 100644 --- a/doc/source/index.rst +++ b/doc/source/index.rst @@ -166,12 +166,16 @@ Installing Beautiful Soup If you're using a recent version of Debian or Ubuntu Linux, you can install Beautiful Soup with the system package manager: -:kbd:`$ apt-get install python-bs4` +:kbd:`$ apt-get install python-bs4` (for Python 2) + +:kbd:`$ apt-get install python3-bs4` (for Python 3) Beautiful Soup 4 is published through PyPi, so if you can't install it with the system packager, you can install it with ``easy_install`` or ``pip``. The package name is ``beautifulsoup4``, and the same package -works on Python 2 and Python 3. +works on Python 2 and Python 3. Make sure you use the right version of +``pip`` or ``easy_install`` for your Python version (these may be named +``pip3`` and ``easy_install3`` respectively if you're using Python 3). :kbd:`$ easy_install beautifulsoup4` @@ -298,7 +302,8 @@ constructor. You can pass in a string or an open filehandle:: from bs4 import BeautifulSoup - soup = BeautifulSoup(open("index.html")) + with open("index.html") as fp: + soup = BeautifulSoup(fp) soup = BeautifulSoup("<html>data</html>") @@ -355,34 +360,34 @@ Attributes ^^^^^^^^^^ A tag may have any number of attributes. The tag ``<b -class="boldest">`` has an attribute "class" whose value is +id="boldest">`` has an attribute "id" whose value is "boldest". 
You can access a tag's attributes by treating the tag like a dictionary:: - tag['class'] + tag['id'] # u'boldest' You can access that dictionary directly as ``.attrs``:: tag.attrs - # {u'class': u'boldest'} + # {u'id': 'boldest'} You can add, remove, and modify a tag's attributes. Again, this is done by treating the tag as a dictionary:: - tag['class'] = 'verybold' - tag['id'] = 1 + tag['id'] = 'verybold' + tag['another-attribute'] = 1 tag - # <blockquote class="verybold" id="1">Extremely bold</blockquote> + # <b another-attribute="1" id="verybold"></b> - del tag['class'] del tag['id'] + del tag['another-attribute'] tag - # <blockquote>Extremely bold</blockquote> + # <b></b> - tag['class'] - # KeyError: 'class' - print(tag.get('class')) + tag['id'] + # KeyError: 'id' + print(tag.get('id')) # None .. _multivalue: @@ -1045,7 +1050,7 @@ A regular expression ^^^^^^^^^^^^^^^^^^^^ If you pass in a regular expression object, Beautiful Soup will filter -against that regular expression using its ``match()`` method. This code +against that regular expression using its ``search()`` method. This code finds all the tags whose names start with the letter "b"; in this case, the <body> tag and the <b> tag:: @@ -1257,6 +1262,17 @@ dictionary and passing the dictionary into ``find_all()`` as the data_soup.find_all(attrs={"data-foo": "value"}) # [<div data-foo="value">foo!</div>] +You can't use a keyword argument to search for HTML's 'name' element, +because Beautiful Soup uses the ``name`` argument to contain the name +of the tag itself. Instead, you can give a value to 'name' in the +``attrs`` argument. + + name_soup = BeautifulSoup('<input name="email"/>') + name_soup.find_all(name="email") + # [] + name_soup.find_all(attrs={"name": "email"}) + # [<input name="email"/>] + .. _attrs: Searching by CSS class @@ -2776,7 +2792,8 @@ you how different parsers handle the document, and tell you if you're missing a parser that Beautiful Soup could be using:: from bs4.diagnose import diagnose - data = open("bad.html").read() + with open("bad.html") as fp: + data = fp.read() diagnose(data) # Diagnostic running on Beautiful Soup 4.2.0 diff --git a/prepare-release.sh b/prepare-release.sh index 48bff57..aaa95a5 100644 --- a/prepare-release.sh +++ b/prepare-release.sh @@ -11,52 +11,42 @@ # Make sure tests pass ./test-all-versions -# Make sure nothing broke on 2.6 -source ../virtualenv-2.6/bin/activate -nosetests -deactivate - -rm -rf dist +rm -rf build dist # Create the 2.x source distro and wheel python setup.py sdist bdist_wheel -# Create the 3.x wheel -source ../virtualenv-3/bin/activate -python setup.py bdist_wheel -deactivate - -# Upload to pypi test +# Upload the 2.x source distro and wheel to pypi test python setup.py register -r test python setup.py sdist bdist_wheel upload -r test -source ../virtualenv-3/bin/activate -python setup.py bdist_wheel upload -r test -deactivate - # Try 2.x install from pypi test rm -rf ../py2-install-test-virtualenv virtualenv -p /usr/bin/python2.7 ../py2-install-test-virtualenv source ../py2-install-test-virtualenv/bin/activate -pip install -i https://testpypi.python.org/pypi beautifulsoup4 +pip install --pre -i https://pypi.python.org/pypi beautifulsoup4 echo "EXPECT HTML ON LINE BELOW" (cd .. 
&& python -c "from bs4 import _s; print(_s('<a>foo', 'html.parser'))") # That should print '<a>foo</a>' deactivate rm -rf ../py2-install-test-virtualenv -# Try 3.x install from pypi test -rm -rf ../py3-install-test-virtualenv -virtualenv -p /usr/bin/python3 ../py3-install-test-virtualenv -source ../py3-install-test-virtualenv/bin/activate +# Try 3.x source install from pypi test +rm -rf ../py3-source-install +virtualenv -p /usr/bin/python3 ../py3-source-install +source ../py3-source-install/bin/activate pip install -i https://testpypi.python.org/pypi beautifulsoup4 echo "EXPECT HTML ON LINE BELOW" (cd .. && python -c "from bs4 import _s; print(_s('<a>foo', 'html.parser'))") # That should print '<a>foo</a>' -deactivate -rm -rf ../py3-install-test-virtualenv +# Create and upload a Python 3 wheel from within a virtual environment +# that has the Python 3 version of the code. +pip install wheel +python3 setup.py bdist_wheel upload -r test +deactivate +rm -rf ../py3-source-install # Make sure setup.py works on 2.x rm -rf ../py2-install-test-virtualenv @@ -86,6 +76,7 @@ echo rm -rf ../py2-install-test-virtualenv virtualenv -p /usr/bin/python2.7 ../py2-install-test-virtualenv source ../py2-install-test-virtualenv/bin/activate +pip install --upgrade setuptools pip install dist/beautifulsoup4-4.*-py2-none-any.whl -e .[html5lib] echo "EXPECT HTML ON LINE BELOW" (cd .. && python -c "from bs4 import _s; print(_s('<a>foo', 'html5lib'))") @@ -98,6 +89,7 @@ echo rm -rf ../py3-install-test-virtualenv virtualenv -p /usr/bin/python3 ../py3-install-test-virtualenv source ../py3-install-test-virtualenv/bin/activate +pip install --upgrade setuptools pip install dist/beautifulsoup4-4.*-py3-none-any.whl -e .[html5lib] echo "EXPECT HTML ON LINE BELOW" (cd .. && python -c "from bs4 import _s; print(_s('<a>foo', 'html5lib'))") @@ -107,6 +99,34 @@ rm -rf ../py3-install-test-virtualenv ################ +Do the release for real. + +# Register the project and upload the source distribution and Python 2 wheel. +python setup.py register -r test +python setup.py sdist bdist_wheel upload -r test + +# Create a Python 3 environment and install Beautiful Soup +# from the source distribution that was just uploaded +rm -rf ../py3-source-install +virtualenv -p /usr/bin/python3 ../py3-source-install +source ../py3-source-install/bin/activate +pip install -i https://pypi.python.org/pypi beautifulsoup4 +echo "EXPECT HTML ON LINE BELOW" +(cd .. && python -c "from bs4 import _s; print(_s('<a>foo', 'html.parser'))") +# That should print '<a>foo</a>' + +# Create and upload a Python 3 wheel from within a virtual environment +# that has the Python 3 version of the code. +pip install wheel +python3 setup.py bdist_wheel upload -r test + +# Remove the Python 3 virtual environment. +deactivate +rm -rf ../py3-source-install + + +################ + To test, after release: rm -rf ../py2-install-test-virtualenv @@ -5,7 +5,7 @@ from setuptools import ( setup( name="beautifulsoup4", - version = "4.4.0", + version = "4.5.1", author="Leonard Richardson", author_email='leonardr@segfault.org', url="http://www.crummy.com/software/BeautifulSoup/bs4/", @@ -23,7 +23,7 @@ setup( "Intended Audience :: Developers", "License :: OSI Approved :: MIT License", "Programming Language :: Python", - "Programming Language :: Python :: 2", + "Programming Language :: Python :: 2.7", 'Programming Language :: Python :: 3', "Topic :: Text Processing :: Markup :: HTML", "Topic :: Text Processing :: Markup :: XML", |