diff options
-rw-r--r-- | NEWS.txt | 2 |
-rw-r--r-- | README.md (renamed from README.txt) | 38 |
-rw-r--r-- | bs4/builder/__init__.py | 6 |
-rw-r--r-- | setup.py | 6 |
4 files changed, 37 insertions, 15 deletions
@@ -1,4 +1,4 @@ -= 4.6.2 (unreleased) += 4.6.2 (20180812) * Fix an exception when a custom formatter was asked to format a void element. [bug=1784408] @@ -1,5 +1,10 @@ -= Introduction = +Beautiful Soup is a library that makes it easy to scrape information +from web pages. It sits atop an HTML or XML parser, providing Pythonic +idioms for iterating, searching, and modifying the parse tree. +# Quick start + +``` >>> from bs4 import BeautifulSoup >>> soup = BeautifulSoup("<p>Some<b>bad<i>HTML") >>> print soup.prettify() @@ -33,31 +38,38 @@ XML </tag3> </tag1> +``` + +To go beyond the basics, [comprehensive documentation is available](http://www.crummy.com/software/BeautifulSoup/bs4/doc/). + +# Links -= Full documentation = +* [Homepage](http://www.crummy.com/software/BeautifulSoup/bs4/) +* [Documentation](http://www.crummy.com/software/BeautifulSoup/bs4/doc/) +* [Discussion group](http://groups.google.com/group/beautifulsoup/) +* [Development](https://code.launchpad.net/beautifulsoup/) +* [Bug tracker](https://bugs.launchpad.net/beautifulsoup/) +* [Complete changelog](https://bazaar.launchpad.net/~leonardr/beautifulsoup/bs4/view/head:/NEWS.txt) + +# Building the documentation The bs4/doc/ directory contains full documentation in Sphinx -format. Run "make html" in that directory to create HTML +format. Run `make html` in that directory to create HTML documentation. -= Running the unit tests = +# Running the unit tests Beautiful Soup supports unit test discovery from the project root directory: +``` $ nosetests +``` +``` $ python -m unittest discover -s bs4 # Python 2.7 and up +``` If you checked out the source tree, you should see a script in the home directory called test-all-versions. This script will run the unit tests under Python 2.7, then create a temporary Python 3 conversion of the source and run the unit tests again under Python 3. 
- -= Links = - -Homepage: http://www.crummy.com/software/BeautifulSoup/bs4/ -Documentation: http://www.crummy.com/software/BeautifulSoup/bs4/doc/ - http://readthedocs.org/docs/beautiful-soup-4/ -Discussion group: http://groups.google.com/group/beautifulsoup/ -Development: https://code.launchpad.net/beautifulsoup/ -Bug tracker: https://bugs.launchpad.net/beautifulsoup/ diff --git a/bs4/builder/__init__.py b/bs4/builder/__init__.py index 21454e6..c9e3f3d 100644 --- a/bs4/builder/__init__.py +++ b/bs4/builder/__init__.py @@ -239,6 +239,12 @@ class HTMLTreeBuilder(TreeBuilder): # These are from earlier versions of HTML and are removed in HTML5. 'basefont', 'bgsound', 'command', 'frame', 'image', 'isindex', 'nextid', 'spacer' ]) + + # The HTML standard defines these as block-level elements. Beautiful + # Soup does not treat these elements differently from other elements, + # but it may do so eventually, and this information is available if + # you need to use it. + block_elements = set(["address", "article", "aside", "blockquote", "canvas", "dd", "div", "dl", "dt", "fieldset", "figcaption", "figure", "footer", "form", "h1", "h2", "h3", "h4", "h5", "h6", "header", "hr", "li", "main", "nav", "noscript", "ol", "output", "p", "pre", "section", "table", "tfoot", "ul", "video"]) # The HTML standard defines these attributes as containing a # space-separated list of values, not a single value. 
That is, diff --git a/setup.py b/setup.py --- a/setup.py +++ b/setup.py @@ -3,6 +3,9 @@ from setuptools import ( find_packages, ) +with open("README.md", "r") as fh: + long_description = fh.read() + setup( name="beautifulsoup4", version = "4.6.1", @@ -11,7 +14,8 @@ setup( url="http://www.crummy.com/software/BeautifulSoup/bs4/", download_url = "http://www.crummy.com/software/BeautifulSoup/bs4/download/", description="Screen-scraping library", - long_description="""Beautiful Soup sits atop an HTML or XML parser, providing Pythonic idioms for iterating, searching, and modifying the parse tree.""", + long_description=long_description, + long_description_content_type="text/markdown", license="MIT", packages=find_packages(exclude=['tests*']), extras_require = { |